# Setup

In [1]:
import pandas as pd
import datetime
from pathlib import Path

## only  need these to reload utils
# import importlib
# import utils

# # After making changes to your_module_name.py, run this cell
# importlib.reload(utils)

from utils import get_todays_games, filter_data_on_change, aggregate_betting_data, get_complete_game_results, process_and_save_evaluated_bets

In [2]:
import os

In [3]:
cwd = os.getcwd()
print(cwd)


if cwd == '/Users/aaronsmith/Code/Void/First Void App/llm_betting_model/scripts':
    os.chdir("/Users/aaronsmith/Code/Void/First Void App/llm_betting_model")
# os.chdir("/home/pi/Documents")

/Users/aaronsmith/Code/llm_betting_model/scripts


In [6]:
### load txt file as a pandas df
file_path = '/Users/aaronsmith/Code/llm_betting_model/data/cbb_bets_claude.txt'
df = pd.read_csv(file_path, sep=',')

In [4]:
import plotly_express as px

In [13]:
    
def build_ncaa_prompt(model_version):
    try:
        df_all = pd.read_csv('./data/ncaa_bets_db.csv')
    except:
        df_all = pd.DataFrame()


    # Example usage:
    HEADERS = {
        'Authority': 'api.actionnetwork',
        'Accept': 'application/json',
        'Origin': 'https://www.actionnetwork.com',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36'
    }


    # Example usage:
    sport='ncaab'


    # Get today's date object
    today = datetime.date.today()

    # Define the desired string format
    date_format = '%Y%m%d'

    # Create the list using strftime() to format the dates
    date_str_list = [
        (today + datetime.timedelta(days=0)).strftime(date_format), # Today
        (today + datetime.timedelta(days=1)).strftime(date_format), # Tomorrow
        (today + datetime.timedelta(days=2)).strftime(date_format), # The next day
        (today + datetime.timedelta(days=3)).strftime(date_format)  # The day after
    ]

    df = get_todays_games(sport,date_str_list,HEADERS)
    df['date_scraped'] = datetime.datetime.now()

    df = df.loc[df['status'] == 'scheduled']

    df_all = pd.concat([df_all,df])
    df_all['date_scraped'] = pd.to_datetime(df_all['date_scraped'])

    dimension_cols = ['game_id', 'home_team', 'away_team']
    metric_cols = ['home_money_line', 'away_money_line','total_score','home_money_line','away_money_line']
    filtered_df = filter_data_on_change(df_all, dimension_cols, metric_cols)
    print(df_all.index.size)
    print(filtered_df.index.size)

    filtered_df.to_csv('./data/ncaa_bets_db.csv', index=False)

    filtered_df['start_time_pt'] = pd.to_datetime(filtered_df['start_time_pt'])


    group_by_columns = ['game_id', 'home_team', 'away_team','start_time']
    metric_columns = [
        'num_bets', 'home_money_line', 'home_ml_ticket_pct', 'home_ml_money_pct',
        'away_money_line', 'away_ml_ticket_pct', 'away_ml_money_pct', 'total_score',
        'over_odds', 'under_odds', 'over_ticket_pct', 'over_money_pct',
        'under_ticket_pct', 'under_money_pct', 'home_spread', 'home_spread_odds',
        'home_spread_ticket_pct', 'home_spread_money_pct', 'away_spread',
        'away_spread_odds', 'away_spread_ticket_pct', 'away_spread_money_pct'
    ]

    next_games_list = df['game_id'].unique().tolist()

    display(filtered_df.sample(4))

    games_list = filtered_df.loc[filtered_df['game_id'].isin(next_games_list)].groupby(['game_id','home_team','away_team','start_time_pt']).agg(
        rec_count=('date_scraped','size'),
        num_bets=('num_bets','last')
    
    
    
    ).sort_values('num_bets', ascending=False).head(30).reset_index()['game_id'].tolist()


    for col in metric_columns:
        if col in filtered_df.columns:
            # This converts the column to a numeric type.
            # Any string like 'N/A' will become NaN.
            filtered_df[col] = pd.to_numeric(filtered_df[col], errors='coerce')

    df_agg = aggregate_betting_data(filtered_df.loc[filtered_df['game_id'].isin(games_list)], group_by_columns, metric_columns)

    df_agg = df_agg.sort_values('start_time',ascending=True)

    # Create the home_team_spread column
    df_agg['home_team_spread'] = df_agg['home_team'] + " " + df_agg['home_spread_last'].apply(lambda x: f"{x:+.1f}")

    # Create the away_team_spread column (assuming this is the second column you wanted)
    df_agg['away_team_spread'] = df_agg['away_team'] + " " + df_agg['away_spread_last'].apply(lambda x: f"{x:+.1f}")

    # display(df_agg[['home_team','away_team','home_spread_first','home_spread_last','home_team_spread','away_team_spread']])

    # display(df_agg[['home_team','away_team','home_spread_first','home_spread_last','home_spread_ticket_pct_first','home_spread_ticket_pct_last']])


    df_hist = pd.read_csv('./data/ncaab_bet_picks_evaluated.csv')
    df_hist = df_hist.loc[df_hist['model'] == model_version]

    # df_hist = pd.DataFrame()

    df1_string = df_agg.to_csv(index=False)
    df2_string = df_hist.to_csv(index=False)

    timestamp_str = datetime.datetime.now()

    prompt = f"""
    You are my expert college basketball betting adviser.
    I will provide you with two datasets:

    Dataset 1: Betting lines for upcoming games (money line, over/under, spread with first/avg/last values)
    Dataset 2: Historical betting results to analyze what's working and what's not

    Your goal: Maximize ROI by learning from historical patterns.

    CRITICAL VALIDATION REQUIREMENTS
    1. HOME vs AWAY TEAM IDENTIFICATION - READ CAREFULLY
    The dataset has two columns: home_team and away_team
    MATCH NAMING CONVENTION (MANDATORY):

    ALWAYS use format: "home_team vs away_team"
    Example: If home_team=Thunder, away_team=Wizards → Write "Thunder vs Wizards"
    The home team is ALWAYS listed first, away team second
    This makes it crystal clear which team is playing at home

    BEFORE MAKING ANY PICK:

    Identify from the dataset: Which team is in the home_team column?
    Identify from the dataset: Which team is in the away_team column?
    Write the match as "home_team vs away_team"
    Determine which team you want to pick
    Set the binary indicator based on whether that team is home or away

    BINARY INDICATOR RULES:

    If you pick the HOME team's spread → bet_home_spread=1, bet_away_spread=0
    If you pick the AWAY team's spread → bet_away_spread=1, bet_home_spread=0
    If you pick the HOME team's ML → bet_home_ml=1, bet_away_ml=0
    If you pick the AWAY team's ML → bet_away_ml=1, bet_home_ml=0

    EXAMPLE:
    Dataset shows: home_team=Thunder, away_team=Wizards
    Match name: "Thunder vs Wizards"
    If picking Thunder -15.5: bet_home_spread=1 (Thunder is home)
    If picking Wizards +15.5: bet_away_spread=1 (Wizards is away)

    ---
    ### **2. ODDS AND LINES VALIDATION - NO EXCEPTIONS**

    **Use ONLY the "_last" column values:**
    - `home_money_line_last` for home team ML
    - `away_money_line_last` for away team ML
    - `home_spread_last` and `home_spread_odds_last` for home team spread
    - `away_spread_last` and `away_spread_odds_last` for away team spread
    - `total_score_last`, `over_odds_last`, `under_odds_last` for totals

    **NEVER:**
    - Invent odds
    - Approximate odds
    - Use "avg" or "first" values (only use for analysis of line movement)
    - Make a pick if the line is not in the dataset

    ---

    ### **3. SPREAD DIRECTION RULES - READ CAREFULLY**

    **Understanding Spread Signs:**
    - **NEGATIVE spread (-X.X)** = That team is FAVORED by X.X points
    - **POSITIVE spread (+X.X)** = That team is UNDERDOG getting X.X points

    **Examples:**
    - `home_spread_last = -5.5` means: Home team FAVORED by 5.5, Away team gets +5.5
    - `home_spread_last = +3.5` means: Home team UNDERDOG getting +3.5, Away team favored by -3.5
    - `away_spread_last = -7.0` means: Away team FAVORED by 7.0, Home team gets +7.0
    - `away_spread_last = +4.0` means: Away team UNDERDOG getting +4.0, Home team favored by -4.0

    **Critical Understanding:**
    - If `home_spread_last` is negative → home team is favorite
    - If `home_spread_last` is positive → home team is underdog
    - If `away_spread_last` is negative → away team is favorite
    - If `away_spread_last` is positive → away team is underdog

    ---

    ### **4. MANDATORY DOUBLE-CHECK PROCESS**

    **Before finalizing EACH pick, complete these steps:**

    □ **Step 1**: Look at dataset - which team is `home_team`, which is `away_team`?
    □ **Step 2**: Write match as "home_team vs away_team"
    □ **Step 3**: Decide which team I want to pick
    □ **Step 4**: Is that team home or away?
    □ **Step 5**: Look up the EXACT line for that team in the "_last" columns
    □ **Step 6**: Copy the EXACT odds from the corresponding "_odds_last" column
    □ **Step 7**: Verify the sign (+ or -) matches favorite/underdog position
    □ **Step 8**: Set binary indicator: bet_home_X=1 if home team, bet_away_X=1 if away team
    □ **Step 9**: Cross-check one final time before writing

    **If you are uncertain about ANY detail, SKIP THAT PICK rather than guess.**

    ---

    ### **5. PICK TYPES AND BINARY INDICATORS**

    Please aim to make around 10 picks -- you can pick more or less, but I want to have at least 10 and then we can use the confidence to determine success

    You can make six types of picks:

    | Pick Type | Columns to Use | Binary Indicators |
    |-----------|----------------|-------------------|
    | Home ML | `home_money_line_last` | `bet_home_ml=1, bet_away_ml=0` |
    | Away ML | `away_money_line_last` | `bet_away_ml=1, bet_home_ml=0` |
    | Home Spread | `home_spread_last`, `home_spread_odds_last` | `bet_home_spread=1, bet_away_spread=0` |
    | Away Spread | `away_spread_last`, `away_spread_odds_last` | `bet_away_spread=1, bet_home_spread=0` |
    | Over | `total_score_last`, `over_odds_last` | `bet_over=1, bet_under=0` |
    | Under | `total_score_last`, `under_odds_last` | `bet_under=1, bet_over=0` |

    **All other binary indicators must be set to 0.**

    ---

    ### **6. CONFIDENCE & UNITS**

    - Rank all picks by confidence (most confident = rank 1)
    - Provide **confidence %** as integer between 0-100
    - Assign units based on confidence:
    - **3 units**: Highest confidence (90%+)
    - **2 units**: Medium confidence (80-89%)
    - **1 unit**: Lower confidence (70-79%)

    ---

    ### **7. PREDICTED SCORE FORMAT**

    - Format: "HomeScore-AwayScore" (e.g., "115-112")
    - Home team score ALWAYS listed first
    - Away team score ALWAYS listed second
    - Double-check the order matches your match naming

    ---

    ## **OUTPUT FORMAT**

    ### **Part 1: Human-Readable Table**

    Create a table with these columns:
    - Rank
    - Match (format: "home_team vs away_team")
    - Home Team
    - Away Team
    - Pick (e.g., "Thunder -15.5" or "Wizards +15.5")
    - Odds
    - Units
    - Confidence %
    - Reason - a well thought out reason why you are making the pick that you are
    - Reason Code - a codified reason for the decision. I will use these to track patterns and repeat successful strategies. This should be a shorter form code that is reused across multiple datasets.
    - Predicted Score (format: "HomeScore-AwayScore")

    ### **Part 2: CSV Block (Copy/Paste Ready)**

    Exact structure with this header row:
    ```
    rank,game_id,start_time,match,pick,odds,units,confidence_pct,reason,predicted_score,bet_home_spread,bet_home_ml,bet_away_spread,bet_away_ml,bet_over,bet_under,home_money_line,away_money_line,tie_money_line,total_score,over_odds,under_odds,home_spread,home_spread_odds,away_spread,away_spread_odds,timestamp
    ```
    **CSV Requirements:**
    - `match`: Must use "home_team vs away_team" format
    - `home_team`: home team
    - `away_team`: away team
    - `pick`: State team name and line (e.g., "Thunder -15.5")
    - `predicted_score`: Format as "HomeScore-AwayScore"
    - `bet_home_spread`, `bet_home_ml`, `bet_away_spread`, `bet_away_ml`, `bet_over`, `bet_under`: Must be 0 or 1
    - `home_money_line`: Value from `home_money_line_last`
    - `away_money_line`: Value from `away_money_line_last`
    - `tie_money_line`: Always "N/A"
    - `total_score`: Value from `total_score_last`
    - `over_odds`: Value from `over_odds_last`
    - `under_odds`: Value from `under_odds_last`
    - `home_spread`: Value from `home_spread_last`
    - `home_spread_odds`: Value from `home_spread_odds_last`
    - `away_spread`: Value from `away_spread_last`
    - `away_spread_odds`: Value from `away_spread_odds_last`
    - `timestamp`: use the time of this prompt -- {timestamp_str}

    ---

    ## **FINAL VERIFICATION CHECKLIST**

    Before submitting your picks, verify:

    □ Every match uses "home_team vs away_team" format
    □ Every pick references the correct team (home or away)
    □ Every odds value is copied exactly from "_last" column
    □ Every binary indicator correctly reflects whether the picked team is home or away
    □ Every spread sign (+ or -) matches the favorite/underdog position
    □ Every predicted score is in "HomeScore-AwayScore" format
    □ All CSV columns match the exact structure required

    ---

    ## **EXAMPLE OF CORRECT PICK**

    **Dataset shows:**
    - game_id: 261702
    - home_team: Thunder
    - away_team: Wizards
    - home_spread_last: -15.5
    - home_spread_odds_last: -110

    **Correct Pick:**
    - Match: "Thunder vs Wizards"
    - Pick: "Thunder -15.5"
    - Odds: -110
    - Binary: bet_home_spread=1, bet_away_spread=0, all others=0
    - Predicted Score: "126-108" (Thunder score first)

    **CSV Line:**
    ```
    1,261702,2025-10-31T00:00:00.000Z,Thunder vs Wizards,Thunder -15.5,-110,3,96,"Reason here",126-108,1,0,0,0,0,0,-1200,750,N/A,231.5,-110,-109,-15.5,-110,15.5,-110,2025-10-30T18:30:00Z

    Remember: Accuracy is more important than quantity. Skip any pick where you have uncertainty.


    Here are the upcoming games and their odds:
    {df1_string}

    Here is the historical dataset of your betting advice and results:
    {df2_string}
        """
    print('-------')
    print('-------')
    print('-------')
    print('-------')

    # Now you can print the full prompt
    print(prompt)

    ## write prompt to a text file
    with open(f"./prompts/ncaab_prompt_{model_version}.txt", "w") as f:
        f.write(prompt) 

    return df_agg



In [35]:
HEADERS = {
    'Authority': 'api.actionnetwork',
    'Accept': 'application/json',
    'Origin': 'https://www.actionnetwork.com',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36'
}

def process_results(model_name: str, picks_dir: Path, results_csv_path: Path):
    """
    Processes betting picks for a given model against a game results CSV.
    
    It finds missing game results, fetches them from an API, appends them
    to the main results CSV, and then evaluates all picks.
    """
    sport = 'ncaab'
    
    # === 1. Load Picks File ===
    picks_file = picks_dir / f'cbb_bets_{model_name}.txt'
    try:
        df_picks = pd.read_csv(picks_file)
    except FileNotFoundError:
        print(f"Error: Picks file not found at {picks_file}")
        return None # Exit function
    
    display(df_picks.sample(3))

    # === 2. Process Pick Timestamps ===
    # Step A: Ensure it is a string and remove hidden non-ASCII characters (like \u200b)
    df_picks['timestamp'] = (
        df_picks['timestamp']
        .astype(str)
        .str.replace(r'[^\x00-\x7F]+', '', regex=True)
    )

    # Step B: Convert using 'mixed' format, coercing unparseable rows to NaT (Not a Time)
    df_picks['timestamp'] = pd.to_datetime(df_picks['timestamp'], format='mixed', errors='coerce')

    # (Optional) Check if any rows failed to parse
    if df_picks['timestamp'].isna().any():
        print(f"Warning: {df_picks['timestamp'].isna().sum()} rows could not be parsed and are set to NaT.")

    df_picks['start_time_pt'] = (
        pd.to_datetime(df_picks['start_time'], utc=True)
        .dt.tz_convert('America/Los_Angeles')
    )
    df_picks['date'] = df_picks['start_time_pt'].dt.date
    df_picks['model'] = model_name

    # === 3. Load Existing Game Results ===
    # Check if the file exists *before* trying to read it.
    # This is key for knowing whether to write the header later.
    results_file_exists = results_csv_path.is_file()
    
    try:
        df_old_results = pd.read_csv(results_csv_path)
    except FileNotFoundError:
        print(f"Results file {results_csv_path} not found. A new one will be created.")
        df_old_results = pd.DataFrame() # Start with an empty DataFrame

    # === 4. Find Missing Results ===
    df_merge = pd.merge(
        df_picks[['rank', 'game_id', 'match', 'date', 'start_time', 'pick']],
        df_old_results,
        on='game_id',
        how='left',
        suffixes=('_pick', '_result')
    )
    missing_games = df_merge.loc[df_merge['status'] != 'complete']

    # === 5. Fetch and Append New Results (if any) ===
    df_new_results = pd.DataFrame() # Initialize as empty

    if not missing_games.empty:
        date_str_list = missing_games['date'].astype(str).str.replace('-', '').unique().tolist()
        
        if date_str_list:
            print(f"Found missing results for {len(date_str_list)} dates. Fetching...")
            df_new_results = get_complete_game_results(sport, date_str_list, HEADERS)
            
            if not df_new_results.empty:
                print(f"Appending {len(df_new_results)} new results to {results_csv_path}")
                # Append new data
                df_new_results.to_csv(
                    results_csv_path,
                    mode='a',
                    # Write header ONLY if the file didn't exist before
                    header=not results_file_exists, 
                    index=False
                )
            else:
                print("API call returned no new results.")
    else:
        print("No missing game results found. All picks are up-to-date.")

    # === 6. Combine All Results for Final Processing ===
    # *** THIS IS THE MAIN LOGIC FIX ***
    # Combine the old results and the brand-newly fetched results
    df_all_results = pd.concat([df_old_results, df_new_results], ignore_index=True)

    # Drop duplicates in case the API sent a game we already had
    if 'game_id' in df_all_results.columns and not df_all_results.empty:
        df_all_results = df_all_results.drop_duplicates(subset='game_id', keep='last')

    # === 7. Process and Save Evaluations ===
    # Pass the *complete* set of results (old + new)
    df_evaluated, df_evaluated_hist = process_and_save_evaluated_bets(
        df_picks, 
        df_all_results, 
        sport
    )

    if 'date' in df_evaluated_hist.columns:
        df_evaluated_hist['date'] = pd.to_datetime(df_evaluated_hist['date'])
    else:
        print("Warning: 'df_evaluated_hist' has no 'date' column to convert.")

    return df_evaluated_hist

# Load Hist

In [27]:
df_hist = pd.read_csv('./data/ncaab_bet_picks.csv')
model_list = df_hist['model'].unique().tolist()
print(model_list)
display(df_hist.sample(3))

['perp', 'claude', 'gemini']


Unnamed: 0,rank,game_id,start_time,match,pick,odds,units,confidence_pct,reason,predicted_score,bet_home_spread,bet_home_ml,bet_away_spread,bet_away_ml,bet_over,bet_under,home_money_line,away_money_line,tie_money_line,total_score,over_odds,under_odds,home_spread,home_spread_odds,away_spread,away_spread_odds,timestamp,start_time_pt,date,model,home_team,away_team,reason_code
121,10,271999,2025-11-15T20:00:00.000Z,SE Missouri vs St. Thomas,SE Missouri +2.5,-111,1,72,Road dog with sharp money backing (52% ML money despite 18% tickets). Contrarian play where sharp money supports underdog. Getting 2.5 points with sharp backing.,75-77,0,0,1,0,0,0,114.0,-135,,153.5,-105,-115,2.5,-111,-2.5,-111,2025-11-15 10:15:06.207347,2025-11-15 12:00:00-08:00,2025-11-15,claude,,,
30,11,266378,2025-11-13T00:00:00.000Z,Boston U vs Brown,Boston U -2.5,-117,1,75,Small home favorite under a possession and a half; HCA at this band is supportive relative to early-season big-chalk fades [web:13][web:6][web:15].,70-65,1,0,0,0,0,0,-162.0,136,,129.5,-115,-105,-2.5,-117,2.5,-104,2025-11-12 12:46:41.533925,2025-11-12 16:00:00-08:00,2025-11-12,perp,,,
10,1,266367,2025-11-11T23:30:00.000Z,Michigan vs Wake Forest,Michigan -15,-110,2,86,Home favorite with home_spread_last -15 and -110; market and totals imply separation consistent with heavy ML.,92-72,1,0,0,0,0,0,-1600.0,950,,168.5,-108,-110,-15.0,-110,15.0,-110,2025-11-11 14:10:22.997974,2025-11-11 15:30:00-08:00,2025-11-11,perp,,,


# Build Prompts

In [16]:

pd.set_option('display.max_colwidth', 500)
pd.options.display.max_columns = None
pd.set_option('display.max_rows', 500)

In [17]:
# df.sort_values('num_bets_last',ascending=False).head(10)

In [18]:
model_list

['perp', 'claude', 'gemini']

In [19]:
for model_name in model_list:
    df = build_ncaa_prompt(model_name)

Processing data for ncaab (date: 20251119)
global version
Fetching data from the Action Network API...
https://api.actionnetwork.com/web/v2/scoreboard/ncaab?bookIds=15,30,79,2988,75,123,71,68,69&periods=event&date=20251119&division=D1
Data successfully fetched.

--- Game Details ---
Game ID: 264919
League: ncaab
Matchup: Air Force vs Alabama St
Home Team ID: 973
Away Team ID: 892
Status: scheduled
Start Time: 2025-11-19T23:00:00.000Z
Home Score: N/A
Away Score: N/A
--------------------
Processing Game ID: 264919, Market ID: 15
Processing Game ID: 264919, Market ID: 30
Processing Game ID: 264919, Market ID: 68
Processing Game ID: 264919, Market ID: 69
Processing Game ID: 264919, Market ID: 71
Game ID: 264919, Market ID: 75 - No moneyline data available. Skipping...
Processing Game ID: 264919, Market ID: 123

--- Game Details ---
Game ID: 264901
League: ncaab
Matchup: Penn State vs Harvard
Home Team ID: 1006
Away Team ID: 1037
Status: scheduled
Start Time: 2025-11-19T23:30:00.000Z
Home S

  filtered_df['start_time_pt'] = pd.to_datetime(filtered_df['start_time_pt'])


Unnamed: 0,game_id,league_name,home_team,away_team,home_team_id,away_team_id,status,home_score,away_score,home_pitcher,home_pitcher_era,home_pitcher_k9,home_pitcher_ip,home_pitcher_starts,home_pitcher_win,home_pitcher_loss,away_pitcher,away_pitcher_era,away_pitcher_k9,away_pitcher_ip,away_pitcher_starts,away_pitcher_win,away_pitcher_loss,start_time,market_id,book_id,event_id,num_bets,home_money_line,home_ml_ticket_pct,home_ml_money_pct,away_money_line,away_ml_ticket_pct,away_ml_money_pct,tie_money_line,tie_ml_ticket_pct,tie_ml_money_pct,total_score,over_odds,under_odds,over_ticket_pct,over_money_pct,under_ticket_pct,under_money_pct,home_spread,home_spread_odds,home_spread_ticket_pct,home_spread_money_pct,away_spread,away_spread_odds,away_spread_ticket_pct,away_spread_money_pct,start_time_pt,date_scraped
230,264880,ncaab,UNC Wilmington,E. Carolina,1052,825,scheduled,,,,,,,,,,,,,,,,,2025-11-19T00:00:00.000Z,15,15.0,264880.0,2544.0,-259.0,98.0,99.0,220.0,2.0,1.0,,,,145.5,-115,-105,56,57,44,43,-6.5,-108,73,74,6.5,-112.0,27.0,26.0,2025-11-18 16:00:00-08:00,2025-11-18 23:25:03.148636
668,264897,ncaab,Santa Clara,Idaho State,1056,808,scheduled,,,,,,,,,,,,,,,,,2025-11-19T03:00:00.000Z,15,15.0,264897.0,1120.0,-2240.0,98.0,98.0,1100.0,2.0,2.0,,,,143.5,-110,-110,74,72,26,28,-15.5,-110,30,32,15.5,-111.0,70.0,68.0,2025-11-18 19:00:00-08:00,2025-11-18 20:06:24.715043
648,264874,ncaab,Washington,Southern U,758,897,scheduled,,,,,,,,,,,,,,,,,2025-11-19T02:30:00.000Z,15,15.0,264874.0,974.0,-4000.0,97.0,92.0,1800.0,3.0,8.0,,,,157.5,-113,-105,47,48,53,52,-24.5,-110,21,24,24.5,-110.0,79.0,76.0,2025-11-18 18:30:00-08:00,2025-11-18 20:06:24.715043
414,266427,ncaab,JMU,Towson,1046,1047,scheduled,,,,,,,,,,,,,,,,,2025-11-19T00:00:00.000Z,15,15.0,266427.0,2317.0,140.0,21.0,13.0,-166.0,79.0,87.0,,,,138.5,-115,-105,81,81,19,19,3.5,-115,50,58,-3.5,-105.0,50.0,42.0,2025-11-18 16:00:00-08:00,2025-11-18 20:06:27.536654


-------
-------
-------
-------

    You are my expert college basketball betting adviser.
    I will provide you with two datasets:

    Dataset 1: Betting lines for upcoming games (money line, over/under, spread with first/avg/last values)
    Dataset 2: Historical betting results to analyze what's working and what's not

    Your goal: Maximize ROI by learning from historical patterns.

    CRITICAL VALIDATION REQUIREMENTS
    1. HOME vs AWAY TEAM IDENTIFICATION - READ CAREFULLY
    The dataset has two columns: home_team and away_team
    MATCH NAMING CONVENTION (MANDATORY):

    ALWAYS use format: "home_team vs away_team"
    Example: If home_team=Thunder, away_team=Wizards → Write "Thunder vs Wizards"
    The home team is ALWAYS listed first, away team second
    This makes it crystal clear which team is playing at home

    BEFORE MAKING ANY PICK:

    Identify from the dataset: Which team is in the home_team column?
    Identify from the dataset: Which team is in the away_team c

  filtered_df['start_time_pt'] = pd.to_datetime(filtered_df['start_time_pt'])


Unnamed: 0,game_id,league_name,home_team,away_team,home_team_id,away_team_id,status,home_score,away_score,home_pitcher,home_pitcher_era,home_pitcher_k9,home_pitcher_ip,home_pitcher_starts,home_pitcher_win,home_pitcher_loss,away_pitcher,away_pitcher_era,away_pitcher_k9,away_pitcher_ip,away_pitcher_starts,away_pitcher_win,away_pitcher_loss,start_time,market_id,book_id,event_id,num_bets,home_money_line,home_ml_ticket_pct,home_ml_money_pct,away_money_line,away_ml_ticket_pct,away_ml_money_pct,tie_money_line,tie_ml_ticket_pct,tie_ml_money_pct,total_score,over_odds,under_odds,over_ticket_pct,over_money_pct,under_ticket_pct,under_money_pct,home_spread,home_spread_odds,home_spread_ticket_pct,home_spread_money_pct,away_spread,away_spread_odds,away_spread_ticket_pct,away_spread_money_pct,start_time_pt,date_scraped
115,266430,ncaab,Bowling Green,William & Mary,1000,1051,scheduled,,,,,,,,,,,,,,,,,2025-11-20T00:00:00.000Z,15,15.0,266430.0,974.0,-360.0,97.0,100.0,285.0,3.0,0.0,,,,167.5,-105,-110,40,40,60,60,-7.5,-110,73,78,7.5,-110.0,27.0,22.0,2025-11-19 16:00:00-08:00,2025-11-19 16:50:03.460857
1148,266442,ncaab,Purdue,Memphis,1003,817,scheduled,,,,,,,,,,,,,,,,,2025-11-20T23:00:00.000Z,15,15.0,266442.0,136.0,-1600.0,98.0,92.0,920.0,2.0,8.0,,,,159.5,-110,-110,55,55,45,45,-14.5,-110,0,0,14.5,-110.0,0.0,0.0,2025-11-20 15:00:00-08:00,2025-11-19 14:37:37.570611
29,264902,ncaab,Indiana St,LA Tech,772,863,scheduled,,,,,,,,,,,,,,,,,2025-11-20T00:00:00.000Z,15,15.0,264902.0,343.0,-175.0,78.0,90.0,145.0,22.0,10.0,,,,145.5,-110,-110,59,59,41,41,-3.5,-115,90,95,3.5,-105.0,10.0,5.0,2025-11-19 16:00:00-08:00,2025-11-19 08:38:19.995421
728,264768,ncaab,Wichita State,Loyola (IL),781,775,scheduled,,,,,,,,,,,,,,,,,2025-11-14T00:30:00.000Z,15,15.0,264768.0,8062.0,-225.0,96.0,96.0,185.0,4.0,4.0,,,,153.5,-105,-115,66,65,34,35,-5.5,-110,78,82,5.5,-111.0,22.0,18.0,2025-11-13 16:30:00-08:00,2025-11-13 15:00:25.961583


-------
-------
-------
-------

    You are my expert college basketball betting adviser.
    I will provide you with two datasets:

    Dataset 1: Betting lines for upcoming games (money line, over/under, spread with first/avg/last values)
    Dataset 2: Historical betting results to analyze what's working and what's not

    Your goal: Maximize ROI by learning from historical patterns.

    CRITICAL VALIDATION REQUIREMENTS
    1. HOME vs AWAY TEAM IDENTIFICATION - READ CAREFULLY
    The dataset has two columns: home_team and away_team
    MATCH NAMING CONVENTION (MANDATORY):

    ALWAYS use format: "home_team vs away_team"
    Example: If home_team=Thunder, away_team=Wizards → Write "Thunder vs Wizards"
    The home team is ALWAYS listed first, away team second
    This makes it crystal clear which team is playing at home

    BEFORE MAKING ANY PICK:

    Identify from the dataset: Which team is in the home_team column?
    Identify from the dataset: Which team is in the away_team c

  filtered_df['start_time_pt'] = pd.to_datetime(filtered_df['start_time_pt'])


Unnamed: 0,game_id,league_name,home_team,away_team,home_team_id,away_team_id,status,home_score,away_score,home_pitcher,home_pitcher_era,home_pitcher_k9,home_pitcher_ip,home_pitcher_starts,home_pitcher_win,home_pitcher_loss,away_pitcher,away_pitcher_era,away_pitcher_k9,away_pitcher_ip,away_pitcher_starts,away_pitcher_win,away_pitcher_loss,start_time,market_id,book_id,event_id,num_bets,home_money_line,home_ml_ticket_pct,home_ml_money_pct,away_money_line,away_ml_ticket_pct,away_ml_money_pct,tie_money_line,tie_ml_ticket_pct,tie_ml_money_pct,total_score,over_odds,under_odds,over_ticket_pct,over_money_pct,under_ticket_pct,under_money_pct,home_spread,home_spread_odds,home_spread_ticket_pct,home_spread_money_pct,away_spread,away_spread_odds,away_spread_ticket_pct,away_spread_money_pct,start_time_pt,date_scraped
44,264870,ncaab,Buffalo,Vermont,1001,1018,scheduled,,,,,,,,,,,,,,,,,2025-11-18T23:30:00.000Z,15,15.0,264870.0,3736.0,120.0,25.0,39.0,-142.0,75.0,61.0,,,,141.5,-106,-110,88,86,12,14,2.5,-112,44,35,-2.5,-108.0,56.0,65.0,2025-11-18 15:30:00-08:00,2025-11-18 22:27:54.878800
1087,264911,ncaab,Duquesne,Loyola (MD),1087,936,scheduled,,,,,,,,,,,,,,,,,2025-11-20T00:00:00.000Z,15,15.0,264911.0,277.0,-850.0,97.0,95.0,540.0,3.0,5.0,,,,147.5,-110,-110,56,61,44,39,-12.5,-111,0,0,12.5,-109.0,0.0,0.0,2025-11-19 16:00:00-08:00,2025-11-19 10:36:07.522313
1029,264782,ncaab,Washington St,Washington,750,758,scheduled,,,,,,,,,,,,,,,,,2025-11-15T04:00:00.000Z,15,15.0,264782.0,1342.0,320.0,4.0,3.0,-410.0,96.0,97.0,,,,158.5,-110,-111,52,52,48,48,8.5,-110,46,28,-8.5,-110.0,54.0,72.0,2025-11-14 20:00:00-08:00,2025-11-14 10:45:56.182855
1010,264810,ncaab,Villanova,Duquesne,1066,1087,scheduled,,,,,,,,,,,,,,,,,2025-11-16T01:00:00.000Z,15,15.0,264810.0,1625.0,-625.0,99.0,98.0,425.0,1.0,2.0,,,,150.5,-110,-110,36,37,64,63,-10.5,-110,56,80,10.5,-110.0,44.0,20.0,2025-11-15 17:00:00-08:00,2025-11-15 10:09:29.999661


-------
-------
-------
-------

    You are my expert college basketball betting adviser.
    I will provide you with two datasets:

    Dataset 1: Betting lines for upcoming games (money line, over/under, spread with first/avg/last values)
    Dataset 2: Historical betting results to analyze what's working and what's not

    Your goal: Maximize ROI by learning from historical patterns.

    CRITICAL VALIDATION REQUIREMENTS
    1. HOME vs AWAY TEAM IDENTIFICATION - READ CAREFULLY
    The dataset has two columns: home_team and away_team
    MATCH NAMING CONVENTION (MANDATORY):

    ALWAYS use format: "home_team vs away_team"
    Example: If home_team=Thunder, away_team=Wizards → Write "Thunder vs Wizards"
    The home team is ALWAYS listed first, away team second
    This makes it crystal clear which team is playing at home

    BEFORE MAKING ANY PICK:

    Identify from the dataset: Which team is in the home_team column?
    Identify from the dataset: Which team is in the away_team c

# Evaluate Results

In [32]:
model_list

['perp', 'claude', 'gemini']

In [33]:
model_list = ['perp','claude'
,'gemini'
]

In [36]:

for model_name in model_list:
    base_dir = Path('./data')
    results_file = Path('./data/ncaab_game_results.csv')
    print(model_name)
    df_evaluated_hist = process_results(model_name, base_dir, results_file)

perp


Unnamed: 0,rank,game_id,start_time,match,pick,odds,units,confidence_pct,reason,predicted_score,bet_home_spread,bet_home_ml,bet_away_spread,bet_away_ml,bet_over,bet_under,home_money_line,away_money_line,tie_money_line,total_score,over_odds,under_odds,home_spread,home_spread_odds,away_spread,away_spread_odds,timestamp
53,9,264771,2025-11-15T00:00:00.000Z,Towson vs Norfolk State,Towson -13.5,-106,1,76,Movement pushed Towson to -13.5 on last; laying under 14 with HCA is acceptable relative to extreme chalk fades [web:6][web:13][web:15].,75-60,1,0,0,0,0,0,-1000.0,650,,138.5,-105,-115,-13.5,-106,13.5,-113,2025-11-14 10:45:56.264017
66,10,266388,2025-11-15T19:00:00.000Z,Winthrop vs Mercer,Winthrop -2.5,-115,1,75,Very small home favorite; HCA plus non-con step-up supports laying under 3 [web:13][web:6][web:15].,77-73,1,0,0,0,0,0,-135.0,114,,163.5,-110,-110,-2.5,-115,2.5,-105,2025-11-15 10:15:03.538272
91,11,264865,2025-11-19T02:00:00.000Z,Boise State vs Wichita State,Boise State -9.5,-115,1,74,Borderline single/double-digit home fav; HCA and depth support a two-possession margin late [web:13][web:6][web:25].,77-67,1,0,0,0,0,0,-516.0,390,,150.5,-110,-110,-9.5,-115,9.5,-105,2025-11-18 07:15:57.987612


Found missing results for 1 dates. Fetching...
Processing data for ncaab (date: 20251119)
global version
Fetching data from the Action Network API...
https://api.actionnetwork.com/web/v2/scoreboard/ncaab?bookIds=15,30,79,2988,75,123,71,68,69&periods=event&date=20251119&division=D1
Data successfully fetched.

--- Game Details ---
Game ID: 264919
League: ncaab
Matchup: Air Force vs Alabama St
Home Team ID: 973
Away Team ID: 892
Status: scheduled
Start Time: 2025-11-19T23:00:00.000Z
Home Score: N/A
Away Score: N/A
--------------------
Processing Game ID: 264919, Market ID: 15
Processing Game ID: 264919, Market ID: 30
Processing Game ID: 264919, Market ID: 68
Processing Game ID: 264919, Market ID: 69
Processing Game ID: 264919, Market ID: 71
Game ID: 264919, Market ID: 75 - No moneyline data available. Skipping...
Processing Game ID: 264919, Market ID: 123

--- Game Details ---
Game ID: 264901
League: ncaab
Matchup: Penn State vs Harvard
Home Team ID: 1006
Away Team ID: 1037
Status: schedu

Unnamed: 0,model,bet_payout,units,bets,ROI
0,gemini,-8.434043,16,8,-52.712767
1,claude,-15.537216,124,70,-12.530013
2,perp,-22.041007,138,92,-15.971744


claude


Unnamed: 0,rank,game_id,start_time,match,pick,odds,units,confidence_pct,reason,predicted_score,bet_home_spread,bet_home_ml,bet_away_spread,bet_away_ml,bet_over,bet_under,home_money_line,away_money_line,tie_money_line,total_score,over_odds,under_odds,home_spread,home_spread_odds,away_spread,away_spread_odds,timestamp
7,8,271994,2025-11-12T02:00:00.000Z,Arizona vs N. Arizona,Over 162.5,-105,2,78,Over tickets jumped from 47% to 66% and over money from 45% to 65%. Strong late movement to over. Arizona will score at will at home and should push tempo. In-state rivalry often produces more scoring than expected.,98-71,0,0,0,0,1,0,-6500,2000,,162.5,-105,-115,-35.5,-115,35.5,-105,2025-11-11 14:10:22.997974
33,1,264781,2025-11-15T00:00:00.000Z,Iona vs Fordham,Iona -5.5,-110,3,92,Home favorite with overwhelming consensus (97% ML tickets 100% ML money 88% spread tickets 93% spread money). Medium-small spread with dominant support. Line stable showing confidence. Perfect profile matching our winners.,82-74,1,0,0,0,0,0,-245,200,,157.5,-110,-111,-5.5,-110,5.5,-110,2025-11-14 10:45:55.263775
24,3,264768,2025-11-14T00:30:00.000Z,Wichita State vs Loyola (IL),Wichita State -5.5,-110,3,88,Home favorite with dominant consensus (96% ML tickets/money 78% spread tickets 82% spread money). Medium-small spread with overwhelming support. Line stable showing book confidence. Matches Missouri -6.5 winner profile.,82-75,1,0,0,0,0,0,-225,185,,153.5,-105,-115,-5.5,-110,5.5,-111,2025-11-13 15:00:25.271884


Found missing results for 1 dates. Fetching...
Processing data for ncaab (date: 20251119)
global version
Fetching data from the Action Network API...
https://api.actionnetwork.com/web/v2/scoreboard/ncaab?bookIds=15,30,79,2988,75,123,71,68,69&periods=event&date=20251119&division=D1
Data successfully fetched.

--- Game Details ---
Game ID: 264919
League: ncaab
Matchup: Air Force vs Alabama St
Home Team ID: 973
Away Team ID: 892
Status: scheduled
Start Time: 2025-11-19T23:00:00.000Z
Home Score: N/A
Away Score: N/A
--------------------
Processing Game ID: 264919, Market ID: 15
Processing Game ID: 264919, Market ID: 30
Processing Game ID: 264919, Market ID: 68
Processing Game ID: 264919, Market ID: 69
Processing Game ID: 264919, Market ID: 71
Game ID: 264919, Market ID: 75 - No moneyline data available. Skipping...
Processing Game ID: 264919, Market ID: 123

--- Game Details ---
Game ID: 264901
League: ncaab
Matchup: Penn State vs Harvard
Home Team ID: 1006
Away Team ID: 1037
Status: schedu

Unnamed: 0,model,bet_payout,units,bets,ROI
0,gemini,-8.434043,16,8,-52.712767
1,claude,-15.537216,124,70,-12.530013
2,perp,-22.041007,138,92,-15.971744


gemini


Unnamed: 0,rank,game_id,start_time,match,pick,odds,units,confidence_pct,reason,predicted_score,bet_home_spread,bet_home_ml,bet_away_spread,bet_away_ml,bet_over,bet_under,home_money_line,away_money_line,tie_money_line,total_score,over_odds,under_odds,home_spread,home_spread_odds,away_spread,away_spread_odds,timestamp
1,2,264901,2025-11-19T23:30:00.000Z,Penn State vs Harvard,Harvard +12.5,-110,3,92,"Strong sharp support for the road dog. Harvard has 58% of the tickets but a massive 79% of the money, suggesting the line is inflated.",75-70,0,0,1,0,0,0,-950,645,,143.5,-110,-110,-12.5,-110,12.5,-110,2025-11-19 21:29:22.735240
7,8,266429,2025-11-20T03:00:00.000Z,Weber State vs Campbell,Under 159.5,-110,1,77,"Fade the public. 76% of tickets are on the Over, creating an inflated line. Betting the Under against the public consensus.",78-72,0,0,0,0,0,1,-147,123,,159.5,-110,-110,-2.5,-110,2.5,-110,2025-11-19 21:29:22.735240
2,3,264902,2025-11-20T00:00:00.000Z,Indiana St vs LA Tech,LA Tech +3.5,-115,2,88,"Classic sharp underdog split. The public is leaning Indiana St, but LA Tech commands 58% of the money on only 43% of the tickets.",74-73,0,0,1,0,0,0,-173,145,,147.0,-110,-110,-3.5,-105,3.5,-115,2025-11-19 21:29:22.735240


Found missing results for 1 dates. Fetching...
Processing data for ncaab (date: 20251119)
global version
Fetching data from the Action Network API...
https://api.actionnetwork.com/web/v2/scoreboard/ncaab?bookIds=15,30,79,2988,75,123,71,68,69&periods=event&date=20251119&division=D1
Data successfully fetched.

--- Game Details ---
Game ID: 264919
League: ncaab
Matchup: Air Force vs Alabama St
Home Team ID: 973
Away Team ID: 892
Status: scheduled
Start Time: 2025-11-19T23:00:00.000Z
Home Score: N/A
Away Score: N/A
--------------------
Processing Game ID: 264919, Market ID: 15
Processing Game ID: 264919, Market ID: 30
Processing Game ID: 264919, Market ID: 68
Processing Game ID: 264919, Market ID: 69
Processing Game ID: 264919, Market ID: 71
Game ID: 264919, Market ID: 75 - No moneyline data available. Skipping...
Processing Game ID: 264919, Market ID: 123

--- Game Details ---
Game ID: 264901
League: ncaab
Matchup: Penn State vs Harvard
Home Team ID: 1006
Away Team ID: 1037
Status: schedu

Unnamed: 0,model,bet_payout,units,bets,ROI
0,gemini,-8.434043,16,8,-52.712767
1,claude,-15.537216,124,70,-12.530013
2,perp,-22.041007,138,92,-15.971744


In [88]:
df_hist_agg = df_evaluated_hist.groupby(['model','date']).agg(
    bet_payout=('bet_payout','sum'),
    units=('units','sum'),
    bets=('game_id','count')
    ).assign(ROI=lambda x: x['bet_payout'] / x['units'] * 100).sort_values(['date','model'],ascending=True).reset_index()

df_hist_agg['CUMULATIVE_PAYOUT']=df_hist_agg.groupby('model')['bet_payout'].cumsum()
df_hist_agg['CUMULATIVE_BETS']=df_hist_agg.groupby('model')['bets'].cumsum()
df_hist_agg['CUMULATIVE_ROI'] = df_hist_agg['CUMULATIVE_PAYOUT'] / df_hist_agg['CUMULATIVE_BETS']

df_hist_agg.tail(10)

Unnamed: 0,model,date,bet_payout,units,bets,ROI,CUMULATIVE_PAYOUT,CUMULATIVE_BETS,CUMULATIVE_ROI
5,perp,2025-11-13,-2.668831,16,9,-16.680195,7.412879,41,0.180802
6,claude,2025-11-14,-4.977728,24,14,-20.740535,-4.376565,44,-0.099467
7,perp,2025-11-14,-9.55807,21,15,-45.51462,-2.145191,56,-0.038307
8,claude,2025-11-15,-9.656831,21,11,-45.98491,-14.033396,55,-0.255153
9,perp,2025-11-15,-12.375914,18,12,-68.755077,-14.521105,68,-0.213546
10,claude,2025-11-17,-4.05304,12,8,-33.77533,-18.086435,63,-0.287086
11,gemini,2025-11-17,-8.434043,16,8,-52.712767,-8.434043,8,-1.054255
12,perp,2025-11-17,-10.388206,18,12,-57.712258,-24.909311,80,-0.311366
13,claude,2025-11-18,1.640128,6,6,27.335466,-16.446307,69,-0.238352
14,perp,2025-11-18,5.868304,15,9,39.122028,-19.041007,89,-0.213944


In [89]:
fig = px.scatter(
    df_hist_agg,
    x='date',
        y='bet_payout',
        color='model',
        template='simple_white',

    )

fig.update_layout(
            # title=f"Active / Canceled Subscription Count for {customer_id}<br><sup>{viz_tag}</sup>",
            font_family='Futura',
            height=600,
            font_color='black',
            showlegend=True,
            hovermode='x unified'
        )
fig.update_traces(mode='lines+markers',
                    opacity=.75,
                    marker=dict(size=12,line=dict(width=2,color='DarkSlateGrey'))
                    )
fig.update_xaxes(
            # title='Analytics Date',

                        )
fig.update_yaxes(
            # title='Product Count',

                        )

fig.show()

In [90]:
fig = px.scatter(
        df_hist_agg,
        x='date',
        y='CUMULATIVE_ROI',
        color='model',
        template='simple_white',

    )

fig.update_layout(
            # title=f"Active / Canceled Subscription Count for {customer_id}<br><sup>{viz_tag}</sup>",
            font_family='Futura',
            height=600,
            font_color='black',
            showlegend=True,
            hovermode='x unified'
        )
fig.update_traces(mode='lines+markers',
                    opacity=.75,
                    marker=dict(size=12,line=dict(width=2,color='DarkSlateGrey'))
                    )
fig.update_xaxes(
            # title='Analytics Date',

                        )
fig.update_yaxes(
    tickformat = ',.0%'
            # title='Product Count',

                        )

fig.show()

In [40]:
df_evaluated_hist

Unnamed: 0,rank,model,date,game_id,match,home_score,away_score,pick,odds,units,bet_result,bet_payout
0,1,claude,2025-11-11,264701,Illinois vs Texas Tech,81,77,Over 168.5,-105,3,loss,-3.0
1,2,claude,2025-11-11,264732,Louisville vs Kentucky,96,88,Louisville -6.5,-110,3,win,2.727273
2,3,claude,2025-11-11,264721,UNC vs Radford,89,74,UNC -19.5,-112,3,loss,-3.0
3,4,claude,2025-11-11,266367,Michigan vs Wake Forest,85,84,Under 168.5,-110,2,loss,-2.0
4,5,claude,2025-11-11,266649,Gonzaga vs Creighton,90,63,Over 165.5,-105,2,loss,-2.0
5,6,claude,2025-11-11,264731,Florida vs Florida St,78,76,Florida St +17.5,-110,2,win,1.818182
6,7,claude,2025-11-11,264718,BYU vs Delaware,85,68,Over 164.5,-108,2,loss,-2.0
7,8,claude,2025-11-11,271994,Arizona vs N. Arizona,84,49,Over 162.5,-105,2,loss,-2.0
8,9,claude,2025-11-11,264714,Kansas vs Texas A&M-CC,77,46,Over 147.5,-105,1,loss,-1.0
9,10,claude,2025-11-11,264709,Wisconsin vs Ball State,86,55,Ball State +27.5,-110,1,loss,-1.0
