<a href="https://colab.research.google.com/github/gazuty/betfair-dashboard/blob/main/Results.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# --- STEP 1: Master Updater ---

import os
import glob
import pandas as pd
import shutil
import hashlib

def _dedup_keys(frame):
    """Return one identity string per row, used to spot bets already in the master.

    The key joins the market name, the settled timestamp (formatted to the
    second) and the profit/loss value with "|". Two rows are treated as the
    same bet only when all three parts match.
    """
    return (
        frame['Market'].astype(str)
        + "|" + frame['Settled date'].dt.strftime('%Y-%m-%d %H:%M:%S')
        + "|" + frame['Profit_Loss'].astype(str)
    )


def update_betfair_master(base_folder='/content/drive/My Drive/Betfair'):
    """Fold newly exported Betfair CSVs into the master CSV, then archive them.

    Looks in ``base_folder`` for ``BettingPandL*.csv`` files plus one fixed
    "Results Summary" export, normalises their profit column into
    ``Profit_Loss``, appends the rows whose (market, settled date, profit)
    key is not already in ``Betfair_Master.csv``, and finally moves every
    discovered input file into ``base_folder/Archive``.

    Args:
        base_folder: Directory holding the master CSV and the new export
            files. Defaults to the Google Drive folder used by this notebook.

    Returns:
        None. Progress is reported with ``print``; the master CSV is only
        rewritten when at least one new row was found.
    """
    BASE_FOLDER = base_folder
    ARCHIVE_FOLDER = os.path.join(BASE_FOLDER, 'Archive')
    os.makedirs(ARCHIVE_FOLDER, exist_ok=True)

    MASTER_CSV = os.path.join(BASE_FOLDER, 'Betfair_Master.csv')
    BETTING_PATTERN = os.path.join(BASE_FOLDER, 'BettingPandL*.csv')
    RESULTS_FILE = os.path.join(BASE_FOLDER, 'Results Summary export 25-05-11 095900.csv')

    if os.path.exists(MASTER_CSV):
        df_master = pd.read_csv(MASTER_CSV)
        df_master['Settled date'] = pd.to_datetime(df_master['Settled date'], errors='coerce')
        df_master['Profit_Loss'] = pd.to_numeric(df_master['Profit_Loss'], errors='coerce')
        print(f"✅ Loaded master: {MASTER_CSV} ({len(df_master)} rows) Profit_Loss dtype: {df_master['Profit_Loss'].dtype}")
    else:
        print("⚠ No master found — starting fresh.")
        df_master = pd.DataFrame()

    # Find files
    new_files = glob.glob(BETTING_PATTERN)
    if os.path.exists(RESULTS_FILE):
        new_files.append(RESULTS_FILE)

    print(f"📂 Found {len(new_files)} new file(s) to process.")
    if not new_files:
        print("⚠ No new data files found — master remains unchanged.")
        return

    # Load and standardize: the exports name the profit column differently,
    # so collapse whichever variant is present into a single 'Profit_Loss'.
    dfs_new = []
    for file in new_files:
        print(f"📥 Processing: {os.path.basename(file)}")
        df = pd.read_csv(file)
        has_aud = 'Profit/Loss (AUD)' in df.columns
        has_plain = 'Profit / loss' in df.columns
        if has_aud and has_plain:
            # Prefer the AUD figure and fall back to the plain one where it is missing.
            df['Profit_Loss'] = df['Profit/Loss (AUD)'].fillna(df['Profit / loss'])
        elif has_aud:
            df['Profit_Loss'] = df['Profit/Loss (AUD)']
        elif has_plain:
            df['Profit_Loss'] = df['Profit / loss']
        else:
            print(f"⚠ Skipping {file} — no profit column found.")
            continue
        df['Profit_Loss'] = pd.to_numeric(df['Profit_Loss'], errors='coerce')
        dfs_new.append(df)

    if not dfs_new:
        print("⚠ No valid new data loaded — master remains unchanged.")
        return

    df_new = pd.concat(dfs_new, ignore_index=True)
    df_new['Settled date'] = pd.to_datetime(df_new['Settled date'], errors='coerce')
    df_new = df_new.dropna(subset=['Settled date']).reset_index(drop=True)

    if df_master.empty:
        # No existing rows (first run or empty master): nothing can be a duplicate.
        df_new_unique = df_new
    else:
        already_known = df_master.pipe(_dedup_keys)
        df_new_unique = df_new[~_dedup_keys(df_new).isin(already_known)]
    print(f"✅ Identified {len(df_new_unique)} unique new row(s).")

    if not df_new_unique.empty:
        # Leave an empty master out of the concat so it cannot influence column dtypes.
        frames = ([] if df_master.empty else [df_master]) + [df_new_unique]
        df_combined = pd.concat(frames, ignore_index=True)
        df_combined['Profit_Loss'] = pd.to_numeric(df_combined['Profit_Loss'], errors='coerce')
        print(f"✅ Final master row count: {len(df_combined)}. Profit_Loss dtype: {df_combined['Profit_Loss'].dtype}")
        df_combined.to_csv(MASTER_CSV, index=False)
        print(f"✅ Master updated and saved to {MASTER_CSV}")
    else:
        print("⚠ No new rows added to master — no changes made.")

    # Archive files (every discovered input, including any that were skipped above)
    for file in new_files:
        shutil.move(file, os.path.join(ARCHIVE_FOLDER, os.path.basename(file)))
        print(f"📦 Archived {os.path.basename(file)}")

# Run the updater now: it merges any new export files found in the Betfair
# folder into the master CSV and moves them to the Archive folder.
update_betfair_master()


✅ Loaded master: /content/drive/My Drive/Betfair/Betfair_Master.csv (18431 rows) Profit_Loss dtype: float64
📂 Found 0 new file(s) to process.
⚠ No new data files found — master remains unchanged.


In [None]:
# --- STEP 2: Load Master ---

# MASTER_CSV is only a local variable inside update_betfair_master(), so it
# must be defined here for this cell to run on a fresh kernel.
MASTER_CSV = os.path.join('/content/drive/My Drive/Betfair', 'Betfair_Master.csv')

# Load the master and coerce the two columns every later step relies on;
# unparseable values become NaT/NaN instead of raising.
df = pd.read_csv(MASTER_CSV)
df['Settled date'] = pd.to_datetime(df['Settled date'], errors='coerce')
df['Profit_Loss'] = pd.to_numeric(df['Profit_Loss'], errors='coerce')
# Rows without a usable settled date cannot be bucketed by day/week/month.
df = df.dropna(subset=['Settled date']).reset_index(drop=True)
print(f"✅ Loaded {len(df)} rows for analysis. Profit_Loss dtype: {df['Profit_Loss'].dtype}")


✅ Loaded 18431 rows for analysis. Profit_Loss dtype: float64


In [None]:
# --- STEP 3: Extract Sport, clean Track_Name, and Country ---

import re

# Sport is the text before the first "/" of the market string.
df['Sport'] = df['Market'].str.extract(r'^([^/]+)/', expand=False).str.strip()

# Only the two racing sports are parsed further for track details.
racing_df = df.loc[df['Sport'].isin(['Horse Racing', 'Greyhound Racing'])].copy()

# Split what follows the "/" around the first ":" into
# "<track info> : <event description>".
market_parts = racing_df['Market'].str.extract(r'/\s*(.*?)\s*:\s*(.*)')
racing_df['Track_Info'] = market_parts[0]
racing_df['Event_Description'] = market_parts[1]

def extract_track_and_country(track_info):
    """Split a track-info string into (track name, country code).

    The country is the first whitespace-separated token found after the first
    "(" (with any ")" characters removed); the track is the text before that
    "(". When the string has no parenthesised part, or the parentheses hold
    no token at all, the country is reported as 'Unknown'.

    Args:
        track_info: Text such as "Newcastle (AUS) 11th May", or a missing value.

    Returns:
        pd.Series of two items: [track, country]. A missing input yields
        [None, 'Unknown'].
    """
    if pd.isna(track_info):
        return pd.Series([None, 'Unknown'])
    if '(' in track_info and ')' in track_info:
        before, inside = track_info.split('(')[:2]
        tokens = inside.replace(')', '').split()
        # Empty parentheses such as "Track ()" have no token to use as a country.
        country = tokens[0] if tokens else 'Unknown'
        track = before.strip()
    else:
        track = track_info.strip()
        country = 'Unknown'
    return pd.Series([track, country])

def clean_track_name(track):
    """Strip "<day-number>[st|nd|rd|th] <word>" fragments from a track name.

    Returns None for a missing value; otherwise the name with every such
    fragment removed and surrounding whitespace trimmed.
    """
    if pd.isna(track):
        return None
    day_and_word = re.compile(r'\b\d{1,2}(st|nd|rd|th)?\s\w+\b')
    without_dates = day_and_word.sub('', track)
    return without_dates.strip()

racing_df[['Track_Name_Raw', 'Country']] = racing_df['Track_Info'].apply(extract_track_and_country)
racing_df['Track_Name'] = racing_df['Track_Name_Raw'].apply(clean_track_name)

# racing_df has one row per bet, so the same Market can appear many times.
# Track_Name and Country are derived purely from Market, so keep one lookup
# row per Market; merging the raw frame would duplicate every repeated bet
# and inflate all profit totals downstream.
track_lookup = racing_df[['Market', 'Track_Name', 'Country']].drop_duplicates(subset='Market')

rows_before_merge = len(df)
df = (
    df
    # Dropping earlier results lets the cell be re-run without _x/_y column suffixes.
    .drop(columns=['Track_Name', 'Country'], errors='ignore')
    .merge(track_lookup, on='Market', how='left', validate='many_to_one')
)
assert len(df) == rows_before_merge, "feature merge must not change the number of bets"
print(f"✅ After feature extraction: {len(df)} rows, Profit_Loss dtype: {df['Profit_Loss'].dtype}")


✅ After feature extraction: 20347 rows, Profit_Loss dtype: float64


In [None]:
# --- STEP 4: Build complete summary tables (daily, cumulative, weekly, monthly, sport, country) ---

def _rounded_totals(frame, key, with_cumulative=False, sort=False):
    """Sum Profit_Loss per `key` and round to 2 decimals.

    With `sort`, rows are ordered by `key` and re-indexed. With
    `with_cumulative`, a running total is added; it is accumulated from the
    unrounded sums and only then rounded.
    """
    totals = frame.groupby(key)['Profit_Loss'].sum().reset_index()
    if sort:
        totals = totals.sort_values(key).reset_index(drop=True)
    if with_cumulative:
        totals['Cumulative_Profit_Loss'] = totals['Profit_Loss'].cumsum()
    totals['Profit_Loss'] = pd.to_numeric(totals['Profit_Loss'], errors='coerce').round(2)
    if with_cumulative:
        totals['Cumulative_Profit_Loss'] = pd.to_numeric(
            totals['Cumulative_Profit_Loss'], errors='coerce'
        ).round(2)
    return totals


# Calendar buckets derived from the settled timestamp.
df['Day'] = df['Settled date'].dt.date
df['Month'] = df['Settled date'].dt.to_period('M').astype(str)
# Midnight of the settled day minus its weekday offset (Monday = 0).
df['Week Starting'] = df['Settled date'].dt.floor('D') - pd.to_timedelta(
    df['Settled date'].dt.weekday, unit='d'
)

by_day = _rounded_totals(df, 'Day', with_cumulative=True, sort=True)
by_week = _rounded_totals(df, 'Week Starting', sort=True)
by_month = _rounded_totals(df, 'Month')
by_sport = _rounded_totals(df, 'Sport')
by_country = _rounded_totals(df, 'Country')

# One daily + cumulative table per sport, keyed "<sport> Daily".
sport_daily = {
    f"{sport} Daily": _rounded_totals(df[df['Sport'] == sport], 'Day', with_cumulative=True, sort=True)
    for sport in df['Sport'].dropna().unique()
}

# Terminal output for validation
for label, table in (
    ('By Day', by_day),
    ('By Week', by_week),
    ('By Month', by_month),
    ('By Sport', by_sport),
    ('By Country', by_country),
):
    print(f"✅ {label} rows: {len(table)}, dtype: {table['Profit_Loss'].dtype}")


✅ By Day rows: 187, dtype: float64
✅ By Week rows: 27, dtype: float64
✅ By Month rows: 7, dtype: float64
✅ By Sport rows: 14, dtype: float64
✅ By Country rows: 8, dtype: float64


In [None]:
# --- STEP 6: Track summaries ---
# Total profit per (sport, track), restricted to the two racing sports.
racing_rows = df.loc[df['Sport'].isin(['Horse Racing', 'Greyhound Racing'])]
track_df = racing_rows.groupby(['Sport', 'Track_Name'])['Profit_Loss'].sum().reset_index()
track_df['Profit_Loss'] = track_df['Profit_Loss'].round(2)

# For each racing sport keep the 15 most and 15 least profitable tracks.
tracks = {}
for sheet_label, sport_name in (('Horse', 'Horse Racing'), ('Greyhound', 'Greyhound Racing')):
    sport_tracks = track_df[track_df['Sport'] == sport_name]
    tracks[f'Top {sheet_label} Tracks'] = sport_tracks.nlargest(15, 'Profit_Loss')
    tracks[f'Bottom {sheet_label} Tracks'] = sport_tracks.nsmallest(15, 'Profit_Loss')

# Same object as track_df, exposed under the name the export step uses.
track_stats = track_df
print("✅ Track summaries built.")


✅ Track summaries built.


In [None]:
# --- STEP 7: Compute strike rates for Horse Racing and Greyhound Racing with min 50 bets ---

# Filter for racing sports
# Restrict to the two racing sports.
df_racing = df.loc[df['Sport'].isin(['Horse Racing', 'Greyhound Racing'])].copy()

# Per (sport, track): number of bets with a profit value, and how many were positive.
profit_by_track = df_racing.groupby(['Sport', 'Track_Name'])['Profit_Loss']
strike_df = profit_by_track.agg(
    total_bets='count',
    wins=lambda profits: profits.gt(0).sum(),
).reset_index()

# Strike rate = share of counted bets that made a profit.
strike_df['Strike_Rate'] = strike_df['wins'].div(strike_df['total_bets'])

# Keep only tracks with at least 50 bets.
strike_df_filtered = strike_df.loc[strike_df['total_bets'].ge(50)]

# Ten best and ten worst tracks by strike rate.
top_strike = strike_df_filtered.nlargest(10, 'Strike_Rate')
bottom_strike = strike_df_filtered.nsmallest(10, 'Strike_Rate')

# Preview
print("✅ Strike rates computed (min 50 bets).")
print(strike_df_filtered.head())


✅ Strike rates computed (min 50 bets).
              Sport   Track_Name  total_bets  wins  Strike_Rate
0  Greyhound Racing  Albion Park         302   177     0.586093
1  Greyhound Racing   Angle Park         158    89     0.563291
2  Greyhound Racing     Ballarat         212   112     0.528302
3  Greyhound Racing      Bendigo         150    70     0.466667
6  Greyhound Racing   Cannington         345   200     0.579710


In [None]:
# --- STEP 8: Prepare all_sheets for export ---
# Map each worksheet title to the table uploaded under it. Later entries
# with the same title would overwrite earlier ones.
all_sheets = {
    'By Day': by_day,
    # by_week is built in Step 4 and the export step has dedicated
    # 'Week Starting' handling, but it was missing from this mapping.
    'By Week': by_week,
    'By Month': by_month,
    'By Sport': by_sport,
    'By Country': by_country,
    "Track Stats": track_stats,
    "Top Strike Rates": top_strike,
    "Bottom Strike Rates": bottom_strike,
    **tracks,        # top/bottom track tables from Step 6
    **sport_daily    # one "<sport> Daily" table per sport from Step 4
}

print(f"✅ Prepared {len(all_sheets)} tables for Google Sheets export.")


✅ Prepared 25 tables for Google Sheets export.


In [None]:
# Show the first rows of the best and worst horse-track tables.
for sheet_title in ('Top Horse Tracks', 'Bottom Horse Tracks'):
    print(f"📊 {sheet_title} preview:")
    print(tracks[sheet_title].head())


📊 Top Horse Tracks preview:
            Sport Track_Name Profit_Loss
253  Horse Racing  Newcastle     1560.61
313  Horse Racing  Southwell     1551.13
167  Horse Racing  Geraldton     1400.78
56   Horse Racing    Aintree     1270.77
295  Horse Racing   Rosehill     1198.06
📊 Bottom Horse Tracks preview:
            Sport    Track_Name Profit_Loss
342  Horse Racing  Turfway Park     -336.93
116  Horse Racing      Chepstow     -183.07
213  Horse Racing     Lingfield     -168.89
358  Horse Racing     Wincanton     -146.82
343  Horse Racing     Uttoxeter     -105.35


In [None]:
# --- STEP 9: Export to Google Sheets ---

# NOTE(review): `sh`, `gspread` and `set_with_dataframe` are not defined in the
# visible cells — presumably an auth/setup cell provides them; confirm it runs first.
for name, df_out in all_sheets.items():
    # Format a private copy: the tables in all_sheets are the same objects the
    # analysis cells use (track_stats is track_df), so writing string-formatted
    # values into them would corrupt later numeric work and re-runs.
    df_out = df_out.copy()

    # Ensure Profit_Loss is numeric, then convert to string to prevent Sheets misinterpretation
    if 'Profit_Loss' in df_out.columns:
        df_out['Profit_Loss'] = pd.to_numeric(df_out['Profit_Loss'], errors='coerce').round(2)
        df_out['Profit_Loss'] = df_out['Profit_Loss'].apply(lambda x: f"{x:.2f}" if pd.notnull(x) else "")
        print(f"✅ {name} Profit_Loss dtype before upload: {df_out['Profit_Loss'].dtype}")

    # Convert Week Starting to string to avoid unwanted formatting
    if 'Week Starting' in df_out.columns:
        df_out['Week Starting'] = df_out['Week Starting'].astype(str)

    # Round any remaining numeric columns
    for col in df_out.select_dtypes(include=['float', 'int']).columns:
        df_out[col] = df_out[col].round(2)

    # Reuse the worksheet when it exists (clearing old contents), otherwise create it.
    try:
        ws = sh.worksheet(name)
        ws.clear()
    except gspread.exceptions.WorksheetNotFound:
        ws = sh.add_worksheet(title=name, rows=1000, cols=20)

    set_with_dataframe(ws, df_out)
    print(f"✅ Uploaded {name}")


✅ By Day Profit_Loss dtype before upload: object
✅ Uploaded By Day
✅ By Day Sorted Profit_Loss dtype before upload: object
✅ Uploaded By Day Sorted
✅ By Week Profit_Loss dtype before upload: object
✅ Uploaded By Week
✅ By Month Profit_Loss dtype before upload: object
✅ Uploaded By Month
✅ By Sport Profit_Loss dtype before upload: object
✅ Uploaded By Sport
✅ By Country Profit_Loss dtype before upload: object
✅ Uploaded By Country
✅ Snooker Daily Profit_Loss dtype before upload: object
✅ Uploaded Snooker Daily
✅ Ice Hockey Daily Profit_Loss dtype before upload: object
✅ Uploaded Ice Hockey Daily
✅ Horse Racing Daily Profit_Loss dtype before upload: object
✅ Uploaded Horse Racing Daily
✅ Golf Daily Profit_Loss dtype before upload: object
✅ Uploaded Golf Daily
✅ Politics Daily Profit_Loss dtype before upload: object
✅ Uploaded Politics Daily
✅ Tennis Daily Profit_Loss dtype before upload: object
✅ Uploaded Tennis Daily
✅ Greyhound Racing Daily Profit_Loss dtype before upload: object
✅ Upl