<a href="https://colab.research.google.com/github/gazuty/betfair-dashboard/blob/colab-stable-2025-08-10/betfair_dashboard_STABLE_2025_08_10.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [102]:
# 📊 Betfair Dashboard
# Built by Gazuty (c) 2025
# This notebook processes Betfair profit/loss data, builds analytics tables, and publishes outputs to Google Sheets.

# --- STEP 0: Configuration ---

import os
from datetime import datetime

# ─── Your Drive / folder paths ───
BASE_FOLDER       = '/content/drive/My Drive/Betfair'
MASTER_CSV        = os.path.join(BASE_FOLDER, 'Betfair_Master.csv')
ARCHIVE_FOLDER    = os.path.join(BASE_FOLDER, 'Archive')
BETTING_PATTERN   = os.path.join(BASE_FOLDER, 'BettingPandL*.csv')

# ─── Google Sheet settings ───
GOOGLE_SHEET_NAME = 'Betfair Dashboard'

# ─── Business rules ───
VALID_SPORTS      = ['Horse Racing', 'Greyhound Racing']
MIN_STRIKE_BETS   = 50

# ─── Setup ───
os.makedirs(ARCHIVE_FOLDER, exist_ok=True)

print("\u2705 Configuration loaded:")
print(f"  BASE_FOLDER        = {BASE_FOLDER}")
print(f"  MASTER_CSV         = {MASTER_CSV}")
print(f"  ARCHIVE_FOLDER     = {ARCHIVE_FOLDER}")
print(f"  BETTING_PATTERN    = {BETTING_PATTERN}")
print(f"  GOOGLE_SHEET_NAME  = {GOOGLE_SHEET_NAME}")
print(f"  VALID_SPORTS       = {VALID_SPORTS}")
print(f"  MIN_STRIKE_BETS    = {MIN_STRIKE_BETS}")


✅ Configuration loaded:
  BASE_FOLDER        = /content/drive/My Drive/Betfair
  MASTER_CSV         = /content/drive/My Drive/Betfair/Betfair_Master.csv
  ARCHIVE_FOLDER     = /content/drive/My Drive/Betfair/Archive
  BETTING_PATTERN    = /content/drive/My Drive/Betfair/BettingPandL*.csv
  GOOGLE_SHEET_NAME  = Betfair Dashboard
  VALID_SPORTS       = ['Horse Racing', 'Greyhound Racing']
  MIN_STRIKE_BETS    = 50


In [103]:
# --- STEP 1: Master Updater ---

import pandas as pd, glob, shutil

REQUIRED_COLS = ['Market', 'Settled date']

def update_betfair_master():
    print("\U0001F504 Starting master update")

    # 1⃣ Load or initialize master
    if os.path.exists(MASTER_CSV):
        df_master = pd.read_csv(MASTER_CSV)
        df_master['Settled date'] = pd.to_datetime(df_master['Settled date'], errors='coerce')
        df_master['Profit_Loss'] = pd.to_numeric(df_master['Profit_Loss'], errors='coerce')
        df_master = df_master.dropna(subset=['Settled date']).reset_index(drop=True)
        print(f"\u2705 Loaded master ({len(df_master)} rows)")
    else:
        print("\u26a0 No existing master found — starting fresh")
        df_master = pd.DataFrame(columns=REQUIRED_COLS + ['Profit_Loss'])

    # 2⃣ Gather raw files
    raw_files = glob.glob(BETTING_PATTERN)
    print(f"📂 Found {len(raw_files)} raw file(s)")

    if not raw_files:
        print("\u26a0 No raw files to process — exiting.")
        return

    # 3⃣ Process each file
    dfs = []
    for filepath in raw_files:
        fname = os.path.basename(filepath)
        print(f"📅 {fname}", end="")

        df = pd.read_csv(filepath)
        missing = [c for c in REQUIRED_COLS if c not in df.columns]
        if missing:
            print(f" → ❌ missing columns {missing}")
            continue

        profs = [c for c in df.columns if 'profit' in c.lower()]
        if not profs:
            print(" → ❌ no profit column found")
            continue

        pick = next((c for c in profs if 'aud' in c.lower()), profs[0])
        df['Profit_Loss'] = pd.to_numeric(df[pick], errors='coerce')
        df['Settled date'] = pd.to_datetime(df['Settled date'], errors='coerce')
        df = df[['Market', 'Settled date', 'Profit_Loss']].dropna(subset=['Settled date'])

        dfs.append(df)
        print(f" → {len(df)} rows from '{pick}'")

    if not dfs:
        print("\u26a0 No valid data loaded from raw files — exiting.")
        return

    # 4⃣ Combine and deduplicate
    df_new = pd.concat(dfs, ignore_index=True)
    df_new = df_new.dropna(subset=['Settled date']).reset_index(drop=True)

    df_master['_key'] = (
        df_master['Market'].astype(str) + "|" +
        df_master['Settled date'].dt.strftime('%Y-%m-%d %H:%M:%S') + "|" +
        df_master['Profit_Loss'].astype(str)
    )
    df_new['_key'] = (
        df_new['Market'].astype(str) + "|" +
        df_new['Settled date'].dt.strftime('%Y-%m-%d %H:%M:%S') + "|" +
        df_new['Profit_Loss'].astype(str)
    )

    df_unique = df_new[~df_new['_key'].isin(df_master['_key'])]
    print(f"\u2705 {len(df_unique)} unique new row(s) identified")

    # 5⃣ Merge and save
    if not df_unique.empty:
        df_combined = pd.concat([
            df_master.drop(columns=['_key']),
            df_unique.drop(columns=['_key'])
        ], ignore_index=True)
        df_combined.to_csv(MASTER_CSV, index=False)
        print(f"\u2705 Master updated ({len(df_combined)} rows) → {MASTER_CSV}")
    else:
        print("\u26a0 No new rows to add — master unchanged.")

    # 6⃣ Archive files
    for filepath in raw_files:
        fname = os.path.basename(filepath)
        shutil.move(filepath, os.path.join(ARCHIVE_FOLDER, fname))
        print(f"📦 Archived {fname}")

# Run the function
update_betfair_master()


🔄 Starting master update
✅ Loaded master (22387 rows)
📂 Found 0 raw file(s)
⚠ No raw files to process — exiting.


In [104]:
# --- STEP 2: Load Master ---

import pandas as pd

print(f"📂 Loading master from: {MASTER_CSV}")
df = pd.read_csv(MASTER_CSV)

# Ensure correct dtypes
df['Settled date'] = pd.to_datetime(df['Settled date'], errors='coerce')
df['Profit_Loss'] = pd.to_numeric(df['Profit_Loss'], errors='coerce')

# Drop rows with invalid dates
before = len(df)
df = df.dropna(subset=['Settled date']).reset_index(drop=True)
after = len(df)

print(f"✅ {after} rows loaded (dropped {before - after} invalid dates).")
print(f"   Profit_Loss dtype: {df['Profit_Loss'].dtype}")


📂 Loading master from: /content/drive/My Drive/Betfair/Betfair_Master.csv
✅ 22387 rows loaded (dropped 0 invalid dates).
   Profit_Loss dtype: float64


In [105]:
# --- STEP 3: Feature Extraction ---

# 1️⃣ Extract Sport from Market (first token before slash)
df['Sport'] = df['Market'].str.extract(r'^([^/]+)/')[0].str.strip()

# 2️⃣ Extract Track_Info and Event_Description for racing sports
racing_mask = df['Sport'].isin(VALID_SPORTS)
track_event = df.loc[racing_mask, 'Market'].str.extract(r'/\s*(.*?)\s*:\s*(.*)')
track_event.columns = ['Track_Info', 'Event_Description']
df.loc[racing_mask, ['Track_Info', 'Event_Description']] = track_event

# 3️⃣ Extract Country from parentheses in Track_Info
df['Country'] = df['Track_Info'].str.extract(r'\(([^)]+)\)')[0]

# 4️⃣ Fill missing country values
df['Country'] = df['Country'].fillna('UK')
df.loc[~df['Sport'].isin(VALID_SPORTS), 'Country'] = 'Unknown'

# 5️⃣ Clean up Track_Info to produce Track_Name (remove dates and country)
df['Track_Name'] = (
    df['Track_Info']
      .str.replace(r'\([^)]*\)', '', regex=True)
      .str.replace(r'\b\d{1,2}(?:st|nd|rd|th)?\s+\w+\b', '', regex=True)
      .str.strip()
)

# 6️⃣ Preview output
preview = df.loc[df['Track_Name'].notna(), ['Sport', 'Track_Name', 'Country']].drop_duplicates().head(10)
print("✅ Feature extraction complete — first few tracks:")
print(preview)


✅ Feature extraction complete — first few tracks:
               Sport     Track_Name Country
0       Horse Racing      Uttoxeter      UK
1   Greyhound Racing       Mandurah     AUS
2       Horse Racing      Lingfield      UK
3       Horse Racing        Warwick      UK
9   Greyhound Racing        Monmore      UK
11  Greyhound Racing        Romford      UK
20      Horse Racing   Philadelphia      US
23      Horse Racing  Turf Paradise      US
24      Horse Racing       Riverton     NZL
25      Horse Racing      Ellerslie     NZL


In [106]:
# --- STEP 4: Build summary tables (daily, weekly, monthly, sport, country) ---

print("🔧 STEP 4: Building summary tables...")

# 4.1️⃣ Daily Summary (chronological)
by_day = (
    df.groupby(df['Settled date'].dt.date)['Profit_Loss']
      .sum()
      .reset_index(name='Profit_Loss')
      .rename(columns={'Settled date': 'Day'})
)
by_day = by_day.sort_values('Day').reset_index(drop=True)
by_day['Cumulative_Profit_Loss'] = by_day['Profit_Loss'].cumsum()
by_day[['Profit_Loss', 'Cumulative_Profit_Loss']] = by_day[['Profit_Loss', 'Cumulative_Profit_Loss']].round(2)

# 4.2️⃣ Rolling Returns (start from 1 March)
by_day['Day'] = pd.to_datetime(by_day['Day'])
rolling_start = pd.to_datetime('2025-03-01')
rolling_df = by_day[by_day['Day'] >= rolling_start].copy()
rolling_df = rolling_df.set_index('Day')

rolling_df['Rolling 2w'] = by_day.set_index('Day')['Profit_Loss'].rolling(window='14D').sum()
rolling_df['Rolling 4w'] = by_day.set_index('Day')['Profit_Loss'].rolling(window='28D').sum()
rolling_df['Rolling 8w'] = by_day.set_index('Day')['Profit_Loss'].rolling(window='56D').sum()

rolling_df = rolling_df.drop(columns=['Profit_Loss', 'Cumulative_Profit_Loss']).reset_index()
rolling_df = rolling_df.round(2)
rolling_df.columns = ['Day', 'Rolling 2w', 'Rolling 4w', 'Rolling 8w']

# 4.3️⃣ Weekly Summary (week starts Sunday)
by_week = (
    df.set_index('Settled date')
      .resample('W-SUN')['Profit_Loss']
      .sum()
      .reset_index()
      .rename(columns={'Settled date': 'Week Starting'})
)
by_week['Profit_Loss'] = by_week['Profit_Loss'].round(2)

# 4.4️⃣ Rolling by Sport (Horse Racing & Greyhounds)
rolling_by_sport = {}
for sport in ['Horse Racing', 'Greyhound Racing']:
    sport_df = df[df['Sport'] == sport].copy()
    sport_by_day = (
        sport_df.groupby(sport_df['Settled date'].dt.date)['Profit_Loss']
        .sum()
        .reset_index(name='Profit_Loss')
        .rename(columns={'Settled date': 'Day'})
        .sort_values('Day')
        .reset_index(drop=True)
    )
    sport_by_day['Day'] = pd.to_datetime(sport_by_day['Day'])
    sport_by_day = sport_by_day.set_index('Day')

    result = sport_by_day.copy()
    result['Rolling 2w'] = sport_by_day['Profit_Loss'].rolling(window='14D').sum()
    result['Rolling 4w'] = sport_by_day['Profit_Loss'].rolling(window='28D').sum()
    result['Rolling 8w'] = sport_by_day['Profit_Loss'].rolling(window='56D').sum()

    result = result.drop(columns=['Profit_Loss']).reset_index()
    result = result[result['Day'] >= rolling_start].round(2)
    result.columns = ['Day', 'Rolling 2w', 'Rolling 4w', 'Rolling 8w']
    rolling_by_sport[sport] = result

# 4.5️⃣ Monthly Summary — using 'ME' to avoid deprecation warning
by_month = (
    df.set_index('Settled date')
      .resample('ME')['Profit_Loss']
      .sum()
      .reset_index()
)
by_month['Month'] = by_month['Settled date'].dt.to_period('M').astype(str)
by_month = by_month[['Month', 'Profit_Loss']]
by_month['Profit_Loss'] = by_month['Profit_Loss'].round(2)

# 4.6️⃣ Sport Summary
by_sport = (
    df.groupby('Sport')['Profit_Loss']
      .sum()
      .reset_index()
      .round({'Profit_Loss': 2})
)

# 4.7️⃣ Country Summary
by_country = (
    df.groupby('Country')['Profit_Loss']
      .sum()
      .reset_index()
      .round({'Profit_Loss': 2})
)

# 4.8️⃣ Daily Summaries per Sport (with cumulative P/L)
sport_daily = {}
for sport in df['Sport'].dropna().unique():
    temp = (
        df[df['Sport'] == sport]
          .groupby(df['Settled date'].dt.date)['Profit_Loss']
          .sum()
          .reset_index(name='Profit_Loss')
          .rename(columns={'Settled date': 'Day'})
          .sort_values('Day')
          .reset_index(drop=True)
    )
    temp['Cumulative_Profit_Loss'] = temp['Profit_Loss'].cumsum().round(2)
    temp['Profit_Loss'] = temp['Profit_Loss'].round(2)
    sport_daily[f"{sport} Daily"] = temp

# ✅ Summary Checks
print(f"✅ By Day: {len(by_day)} rows (last: {by_day['Day'].max().date()})")
print(f"✅ Rolling Returns: {len(rolling_df)} rows (last: {rolling_df['Day'].max().date()})")
for k, v in rolling_by_sport.items():
    print(f"✅ Rolling {k}: {len(v)} rows (last: {v['Day'].max().date()})")
print(f"✅ By Week: {len(by_week)} rows (last: {by_week['Week Starting'].max().date()})")
print(f"✅ By Month: {len(by_month)} rows (last: {by_month['Month'].max()})")
print(f"✅ By Sport: {len(by_sport)} sports → {by_sport['Sport'].tolist()}")
print(f"✅ By Country: {len(by_country)} countries → {by_country['Country'].tolist()}")


🔧 STEP 4: Building summary tables...
✅ By Day: 222 rows (last: 2025-08-10)
✅ Rolling Returns: 163 rows (last: 2025-08-10)
✅ Rolling Horse Racing: 163 rows (last: 2025-08-10)
✅ Rolling Greyhound Racing: 163 rows (last: 2025-08-10)
✅ By Week: 32 rows (last: 2025-08-10)
✅ By Month: 8 rows (last: 2025-08)
✅ By Sport: 16 sports → ['American Football', 'Basketball', 'Cricket', 'Cycling', 'Darts', 'Football', 'Gaelic Games', 'Golf', 'Greyhound Racing', 'Horse Racing', 'Ice Hockey', 'Motor Sport', 'Politics', 'Rugby Union', 'Snooker', 'Tennis']
✅ By Country: 8 countries → ['AUS', 'FRA', 'NZL', 'RSA', 'UAE', 'UK', 'US', 'Unknown']


In [107]:
# --- STEP 5: Track Summaries ---

# 1️⃣ Aggregate P/L per track for Horse and Greyhound Racing
track_df = (
    df[df['Sport'].isin(VALID_SPORTS)]
      .groupby(['Sport', 'Track_Name'], as_index=False)['Profit_Loss']
      .sum()
)
track_df['Profit_Loss'] = track_df['Profit_Loss'].round(2)

# 2️⃣ Create summary groups
tracks = {
    'Track Stats':               track_df,
    'Top Horse Tracks':          track_df.query("Sport == 'Horse Racing'").nlargest(15, 'Profit_Loss'),
    'Bottom Horse Tracks':       track_df.query("Sport == 'Horse Racing'").nsmallest(15, 'Profit_Loss'),
    'Top Greyhound Tracks':      track_df.query("Sport == 'Greyhound Racing'").nlargest(15, 'Profit_Loss'),
    'Bottom Greyhound Tracks':   track_df.query("Sport == 'Greyhound Racing'").nsmallest(15, 'Profit_Loss'),
}

# 3️⃣ Preview sample
print("✅ Track summaries built.")
print(" • Sample Track Stats:")
print(track_df.head())
print(" • Top Horse Tracks:")
print(tracks['Top Horse Tracks'][['Track_Name', 'Profit_Loss']].head())


✅ Track summaries built.
 • Sample Track Stats:
              Sport   Track_Name  Profit_Loss
0  Greyhound Racing  Albion Park        91.77
1  Greyhound Racing   Angle Park      -129.51
2  Greyhound Racing     Ballarat        42.44
3  Greyhound Racing      Bendigo         3.01
4  Greyhound Racing  Broken Hill        58.85
 • Top Horse Tracks:
    Track_Name  Profit_Loss
174  Geraldton      1400.78
308   Rosehill      1367.26
57     Aintree      1286.07
327  Southwell      1104.96
265  Newcastle       974.57


In [108]:
# --- STEP 6: Strike Rates ---

# 1️⃣ Filter to Horse & Greyhound Racing
df_racing = df[df['Sport'].isin(VALID_SPORTS)].copy()

# 2️⃣ Compute total bets and wins per track
strike_df = (
    df_racing
      .groupby(['Sport', 'Track_Name'])['Profit_Loss']
      .agg(
          total_bets='count',
          wins=lambda x: (x > 0).sum()
      )
      .reset_index()
)

# 3️⃣ Calculate strike rate
strike_df['Strike_Rate'] = (strike_df['wins'] / strike_df['total_bets']).round(4)

# 4️⃣ Filter by minimum bets threshold
strike_df_filtered = strike_df[strike_df['total_bets'] >= MIN_STRIKE_BETS].reset_index(drop=True)

# 5️⃣ Extract Top & Bottom Strike Rate Tracks
top_strike    = strike_df_filtered.nlargest(10, 'Strike_Rate').reset_index(drop=True)
bottom_strike = strike_df_filtered.nsmallest(10, 'Strike_Rate').reset_index(drop=True)

# 6️⃣ Preview
print(f"✅ Strike rates computed (min {MIN_STRIKE_BETS} bets):")
print("Top 10 Strike Rates:")
print(top_strike[['Sport', 'Track_Name', 'total_bets', 'wins', 'Strike_Rate']])
print("\nBottom 10 Strike Rates:")
print(bottom_strike[['Sport', 'Track_Name', 'total_bets', 'wins', 'Strike_Rate']])


✅ Strike rates computed (min 50 bets):
Top 10 Strike Rates:
          Sport Track_Name  total_bets  wins  Strike_Rate
0  Horse Racing   Rosehill          92    75       0.8152
1  Horse Racing    Chester          72    57       0.7917
2  Horse Racing       York          73    55       0.7534
3  Horse Racing   Saratoga         112    83       0.7411
4  Horse Racing  Ellerslie          55    40       0.7273
5  Horse Racing  Chantilly          74    53       0.7162
6  Horse Racing       Bath          55    39       0.7091
7  Horse Racing   Brighton          78    55       0.7051
8  Horse Racing    Newbury          93    65       0.6989
9  Horse Racing   Woodbine          63    44       0.6984

Bottom 10 Strike Rates:
              Sport    Track_Name  total_bets  wins  Strike_Rate
0  Greyhound Racing        Hobart         157    72       0.4586
1  Greyhound Racing  Q2 Parklands          95    45       0.4737
2      Horse Racing   Turffontein          95    45       0.4737
3  Greyhound Raci

In [109]:
# --- STEP 7: Prepare all_sheets for export ---

# 7.1️⃣ Ensure all_sheets exists
if 'all_sheets' not in locals():
    all_sheets = {}

# 7.2️⃣ Add core summaries
all_sheets.update({
    'By Day':           by_day,
    'By Day Sorted':    by_day.sort_values('Profit_Loss', ascending=False).reset_index(drop=True),
    'By Week':          by_week,
    'Cumulative':       by_day[['Day', 'Cumulative_Profit_Loss']].rename(columns={'Cumulative_Profit_Loss': 'Cumulative'}),
    'By Month':         by_month,
    'By Sport':         by_sport,
    'By Country':       by_country,
    'Rolling Returns':  rolling_df
})

# 7.3️⃣ Add track-level summaries
all_sheets.update({
    'Track Stats':             tracks['Track Stats'],
    'Top Horse Tracks':        tracks['Top Horse Tracks'],
    'Bottom Horse Tracks':     tracks['Bottom Horse Tracks'],
    'Top Greyhound Tracks':    tracks['Top Greyhound Tracks'],
    'Bottom Greyhound Tracks': tracks['Bottom Greyhound Tracks'],
})

# 7.4️⃣ Add strike rate summaries
all_sheets.update({
    'Top Strike Rates':    top_strike,
    'Bottom Strike Rates': bottom_strike,
})

# 7.5️⃣ Add daily summaries for each sport
all_sheets.update(sport_daily)

# 7.6️⃣ Add rolling returns by sport
for sport, df_rolling in rolling_by_sport.items():
    sheet_name = f"Rolling {sport}"
    all_sheets[sheet_name] = df_rolling

# 7.7️⃣ Final review of included sheets
print(f"✅ Prepared {len(all_sheets)} tables for export:")
for name in all_sheets:
    print(f"  • {name}")


✅ Prepared 33 tables for export:
  • By Day
  • By Day Sorted
  • By Week
  • Cumulative
  • By Month
  • By Sport
  • By Country
  • Rolling Returns
  • Track Stats
  • Top Horse Tracks
  • Bottom Horse Tracks
  • Top Greyhound Tracks
  • Bottom Greyhound Tracks
  • Top Strike Rates
  • Bottom Strike Rates
  • Snooker Daily
  • Ice Hockey Daily
  • Horse Racing Daily
  • Golf Daily
  • Politics Daily
  • Tennis Daily
  • Greyhound Racing Daily
  • Football Daily
  • Motor Sport Daily
  • Cricket Daily
  • Darts Daily
  • Basketball Daily
  • American Football Daily
  • Rugby Union Daily
  • Cycling Daily
  • Gaelic Games Daily
  • Rolling Horse Racing
  • Rolling Greyhound Racing


In [110]:
print("📊 Top Horse Tracks preview:")
print(tracks['Top Horse Tracks'].head())
print("📊 Bottom Horse Tracks preview:")
print(tracks['Bottom Horse Tracks'].head())


📊 Top Horse Tracks preview:
            Sport Track_Name  Profit_Loss
174  Horse Racing  Geraldton      1400.78
308  Horse Racing   Rosehill      1367.26
57   Horse Racing    Aintree      1286.07
327  Horse Racing  Southwell      1104.96
265  Horse Racing  Newcastle       974.57
📊 Bottom Horse Tracks preview:
            Sport    Track_Name  Profit_Loss
356  Horse Racing  Turfway Park      -336.93
303  Horse Racing         Ripon      -176.58
374  Horse Racing     Wincanton      -129.13
372  Horse Racing      Wetherby      -103.88
116  Horse Racing  Charles Town       -82.20


In [111]:
# Quick safety check: do master (df) totals for yesterday/today agree with any raw files present?
import pandas as pd, glob, pytz
from datetime import datetime, timedelta

assert 'df' in globals(), "Please run Steps 2–7 first (df should be in memory)."

BASE_FOLDER     = '/content/drive/My Drive/Betfair'
BETTING_PATTERN = f'{BASE_FOLDER}/BettingPandL*.csv'

au = pytz.timezone("Australia/Sydney")
y  = (datetime.now(au) - timedelta(days=1)).date()
t  = datetime.now(au).date()

# Master totals
df_m = df.copy()
if not pd.api.types.is_datetime64_any_dtype(df_m['Settled date']):
    df_m['Settled date'] = pd.to_datetime(df_m['Settled date'], errors='coerce')
by_day_master = df_m.groupby(df_m['Settled date'].dt.date)['Profit_Loss'].sum()
m_y = float(by_day_master.get(y, 0.0))
m_t = float(by_day_master.get(t, 0.0))

# Raw totals (if any files are still in the folder)
raw_files = sorted(glob.glob(BETTING_PATTERN))
r_y = r_t = 0.0
if raw_files:
    raws = []
    for p in raw_files:
        r = pd.read_csv(p)
        if 'Settled date' in r and any(c in r.columns for c in ['Profit/Loss (AUD)', 'Profit_Loss']):
            plcol = 'Profit/Loss (AUD)' if 'Profit/Loss (AUD)' in r.columns else 'Profit_Loss'
            r = r.rename(columns={plcol: 'Profit_Loss'})
            r['Settled date'] = pd.to_datetime(r['Settled date'], errors='coerce')
            r['Profit_Loss']  = pd.to_numeric(r['Profit_Loss'], errors='coerce')
            raws.append(r[['Settled date','Profit_Loss']])
    if raws:
        R = pd.concat(raws, ignore_index=True).dropna(subset=['Settled date'])
        R['day'] = R['Settled date'].dt.date
        r_y = float(R.loc[R['day']==y, 'Profit_Loss'].sum())
        r_t = float(R.loc[R['day']==t, 'Profit_Loss'].sum())

print(f"MASTER → {y}: {m_y:.2f} | {t}: {m_t:.2f}")
print(f" RAWS  → {y}: {r_y:.2f} | {t}: {r_t:.2f}  (0.00 if no raw files)")

ok = (abs(m_y - r_y) < 0.01 or r_y == 0.0) and (abs(m_t - r_t) < 0.01 or r_t == 0.0)
print("PASS ✅ — proceed to Step 8" if ok else "STOP ❌ — mismatch; tell me these numbers")


MASTER → 2025-08-09: 529.21 | 2025-08-10: 172.41
 RAWS  → 2025-08-09: 0.00 | 2025-08-10: 0.00  (0.00 if no raw files)
PASS ✅ — proceed to Step 8


In [112]:
# --- STEP 8 (safe/minimal writes): Export to Google Sheets ---

import time, gspread, pandas as pd
from datetime import date
from gspread.exceptions import APIError

SERVICE_JSON = "/content/drive/My Drive/Betfair/testsheets-257205-11522dd72797.json"
SHEET_ID     = "1Ia9OUbft4KtMgZd3kuMGHs267uhzTmpmzJqR5CkSXTM"  # or open by title

# ---- Helpers ----
def values_from_df(df: pd.DataFrame):
    out = df.copy()
    # round numeric, stringify everything for a single update call
    for c in out.select_dtypes(include=['float', 'int']).columns:
        out[c] = pd.to_numeric(out[c], errors='coerce').round(2)
    return [out.columns.tolist()] + out.fillna("").astype(str).values.tolist()

def retry_gs(call, *args, **kwargs):
    delay = 3
    for _ in range(6):  # ~ up to ~1 min
        try:
            return call(*args, **kwargs)
        except APIError as e:
            if "429" in str(e):
                print(f"⏳ Hit Sheets quota; retrying in {delay}s…")
                time.sleep(delay)
                delay = min(60, delay * 2)
            else:
                raise

# ---- Connect ----
gc = gspread.service_account(filename=SERVICE_JSON)
sh = gc.open_by_key(SHEET_ID)  # or: gc.open(GOOGLE_SHEET_NAME)
print(f"✅ Connected to '{sh.title}'")

# ---- Upload each prepared table with ONE update, no resize/clear ----
PER_SHEET_PAUSE = 1.2  # gentle pacing
for name, df_out in all_sheets.items():
    try:
        ws = sh.worksheet(name)
    except gspread.exceptions.WorksheetNotFound:
        # create once with a reasonable size; avoid resizing later
        rows = max(len(df_out) + 5, 1000)
        cols = max(len(df_out.columns) + 5, 20)
        ws = retry_gs(sh.add_worksheet, title=name, rows=rows, cols=cols)

    vals = values_from_df(df_out)
    retry_gs(ws.update, vals, range_name="A1", value_input_option="RAW")
    print(f"✅ Uploaded tab: {name}")
    time.sleep(PER_SHEET_PAUSE)

# ---- Dashboard KPIs (single write; no resize) ----
df_kpi = df.copy()
if 'Settled date' in df_kpi and not pd.api.types.is_datetime64_any_dtype(df_kpi['Settled date']):
    df_kpi['Settled date'] = pd.to_datetime(df_kpi['Settled date'], errors='coerce')
if 'Profit_Loss' in df_kpi and not pd.api.types.is_numeric_dtype(df_kpi['Profit_Loss']):
    df_kpi['Profit_Loss'] = pd.to_numeric(df_kpi['Profit_Loss'], errors='coerce')

total_profit = round(float(df_kpi['Profit_Loss'].sum()), 2)
total_bets   = int(len(df_kpi))
by_day = df_kpi.groupby(df_kpi['Settled date'].dt.date)['Profit_Loss'].sum()
best_day  = str(by_day.idxmax()) if not by_day.empty else ""
worst_day = str(by_day.idxmin()) if not by_day.empty else ""

kpis = [
    ['Metric', 'Value'],
    ['Total Profit/Loss', total_profit],
    ['Number of Bets', total_bets],
    ['Best Day', best_day],
    ['Worst Day', worst_day],
    ['Generated on', str(date.today())],
]

try:
    dash = sh.worksheet('Dashboard')
except gspread.exceptions.WorksheetNotFound:
    dash = retry_gs(sh.add_worksheet, title='Dashboard', rows=20, cols=5)

retry_gs(dash.update, kpis, range_name="A1", value_input_option="RAW")
print("✅ Dashboard KPIs updated (no resize)")


✅ Connected to 'Betfair Dashboard'
✅ Uploaded tab: By Day
✅ Uploaded tab: By Day Sorted
✅ Uploaded tab: By Week
✅ Uploaded tab: Cumulative
✅ Uploaded tab: By Month
✅ Uploaded tab: By Sport
✅ Uploaded tab: By Country
✅ Uploaded tab: Rolling Returns
✅ Uploaded tab: Track Stats
✅ Uploaded tab: Top Horse Tracks
✅ Uploaded tab: Bottom Horse Tracks
✅ Uploaded tab: Top Greyhound Tracks
✅ Uploaded tab: Bottom Greyhound Tracks
✅ Uploaded tab: Top Strike Rates
✅ Uploaded tab: Bottom Strike Rates
✅ Uploaded tab: Snooker Daily
✅ Uploaded tab: Ice Hockey Daily
✅ Uploaded tab: Horse Racing Daily
✅ Uploaded tab: Golf Daily
✅ Uploaded tab: Politics Daily
✅ Uploaded tab: Tennis Daily
✅ Uploaded tab: Greyhound Racing Daily
✅ Uploaded tab: Football Daily
✅ Uploaded tab: Motor Sport Daily
✅ Uploaded tab: Cricket Daily
✅ Uploaded tab: Darts Daily
✅ Uploaded tab: Basketball Daily
✅ Uploaded tab: American Football Daily
✅ Uploaded tab: Rugby Union Daily
✅ Uploaded tab: Cycling Daily
✅ Uploaded tab: Gaelic Ga