In [2]:
import os
import requests
import pandas as pd
from datetime import datetime, timedelta
import pytz


# === Config ===
# Prefer a key supplied through the APISPORTS_KEY environment variable so the
# secret does not have to live in the notebook.
# FIXME(security): the literal fallback is kept only so existing runs keep
# working; rotate this key and delete the fallback.
API_KEY = os.environ.get("APISPORTS_KEY", "9eb35f3163ceac963da610e2c93c7abd")
HEADERS = {"x-apisports-key": API_KEY}
utc = pytz.utc
eastern = pytz.timezone("US/Eastern")

# Daily CSVs are written here; create the folder up front so saves cannot fail on a fresh checkout.
os.makedirs("data/daily", exist_ok=True)

# === Set Dates ===
# Read the clock once so `today` and `yesterday` are always exactly one day
# apart, even if the cell runs across midnight Eastern.
now_et = datetime.now(eastern)
today = now_et.strftime("%Y-%m-%d")
yesterday = (now_et - timedelta(days=1)).strftime("%Y-%m-%d")

# === Utility ===
def safe_inning_scores(scores_dict):
    """Return one team's per-inning score mapping, or ``{}`` when unavailable.

    Args:
        scores_dict: One team's score payload (a dict that may carry an
            ``"innings"`` entry), or ``None``/empty.

    Returns:
        The value stored under ``"innings"``. An empty dict is returned when
        ``scores_dict`` is falsy, the key is missing, or the stored value is
        ``None``/empty, so the result is never ``None``.
    """
    if not scores_dict:
        return {}
    # `or {}` also covers an explicit `"innings": None`, which
    # `.get("innings", {})` would hand back unchanged.
    return scores_dict.get("innings") or {}

def enrich_results_for_games(games, timeout=30):
    """Add final scores, winner, total result and inning scores to each game.

    Every game dict in ``games`` is looked up by its ``game_id`` on the
    ``/games`` endpoint. Games whose long status is not ``"Finished"`` are left
    untouched. The dicts are updated in place; nothing is returned.

    Args:
        games: Mapping whose values are game dicts. Each dict is read for
            ``game_id``, ``home_team``, ``away_team`` and (optionally)
            ``total_line``, which may be ``None`` or NaN.
        timeout: Seconds to wait for each HTTP request.

    Fields written for finished games: ``status``, ``home_score``,
    ``away_score``, ``winner`` (home name, away name or ``"Draw"``),
    ``total_result`` (``"Over"``, ``"Under"`` or ``"Push"``; only when a total
    line is known) and ``home_1``..``home_9`` / ``away_1``..``away_9``.

    Any exception raised while processing one game is printed and that game is
    skipped, so a single bad response does not stop the batch.
    """
    for game in games.values():
        try:
            url = f"https://v1.baseball.api-sports.io/games?id={game['game_id']}"
            # A timeout keeps one stalled connection from hanging the whole run.
            response = requests.get(url, headers=HEADERS, timeout=timeout)
            g = response.json()["response"][0]

            if g["status"]["long"] != "Finished":
                continue  # Skip if game isn't done yet

            scores = g.get("scores", {})
            game["status"] = g["status"]["long"]
            game["home_score"] = scores["home"]["total"]
            game["away_score"] = scores["away"]["total"]

            if game["home_score"] is not None and game["away_score"] is not None:
                # Determine winner
                if game["home_score"] > game["away_score"]:
                    game["winner"] = game["home_team"]
                elif game["home_score"] < game["away_score"]:
                    game["winner"] = game["away_team"]
                else:
                    game["winner"] = "Draw"

                # Total result.
                # pd.notna rejects both None and the NaN that a CSV round-trip
                # produces for an empty cell. `is not None` alone lets NaN
                # through, and every comparison against NaN is False, which
                # would label the game "Under".
                total_line = game.get("total_line")
                if pd.notna(total_line):
                    total = game["home_score"] + game["away_score"]
                    if total > total_line:
                        game["total_result"] = "Over"
                    elif total < total_line:
                        game["total_result"] = "Under"
                    else:
                        # Landing exactly on the line is neither over nor under.
                        game["total_result"] = "Push"

            # Inning scores (innings 1-9 only; a missing inning is stored as None)
            home_innings = safe_inning_scores(scores["home"])
            away_innings = safe_inning_scores(scores["away"])
            for i in range(1, 10):
                game[f"home_{i}"] = home_innings.get(str(i))
                game[f"away_{i}"] = away_innings.get(str(i))

        except Exception as e:
            # .get() so a record without a game_id cannot raise again inside the handler.
            print(f"⚠️ Error enriching game {game.get('game_id')}: {e}")

# === Step 1: Pull Today’s Games
def _parse_total_line(opt, side):
    """Return the number following ``side`` in ``opt`` (e.g. "over 8.5" -> 8.5), or None."""
    try:
        return float(opt.split(side)[1].strip())
    except (IndexError, ValueError):
        return None


def _apply_odds(game, bets):
    """Copy moneyline and over/under prices from a bookmaker's ``bets`` onto ``game``."""
    chosen_over = None      # (line, odd) of the "Over <line>" value that sets total_line
    unders_by_line = {}     # parsed line -> odd of the first "Under <line>" with that line
    first_under = None      # first truthy "Under" odd seen, used only as a fallback

    for bet in bets:
        bet_name = bet.get("name")
        if bet_name not in {"Home/Away", "Over/Under"}:
            continue
        for val in bet.get("values") or []:
            # str() guards against a non-string value, which has no .lower().
            opt = str(val.get("value", "")).lower()
            odd = val.get("odd")

            if bet_name == "Home/Away":
                if opt == "home":
                    game["moneyline_home"] = odd
                elif opt == "away":
                    game["moneyline_away"] = odd
            elif "over" in opt:
                # Keep the first parseable "Over" that carries a usable price.
                if chosen_over is None or not chosen_over[1]:
                    line = _parse_total_line(opt, "over")
                    if line is not None:
                        chosen_over = (line, odd)
            elif "under" in opt:
                line = _parse_total_line(opt, "under")
                if line is not None:
                    unders_by_line.setdefault(line, odd)
                if not first_under:
                    first_under = odd

    if chosen_over is not None:
        game["total_line"], game["over_odds"] = chosen_over

    # Pair the under price with the same line as the over price. Fall back to
    # the first under seen only when no matching line is offered.
    under_odd = unders_by_line.get(game["total_line"]) or first_under
    if under_odd is not None:
        game["under_odds"] = under_odd


def pull_games_and_odds(target_date, season=None, league=1, bookmaker=22, timeout=30):
    """Build game records for one Eastern-time date and attach odds when available.

    The schedule is requested for ``target_date`` and for the following day;
    only games whose start time, converted to the ``eastern`` timezone, falls on
    ``target_date`` are kept. Each kept game is then looked up on the ``/odds``
    endpoint for the given bookmaker.

    Args:
        target_date: Date string in ``YYYY-MM-DD`` form.
        season: Season sent to the API. Defaults to the year of ``target_date``.
        league: League id sent to the API (default 1).
        bookmaker: Bookmaker id sent to the odds endpoint (default 22).
        timeout: Seconds to wait for each HTTP request.

    Returns:
        dict mapping game id -> game dict with keys ``game_id``, ``game_date``,
        ``start_time_et``, ``home_team``, ``away_team``, ``moneyline_home``,
        ``moneyline_away``, ``total_line``, ``over_odds``, ``under_odds``,
        ``home_score``, ``away_score``, ``status``, ``winner`` and
        ``total_result``. Odds fields stay ``None`` when no odds are returned;
        result fields are always ``None`` here.

    Raises:
        ValueError: if ``target_date`` is not in ``YYYY-MM-DD`` form.
        Errors from the schedule requests themselves are not caught.
    """
    print(f"\n📅 Pulling game schedule and odds for {target_date}")
    target_day = datetime.strptime(target_date, "%Y-%m-%d")
    if season is None:
        # Derive the season from the requested date instead of pinning one year.
        season = target_day.year
    api_dates = [target_date, (target_day + timedelta(days=1)).strftime("%Y-%m-%d")]
    games = {}

    for api_date in api_dates:
        url = f"https://v1.baseball.api-sports.io/games?league={league}&season={season}&date={api_date}"
        data = requests.get(url, headers=HEADERS, timeout=timeout).json()

        # `or []` also covers a payload whose "response" is present but null.
        for g in data.get("response") or []:
            try:
                game_id = g["id"]
                utc_start = datetime.fromisoformat(g["date"].replace("Z", "+00:00"))
                et_start = utc_start.astimezone(eastern)
                if et_start.strftime("%Y-%m-%d") != target_date:
                    continue

                games[game_id] = {
                    "game_id": game_id,
                    "game_date": et_start.strftime("%Y-%m-%d"),
                    "start_time_et": et_start.strftime("%Y-%m-%d %H:%M:%S"),
                    "home_team": g["teams"]["home"]["name"],
                    "away_team": g["teams"]["away"]["name"],
                    "moneyline_home": None,
                    "moneyline_away": None,
                    "total_line": None,
                    "over_odds": None,
                    "under_odds": None,
                    "home_score": None,
                    "away_score": None,
                    "status": None,
                    "winner": None,
                    "total_result": None,
                }
            except Exception as e:
                print(f"⚠️ Error processing game metadata: {e}")

    # === Add odds if available
    for game_id, game in games.items():
        try:
            odds_url = f"https://v1.baseball.api-sports.io/odds?game={game_id}&bookmaker={bookmaker}"
            odds_data = requests.get(odds_url, headers=HEADERS, timeout=timeout).json()
            bets = odds_data["response"][0]["bookmakers"][0]["bets"]
        except Exception:
            # Odds are optional: a game without them keeps its None placeholders.
            # `Exception` (not a bare except) lets Ctrl-C still interrupt the run.
            continue

        _apply_odds(game, bets)

    return games

# === Step 2: Pull and Enrich Today's Games
today_games = pull_games_and_odds(today)
enrich_results_for_games(today_games)

# === Save Today's File
today_df = pd.DataFrame(today_games.values())
today_filename = f"data/daily/MLB_Combined_Odds_Results_{today}.csv"
today_df.to_csv(today_filename, index=False)
print(f"\n✅ Saved today's file to: {today_filename}")

# === Step 3: Optional Backfill for Yesterday
# Re-query yesterday's games so results that were not final when the file was
# first written get filled in, then overwrite the file.
yesterday_filename = f"data/daily/MLB_Combined_Odds_Results_{yesterday}.csv"
if os.path.exists(yesterday_filename):
    try:
        y_df = pd.read_csv(yesterday_filename)
    except pd.errors.EmptyDataError:
        # A day with no games is saved as a column-less CSV, which read_csv
        # rejects; treat it the same as "nothing to backfill".
        y_df = pd.DataFrame()

    if y_df.empty:
        # Also skips a header-only file, which would otherwise be rewritten
        # without its header row.
        print(f"\n⚠️ Yesterday's file has no games ({yesterday_filename}) — skipping backfill.")
    else:
        print(f"\n♻️ Enriching yesterday's file: {yesterday_filename}")
        yesterday_games = y_df.to_dict(orient="records")
        game_map = {g["game_id"]: g for g in yesterday_games}
        enrich_results_for_games(game_map)

        final_df = pd.DataFrame(game_map.values())
        final_df.to_csv(yesterday_filename, index=False)
        print(f"✅ Updated yesterday's file with enriched results: {yesterday_filename}")
else:
    print(f"\n⚠️ No file found for yesterday ({yesterday_filename}) — skipping backfill.")

import os

# === Confirm File Creation ===
# Sanity check: show where the kernel is running and whether today's CSV landed there.
print("\n📂 Current working directory:", os.getcwd())

daily_filename = f"MLB_Combined_Odds_Results_{today}.csv"
daily_path = os.path.join("data", "daily", daily_filename)

# Confirm if file exists
if os.path.exists(daily_path):
    print(f"✅ File saved: {daily_path}")
    # Print a sample of the data
    try:
        df_check = pd.read_csv(daily_path)
    except pd.errors.EmptyDataError:
        # A day with no games is saved as a column-less CSV, which read_csv
        # rejects; report it as zero rows instead of failing the cell.
        df_check = pd.DataFrame()
    print(f"📊 File contains {len(df_check)} rows")
    display(df_check.head())
else:
    print(f"❌ File not found at expected location: {daily_path}")

# List all files in data/daily
print("\n📁 Files in data/daily:")
print(os.listdir("data/daily"))



📅 Pulling game schedule and odds for 2025-05-09

✅ Saved today's file to: data/daily/MLB_Combined_Odds_Results_2025-05-09.csv

♻️ Enriching yesterday's file: data/daily/MLB_Combined_Odds_Results_2025-05-08.csv
✅ Updated yesterday's file with enriched results: data/daily/MLB_Combined_Odds_Results_2025-05-08.csv


In [3]:
import os

import pandas as pd

# Load the master table that the daily files feed into.
master_file = "data/master/master_template.parquet"
if not os.path.exists(master_file):
    # The path is relative, so it resolves against the kernel's working
    # directory. Report the absolute location and the cwd so a wrong directory
    # or a missing data folder is obvious from the error itself.
    raise FileNotFoundError(
        f"Master file not found: {os.path.abspath(master_file)} "
        f"(working directory: {os.getcwd()})"
    )
master_df = pd.read_parquet(master_file)
print(f"📦 Master currently has {len(master_df)} rows")


FileNotFoundError: [Errno 2] No such file or directory: 'data/master/master_template.parquet'

In [4]:
# Debug aid: print the working directory that the relative data/ paths resolve against.
!pwd


/content
