In [2]:
import requests
import pandas as pd
import time
from bs4 import BeautifulSoup
from datetime import datetime, timedelta
from zoneinfo import ZoneInfo

# HTTP headers sent with every schedule request.
HEADERS = {"User-Agent": "Mozilla/5.0"}

# Maps the sport label accepted by extract_schedule_to_excel to the path
# segment used when building the schedule URL.
SPORT_SLUG = {
    "Men": "mens-college-basketball",
    "Women": "womens-college-basketball",
}

# Source and target time zones for the game-time conversion.
ET_TZ = ZoneInfo("America/New_York")  # ESPN schedules use ET
BERLIN_TZ = ZoneInfo("Europe/Berlin")


def parse_time_to_berlin(date_str, time_str):
    """Convert an ET game time to Berlin local time.

    Args:
        date_str: Game date in ``YYYYMMDD`` form.
        time_str: 12-hour clock time such as ``"7:00 PM"``, or any other
            status text (FINAL, TBD, etc.).

    Returns:
        The Berlin-local timestamp formatted as ``"YYYY-MM-DD HH:MM"`` when
        ``time_str`` parses as a clock time; otherwise ``time_str`` unchanged.
    """
    try:
        dt_et = datetime.strptime(f"{date_str} {time_str}", "%Y%m%d %I:%M %p")
    except ValueError:
        # Not a clock time (status text, empty cell, ...): pass it through.
        # Only the parse failure is caught so real errors are not hidden.
        return time_str

    # Attach the ET zone to the naive timestamp, then convert to Berlin.
    dt_berlin = dt_et.replace(tzinfo=ET_TZ).astimezone(BERLIN_TZ)
    return dt_berlin.strftime("%Y-%m-%d %H:%M")


def fetch_espn_schedule(date, sport_slug, timeout=15):
    """Scrape one day of an ESPN schedule page into a DataFrame.

    Args:
        date: Day to fetch as a ``YYYYMMDD`` string; it is interpolated into
            the URL and used for the Berlin time conversion.
        sport_slug: URL path segment for the sport (a value of ``SPORT_SLUG``).
        timeout: Seconds to wait for the HTTP response. Without a timeout a
            stalled connection would block the whole multi-day run.

    Returns:
        A DataFrame with one row per parsed table row and the columns
        "Date (ET)", "Away Team", "Home Team", "Time / Status (ET)",
        "Time (Berlin)" and "Location". An empty DataFrame is returned when
        no rows could be parsed or when the request failed (a warning is
        printed in the latter case).
    """
    url = f"https://www.espn.com/{sport_slug}/schedule/_/date/{date}"
    try:
        response = requests.get(url, headers=HEADERS, timeout=timeout)
        # Do not try to parse an error page as if it were a schedule.
        response.raise_for_status()
    except requests.RequestException as exc:
        print(f"⚠ Request failed for {url}: {exc}")
        return pd.DataFrame()

    soup = BeautifulSoup(response.text, "html.parser")

    fixtures = []
    for table in soup.select("table"):
        for row in table.select("tbody tr"):
            cells = [td.get_text(" ", strip=True) for td in row.find_all("td")]
            # A usable row needs at least away team, home team and time/status.
            if len(cells) < 3:
                continue

            away, home, time_status = cells[:3]
            if not away or not home:
                continue

            # The fourth cell is treated as the location when it exists.
            location = cells[3] if len(cells) >= 4 else ""

            fixtures.append({
                "Date (ET)": date,
                "Away Team": away,
                "Home Team": home,
                "Time / Status (ET)": time_status,
                "Time (Berlin)": parse_time_to_berlin(date, time_status),
                "Location": location,
            })

    return pd.DataFrame(fixtures)


def extract_schedule_to_excel(start_date, end_date, sport, output_file):
    """Collect daily schedules over a date range and save them to one Excel file.

    Args:
        start_date: First day to fetch, formatted ``YYYY-MM-DD``.
        end_date: Last day to fetch (inclusive), formatted ``YYYY-MM-DD``.
        sport: Key of ``SPORT_SLUG`` selecting which schedule to scrape.
        output_file: Path of the Excel file to write.

    Returns:
        The concatenated DataFrame of every non-empty day. When no day
        produced rows, no file is written and an empty DataFrame is returned.
    """
    slug = SPORT_SLUG[sport]
    first_day = datetime.strptime(start_date, "%Y-%m-%d")
    last_day = datetime.strptime(end_date, "%Y-%m-%d")

    daily_frames = []
    # An end date before the start date yields an empty range, so nothing is fetched.
    for offset in range((last_day - first_day).days + 1):
        day = (first_day + timedelta(days=offset)).strftime("%Y%m%d")
        print(f"Fetching {day} ({sport})")

        day_df = fetch_espn_schedule(day, slug)
        if day_df.empty:
            print(f"⚠ No games on {day}")
        else:
            daily_frames.append(day_df)

        # Pause between consecutive requests.
        time.sleep(1.2)

    if not daily_frames:
        print("\n❌ No fixtures found")
        return pd.DataFrame()

    combined = pd.concat(daily_frames, ignore_index=True)
    combined.to_excel(output_file, index=False)
    print(f"\n✅ Excel created: {output_file}")
    return combined


# ---------------- RUN ----------------
# Scrape one sport over the inclusive date window and export the combined
# fixtures to an Excel workbook.
final_df = extract_schedule_to_excel(
    "2025-11-03",            # start_date
    "2026-03-15",            # end_date
    "Men",                   # sport: "Men" or "Women"
    "ncaa_fixtures_1.xlsx",  # output_file
)

# Preview the first rows as a quick sanity check.
print(final_df.head())


Fetching 20251103 (Men)
Fetching 20251104 (Men)
Fetching 20251105 (Men)
Fetching 20251106 (Men)
Fetching 20251107 (Men)
Fetching 20251108 (Men)
Fetching 20251109 (Men)
Fetching 20251110 (Men)
Fetching 20251111 (Men)
Fetching 20251112 (Men)
Fetching 20251113 (Men)
Fetching 20251114 (Men)
Fetching 20251115 (Men)
Fetching 20251116 (Men)
Fetching 20251117 (Men)
Fetching 20251118 (Men)
Fetching 20251119 (Men)
Fetching 20251120 (Men)
Fetching 20251121 (Men)
Fetching 20251122 (Men)
Fetching 20251123 (Men)
Fetching 20251124 (Men)
Fetching 20251125 (Men)
Fetching 20251126 (Men)
Fetching 20251127 (Men)
Fetching 20251128 (Men)
Fetching 20251129 (Men)
Fetching 20251130 (Men)
Fetching 20251201 (Men)
Fetching 20251202 (Men)
Fetching 20251203 (Men)
Fetching 20251204 (Men)
Fetching 20251205 (Men)
Fetching 20251206 (Men)
Fetching 20251207 (Men)
Fetching 20251208 (Men)
Fetching 20251209 (Men)
Fetching 20251210 (Men)
Fetching 20251211 (Men)
Fetching 20251212 (Men)
Fetching 20251213 (Men)
Fetching 2025121