<a href="https://colab.research.google.com/github/brendancron/NFLelos/blob/main/NFLelos.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import requests
import json
import os
from time import sleep
import pandas as pd
from tqdm.notebook import tqdm

In [None]:
# JSON feed requested by fetch_and_save_schedule() (NFL 2025 fixtures).
FIXTURE_URL = "https://fixturedownload.com/feed/json/nfl-2025"
# Directory, relative to the working directory, that receives the download.
OUTPUT_DIR = "data"
# Full path of the saved schedule file inside OUTPUT_DIR.
OUTPUT_FILE = os.path.join(OUTPUT_DIR, "nfl_2025_schedule.json")

def fetch_and_save_schedule():
    """
    Download the schedule JSON from FIXTURE_URL and write it to OUTPUT_FILE.

    OUTPUT_DIR is created if it does not exist. Progress and the number of
    records saved are printed. Request, HTTP-status and JSON-decoding failures
    are reported with print() instead of being raised, so the function returns
    None whether or not the download succeeded.
    """
    print(f"1. Creating directory: {OUTPUT_DIR}/")
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    print(f"2. Attempting to fetch data from: {FIXTURE_URL}")

    # Use a browser-like user-agent to avoid potential 403 errors from being blocked
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }

    try:
        response = requests.get(FIXTURE_URL, headers=headers, timeout=10)

        # Raise an exception for bad status codes (4xx or 5xx)
        response.raise_for_status()

        # The response is expected to be a list of dictionaries (the schedule)
        schedule_data = response.json()

        print("3. Data fetched successfully. Saving to file...")

        with open(OUTPUT_FILE, 'w', encoding='utf-8') as f:
            # indent=4 keeps the saved file human-readable
            json.dump(schedule_data, f, ensure_ascii=False, indent=4)

        print("-" * 40)
        print(f"SUCCESS: NFL 2025 schedule saved to: {OUTPUT_FILE}")
        print(f"Total games/events found: {len(schedule_data)}")
        print("-" * 40)

    except requests.exceptions.HTTPError as e:
        print(f"ERROR: HTTP Request failed with status code {e.response.status_code}.")
        print("This could mean the URL has changed or the site is blocking requests.")
    except json.JSONDecodeError:
        # Must come before the RequestException handler: in recent releases of
        # requests, the error raised by Response.json() derives from both
        # json.JSONDecodeError and RequestException, so the generic handler
        # would otherwise report a bad body as a network error.
        print("ERROR: Could not decode response as JSON. The URL might not be returning valid data.")
    except requests.exceptions.RequestException as e:
        print(f"ERROR: A network error occurred: {e}")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")

# Execute the function
fetch_and_save_schedule()

# Optional: display the first few lines of the saved file for confirmation.
# If the file is shorter than 5 lines, readline() returns '' and nothing more is printed.
print("\n--- Confirmation (First 5 lines of file) ---")
try:
    with open(OUTPUT_FILE, 'r', encoding='utf-8') as f:
        for _ in range(5):
            print(f.readline(), end='')
except FileNotFoundError:
    # Say so explicitly instead of leaving an empty confirmation section.
    print(f"(nothing to show: {OUTPUT_FILE} does not exist)")

1. Creating directory: data/
2. Attempting to fetch data from: https://fixturedownload.com/feed/json/nfl-2025
3. Data fetched successfully. Saving to file...
----------------------------------------
SUCCESS: NFL 2025 schedule saved to: data/nfl_2025_schedule.json
Total games/events found: 272
----------------------------------------

--- Confirmation (First 5 lines of file) ---
[
    {
        "MatchNumber": 1,
        "RoundNumber": 1,
        "DateUtc": "2025-09-05 00:20:00Z",


In [None]:
# Read the JSON file into a pandas DataFrame.
# Each handler prints a readable hint and then re-raises: everything below
# needs `df`, so continuing after a failed load would either die on an
# unrelated NameError or silently reuse a stale `df` from an earlier run.
try:
    with open(OUTPUT_FILE, 'r', encoding='utf-8') as f:
        schedule_data = json.load(f)
    df = pd.DataFrame(schedule_data)
except FileNotFoundError:
    print(f"Error: File not found at {OUTPUT_FILE}")
    raise
except json.JSONDecodeError:
    print("Error: Could not decode JSON from the file.")
    raise
except Exception as e:
    print(f"An unexpected error occurred while loading data: {e}")
    raise

print("NFL 2025 schedule loaded into DataFrame.")
display(df.head())

# --- Calculate Standings ---

# Filter out games that haven't been played yet (where scores are null)
completed_games_df = df.dropna(subset=['HomeTeamScore', 'AwayTeamScore']).copy()


NFL 2025 schedule loaded into DataFrame.


Unnamed: 0,MatchNumber,RoundNumber,DateUtc,Location,HomeTeam,AwayTeam,Group,HomeTeamScore,AwayTeamScore
0,1,1,2025-09-05 00:20:00Z,Lincoln Financial Field,Philadelphia Eagles,Dallas Cowboys,,24.0,20.0
1,2,1,2025-09-06 00:00:00Z,Arena Corinthians,Los Angeles Chargers,Kansas City Chiefs,,27.0,21.0
2,3,1,2025-09-07 17:00:00Z,Mercedes-Benz Stadium,Atlanta Falcons,Tampa Bay Buccaneers,,20.0,23.0
3,4,1,2025-09-07 17:00:00Z,Huntington Bank Field,Cleveland Browns,Cincinnati Bengals,,16.0,17.0
4,5,1,2025-09-07 17:00:00Z,Lucas Oil Stadium,Indianapolis Colts,Miami Dolphins,,33.0,8.0


In [None]:
# Tally each team's record (W, L, T) over the completed games.
# Teams are inserted in order of first appearance (home side before away side).
team_records = {}

for home_team, away_team, home_score, away_score in zip(
    completed_games_df['HomeTeam'],
    completed_games_df['AwayTeam'],
    completed_games_df['HomeTeamScore'],
    completed_games_df['AwayTeamScore'],
):
    # Create the record on first sight of a team, otherwise reuse it
    home_record = team_records.setdefault(home_team, {'W': 0, 'L': 0, 'T': 0})
    away_record = team_records.setdefault(away_team, {'W': 0, 'L': 0, 'T': 0})

    # Update records based on game outcome
    if home_score > away_score:
        home_record['W'] += 1
        away_record['L'] += 1
    elif home_score < away_score:
        home_record['L'] += 1
        away_record['W'] += 1
    else:
        home_record['T'] += 1
        away_record['T'] += 1

# Convert team records to a DataFrame. Naming the columns explicitly keeps
# W/L/T present even when no game has been completed yet, so the sort below
# does not fail with a KeyError on an empty season.
standings_df = pd.DataFrame.from_dict(team_records, orient='index', columns=['W', 'L', 'T'])
standings_df.index.name = 'Team'
standings_df = standings_df.reset_index()

# Overall standings: most wins first, then most ties, then fewest losses
# (the last key separates teams that have played a different number of games).
print("\n--- Overall Standings (W-L-T) ---")
display(standings_df.sort_values(by=['W', 'T', 'L'], ascending=[False, False, True]))

# NOTE: Standings *by division* are not computed here. The schedule JSON has
# no Team -> Division mapping (e.g., AFC East, NFC West). To add them:
# 1. Get a dataset with Team -> Division mapping.
# 2. Merge standings_df with the division mapping dataset.
# 3. Group the merged DataFrame by Division and display standings for each division.


--- Overall Standings (W-L-T) ---


Unnamed: 0,Team,W,L,T
5,Tampa Bay Buccaneers,5,1,0
8,Indianapolis Colts,5,1,0
0,Philadelphia Eagles,4,2,0
2,Los Angeles Chargers,4,2,0
10,Jacksonville Jaguars,4,2,0
12,New England Patriots,4,2,0
17,Pittsburgh Steelers,4,2,0
20,Denver Broncos,4,2,0
22,Seattle Seahawks,4,2,0
23,San Francisco 49ers,4,2,0


In [None]:
# --- Calculate Elo Ratings (Accumulator Format) ---

# Rating every team starts with; also the value ratings are pulled toward
# after each game update.
MEAN_RATING = 1000
# Weight given to MEAN_RATING when a rating is regressed after a game.
REGRESSION_FACTOR = 0.002
# Elo K-factor (32 is the standard value)
K = 32

# Every team that appears in the schedule, home sides first, each listed once
all_teams = pd.concat([df['HomeTeam'], df['AwayTeam']]).unique()
initial_elo_ratings = dict.fromkeys(all_teams, MEAN_RATING)

def expected_score(elo_a, elo_b):
    """Return the Elo expected score of side A (rated ``elo_a``) against side B.

    The result lies between 0 and 1 and equals 0.5 when both ratings match.
    The scale is 400 points per factor of ten.
    """
    rating_gap = elo_b - elo_a
    odds_against = 10 ** (rating_gap / 400)
    return 1 / (1 + odds_against)

def update_elo(elo_a, elo_b, score_a, k=None):
    """Apply one standard Elo update and return ``(new_elo_a, new_elo_b)``.

    Defined here so the notebook runs on a fresh kernel.

    Args:
        elo_a: Current rating of side A.
        elo_b: Current rating of side B.
        score_a: Actual result for side A (1 win, 0.5 tie, 0 loss).
        k: K-factor; defaults to the module-level ``K``.

    The exchange is zero-sum: whatever A gains, B loses.
    """
    if k is None:
        k = K
    delta = k * (score_a - expected_score(elo_a, elo_b))
    return elo_a + delta, elo_b - delta


# Function to update Elo ratings based on a single game outcome (accumulator)
def update_elo_with_game(current_elo_ratings, game_record):
    """Return a new ratings dict reflecting one game; the input dict is not modified.

    Args:
        current_elo_ratings: Mapping of team name -> current Elo rating.
        game_record: Mapping-like row with 'HomeTeam', 'AwayTeam',
            'HomeTeamScore' and 'AwayTeamScore'.

    Returns:
        A copy of ``current_elo_ratings`` in which both teams' ratings have
        been Elo-updated and then pulled toward MEAN_RATING with weight
        REGRESSION_FACTOR.
    """
    home_team = game_record['HomeTeam']
    away_team = game_record['AwayTeam']
    home_score = game_record['HomeTeamScore']
    away_score = game_record['AwayTeamScore']

    # Teams missing from the mapping start at the mean rating
    home_elo = current_elo_ratings.get(home_team, MEAN_RATING)
    away_elo = current_elo_ratings.get(away_team, MEAN_RATING)

    # Actual score for the home side: Win = 1, Loss = 0, Tie = 0.5
    if home_score > away_score:
        actual_score_home = 1
    elif home_score < away_score:
        actual_score_home = 0
    else:
        actual_score_home = 0.5

    new_home_elo, new_away_elo = update_elo(home_elo, away_elo, actual_score_home)

    # Pull both ratings slightly back toward the mean after every game
    normalized_home_elo = (new_home_elo * (1 - REGRESSION_FACTOR)) + (MEAN_RATING * REGRESSION_FACTOR)
    normalized_away_elo = (new_away_elo * (1 - REGRESSION_FACTOR)) + (MEAN_RATING * REGRESSION_FACTOR)

    # Work on a copy so the caller's dictionary is left untouched
    updated_elo_ratings = current_elo_ratings.copy()
    updated_elo_ratings[home_team] = normalized_home_elo
    updated_elo_ratings[away_team] = normalized_away_elo

    return updated_elo_ratings

# Fold over played games to accumulate Elo ratings
print("\n--- Calculating Elo Ratings (Accumulator Format) ---")

# Start with the initial Elo ratings
current_elo_ratings = initial_elo_ratings.copy()

# Materialise the games once as plain dicts. Calling iterrows() inside the
# outer loop rebuilt a Series for every row on every pass.
completed_games = completed_games_df.to_dict('records')

# Replay the full list of completed games `iterations` times, carrying the
# ratings from one pass into the next.
iterations = 2000
for _ in tqdm(range(iterations), desc="Overall Elo Recalculation"):
    for game in completed_games:
        current_elo_ratings = update_elo_with_game(current_elo_ratings, game)

# Convert final Elo ratings to a DataFrame for display
elo_df = pd.DataFrame.from_dict(current_elo_ratings, orient='index', columns=['Elo'])
elo_df.index.name = 'Team'
elo_df = elo_df.reset_index()

print("\n--- Current Elo Standings (Accumulator Format) ---")
display(elo_df.sort_values(by='Elo', ascending=False))


--- Calculating Elo Ratings (Accumulator Format) ---


Overall Elo Recalculation:   0%|          | 0/2000 [00:00<?, ?it/s]


--- Current Elo Standings (Accumulator Format) ---


Unnamed: 0,Team,Elo
29,Tampa Bay Buccaneers,1413.468443
11,Seattle Seahawks,1266.815805
30,San Francisco 49ers,1265.590507
4,Indianapolis Colts,1246.233796
5,Jacksonville Jaguars,1210.75985
0,Philadelphia Eagles,1205.889998
13,Los Angeles Rams,1187.636205
10,Denver Broncos,1178.43065
2,Atlanta Falcons,1163.913803
12,Green Bay Packers,1112.23904


In [None]:
# --- Step 1: Attach each team's W-L-T record to its Elo rating ---
# The left join keeps every rated team. A team with no completed game has no
# row in standings_df, so its record is filled in as 0-0-0 and the counts are
# cast back to int (NaN would otherwise turn the columns into floats).
merged_df = (
    pd.merge(elo_df, standings_df, on='Team', how='left')
    .fillna({'W': 0, 'L': 0, 'T': 0})
    .astype({'W': int, 'L': int, 'T': int})
)

# --- Step 2: Sort the merged data ONLY by the 'Elo' score ---
ranked_elo_standings = merged_df.sort_values(by='Elo', ascending=False)

# --- Step 3: Create the 1-based index (Rank) for display ---
ranked_elo_standings = ranked_elo_standings.reset_index(drop=True)
ranked_elo_standings.index = ranked_elo_standings.index + 1
ranked_elo_standings.index.name = 'Elo Rank'

# --- Step 4: Order columns for presentation (Rank, Team, Elo, W, L, T) ---
display_cols = ['Team', 'Elo', 'W', 'L', 'T']
ranked_elo_standings = ranked_elo_standings[display_cols]


print("\n--- Current Standings Ranked by Elo Score (with W-L-T Record) ---")
display(ranked_elo_standings)


--- Current Standings Ranked by Elo Score (with W-L-T Record) ---


Unnamed: 0_level_0,Team,Elo,W,L,T
Elo Rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,Tampa Bay Buccaneers,1413.468443,5,1,0
2,Seattle Seahawks,1266.815805,4,2,0
3,San Francisco 49ers,1265.590507,4,2,0
4,Indianapolis Colts,1246.233796,5,1,0
5,Jacksonville Jaguars,1210.75985,4,2,0
6,Philadelphia Eagles,1205.889998,4,2,0
7,Los Angeles Rams,1187.636205,4,2,0
8,Denver Broncos,1178.43065,4,2,0
9,Atlanta Falcons,1163.913803,3,2,0
10,Green Bay Packers,1112.23904,3,1,1
