In [1]:
import pandas as pd
from datetime import datetime
import sqlite3

In [2]:
# https://fftoday.com/nfl/schedule.php

In [3]:
df = pd.read_csv("schedule.csv", skiprows=1)

In [4]:
df['Date'] = df['Date'].fillna(method='ffill')

  df['Date'] = df['Date'].fillna(method='ffill')


In [5]:
df

Unnamed: 0,Date,Time (ET),Away Team,Home Team
0,Thu Sep 5,8:20 pm,Baltimore Ravens,Kansas City Chiefs
1,Fri Sep 6,8:15 pm,Green Bay Packers,Philadelphia Eagles ¹
2,Sun Sep 8,1:00 pm,Arizona Cardinals,Buffalo Bills
3,Sun Sep 8,1:00 pm,Carolina Panthers,New Orleans Saints
4,Sun Sep 8,1:00 pm,Houston Texans,Indianapolis Colts
...,...,...,...,...
312,Sun Jan 5,1:00 pm,New York Giants,Philadelphia Eagles *
313,Sun Jan 5,1:00 pm,San Francisco 49ers,Arizona Cardinals *
314,Sun Jan 5,1:00 pm,Seattle Seahawks,Los Angeles Rams *
315,Sun Jan 5,1:00 pm,Washington Commanders,Dallas Cowboys *


In [6]:
def parse_date(date_str):
    date = pd.to_datetime(date_str, format='%a %b %d', errors='coerce')
    current_year = datetime.now().year
    
    # If the month is January, set the year to next year
    if date.month == 1:
        return date.replace(year=current_year + 1)
    else:
        return date.replace(year=current_year)

df['Date'] = df['Date'].apply(parse_date)

In [7]:
df.dropna(subset=['Date'], inplace=True)

In [8]:
import re
# Remove ' ¹' or '*' from the end of the Home Team column
df['Home Team'] = df['Home Team'].str.replace(r'[ ¹*]+$', '', regex=True)

In [9]:
df['DateTime'] = pd.to_datetime(df['Date'].dt.strftime('%Y-%m-%d') + ' ' + df['Time (ET)'])

  df['DateTime'] = pd.to_datetime(df['Date'].dt.strftime('%Y-%m-%d') + ' ' + df['Time (ET)'])


In [10]:
df = df[['DateTime', 'Away Team', 'Home Team']].rename(columns={'DateTime': 'datetime', 'Away Team': 'away_team', 'Home Team': 'home_team'})

In [12]:
df = df.reset_index(names='game_id')

In [13]:
df

Unnamed: 0,game_id,datetime,away_team,home_team
0,0,2024-09-05 20:20:00,Baltimore Ravens,Kansas City Chiefs
1,1,2024-09-06 20:15:00,Green Bay Packers,Philadelphia Eagles
2,2,2024-09-08 13:00:00,Arizona Cardinals,Buffalo Bills
3,3,2024-09-08 13:00:00,Carolina Panthers,New Orleans Saints
4,4,2024-09-08 13:00:00,Houston Texans,Indianapolis Colts
...,...,...,...,...
267,311,2025-01-05 13:00:00,New Orleans Saints,Tampa Bay Buccaneers
268,312,2025-01-05 13:00:00,New York Giants,Philadelphia Eagles
269,313,2025-01-05 13:00:00,San Francisco 49ers,Arizona Cardinals
270,314,2025-01-05 13:00:00,Seattle Seahawks,Los Angeles Rams


In [14]:
df.to_parquet("schedule.parquet")

In [51]:
conn = sqlite3.connect('data/main.db')
df.to_sql('schedule', conn, if_exists='replace', index=True, index_label='game_id')
conn.close()

In [48]:
df

Unnamed: 0,datetime,away_team,home_team
0,2024-09-05 20:20:00,Baltimore Ravens,Kansas City Chiefs
1,2024-09-06 20:15:00,Green Bay Packers,Philadelphia Eagles
2,2024-09-08 13:00:00,Arizona Cardinals,Buffalo Bills
3,2024-09-08 13:00:00,Carolina Panthers,New Orleans Saints
4,2024-09-08 13:00:00,Houston Texans,Indianapolis Colts
...,...,...,...
311,2025-01-05 13:00:00,New Orleans Saints,Tampa Bay Buccaneers
312,2025-01-05 13:00:00,New York Giants,Philadelphia Eagles
313,2025-01-05 13:00:00,San Francisco 49ers,Arizona Cardinals
314,2025-01-05 13:00:00,Seattle Seahawks,Los Angeles Rams


In [None]:
import requests
import os

api_key = os.environ.get('ODDS_API_KEY')
r = requests.get(f"https://api.the-odds-api.com/v4/sports/americanfootball_nfl/scores/?daysFrom=3&apiKey={api_key}")
results = pd.DataFrame(r.json())

In [None]:

# Explode the 'scores' column to create separate rows for each team's score
results_exploded = results.explode('scores')

# Extract 'name' and 'score' from the 'scores' dictionary
results_exploded['team'] = results_exploded['scores'].apply(lambda x: x['name'] if x else None)
results_exploded['score'] = results_exploded['scores'].apply(lambda x: x['score'] if x else None)

# Drop the original 'scores' column
results_exploded = results_exploded.drop('scores', axis=1)

# Pivot the table to have home and away scores in separate columns
results_fixed = results_exploded.pivot(index=['id', 'sport_key', 'sport_title', 'commence_time', 'completed', 'home_team', 'away_team', 'last_update'],
                                       columns='team',
                                       values='score').reset_index()

# Rename columns to distinguish between home and away scores
results_fixed.columns.name = None
results_fixed = results_fixed.rename(columns={
    results_fixed.columns[-2]: 'away_score',
    results_fixed.columns[-1]: 'home_score'
})

In [23]:
results_fixed

Unnamed: 0,id,sport_key,sport_title,commence_time,completed,home_team,away_team,last_update,NaN,away_score,home_score
0,111ac41e21c6f16a2d3d1511f07e2004,americanfootball_nfl,NFL,2024-09-15T17:00:00Z,False,Jacksonville Jaguars,Cleveland Browns,,,,
1,17d4d840f9f4094d76c6e9b53b18ac05,americanfootball_nfl,NFL,2024-09-15T17:00:00Z,False,Tennessee Titans,New York Jets,,,,
2,200c20527d1d6f44257155548cdac1e8,americanfootball_nfl,NFL,2024-09-22T17:00:00Z,False,Cleveland Browns,New York Giants,,,,
3,2cefb0f9bcf18ed3f64396174ece14ad,americanfootball_nfl,NFL,2024-09-17T00:15:00Z,False,Philadelphia Eagles,Atlanta Falcons,,,,
4,2ee776f967540a404694111f27822004,americanfootball_nfl,NFL,2024-09-22T17:00:00Z,False,Pittsburgh Steelers,Los Angeles Chargers,,,,
5,326301fd41fe90d64929b44a2e227b23,americanfootball_nfl,NFL,2024-09-22T17:00:00Z,False,New Orleans Saints,Philadelphia Eagles,,,,
6,3a300e2fb659ca896b5e60252f13150c,americanfootball_nfl,NFL,2024-09-15T20:25:00Z,False,Denver Broncos,Pittsburgh Steelers,,,,
7,42d27889b51ada96c20671b30129ce4e,americanfootball_nfl,NFL,2024-09-15T17:00:00Z,False,New England Patriots,Seattle Seahawks,,,,
8,49397bc00cf6d5e50f0a943538f1b00d,americanfootball_nfl,NFL,2024-09-22T20:05:00Z,False,Las Vegas Raiders,Carolina Panthers,,,,
9,495d9f2c316b05ef092099ef9401a180,americanfootball_nfl,NFL,2024-09-15T17:00:00Z,False,Washington Commanders,New York Giants,,,,
