In [1]:
# Import cfbd library, pandas, and establish API connection to Games and Lines instance

import os

import cfbd
import pandas as pd
# Show every column/row when a frame is displayed in the notebook.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# SECURITY: the API key must never be committed with the notebook. It is read
# from the CFBD_API_KEY environment variable instead. Any key that was
# previously hardcoded here should be treated as leaked and revoked/rotated.
cfbd_api_key = os.environ.get('CFBD_API_KEY')
if not cfbd_api_key:
    raise RuntimeError(
        'CFBD_API_KEY environment variable is not set; '
        'export your CollegeFootballData API key before running this notebook.'
    )

# Authenticate with a bearer token ("Authorization: Bearer <key>").
configuration = cfbd.Configuration()
configuration.api_key['Authorization'] = cfbd_api_key
configuration.api_key_prefix['Authorization'] = 'Bearer'
api_config = cfbd.ApiClient(configuration)

# One shared client backs both endpoint wrappers used below.
api_games_instance = cfbd.GamesApi(api_config)
api_lines_instance = cfbd.BettingApi(api_config)

In [2]:
# Fetch every 2024 regular-season game, whether already played or still upcoming.
# The raw response is kept in `games_response`; `games` is a separate list copy.

games_response = api_games_instance.get_games(
    year=2024,
    season_type='regular',
)
games = list(games_response)

In [3]:
# Flatten the games API response into plain dicts holding only the needed fields.
#
# The dicts are built from `games_response` (the untouched API response from the
# fetch cell) rather than from `games` itself. Rebinding `games` from `games`
# made this cell fail on a second run, because by then `games` already holds
# dicts, which have no `.id` attribute. Reading from `games_response` gives the
# same result on the first run and makes the cell safe to re-run.

games = [
    dict(
        id=g.id,
        season=g.season,
        week=g.week,
        start_date=g.start_date,
        neutral_site=g.neutral_site,
        home_team=g.home_team,
        home_conference=g.home_conference,
        home_points=g.home_points,
        home_pregame_elo=g.home_pregame_elo,
        away_team=g.away_team,
        away_conference=g.away_conference,
        away_points=g.away_points,
        away_pregame_elo=g.away_pregame_elo
    ) for g in games_response]

In [4]:
# Build a DataFrame from the flattened 2024 game records and write it to CSV.
# TODO: confirm whether games_2024.csv is still consumed anywhere; if not,
# this export can be removed.

games_2024_df = pd.DataFrame.from_records(data=games)
games_2024_df.to_csv(path_or_buf='games_2024.csv', index=False)

In [5]:
# Fetch betting lines for every 2024 regular-season game, played or not.
# The raw response is kept in `lines_response`; `lines` is a separate list copy.

lines_response = api_lines_instance.get_lines(
    year=2024,
    season_type='regular',
)
lines = list(lines_response)

In [6]:
# Attach the chosen provider's spread and over/under to each game dict.
#
# Every game gets both a 'spread' and an 'over_under' key; the value stays None
# when the provider has no such line for that game. Always setting the keys
# means the columns exist even if no game has a line, and re-running this cell
# after changing the provider cannot leave stale values behind.

# Betting lines provider to use -- update as needed.
LINES_PROVIDER = 'Bovada'

# Index line entries by game id once instead of rescanning `lines` per game.
# setdefault keeps the FIRST entry for an id, matching a "take the first
# match" lookup.
lines_by_game_id = {}
for entry in lines:
    lines_by_game_id.setdefault(entry.id, entry)

for game in games:
    game['spread'] = None
    game['over_under'] = None

    entry = lines_by_game_id.get(game['id'])
    if entry is None:
        continue

    # First line offered by the selected provider, if any
    # (`or []` tolerates an entry whose `lines` attribute is None).
    provider_line = next(
        (l for l in (entry.lines or []) if l.provider == LINES_PROVIDER),
        None,
    )
    if provider_line is None:
        continue

    if provider_line.spread is not None:
        game['spread'] = float(provider_line.spread)

    if provider_line.over_under is not None:
        game['over_under'] = float(provider_line.over_under)

In [7]:
# Convert the merged games + lines records into a DataFrame, then:
#   1. drop rows missing either pregame Elo rating or the spread
#      (scores may be null -- those are the unplayed games);
#   2. keep only weeks up to and including the earliest week that still has an
#      unplayed game (i.e. played weeks plus the upcoming week);
#   3. export the result as CSV.
#
# NOTE(review): "unplayed" is detected as home_points being null, so a
# cancelled/postponed game in an early week would presumably pin min_week to
# that week -- confirm whether that can occur in this data.

games_lines_df = pd.DataFrame.from_records(games)

games_lines_df = games_lines_df.dropna(
    subset=['home_pregame_elo', 'away_pregame_elo', 'spread']
)

# Earliest week that still contains an unplayed game; NaN when every remaining
# game has been played (Series.min() of an empty selection).
unplayed_weeks = games_lines_df.loc[games_lines_df['home_points'].isnull(), 'week']
min_week = unplayed_weeks.min()

# Only filter when an upcoming week exists. Comparing against NaN is False for
# every row, which would otherwise export an empty file once the season is over.
if pd.notna(min_week):
    games_lines_df = games_lines_df.loc[games_lines_df['week'] <= min_week]

games_lines_df.to_csv('games_with_lines_2024.csv', index=False)
