In [1]:
import os

import cfbd
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

In [2]:
# Configuration
# The API key is read from the CFBD_API_KEY environment variable so the secret
# is never stored in (or committed with) the notebook. Any key that was
# previously pasted into this cell should be treated as leaked and revoked.
cfbd_api_key = os.environ.get('CFBD_API_KEY')
if not cfbd_api_key:
    raise RuntimeError(
        'CFBD_API_KEY is not set; export your CollegeFootballData API key before running this notebook.'
    )

configuration = cfbd.Configuration()
configuration.api_key['Authorization'] = cfbd_api_key
configuration.api_key_prefix['Authorization'] = 'Bearer'

In [3]:
# Create one endpoint wrapper per API group used below, each backed by its own
# client built from the shared `configuration`.
stats_api_instance = cfbd.StatsApi(cfbd.ApiClient(configuration))
games_api_instance = cfbd.GamesApi(cfbd.ApiClient(configuration))

In [4]:
def fetch_games_data_for_years(start_year, end_year, conference='Big 12', games_api=None):
    """Fetch per-season results for every game that involves ``conference``.

    Parameters
    ----------
    start_year, end_year : int
        Inclusive range of seasons to request.
    conference : str, optional
        Value compared against each game's ``home_conference`` and
        ``away_conference``; a game is kept when either side matches.
        Defaults to ``'Big 12'``.
    games_api : object, optional
        Any object exposing ``get_games(year=...)``. When omitted, the
        notebook-level ``games_api_instance`` is used.

    Returns
    -------
    dict
        Maps each season to a DataFrame with the columns ``game_id``,
        ``year``, ``home_team``, ``away_team``, ``home_points`` and
        ``away_points``. A season with no matching games yields an empty
        frame that still carries those columns, so downstream column
        lookups do not fail.
    """
    if games_api is None:
        games_api = games_api_instance

    # Fixing the schema up front keeps empty seasons from producing a
    # column-less DataFrame.
    columns = ['game_id', 'year', 'home_team', 'away_team', 'home_points', 'away_points']

    dataframes = {}
    for year in range(start_year, end_year + 1):
        records = [
            dict(game_id=game.id,
                 year=game.season,
                 home_team=game.home_team,
                 away_team=game.away_team,
                 home_points=game.home_points,
                 away_points=game.away_points)
            for game in games_api.get_games(year=year)
            if game.home_conference == conference or game.away_conference == conference
        ]
        dataframes[year] = pd.DataFrame(records, columns=columns)
    return dataframes

In [5]:
def fetch_big_12_teams(game_data):
    """Collect every team name that appears in the per-season game frames.

    Parameters
    ----------
    game_data : dict
        Maps a season to a DataFrame that is expected to have ``home_team``
        and ``away_team`` columns.

    Returns
    -------
    set
        Union of the distinct non-null values found in ``home_team`` and
        ``away_team`` across all frames. Because both sides of each game are
        collected, the set contains every team present in the frames, not
        only members of one conference.

    Notes
    -----
    Frames lacking one of the columns (for example a column-less empty frame
    for a season with no games) are skipped instead of raising ``KeyError``.
    """
    big_12_teams = set()
    for frame in game_data.values():
        for column in ('home_team', 'away_team'):
            if column in frame.columns:
                # Drop nulls so a missing team name never ends up in the set.
                big_12_teams.update(frame[column].dropna().unique())
    return big_12_teams

In [6]:
def fetch_advanced_team_stats_for_years(start_year, end_year, big_12_teams, stats_api=None):
    """Fetch per-game advanced offensive stats for games involving the given teams.

    Parameters
    ----------
    start_year, end_year : int
        Inclusive range of seasons to request.
    big_12_teams : iterable
        Team names to keep. A stat row is kept when either its ``team`` or
        its ``opponent`` is in this collection.
    stats_api : object, optional
        Any object exposing ``get_advanced_team_game_stats(year=...)``. When
        omitted, the notebook-level ``stats_api_instance`` is used.

    Returns
    -------
    dict
        Maps each season to a DataFrame with the columns ``game_id``,
        ``team``, ``opponent``, ``line_yards_total``,
        ``second_level_yards_total``, ``open_field_yards_total``,
        ``total_points`` and ``total_plays``. Note that ``total_points`` is
        filled from ``offense.total_ppa`` and ``total_plays`` from
        ``offense.plays``. A season with no matching rows yields an empty
        frame that still carries those columns.
    """
    if stats_api is None:
        stats_api = stats_api_instance

    # Materialise once: gives constant-time membership checks and lets callers
    # pass any iterable (a one-shot generator would otherwise be exhausted
    # after the first season).
    teams = set(big_12_teams)

    # Fixing the schema up front keeps empty seasons from producing a
    # column-less DataFrame.
    columns = ['game_id', 'team', 'opponent', 'line_yards_total',
               'second_level_yards_total', 'open_field_yards_total',
               'total_points', 'total_plays']

    advanced_stats_data = {}
    for year in range(start_year, end_year + 1):
        records = [
            dict(game_id=stat.game_id,
                 team=stat.team,
                 opponent=stat.opponent,
                 line_yards_total=stat.offense.line_yards_total,
                 second_level_yards_total=stat.offense.second_level_yards_total,
                 open_field_yards_total=stat.offense.open_field_yards_total,
                 total_points=stat.offense.total_ppa,
                 total_plays=stat.offense.plays)
            for stat in stats_api.get_advanced_team_game_stats(year=year)
            if stat.team in teams or stat.opponent in teams
        ]
        advanced_stats_data[year] = pd.DataFrame(records, columns=columns)
    return advanced_stats_data

In [7]:
# Download one games DataFrame per season, 2017 through 2020 inclusive.
years_data = fetch_games_data_for_years(start_year=2017, end_year=2020)

In [8]:
# Gather every team name that appears as home or away side in the fetched games.
big_12_teams = fetch_big_12_teams(game_data=years_data)

In [9]:
# Download per-game advanced stats for the same seasons, limited to the teams above.
advanced_stats_data = fetch_advanced_team_stats_for_years(
    start_year=2017,
    end_year=2020,
    big_12_teams=big_12_teams,
)

In [None]:
# Stack the per-season frames into a single games table and a single
# advanced-stats table, renumbering rows from zero.
combined_data = pd.concat(list(years_data.values()), ignore_index=True)
advanced_stats_combined = pd.concat(list(advanced_stats_data.values()), ignore_index=True)

In [None]:
# Calculate point differential and merge datasets
def calculate_point_differential(games):
    """Add a ``point_differential`` column to ``games`` in place and return it.

    The value is ``home_points - away_points``, i.e. measured from the home
    team's perspective, which matches the home-team stats merged in below.
    NOTE(review): this helper was called but not defined anywhere in the
    notebook; confirm the home-minus-away sign matches the intended target.
    """
    games['point_differential'] = games['home_points'] - games['away_points']
    return games


calculate_point_differential(combined_data)

# Inner join on (game_id, home team): each surviving row pairs a game with the
# home side's advanced stats; games without a matching stats row are dropped.
# The join key `team` duplicates `home_team`, so it is removed afterwards.
combined_data_with_advanced_stats = combined_data.merge(
    advanced_stats_combined,
    left_on=['game_id', 'home_team'],
    right_on=['game_id', 'team'],
    how='inner',
).drop(columns='team')

In [None]:
# Build the modelling table: keep only fully populated rows, select the five
# offensive feature columns, and use the point differential as the target.
feature_columns = [
    'line_yards_total',
    'second_level_yards_total',
    'open_field_yards_total',
    'total_points',
    'total_plays',
]
df = combined_data_with_advanced_stats.dropna()
X = df[feature_columns]
y = df['point_differential']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=0, test_size=0.2
)

In [None]:
# Fit a linear regression on the training split; `fit` returns the estimator
# itself, so the trailing expression shows the fitted model as the cell output.
model = LinearRegression().fit(X_train, y_train)
model