In [None]:
import os

import cfbd
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score


In [None]:
# Read the CollegeFootballData API key from the environment so the secret never
# lives in the notebook or its version history. Export CFBD_API_KEY before
# starting the kernel.
# NOTE(review): a key was previously committed in this cell; treat it as
# compromised and rotate it.
cfbd_api_key = os.environ.get('CFBD_API_KEY')
if not cfbd_api_key:
    raise RuntimeError(
        'Set the CFBD_API_KEY environment variable to your CollegeFootballData API key.'
    )

# Shared client configuration: every request sends "Authorization: Bearer <key>".
configuration = cfbd.Configuration()
configuration.api_key['Authorization'] = cfbd_api_key
configuration.api_key_prefix['Authorization'] = 'Bearer'

In [None]:
# Games-endpoint client built on the authenticated `configuration`; used by
# fetch_games_data_for_years.
api_instance = cfbd.GamesApi(
    cfbd.ApiClient(configuration)
)

In [None]:
def fetch_games_data_for_years(start_year, end_year, conference='Big 12', games_api=None):
    """Fetch one DataFrame of games per season for a single conference.

    Parameters
    ----------
    start_year, end_year : int
        First and last season to fetch; both ends are inclusive.
    conference : str, default 'Big 12'
        A game is kept when either its home or its away conference equals
        this value.
    games_api : object, optional
        Any object exposing ``get_games(year=...)``. When omitted, the
        module-level ``api_instance`` is used. Pass a client explicitly if
        ``api_instance`` may have been rebound to a different API since the
        games client was created.

    Returns
    -------
    dict
        Maps each season (int) to a DataFrame with the columns ``game_id``,
        ``year``, ``home_team``, ``away_team``, ``home_points`` and
        ``away_points``. A season with no matching games maps to an empty
        DataFrame.
    """
    if games_api is None:
        # Backward-compatible default: the notebook-level games client.
        games_api = api_instance

    dataframes = {}
    for year in range(start_year, end_year + 1):
        games = games_api.get_games(year=year)
        records = [
            dict(game_id=game.id,
                 year=game.season,
                 home_team=game.home_team,
                 away_team=game.away_team,
                 home_points=game.home_points,
                 away_points=game.away_points)
            for game in games
            # Keep the game when the conference appears on either side.
            if conference in (game.home_conference, game.away_conference)
        ]
        dataframes[year] = pd.DataFrame.from_records(records)
    return dataframes

# Pull the 2016 through 2020 seasons (both bounds inclusive).
years_data = fetch_games_data_for_years(start_year=2016, end_year=2020)


In [None]:
# Show a compact per-season preview. Printing the raw dict runs every frame's
# repr together inside one dict literal, which is hard to read.
for season, season_games in years_data.items():
    print(f'{season}: {len(season_games)} games')
    print(season_games.head())

In [None]:
# Stack the per-season frames into one table with a fresh 0..n-1 index.
combined_data = pd.concat(
    list(years_data.values()),
    ignore_index=True,
)


In [None]:
def calculate_point_differential(df):
    """Add a ``point_differential`` column to ``df`` in place.

    The new column is ``home_points`` minus ``away_points``, so positive
    values mean the home team scored more. ``df`` is mutated and nothing is
    returned.
    """
    home_margin = df['home_points'].sub(df['away_points'])
    df['point_differential'] = home_margin

# Adds the point_differential column to combined_data in place (returns None).
calculate_point_differential(df=combined_data)


In [None]:
# NOTE: this rebinds the shared `api_instance` name from the games client to a
# stats client. Re-run the GamesApi cell before calling
# fetch_games_data_for_years again, since that function reads `api_instance`.
api_instance = cfbd.StatsApi(cfbd.ApiClient(configuration))

# Per-season advanced team/game stats, keyed by season.
advanced_stats_data = {}
# Cover the same seasons as the games pull (2016-2020 inclusive). The earlier
# range(2016, 2020) stopped at 2019, so the later inner merge on game_id
# silently dropped every 2020 game.
for year in range(2016, 2020 + 1):
    advanced_stats = api_instance.get_advanced_team_game_stats(year=year)
    advanced_stats_data[year] = pd.DataFrame.from_records(
        [dict(game_id=stat.game_id,
              team=stat.team,
              opponent=stat.opponent,
              line_yards_total=stat.offense.line_yards_total,
              second_level_yards_total=stat.offense.second_level_yards_total,
              open_field_yards_total=stat.offense.open_field_yards_total,
              # Column is named total_points but holds offense.total_ppa; the
              # name is kept because the feature selection refers to it.
              total_points=stat.offense.total_ppa,
              total_plays=stat.offense.plays) for stat in advanced_stats]
    )

In [None]:
# One table of advanced stats across all fetched seasons.
advanced_stats_combined = pd.concat(
    list(advanced_stats_data.values()),
    ignore_index=True,
)

# Attach the HOME team's offensive stats to each game: match on game_id and
# home_team == team. The inner join drops games with no matching stats row.
# The join key `team` duplicates `home_team`, so it is removed afterwards.
combined_data_with_advanced_stats = pd.merge(
    combined_data,
    advanced_stats_combined,
    how='inner',
    left_on=['game_id', 'home_team'],
    right_on=['game_id', 'team'],
).drop(columns='team')

# Modelling frame: only rows with no missing values in any column.
df = combined_data_with_advanced_stats.dropna()

In [None]:
# Features: the home team's offensive advanced stats for the game.
X = df.loc[:, [
    'line_yards_total',
    'second_level_yards_total',
    'open_field_yards_total',
    'total_points',
    'total_plays',
]]
# Target: home points minus away points.
y = df.loc[:, 'point_differential']

# Hold out 20% of the games for evaluation; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)


In [None]:
# Re-attach the target to the features so each split can be inspected as a
# single frame (features first, point_differential as the last column).
train_df = pd.concat(objs=[X_train, y_train], axis='columns')
test_df = pd.concat(objs=[X_test, y_test], axis='columns')

print(train_df, test_df)

In [None]:
# Ordinary least-squares regression of point differential on the training
# features. The fit call stays as the final expression so the cell still
# displays the fitted estimator.
model = LinearRegression()
model.fit(X=X_train, y=y_train)