In [1]:
from sportsipy.nfl.boxscore import Boxscores, Boxscore
from sportsipy.nfl.schedule import Schedule, Game
from sportsipy.nfl.teams import Teams
import pandas as pd
import numpy as np
pd.set_option('display.max_columns', None)

In [2]:
# The game_Data function is extracting game statistics for each game. It loops through each game and each week grabbing the statistics referenced.
# This function creates the foundation for our final dataset.
def game_data(game_df,game_stats):
    try:
        # Creates a dataframe for the away_team and the home_team. Sets column names to be exact matches between the two.
        away_team_df = game_df[['away_name', 'away_abbr', 'away_score']].rename(columns = {'away_name': 'team_name', 'away_abbr': 'team_abbr', 'away_score': 'score'})
        home_team_df = game_df[['home_name','home_abbr', 'home_score']].rename(columns = {'home_name': 'team_name', 'home_abbr': 'team_abbr', 'home_score': 'score'})
        try:
            if game_df.loc[0,'away_score'] > game_df.loc[0,'home_score']:
                away_team_df = pd.merge(away_team_df, pd.DataFrame({'game_won' : [1], 'game_lost' : [0]}),left_index = True, right_index = True)
                home_team_df = pd.merge(home_team_df, pd.DataFrame({'game_won' : [0], 'game_lost' : [1]}),left_index = True, right_index = True)
            elif game_df.loc[0,'away_score'] < game_df.loc[0,'home_score']:
                away_team_df = pd.merge(away_team_df, pd.DataFrame({'game_won' : [0], 'game_lost' : [1]}),left_index = True, right_index = True)
                home_team_df = pd.merge(home_team_df, pd.DataFrame({'game_won' : [1], 'game_lost' : [0]}),left_index = True, right_index = True)
            else: 
                away_team_df = pd.merge(away_team_df, pd.DataFrame({'game_won' : [0], 'game_lost' : [0]}),left_index = True, right_index = True)
                home_team_df = pd.merge(home_team_df, pd.DataFrame({'game_won' : [0], 'game_lost' : [0]}),left_index = True, right_index = True)
        except TypeError:
                away_team_df = pd.merge(away_team_df, pd.DataFrame({'game_won' : [np.nan], 'game_lost' : [np.nan]}),left_index = True, right_index = True)
                home_team_df = pd.merge(home_team_df, pd.DataFrame({'game_won' : [np.nan], 'game_lost' : [np.nan]}),left_index = True, right_index = True)        

        # Creating the away_team & home_team stats dataframe. Grabbing the selected stats and then renaming them to match home == away dataframe column names.
        away_stats_df = game_stats.dataframe[['away_first_downs', 'away_fourth_down_attempts',
               'away_fourth_down_conversions', 'away_fumbles', 'away_fumbles_lost',
               'away_interceptions', 'away_net_pass_yards', 'away_pass_attempts',
               'away_pass_completions', 'away_pass_touchdowns', 'away_pass_yards',
               'away_penalties', 'away_points', 'away_rush_attempts',
               'away_rush_touchdowns', 'away_rush_yards', 'away_third_down_attempts',
               'away_third_down_conversions', 'away_time_of_possession',
               'away_times_sacked', 'away_total_yards', 'away_turnovers',
               'away_yards_from_penalties', 'away_yards_lost_from_sacks']].reset_index().drop(columns ='index').rename(columns = {
               'away_first_downs': 'first_downs', 'away_fourth_down_attempts':'fourth_down_attempts',
               'away_fourth_down_conversions':'fourth_down_conversions' , 'away_fumbles': 'fumbles', 'away_fumbles_lost': 'fumbles_lost',
               'away_interceptions': 'interceptions', 'away_net_pass_yards':'net_pass_yards' , 'away_pass_attempts': 'pass_attempts',
               'away_pass_completions':'pass_completions' , 'away_pass_touchdowns': 'pass_touchdowns', 'away_pass_yards': 'pass_yards',
               'away_penalties': 'penalties', 'away_points': 'points', 'away_rush_attempts': 'rush_attempts',
               'away_rush_touchdowns': 'rush_touchdowns', 'away_rush_yards': 'rush_yards', 'away_third_down_attempts': 'third_down_attempts',
               'away_third_down_conversions': 'third_down_conversions', 'away_time_of_possession': 'time_of_possession',
               'away_times_sacked': 'times_sacked', 'away_total_yards': 'total_yards', 'away_turnovers': 'turnovers',
               'away_yards_from_penalties':'yards_from_penalties', 'away_yards_lost_from_sacks': 'yards_lost_from_sacks'})

        home_stats_df = game_stats.dataframe[['home_first_downs', 'home_fourth_down_attempts',
               'home_fourth_down_conversions', 'home_fumbles', 'home_fumbles_lost',
               'home_interceptions', 'home_net_pass_yards', 'home_pass_attempts',
               'home_pass_completions', 'home_pass_touchdowns', 'home_pass_yards',
               'home_penalties', 'home_points', 'home_rush_attempts',
               'home_rush_touchdowns', 'home_rush_yards', 'home_third_down_attempts',
               'home_third_down_conversions', 'home_time_of_possession',
               'home_times_sacked', 'home_total_yards', 'home_turnovers',
               'home_yards_from_penalties', 'home_yards_lost_from_sacks']].reset_index().drop(columns = 'index').rename(columns = {
               'home_first_downs': 'first_downs', 'home_fourth_down_attempts':'fourth_down_attempts',
               'home_fourth_down_conversions':'fourth_down_conversions' , 'home_fumbles': 'fumbles', 'home_fumbles_lost': 'fumbles_lost',
               'home_interceptions': 'interceptions', 'home_net_pass_yards':'net_pass_yards' , 'home_pass_attempts': 'pass_attempts',
               'home_pass_completions':'pass_completions' , 'home_pass_touchdowns': 'pass_touchdowns', 'home_pass_yards': 'pass_yards',
               'home_penalties': 'penalties', 'home_points': 'points', 'home_rush_attempts': 'rush_attempts',
               'home_rush_touchdowns': 'rush_touchdowns', 'home_rush_yards': 'rush_yards', 'home_third_down_attempts': 'third_down_attempts',
               'home_third_down_conversions': 'third_down_conversions', 'home_time_of_possession': 'time_of_possession',
               'home_times_sacked': 'times_sacked', 'home_total_yards': 'total_yards', 'home_turnovers': 'turnovers',
               'home_yards_from_penalties':'yards_from_penalties', 'home_yards_lost_from_sacks': 'yards_lost_from_sacks'})
        
        # Merge the team_df & stats_df for both home & away teams. Set the left_index & right_index to True so that both dataframes merge on the same indices. 
        away_team_df = pd.merge(away_team_df, away_stats_df,left_index = True, right_index = True)
        home_team_df = pd.merge(home_team_df, home_stats_df,left_index = True, right_index = True)
        try:
            # Converting time_of_possession from MM:SS format into seconds(int). 
            away_team_df['time_of_possession'] = (int(away_team_df['time_of_possession'].loc[0][0:2]) * 60) + int(away_team_df['time_of_possession'].loc[0][3:5])
            home_team_df['time_of_possession'] = (int(home_team_df['time_of_possession'].loc[0][0:2]) * 60) + int(home_team_df['time_of_possession'].loc[0][3:5])
        except TypeError:
            away_team_df['time_of_possession'] = np.nan
            home_team_df['time_of_possession'] = np.nan
    except TypeError:
        away_team_df = pd.DataFrame()
        home_team_df = pd.DataFrame()
    return away_team_df, home_team_df


def game_data_up_to_week(weeks,year):
    weeks_games_df = pd.DataFrame()
    for w in range(len(weeks)):
        date_string = str(weeks[w]) + '-' + str(year)
        week_scores = Boxscores(weeks[w],year)
        week_games_df = pd.DataFrame()
        for g in range(len(week_scores.games[date_string])):
            game_str = week_scores.games[date_string][g]['boxscore']
            game_stats = Boxscore(game_str)
            game_df = pd.DataFrame(week_scores.games[date_string][g], index = [0])
            away_team_df, home_team_df = game_data(game_df,game_stats)
            away_team_df['week'] = weeks[w]
            home_team_df['week'] = weeks[w]
            week_games_df = pd.concat([week_games_df,away_team_df])
            week_games_df = pd.concat([week_games_df,home_team_df])
        weeks_games_df = pd.concat([weeks_games_df,week_games_df])
    return weeks_games_df

def get_schedule(year):
    weeks = list(range(1,19))
    schedule_df = pd.DataFrame()
    for w in range(len(weeks)):
        date_string = str(weeks[w]) + '-' + str(year)
        week_scores = Boxscores(weeks[w],year)
        week_games_df = pd.DataFrame()
        for g in range(len(week_scores.games[date_string])):
            game = pd.DataFrame(week_scores.games[date_string][g], index = [0])[['away_name', 'away_abbr','home_name', 'home_abbr','winning_name', 'winning_abbr' ]]
            game['week'] = weeks[w]
            week_games_df = pd.concat([week_games_df,game])
        schedule_df = pd.concat([schedule_df, week_games_df]).reset_index().drop(columns = 'index') 
    return schedule_df 

def agg_weekly_data(schedule_df,weeks_games_df,current_week,weeks):
    schedule_df = schedule_df[schedule_df.week < current_week]
    agg_games_df = pd.DataFrame()
    for w in range(1,len(weeks)):
        games_df = schedule_df[schedule_df.week == weeks[w]]
        agg_weekly_df = weeks_games_df[weeks_games_df.week < weeks[w]].drop(columns = ['score','week','game_won', 'game_lost']).groupby(by=["team_name", "team_abbr"]).mean().reset_index()
        win_loss_df = weeks_games_df[weeks_games_df.week < weeks[w]][["team_name", "team_abbr",'game_won', 'game_lost']].groupby(by=["team_name", "team_abbr"]).sum().reset_index()
        win_loss_df['win_perc'] = win_loss_df['game_won'] / (win_loss_df['game_won'] + win_loss_df['game_lost'])
        win_loss_df = win_loss_df.drop(columns = ['game_won', 'game_lost'])

        try:
            agg_weekly_df['fourth_down_perc'] = agg_weekly_df['fourth_down_conversions'] / agg_weekly_df['fourth_down_attempts']  
        except ZeroDivisionError:
            agg_weekly_df['fourth_down_perc'] = 0 
        agg_weekly_df['fourth_down_perc'] = agg_weekly_df['fourth_down_perc'].fillna(0)

        try:
            agg_weekly_df['third_down_perc'] = agg_weekly_df['third_down_conversions'] / agg_weekly_df['third_down_attempts']  
        except ZeroDivisionError:
            agg_weekly_df['third_down_perc'] = 0
        agg_weekly_df['third_down_perc'] = agg_weekly_df['third_down_perc'].fillna(0)  

        agg_weekly_df = agg_weekly_df.drop(columns = ['fourth_down_attempts', 'fourth_down_conversions', 'third_down_attempts', 'third_down_conversions'])
        agg_weekly_df = pd.merge(win_loss_df,agg_weekly_df,left_on = ['team_name', 'team_abbr'], right_on = ['team_name', 'team_abbr'])

        away_df = pd.merge(games_df,agg_weekly_df,how = 'inner', left_on = ['away_name', 'away_abbr'], right_on = ['team_name', 'team_abbr']).drop(columns = ['team_name', 'team_abbr']).rename(columns = {
                'win_perc': 'away_win_perc',
               'first_downs': 'away_first_downs', 'fumbles': 'away_fumbles', 'fumbles_lost':'away_fumbles_lost', 'interceptions':'away_interceptions',
               'net_pass_yards': 'away_net_pass_yards', 'pass_attempts':'away_pass_attempts', 'pass_completions':'away_pass_completions',
               'pass_touchdowns':'away_pass_touchdowns', 'pass_yards':'away_pass_yards', 'penalties':'away_penalties', 'points':'away_points', 'rush_attempts':'away_rush_attempts',
               'rush_touchdowns':'away_rush_touchdowns', 'rush_yards':'away_rush_yards', 'time_of_possession':'away_time_of_possession', 'times_sacked':'away_times_sacked',
               'total_yards':'away_total_yards', 'turnovers':'away_turnovers', 'yards_from_penalties':'away_yards_from_penalties',
               'yards_lost_from_sacks': 'away_yards_lost_from_sacks', 'fourth_down_perc':'away_fourth_down_perc', 'third_down_perc':'away_third_down_perc'})

        home_df = pd.merge(games_df,agg_weekly_df,how = 'inner', left_on = ['home_name', 'home_abbr'], right_on = ['team_name', 'team_abbr']).drop(columns = ['team_name', 'team_abbr']).rename(columns = {
                'win_perc': 'home_win_perc',
               'first_downs': 'home_first_downs', 'fumbles': 'home_fumbles', 'fumbles_lost':'home_fumbles_lost', 'interceptions':'home_interceptions',
               'net_pass_yards': 'home_net_pass_yards', 'pass_attempts':'home_pass_attempts', 'pass_completions':'home_pass_completions',
               'pass_touchdowns':'home_pass_touchdowns', 'pass_yards':'home_pass_yards', 'penalties':'home_penalties', 'points':'home_points', 'rush_attempts':'home_rush_attempts',
               'rush_touchdowns':'home_rush_touchdowns', 'rush_yards':'home_rush_yards', 'time_of_possession':'home_time_of_possession', 'times_sacked':'home_times_sacked',
               'total_yards':'home_total_yards', 'turnovers':'home_turnovers', 'yards_from_penalties':'home_yards_from_penalties',
               'yards_lost_from_sacks': 'home_yards_lost_from_sacks', 'fourth_down_perc':'home_fourth_down_perc', 'third_down_perc':'home_third_down_perc'})

        agg_weekly_df = pd.merge(away_df,home_df,left_on = ['away_name', 'away_abbr', 'home_name', 'home_abbr', 'winning_name',
               'winning_abbr', 'week'], right_on = ['away_name', 'away_abbr', 'home_name', 'home_abbr', 'winning_name',
               'winning_abbr', 'week'])

        agg_weekly_df['win_perc_dif'] = agg_weekly_df['away_win_perc'] - agg_weekly_df['home_win_perc']
        agg_weekly_df['first_downs_dif'] = agg_weekly_df['away_first_downs'] - agg_weekly_df['home_first_downs']
        agg_weekly_df['fumbles_dif'] = agg_weekly_df['away_fumbles'] - agg_weekly_df['home_fumbles']
        agg_weekly_df['interceptions_dif'] = agg_weekly_df['away_interceptions'] - agg_weekly_df['home_interceptions']
        agg_weekly_df['net_pass_yards_dif'] = agg_weekly_df['away_net_pass_yards'] - agg_weekly_df['home_net_pass_yards']
        agg_weekly_df['pass_attempts_dif'] = agg_weekly_df['away_pass_attempts'] - agg_weekly_df['home_pass_attempts']
        agg_weekly_df['pass_completions_dif'] = agg_weekly_df['away_pass_completions'] - agg_weekly_df['home_pass_completions']
        agg_weekly_df['pass_touchdowns_dif'] = agg_weekly_df['away_pass_touchdowns'] - agg_weekly_df['home_pass_touchdowns']
        agg_weekly_df['pass_yards_dif'] = agg_weekly_df['away_pass_yards'] - agg_weekly_df['home_pass_yards']
        agg_weekly_df['penalties_dif'] = agg_weekly_df['away_penalties'] - agg_weekly_df['home_penalties']
        agg_weekly_df['points_dif'] = agg_weekly_df['away_points'] - agg_weekly_df['home_points']
        agg_weekly_df['rush_attempts_dif'] = agg_weekly_df['away_rush_attempts'] - agg_weekly_df['home_rush_attempts']
        agg_weekly_df['rush_touchdowns_dif'] = agg_weekly_df['away_rush_touchdowns'] - agg_weekly_df['home_rush_touchdowns']
        agg_weekly_df['rush_yards_dif'] = agg_weekly_df['away_rush_yards'] - agg_weekly_df['home_rush_yards']
        agg_weekly_df['time_of_possession_dif'] = agg_weekly_df['away_time_of_possession'] - agg_weekly_df['home_time_of_possession']
        agg_weekly_df['times_sacked_dif'] = agg_weekly_df['away_times_sacked'] - agg_weekly_df['home_times_sacked']
        agg_weekly_df['total_yards_dif'] = agg_weekly_df['away_total_yards'] - agg_weekly_df['home_total_yards']
        agg_weekly_df['turnovers_dif'] = agg_weekly_df['away_turnovers'] - agg_weekly_df['home_turnovers']
        agg_weekly_df['yards_from_penalties_dif'] = agg_weekly_df['away_yards_from_penalties'] - agg_weekly_df['home_yards_from_penalties']
        agg_weekly_df['yards_lost_from_sacks_dif'] = agg_weekly_df['away_yards_lost_from_sacks'] - agg_weekly_df['home_yards_lost_from_sacks']
        agg_weekly_df['fourth_down_perc_dif'] = agg_weekly_df['away_fourth_down_perc'] - agg_weekly_df['home_fourth_down_perc']
        agg_weekly_df['third_down_perc_dif'] = agg_weekly_df['away_third_down_perc'] - agg_weekly_df['home_third_down_perc']

        agg_weekly_df = agg_weekly_df.drop(columns = ['away_win_perc',
               'away_first_downs', 'away_fumbles', 'away_fumbles_lost', 'away_interceptions',
               'away_net_pass_yards', 'away_pass_attempts','away_pass_completions',
               'away_pass_touchdowns', 'away_pass_yards', 'away_penalties', 'away_points', 'away_rush_attempts',
               'away_rush_touchdowns', 'away_rush_yards', 'away_time_of_possession', 'away_times_sacked',
               'away_total_yards', 'away_turnovers', 'away_yards_from_penalties',
               'away_yards_lost_from_sacks','away_fourth_down_perc', 'away_third_down_perc','home_win_perc',
               'home_first_downs', 'home_fumbles', 'home_fumbles_lost', 'home_interceptions',
               'home_net_pass_yards', 'home_pass_attempts','home_pass_completions',
               'home_pass_touchdowns', 'home_pass_yards', 'home_penalties', 'home_points', 'home_rush_attempts',
               'home_rush_touchdowns', 'home_rush_yards', 'home_time_of_possession', 'home_times_sacked',
               'home_total_yards', 'home_turnovers', 'home_yards_from_penalties',
               'home_yards_lost_from_sacks','home_fourth_down_perc', 'home_third_down_perc'])
        
        if (agg_weekly_df['winning_name'].isnull().values.any() and weeks[w] > 3):
            agg_weekly_df['result'] = np.nan
            print(f"Week {weeks[w]} games have not finished yet.")
        else:
            agg_weekly_df['result'] = agg_weekly_df['winning_name'] == agg_weekly_df['away_name']
            agg_weekly_df['result'] = agg_weekly_df['result'].astype('float')
        agg_weekly_df = agg_weekly_df.drop(columns = ['winning_name', 'winning_abbr'])
        agg_games_df = pd.concat([agg_games_df, agg_weekly_df])
    agg_games_df = agg_games_df.reset_index().drop(columns = 'index')
    # What is .drop(index = 20) doing?
    agg_games_df = agg_games_df.drop(index = 20, axis=0)
    return agg_games_df

def get_elo():
    elo_df = pd.read_csv('https://projects.fivethirtyeight.com/nfl-api/nfl_elo_latest.csv')
    elo_df = elo_df.drop(columns = ['season','neutral' ,'playoff', 'elo_prob1', 'elo_prob2', 'elo1_post', 'elo2_post',
           'qbelo1_pre', 'qbelo2_pre', 'qb1', 'qb2', 'qb1_adj', 'qb2_adj', 'qbelo_prob1', 'qbelo_prob2',
           'qb1_game_value', 'qb2_game_value', 'qb1_value_post', 'qb2_value_post',
           'qbelo1_post', 'qbelo2_post', 'score1', 'score2', 'quality', 'importance', 'total_rating'])
    elo_df.date = pd.to_datetime(elo_df.date)
    elo_df = elo_df[elo_df.date < '01-05-2022']

    elo_df['team1'] = elo_df['team1'].replace(['KC', 'JAX', 'CAR', 'BAL', 'BUF', 'MIN', 'DET', 'ATL', 'NE', 'WSH',
           'CIN', 'NO', 'SF', 'LAR', 'NYG', 'DEN', 'CLE', 'IND', 'TEN', 'NYJ',
           'TB', 'MIA', 'PIT', 'PHI', 'GB', 'CHI', 'DAL', 'ARI', 'LAC', 'HOU',
           'SEA', 'OAK'],
            ['kan','jax','car', 'rav', 'buf', 'min', 'det', 'atl', 'nwe', 'was', 
            'cin', 'nor', 'sfo', 'ram', 'nyg', 'den', 'cle', 'clt', 'oti', 'nyj', 
             'tam','mia', 'pit', 'phi', 'gnb', 'chi', 'dal', 'crd', 'sdg', 'htx', 'sea', 'rai' ])
    elo_df['team2'] = elo_df['team2'].replace(['KC', 'JAX', 'CAR', 'BAL', 'BUF', 'MIN', 'DET', 'ATL', 'NE', 'WSH',
           'CIN', 'NO', 'SF', 'LAR', 'NYG', 'DEN', 'CLE', 'IND', 'TEN', 'NYJ',
           'TB', 'MIA', 'PIT', 'PHI', 'GB', 'CHI', 'DAL', 'ARI', 'LAC', 'HOU',
           'SEA', 'OAK'],
            ['kan','jax','car', 'rav', 'buf', 'min', 'det', 'atl', 'nwe', 'was', 
            'cin', 'nor', 'sfo', 'ram', 'nyg', 'den', 'cle', 'clt', 'oti', 'nyj', 
             'tam','mia', 'pit', 'phi', 'gnb', 'chi', 'dal', 'crd', 'sdg', 'htx', 'sea', 'rai' ])
    return elo_df

def merge_rankings(agg_games_df,elo_df):
    agg_games_df = pd.merge(agg_games_df, elo_df, how = 'inner', left_on = ['home_abbr', 'away_abbr'], right_on = ['team1', 'team2']).drop(columns = ['date','team1', 'team2'])
    agg_games_df['elo_dif'] = agg_games_df['elo2_pre'] - agg_games_df['elo1_pre']
    agg_games_df['qb_dif'] = agg_games_df['qb2_value_pre'] - agg_games_df['qb1_value_pre']
    agg_games_df = agg_games_df.drop(columns = ['elo1_pre', 'elo2_pre', 'qb1_value_pre', 'qb2_value_pre'])
    return agg_games_df

# def prep_test_train(current_week,weeks,year):
#     current_week = current_week + 1
#     schedule_df  = get_schedule(year)
#     weeks_games_df = game_data_up_to_week(weeks,year)
#     agg_games_df = agg_weekly_data(schedule_df,weeks_games_df,current_week,weeks)
#     elo_df = get_elo()
#     agg_games_df = merge_rankings(agg_games_df, elo_df)
#     train_df = agg_games_df[agg_games_df.result.notna()]
#     current_week = current_week - 1
#     test_df = agg_games_df[agg_games_df.week == current_week]
#     return test_df, train_df

def prep_test_train(current_week,weeks,year):
    current_week = current_week + 1
    schedule_df  = get_schedule(year)
    weeks_games_df = game_data_up_to_week(weeks,year)
    agg_games_df = agg_weekly_data(schedule_df,weeks_games_df,current_week,weeks)
    elo_df = get_elo()
    agg_games_df = merge_rankings(agg_games_df, elo_df)
    train_df = agg_games_df[agg_games_df.result.notna()]
    current_week = current_week - 1
    test_df = agg_games_df[agg_games_df.week == current_week]
    return test_df, train_df

def display(y_pred,X_test):
    for g in range(len(y_pred)):
        win_prob = round(y_pred[g],2)
        away_team = X_test.reset_index().drop(columns = 'index').loc[g,'away_name']
        home_team = X_test.reset_index().drop(columns = 'index').loc[g,'home_name']
        print(f'The {away_team} have a probability of {win_prob} of beating the {home_team}.')
    

In [3]:
#game = game_data_up_to_week(range(1,4),2021)
#game


In [4]:
current_week = 4
weeks = list(range(1,current_week + 1))
year = 2021

pred_games_df, comp_games_df = prep_test_train(current_week,weeks,year)

In [5]:
pred_games_df

Unnamed: 0,away_name,away_abbr,home_name,home_abbr,week,win_perc_dif,first_downs_dif,fumbles_dif,interceptions_dif,net_pass_yards_dif,pass_attempts_dif,pass_completions_dif,pass_touchdowns_dif,pass_yards_dif,penalties_dif,points_dif,rush_attempts_dif,rush_touchdowns_dif,rush_yards_dif,time_of_possession_dif,times_sacked_dif,total_yards_dif,turnovers_dif,yards_from_penalties_dif,yards_lost_from_sacks_dif,fourth_down_perc_dif,third_down_perc_dif,result,elo_dif,qb_dif
31,Jacksonville Jaguars,jax,Cincinnati Bengals,cin,4,-0.666667,2.0,1.333333,1.0,22.333333,14.333333,3.666667,-0.666667,9.666667,1.333333,-5.0,-6.333333,0.0,-1.333333,-299.333333,-1.666667,21.0,1.333333,10.333333,-12.666667,0.5,-0.060606,0.0,-167.281985,-53.649267
32,Houston Texans,htx,Buffalo Bills,buf,4,-0.333333,-6.333333,-0.333333,0.0,-41.333333,-12.666667,-7.333333,-0.666667,-40.666667,0.666667,-9.0,-0.666667,-0.333333,-32.666667,-262.0,0.666667,-74.0,-0.333333,5.0,0.666667,-0.4,-0.044466,0.0,-242.36313,-176.82093
33,Washington Football Team,was,Atlanta Falcons,atl,4,0.0,-1.0,0.666667,0.0,3.0,-9.333333,-7.0,0.333333,-8.0,0.0,6.333333,2.666667,0.333333,14.333333,-280.666667,-1.333333,17.333333,0.333333,-0.333333,-11.0,0.514286,-0.068627,1.0,-4.518876,-99.570473
34,Kansas City Chiefs,kan,Philadelphia Eagles,phi,4,0.0,7.333333,0.333333,0.333333,59.0,4.333333,4.666667,1.333333,53.333333,-6.333333,9.333333,-0.333333,0.0,-22.333333,163.0,-0.333333,36.666667,1.333333,-31.333333,-5.666667,0.0,0.146319,1.0,181.431277,101.201007
35,Tennessee Titans,oti,New York Jets,nyj,4,0.666667,9.666667,1.0,-1.333333,53.333333,-0.666667,3.0,0.666667,44.666667,-2.0,17.0,12.666667,1.333333,79.333333,365.666667,-1.666667,132.666667,0.0,-23.666667,-8.666667,0.114286,0.091667,0.0,212.19837,140.617767
36,New York Giants,nyg,New Orleans Saints,nor,4,-0.666667,4.666667,1.333333,-0.666667,129.0,13.333333,9.666667,-1.666667,130.666667,1.333333,-5.666667,-6.333333,0.333333,-12.666667,-8.333333,0.333333,116.333333,0.0,5.333333,1.666667,-0.166667,0.065015,1.0,-258.686289,10.739585
37,Cleveland Browns,cle,Minnesota Vikings,min,4,0.333333,-0.666667,0.333333,0.666667,-61.666667,-13.333333,-10.0,-2.0,-49.666667,-0.666667,-0.333333,6.333333,2.333333,46.666667,54.0,1.333333,-15.0,1.0,6.0,12.0,-0.428571,-0.000758,1.0,52.368869,-71.688376
38,Indianapolis Colts,clt,Miami Dolphins,mia,4,-0.333333,4.333333,-1.0,0.0,39.666667,-3.0,-2.333333,0.666667,39.333333,-4.333333,3.666667,1.333333,-0.666667,10.333333,143.0,-0.666667,50.0,-0.333333,-24.0,-0.333333,-0.041667,-0.021868,1.0,5.178283,-25.261063
39,Carolina Panthers,car,Dallas Cowboys,dal,4,0.333333,-2.666667,0.0,-0.333333,7.333333,-1.333333,-4.333333,-1.0,3.333333,-0.333333,-7.0,1.0,0.333333,-33.666667,145.333333,-0.333333,-26.333333,-0.333333,-5.666667,-4.0,-0.333333,-0.113462,0.0,28.888137,-95.046304
40,Detroit Lions,det,Chicago Bears,chi,4,-0.333333,8.666667,-0.333333,0.0,156.666667,13.333333,12.0,1.333333,133.666667,1.666667,9.0,-1.0,0.333333,4.666667,111.666667,-3.0,161.333333,0.0,1.333333,-23.0,0.285714,-0.000795,0.0,-110.813536,110.999259


In [6]:
comp_games_df

Unnamed: 0,away_name,away_abbr,home_name,home_abbr,week,win_perc_dif,first_downs_dif,fumbles_dif,interceptions_dif,net_pass_yards_dif,pass_attempts_dif,pass_completions_dif,pass_touchdowns_dif,pass_yards_dif,penalties_dif,points_dif,rush_attempts_dif,rush_touchdowns_dif,rush_yards_dif,time_of_possession_dif,times_sacked_dif,total_yards_dif,turnovers_dif,yards_from_penalties_dif,yards_lost_from_sacks_dif,fourth_down_perc_dif,third_down_perc_dif,result,elo_dif,qb_dif
0,New York Giants,nyg,Washington Football Team,was,2,0.0,4.0,-2.0,0.0,121.0,16.0,8.0,0.0,132.0,-4.0,-3.0,-7.0,1.0,-66.0,55.0,1.0,55.0,0.0,-9.0,11.0,-0.666667,0.2,0.0,-50.848658,100.316973
1,Cincinnati Bengals,cin,Chicago Bears,chi,2,1.0,-4.0,-1.0,-1.0,29.0,-13.0,-9.0,2.0,45.0,0.0,13.0,10.0,-1.0,15.0,-23.0,2.0,44.0,-2.0,-20.0,16.0,0.666667,-0.24026,0.0,-51.742731,34.616025
2,New Orleans Saints,nor,Carolina Panthers,car,2,0.0,4.0,-1.0,0.0,-119.0,-14.0,-9.0,4.0,-128.0,0.0,19.0,12.0,-1.0,60.0,196.0,-1.0,-59.0,-1.0,-2.0,-9.0,1.0,0.214286,0.0,204.627654,54.983874
3,Las Vegas Raiders,rai,Pittsburgh Steelers,pit,2,0.0,10.0,0.0,1.0,232.0,24.0,16.0,1.0,247.0,5.0,10.0,0.0,2.0,7.0,445.0,1.0,239.0,1.0,30.0,15.0,0.0,0.133333,1.0,-62.282436,-14.386838
4,San Francisco 49ers,sfo,Philadelphia Eagles,phi,2,0.0,-3.0,0.0,0.0,50.0,-9.0,-9.0,-1.0,55.0,-7.0,9.0,-3.0,1.0,-42.0,-270.0,0.0,8.0,2.0,-10.0,5.0,0.0,-0.128205,1.0,36.874709,-13.318243
5,New England Patriots,nwe,New York Jets,nyj,2,0.0,8.0,3.0,-1.0,61.0,2.0,9.0,-1.0,23.0,3.0,2.0,13.0,0.0,80.0,483.0,-5.0,141.0,1.0,54.0,-38.0,-0.5,0.379808,1.0,122.883221,21.246444
6,Buffalo Bills,buf,Miami Dolphins,mia,2,-1.0,6.0,4.0,-1.0,69.0,24.0,14.0,0.0,68.0,3.0,-1.0,2.0,-1.0,43.0,600.0,1.0,112.0,0.0,53.0,-1.0,0.333333,0.080808,1.0,61.134924,115.468364
7,Denver Broncos,den,Jacksonville Jaguars,jax,2,1.0,4.0,0.0,-3.0,-64.0,-15.0,0.0,-1.0,-68.0,-5.0,6.0,12.0,1.0,89.0,612.0,1.0,25.0,-2.0,-31.0,-4.0,0.0,0.193939,1.0,156.607317,63.307311
8,Los Angeles Rams,ram,Indianapolis Colts,clt,2,1.0,-5.0,-1.0,0.0,89.0,-12.0,-5.0,1.0,70.0,2.0,18.0,-7.0,1.0,-39.0,-661.0,-2.0,50.0,-1.0,20.0,-19.0,1.0,0.160839,1.0,58.818705,63.118542
9,Houston Texans,htx,Cleveland Browns,cle,2,1.0,-2.0,-1.0,-1.0,-15.0,5.0,0.0,2.0,-30.0,0.0,8.0,15.0,-2.0,7.0,471.0,-1.0,-8.0,-2.0,20.0,-15.0,-0.75,0.285714,0.0,-75.880653,-34.836432


In [7]:
train_df = comp_games_df
test_df = pred_games_df

X_train = train_df.drop(columns = ['away_name', 'away_abbr', 'home_name', 'home_abbr', 'week','result'])
y_train = train_df[['result']] 
X_test = test_df.drop(columns = ['away_name', 'away_abbr', 'home_name', 'home_abbr', 'week','result'])
y_test = test_df[['result']]

In [8]:
from sklearn.linear_model import LogisticRegression

# Figure out what these Logistic Regression settings do.
# penalty='l1' - Imposes a penalty to the logistic model for having too many variables. Shrinks the coefficients of less contributive variables. Results in more sparse data.
# dual=False - Duality vs Primal
# tol=0.001 - Tolerance for stopping criteria. This tells scikit to stop searching for a minimum (or maximum) once some tolerance is achieved, i.e. once you're close enough.
clf = LogisticRegression(penalty='l1', dual=False, tol=0.001, C=1.0, fit_intercept=True, 
                   intercept_scaling=1, class_weight='balanced', random_state=None, 
                   solver='liblinear', max_iter=1000, multi_class='ovr', verbose=0)

clf.fit(X_train, np.ravel(y_train.values))
y_pred = clf.predict_proba(X_test)
y_pred = y_pred[:,1]

display(y_pred,test_df)

The Jacksonville Jaguars have a probability of 0.07 of beating the Cincinnati Bengals.
The Houston Texans have a probability of 0.09 of beating the Buffalo Bills.
The Washington Football Team have a probability of 0.23 of beating the Atlanta Falcons.
The Kansas City Chiefs have a probability of 0.86 of beating the Philadelphia Eagles.
The Tennessee Titans have a probability of 0.5 of beating the New York Jets.
The New York Giants have a probability of 0.8 of beating the New Orleans Saints.
The Cleveland Browns have a probability of 0.99 of beating the Minnesota Vikings.
The Indianapolis Colts have a probability of 0.94 of beating the Miami Dolphins.
The Carolina Panthers have a probability of 0.16 of beating the Dallas Cowboys.
The Detroit Lions have a probability of 0.44 of beating the Chicago Bears.
The Seattle Seahawks have a probability of 0.93 of beating the San Francisco 49ers.
The Arizona Cardinals have a probability of 1.0 of beating the Los Angeles Rams.
The Pittsburgh Steeler

In [9]:
y_pred

array([0.07382489, 0.08979587, 0.23082188, 0.85698431, 0.49993877,
       0.79909087, 0.99111859, 0.93973498, 0.15878598, 0.43989125,
       0.9347521 , 0.99521935, 0.14283935, 0.97705979, 0.6746568 ,
       0.36345845])

In [10]:
# Create dataframe to hold prediction results
results_df = test_df[['week','away_name','home_name']]
results_df['away_team_win_%'] = y_pred
results_df['home_team_win_%'] = 1 - results_df['away_team_win_%']
results_df['predicted_winner'] = 0

# Loop through results dataframe and add predicted_winner name to column
for index, row in results_df.iterrows():
    if results_df.loc[index,'away_team_win_%'] > results_df.loc[index,'home_team_win_%']:
        results_df.loc[index,'predicted_winner'] = results_df.loc[index,'away_name']
    else:
        results_df.loc[index,'predicted_winner'] = results_df.loc[index,'home_name']

# Set Index to the 'week' column
results_df=results_df.set_index('week')      

# Use get_schedule function to pull dataframe that shows actual weekly winners. Includes all weeks.
weekly_winner = get_schedule('2021')
weekly_winner = weekly_winner.drop(columns=['away_abbr','home_abbr','winning_abbr'])
weekly_winner = weekly_winner.rename(columns={'away_name':'ww_away_name','home_name':'ww_home_name'})

# Slices weekly_winner dataframe based on current week model is predicting.
current_week_wins = weekly_winner.loc[weekly_winner['week'] == current_week]
current_week_wins = current_week_wins.set_index('week')

# Sort both dataframes by away_name
results_df = results_df.sort_values(by='away_name')
current_week_wins = current_week_wins.sort_values(by='ww_away_name')

# Combine results & weekly_winner df's. Drop dupe columns and rename winning_name to 'actual_winner'
actual_results_df = (pd.concat([results_df,current_week_wins],axis=1)).drop(columns=['ww_away_name','ww_home_name']).rename(columns={'winning_name':'actual_winner'})
actual_results_df




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[r

Unnamed: 0_level_0,away_name,home_name,away_team_win_%,home_team_win_%,predicted_winner,actual_winner
week,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
4,Arizona Cardinals,Los Angeles Rams,0.995219,0.004781,Arizona Cardinals,Arizona Cardinals
4,Baltimore Ravens,Denver Broncos,0.97706,0.02294,Baltimore Ravens,Baltimore Ravens
4,Carolina Panthers,Dallas Cowboys,0.158786,0.841214,Dallas Cowboys,Dallas Cowboys
4,Cleveland Browns,Minnesota Vikings,0.991119,0.008881,Cleveland Browns,Cleveland Browns
4,Detroit Lions,Chicago Bears,0.439891,0.560109,Chicago Bears,Chicago Bears
4,Houston Texans,Buffalo Bills,0.089796,0.910204,Buffalo Bills,Buffalo Bills
4,Indianapolis Colts,Miami Dolphins,0.939735,0.060265,Indianapolis Colts,Indianapolis Colts
4,Jacksonville Jaguars,Cincinnati Bengals,0.073825,0.926175,Cincinnati Bengals,Cincinnati Bengals
4,Kansas City Chiefs,Philadelphia Eagles,0.856984,0.143016,Kansas City Chiefs,Kansas City Chiefs
4,Las Vegas Raiders,Los Angeles Chargers,0.363458,0.636542,Los Angeles Chargers,Los Angeles Chargers


In [11]:
file_path = str(f'Exports/week_{current_week}_base_results.csv')
actual_results_df.to_csv(file_path)

In [49]:
from sportsreference.nfl.teams import Teams

for team in Teams():
    print(team.name)
    schedule = team.schedule  # Request the current team's schedule
    for game in schedule:
        print(game.date, game.points_scored, game.points_allowed)

Dallas Cowboys
September 12 37 21
September 19 21 31
September 23 9 24
October 3 0 40
October 10 22 25
October 17 3 31
October 24 5 31
October 31 22 38
November 7 9 17
November 21 22 13
November 28 14 21
December 5 None None
December 12 None None
December 19 None None
December 26 None None
January 2 None None
January 9 None None
Tampa Bay Buccaneers
September 12 37 21
September 19 21 31
September 23 9 24
October 3 0 40
October 10 22 25
October 17 3 31
October 24 5 31
October 31 22 38
November 7 9 17
November 21 22 13
November 28 14 21
December 5 None None
December 12 None None
December 19 None None
December 26 None None
January 2 None None
January 9 None None
Indianapolis Colts
September 12 37 21
September 19 21 31
September 23 9 24
October 3 0 40
October 10 22 25
October 17 3 31
October 24 5 31
October 31 22 38
November 7 9 17
November 21 22 13
November 28 14 21
December 5 None None
December 12 None None
December 19 None None
December 26 None None
January 2 None None
January 9 None No

In [10]:
from sklearn.metrics import accuracy_score

accuracy_score(y_test,np.round(y_pred))

0.6875

In [10]:
import xgboost as xgb

dtest = xgb.DMatrix(X_test, y_test, feature_names=X_test.columns)
dtrain = xgb.DMatrix(X_train, y_train,feature_names=X_train.columns)

param = {'verbosity':1, 
         'objective':'binary:hinge',
         'feature_selector': 'shuffle',
         'booster':'gblinear',
         'eval_metric' :'error',
         'learning_rate': 0.05}

evallist = [(dtrain, 'train'), (dtest, 'test')]

ModuleNotFoundError: No module named 'xgboost'

In [None]:
num_round = 300
bst = xgb.train(param, dtrain, num_round, evallist)

In [None]:
X_test = pred_games_df.drop(columns = ['away_name', 'away_abbr', 'home_name', 'home_abbr', 'week','result'])
y_pred = clf.predict_proba(X_test)
y_pred = y_pred[:,1]

display(y_pred,pred_games_df)