In [1]:
import pandas as pd
import numpy as np
import requests
import os
import re

import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn import metrics

from sklearn.model_selection import cross_val_score

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

### Set Fantasy Points

In [2]:
qb_fantasy_points = {
            'touchdown': 6,
            'pass_yds': 25,
            'interception': -2,
            'rush_yds': 10,
            'rush_td': 6,
        }

flex_fantasy_points = {
            'rush_touchdown': 6,
            'rush_yards': 10,
            'receptions': 1, # ppr
            'rec_yards': 10,
            'rec_touchdown': 6,
        }

kicking_fantasy_points = {
            'field_goals_made': 3,
            'field_goals_missed': -1,
            'extra_points_made': 1,
            'extra_points_missed': -1,
        }

# Scrape Data
This scrapes all data, for all positions in `starting_week` through `current_week`

In [3]:
# Set the weeks in which you would like to pull data
starting_week = 19
current_week = 23

In [4]:
all_positions = ['QB', 'WR', 'RB', 'TE', 'K']
weeks = [week for week in range(starting_week, current_week + 1)]

file_list = []

for position in all_positions:
    for week in weeks:
        url = f'https://www.cbssports.com/nfl/stats/leaders/live/{position}/{week}/'
        res = requests.get(url)
        output = open(f'./nfl_data/weekly/cbs_weekly/week-{week}_{position}.xls', 'wb')
        output.write(res.content)
        file_list.append(f'./nfl_data/weekly/cbs_weekly/week-{week}_{position}.xls')
        output.close()
        data = pd.read_html(f'./nfl_data/weekly/cbs_weekly/week-{week}_{position}.xls')
        df = data[0]

In [5]:
position_list = all_positions
dfs = []
for file in file_list:
    for i in range(len(position_list)):
        if position_list[i] in file:
            data = pd.read_html(file)
            df2 = pd.DataFrame(data[0])
            if position_list[i] != 'K':
                df2 = df2.droplevel(0, axis=1)
                df2['Week'] = re.search(r'(?<=\-)\s*(..)', file)[0]
                df2['Pos'] = [i.split()[-1] for i in df2['Player  Player on team']]
                df2.rename(columns={'Player  Player on team': 'Player'}, inplace=True)
                df2['Player'] = df2['Player'].map(lambda x: x.split()[3] + ' ' + x.split()[4])
            else:
                df2['Week'] = re.search(r'(?<=\-)\s*(..)', file)[0]
                df2['Pos'] = [i.split()[-1] for i in df2['Player  Player on team']]
                df2.rename(columns={'Player  Player on team': 'Player'}, inplace=True)
                df2['Player'] = df2['Player'].map(lambda x: x.split()[3] + ' ' + x.split()[4])
            dfs.append(df2)
df = pd.concat(dfs, ignore_index=True)

In [6]:
def impute_special_char(df, char):
    df.replace(char, 0, inplace = True) # get rid of the dashes, impute with 0
    
impute_special_char(df, '—')

In [7]:
def change_col_types(df):
    numcols_to_change = df.columns
    numcols_to_change2 = []
    for col in numcols_to_change:
        try:
            df[col] = df[col].astype(int)
            print('success!')
        except:
            numcols_to_change2.append(col)
            print(f'need to clean column: {col}')
            
change_col_types(df)

need to clean column: Player
need to clean column: Game
success!
need to clean column: CMP  Pass Completions
need to clean column: ATT  Pass Attempts
need to clean column: YDS  Passing Yards
need to clean column: TD  Touchdown Passes
need to clean column: INT  Interceptions Thrown
need to clean column: ATT  Rushing Attempts
need to clean column: YDS  Rushing Yards
need to clean column: TD  Rushing Touchdowns
success!
need to clean column: Pos
need to clean column: REC  Receptions
need to clean column: YDS  Receiving Yards
need to clean column: TGT  Targets
need to clean column: TD  Receiving Touchdowns
need to clean column: FGM-A  Field Goals Made - Field Goals Attempted
need to clean column: LNG  Longest field goal in terms of yards by a kicker
need to clean column: XPM-A  Extra Points Made - Extra Points Attempted


In [8]:
df.fillna(0, inplace=True)

In [9]:
df.shape

(312, 20)

In [10]:
df.dtypes

Player                                                    object
Game                                                      object
FPTS  Fantasy Points                                       int64
CMP  Pass Completions                                     object
ATT  Pass Attempts                                        object
YDS  Passing Yards                                        object
TD  Touchdown Passes                                      object
INT  Interceptions Thrown                                 object
ATT  Rushing Attempts                                     object
YDS  Rushing Yards                                        object
TD  Rushing Touchdowns                                    object
Week                                                       int64
Pos                                                       object
REC  Receptions                                           object
YDS  Receiving Yards                                      object
TGT  Targets             

In [11]:
df['FG_Made'] = df['FGM-A  Field Goals Made - Field Goals Attempted'].map(lambda x: str(x)[0])
df['FG_Attempted'] = df['FGM-A  Field Goals Made - Field Goals Attempted'].map(lambda x: str(x)[-1])
df['XPM'] = df['XPM-A  Extra Points Made - Extra Points Attempted'].map(lambda x: str(x)[0])
df['XPA'] = df['XPM-A  Extra Points Made - Extra Points Attempted'].map(lambda x: str(x)[-1])

### Scrape football reference, to get `player_info` df

In [12]:
position = ['rushing', 'passing', 'receiving']

for element in position:
    url = f'https://www.pro-football-reference.com/years/2021/{element}.htm'
    resp = requests.get(url)
    
    with open(f'./nfl_data/2021_{element}_stats.xls', 'wb') as f:
        f.write(resp.content)

In [13]:
pi_wr = pd.read_html('./nfl_data/2021_receiving_stats.xls')
pi_wr = pd.DataFrame(pi_wr[0]) # Saves df var to dataframe

pi_qb = pd.read_html('./nfl_data/2021_passing_stats.xls')
pi_qb = pd.DataFrame(pi_qb[0]) # Saves df var to dataframe

pi_rb = pd.read_html('./nfl_data/2021_rushing_stats.xls')
pi_rb = pd.DataFrame(pi_rb[0]) # Saves df var to dataframe
pi_rb = pi_rb.droplevel(0, axis=1) # Removes first level column

In [14]:
def drop_rows(position):
    for header in position.columns:
        index_list = position.loc[(position[header] == header)].index
        position.drop(labels=index_list, axis=0, inplace = True)
        return
    
drop_rows(pi_rb)
drop_rows(pi_wr)
drop_rows(pi_qb)

In [15]:
pi_wr['Player'] = pi_wr['Player'].map(lambda x: x.rstrip('_+!*@#$?^'))
pi_qb['Player'] = pi_qb['Player'].map(lambda x: x.rstrip('_+!*@#$?^'))
pi_rb['Player'] = pi_rb['Player'].map(lambda x: x.rstrip('_+!*@#$?^'))

In [16]:
dfs = [pi_wr, pi_qb, pi_rb]
player_info = pd.concat([player_info.squeeze() for player_info in dfs], ignore_index=True)

In [17]:
player_info.head()

Unnamed: 0,Rk,Player,Tm,Age,Pos,G,GS,Tgt,Rec,Ctch%,Yds,Y/R,TD,1D,Lng,Y/Tgt,R/G,Y/G,Fmb,QBrec,Cmp,Att,Cmp%,TD%,Int,Int%,Y/A,AY/A,Y/C,Rate,QBR,Sk,Yds.1,Sk%,NY/A,ANY/A,4QC,GWD
0,1,Cooper Kupp,LAR,28,WR,17,17,191,145,75.9%,1947,13.4,16,89,59,10.2,8.5,114.5,0,,,,,,,,,,,,,,,,,,,
1,2,Davante Adams,GNB,29,WR,16,16,169,123,72.8%,1553,12.6,11,84,59,9.2,7.7,97.1,0,,,,,,,,,,,,,,,,,,,
2,3,Tyreek Hill,KAN,27,WR,17,16,159,111,69.8%,1239,11.2,9,75,75,7.8,6.5,72.9,2,,,,,,,,,,,,,,,,,,,
3,4,Justin Jefferson,MIN,22,WR,17,17,167,108,64.7%,1616,15.0,10,75,56,9.7,6.4,95.1,1,,,,,,,,,,,,,,,,,,,
4,5,Mark Andrews,BAL,26,te,17,9,153,107,69.9%,1361,12.7,9,75,43,8.9,6.3,80.1,1,,,,,,,,,,,,,,,,,,,


In [18]:
player_info = player_info[['Player', 'Tm']].copy()
player_info.rename(columns = {'Tm':'Team_Name_Abbrev'}, inplace = True)
player_info = player_info.drop_duplicates()

In [19]:
player_info_dict = pd.Series(player_info.Team_Name_Abbrev.values,index=player_info.Player).to_dict()
# create dictionary with {player : team abbreviation}

In [20]:
Team_Abbreviations_Dict = {
    'Arizona Cardinals': 'ARI',
    'Atlanta Falcons': 'ATL',
    'Baltimore Ravens': 'BAL',
    'Buffalo Bills' : 'BUF',
    'Carolina Panthers': 'CAR',
    'Chicago Bears': 'CHI',
    'Cincinnati Bengals': 'CIN',
    'Cleveland Browns': 'CLE',
    'Dallas Cowboys': 'DAL',
    'Denver Broncos': 'DEN',
    'Detroit Lions': 'DET',
    'Green Bay Packers': 'GNB',
    'Houston Texans': 'HOU',
    'Indianapolis Colts': 'IND',
    'Jacksonville Jaguars': 'JAX',
    'Kansas City Chiefs': 'KAN',
    'Miami Dolphins': 'MIA',
    'Minnesota Vikings': 'MIN',
    'New England Patriots': 'NWE',
    'New Orleans Saints': 'NO',
    'New York Giants': 'NYG',
    'New York Jets': 'NYJ',
    'Las Vegas Raiders': 'LVR',
    'Philadelphia Eagles': 'PHI',
    'Pittsburgh Steelers': 'PIT',
    'Los Angeles Chargers': 'LAC',
    'San Francisco 49ers': 'SFO',
    'Seattle Seahawks': 'SEA',
    'Los Angeles Rams': 'LAR',
    'Tampa Bay Buccaneers': 'TAM',
    'Tennessee Titans': 'TEN',
    'Washington Football Team': 'WAS'
}

In [21]:
def get_key(val):
    for key, value in Team_Abbreviations_Dict.items():
         if val == value:
            return key
 
    return f'key does not exist --> {val}'

In [22]:
player_info['Team_Name_Full'] = player_info['Team_Name_Abbrev'].map(get_key)

In [23]:
player_info['Team_Name_Mascot'] = player_info['Team_Name_Full'].map(lambda x: x.split()[-1])

In [24]:
player_info.head()

Unnamed: 0,Player,Team_Name_Abbrev,Team_Name_Full,Team_Name_Mascot
0,Cooper Kupp,LAR,Los Angeles Rams,Rams
1,Davante Adams,GNB,Green Bay Packers,Packers
2,Tyreek Hill,KAN,Kansas City Chiefs,Chiefs
3,Justin Jefferson,MIN,Minnesota Vikings,Vikings
4,Mark Andrews,BAL,Baltimore Ravens,Ravens


In [25]:
replace_values = {'KAN': 'KC', 
                  'TAM': 'TB',
                  'SFO': 'SF', 
                  'GNB': 'GB', 
                  'NWE': 'NE',
                  'LVR': 'LV',
                  'NOR': 'NO',
                  'JAX': 'JAC'}
player_info['Team_Name_Abbrev'] = player_info['Team_Name_Abbrev'].replace(replace_values)
player_info.head(30)

Unnamed: 0,Player,Team_Name_Abbrev,Team_Name_Full,Team_Name_Mascot
0,Cooper Kupp,LAR,Los Angeles Rams,Rams
1,Davante Adams,GB,Green Bay Packers,Packers
2,Tyreek Hill,KC,Kansas City Chiefs,Chiefs
3,Justin Jefferson,MIN,Minnesota Vikings,Vikings
4,Mark Andrews,BAL,Baltimore Ravens,Ravens
5,Diontae Johnson,PIT,Pittsburgh Steelers,Steelers
6,Keenan Allen,LAC,Los Angeles Chargers,Chargers
7,Jaylen Waddle,MIA,Miami Dolphins,Dolphins
8,Stefon Diggs,BUF,Buffalo Bills,Bills
9,Hunter Renfrow,LV,Las Vegas Raiders,Raiders


In [26]:
df.shape, player_info.shape

((312, 24), (627, 4))

### Merge `player_info` with `df`

In [27]:
df = pd.merge(df, player_info, on = 'Player', how = 'left')
df['Team1'] = df['Game'].map(lambda x: x.split()[0])
df['Team2'] = df['Game'].map(lambda x: x.split()[-2])
df['Opp'] = np.where(df['Team_Name_Abbrev'] == df['Team1'], df['Team2'], df['Team1'])
df.head()

Unnamed: 0,Player,Game,FPTS Fantasy Points,CMP Pass Completions,ATT Pass Attempts,YDS Passing Yards,TD Touchdown Passes,INT Interceptions Thrown,ATT Rushing Attempts,YDS Rushing Yards,TD Rushing Touchdowns,Week,Pos,REC Receptions,YDS Receiving Yards,TGT Targets,TD Receiving Touchdowns,FGM-A Field Goals Made - Field Goals Attempted,LNG Longest field goal in terms of yards by a kicker,XPM-A Extra Points Made - Extra Points Attempted,FG_Made,FG_Attempted,XPM,XPA,Team_Name_Abbrev,Team_Name_Full,Team_Name_Mascot,Team1,Team2,Opp
0,Josh Allen,BUF 47 - NE 17,48,21,25,308,5,0,6,66,0,19,QB,0,0,0,0,0,0.0,0,0,0,0,0,BUF,Buffalo Bills,Bills,BUF,NE,NE
1,Patrick Mahomes,KC 42 - PIT 21,46,30,39,404,5,1,3,29,0,19,QB,0,0,0,0,0,0.0,0,0,0,0,0,KC,Kansas City Chiefs,Chiefs,KC,PIT,PIT
2,Matthew Stafford,LAR 34 - ARI 11,28,13,17,202,2,0,6,22,1,19,QB,0,0,0,0,0,0.0,0,0,0,0,0,LAR,Los Angeles Rams,Rams,LAR,ARI,ARI
3,Dak Prescott,SF 23 - DAL 17,22,23,43,254,1,1,4,27,1,19,QB,0,0,0,0,0,0.0,0,0,0,0,0,DAL,Dallas Cowboys,Cowboys,SF,DAL,SF
4,Tom Brady,TB 31 - PHI 15,22,29,37,271,2,0,0,0,0,19,QB,0,0,0,0,0,0.0,0,0,0,0,0,TB,Tampa Bay Buccaneers,Buccaneers,TB,PHI,PHI


### Add opponent defensive rank to `df`

In [28]:
url = 'https://www.pro-football-reference.com/years/2021/opp.htm'
res = requests.get(url)
with open('./nfl_data/weekly/defensive_ranks.xls', 'wb') as f:
    f.write(res.content)

In [29]:
# Set the defensive week to the week you care about; default is current_week
defensive_week = 22

df_def = pd.read_html('./nfl_data/weekly/defensive_ranks.xls')
df_def = pd.DataFrame(df_def[0]) 
df_def = df_def.droplevel(0, axis=1)
df_def = df_def.rename(columns = {'Tm':'Team', 'Rk': 'Rank'})
df_def = df_def.iloc[:, [0,1]].copy() # filter to only Rank and Team
df_def['Tm_Abr'] = df_def['Team'].map(lambda x: Team_Abbreviations_Dict.get(x)) # tack on column with corresponding team abbreviation
df_def.drop(index = [32,33,34], axis = 0, inplace = True) # drop the average and total rows
df_def['Current_Week'] = defensive_week
df_def['Team_Mascot'] = df_def['Team'].map(lambda x: x.split()[-1])
df_def.rename(columns = {'Rank': f'Def_Rank_Week_{current_week}'}, inplace = True)
df_def.head()

Unnamed: 0,Def_Rank_Week_23,Team,Tm_Abr,Current_Week,Team_Mascot
0,1.0,Buffalo Bills,BUF,22,Bills
1,2.0,New England Patriots,NWE,22,Patriots
2,3.0,Denver Broncos,DEN,22,Broncos
3,4.0,New Orleans Saints,NO,22,Saints
4,5.0,Tampa Bay Buccaneers,TAM,22,Buccaneers


In [30]:
df_def['Tm_Abr'] = df_def['Tm_Abr'].replace(replace_values) # addressing football reference's weird abbreviations again

In [31]:
df_def = df_def[['Def_Rank_Week_23','Tm_Abr', 'Current_Week']].copy()
df_def.head()

Unnamed: 0,Def_Rank_Week_23,Tm_Abr,Current_Week
0,1.0,BUF,22
1,2.0,NE,22
2,3.0,DEN,22
3,4.0,NO,22
4,5.0,TB,22


### Merge `df_def` with `df`

In [32]:
print(f'df shape is {df.shape}')
print(f'df_def shape is {df_def.shape}')

df shape is (312, 30)
df_def shape is (32, 3)


In [33]:
df = pd.merge(df, df_def, left_on = ['Week', 'Opp'], right_on= ['Current_Week','Tm_Abr'], how = 'left')

## Scrape weather

In [34]:
weeks = [f'week-{week}' for week in range(1,17)]

file_list = []

for week in weeks:
    url = f'https://www.nflweather.com/en/week/2021/{week}/'
    res = requests.get(url)
    output = open(f'./nfl_data/weekly/weather/{week}_weather.xls', 'wb')
    output.write(res.content)
    file_list.append(f'./nfl_data/weekly/weather/{week}_weather.xls')
    output.close()
    data = pd.read_html(f'./nfl_data/weekly/weather/{week}_weather.xls')
    df_w = data[0]
    print(f'Scraping weather for {week}')

Scraping weather for week-1
Scraping weather for week-2
Scraping weather for week-3
Scraping weather for week-4
Scraping weather for week-5
Scraping weather for week-6
Scraping weather for week-7
Scraping weather for week-8
Scraping weather for week-9
Scraping weather for week-10
Scraping weather for week-11
Scraping weather for week-12
Scraping weather for week-13
Scraping weather for week-14
Scraping weather for week-15
Scraping weather for week-16


In [35]:
dfs = []
for file in file_list:
    data = pd.read_html(file)
    df2 = pd.DataFrame(data[0])
    df2.drop(columns = ['Unnamed: 0', 'Game', 'Game.1', 'Game.2', 'Time (ET)', 'TV', 'Unnamed: 8', 'Unnamed: 12'], inplace = True) 
    df2['Week'] = re.search(r'(\d+)', file)[0]
    df2['Wind_Speed_MPH'] = df2['Wind'].map(lambda x:re.search(r'(\d+)',x)[0])
    df2['Wind_Direction'] = df2['Wind'].map(lambda x:re.search(r'[A-Z]+',x)[0])
    df2['Temp'] = df2['Forecast'].map(lambda x: re.search(r'(\d+)',x)[0] if x != 'DOME' else 0)
    df2['Weather_Desc'] = df2['Forecast'].map(lambda x: re.search(r'\s(.+)',x)[0].strip() if x != 'DOME' else 0)
    df2["Wind"] = np.where(df2["Forecast"] == "DOME", 0, df2["Wind"])
    df2["Wind_Speed_MPH"] = np.where(df2["Forecast"] == "DOME", 0, df2["Wind_Speed_MPH"])
    df2["Wind_Direction"] = np.where(df2["Forecast"] == "DOME", 0, df2["Wind_Direction"])
    dfs.append(df2)
weather_df = pd.concat(dfs, ignore_index=True)
weather_df.head()

Unnamed: 0,Away,Home,Forecast,Extended Forecast,Wind,Week,Wind_Speed_MPH,Wind_Direction,Temp,Weather_Desc
0,Cowboys,Buccaneers,80f Humid and Partly Cloudy,Humid and Partly Cloudy. Rain until evening.,6m WSW,1,6,WSW,80,Humid and Partly Cloudy
1,Eagles,Falcons,DOME,Clear. Clear throughout the day.,0,1,0,0,0,0
2,Steelers,Bills,72f Mostly Cloudy,Mostly Cloudy. Rain overnight.,12m WSW,1,12,WSW,72,Mostly Cloudy
3,Vikings,Bengals,82f Clear,Clear. Clear throughout the day.,11m SW,1,11,SW,82,Clear
4,49ers,Lions,DOME,Mostly Cloudy. Mostly cloudy throughout the day.,0,1,0,0,0,0


In [36]:
weather_df['Week'] = weather_df['Week'].astype(int)

## Merge `df` and `df_weather`

In [37]:
change_col_types(df)

need to clean column: Player
need to clean column: Game
success!
success!
success!
success!
success!
success!
success!
success!
success!
success!
need to clean column: Pos
success!
success!
success!
success!
need to clean column: FGM-A  Field Goals Made - Field Goals Attempted
success!
need to clean column: XPM-A  Extra Points Made - Extra Points Attempted
success!
success!
success!
success!
need to clean column: Team_Name_Abbrev
need to clean column: Team_Name_Full
need to clean column: Team_Name_Mascot
need to clean column: Team1
need to clean column: Team2
need to clean column: Opp
need to clean column: Def_Rank_Week_23
need to clean column: Tm_Abr
need to clean column: Current_Week


In [38]:
df = pd.merge(df, weather_df, left_on = ['Team_Name_Mascot', 'Week'], right_on = ['Home', 'Week'], how = 'left')

In [39]:
df['Away_Game'] = np.where(df['Away'] == df['Team_Name_Mascot'], 1, 0)
df['Home_Game'] = np.where(df['Home'] == df['Team_Name_Mascot'], 1, 0)
df['FG_Missed'] = df['FG_Attempted'] - df['FG_Made']
df['XP_Missed'] = df['XPA'] - df['XPM']

In [40]:
df.fillna(0, inplace=True)

In [41]:
change_col_types(df)

need to clean column: Player
need to clean column: Game
success!
success!
success!
success!
success!
success!
success!
success!
success!
success!
need to clean column: Pos
success!
success!
success!
success!
need to clean column: FGM-A  Field Goals Made - Field Goals Attempted
success!
need to clean column: XPM-A  Extra Points Made - Extra Points Attempted
success!
success!
success!
success!
need to clean column: Team_Name_Abbrev
need to clean column: Team_Name_Full
need to clean column: Team_Name_Mascot
need to clean column: Team1
need to clean column: Team2
need to clean column: Opp
success!
success!
success!
success!
success!
success!
success!
success!
success!
success!
success!
success!
success!
success!
success!
success!


# Calculate Custom Fantasy Points

In [42]:
qb = df['TD  Touchdown Passes'].multiply(qb_fantasy_points['touchdown']) + df['YDS  Passing Yards'].divide(qb_fantasy_points['pass_yds']) + df['INT  Interceptions Thrown'].multiply(qb_fantasy_points['interception']) + df['YDS  Rushing Yards'].divide(qb_fantasy_points['rush_yds']) + df['TD  Rushing Touchdowns'].multiply(qb_fantasy_points['rush_td'])
wr = df['TD  Rushing Touchdowns'].multiply(flex_fantasy_points['rush_touchdown']) + df['YDS  Rushing Yards'].divide(flex_fantasy_points['rush_yards']) + df['REC  Receptions'].add(flex_fantasy_points['receptions']) + df['YDS  Receiving Yards'].divide(flex_fantasy_points['rec_yards']) + df['TD  Receiving Touchdowns'].multiply(flex_fantasy_points['rec_touchdown'])
rb = df['TD  Rushing Touchdowns'].multiply(flex_fantasy_points['rush_touchdown']) + df['YDS  Rushing Yards'].divide(flex_fantasy_points['rush_yards']) + df['REC  Receptions'].add(flex_fantasy_points['receptions']) + df['YDS  Receiving Yards'].divide(flex_fantasy_points['rec_yards']) + df['TD  Receiving Touchdowns'].multiply(flex_fantasy_points['rec_touchdown'])
te = df['TD  Rushing Touchdowns'].multiply(flex_fantasy_points['rush_touchdown']) + df['YDS  Rushing Yards'].divide(flex_fantasy_points['rush_yards']) + df['REC  Receptions'].add(flex_fantasy_points['receptions']) + df['YDS  Receiving Yards'].divide(flex_fantasy_points['rec_yards']) + df['TD  Receiving Touchdowns'].multiply(flex_fantasy_points['rec_touchdown'])
k = df['FG_Made'].multiply(kicking_fantasy_points['field_goals_made']) + df['FG_Missed'].multiply(kicking_fantasy_points['field_goals_missed']) + df['XPM'].multiply(kicking_fantasy_points['extra_points_made']) + df['XP_Missed'].multiply(kicking_fantasy_points['extra_points_missed'])

position_list = [('QB', qb), ('WR', wr), ('RB', rb), ('TE', te), ('K', k)]

for position in position_list:
    first_idx = df[df['Pos'] == position[0]].index[0]
    final_idx = df[df['Pos'] == position[0]].index[-1]
    df.loc[first_idx:final_idx,['Fantasy_Pts']] = position[1]
    

In [43]:
df.rename(columns = {'CMP  Pass Completions': 'Completions',
                     'ATT  Pass Attempts': 'Pass_Attempts',
                    'YDS  Passing Yards': 'Passing_Yds',
                    'TD  Touchdown Passes': 'TD_Passes',
                    'INT  Interceptions Thrown': 'INT',
                     'ATT  Rushing Attempts': 'Carries',
                     'YDS  Rushing Yards': 'Rushing_Yds',
                     'TD  Rushing Touchdowns': 'Rushing_TD',
                     'REC  Receptions': 'Receptions',
                     'YDS  Receiving Yards': 'Receiving_Yds',
                     'TGT  Targets': 'Targets',
                     'TD  Receiving Touchdowns': 'Receiving_TD',
                     'FGM-A  Field Goals Made - Field Goals Attempted': 'FGM-FGA',
                     'LNG  Longest field goal in terms of yards by a kicker': 'Longest_FG',
                     'XPM-A  Extra Points Made - Extra Points Attempted': 'XPM-XPA',
                     'FPTS  Fantasy Points' : 'CBS_Fantasy_Pts'
                    }, inplace = True)
df.head()

Unnamed: 0,Player,Game,CBS_Fantasy_Pts,Completions,Pass_Attempts,Passing_Yds,TD_Passes,INT,Carries,Rushing_Yds,Rushing_TD,Week,Pos,Receptions,Receiving_Yds,Targets,Receiving_TD,FGM-FGA,Longest_FG,XPM-XPA,FG_Made,FG_Attempted,XPM,XPA,Team_Name_Abbrev,Team_Name_Full,Team_Name_Mascot,Team1,Team2,Opp,Def_Rank_Week_23,Tm_Abr,Current_Week,Away,Home,Forecast,Extended Forecast,Wind,Wind_Speed_MPH,Wind_Direction,Temp,Weather_Desc,Away_Game,Home_Game,FG_Missed,XP_Missed,Fantasy_Pts
0,Josh Allen,BUF 47 - NE 17,48,21,25,308,5,0,6,66,0,19,QB,0,0,0,0,0,0,0,0,0,0,0,BUF,Buffalo Bills,Bills,BUF,NE,NE,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,48.92
1,Patrick Mahomes,KC 42 - PIT 21,46,30,39,404,5,1,3,29,0,19,QB,0,0,0,0,0,0,0,0,0,0,0,KC,Kansas City Chiefs,Chiefs,KC,PIT,PIT,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,47.06
2,Matthew Stafford,LAR 34 - ARI 11,28,13,17,202,2,0,6,22,1,19,QB,0,0,0,0,0,0,0,0,0,0,0,LAR,Los Angeles Rams,Rams,LAR,ARI,ARI,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,28.28
3,Dak Prescott,SF 23 - DAL 17,22,23,43,254,1,1,4,27,1,19,QB,0,0,0,0,0,0,0,0,0,0,0,DAL,Dallas Cowboys,Cowboys,SF,DAL,SF,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,22.86
4,Tom Brady,TB 31 - PHI 15,22,29,37,271,2,0,0,0,0,19,QB,0,0,0,0,0,0,0,0,0,0,0,TB,Tampa Bay Buccaneers,Buccaneers,TB,PHI,PHI,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,22.84


In [44]:
df = df[['Week', 'Player', 'Pos', 'CBS_Fantasy_Pts', 'Fantasy_Pts', 'Team_Name_Full', 'Team_Name_Mascot','Team_Name_Abbrev','Game', 'Opp', 'Def_Rank_Week_23', 'Away_Game',
             'Home_Game', 'Completions', 'Pass_Attempts', 'Passing_Yds', 'TD_Passes', 'INT', 'Carries', 'Rushing_Yds', 'Rushing_TD', 'Receptions', 
             'Receiving_Yds', 'Targets', 'Receiving_TD', 'FGM-FGA', 'FG_Missed', 'Longest_FG', 'XPM-XPA', 'XP_Missed', 'Forecast', 
             'Extended Forecast', 'Wind', 'Wind_Speed_MPH', 'Wind_Direction', 'Temp', 'Weather_Desc']]
df.head()

Unnamed: 0,Week,Player,Pos,CBS_Fantasy_Pts,Fantasy_Pts,Team_Name_Full,Team_Name_Mascot,Team_Name_Abbrev,Game,Opp,Def_Rank_Week_23,Away_Game,Home_Game,Completions,Pass_Attempts,Passing_Yds,TD_Passes,INT,Carries,Rushing_Yds,Rushing_TD,Receptions,Receiving_Yds,Targets,Receiving_TD,FGM-FGA,FG_Missed,Longest_FG,XPM-XPA,XP_Missed,Forecast,Extended Forecast,Wind,Wind_Speed_MPH,Wind_Direction,Temp,Weather_Desc
0,19,Josh Allen,QB,48,48.92,Buffalo Bills,Bills,BUF,BUF 47 - NE 17,NE,0,0,0,21,25,308,5,0,6,66,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,19,Patrick Mahomes,QB,46,47.06,Kansas City Chiefs,Chiefs,KC,KC 42 - PIT 21,PIT,0,0,0,30,39,404,5,1,3,29,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,19,Matthew Stafford,QB,28,28.28,Los Angeles Rams,Rams,LAR,LAR 34 - ARI 11,ARI,0,0,0,13,17,202,2,0,6,22,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,19,Dak Prescott,QB,22,22.86,Dallas Cowboys,Cowboys,DAL,SF 23 - DAL 17,SF,0,0,0,23,43,254,1,1,4,27,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,19,Tom Brady,QB,22,22.84,Tampa Bay Buccaneers,Buccaneers,TB,TB 31 - PHI 15,PHI,0,0,0,29,37,271,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


## Modeling

In [45]:
df.fillna(0, inplace=True)

In [46]:
# Create X and y
X = df.drop(columns = ['Player', 'Pos', 'CBS_Fantasy_Pts', 'Fantasy_Pts', 'Team_Name_Full', 'Team_Name_Mascot', 'Team_Name_Abbrev', 'Game', 'Opp', 'FGM-FGA', 'XPM-XPA'])
y = df['Fantasy_Pts']

In [47]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42) 

lr = LinearRegression()
lr.fit(X_train, y_train)

cross_val_score(lr, X_train, y_train).mean()

0.9878710149069623

In [48]:
# Take a peek at the model coefficient and intercept
pd.DataFrame(set(zip(X.columns, lr.coef_))).head()

Unnamed: 0,0,1
0,Carries,-0.005301078
1,Targets,-0.01962161
2,Away_Game,-5.52336e-15
3,Extended Forecast,0.0
4,Passing_Yds,0.03932004


In [49]:
y_pred = lr.predict(X_test)

In [50]:
resids = y_test - y_pred
resids.head()

228    0.036169
9      0.128032
57    -0.117520
60    -0.075193
25    -0.495654
Name: Fantasy_Pts, dtype: float64

In [51]:
# mse = np.mean(resids**2)
# mse

In [52]:
# Create the predictions for the "null model"
y_bar = np.mean(y)

In [53]:
# The null MSE
null_mse = np.mean((y - y_bar)**2)
null_mse

84.70205752794219

In [54]:
# Can compute R2 from metrics...
# metrics.r2_score(y, y_pred)

In [55]:
y_pred

array([ 5.5638312 , 18.27196841, 10.01751993,  9.27519292, 14.415654  ,
        4.74237048, 18.97488759,  2.54061735, 15.11701217, 13.99633052,
        1.17192872, 17.74994388,  7.786803  ,  3.70430206,  5.81677263,
        8.56239878, 21.83938504,  8.85224396, 15.21729471,  1.17192872,
        1.17192872,  1.00124928,  3.1753184 , 16.97533969,  1.17192872,
       13.13599094,  5.86774863, 27.74844015,  2.42683105,  9.73815538,
       19.49986387, 22.88433567])

In [56]:
pd.DataFrame(set(zip(y_test, y_pred))).head()

Unnamed: 0,0,1
0,3.6,3.704302
1,1.0,1.171929
2,9.9,10.01752
3,13.3,13.135991
4,2.4,2.426831


In [57]:
df_predicted = pd.DataFrame(set(zip(y_test, y_pred)))

In [58]:
df = pd.merge(df, df_predicted, left_on = ['Fantasy_Pts'], right_on = [0])

In [59]:
df[['Week', 'Player', 'Fantasy_Pts', 1]]

Unnamed: 0,Week,Player,Fantasy_Pts,1
0,19,Dak Prescott,22.86,22.884336
1,19,Joe Burrow,21.56,21.839385
2,19,Derek Carr,18.4,18.271968
3,20,Aaron Rodgers,9.0,8.852244
4,23,Darrell Henderson,9.0,8.852244
5,22,Justin Herbert,13.92,14.415654
6,19,Mike Evans,27.7,27.74844
7,19,Amari Cooper,19.4,19.499864
8,19,C.J. Uzomah,19.4,19.499864
9,19,Tyler Boyd,13.9,13.996331
