In [224]:
import pandas as pd
import numpy as np
import requests
import os
import re

import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn import metrics

from sklearn.model_selection import cross_val_score

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

### Set Fantasy Points

In [225]:
qb_fantasy_points = {
            'touchdown': 6,
            'pass_yds': 25,
            'interception': -2,
            'rush_yds': 10,
            'rush_td': 6,
        }

flex_fantasy_points = {
            'rush_touchdown': 6,
            'rush_yards': 10,
            'receptions': 1, # ppr
            'rec_yards': 10,
            'rec_touchdown': 6,
        }

kicking_fantasy_points = {
            'field_goals_made': 3,
            'field_goals_missed': -1,
            'extra_points_made': 1,
            'extra_points_missed': -1,
        }

# Scrape Data
This scrapes all data, for all positions in `starting_week` through `current_week`

In [226]:
# Set the weeks in which you would like to pull data
starting_week = 1
current_week = 1
current_year = 2022

In [227]:
all_positions = ['QB', 'WR', 'RB', 'TE', 'K']
weeks = [week for week in range(starting_week, current_week + 1)]

file_list = []

for position in all_positions:
    for week in weeks:
        url = f'https://www.cbssports.com/nfl/stats/leaders/live/{position}/{week}/'
        res = requests.get(url)
        output = open(f'./nfl_data/weekly/cbs_weekly/week-{week}_{position}.xls', 'wb')
        output.write(res.content)
        file_list.append(f'./nfl_data/weekly/cbs_weekly/week-{week}_{position}.xls')
        output.close()
        data = pd.read_html(f'./nfl_data/weekly/cbs_weekly/week-{week}_{position}.xls')
        df = data[0]

In [228]:
position_list = all_positions
dfs = []
for file in file_list:
    for i in range(len(position_list)):
        if position_list[i] in file:
            data = pd.read_html(file)
            df2 = pd.DataFrame(data[0])
            if position_list[i] != 'K':
                df2 = df2.droplevel(0, axis=1)
                df2['Week'] = re.search(r'(?<=\-)\s*(..)', file)[0]
                df2['Pos'] = [i.split()[-1] for i in df2['Player  Player on team']]
                df2.rename(columns={'Player  Player on team': 'Player'}, inplace=True)
                df2['Player'] = df2['Player'].map(lambda x: x.split()[3] + ' ' + x.split()[4])
            else:
                df2['Week'] = re.search(r'(?<=\-)\s*(..)', file)[0]
                df2['Pos'] = [i.split()[-1] for i in df2['Player  Player on team']]
                df2.rename(columns={'Player  Player on team': 'Player'}, inplace=True)
                df2['Player'] = df2['Player'].map(lambda x: x.split()[3] + ' ' + x.split()[4])
            df2['Week'] = df2['Week'].map(lambda x: x.rstrip('_'))
    dfs.append(df2)
df = pd.concat(dfs, ignore_index=True)

In [229]:
df.head()

Unnamed: 0,Player,Game,FPTS Fantasy Points,CMP Pass Completions,ATT Pass Attempts,YDS Passing Yards,TD Touchdown Passes,INT Interceptions Thrown,ATT Rushing Attempts,YDS Rushing Yards,TD Rushing Touchdowns,Week,Pos,REC Receptions,YDS Receiving Yards,TGT Targets,TD Receiving Touchdowns,FGM-A Field Goals Made - Field Goals Attempted,LNG Longest field goal in terms of yards by a kicker,XPM-A Extra Points Made - Extra Points Attempted
0,Patrick Mahomes,KC 44 - ARI 21,44,30,39,360,5,0,3,5,0,1,QB,,,,,,,
1,Josh Allen,BUF 31 - LAR 10,36,26,31,297,3,2,10,56,1,1,QB,,,,,,,
2,Carson Wentz,WAS 28 - JAC 22,35,27,41,313,4,2,6,12,0,1,QB,,,,,,,
3,Justin Herbert,LAC 24 - LV 19,29,26,34,279,3,0,4,1,0,1,QB,,,,,,,
4,Lamar Jackson,BAL 24 - NYJ 9,25,17,30,213,3,1,6,17,0,1,QB,,,,,,,


In [230]:
def impute_special_char(df, char):
    df.replace(char, 0, inplace = True) # get rid of the dashes, impute with 0
    
impute_special_char(df, '—')

In [231]:
impute_special_char(df, '_')

In [232]:
def change_col_types(df):
    numcols_to_change = df.columns
    numcols_to_change2 = []
    for col in numcols_to_change:
        try:
            df[col] = df[col].astype(int)
            print('success!')
        except:
            numcols_to_change2.append(col)
            print(f'need to clean column: {col}')
            
change_col_types(df)

need to clean column: Player
need to clean column: Game
success!
need to clean column: CMP  Pass Completions
need to clean column: ATT  Pass Attempts
need to clean column: YDS  Passing Yards
need to clean column: TD  Touchdown Passes
need to clean column: INT  Interceptions Thrown
need to clean column: ATT  Rushing Attempts
need to clean column: YDS  Rushing Yards
need to clean column: TD  Rushing Touchdowns
success!
need to clean column: Pos
need to clean column: REC  Receptions
need to clean column: YDS  Receiving Yards
need to clean column: TGT  Targets
need to clean column: TD  Receiving Touchdowns
need to clean column: FGM-A  Field Goals Made - Field Goals Attempted
need to clean column: LNG  Longest field goal in terms of yards by a kicker
need to clean column: XPM-A  Extra Points Made - Extra Points Attempted


In [233]:
df.fillna(0, inplace=True)

In [234]:
df.shape

(370, 20)

In [235]:
df.dtypes

Player                                                    object
Game                                                      object
FPTS  Fantasy Points                                       int64
CMP  Pass Completions                                     object
ATT  Pass Attempts                                        object
YDS  Passing Yards                                        object
TD  Touchdown Passes                                      object
INT  Interceptions Thrown                                 object
ATT  Rushing Attempts                                     object
YDS  Rushing Yards                                        object
TD  Rushing Touchdowns                                    object
Week                                                       int64
Pos                                                       object
REC  Receptions                                           object
YDS  Receiving Yards                                      object
TGT  Targets             

In [236]:
df['FG_Made'] = df['FGM-A  Field Goals Made - Field Goals Attempted'].map(lambda x: str(x)[0])
df['FG_Attempted'] = df['FGM-A  Field Goals Made - Field Goals Attempted'].map(lambda x: str(x)[-1])
df['XPM'] = df['XPM-A  Extra Points Made - Extra Points Attempted'].map(lambda x: str(x)[0])
df['XPA'] = df['XPM-A  Extra Points Made - Extra Points Attempted'].map(lambda x: str(x)[-1])

In [237]:
change_col_types(df)

need to clean column: Player
need to clean column: Game
success!
success!
success!
success!
success!
success!
success!
success!
success!
success!
need to clean column: Pos
success!
success!
success!
success!
need to clean column: FGM-A  Field Goals Made - Field Goals Attempted
success!
need to clean column: XPM-A  Extra Points Made - Extra Points Attempted
success!
success!
success!
success!


### Scrape football reference, to get `player_info` df

In [238]:
position = ['rushing', 'passing', 'receiving']

for element in position:
    url = f'https://www.pro-football-reference.com/years/{current_year}/{element}.htm'
    resp = requests.get(url)
    
    with open(f'./nfl_data/{current_year}_{element}_stats.xls', 'wb') as f:
        f.write(resp.content)

In [239]:
pi_wr = pd.read_html(f'./nfl_data/{current_year}_receiving_stats.xls')
pi_wr = pd.DataFrame(pi_wr[0]) # Saves df var to dataframe

pi_qb = pd.read_html(f'./nfl_data/{current_year}_passing_stats.xls')
pi_qb = pd.DataFrame(pi_qb[0]) # Saves df var to dataframe

pi_rb = pd.read_html(f'./nfl_data/{current_year}_rushing_stats.xls')
pi_rb = pd.DataFrame(pi_rb[0]) # Saves df var to dataframe
pi_rb = pi_rb.droplevel(0, axis=1) # Removes first level column

In [240]:
def drop_rows(position):
    for header in position.columns:
        index_list = position.loc[(position[header] == header)].index
        position.drop(labels=index_list, axis=0, inplace = True)
        return
    
drop_rows(pi_rb)
drop_rows(pi_wr)
drop_rows(pi_qb)

In [241]:
pi_wr['Player'] = pi_wr['Player'].map(lambda x: x.rstrip('_+!*@#$?^'))
pi_qb['Player'] = pi_qb['Player'].map(lambda x: x.rstrip('_+!*@#$?^'))
pi_rb['Player'] = pi_rb['Player'].map(lambda x: x.rstrip('_+!*@#$?^'))

In [242]:
dfs = [pi_wr, pi_qb, pi_rb]
player_info = pd.concat([player_info.squeeze() for player_info in dfs], ignore_index=True)

In [243]:
player_info.head()

Unnamed: 0,Rk,Player,Tm,Age,Pos,G,GS,Tgt,Rec,Ctch%,Yds,Y/R,TD,1D,Lng,Y/Tgt,R/G,Y/G,Fmb,QBrec,Cmp,Att,Cmp%,TD%,Int,Int%,Y/A,AY/A,Y/C,Rate,QBR,Sk,Yds.1,Sk%,NY/A,ANY/A,4QC,GWD
0,1,Cooper Kupp,LAR,29,WR,1,1,15,13,86.7%,128,9.8,1,6,28,8.5,13.0,128.0,0,,,,,,,,,,,,,,,,,,,
1,2,Javonte Williams,DEN,22,RB,1,1,12,11,91.7%,65,5.9,0,3,13,5.4,11.0,65.0,1,,,,,,,,,,,,,,,,,,,
2,3,Davante Adams,LVR,30,WR,1,1,17,10,58.8%,141,14.1,1,8,41,8.3,10.0,141.0,0,,,,,,,,,,,,,,,,,,,
3,4,A.J. Brown,PHI,25,WR,1,1,13,10,76.9%,155,15.5,0,7,54,11.9,10.0,155.0,0,,,,,,,,,,,,,,,,,,,
4,5,Ja'Marr Chase,CIN,22,WR,1,1,16,10,62.5%,129,12.9,1,10,24,8.1,10.0,129.0,0,,,,,,,,,,,,,,,,,,,


In [244]:
player_info = player_info[['Player', 'Tm']].copy()
player_info.rename(columns = {'Tm':'Team_Name_Abbrev'}, inplace = True)
player_info = player_info.drop_duplicates()

In [245]:
player_info_dict = pd.Series(player_info.Team_Name_Abbrev.values,index=player_info.Player).to_dict()
# create dictionary with {player : team abbreviation}

In [246]:
Team_Abbreviations_Dict = {
    'Arizona Cardinals': 'ARI',
    'Atlanta Falcons': 'ATL',
    'Baltimore Ravens': 'BAL',
    'Buffalo Bills' : 'BUF',
    'Carolina Panthers': 'CAR',
    'Chicago Bears': 'CHI',
    'Cincinnati Bengals': 'CIN',
    'Cleveland Browns': 'CLE',
    'Dallas Cowboys': 'DAL',
    'Denver Broncos': 'DEN',
    'Detroit Lions': 'DET',
    'Green Bay Packers': 'GNB',
    'Houston Texans': 'HOU',
    'Indianapolis Colts': 'IND',
    'Jacksonville Jaguars': 'JAX',
    'Kansas City Chiefs': 'KAN',
    'Miami Dolphins': 'MIA',
    'Minnesota Vikings': 'MIN',
    'New England Patriots': 'NWE',
    'New Orleans Saints': 'NO',
    'New York Giants': 'NYG',
    'New York Jets': 'NYJ',
    'Las Vegas Raiders': 'LVR',
    'Philadelphia Eagles': 'PHI',
    'Pittsburgh Steelers': 'PIT',
    'Los Angeles Chargers': 'LAC',
    'San Francisco 49ers': 'SFO',
    'Seattle Seahawks': 'SEA',
    'Los Angeles Rams': 'LAR',
    'Tampa Bay Buccaneers': 'TAM',
    'Tennessee Titans': 'TEN',
    'Washington Football Team': 'WAS'
}

In [247]:
def get_key(val):
    for key, value in Team_Abbreviations_Dict.items():
         if val == value:
            return key
 
    return f'key does not exist --> {val}'

In [248]:
player_info['Team_Name_Full'] = player_info['Team_Name_Abbrev'].map(get_key)

In [249]:
player_info['Team_Name_Mascot'] = player_info['Team_Name_Full'].map(lambda x: x.split()[-1])

In [250]:
player_info.head()

Unnamed: 0,Player,Team_Name_Abbrev,Team_Name_Full,Team_Name_Mascot
0,Cooper Kupp,LAR,Los Angeles Rams,Rams
1,Javonte Williams,DEN,Denver Broncos,Broncos
2,Davante Adams,LVR,Las Vegas Raiders,Raiders
3,A.J. Brown,PHI,Philadelphia Eagles,Eagles
4,Ja'Marr Chase,CIN,Cincinnati Bengals,Bengals


In [251]:
replace_values = {'KAN': 'KC', 
                  'TAM': 'TB',
                  'SFO': 'SF', 
                  'GNB': 'GB', 
                  'NWE': 'NE',
                  'LVR': 'LV',
                  'NOR': 'NO',
                  'JAX': 'JAC'}
player_info['Team_Name_Abbrev'] = player_info['Team_Name_Abbrev'].replace(replace_values)
player_info.head(30)

Unnamed: 0,Player,Team_Name_Abbrev,Team_Name_Full,Team_Name_Mascot
0,Cooper Kupp,LAR,Los Angeles Rams,Rams
1,Javonte Williams,DEN,Denver Broncos,Broncos
2,Davante Adams,LV,Las Vegas Raiders,Raiders
3,A.J. Brown,PHI,Philadelphia Eagles,Eagles
4,Ja'Marr Chase,CIN,Cincinnati Bengals,Bengals
5,Justin Jefferson,MIN,Minnesota Vikings,Vikings
6,Michael Pittman Jr.,IND,Indianapolis Colts,Colts
7,Stefon Diggs,BUF,Buffalo Bills,Bills
8,Tyreek Hill,MIA,Miami Dolphins,Dolphins
9,Travis Kelce,KC,Kansas City Chiefs,Chiefs


In [252]:
df.shape, player_info.shape

((370, 24), (317, 4))

### Merge `player_info` with `df`

In [253]:
df = pd.merge(df, player_info, on = 'Player', how = 'left')
df['Team1'] = df['Game'].map(lambda x: x.split()[0])
df['Team2'] = df['Game'].map(lambda x: x.split()[-2])
df['Opp'] = np.where(df['Team_Name_Abbrev'] == df['Team1'], df['Team2'], df['Team1'])
df.head()

Unnamed: 0,Player,Game,FPTS Fantasy Points,CMP Pass Completions,ATT Pass Attempts,YDS Passing Yards,TD Touchdown Passes,INT Interceptions Thrown,ATT Rushing Attempts,YDS Rushing Yards,TD Rushing Touchdowns,Week,Pos,REC Receptions,YDS Receiving Yards,TGT Targets,TD Receiving Touchdowns,FGM-A Field Goals Made - Field Goals Attempted,LNG Longest field goal in terms of yards by a kicker,XPM-A Extra Points Made - Extra Points Attempted,FG_Made,FG_Attempted,XPM,XPA,Team_Name_Abbrev,Team_Name_Full,Team_Name_Mascot,Team1,Team2,Opp
0,Patrick Mahomes,KC 44 - ARI 21,44,30,39,360,5,0,3,5,0,1,QB,0,0,0,0,0,0,0,0,0,0,0,KC,Kansas City Chiefs,Chiefs,KC,ARI,ARI
1,Josh Allen,BUF 31 - LAR 10,36,26,31,297,3,2,10,56,1,1,QB,0,0,0,0,0,0,0,0,0,0,0,BUF,Buffalo Bills,Bills,BUF,LAR,LAR
2,Carson Wentz,WAS 28 - JAC 22,35,27,41,313,4,2,6,12,0,1,QB,0,0,0,0,0,0,0,0,0,0,0,WAS,Washington Football Team,Team,WAS,JAC,JAC
3,Justin Herbert,LAC 24 - LV 19,29,26,34,279,3,0,4,1,0,1,QB,0,0,0,0,0,0,0,0,0,0,0,LAC,Los Angeles Chargers,Chargers,LAC,LV,LV
4,Lamar Jackson,BAL 24 - NYJ 9,25,17,30,213,3,1,6,17,0,1,QB,0,0,0,0,0,0,0,0,0,0,0,BAL,Baltimore Ravens,Ravens,BAL,NYJ,NYJ


In [254]:
df['Opp_Team_Name_Full'] = df['Opp'].map(get_key)

In [255]:
df['Opp_Team_Mascot'] = df['Opp_Team_Name_Full'].map(lambda x: x.split()[-1])
df.head()

Unnamed: 0,Player,Game,FPTS Fantasy Points,CMP Pass Completions,ATT Pass Attempts,YDS Passing Yards,TD Touchdown Passes,INT Interceptions Thrown,ATT Rushing Attempts,YDS Rushing Yards,TD Rushing Touchdowns,Week,Pos,REC Receptions,YDS Receiving Yards,TGT Targets,TD Receiving Touchdowns,FGM-A Field Goals Made - Field Goals Attempted,LNG Longest field goal in terms of yards by a kicker,XPM-A Extra Points Made - Extra Points Attempted,FG_Made,FG_Attempted,XPM,XPA,Team_Name_Abbrev,Team_Name_Full,Team_Name_Mascot,Team1,Team2,Opp,Opp_Team_Name_Full,Opp_Team_Mascot
0,Patrick Mahomes,KC 44 - ARI 21,44,30,39,360,5,0,3,5,0,1,QB,0,0,0,0,0,0,0,0,0,0,0,KC,Kansas City Chiefs,Chiefs,KC,ARI,ARI,Arizona Cardinals,Cardinals
1,Josh Allen,BUF 31 - LAR 10,36,26,31,297,3,2,10,56,1,1,QB,0,0,0,0,0,0,0,0,0,0,0,BUF,Buffalo Bills,Bills,BUF,LAR,LAR,Los Angeles Rams,Rams
2,Carson Wentz,WAS 28 - JAC 22,35,27,41,313,4,2,6,12,0,1,QB,0,0,0,0,0,0,0,0,0,0,0,WAS,Washington Football Team,Team,WAS,JAC,JAC,key does not exist --> JAC,JAC
3,Justin Herbert,LAC 24 - LV 19,29,26,34,279,3,0,4,1,0,1,QB,0,0,0,0,0,0,0,0,0,0,0,LAC,Los Angeles Chargers,Chargers,LAC,LV,LV,key does not exist --> LV,LV
4,Lamar Jackson,BAL 24 - NYJ 9,25,17,30,213,3,1,6,17,0,1,QB,0,0,0,0,0,0,0,0,0,0,0,BAL,Baltimore Ravens,Ravens,BAL,NYJ,NYJ,New York Jets,Jets


### Add opponent defensive rank to `df`

In [256]:
url = 'https://www.pro-football-reference.com/years/2021/opp.htm'
res = requests.get(url)
with open('./nfl_data/weekly/defensive_ranks.xls', 'wb') as f:
    f.write(res.content)

In [257]:
# Set the defensive week to the week you care about; default is current_week
defensive_week = 1

df_def = pd.read_html('./nfl_data/weekly/defensive_ranks.xls')
df_def = pd.DataFrame(df_def[0]) 
df_def = df_def.droplevel(0, axis=1)
df_def = df_def.rename(columns = {'Tm':'Team', 'Rk': 'Rank'})
df_def = df_def.iloc[:, [0,1]].copy() # filter to only Rank and Team
df_def['Tm_Abr'] = df_def['Team'].map(lambda x: Team_Abbreviations_Dict.get(x)) # tack on column with corresponding team abbreviation
df_def.drop(index = [32,33,34], axis = 0, inplace = True) # drop the average and total rows
df_def['Current_Week'] = defensive_week
df_def['Team_Mascot'] = df_def['Team'].map(lambda x: x.split()[-1])
df_def.rename(columns = {'Rank': f'Def_Rank_Week_{current_week}'}, inplace = True)
df_def.head()

Unnamed: 0,Def_Rank_Week_1,Team,Tm_Abr,Current_Week,Team_Mascot
0,1.0,Buffalo Bills,BUF,1,Bills
1,2.0,New England Patriots,NWE,1,Patriots
2,3.0,Denver Broncos,DEN,1,Broncos
3,4.0,New Orleans Saints,NO,1,Saints
4,5.0,Tampa Bay Buccaneers,TAM,1,Buccaneers


In [258]:
df_def['Tm_Abr'] = df_def['Tm_Abr'].replace(replace_values) # addressing football reference's weird abbreviations again

In [259]:
df_def = df_def[['Def_Rank_Week_1','Tm_Abr', 'Current_Week']].copy()
df_def.head()

Unnamed: 0,Def_Rank_Week_1,Tm_Abr,Current_Week
0,1.0,BUF,1
1,2.0,NE,1
2,3.0,DEN,1
3,4.0,NO,1
4,5.0,TB,1


### Merge `df_def` with `df`

In [260]:
print(f'df shape is {df.shape}')
print(f'df_def shape is {df_def.shape}')

df shape is (370, 32)
df_def shape is (32, 3)


In [261]:
df = pd.merge(df, df_def, left_on = ['Week', 'Opp'], right_on= ['Current_Week','Tm_Abr'], how = 'left')

## Scrape weather

In [262]:
weeks = [f'week-{week}' for week in range(1,2)]

file_list = []

for week in weeks:
    url = f'https://www.nflweather.com/en/week/{current_year}/{week}/'
    res = requests.get(url)
    output = open(f'./nfl_data/weekly/weather/{week}_weather.xls', 'wb')
    output.write(res.content)
    file_list.append(f'./nfl_data/weekly/weather/{week}_weather.xls')
    output.close()
    data = pd.read_html(f'./nfl_data/weekly/weather/{week}_weather.xls')
    df_w = data[0]
    print(f'Scraping weather for {week}')

Scraping weather for week-1


In [263]:
dfs = []
for file in file_list:
    data = pd.read_html(file)
    df2 = pd.DataFrame(data[0])
    df2.drop(columns = ['Unnamed: 0', 'Game', 'Game.1', 'Game.2', 'Time (ET)', 'TV', 'Unnamed: 8', 'Unnamed: 12'], inplace = True) 
    df2['Week'] = re.search(r'(\d+)', file)[0]
    df2['Wind_Speed_MPH'] = df2['Wind'].map(lambda x:re.search(r'(\d+)',x)[0])
    df2['Wind_Direction'] = df2['Wind'].map(lambda x:re.search(r'[A-Z]+',x)[0])
    df2['Temp'] = df2['Forecast'].map(lambda x: re.search(r'(\d+)',x)[0] if x != 'DOME' else 0)
    df2['Weather_Desc'] = df2['Forecast'].map(lambda x: re.search(r'\s(.+)',x)[0].strip() if x != 'DOME' else 0)
    df2["Wind"] = np.where(df2["Forecast"] == "DOME", 0, df2["Wind"])
    df2["Wind_Speed_MPH"] = np.where(df2["Forecast"] == "DOME", 0, df2["Wind_Speed_MPH"])
    df2["Wind_Direction"] = np.where(df2["Forecast"] == "DOME", 0, df2["Wind_Direction"])
    dfs.append(df2)
weather_df = pd.concat(dfs, ignore_index=True)
weather_df.head()

Unnamed: 0,Away,Home,Forecast,Extended Forecast,Wind,Week,Wind_Speed_MPH,Wind_Direction,Temp,Weather_Desc
0,Bills,Rams,DOME,Partly Cloudy. Partly cloudy throughout the day.,0,1,0,0,0,0
1,Saints,Falcons,DOME,Mostly Cloudy. Overcast throughout the day.,0,1,0,0,0,0
2,49ers,Bears,68f Rain,Rain. Rain throughout the day.,9m NE,1,9,NE,68,Rain
3,Steelers,Bengals,75f Mostly Cloudy,Mostly Cloudy. Rain in the afternoon and evening.,4m S,1,4,S,75,Mostly Cloudy
4,Eagles,Lions,DOME,"Mostly Cloudy. Rain until afternoon, starting ...",0,1,0,0,0,0


In [264]:
weather_df['Week'] = weather_df['Week'].astype(int)

## Merge `df` and `df_weather`

In [265]:
change_col_types(df)

need to clean column: Player
need to clean column: Game
success!
success!
success!
success!
success!
success!
success!
success!
success!
success!
need to clean column: Pos
success!
success!
success!
success!
need to clean column: FGM-A  Field Goals Made - Field Goals Attempted
success!
need to clean column: XPM-A  Extra Points Made - Extra Points Attempted
success!
success!
success!
success!
need to clean column: Team_Name_Abbrev
need to clean column: Team_Name_Full
need to clean column: Team_Name_Mascot
need to clean column: Team1
need to clean column: Team2
need to clean column: Opp
need to clean column: Opp_Team_Name_Full
need to clean column: Opp_Team_Mascot
success!
need to clean column: Tm_Abr
success!


In [266]:
weather_df.head()

Unnamed: 0,Away,Home,Forecast,Extended Forecast,Wind,Week,Wind_Speed_MPH,Wind_Direction,Temp,Weather_Desc
0,Bills,Rams,DOME,Partly Cloudy. Partly cloudy throughout the day.,0,1,0,0,0,0
1,Saints,Falcons,DOME,Mostly Cloudy. Overcast throughout the day.,0,1,0,0,0,0
2,49ers,Bears,68f Rain,Rain. Rain throughout the day.,9m NE,1,9,NE,68,Rain
3,Steelers,Bengals,75f Mostly Cloudy,Mostly Cloudy. Rain in the afternoon and evening.,4m S,1,4,S,75,Mostly Cloudy
4,Eagles,Lions,DOME,"Mostly Cloudy. Rain until afternoon, starting ...",0,1,0,0,0,0


In [267]:
df.head()

Unnamed: 0,Player,Game,FPTS Fantasy Points,CMP Pass Completions,ATT Pass Attempts,YDS Passing Yards,TD Touchdown Passes,INT Interceptions Thrown,ATT Rushing Attempts,YDS Rushing Yards,TD Rushing Touchdowns,Week,Pos,REC Receptions,YDS Receiving Yards,TGT Targets,TD Receiving Touchdowns,FGM-A Field Goals Made - Field Goals Attempted,LNG Longest field goal in terms of yards by a kicker,XPM-A Extra Points Made - Extra Points Attempted,FG_Made,FG_Attempted,XPM,XPA,Team_Name_Abbrev,Team_Name_Full,Team_Name_Mascot,Team1,Team2,Opp,Opp_Team_Name_Full,Opp_Team_Mascot,Def_Rank_Week_1,Tm_Abr,Current_Week
0,Patrick Mahomes,KC 44 - ARI 21,44,30,39,360,5,0,3,5,0,1,QB,0,0,0,0,0,0,0,0,0,0,0,KC,Kansas City Chiefs,Chiefs,KC,ARI,ARI,Arizona Cardinals,Cardinals,11,ARI,1
1,Josh Allen,BUF 31 - LAR 10,36,26,31,297,3,2,10,56,1,1,QB,0,0,0,0,0,0,0,0,0,0,0,BUF,Buffalo Bills,Bills,BUF,LAR,LAR,Los Angeles Rams,Rams,15,LAR,1
2,Carson Wentz,WAS 28 - JAC 22,35,27,41,313,4,2,6,12,0,1,QB,0,0,0,0,0,0,0,0,0,0,0,WAS,Washington Football Team,Team,WAS,JAC,JAC,key does not exist --> JAC,JAC,28,JAC,1
3,Justin Herbert,LAC 24 - LV 19,29,26,34,279,3,0,4,1,0,1,QB,0,0,0,0,0,0,0,0,0,0,0,LAC,Los Angeles Chargers,Chargers,LAC,LV,LV,key does not exist --> LV,LV,26,LV,1
4,Lamar Jackson,BAL 24 - NYJ 9,25,17,30,213,3,1,6,17,0,1,QB,0,0,0,0,0,0,0,0,0,0,0,BAL,Baltimore Ravens,Ravens,BAL,NYJ,NYJ,New York Jets,Jets,32,NYJ,1


In [268]:
df = pd.merge(df, weather_df, left_on = ['Opp_Team_Mascot', 'Team_Name_Mascot', 'Week'], right_on = ['Home', 'Away', 'Week'], how = 'left')

In [270]:
df.head(20)

Unnamed: 0,Player,Game,FPTS Fantasy Points,CMP Pass Completions,ATT Pass Attempts,YDS Passing Yards,TD Touchdown Passes,INT Interceptions Thrown,ATT Rushing Attempts,YDS Rushing Yards,TD Rushing Touchdowns,Week,Pos,REC Receptions,YDS Receiving Yards,TGT Targets,TD Receiving Touchdowns,FGM-A Field Goals Made - Field Goals Attempted,LNG Longest field goal in terms of yards by a kicker,XPM-A Extra Points Made - Extra Points Attempted,FG_Made,FG_Attempted,XPM,XPA,Team_Name_Abbrev,Team_Name_Full,Team_Name_Mascot,Team1,Team2,Opp,Opp_Team_Name_Full,Opp_Team_Mascot,Def_Rank_Week_1,Tm_Abr,Current_Week,Away,Home,Forecast,Extended Forecast,Wind,Wind_Speed_MPH,Wind_Direction,Temp,Weather_Desc
0,Patrick Mahomes,KC 44 - ARI 21,44,30,39,360,5,0,3,5,0,1,QB,0,0,0,0,0,0,0,0,0,0,0,KC,Kansas City Chiefs,Chiefs,KC,ARI,ARI,Arizona Cardinals,Cardinals,11,ARI,1,Chiefs,Cardinals,DOME,Clear. Clear throughout the day.,0,0.0,0,0.0,0
1,Josh Allen,BUF 31 - LAR 10,36,26,31,297,3,2,10,56,1,1,QB,0,0,0,0,0,0,0,0,0,0,0,BUF,Buffalo Bills,Bills,BUF,LAR,LAR,Los Angeles Rams,Rams,15,LAR,1,Bills,Rams,DOME,Partly Cloudy. Partly cloudy throughout the day.,0,0.0,0,0.0,0
2,Carson Wentz,WAS 28 - JAC 22,35,27,41,313,4,2,6,12,0,1,QB,0,0,0,0,0,0,0,0,0,0,0,WAS,Washington Football Team,Team,WAS,JAC,JAC,key does not exist --> JAC,JAC,28,JAC,1,,,,,,,,,
3,Justin Herbert,LAC 24 - LV 19,29,26,34,279,3,0,4,1,0,1,QB,0,0,0,0,0,0,0,0,0,0,0,LAC,Los Angeles Chargers,Chargers,LAC,LV,LV,key does not exist --> LV,LV,26,LV,1,,,,,,,,,
4,Lamar Jackson,BAL 24 - NYJ 9,25,17,30,213,3,1,6,17,0,1,QB,0,0,0,0,0,0,0,0,0,0,0,BAL,Baltimore Ravens,Ravens,BAL,NYJ,NYJ,New York Jets,Jets,32,NYJ,1,Ravens,Jets,74f Light Rain,Light Rain. Rain starting in the afternoon.,3m SSW,3.0,SSW,74.0,Light Rain
5,Jameis Winston,NO 27 - ATL 26,24,23,34,269,2,0,2,9,0,1,QB,0,0,0,0,0,0,0,0,0,0,0,NO,key does not exist --> NOR,NOR,NO,ATL,ATL,Atlanta Falcons,Falcons,29,ATL,1,,,,,,,,,
6,Jalen Hurts,PHI 38 - DET 35,24,18,32,243,0,0,17,90,1,1,QB,0,0,0,0,0,0,0,0,0,0,0,PHI,Philadelphia Eagles,Eagles,PHI,DET,DET,Detroit Lions,Lions,31,DET,1,Eagles,Lions,DOME,"Mostly Cloudy. Rain until afternoon, starting ...",0,0.0,0,0.0,0
7,Kirk Cousins,MIN 23 - GB 7,23,23,32,277,2,0,0,0,0,1,QB,0,0,0,0,0,0,0,0,0,0,0,MIN,Minnesota Vikings,Vikings,MIN,GB,GB,key does not exist --> GB,GB,13,GB,1,,,,,,,,,
8,Kyler Murray,KC 44 - ARI 21,23,22,34,193,2,0,5,29,0,1,QB,0,0,0,0,0,0,0,0,0,0,0,ARI,Arizona Cardinals,Cardinals,KC,ARI,KC,key does not exist --> KC,KC,8,KC,1,,,,,,,,,
9,Ryan Tannehill,NYG 21 - TEN 20,22,20,33,266,2,0,2,7,0,1,QB,0,0,0,0,0,0,0,0,0,0,0,TEN,Tennessee Titans,Titans,NYG,TEN,NYG,New York Giants,Giants,23,NYG,1,,,,,,,,,


In [None]:
df['Away_Game'] = np.where(df['Away'] == df['Team_Name_Mascot'], 1, 0)
df['Home_Game'] = np.where(df['Home'] == df['Team_Name_Mascot'], 1, 0)
df['FG_Missed'] = df['FG_Attempted'] - df['FG_Made']
df['XP_Missed'] = df['XPA'] - df['XPM']

In [None]:
df.fillna(0, inplace=True)

In [None]:
change_col_types(df)

In [None]:
df.head()

# Calculate Custom Fantasy Points

In [None]:
qb = df['TD  Touchdown Passes'].multiply(qb_fantasy_points['touchdown']) + df['YDS  Passing Yards'].divide(qb_fantasy_points['pass_yds']) + df['INT  Interceptions Thrown'].multiply(qb_fantasy_points['interception']) + df['YDS  Rushing Yards'].divide(qb_fantasy_points['rush_yds']) + df['TD  Rushing Touchdowns'].multiply(qb_fantasy_points['rush_td'])
wr = df['TD  Rushing Touchdowns'].multiply(flex_fantasy_points['rush_touchdown']) + df['YDS  Rushing Yards'].divide(flex_fantasy_points['rush_yards']) + df['REC  Receptions'].add(flex_fantasy_points['receptions']) + df['YDS  Receiving Yards'].divide(flex_fantasy_points['rec_yards']) + df['TD  Receiving Touchdowns'].multiply(flex_fantasy_points['rec_touchdown'])
rb = df['TD  Rushing Touchdowns'].multiply(flex_fantasy_points['rush_touchdown']) + df['YDS  Rushing Yards'].divide(flex_fantasy_points['rush_yards']) + df['REC  Receptions'].add(flex_fantasy_points['receptions']) + df['YDS  Receiving Yards'].divide(flex_fantasy_points['rec_yards']) + df['TD  Receiving Touchdowns'].multiply(flex_fantasy_points['rec_touchdown'])
te = df['TD  Rushing Touchdowns'].multiply(flex_fantasy_points['rush_touchdown']) + df['YDS  Rushing Yards'].divide(flex_fantasy_points['rush_yards']) + df['REC  Receptions'].add(flex_fantasy_points['receptions']) + df['YDS  Receiving Yards'].divide(flex_fantasy_points['rec_yards']) + df['TD  Receiving Touchdowns'].multiply(flex_fantasy_points['rec_touchdown'])
k = df['FG_Made'].multiply(kicking_fantasy_points['field_goals_made']) + df['FG_Missed'].multiply(kicking_fantasy_points['field_goals_missed']) + df['XPM'].multiply(kicking_fantasy_points['extra_points_made']) + df['XP_Missed'].multiply(kicking_fantasy_points['extra_points_missed'])

position_list = [('QB', qb), ('WR', wr), ('RB', rb), ('TE', te), ('K', k)]

for position in position_list:
    first_idx = df[df['Pos'] == position[0]].index[0]
    final_idx = df[df['Pos'] == position[0]].index[-1]
    df.loc[first_idx:final_idx,['Fantasy_Pts']] = position[1]
    

In [None]:
df.rename(columns = {'CMP  Pass Completions': 'Completions',
                     'ATT  Pass Attempts': 'Pass_Attempts',
                    'YDS  Passing Yards': 'Passing_Yds',
                    'TD  Touchdown Passes': 'TD_Passes',
                    'INT  Interceptions Thrown': 'INT',
                     'ATT  Rushing Attempts': 'Carries',
                     'YDS  Rushing Yards': 'Rushing_Yds',
                     'TD  Rushing Touchdowns': 'Rushing_TD',
                     'REC  Receptions': 'Receptions',
                     'YDS  Receiving Yards': 'Receiving_Yds',
                     'TGT  Targets': 'Targets',
                     'TD  Receiving Touchdowns': 'Receiving_TD',
                     'FGM-A  Field Goals Made - Field Goals Attempted': 'FGM-FGA',
                     'LNG  Longest field goal in terms of yards by a kicker': 'Longest_FG',
                     'XPM-A  Extra Points Made - Extra Points Attempted': 'XPM-XPA',
                     'FPTS  Fantasy Points' : 'CBS_Fantasy_Pts'
                    }, inplace = True)
df.head()

In [None]:
df = df[['Week', 'Player', 'Pos', 'CBS_Fantasy_Pts', 'Fantasy_Pts', 'Team_Name_Full', 'Team_Name_Mascot','Team_Name_Abbrev','Game', 'Opp', 'Away_Game',
             'Home_Game', 'Completions', 'Pass_Attempts', 'Passing_Yds', 'TD_Passes', 'INT', 'Carries', 'Rushing_Yds', 'Rushing_TD', 'Receptions', 
             'Receiving_Yds', 'Targets', 'Receiving_TD', 'FGM-FGA', 'FG_Missed', 'Longest_FG', 'XPM-XPA', 'XP_Missed', 'Forecast', 
             'Extended Forecast', 'Wind', 'Wind_Speed_MPH', 'Wind_Direction', 'Temp', 'Weather_Desc']]
df.head()

## Modeling

In [None]:
df.fillna(0, inplace=True)

In [None]:
# Create X and y
X = df.drop(columns = ['Player', 'Pos', 'CBS_Fantasy_Pts', 'Fantasy_Pts', 'Team_Name_Full', 'Team_Name_Mascot', 'Team_Name_Abbrev', 'Game', 'Opp', 'FGM-FGA', 'XPM-XPA', 'Forecast', 'Extended Forecast', 'Wind', 'Wind_Direction', 'Weather_Desc'])
y = df['Fantasy_Pts']


In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42) 

lr = LinearRegression()
lr.fit(X_train, y_train)

cross_val_score(lr, X_train, y_train).mean()

In [None]:
# Take a peek at the model coefficient and intercept
pd.DataFrame(set(zip(X.columns, lr.coef_))).head()

In [None]:
y_pred = lr.predict(X_test)

In [None]:
resids = y_test - y_pred
resids.head()

In [None]:
# mse = np.mean(resids**2)
# mse

In [None]:
# Create the predictions for the "null model"
y_bar = np.mean(y)

In [None]:
# The null MSE
null_mse = np.mean((y - y_bar)**2)
null_mse

In [None]:
# Can compute R2 from metrics...
# metrics.r2_score(y, y_pred)

In [None]:
y_pred

In [None]:
pd.DataFrame(set(zip(y_test, y_pred))).head()

In [None]:
df_predicted = pd.DataFrame(set(zip(y_test, y_pred)))

In [None]:
df = pd.merge(df, df_predicted, left_on = ['Fantasy_Pts'], right_on = [0])

In [None]:
df.rename(columns={1: 'Prediction'}, inplace=True)

In [None]:
df[['Week', 'Player', 'Fantasy_Pts', 'Prediction']].head()