In [68]:
import pandas as pd
import requests
import time
pd.options.display.max_columns = 999

## QB

### Pass Completions

In [2]:
url = 'https://www.teamrankings.com/nfl/player-stat/passing-plays-completed?split=&rate=per-game'
resp = requests.get(url)
resp.status_code, resp.text[:50]

(200, '<!doctype html>\n<html id="html" class="no-js" lang')

In [3]:
output = open(f'./nfl_data/qb_completions.xls', 'wb')
output.write(resp.content)
output.close()

In [4]:
qb_completions = pd.read_html('./nfl_data/qb_completions.xls')
qb_completions = pd.DataFrame(qb_completions[0]) # Saves df var to dataframe
qb_completions

Unnamed: 0,Rank,Player,Team,Pos,Value
0,1,Tom Brady,Tampa Bay Buccaneers,QB,28.63
1,2,Patrick Mahomes,Kansas City Chiefs,QB,26.25
2,3,Justin Herbert,Los Angeles Chargers,QB,26.06
3,4,Dak Prescott,Dallas Cowboys,QB,25.47
4,5,Derek Carr,Las Vegas Raiders,QB,25.39
...,...,...,...,...,...
95,96,Tyler Boyd,Cincinnati Bengals,WR,0.05
96,96,Joe Mixon,Cincinnati Bengals,HB,0.05
97,98,Johnny Hekker,Los Angeles Rams,P,0.05
98,99,Cole Beasley,Buffalo Bills,WR,0.00


In [5]:
# filter out non-quarterbacks
qb_completions = qb_completions.loc[(qb_completions['Pos'] == 'QB')].copy()

### Pass Completions @ Home

In [6]:
url = 'https://www.teamrankings.com/nfl/player-stat/passing-plays-completed?split=home&rate=per-game'
resp = requests.get(url)
output = open(f'./nfl_data/qb_completions_home.xls', 'wb')
output.write(resp.content)
output.close()

In [7]:
qb_completions_home = pd.read_html('./nfl_data/qb_completions_home.xls')
qb_completions_home = pd.DataFrame(qb_completions_home[0]) # Saves df var to dataframe
qb_completions_home = qb_completions_home.loc[(qb_completions_home['Pos'] == 'QB')].copy()
qb_completions_home

Unnamed: 0,Rank,Player,Team,Pos,Value
0,1,Mike White,New York Jets,QB,30.50
1,2,Mason Rudolph,Pittsburgh Steelers,QB,30.00
2,3,Tom Brady,Tampa Bay Buccaneers,QB,28.10
3,4,Joe Burrow,Cincinnati Bengals,QB,25.90
4,5,Kyler Murray,Arizona Cardinals,QB,25.71
...,...,...,...,...,...
63,64,Jordan Love,Green Bay Packers,QB,1.00
65,65,Josh Rosen,Atlanta Falcons,QB,0.50
66,67,Brandon Allen,Cincinnati Bengals,QB,0.33
70,70,Marcus Mariota,Las Vegas Raiders,QB,0.20


### Pass Completions last 2 wks

In [8]:
url = 'https://www.teamrankings.com/nfl/player-stat/passing-plays-completed?split=last_2_weeks&rate=per-game'
resp = requests.get(url)
output = open(f'./nfl_data/qb_completions_last2wks.xls', 'wb')
output.write(resp.content)
output.close()

In [9]:
qb_completions_last2wks = pd.read_html('./nfl_data/qb_completions_home.xls')
qb_completions_last2wks = pd.DataFrame(qb_completions_last2wks[0]) # Saves df var to dataframe
qb_completions_last2wks = qb_completions_last2wks.loc[(qb_completions_last2wks['Pos'] == 'QB')].copy()
qb_completions_last2wks

Unnamed: 0,Rank,Player,Team,Pos,Value
0,1,Mike White,New York Jets,QB,30.50
1,2,Mason Rudolph,Pittsburgh Steelers,QB,30.00
2,3,Tom Brady,Tampa Bay Buccaneers,QB,28.10
3,4,Joe Burrow,Cincinnati Bengals,QB,25.90
4,5,Kyler Murray,Arizona Cardinals,QB,25.71
...,...,...,...,...,...
63,64,Jordan Love,Green Bay Packers,QB,1.00
65,65,Josh Rosen,Atlanta Falcons,QB,0.50
66,67,Brandon Allen,Cincinnati Bengals,QB,0.33
70,70,Marcus Mariota,Las Vegas Raiders,QB,0.20


### Scrape 7 QB categories, each with 5 sub-categories.  All on a per-game basis.

- Categories: Pass Completions, Pass Attempts, Passing Yards, Passing TD, INT, Longest Pass, QB Rating

- Sub-categories: Home games, Division Games, L2 Weeks, L4 weeks, Vs Top 10

In [10]:
stats = ['passing-plays-completed', 'passing-plays-attempted', 'passing-gross-yards', 'passing-touchdowns', 
        'passing-plays-intercepted', 'passing-longest-yards', 'qb-rating-nfl']

sub_cats = ['home', 'division', 'last_2_weeks','last_4_weeks', 'top_10_nfl']

file_list = []

for stat in stats:
    for cat in sub_cats:
        url = f'https://www.teamrankings.com/nfl/player-stat/{stat}?split={cat}&rate=per-game'
        resp = requests.get(url)
        output = open(f'./nfl_data/{stat}_{cat}.xls', 'wb')
        output.write(resp.content)
        file_list.append(f'./nfl_data/{stat}_{cat}.xls')
        output.close() 
        data = pd.read_html(f'./nfl_data/{stat}_{cat}.xls')
        df = pd.DataFrame(data[0]) 
        print((stat, cat), df.shape)

('passing-plays-completed', 'home') (100, 5)
('passing-plays-completed', 'division') (100, 5)
('passing-plays-completed', 'last_2_weeks') (100, 5)
('passing-plays-completed', 'last_4_weeks') (100, 5)
('passing-plays-completed', 'top_10_nfl') (100, 5)
('passing-plays-attempted', 'home') (100, 5)
('passing-plays-attempted', 'division') (100, 5)
('passing-plays-attempted', 'last_2_weeks') (100, 5)
('passing-plays-attempted', 'last_4_weeks') (100, 5)
('passing-plays-attempted', 'top_10_nfl') (100, 5)
('passing-gross-yards', 'home') (100, 5)
('passing-gross-yards', 'division') (100, 5)
('passing-gross-yards', 'last_2_weeks') (100, 5)
('passing-gross-yards', 'last_4_weeks') (100, 5)
('passing-gross-yards', 'top_10_nfl') (100, 5)
('passing-touchdowns', 'home') (100, 5)
('passing-touchdowns', 'division') (100, 5)
('passing-touchdowns', 'last_2_weeks') (100, 5)
('passing-touchdowns', 'last_4_weeks') (100, 5)
('passing-touchdowns', 'top_10_nfl') (100, 5)
('passing-plays-intercepted', 'home') (10

#### one merge example

In [27]:
qbdf_yars_div = pd.read_html('./nfl_data/passing-gross-yards_division.xls')
qbdf_yars_div = pd.DataFrame(qbdf_yars_div[0]) 
qbdf_yars_div = qbdf_yars_div.loc[(qbdf_yars_div['Pos'] == 'QB')].copy()
qbdf_yars_div.rename(columns = {'Value':'yards_vs_division'}, inplace = True)
qbdf_yars_div.drop(columns = 'Rank', inplace = True) 

In [28]:
qbdf_yars_home = pd.read_html('./nfl_data/passing-gross-yards_home.xls')
qbdf_yars_home = pd.DataFrame(qbdf_yars_home[0]) 
qbdf_yars_home = qbdf_yars_home.loc[(qbdf_yars_home['Pos'] == 'QB')].copy()
qbdf_yars_home.rename(columns = {'Value':'yards_at_home'}, inplace = True)
qbdf_yars_home.drop(columns = 'Rank', inplace = True)

In [39]:
merged_df = pd.merge(qbdf_yars_div, qbdf_yars_home, on = ['Player', 'Team', 'Pos'], how = 'outer')

In [44]:
merged_df

Unnamed: 0,Player,Team,Pos,yards_vs_division,yards_at_home
0,Andy Dalton,Chicago Bears,QB,321.00,164.75
1,Joe Burrow,Cincinnati Bengals,QB,317.00,322.50
2,Josh Johnson,Baltimore Ravens,QB,304.00,17.00
3,Tom Brady,Tampa Bay Buccaneers,QB,298.50,308.70
4,Patrick Mahomes,Kansas City Chiefs,QB,298.00,277.25
...,...,...,...,...,...
65,C.J. Beathard,Jacksonville Jaguars,QB,,33.00
66,Jacob Eason,Seattle Seahawks,QB,,25.00
67,Blaine Gabbert,Tampa Bay Buccaneers,QB,,13.40
68,Ryan Fitzpatrick,Washington Commanders,QB,,13.00


### Loop through all files and merge into one df

In [45]:
# get df started with the 1st file './nfl_data/passing-plays-completed_home.xls'.  This way we have something to merge to.

df_qb = pd.read_html('./nfl_data/passing-plays-completed_home.xls')
df_qb = pd.DataFrame(df_qb[0]) 
df_qb = df_qb.loc[(df_qb['Pos'] == 'QB')].copy() # filter out non-QBs
df_qb.drop(columns = 'Rank', inplace = True) 
col_name = file_list[0][11:-4]
df_qb.rename(columns = {'Value': col_name}, inplace = True)

In [46]:
df_qb.shape

(67, 4)

In [47]:
for file in file_list[1:]:
    data = pd.read_html(file)
    df = pd.DataFrame(data[0])
    df = df.loc[(df['Pos'] == 'QB')].copy()
    df.drop(columns = 'Rank', inplace = True) 
    col_name = file[11:-4]
    df.rename(columns = {'Value': col_name}, inplace = True)    
    df_qb = pd.merge(df_qb, df, on = ['Player', 'Team', 'Pos'], how = 'outer')
    print(f'The shape of the merged df is {df_qb.shape}')

The shape of the merged df is (72, 5)
The shape of the merged df is (74, 6)
The shape of the merged df is (75, 7)
The shape of the merged df is (77, 8)
The shape of the merged df is (78, 9)
The shape of the merged df is (78, 10)
The shape of the merged df is (78, 11)
The shape of the merged df is (78, 12)
The shape of the merged df is (79, 13)
The shape of the merged df is (79, 14)
The shape of the merged df is (79, 15)
The shape of the merged df is (79, 16)
The shape of the merged df is (79, 17)
The shape of the merged df is (79, 18)
The shape of the merged df is (79, 19)
The shape of the merged df is (79, 20)
The shape of the merged df is (79, 21)
The shape of the merged df is (79, 22)
The shape of the merged df is (79, 23)
The shape of the merged df is (80, 24)
The shape of the merged df is (80, 25)
The shape of the merged df is (80, 26)
The shape of the merged df is (80, 27)
The shape of the merged df is (80, 28)
The shape of the merged df is (80, 29)
The shape of the merged df is 

In [49]:
df_qb.head(3)

Unnamed: 0,Player,Team,Pos,passing-plays-completed_home,passing-plays-completed_division,passing-plays-completed_last_2_weeks,passing-plays-completed_last_4_weeks,passing-plays-completed_top_10_nfl,passing-plays-attempted_home,passing-plays-attempted_division,...,passing-longest-yards_home,passing-longest-yards_division,passing-longest-yards_last_2_weeks,passing-longest-yards_last_4_weeks,passing-longest-yards_top_10_nfl,qb-rating-nfl_home,qb-rating-nfl_division,qb-rating-nfl_last_2_weeks,qb-rating-nfl_last_4_weeks,qb-rating-nfl_top_10_nfl
0,Mike White,New York Jets,QB,30.5,22.0,,,30.5,44.5,38.0,...,28.0,28.0,,,28.0,73.1,46.7,,,73.1
1,Mason Rudolph,Pittsburgh Steelers,QB,30.0,,,,5.0,50.0,,...,36.0,,,,12.0,,,,,
2,Tom Brady,Tampa Bay Buccaneers,QB,28.1,27.17,,30.0,33.5,43.1,40.33,...,62.0,62.0,,55.0,58.0,103.1,105.6,,72.2,94.0
3,Joe Burrow,Cincinnati Bengals,QB,25.9,24.4,22.0,24.33,25.67,35.4,33.2,...,72.0,82.0,75.0,75.0,75.0,111.6,112.7,100.9,93.2,106.7
4,Kyler Murray,Arizona Cardinals,QB,25.71,25.0,,,24.25,37.0,37.0,...,77.0,47.0,,,55.0,93.2,85.1,,,89.9


In [55]:
df_qb.dtypes

Player                                     object
Team                                       object
Pos                                        object
passing-plays-completed_home               object
passing-plays-completed_division           object
passing-plays-completed_last_2_weeks       object
passing-plays-completed_last_4_weeks       object
passing-plays-completed_top_10_nfl        float64
passing-plays-attempted_home              float64
passing-plays-attempted_division          float64
passing-plays-attempted_last_2_weeks       object
passing-plays-attempted_last_4_weeks       object
passing-plays-attempted_top_10_nfl         object
passing-gross-yards_home                   object
passing-gross-yards_division               object
passing-gross-yards_last_2_weeks           object
passing-gross-yards_last_4_weeks           object
passing-gross-yards_top_10_nfl            float64
passing-touchdowns_home                    object
passing-touchdowns_division                object


### bring in football reference aggregage data

In [73]:
fr_qb = pd.read_html('./nfl_data/2021_passing_stats.xls')
fr_qb = pd.DataFrame(fr_qb[0]) # Saves df var to dataframe
fr_qb.fillna(0, inplace=True)
fr_qb['Player'] = fr_qb['Player'].map(lambda x: x.lstrip('*').rstrip('*'))
fr_qb.drop(columns = ['Rk', 'Age', 'QBrec', '4QC', 'GWD'], inplace = True)
fr_qb.head()

Unnamed: 0,Player,Tm,Pos,G,GS,Cmp,Att,Cmp%,Yds,TD,TD%,Int,Int%,1D,Lng,Y/A,AY/A,Y/C,Y/G,Rate,QBR,Sk,Yds.1,Sk%,NY/A,ANY/A
0,Tom Brady,TAM,QB,17,17,485,719,67.5,5316,43,6.0,12,1.7,269,62,7.4,7.8,11.0,312.7,102.1,68.1,22,144,3.0,6.98,7.41
1,Justin Herbert,LAC,QB,17,17,443,672,65.9,5014,38,5.7,15,2.2,256,72,7.5,7.6,11.3,294.9,97.7,65.6,31,214,4.4,6.83,6.95
2,Matthew Stafford,LAR,QB,17,17,404,601,67.2,4886,41,6.8,17,2.8,233,79,8.1,8.2,12.1,287.4,102.9,63.8,30,243,4.8,7.36,7.45
3,Patrick Mahomes,KAN,QB,17,17,436,658,66.3,4839,37,5.6,13,2.0,260,75,7.4,7.6,11.1,284.6,98.5,62.2,28,146,4.1,6.84,7.07
4,Derek Carr,LVR,QB,17,17,428,626,68.4,4804,23,3.7,14,2.2,217,61,7.7,7.4,11.2,282.6,94.0,52.4,40,241,6.0,6.85,6.6


In [70]:
fr_qb.shape, df_qb.shape

((118, 28), (80, 38))

In [76]:
df_qb_all =pd.merge(fr_qb, df_qb, on = ['Player'], how = 'outer')
df_qb_all.shape

(123, 63)

In [75]:
df_qb_all.head(3)

Unnamed: 0,Rk,Player,Tm,Age,Pos_x,G,GS,QBrec,Cmp,Att,Cmp%,Yds,TD,TD%,Int,Int%,1D,Lng,Y/A,AY/A,Y/C,Y/G,Rate,QBR,Sk,Yds.1,Sk%,NY/A,ANY/A,4QC,GWD,Team,Pos_y,passing-plays-completed_home,passing-plays-completed_division,passing-plays-completed_last_2_weeks,passing-plays-completed_last_4_weeks,passing-plays-completed_top_10_nfl,passing-plays-attempted_home,passing-plays-attempted_division,passing-plays-attempted_last_2_weeks,passing-plays-attempted_last_4_weeks,passing-plays-attempted_top_10_nfl,passing-gross-yards_home,passing-gross-yards_division,passing-gross-yards_last_2_weeks,passing-gross-yards_last_4_weeks,passing-gross-yards_top_10_nfl,passing-touchdowns_home,passing-touchdowns_division,passing-touchdowns_last_2_weeks,passing-touchdowns_last_4_weeks,passing-touchdowns_top_10_nfl,passing-plays-intercepted_home,passing-plays-intercepted_division,passing-plays-intercepted_last_2_weeks,passing-plays-intercepted_last_4_weeks,passing-plays-intercepted_top_10_nfl,passing-longest-yards_home,passing-longest-yards_division,passing-longest-yards_last_2_weeks,passing-longest-yards_last_4_weeks,passing-longest-yards_top_10_nfl,qb-rating-nfl_home,qb-rating-nfl_division,qb-rating-nfl_last_2_weeks,qb-rating-nfl_last_4_weeks,qb-rating-nfl_top_10_nfl
0,1,Tom Brady,TAM,44,QB,17,17,13-4-0,485,719,67.5,5316,43,6.0,12,1.7,269,62,7.4,7.8,11.0,312.7,102.1,68.1,22,144,3.0,6.98,7.41,3,5,Tampa Bay Buccaneers,QB,28.1,27.17,0.0,30.0,33.5,43.1,40.33,0.0,54.0,51.25,308.7,298.5,0.0,329.0,375.75,2.8,2.83,0.0,1.0,2.0,0.5,0.67,0.0,1.0,0.75,62.0,62.0,0.0,55.0,58.0,103.1,105.6,0.0,72.2,94.0
1,2,Justin Herbert,LAC,23,QB,17,17,9-8-0,443,672,65.9,5014,38,5.7,15,2.2,256,72,7.5,7.6,11.3,294.9,97.7,65.6,31,214,4.4,6.83,6.95,5,5,Los Angeles Chargers,QB,24.11,26.17,0.0,0.0,26.25,36.89,42.17,0.0,0.0,38.0,278.44,277.0,0.0,0.0,293.0,2.33,2.67,0.0,0.0,2.5,0.78,0.67,0.0,0.0,1.0,72.0,47.0,0.0,0.0,47.0,100.3,95.7,0.0,0.0,102.7
2,3,Matthew Stafford,LAR,33,QB,17,17,12-5-0,404,601,67.2,4886,41,6.8,17,2.8,233,79,8.1,8.2,12.1,287.4,102.9,63.8,30,243,4.8,7.36,7.45,3,4,Los Angeles Rams,QB,24.55,23.25,26.0,28.33,25.7,35.91,34.0,40.0,41.0,38.6,288.27,274.5,283.0,328.67,301.5,2.55,2.0,3.0,2.33,2.2,0.82,1.0,2.0,1.0,1.2,75.0,68.0,35.0,70.0,79.0,106.6,100.1,89.9,101.9,96.2


In [74]:
df_qb_all.dtypes

Rk                             object
Player                         object
Tm                             object
Age                            object
Pos_x                          object
                               ...   
qb-rating-nfl_home            float64
qb-rating-nfl_division        float64
qb-rating-nfl_last_2_weeks    float64
qb-rating-nfl_last_4_weeks    float64
qb-rating-nfl_top_10_nfl      float64
Length: 68, dtype: object

In [None]:
# go through 63 columns and figure out which we want as int and which as float

In [None]:
int_cols = ['Tgt', 'Rec', 'TD', 'Yds', 'Lng', 'Fmb', 'G', 'GS', '1D']
float_cols = ['Ctch%', 'Y/R', 'R/G', 'Y/Tgt', 'Y/G']

for int_col in int_cols:
    df_wr[f'{int_col}'] = df_wr[f'{int_col}'].astype(int)

for float_col in float_cols:
    df_wr[f'{float_col}'] = df_wr[f'{float_col}'].astype(float)