In [1]:
import sys
sys.path.append("..")

In [2]:
import pandas as pd
import numpy as np
import requests
import bs4
from bs4 import BeautifulSoup
from urllib.request import Request, urlopen
import time
from tqdm import tqdm
import re
from src.NFL_scraper_util import NFLFootballGame

## Get Weeks

In [3]:
weeks = list(range(1,23))
years = list(range(2013,2024))

yearly_weeks = []

for year in years:
    if year >= 2021:
        yearly_weeks.append(weeks)
    else:
        yearly_weeks.append(weeks[:-1])

In [4]:
weeks_df = pd.DataFrame({"year": years, "week": yearly_weeks})
weeks_df = weeks_df.explode("week")
weeks_df

Unnamed: 0,year,week
0,2013,1
0,2013,2
0,2013,3
0,2013,4
0,2013,5
...,...,...
10,2023,18
10,2023,19
10,2023,20
10,2023,21


In [6]:
base_weeks = f"https://www.pro-football-reference.com/years/YEAR/week_WEEK.htm"
def fill_link(row):
    row["weekly_link"] = base_weeks.replace("YEAR", str(row.year)).replace("WEEK", str(row.week))
    return row

In [7]:
weeks_df = weeks_df.apply(fill_link, axis=1)
weeks_df

Unnamed: 0,year,week,weekly_link
0,2013,1,https://www.pro-football-reference.com/years/2...
0,2013,2,https://www.pro-football-reference.com/years/2...
0,2013,3,https://www.pro-football-reference.com/years/2...
0,2013,4,https://www.pro-football-reference.com/years/2...
0,2013,5,https://www.pro-football-reference.com/years/2...
...,...,...,...
10,2023,18,https://www.pro-football-reference.com/years/2...
10,2023,19,https://www.pro-football-reference.com/years/2...
10,2023,20,https://www.pro-football-reference.com/years/2...
10,2023,21,https://www.pro-football-reference.com/years/2...


In [12]:
weeks_df_no_2023 = weeks_df.iloc[:-21,:]

In [13]:
weekly_game_links = []
base = 'https://www.pro-football-reference.com'

for link in tqdm(weeks_df_no_2023['weekly_link']):
    req = Request(link)
    req.add_header('User-agent', 'greg_s123')
    html = urlopen(req).read()
    weeks_soup = bs4.BeautifulSoup(html)
    right_gamelinks = weeks_soup.find_all("td", {"class": "right gamelink"})
    list_of_game_links = [base + val.a.attrs['href'] for val in weeks_soup.find_all("td", {"class": "right gamelink"})]
    weekly_game_links.append(list_of_game_links)
    time.sleep(7)

100%|████████████████████████████████████████| 213/213 [26:02<00:00,  7.34s/it]


In [15]:
weeks_df_no_2023['game_links'] = weekly_game_links

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  weeks_df_no_2023['game_links'] = weekly_game_links


In [16]:
weeks_df_no_2023.to_pickle('Data/weeks_df_w_game_links.pkl')

## processs scraped pages

In [6]:
weeks_df_no_2023_r = pd.read_pickle('Data/weeks_df_w_game_links.pkl')
print(weeks_df_no_2023_r.shape)
weeks_df_no_2023_r.sample(3)

(213, 4)


Unnamed: 0,year,week,weekly_link,game_links
8,2021,4,https://www.pro-football-reference.com/years/2...,[https://www.pro-football-reference.com/boxsco...
9,2022,19,https://www.pro-football-reference.com/years/2...,[https://www.pro-football-reference.com/boxsco...
2,2015,18,https://www.pro-football-reference.com/years/2...,[https://www.pro-football-reference.com/boxsco...


In [7]:
weeks_df_no_2023 = weeks_df_no_2023_r.explode('game_links')
weeks_df_no_2023.shape

(2723, 4)

In [8]:

# for link in weeks_df_no_2023['game_links']:

## Game MetaData

In [9]:
game_link = weeks_df_no_2023['game_links'].iloc[2601]
game_htm = urlopen(game_link).read()
all_soup = bs4.BeautifulSoup(game_htm)
game_link

'https://www.pro-football-reference.com/boxscores/202211270phi.htm'

In [10]:
## get scorebox
scorebox_tag = all_soup.find_all('div', attrs={'class': 'scorebox'})[0]
scorebox_soup = BeautifulSoup(scorebox_tag.prettify(), 'html.parser')

In [11]:
# TEAMS
away_team_name = scorebox_soup.find_all('a', attrs={'href': re.compile("/teams/")})[0].text.strip()
home_team_name = scorebox_soup.find_all('a', attrs={'href': re.compile("/teams/")})[1].text.strip()

In [12]:
# SCORES
away_score = int(all_soup.find_all('div', attrs={'class': 'score'})[0].text)
home_score = int(all_soup.find_all('div', attrs={'class': 'score'})[1].text)

In [13]:
# RECORDS
away_record = scorebox_soup.find_all(string=re.compile("-"))[0].strip()
away_win_record = away_record.split('-')[0]
away_loss_record = away_record.split('-')[1]
if len(away_record.split('-'))==3:
    away_loss_record = away_record.split('-')[2]
home_record = scorebox_soup.find_all(string=re.compile("-"))[1].strip()
home_win_record = home_record.split('-')[0]
home_loss_record = home_record.split('-')[1]
if len(home_record.split('-'))==3:
    home_loss_record = home_record.split('-')[2]

In [14]:
# COACHES
away_coach = scorebox_soup.find_all('a', attrs={'href': re.compile("/coaches/")})[0].text.strip()
home_coach = scorebox_soup.find_all('a', attrs={'href': re.compile("/coaches/")})[1].text.strip()

In [15]:
# DATE TIME
date = scorebox_soup.find_all('div', attrs={'class': 'scorebox_meta'})[0].find_all('div')[0].text.strip()
kickoff_time = scorebox_soup.find_all('div', attrs={'class': 'scorebox_meta'})[0].find_all(string=re.compile("am|pm"))[0].strip()
time_regex = '\d{1,2}:\d{2}(?:am|pm)'
kickoff_time = re.findall(time_regex, kickoff_time, re.IGNORECASE)[0]

In [16]:
#STADIUM
stadium = scorebox_soup.find_all('a', attrs={'href': re.compile("/stadiums/")})[0].text.strip()

In [24]:
current_game = NFLFootballGame(home_team_name, away_team_name, date, kickoff_time, stadium)
current_game.home_coach = home_coach
current_game.away_coach = away_coach
current_game.home_score = home_score
current_game.away_score = away_score
current_game.away_score = away_score

## Tables

In [20]:
def filter_team_splits(df, levels=2):
    if levels==2:
        df = (df
                 [~df.Player.Player.isnull()]
                 [df.Player.Player!='Player']
                 .reset_index(drop=True)
                )
    elif levels==1:
        df = (df
                 [~df.Player.isnull()]
                 [df.Player!='Player']
                 .reset_index(drop=True)
                )
    return df

In [63]:
df0_quarter_totals = pd.read_html(str(all_soup.find_all('table')[0]))[0]
df0_quarter_totals = df0_quarter_totals.drop(columns=['Unnamed: 0']).rename(columns={'Unnamed: 1': 'Tm'})
df0_quarter_totals

Unnamed: 0,Tm,1,2,3,4,Final
0,Green Bay Packers,14,6,3,10,33
1,Philadelphia Eagles,13,14,7,6,40


In [64]:
df1_scoring = pd.read_html(str(all_soup.find_all('table')[1]))[0]
df1_scoring['Quarter'] = df1_scoring['Quarter'].ffill()
df1_scoring

Unnamed: 0,Quarter,Time,Tm,Detail,GNB,PHI
0,1.0,11:19,Eagles,Kenneth Gainwell 4 yard rush (Jake Elliott kick),0,7
1,1.0,9:03,Eagles,Miles Sanders 15 yard rush (Jake Elliott kick ...,0,13
2,1.0,6:45,Packers,AJ Dillon 20 yard rush (Mason Crosby kick),7,13
3,1.0,3:12,Packers,Randall Cobb 11 yard pass from Aaron Rodgers (...,14,13
4,2.0,14:54,Eagles,Miles Sanders 2 yard rush (Jake Elliott kick),14,20
5,2.0,7:41,Packers,Aaron Jones 23 yard pass from Aaron Rodgers (M...,20,20
6,2.0,0:13,Eagles,Quez Watkins 30 yard pass from Jalen Hurts (Ja...,20,27
7,3.0,7:14,Eagles,A.J. Brown 6 yard pass from Jalen Hurts (Jake ...,20,34
8,3.0,2:03,Packers,Mason Crosby 29 yard field goal,23,34
9,4.0,11:11,Eagles,Jake Elliott 31 yard field goal,23,37


In [68]:
df3_pregame_info = pd.read_html(str(bs4.BeautifulSoup(all_soup.find_all(string=re.compile("Weather"))[0], 'html.parser').find_all('table')[0]))[0]
df3_pregame_info.columns = df3_pregame_info.iloc[0,:]
df3_pregame_info = df3_pregame_info.iloc[1:,:].reset_index(drop=True)
df3_pregame_info

Unnamed: 0,Game Info,Game Info.1
0,Won Toss,Packers (deferred)
1,Roof,outdoors
2,Surface,grass
3,Duration,3:09
4,Attendance,69879
5,Weather,"62 degrees, relative humidity 86%, wind 24 mph"
6,Vegas Line,Philadelphia Eagles -6.5
7,Over/Under,46.0 (over)


In [70]:
df4_officials = pd.read_html(str(bs4.BeautifulSoup(all_soup.find_all(string=re.compile("Referee"))[0], 'html.parser').find_all('table')[0]))[0]
df4_officials.columns = df4_officials.iloc[0,:]
df4_officials = df4_officials.iloc[1:,:].reset_index(drop=True)
df4_officials

Unnamed: 0,Officials,Officials.1
0,Referee,Shawn Smith
1,Umpire,Bryan Neale
2,Down Judge,Mark Hittner
3,Line Judge,Michael Dolce
4,Back Judge,Dino Paganelli
5,Side Judge,Clay Reynard
6,Field Judge,Dyrol Prioleau


In [75]:
df5_expected_totals = pd.read_html(str(bs4.BeautifulSoup(all_soup.find_all(string=re.compile("TOvr"))[0], 'html.parser').find_all('table')[0]))[0]
df5_expected_totals.columns = df5_expected_totals.columns.set_levels(['Defense', 'Offense', 'Special Teams', 'Tm', 'Total'],level=0)
df5_expected_totals

Unnamed: 0_level_0,Tm,Total,Offense,Offense,Offense,Offense,Defense,Defense,Defense,Defense,Special Teams,Special Teams,Special Teams,Special Teams,Special Teams,Special Teams
Unnamed: 0_level_1,Tm,Total,Tot,Pass,Rush,TOvr,Tot,Pass,Rush,TOvr,Tot,KO,KR,P,PR,FG/XP
0,Packers,-7.0,11.56,8.92,2.95,-7.04,-20.82,4.45,-26.23,7.72,-0.51,-6.5,7.84,1.24,-1.33,-1.76
1,Eagles,7.0,20.82,-4.45,26.23,-7.72,-11.56,-8.92,-2.95,7.04,0.51,-7.84,6.5,1.33,-1.24,1.76


In [78]:
df6_team_stats = pd.read_html(str(bs4.BeautifulSoup(all_soup.find_all(string=re.compile("Time of Possession"))[0], 'html.parser').find_all('table')[0]))[0]
df6_team_stats = df6_team_stats.rename(columns={'Unnamed: 0': 'Stat'})
df6_team_stats

Unnamed: 0,Stat,GNB,PHI
0,First Downs,19,29
1,Rush-Yds-TDs,21-106-1,49-363-3
2,Cmp-Att-Yd-TD-INT,17-25-253-3-2,16-28-153-2-0
3,Sacked-Yards,3-17,2-16
4,Net Pass Yards,236,137
5,Total Yards,342,500
6,Fumbles-Lost,0-0,2-1
7,Turnovers,2,1
8,Penalties-Yards,4-35,3-25
9,Third Down Conv.,1-7,8-15


In [103]:
df7_Pass_Rush_Rec = pd.read_html(str(all_soup.find_all('table')[2]))[0]
df7_Pass_Rush_Rec.columns = df7_Pass_Rush_Rec.columns.set_levels(['Fumbles', 'Passing', 'Receiving', 'Rushing', 'Player', 'Tm'],level=0)
df7_Pass_Rush_Rec = filter_team_splits(df7_Pass_Rush_Rec)
df7_Pass_Rush_Rec

  df = (df


Unnamed: 0_level_0,Player,Tm,Passing,Passing,Passing,Passing,Passing,Passing,Passing,Passing,...,Rushing,Rushing,Rushing,Receiving,Receiving,Receiving,Receiving,Receiving,Fumbles,Fumbles
Unnamed: 0_level_1,Player,Tm,Cmp,Att,Yds,TD,Int,Sk,Yds.1,Lng,...,Yds,TD,Lng,Tgt,Rec,Yds,TD,Lng,Fmb,FL
0,Aaron Rodgers,GNB,11,16,140,2,2,3,17,30,...,-1,0,-1,0,0,0,0,0,0,0
1,Jordan Love,GNB,6,9,113,1,0,0,0,63,...,0,0,0,0,0,0,0,0,0,0
2,AJ Dillon,GNB,0,0,0,0,0,0,0,0,...,64,1,20,4,3,24,0,11,0,0
3,Aaron Jones,GNB,0,0,0,0,0,0,0,0,...,43,0,10,4,3,56,1,30,0,0
4,Christian Watson,GNB,0,0,0,0,0,0,0,0,...,0,0,0,6,4,110,1,63,0,0
5,Allen Lazard,GNB,0,0,0,0,0,0,0,0,...,0,0,0,3,2,24,0,17,0,0
6,Robert Tonyan,GNB,0,0,0,0,0,0,0,0,...,0,0,0,3,3,20,0,17,0,0
7,Randall Cobb,GNB,0,0,0,0,0,0,0,0,...,0,0,0,4,2,19,1,11,0,0
8,Tyler Davis,GNB,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
9,Jalen Hurts,PHI,16,28,153,2,0,2,16,30,...,157,0,42,0,0,0,0,0,1,0


In [106]:
df8_defense = pd.read_html(str(bs4.BeautifulSoup(all_soup.find_all(string=re.compile("Tackles"))[0], 'html.parser').find_all('table')[0]))[0]
df8_defense.columns = df8_defense.columns.set_levels(['Def Interceptions', 'Fumbles', 'Tackles', 'Player', 'Tm', 'Sk'],level=0)
df8_defense = filter_team_splits(df8_defense)
df8_defense

  df = (df


Unnamed: 0_level_0,Player,Tm,Def Interceptions,Def Interceptions,Def Interceptions,Def Interceptions,Def Interceptions,Sk,Tackles,Tackles,Tackles,Tackles,Tackles,Fumbles,Fumbles,Fumbles,Fumbles
Unnamed: 0_level_1,Player,Tm,Int,Yds,TD,Lng,PD,Sk,Comb,Solo,Ast,TFL,QBHits,FR,Yds,TD,FF
0,Justin Hollins,GNB,0,0,0,0,0,1.0,2,2,0,2,1,0,0,0,0
1,Preston Smith,GNB,0,0,0,0,0,0.5,6,3,3,1,1,0,0,0,0
2,Jarran Reed,GNB,0,0,0,0,0,0.5,2,0,2,0,1,0,0,0,0
3,Quay Walker,GNB,0,0,0,0,0,0.0,11,4,7,0,0,1,63,0,0
4,Adrian Amos,GNB,0,0,0,0,0,0.0,9,5,4,0,0,0,0,0,0
5,Rudy Ford,GNB,0,0,0,0,0,0.0,9,6,3,0,0,0,0,0,1
6,Rasul Douglas,GNB,0,0,0,0,1,0.0,8,6,2,0,0,0,0,0,0
7,Kingsley Enagbare,GNB,0,0,0,0,1,0.0,7,3,4,0,0,0,0,0,0
8,Jaire Alexander,GNB,0,0,0,0,1,0.0,6,4,2,0,0,0,0,0,0
9,Krys Barnes,GNB,0,0,0,0,0,0.0,6,5,1,0,0,0,0,0,0


In [107]:
df9_kick_punt_returns = pd.read_html(str(bs4.BeautifulSoup(all_soup.find_all(string=re.compile("Kick Returns"))[0], 'html.parser').find_all('table')[0]))[0]
df9_kick_punt_returns.columns = df9_kick_punt_returns.columns.set_levels(['Kick Returns', 'Punt Returns', 'Player', 'Tm'],level=0)
df9_kick_punt_returns = filter_team_splits(df9_kick_punt_returns)
df9_kick_punt_returns

  df = (df


Unnamed: 0_level_0,Player,Tm,Kick Returns,Kick Returns,Kick Returns,Kick Returns,Kick Returns,Punt Returns,Punt Returns,Punt Returns,Punt Returns,Punt Returns
Unnamed: 0_level_1,Player,Tm,Rt,Yds,Y/Rt,TD,Lng,Ret,Yds,Y/R,TD,Lng
0,Keisean Nixon,GNB,5,172,34.4,0,53,0,0,,0,0
1,Boston Scott,PHI,4,91,22.8,0,28,0,0,,0,0


In [108]:
df10_kicking_punting = pd.read_html(str(bs4.BeautifulSoup(all_soup.find_all(string=re.compile("FGA"))[0], 'html.parser').find_all('table')[0]))[0]
df10_kicking_punting.columns = df10_kicking_punting.columns.set_levels(['Punting', 'Scoring', 'Player', 'Tm'],level=0)
df10_kicking_punting = filter_team_splits(df10_kicking_punting)
df10_kicking_punting

  df = (df


Unnamed: 0_level_0,Player,Tm,Scoring,Scoring,Scoring,Scoring,Punting,Punting,Punting,Punting
Unnamed: 0_level_1,Player,Tm,XPM,XPA,FGM,FGA,Pnt,Yds,Y/P,Lng
0,Mason Crosby,GNB,3.0,4.0,2.0,2.0,0,0,,0
1,Pat O'Donnell,GNB,,,,,2,83,41.5,42
2,Jake Elliott,PHI,4.0,5.0,2.0,2.0,0,0,,0
3,Arryn Siposs,PHI,,,,,1,52,52.0,52


In [112]:
df11_advanced_passing = pd.read_html(str(bs4.BeautifulSoup(all_soup.find_all(string=re.compile("IAY"))[0], 'html.parser').find_all('table')[0]))[0]
df11_advanced_passing = filter_team_splits(df11_advanced_passing, levels=1)
df11_advanced_passing

Unnamed: 0,Player,Tm,Cmp,Att,Yds,1D,1D%,IAY,IAY/PA,CAY,...,BadTh,Bad%,Sk,Bltz,Hrry,Hits,Prss,Prss%,Scrm,Yds/Scr
0,Aaron Rodgers,GNB,11,16,140,8,42.1,96,6.0,62,...,1,6.3%,3,5,1,1,5,26.3%,0,
1,Jordan Love,GNB,6,9,113,4,44.4,96,10.7,42,...,1,11.1%,0,1,0,0,0,0.0%,0,
2,Jalen Hurts,PHI,16,28,153,6,20.0,197,7.0,94,...,3,12.0%,2,10,3,1,6,17.1%,5,17.4


In [116]:
df12_advanced_rushing = pd.read_html(str(bs4.BeautifulSoup(all_soup.find_all(string=re.compile("Att/Br"))[0], 'html.parser').find_all('table')[0]))[0]
df12_advanced_rushing = filter_team_splits(df12_advanced_rushing, levels=1)
df12_advanced_rushing

Unnamed: 0,Player,Tm,Att,Yds,TD,1D,YBC,YBC/Att,YAC,YAC/Att,BrkTkl,Att/Br
0,Aaron Jones,GNB,12,43,0,2,25,2.1,18,1.5,0,
1,AJ Dillon,GNB,8,64,1,3,44,5.5,20,2.5,0,
2,Aaron Rodgers,GNB,1,-1,0,0,-1,-1.0,0,0.0,0,
3,Miles Sanders,PHI,21,143,2,7,92,4.4,51,2.4,0,
4,Jalen Hurts,PHI,17,157,0,10,134,7.9,23,1.4,0,
5,Kenneth Gainwell,PHI,8,39,1,3,31,3.9,8,1.0,0,
6,Boston Scott,PHI,3,24,0,1,19,6.3,5,1.7,0,


In [119]:
df13_advanced_receiving = pd.read_html(str(bs4.BeautifulSoup(all_soup.find_all(string=re.compile("Rec/Br"))[0], 'html.parser').find_all('table')[0]))[0]
df13_advanced_receiving = filter_team_splits(df13_advanced_receiving, levels=1)
df13_advanced_receiving

Unnamed: 0,Player,Tm,Tgt,Rec,Yds,TD,1D,YBC,YBC/R,YAC,YAC/R,ADOT,BrkTkl,Rec/Br,Drop,Drop%,Int,Rat
0,Christian Watson,GNB,6,4,110,1,4.0,45,11.3,65,16.3,13.7,0,,0,0.0,0,149.3
1,AJ Dillon,GNB,4,3,24,0,2.0,-9,-3.0,33,11.0,-3.8,1,3.0,1,25.0,0,89.6
2,Aaron Jones,GNB,4,3,56,1,2.0,18,6.0,38,12.7,9.0,0,,1,25.0,0,156.2
3,Robert Tonyan,GNB,3,3,20,0,1.0,14,4.7,6,2.0,4.7,0,,0,0.0,0,94.4
4,Randall Cobb,GNB,4,2,19,1,1.0,16,8.0,3,1.5,7.8,0,,1,25.0,0,103.1
5,Allen Lazard,GNB,3,2,24,0,2.0,20,10.0,4,2.0,11.3,0,,0,0.0,1,51.4
6,Tyler Davis,GNB,1,0,0,0,,0,,0,,10.0,0,,0,0.0,1,0.0
7,A.J. Brown,PHI,6,4,46,1,3.0,22,5.5,24,6.0,7.7,0,,1,16.7,0,129.2
8,DeVonta Smith,PHI,9,4,50,0,2.0,38,9.5,12,3.0,9.1,0,,2,22.2,0,62.3
9,Miles Sanders,PHI,3,3,17,0,0.0,9,3.0,8,2.7,3.0,0,,0,0.0,0,90.3


In [120]:
df14_advanced_defense = pd.read_html(str(bs4.BeautifulSoup(all_soup.find_all(string=re.compile("QBKD"))[0], 'html.parser').find_all('table')[0]))[0]
df14_advanced_defense = filter_team_splits(df14_advanced_defense, levels=1)
df14_advanced_defense

Unnamed: 0,Player,Tm,Int,Tgt,Cmp,Cmp%,Yds,Yds/Cmp,Yds/Tgt,TD,...,Air,YAC,Bltz,Hrry,QBKD,Sk,Prss,Comb,MTkl,MTkl%
0,Rasul Douglas,GNB,0,5,4,80.0%,63.0,15.8,12.6,2.0,...,42.0,21.0,0,0,0,0.0,0,8,0,0.0%
1,Jaire Alexander,GNB,0,8,3,37.5%,37.0,12.3,4.6,0.0,...,31.0,6.0,0,0,0,0.0,0,6,0,0.0%
2,Adrian Amos,GNB,0,3,2,66.7%,19.0,9.5,6.3,0.0,...,10.0,9.0,0,0,0,0.0,0,9,0,0.0%
3,Krys Barnes,GNB,0,2,2,100.0%,14.0,7.0,7.0,0.0,...,5.0,9.0,0,0,0,0.0,0,6,0,0.0%
4,Keisean Nixon,GNB,0,2,2,100.0%,-1.0,-0.5,-0.5,0.0,...,-8.0,7.0,0,0,0,0.0,0,3,0,0.0%
5,Rudy Ford,GNB,0,3,2,66.7%,17.0,8.5,5.7,0.0,...,11.0,6.0,0,0,0,0.0,0,9,0,0.0%
6,Quay Walker,GNB,0,1,1,100.0%,4.0,4.0,4.0,0.0,...,3.0,1.0,2,0,1,0.0,1,11,0,0.0%
7,Justin Hollins,GNB,0,0,0,,,,,,...,,,2,1,0,1.0,2,2,0,0.0%
8,Jarran Reed,GNB,0,0,0,,,,,,...,,,0,1,0,0.5,2,2,0,0.0%
9,Kingsley Enagbare,GNB,0,0,0,,,,,,...,,,0,1,0,0.0,1,7,0,0.0%


In [122]:
df15_home_starters = pd.read_html(str(bs4.BeautifulSoup(all_soup.find_all(string=re.compile("CB"))[0], 'html.parser').find_all('table')[0]))[0]
df15_home_starters

Unnamed: 0,Player,Pos
0,Jalen Hurts,QB
1,Boston Scott,RB
2,A.J. Brown,WR
3,Quez Watkins,WR
4,DeVonta Smith,WR
5,Jack Stoll,TE
6,Lane Johnson,T
7,Jordan Mailata,T
8,Landon Dickerson,G
9,Isaac Seumalo,G


In [124]:
df16_away_starters = pd.read_html(str(bs4.BeautifulSoup(all_soup.find_all(string=re.compile("CB"))[1], 'html.parser').find_all('table')[0]))[0]
df16_away_starters

Unnamed: 0,Player,Pos
0,Aaron Rodgers,QB
1,Aaron Jones,RB
2,Christian Watson,WR
3,Samori Toure,WR
4,Allen Lazard,WR
5,Marcedes Lewis,TE
6,Josh Myers,OL
7,Yosh Nijman,T
8,David Bakhtiari,T
9,Elgton Jenkins,G


In [133]:
df17_home_snaps = pd.read_html(str(bs4.BeautifulSoup(all_soup.find_all(string=re.compile("Pct"))[0], 'html.parser').find_all('table')[0]))[0]
df17_home_snaps.columns = df17_home_snaps.columns.set_levels(['Def.', 'Off.', 'ST', 'Player', 'Pos'],level=0)
df17_home_snaps = filter_team_splits(df17_home_snaps)
df17_home_snaps

Unnamed: 0_level_0,Player,Pos,Off.,Off.,Def.,Def.,ST,ST
Unnamed: 0_level_1,Player,Pos,Num,Pct,Num,Pct,Num,Pct
0,Isaac Seumalo,G,80,100%,0,0%,7,23%
1,Jordan Mailata,T,80,100%,0,0%,7,23%
2,Jalen Hurts,QB,80,100%,0,0%,0,0%
3,Lane Johnson,T,80,100%,0,0%,0,0%
4,Jason Kelce,C,80,100%,0,0%,0,0%
5,A.J. Brown,WR,77,96%,0,0%,1,3%
6,Landon Dickerson,G,76,95%,0,0%,6,19%
7,DeVonta Smith,WR,74,93%,0,0%,1,3%
8,Quez Watkins,WR,66,83%,0,0%,1,3%
9,Jack Stoll,TE,64,80%,0,0%,11,35%


In [139]:
df18_away_snaps = pd.read_html(str(bs4.BeautifulSoup(all_soup.find_all(string=re.compile("Pct"))[1], 'html.parser').find_all('table')[0]))[0]
df18_away_snaps.columns = df18_away_snaps.columns.set_levels(['Def.', 'Off.', 'ST', 'Player', 'Pos'],level=0)
df18_away_snaps = filter_team_splits(df18_away_snaps)
df18_away_snaps

Unnamed: 0_level_0,Player,Pos,Off.,Off.,Def.,Def.,ST,ST
Unnamed: 0_level_1,Player,Pos,Num,Pct,Num,Pct,Num,Pct
0,Josh Myers,C,49,100%,0,0%,6,19%
1,Yosh Nijman,T,49,100%,0,0%,6,19%
2,Jon Runyan Jr.,G,49,100%,0,0%,6,19%
3,Elgton Jenkins,G,49,100%,0,0%,0,0%
4,David Bakhtiari,T,49,100%,0,0%,0,0%
5,Allen Lazard,WR,49,100%,0,0%,0,0%
6,Christian Watson,WR,45,92%,0,0%,0,0%
7,Aaron Rodgers,QB,39,80%,0,0%,0,0%
8,Aaron Jones,RB,35,71%,0,0%,0,0%
9,Robert Tonyan,TE,25,51%,0,0%,0,0%


In [142]:
df19_home_drives = pd.read_html(str(bs4.BeautifulSoup(all_soup.find_all(string=re.compile("LOS"))[0], 'html.parser').find_all('table')[0]))[0]
df19_home_drives

Unnamed: 0,#,Quarter,Time,LOS,Plays,Length,Net Yds,Result
0,1,1,15:00,PHI 25,10,3:41,75,Touchdown
1,2,1,10:23,GNB 29,3,1:21,29,Touchdown
2,3,1,6:40,PHI 28,4,1:40,9,Downs
3,4,1,3:09,PHI 25,7,3:15,75,Touchdown
4,5,2,11:12,PHI 22,6,2:34,54,Fumble
5,6,2,7:41,PHI 28,3,1:53,2,Punt
6,7,2,2:16,PHI 29,9,2:03,71,Touchdown
7,8,3,12:42,PHI 14,11,5:23,86,Touchdown
8,9,3,2:03,PHI 26,14,5:52,61,Field Goal
9,10,4,9:00,PHI 21,10,6:44,43,Field Goal


In [143]:
df20_away_drives = pd.read_html(str(bs4.BeautifulSoup(all_soup.find_all(string=re.compile("LOS"))[1], 'html.parser').find_all('table')[0]))[0]
df20_away_drives

Unnamed: 0,#,Quarter,Time,LOS,Plays,Length,Net Yds,Result
0,1,1,11:19,GNB 9,3,0:56,6,Interception
1,2,1,9:02,GNB 41,4,2:22,59,Touchdown
2,3,1,5:00,PHI 37,5,1:51,37,Touchdown
3,4,2,14:54,GNB 25,6,3:42,47,Interception
4,5,2,8:38,PHI 13,3,0:57,13,Touchdown
5,6,2,5:48,GNB 18,5,3:32,11,Punt
6,7,2,0:13,GNB 25,1,0:13,-1,End of Half
7,8,3,15:00,GNB 47,3,2:18,-2,Punt
8,9,3,7:19,GNB 25,10,5:16,64,Field Goal
9,10,4,11:11,GNB 25,4,2:11,75,Touchdown
