# NFL Player COVID Data
The code below pulls injury report data for the 2017–2020 seasons from Pro Football Reference (https://www.pro-football-reference.com/). The final data will be used for a visualization of COVID-19 cases during the 2020 season.

In [76]:
import pandas as pd
from urllib.request import urlopen
from bs4 import BeautifulSoup

In [6]:
# dictionary per data structure on pro football reference:
# key = franchise code used in the team URL, value = display name written to the output
teams = {'crd':'Arizona Cardinals',
         'atl':'Atlanta Falcons',
         'rav':'Baltimore Ravens',
         'buf':'Buffalo Bills',
         'car':'Carolina Panthers',
         'chi':'Chicago Bears',
         'cin':'Cincinnati Bengals',
         'cle':'Cleveland Browns',
         'dal':'Dallas Cowboys',
         'den':'Denver Broncos',
         'det':'Detroit Lions',
         'gnb':'Green Bay Packers',
         'htx':'Houston Texans',
         'clt':'Indianapolis Colts',
         'jax':'Jacksonville Jaguars',
         'kan':'Kansas City Chiefs',
         'rai':'Las Vegas Raiders',
         # the URL code is still 'sdg', but the club played as the Los Angeles Chargers
         # in every season pulled here; this also matches the name used for the 'lac'
         # opponent code during clean-up, so team and opponent columns agree
         'sdg':'Los Angeles Chargers',
         'ram':'Los Angeles Rams',
         'mia':'Miami Dolphins',
         'min':'Minnesota Vikings',
         'nwe':'New England Patriots',
         'nor':'New Orleans Saints',
         'nyg':'New York Giants',
         'nyj':'New York Jets',
         'phi':'Philadelphia Eagles',
         'pit':'Pittsburgh Steelers',
         'sfo':'San Francisco 49ers',
         'sea':'Seattle Seahawks',
         'tam':'Tampa Bay Buccaneers',
         'oti':'Tennessee Titans',
         'was':'Washington Football Team'}
# seasons to pull data from
years = [2017, 2018, 2019, 2020]

In [50]:
def _injury_from_cell(cell):
    """Return the injury text held in a table cell's ``data-tip`` attribute.

    The attribute value is split on ':' and the second piece is returned
    unchanged. Returns None when the cell has no ``data-tip`` attribute or
    when the attribute contains no ':'.
    """
    if not cell.has_attr('data-tip'):
        return None
    parts = cell.get('data-tip').split(':')
    return parts[1] if len(parts) > 1 else None


# One DataFrame per player and season, combined once after the loops.
# Growing `final` with DataFrame.append copied the whole frame on every call
# and the method no longer exists in pandas 2.0+.
player_frames = []
for year in years:
    for team in teams:
        url = f'https://www.pro-football-reference.com/teams/{team}/{year}_injuries.htm'
        # the context manager closes the HTTP response once the page is parsed;
        # naming the parser keeps the result independent of which optional
        # parsers happen to be installed
        with urlopen(url) as html:
            page = BeautifulSoup(html, 'html.parser')
        print(f'team: {team}, year: {year}')
        # team level: every header cell after the first describes one game
        org = teams.get(team)
        headers = page.find_all('table')[0].thead.tr.find_all('th')[1:]
        weeks = [head['data-stat'] for head in headers]
        # header text is split on 'vs. ' into [date, opponent code]
        matchups = [head.text.split('vs. ') for head in headers]
        dates = [matchup[0] for matchup in matchups]
        oppsId = [matchup[1].lower() for matchup in matchups]
        # opponent codes missing from `teams` resolve to None here
        opps = [teams.get(opp_id) for opp_id in oppsId]
        # player level: walk the rows directly instead of looking each player up
        # by name, so two rows with the same player name keep their own cells
        for row in page.find_all('tbody')[0].find_all('tr'):
            cells = row.find_all('td')
            player_frames.append(pd.DataFrame({'team': org,
                                               'date': dates,
                                               'season': year,
                                               'week': weeks,
                                               'opponentId': oppsId,
                                               'opponent': opps,
                                               'player': row.th.text,
                                               'designation': [cell.text for cell in cells],
                                               'injury': [_injury_from_cell(cell) for cell in cells]}))

# pd.concat raises on an empty list, so fall back to an empty frame
final = pd.concat(player_frames, ignore_index=True) if player_frames else pd.DataFrame()
final.head()

team: crd, year: 2017
team: atl, year: 2017
team: rav, year: 2017
team: buf, year: 2017
team: car, year: 2017
team: chi, year: 2017
team: cin, year: 2017
team: cle, year: 2017
team: dal, year: 2017
team: den, year: 2017
team: det, year: 2017
team: gnb, year: 2017
team: htx, year: 2017
team: clt, year: 2017
team: jax, year: 2017
team: kan, year: 2017
team: rai, year: 2017
team: sdg, year: 2017
team: ram, year: 2017
team: mia, year: 2017
team: min, year: 2017
team: nwe, year: 2017
team: nor, year: 2017
team: nyg, year: 2017
team: nyj, year: 2017
team: phi, year: 2017
team: pit, year: 2017
team: sfo, year: 2017
team: sea, year: 2017
team: tam, year: 2017
team: oti, year: 2017
team: was, year: 2017
team: crd, year: 2018
team: atl, year: 2018
team: rav, year: 2018
team: buf, year: 2018
team: car, year: 2018
team: chi, year: 2018
team: cin, year: 2018
team: cle, year: 2018
team: dal, year: 2018
team: den, year: 2018
team: det, year: 2018
team: gnb, year: 2018
team: htx, year: 2018
team: clt,

Unnamed: 0,team,date,season,week,opponentId,opponent,player,designation,injury
0,Arizona Cardinals,09/10,2017,week_1,det,Detroit Lions,Antoine Bethea,,
1,Arizona Cardinals,09/17,2017,week_2,ind,,Antoine Bethea,,
2,Arizona Cardinals,09/25,2017,week_3,dal,Dallas Cowboys,Antoine Bethea,,
3,Arizona Cardinals,10/01,2017,week_4,sfo,San Francisco 49ers,Antoine Bethea,,
4,Arizona Cardinals,10/08,2017,week_5,phi,Philadelphia Eagles,Antoine Bethea,,


In [51]:
# storing final df as copy
output = final

# cleaning up team codes not covered by teams dict above
clean_dict = {
    'lar': 'Los Angeles Rams', 
    'lvr': 'Las Vegas Raiders', 
    'lac': 'Los Angeles Chargers', 
    'hou': 'Houston Texans', 
    'ind': 'Indianapolis Colts', 
    'ten': 'Tennessee Titans', 
    'ari': 'Arizona Cardinals', 
    'bal': 'Baltimore Ravens',
    'oak': 'Oakland Raiders'
}
output.loc[output['opponent'].isnull(), 'opponent'] = output['opponentId'].apply(lambda x: clean_dict.get(x))

In [52]:
# adding a year to date
output['year'] = output.apply(lambda x:x.season + 1 if int(x.date.split('/')[0]) < 8 else x.season, axis=1)
output['date'] = output['date'] + '/' + output['year'].astype(str)

In [71]:
# filtering out days players were not injured / out
output = output[~(output['designation']=='')]

# reorder columns
colOrder = ['season', 'week', 'date', 'team', 'opponent', 'player', 'designation', 'injury']
output = output[colOrder]

# cleaning up irrelevant designations
designations = ['Q', 'IR', 'O', 'D', 'PUP', 'C19']
output = output[output['designation'].isin(designations)]

In [72]:
# preview the first rows of the filtered, reordered frame
output.head()

Unnamed: 0,season,week,date,team,opponent,player,designation,injury
14,2017,week_16,12/24/2017,Arizona Cardinals,New York Giants,Antoine Bethea,Q,Knee
15,2017,week_17,12/31/2017,Arizona Cardinals,Seattle Seahawks,Antoine Bethea,IR,Pectoral
19,2017,week_4,10/01/2017,Arizona Cardinals,San Francisco 49ers,Alex Boone,O,Chest
20,2017,week_5,10/08/2017,Arizona Cardinals,Philadelphia Eagles,Alex Boone,Q,Chest
41,2017,week_11,11/19/2017,Arizona Cardinals,Houston Texans,Tyvon Branch,IR,Knee


In [74]:
# export the cleaned report; with to_csv's default settings the DataFrame
# index is written as the first (unnamed) column
output.to_csv('nfl_injury_reports_2017_2020_rev2.csv')

In [75]:
# sanity check: list the distinct designation codes present in the frame
output.designation.unique()

array(['Q', 'IR', 'O', 'D', 'PUP', 'C19'], dtype=object)

# An Older / Simpler Version
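The script below is an older, simpler version that only pulls each player's weekly injury-report status (read with `pandas.read_html`), without the injury details.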

In [4]:
import pandas as pd

In [27]:
# dictionary per data structure on pro football reference:
# key = franchise code used in the team URL, value = display name written to the output
teams = {'crd':'Arizona Cardinals',
         'atl':'Atlanta Falcons',
         'rav':'Baltimore Ravens',
         'buf':'Buffalo Bills',
         'car':'Carolina Panthers',
         'chi':'Chicago Bears',
         'cin':'Cincinnati Bengals',
         'cle':'Cleveland Browns',
         'dal':'Dallas Cowboys',
         'den':'Denver Broncos',
         'det':'Detroit Lions',
         'gnb':'Green Bay Packers',
         'htx':'Houston Texans',
         'clt':'Indianapolis Colts',
         'jax':'Jacksonville Jaguars',
         'kan':'Kansas City Chiefs',
         'rai':'Las Vegas Raiders',
         # the URL code is still 'sdg', but the club played as the Los Angeles Chargers
         # in every season pulled here; this also matches the name used for the 'LAC'
         # opponent code during clean-up, so team and opponent columns agree
         'sdg':'Los Angeles Chargers',
         'ram':'Los Angeles Rams',
         'mia':'Miami Dolphins',
         'min':'Minnesota Vikings',
         'nwe':'New England Patriots',
         'nor':'New Orleans Saints',
         'nyg':'New York Giants',
         'nyj':'New York Jets',
         'phi':'Philadelphia Eagles',
         'pit':'Pittsburgh Steelers',
         'sfo':'San Francisco 49ers',
         'sea':'Seattle Seahawks',
         'tam':'Tampa Bay Buccaneers',
         'oti':'Tennessee Titans',
         'was':'Washington Football Team'}
# seasons to pull data from
years = [2017, 2018, 2019, 2020]

# One melted DataFrame per team and season, combined once after the loops.
# Growing `final` with DataFrame.append copied the whole frame on every call
# and the method no longer exists in pandas 2.0+.
season_frames = []
for year in years: 
    for team in teams: 
        url = f'https://www.pro-football-reference.com/teams/{team}/{year}_injuries.htm'
        # first table on the page is used
        df = pd.read_html(url)[0]
        # unpivoting table to put date and opponent in rows
        df_melt = pd.melt(df, id_vars=['Player'], value_vars=df.columns[1:]).rename(columns={'value': 'Status'})
        # splitting date and opponent once: column 0 = date, column 1 = opponent code
        date_opponent = df_melt['variable'].str.split(pat='vs. ', expand=True)
        df_melt['Opponent'] = date_opponent[1]
        # opponent codes missing from `teams` stay empty here
        df_melt['Opponent Name'] = df_melt['Opponent'].str.lower().apply(teams.get)
        df_melt['Date'] = date_opponent[0]
        # adding season 
        df_melt['Season'] = year
        # adding a year to the date: months 1-7 (e.g. Jan / Feb games) fall in the
        # calendar year after the season; an August date stays in the season's year
        month = df_melt['Date'].str.split('/').str[0].astype(int)
        df_melt['Year'] = year + (month < 8).astype(int)
        df_melt['Date'] = df_melt['Date'] + '/' + df_melt['Year'].astype(str)
        # adding team column
        df_melt['Team Name'] = teams.get(team)
        # collecting this team/season in the final column order
        season_frames.append(df_melt[['Team Name','Date','Year','Season','Opponent','Opponent Name','Player','Status']])

# pd.concat raises on an empty list, so fall back to an empty frame
final = pd.concat(season_frames, ignore_index=True) if season_frames else pd.DataFrame()

In [28]:
# cleaning up team codes not covered by teams dict above
clean_dict = {
    'LAR': 'Los Angeles Rams', 
    'LVR': 'Las Vegas Raiders', 
    'LAC': 'Los Angeles Chargers', 
    'HOU': 'Houston Texans', 
    'IND': 'Indianapolis Colts', 
    'TEN': 'Tennessee Titans', 
    'ARI': 'Arizona Cardinals', 
    'BAL': 'Baltimore Ravens',
    'OAK': 'Oakland Raiders'
}
final.loc[final['Opponent Name'].isnull(), 'Opponent Name'] = final['Opponent'].apply(lambda x: clean_dict.get(x))
final = final[~final['Status'].isna()].reset_index(drop=True)

In [29]:
# preview the first rows of the cleaned frame
final.head()

Unnamed: 0,Team Name,Date,Year,Season,Opponent,Opponent Name,Player,Status
0,Arizona Cardinals,09/10/2017,2017,2017,DET,Detroit Lions,Deone Bucannon,O
1,Arizona Cardinals,09/10/2017,2017,2017,DET,Detroit Lions,Aaron Dobson,IR
2,Arizona Cardinals,09/10/2017,2017,2017,DET,Detroit Lions,Alani Fua,IR
3,Arizona Cardinals,09/10/2017,2017,2017,DET,Detroit Lions,Mike Iupati,Q
4,Arizona Cardinals,09/10/2017,2017,2017,DET,Detroit Lions,Jarvis Jones,IR


In [30]:
# export the older-format report; with to_csv's default settings the DataFrame
# index is written as the first (unnamed) column
final.to_csv('nfl_injury_reports_2017_2020.csv')