# NFL Player COVID Data
The code below pulls injury report data for the 2017-2020 seasons from Pro Football Reference (https://www.pro-football-reference.com/). The final data will be used for a visualization of COVID-19 cases during the 2020 season.

In [143]:
import pandas as pd

In [137]:
# Pro Football Reference team codes (as used in the team-page URL path) mapped
# to full team names. Keys are the site's lowercase codes; some are legacy
# codes (e.g. 'sdg', 'rai', 'oti') that no longer match the team's city.
# The Chargers have played in Los Angeles for every season pulled here, so
# 'sdg' maps to 'Los Angeles Chargers' to agree with the 'LAC' opponent code.
teams = {'crd':'Arizona Cardinals',
         'atl':'Atlanta Falcons',
         'rav':'Baltimore Ravens',
         'buf':'Buffalo Bills',
         'car':'Carolina Panthers',
         'chi':'Chicago Bears',
         'cin':'Cincinnati Bengals',
         'cle':'Cleveland Browns',
         'dal':'Dallas Cowboys',
         'den':'Denver Broncos',
         'det':'Detroit Lions',
         'gnb':'Green Bay Packers',
         'htx':'Houston Texans',
         'clt':'Indianapolis Colts',
         'jax':'Jacksonville Jaguars',
         'kan':'Kansas City Chiefs',
         'rai':'Las Vegas Raiders',
         'sdg':'Los Angeles Chargers',
         'ram':'Los Angeles Rams',
         'mia':'Miami Dolphins',
         'min':'Minnesota Vikings',
         'nwe':'New England Patriots',
         'nor':'New Orleans Saints',
         'nyg':'New York Giants',
         'nyj':'New York Jets',
         'phi':'Philadelphia Eagles',
         'pit':'Pittsburgh Steelers',
         'sfo':'San Francisco 49ers',
         'sea':'Seattle Seahawks',
         'tam':'Tampa Bay Buccaneers',
         'oti':'Tennessee Titans',
         'was':'Washington Football Team'}
# seasons to pull data from (one injury-report page per team per season)
years = [2017, 2018, 2019, 2020]

# Scrape one injury-report table per (season, team) page and reshape each to
# long form: one row per player per game. Per-page frames are collected in a
# list and concatenated once at the end; DataFrame.append was removed in
# pandas 2.0 and re-copied the accumulated frame on every iteration.
frames = []
for year in years:
    for team in teams:
        url = f'https://www.pro-football-reference.com/teams/{team}/{year}_injuries.htm'
        df = pd.read_html(url)[0]
        # unpivoting table: every column after 'Player' is a game header and
        # becomes a row, with the cell value kept as 'Status'
        df_melt = pd.melt(df, id_vars=['Player'], value_vars=df.columns[1:]).rename(columns={'value': 'Status'})
        # splitting the game header on 'vs. ' once: left part is the date,
        # right part is the opponent code
        header_parts = df_melt['variable'].str.split(pat='vs. ', expand=True)
        df_melt['Opponent'] = header_parts[1]
        # opponent codes that are not keys of `teams` yield None here
        df_melt['Opponent Name'] = df_melt['Opponent'].str.lower().apply(teams.get)
        df_melt['Date'] = header_parts[0]
        # adding a year to the date: games dated in months 1-8 fall in the
        # calendar year after the season's start, so they get year + 1
        df_melt['Year'] = df_melt['Date'].apply(lambda x: year+1 if int(x.split('/')[0]) <= 8 else year)
        df_melt['Date'] = df_melt['Date'] + '/' + df_melt['Year'].astype(str)
        # adding team column
        df_melt['Team Name'] = teams.get(team)
        # keep only the output columns, in their final order
        frames.append(df_melt[['Team Name','Date','Year','Opponent','Opponent Name','Player','Status']])

# pd.concat raises on an empty list, so fall back to an empty frame
final = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

# Opponent codes that have no lowercase match in the teams dict above,
# mapped to the full team name to use for them.
clean_dict = {
    'LAR': 'Los Angeles Rams',
    'LVR': 'Las Vegas Raiders',
    'LAC': 'Los Angeles Chargers',
    'HOU': 'Houston Texans',
    'IND': 'Indianapolis Colts',
    'TEN': 'Tennessee Titans',
    'ARI': 'Arizona Cardinals',
    'BAL': 'Baltimore Ravens',
    'OAK': 'Oakland Raiders',
}
# Back-fill only the rows whose opponent name is still missing; codes absent
# from clean_dict stay missing (the lookup returns None).
needs_name = final['Opponent Name'].isnull()
final.loc[needs_name, 'Opponent Name'] = final.loc[needs_name, 'Opponent'].apply(clean_dict.get)

In [138]:
# preview the first five rows of the combined injury reports
final.head(n=5)

Unnamed: 0,Team Name,Date,Year,Opponent,Opponent Name,Player,Status
0,Arizona Cardinals,09/10/2017,2017,DET,Detroit Lions,Antoine Bethea,
1,Arizona Cardinals,09/10/2017,2017,DET,Detroit Lions,Alex Boone,
2,Arizona Cardinals,09/10/2017,2017,DET,Detroit Lions,Tyvon Branch,
3,Arizona Cardinals,09/10/2017,2017,DET,Detroit Lions,Aaron Brewer,
4,Arizona Cardinals,09/10/2017,2017,DET,Detroit Lions,John Brown,


In [141]:
# write the combined injury reports to disk (the DataFrame index is written
# as the first, unnamed column)
output_csv = 'nfl_injury_reports_2017_2020.csv'
final.to_csv(output_csv)