In [None]:
import pandas as pd
import datetime as dt
from bs4 import BeautifulSoup
import requests

In [None]:
def _parse_injury_table(html, url):
    """Extract the ``team_injuries`` table from a page's HTML as a DataFrame.

    Parameters
    ----------
    html : bytes or str
        Raw page content handed to BeautifulSoup.
    url : str
        Address the content came from; used only in error messages.

    Returns
    -------
    pandas.DataFrame
        One row per table body row, with the first table row used as the
        column names. Cells carrying a ``data-tip`` attribute contribute
        that attribute's value; all other cells contribute their text.

    Raises
    ------
    ValueError
        If the table is absent or has no rows below the header row.
    """
    soup = BeautifulSoup(html, 'lxml')
    table = soup.find('table', attrs={'class': 'sortable', 'id': 'team_injuries'})
    if table is None:
        raise ValueError(f'No team_injuries table found at {url}')

    rows = []
    for table_row in table.find_all('tr'):
        cells = table_row.find_all(['th', 'td'])
        rows.append([cell['data-tip'] if cell.has_attr('data-tip') else cell.text
                     for cell in cells])

    if len(rows) < 2:
        raise ValueError(f'team_injuries table at {url} has no data rows')

    header, body = rows[0], rows[1:]
    return pd.DataFrame(body, columns=header)


def webscrap_nfl_injury_reports(Start_Year,End_Year,pause_seconds=0):
    """Download every team's injury report for a range of seasons.

    For each team code and each year from ``Start_Year`` to ``End_Year``
    (inclusive) the corresponding pro-football-reference.com injuries page
    is fetched, its ``team_injuries`` table is parsed, ``Team`` and ``Year``
    columns are inserted at positions 1 and 2, and the result is written to
    ``{team}_{year}_injuryreport.csv`` in the current working directory.

    Parameters
    ----------
    Start_Year : int
        First season to download.
    End_Year : int
        Last season to download (inclusive).
    pause_seconds : float, optional
        Seconds to sleep after each page. Defaults to 0 (no pause); raise it
        if the site starts rejecting rapid successive requests.

    Raises
    ------
    ValueError
        If ``Start_Year`` is greater than ``End_Year``, or if a page does not
        contain a usable injury table.
    requests.HTTPError
        If a page request returns an error status.
    """
    if Start_Year > End_Year:
        raise ValueError(
            f'Start_Year ({Start_Year}) must not be greater than End_Year ({End_Year})')

    import time  # local import: only needed for the optional pause

    #list of the NFL teams for url purposes
    teams = ['crd', 'atl', 'rav', 'buf', 'car', 'chi', 'cin', 'cle', 'dal', 'den', 'det', 'gnb','htx','clt','jax','kan',
             'sdg','ram','mia','min','nor','nwe','nyg','nyj','rai','phi','pit','sea','sfo','tam','oti','was']

    for team in teams:
        for year in range(Start_Year, End_Year + 1):
            key = f'{team}_{year}_injuryreport'

            #url for web scraping
            url = f'https://www.pro-football-reference.com/teams/{team}/{year}_injuries.htm'

            #opening website; fail loudly on HTTP errors instead of parsing an error page
            response = requests.get(url, timeout=30)
            response.raise_for_status()

            report = _parse_injury_table(response.content, url)
            report.insert(loc=1,column='Team',value=team,allow_duplicates=True)
            report.insert(loc=2,column='Year',value=year,allow_duplicates=True)
            # index=True is intentional: the cleaning step drops the resulting 'Unnamed: 0' column
            report.to_csv(f'{key}.csv',index=True)

            # update of location of web scrape
            print(team,year)

            if pause_seconds > 0:
                time.sleep(pause_seconds)

    print('done')

def _tidy_injury_report(raw):
    """Reshape one team-season report from wide (one column per game) to long form.

    Parameters
    ----------
    raw : pandas.DataFrame
        Frame read from a ``{team}_{year}_injuryreport.csv`` file. Must
        contain ``Unnamed: 0``, ``Player``, ``Team`` and ``Year`` columns;
        every remaining column is treated as one game.

    Returns
    -------
    pandas.DataFrame
        Columns ``Player, Team, Year, Date, Status, Opp, Injury`` with one
        row per player and game; rows with fewer than three non-null values
        are dropped.
    """
    team_names = {'crd':'ARI', 'atl':'ATL', 'rav':'BAL', 'buf':'BUF', 'car':'CAR', 'chi':'CHI', 'cin':'CIN',
                  'cle':'CLE', 'dal':'DAL', 'den':'DEN', 'det':'DET', 'gnb':'GNB','htx':'HOU','clt':'IND',
                  'jax':'JAX','kan':'KAN','sdg':'LAC','ram':'LAR','mia':'MIA','min':'MIN','nor':'NOR','nwe':'NWE',
                  'nyg':'NYG','nyj':'NYJ','rai':'OAK','phi':'PHI','pit':'PIT','sea':'SEA','sfo':'SFO','tam':'TAM',
                  'oti':'TEN','was':'WAS'}

    report = raw.drop('Unnamed: 0',axis=1)
    report = pd.melt(report,id_vars=['Player','Team','Year'],var_name='Date', value_name='Status')

    # Game column headers hold the date followed by "vs. <opponent>"
    report[['Date','Opp']] = report['Date'].str.split("vs. ",expand=True)
    # n=1: split only on the first colon so an injury description that itself
    # contains a colon still yields exactly two columns
    report[['Status','Injury']] = report['Status'].str.split(":",n=1,expand=True)

    # NOTE(review): the year appended here is the value of the Year column; if that
    # is the season year, games played in January would get the wrong calendar
    # year -- confirm against the source data.
    report['Date'] = pd.to_datetime(report['Date'].astype(str)+'/'+report['Year'].astype(str))

    report = report.replace({'Team': team_names})
    return report.dropna(thresh=3)


def clean_nfl_injury_reports(Start_Year,End_Year):
    """Combine the per-team injury CSVs into one long-format CSV.

    Reads ``{team}_{year}_injuryreport.csv`` from the current working
    directory for every team code and every year from ``Start_Year`` to
    ``End_Year`` (inclusive), tidies each one, concatenates them in
    team-then-year order and writes
    ``NFL_{Start_Year}_{End_Year}_Injuryreport.csv``.

    Parameters
    ----------
    Start_Year : int
        First season to include.
    End_Year : int
        Last season to include (inclusive).

    Raises
    ------
    ValueError
        If ``Start_Year`` is greater than ``End_Year``.
    FileNotFoundError
        If any expected per-team CSV is missing.
    """
    if Start_Year > End_Year:
        raise ValueError(
            f'Start_Year ({Start_Year}) must not be greater than End_Year ({End_Year})')

    teams = ['crd','atl','rav','buf','car','chi','cin','cle','dal','den','det','gnb','htx','clt','jax','kan',
             'sdg','ram','mia','min','nor','nwe','nyg','nyj','rai','phi','pit','sea','sfo','tam','oti','was']

    # Each file is read exactly once, in team-then-year order.
    reports = [
        _tidy_injury_report(pd.read_csv(f'{team}_{year}_injuryreport.csv'))
        for team in teams
        for year in range(Start_Year,End_Year+1)
    ]

    nfl_injury = pd.concat(reports,ignore_index=True)
    nfl_injury.to_csv(f'NFL_{Start_Year}_{End_Year}_Injuryreport.csv',index=False)
    print('done')

In [None]:
# Uncomment to (re)download the per-team CSVs; this issues one HTTP request per team and season.
#webscrap_nfl_injury_reports(2017,2019)

In [None]:
# Combine the per-team 2017-2019 CSVs in the working directory into NFL_2017_2019_Injuryreport.csv.
clean_nfl_injury_reports(2017,2019)