In [1]:
import dash, requests, os
import dash_core_components as dcc
import dash_html_components as html
import plotly.express as px
import pandas as pd
import datetime as dt
from bs4 import BeautifulSoup
import numpy as np
from dotenv import load_dotenv
load_dotenv()
pd.set_option('display.max_columns',None)
pd.set_option('display.max_rows',None)




def team_data_scrape(Start_Year,End_Year,url):
    """Scrape Stathead team game-finder results and append them to a CSV, one page at a time.

    Logs in to Stathead with the ``statheadusername`` / ``statheadpassword``
    environment variables, then requests the selected finder query repeatedly,
    advancing the ``offset`` query parameter by 100 per request.

    Parameters
    ----------
    Start_Year, End_Year : int
        Substituted into the query's ``year_min`` / ``year_max`` parameters and
        into the output file name.
    url : int
        1 selects the first finder query (written to ``nflteam_data_1_*.csv``),
        2 selects the second (written to ``nflteam_data_2_*.csv``).

    Raises
    ------
    ValueError
        If ``url`` is neither 1 nor 2.
    """
    # Validate the selector first so a bad value fails before logging in,
    # instead of printing a hint and crashing later on an unbound ``stat_url``.
    if url == 1:
        stat_url = 'https://stathead.com/football/tgl_finder.cgi?request=1&temperature_gtlt=lt&game_num_max=99&week_num_max=99&order_by=points&match=game&year_max={End_Year}&order_by_asc=0&week_num_min=0&game_type=E&game_num_min=0&year_min={Start_Year}&cstat[1]=all_td_team&ccomp[1]=gt&cval[1]=0&cstat[2]=third_down_att&ccomp[2]=gt&cval[2]=0&cstat[3]=vegas_line&ccomp[3]=gt&cval[3]=-50&cstat[4]=penalties&ccomp[4]=gt&cval[4]=0&cstat[5]=rush_att&ccomp[5]=gt&cval[5]=0&cstat[6]=tot_yds&ccomp[6]=gt&cval[6]=0&cstat[7]=first_down&ccomp[7]=gt&cval[7]=0&cstat[8]=punt&ccomp[8]=gt&cval[8]=0&cstat[9]=pass_cmp&ccomp[9]=gt&cval[9]=0&offset={page}'
        out_path = rf'C:\Users\cudde\OneDrive\Podcasting\Fantasy Sidelines\Injury Data Python\Data_Collect_Clean\nfl_data\nflteam_data_1_{Start_Year}_{End_Year}.csv'
    elif url == 2:
        stat_url = 'https://stathead.com/football/tgl_finder.cgi?request=1&temperature_gtlt=lt&game_num_max=99&week_num_max=99&order_by=all_td_opp&match=game&year_max={End_Year}&order_by_asc=0&week_num_min=0&game_type=R&game_num_min=0&year_min={Start_Year}&cstat[1]=tot_yds_opp&ccomp[1]=gt&cval[1]=0&cstat[2]=rush_yds_diff&ccomp[2]=gt&cval[2]=-500&cstat[3]=score_diff_thru_1&ccomp[3]=gt&cval[3]=-500&cstat[4]=rush_att_opp&ccomp[4]=gt&cval[4]=0&cstat[5]=kick_ret_td_tgl&ccomp[5]=gt&cval[5]=0&cstat[6]=pass_cmp_opp&ccomp[6]=gt&cval[6]=0&cstat[7]=first_down_opp&ccomp[7]=gt&cval[7]=0&cstat[8]=score_diff_1_qtr&ccomp[8]=gt&cval[8]=-500&cstat[9]=third_down_att_opp&ccomp[9]=gt&cval[9]=0&offset={page}'
        out_path = rf'C:\Users\cudde\OneDrive\Podcasting\Fantasy Sidelines\Injury Data Python\Data_Collect_Clean\nfl_data\nflteam_data_2_{Start_Year}_{End_Year}.csv'
    else:
        raise ValueError("Please select 1 or 2.")

    page = 0
    stat_login_url = "https://stathead.com/users/login.cgi"
    stat_payload = {
        'username': os.environ.get('statheadusername'),
        'password': os.environ.get('statheadpassword')
    }

    with requests.Session() as session:

        # The session keeps whatever cookies the login response sets.
        session.post(stat_login_url, data=stat_payload)

        # 100000 is only a hard upper bound; the loop normally ends at the
        # first page that has no results table or no data rows.
        while page < 100000:

            website = session.get(stat_url.format(Start_Year=Start_Year,End_Year=End_Year,page=page)).text
            soup = BeautifulSoup(website, 'html')
            table = soup.find('table', attrs={'class': 'sortable', 'id': 'results'})
            if table is None:
                break

            table_headers = [header.text for header in table.find('thead').find_all('th')]

            # One list of cell texts per <tr>; rows made only of <th> cells come out empty.
            final_data = [[cell.text for cell in tr.find_all('td')] for tr in table.find_all('tr')]
            if not any(final_data[1:]):
                break

            # The first 12 <th> entries are skipped so the remaining names line
            # up with the <td> cells (presumably group headers - TODO confirm).
            df = pd.DataFrame(final_data[1:], columns=table_headers[12:])

            print(page)

            # Append mode writes the header line again for every page;
            # data_clean() removes those repeated header rows when it loads the file.
            df.to_csv(out_path,mode='a',index=False)

            page += 100

        print('Done')
        print(page)

        
        
        
def player_snap_scrape(Start_Year,End_Year):
    """Download FantasyPros snap-count reports and save them as one CSV.

    For every year in ``Start_Year..End_Year`` (inclusive) two report pages are
    requested (the default page and ``defense.php``). The weekly columns are
    renamed to ``Week <name>``, a ``Year`` column is added after the first three
    columns, and all pages are written to ``snapcounts_<start>_<end>.csv``.

    Parameters
    ----------
    Start_Year, End_Year : int
        First and last year to download.
    """
    # Imported locally so this block does not depend on the notebook's import cell.
    from io import StringIO

    sides = ['','defense.php']

    ENDPOINT = "https://www.fantasypros.com/nfl/reports/snap-counts/{side}?year={year}"

    # Collect the per-page frames and concatenate once at the end instead of
    # re-copying the growing frame on every iteration.
    frames = []

    for year in range(Start_Year, End_Year+1):

        for side in sides:

            res = requests.get(ENDPOINT.format(year=year,side=side))

            soup = BeautifulSoup(res.content, 'html.parser')

            table = soup.find('table', {'id': 'data'})
            if table is None:
                # Without this guard read_html would be handed the string "None".
                print(f'No snap count table found for {year} {side}')
                continue

            # Wrapped in StringIO because newer pandas deprecates literal HTML strings here.
            df = pd.read_html(StringIO(str(table)))[0]

            # Keep the first three and last two column names; prefix the rest with "Week ".
            df.columns = df.columns[:3].tolist() + [f'Week {i}' for i in df.columns[3:-2]] + df.columns[-2:].tolist()

            df['Year'] = year

            # Move the newly appended Year column to sit right after the first three columns.
            cols = df.columns[:3].tolist() + df.columns[-1:].tolist() + df.columns[3:-1].tolist()
            frames.append(df[cols])
            print(year,side)

    final_df = pd.concat(frames) if frames else pd.DataFrame()
    final_df.to_csv(rf'C:\Users\cudde\OneDrive\Podcasting\Fantasy Sidelines\Injury Data Python\Data_Collect_Clean\nfl_data\snapcounts_{Start_Year}_{End_Year}.csv',index=False)
    print('Done')

    
    
    
def injury_reports_scrape(Start_Year,End_Year):
    """Scrape Pro-Football-Reference team injury tables and save them as one CSV.

    Requests ``/teams/<team>/<year>_injuries.htm`` for every team code and every
    year in ``Start_Year..End_Year`` (inclusive), reads the ``team_injuries``
    table, adds ``Team`` and ``Year`` columns, and writes everything to
    ``nfl_injuryreport_<start>_<end>.csv``, the file name data_clean() reads.

    Parameters
    ----------
    Start_Year, End_Year : int
        First and last year to download.
    """
    teams = ['crd', 'atl', 'rav', 'buf', 'car', 'chi', 'cin', 'cle', 'dal', 'den', 'det', 'gnb','htx','clt','jax','kan',
             'sdg','ram','mia','min','nor','nwe','nyg','nyj','rai','phi','pit','sea','sfo','tam','oti','was']

    ENDPOINT = 'https://www.pro-football-reference.com/teams/{team}/{year}_injuries.htm'

    frames = []

    for year in range(Start_Year, End_Year+1):

        for team in teams:

            res = requests.get(ENDPOINT.format(year=year,team=team))

            soup = BeautifulSoup(res.content, 'lxml')

            table = soup.find('table', attrs={'class': 'sortable', 'id': 'team_injuries'})
            if table is None:
                print(f'No injury table found for {team} {year}')
                continue

            # Prefer a cell's data-tip attribute over its visible text when present.
            final_data = []
            for tr in table.find_all('tr'):
                cells = tr.find_all(['th','td'])
                final_data.append([cell['data-tip'] if cell.has_attr("data-tip") else cell.text for cell in cells])

            header, body = final_data[0], final_data[1:]
            if not body:
                print(f'Injury table for {team} {year} has no rows')
                continue

            df = pd.DataFrame(body, columns=header)

            # Normalise the player header on each page BEFORE combining. Renaming
            # only the combined frame would leave later pages with the raw header
            # and produce a second, duplicate 'Player' column.
            df.rename(columns={'PlayerÂ ':'Player','Player\xa0':'Player'},inplace=True)

            df.insert(loc=1,column='Team',value=team)
            df.insert(loc=2,column='Year',value=year)

            frames.append(df)
            print(team,year)

    final_df = pd.concat(frames) if frames else pd.DataFrame()
    # File name fixed from 'nlf_' to 'nfl_' so it matches the path the readers open.
    final_df.to_csv(rf'C:\Users\cudde\OneDrive\Podcasting\Fantasy Sidelines\Injury Data Python\Data_Collect_Clean\nfl_data\nfl_injuryreport_{Start_Year}_{End_Year}.csv',index=False)
    print('Done')

    
    
    
def player_stats_scape(Start_Year,End_Year):
    """Scrape Stathead player game-finder results and append them to a CSV, one page at a time.

    Logs in to Stathead with the ``statheadusername`` / ``statheadpassword``
    environment variables, then requests the finder query repeatedly, advancing
    the ``offset`` query parameter by 100 per request, and appends each page to
    ``player_stats_<start>_<end>.csv``.

    Parameters
    ----------
    Start_Year, End_Year : int
        Substituted into the query's ``year_min`` / ``year_max`` parameters and
        into the output file name.
    """
    page = 0
    location = 5000  # next offset threshold at which progress is printed
    stat_login_url = "https://stathead.com/users/login.cgi"
    stat_payload = {
        'username': os.environ.get('statheadusername'),
        'password': os.environ.get('statheadpassword')
    }
    stat_url = "https://stathead.com/football/pgl_finder.cgi?request=1&game_num_max=99&week_num_max=99&order_by=all_td&season_start=1&qb_gwd=0&order_by_asc=0&qb_comeback=0&week_num_min=0&game_num_min=0&year_min={Start_Year}&match=game&year_max={End_Year}&season_end=-1&age_min=0&game_type=R&age_max=99&positions[]=qb&positions[]=rb&positions[]=wr&positions[]=te&positions[]=e&positions[]=t&positions[]=g&positions[]=c&positions[]=ol&positions[]=dt&positions[]=de&positions[]=dl&positions[]=ilb&positions[]=olb&positions[]=lb&positions[]=cb&positions[]=s&positions[]=db&positions[]=k&positions[]=p&cstat[1]=punt_ret&ccomp[1]=gt&cval[1]=0&cstat[2]=sacks&ccomp[2]=gt&cval[2]=0&cstat[3]=fumbles&ccomp[3]=gt&cval[3]=0&cstat[4]=rush_att&ccomp[4]=gt&cval[4]=0&cstat[5]=pass_defended&ccomp[5]=gt&cval[5]=0&cstat[6]=pass_cmp&ccomp[6]=gt&cval[6]=0&cstat[7]=targets&ccomp[7]=gt&cval[7]=0&cstat[8]=kick_ret&ccomp[8]=gt&cval[8]=0&offset={page}"
    out_path = rf'C:\Users\cudde\OneDrive\Podcasting\Fantasy Sidelines\Injury Data Python\Data_Collect_Clean\nfl_data\player_stats_{Start_Year}_{End_Year}.csv'

    with requests.Session() as session:

        # The session keeps whatever cookies the login response sets.
        session.post(stat_login_url, data=stat_payload)

        # 100000 is only a hard upper bound; the loop normally ends at the
        # first page that has no results table or no data rows.
        while page < 100000:

            website = session.get(stat_url.format(Start_Year=Start_Year,End_Year=End_Year,page=page)).text
            soup = BeautifulSoup(website, 'html')
            table = soup.find('table', attrs={'class': 'sortable', 'id': 'results'})
            if table is None:
                break

            table_headers = [header.text for header in table.find('thead').find_all('th')]

            # One list of cell texts per <tr>; rows made only of <th> cells come out empty.
            final_data = [[cell.text for cell in tr.find_all('td')] for tr in table.find_all('tr')]
            if not any(final_data[1:]):
                break

            # The first 11 <th> entries are skipped so the remaining names line
            # up with the <td> cells (presumably group headers - TODO confirm).
            df = pd.DataFrame(final_data[1:], columns=table_headers[11:])
            # Append mode writes the header line again for every page;
            # data_clean() removes those repeated header rows when it loads the file.
            df.to_csv(out_path,mode='a',index=False)

            if page > location:
                print(page)
                location += 5000

            page += 100

        print('Done')
        print(page)

        
        
        
def _strip_name_suffixes(frame):
    """Remove generational suffixes (Jr., Sr., II-V) from ``frame['Player']`` in place."""
    # Pad with a trailing space so a suffix at the very end of a name still
    # matches the space-delimited patterns below.
    frame['Player'] = frame['Player']+' '
    frame.replace({'Player':{' Jr. ':'',' Jr ':'',' Sr. ':'',' Sr ':'',' III ':'',' II ':'',' IV ':'',' V ':''}},regex=True,inplace=True)
    frame['Player'] = frame['Player'].str.strip(' ')


def _clean_snaps(path):
    """Load the snap-count CSV and reshape it to one row per player, year and week."""
    snaps = pd.read_csv(path)
    # 'Unnamed: 0' only exists in files that were saved with their index;
    # player_snap_scrape() writes index=False, so tolerate its absence.
    snaps.drop(columns=['Unnamed: 0'],errors='ignore',inplace=True)
    snaps.drop(['TTL','AVG'],axis=1,inplace=True)
    snaps = pd.melt(snaps, id_vars=['Player','Pos','Team','Year'], var_name='Week', value_name='Snaps')
    snaps['Year'] = snaps['Year'].astype(str)
    snaps['Player'] = snaps['Player'].astype(str)
    snaps['Week'] = snaps['Week'].astype(str)
    # Align team codes with the ones used in the stats data and turn
    # 'Week 1'..'Week 17' into '1'..'17'.
    snaps.replace({'Team':{'FA':'','GB':'GNB','JAC':'JAX','KC':'KAN','Multi':'','NE':'NWE','NO':'NOR','SF':'SFO','TB':'TAM'},
                   'Week':{f'Week {i}':str(i) for i in range(1,18)}},inplace=True)
    _strip_name_suffixes(snaps)
    return snaps


def _clean_stats(path):
    """Load the player game-log CSV, rename its columns and convert the stat columns to floats."""
    stats = pd.read_csv(path)
    stats.dropna(how='all',inplace=True)
    # Remove header lines repeated by the page-by-page append in the scraper;
    # errors='ignore' keeps this from raising when the file has none.
    stats.set_index('Player',inplace=True)
    stats.drop('Player',errors='ignore',inplace=True)
    stats.reset_index(inplace=True)
    stats.drop('Lg',axis=1,inplace=True)
    stats.rename(columns={'Tm':'Team','Unnamed: 6':'Away_Home','Cmp':'IPass_Cmp','Att':'IPass_Att','Cmp%':'IPass_Cmp%','Yds':'IPass_Yds',\
                 'TD':'IPass_TD','Int':'IPass_Int','Rate':'IQB_Rate','Sk':'I_Sk','Yds.1':'ISk_Yds','Y/A':'IPass_Y/A',\
                 'AY/A':'IPass_AdjY/A','Att.1':'IRush_Att','Yds.2':'IRush_Yds','Y/A.1':'IRush_Y/A','TD.1':'IRush_TD',\
                 'Tgt':'IRec_Tgt','Rec':'IRec_Rec','Yds.3':'IRec_Yds','Y/R':'IRec_Y/R','TD.2':'IRec_TD','Ctch%':'IRec_Ctch%',\
                 'Y/Tgt':'IRec_Y/Tgt','XPM':'IXP_Made','XPA':'IXP_Att','XP%':'IXP%','FGM':'IFG_Made','FGA':'IFG_Att',\
                 'FG%':'IFG%','2PM':'I2pt_Made','Sfty':'ISfty','TD.3':'ITot_TD','Pts':'ITot_Pts','Rt':'IKR_Rt','Yds.4':'IKR_Yds',\
                 'Y/Rt':'IKR_Y/Rt','TD.4':'IKR_TD','Ret':'IPR_Rt','Yds.5':'IPR_Yds','Y/R.1':'IPR_Y/Rt','TD.5':'IPR_TD',\
                 'Sk.1':'ITack_Sk','Solo':'ITack_Solo','Ast':'ITack_Ast','Comb':'ITack_Tot','TFL':'ITack_TFL',\
                 'QBHits':'ITack_QBHits','Int.1':'IDef_Int','Yds.6':'IDef_IntYds','TD.6':'IDef_IntTD','PD':'IDef_PD',\
                 'Fmb':'IFmb_Fmb','FL':'IFmb_Lost','FF':'IFmb_Forced','FR':'IFmb_Recov','Yds.7':'IFmb_Yds','TD.7':'IFmb_TD'},\
                 inplace=True)

    # Snapshot of the column order BEFORE 'Year' is inserted; the positional
    # slices below rely on it.
    stats_cols = list(stats.columns)

    stats.replace({'Away_Home':{'@':'Away',None:'Home'}},inplace=True)
    stats[['IPass_Cmp','IPass_Att','IRec_Rec','IRec_Tgt','IXP_Made','IXP_Att','IFG_Made','IFG_Att']] = stats[['IPass_Cmp','IPass_Att','IRec_Rec','IRec_Tgt','IXP_Made','IXP_Att','IFG_Made','IFG_Att']].astype(float)
    # Recompute the percentage columns from their numerator/denominator columns.
    stats['IPass_Cmp%'] = stats['IPass_Cmp']/stats['IPass_Att']
    stats['IRec_Ctch%'] = stats['IRec_Rec']/stats['IRec_Tgt']
    stats['IXP%'] = stats['IXP_Made']/stats['IXP_Att']
    stats['IFG%'] = stats['IFG_Made']/stats['IFG_Att']
    stats[stats_cols[11:]] = stats[stats_cols[11:]].astype(float)
    stats[stats_cols[11:]] = stats[stats_cols[11:]].fillna(value=0)
    stats['Date'] = pd.to_datetime(stats['Date'],errors='coerce',format='%Y-%m-%d')
    stats.insert(loc=8,column='Year',value=stats['Date'].dt.year)
    stats['Player'] = stats['Player'].astype(str)
    stats['Week'] = stats['Week'].astype(str)
    stats['Year'] = stats['Year'].astype(str)
    _strip_name_suffixes(stats)
    return stats


def _clean_team(path1, path2):
    """Load both team game-log CSVs, rename their columns and merge them into one frame."""
    team1 = pd.read_csv(path1)
    team2 = pd.read_csv(path2)
    team1.dropna(thresh=10,inplace=True)
    team2.dropna(thresh=10,inplace=True)
    team1.drop('LTime',axis=1,inplace=True)
    team2.drop(['LTime'],axis=1,inplace=True)
    team1.rename(columns={'Tm':'Team','Unnamed: 5':'Away_Home','PF':'Points_For','PA':'Points_Against','PC':'Points_Comb',\
                         'vs. Line':'Vs_Line','Cmp':'TPass_Cmp','Att':'TPass_Att','Cmp%':'TPass_Cmp%','Yds':'TPass_Yds',\
                          'TD':'TPass_TD','Int':'TPass_Int','Sk':'TSack','Yds.1':'TSack_Yds','Rate':'TQB_Rate',\
                          'Att.1':'TRush_Att','Yds.2':'TRush_Yds','Y/A':'TRush_Y/A','TD.1':'TRush_TD','Tot':'TTot_Yds',\
                          'Ply':'TO_Play#','Y/P':'TO_Y/P','DPly':'TD_Play#','DY/P':'TD_Y/P','TO':'TTot_TO','ToP':'TO_ToP',\
                          'Time.1':'TGame_Dur','Yds.3':'TPen_Yds','OppPen':'TOpp_Pen','OppYds':'TOpp_Pen_Yds',\
                          'CombPen':'TComb_Pen','CombPenYds':'TComb_Pen_Yds','1stD':'T1st_Downs','Rsh':'T1st_by_Rsh',\
                          'Pass':'T1st_by_Pass','Pen.1':'T1st_by_Pen','3DAtt':'T3rd_Down_Att','3DConv':'T3rd_Down_Conv',\
                          '3D%':'T3rd_Down%','4DAtt':'T4th_Down_Att','4DConv':'T4th_Down_Conv','4D%':'T4th_Down%',\
                          'TD.2':'TTot_TD','XPA':'TXP_Att','XPM':'TXP_Made','FGA':'TFG_Att','FGM':'TFG_Made','2PA':'T2Pt_Att',\
                          '2PM':'T2Pt_Made','Sfty':'TSfty','Pnt':'TTimes_Punted','Yds.4':'TPunt_Yds','Y/P.1':'TPunt_Yds_Avg'},inplace=True)
    team2.rename(columns={'Tm':'Team','Unnamed: 5':'Away_Home','TD':'TOpp_Tot_TD','XPA':'TOpp_XP_Att','XPM':'TOpp_XP_Made',\
                          'Att':'TOpp_FG_Att','Md':'TOpp_FG_Made','Sfty':'TOpp_Sfty','Cmp':'TOpp_Pass_Cmp','Att.1':'TOpp_Pass_Att',\
                          'Cmp%':'TOpp_Pass_Cmp%','Yds':'TOpp_Pass_Yds','TD.1':'TOpp_Pass_TD','Int':'TOpp_Pass_Int','Sk':'TOpp_Sk',\
                          'Yds.1':'TOpp_Sk_Yds','Rate':'TOpp_QB_Rate','Att.2':'TOpp_Rush_Att','Yds.2':'TOpp_Rush_Yds',\
                          'Y/A':'TOpp_Rush_Y/A','TD.2':'TOpp_Rush_TD','Tot':'TOpp_Tot_Yds','TO':'TOpp_Tot_TO',\
                          '1stDOpp':'TOpp_1st_Downs','Rush':'TOpp_1st_by_Rsh','Pass':'TOpp_1st_by_Pass','Pen':'TOpp_1st_by_Pen',\
                          'Opp3DAtt':'TOpp_3rd_Down_Att','Opp3DConv':'TOpp_3rd_Down_Conv','Opp3D%':'TOpp_3rd_Down%',\
                          'Opp4DAtt':'TOpp_4th_Down_Att','Opp4DConv':'TOpp_4th_Down_Conv','Opp4D%':'TOpp_4th_Down%',\
                          'Rush.1':'TMargin_Rush','Pass.1':'TMargin_Pass','Tot.1':'TMargin_TotYds','TO.1':'TTO_TD',\
                          'KR':'TKR_TD','PR':'TPR_TD','IR':'TInt_TD','FR':'TFmb_TD','OR':'TOtherRet_TD',\
                          'RetTD':'TAll_Ret_TD','Q1':'TMar_Thru_Q1','Q2':'TMar_Thru_Q2','Q3':'TMar_Thru_Q3',\
                          'Q1.1':'TScore_Diff_Q1','Q2.1':'TScore_Diff_Q2','Q3.1':'TScore_Diff_Q3',\
                          'Q4':'TScore_Diff_Q4','1stHalf':'TScore_Diff_1stHalf','2ndHalf':'TScore_Diff_2ndHalf'},inplace=True)

    team = pd.merge(left=team1,right=team2,\
                     how='outer',\
                     on=['Team','Year','Date','Time','Away_Home','Opp','Week','G#','Day','Result','OT'])

    # Remove header lines repeated by the page-by-page append in the scraper;
    # errors='ignore' keeps this from raising when the files have none.
    team.set_index('Team',inplace=True)
    team.drop('Tm',errors='ignore',inplace=True)
    team.reset_index(inplace=True)

    # Snapshot of the column order BEFORE 'Month' is inserted; the positional
    # slices below rely on it.
    team_cols = list(team.columns)

    team.replace({'Away_Home':{'@':'Away',None:'Home'}},inplace=True)
    team[team_cols[11:]] = team[team_cols[11:]].fillna(value=0)
    ratio_inputs = ['TPass_Cmp','TPass_Att','T3rd_Down_Att','T3rd_Down_Conv','T4th_Down_Att',
                    'T4th_Down_Conv','TOpp_Pass_Cmp','TOpp_Pass_Att','TOpp_3rd_Down_Att',
                    'TOpp_3rd_Down_Conv','TOpp_4th_Down_Att','TOpp_4th_Down_Conv']
    team[ratio_inputs] = team[ratio_inputs].astype(float)
    # Recompute the percentage columns from their numerator/denominator columns.
    team['TPass_Cmp%'] = team['TPass_Cmp']/team['TPass_Att']
    team['T3rd_Down%'] = team['T3rd_Down_Conv']/team['T3rd_Down_Att']
    team['T4th_Down%'] = team['T4th_Down_Conv']/team['T4th_Down_Att']
    team['TOpp_Pass_Cmp%'] = team['TOpp_Pass_Cmp']/team['TOpp_Pass_Att']
    team['TOpp_3rd_Down%'] = team['TOpp_3rd_Down_Conv']/team['TOpp_3rd_Down_Att']
    team['TOpp_4th_Down%'] = team['TOpp_4th_Down_Conv']/team['TOpp_4th_Down_Att']
    team['Date'] = pd.to_datetime(team['Date'],errors='coerce',format='%Y-%m-%d')
    # Pad both duration strings to a three-field form before timedelta parsing,
    # then store them as seconds.
    team['TGame_Dur'] = team['TGame_Dur']+':00'
    team['TO_ToP'] = '00:'+team['TO_ToP']
    team['TGame_Dur'] = pd.to_timedelta(team['TGame_Dur'],errors='coerce')
    team['TGame_Dur'] = team['TGame_Dur'].dt.total_seconds()
    team['TO_ToP'] = pd.to_timedelta(team['TO_ToP'],errors='coerce')
    team['TO_ToP'] = team['TO_ToP'].dt.total_seconds()
    # Columns 16 and 18 are left out of the float conversion, as in the original.
    team[team_cols[11:16]] = team[team_cols[11:16]].astype(float)
    team[team_cols[17]] = team[team_cols[17]].astype(float)
    team[team_cols[19:]] = team[team_cols[19:]].astype(float)
    team.insert(loc=9,column='Month',value=team['Date'].dt.month)
    return team


def _clean_injury(path):
    """Load the injury-report CSV and reshape it to one row per player and game date."""
    injury = pd.read_csv(path,low_memory=False)
    # 'Unnamed: 0' only exists in files that were saved with their index;
    # injury_reports_scrape() writes index=False, so tolerate its absence.
    injury.drop(columns=['Unnamed: 0'],errors='ignore',inplace=True)
    injury = pd.melt(injury,id_vars=['Player','Team','Year'],var_name='Date', value_name='Status')
    # The melted column names are split on 'vs. ' and then '/', i.e. they are
    # expected to look like '<month>/<day>vs. <opponent>'.
    injury[['Date','Opp']] = injury.Date.str.split('vs. ',expand=True)
    injury[['Month','Day']] = injury.Date.str.split('/',expand=True)
    # n=1 keeps any further ':' inside the injury text instead of producing a
    # third column that cannot be assigned.
    injury[['Status','Injury']] = injury.Status.str.split(":",n=1,expand=True)
    injury.dropna(axis=0,subset=['Status','Injury'],how='all',inplace=True)
    injury[['Year','Month','Day']] = injury[['Year','Month','Day']].astype(int)
    # January/February dates get Year+1 so the parsed date lands in the following calendar year.
    injury['Date'] = injury['Date']+'/'+(np.where(injury['Month']<=2,injury['Year']+1,injury['Year'])).astype(str)
    injury['Date'] = pd.to_datetime(injury['Date'])
    # Drop rows dated January 4 or later and all of February.
    past_week17 = injury[(injury['Month']==1)&(injury['Day']>=4)].index
    injury.drop(past_week17,inplace=True)
    playoffs = injury[(injury['Month']==2)].index
    injury.drop(playoffs,inplace=True)
    injury.replace({'Team':\
                       {'crd':'ARI', 'atl':'ATL', 'rav':'BAL', 'buf':'BUF', 'car':'CAR', 'chi':'CHI', 'cin':'CIN',\
                        'cle':'CLE', 'dal':'DAL', 'den':'DEN', 'det':'DET', 'gnb':'GNB','htx':'HOU','clt':'IND',\
                        'jax':'JAX','kan':'KAN','sdg':'LAC','ram':'LAR','mia':'MIA','min':'MIN','nor':'NOR','nwe':'NWE',\
                        'nyg':'NYG','nyj':'NYJ','rai':'OAK','phi':'PHI','pit':'PIT','sea':'SEA','sfo':'SFO','tam':'TAM',\
                        'oti':'TEN','was':'WAS'}},inplace=True)
    injury['Injury'] = injury['Injury'].str.strip(' ')
    injury.replace({'Injury':{'right':'','left':'','Right':'','Left':'','Biceps':'Bicep',\
                              'Triceps':'Tricep','Ankles':'Ankle','hip':'Hip','Hips':'Hip','Knees':'Knee',\
                              'Virus':'Illness','Oblique':'Abdomen',\
                              'NotInjuryRelated':'Not Injury Related','MedicalIllness':'Illness',\
                              'LowerLeg':'Lower Leg','CoreMuscle':'Abdomen','Abdominal':'Abdomen'}},\
                               regex=True,inplace=True)
    _strip_name_suffixes(injury)
    return injury


def data_clean(Start_Year,End_Year):
    """Load the scraped CSVs for a season range, clean them and merge the player-level data.

    Reads the snap-count, player-stats, team and injury-report files named with
    ``<Start_Year>_<End_Year>``, cleans each one, merges stats with snaps on
    player/year/week, then merges the result with injuries on player/date.

    Parameters
    ----------
    Start_Year, End_Year : int
        Season range used in the input file names.

    Returns
    -------
    pandas.DataFrame
        The outer merge of stats, snaps and injury data. (The original returned
        ``None``; callers that ignore the result are unaffected.)
    """
    data_dir = r'C:\Users\cudde\OneDrive\Podcasting\Fantasy Sidelines\Injury Data Python\Data_Collect_Clean\nfl_data'

    snaps = _clean_snaps(rf'{data_dir}\snapcounts_{Start_Year}_{End_Year}.csv')
    stats = _clean_stats(rf'{data_dir}\player_stats_{Start_Year}_{End_Year}.csv')
    # The team frame is cleaned but not merged into the result yet.
    # TODO: merge it and save the combined frame as nfl_total_<start>_<end>.csv.
    team = _clean_team(rf'{data_dir}\nflteam_data_1_{Start_Year}_{End_Year}.csv',
                       rf'{data_dir}\nflteam_data_2_{Start_Year}_{End_Year}.csv')
    injury = _clean_injury(rf'{data_dir}\nfl_injuryreport_{Start_Year}_{End_Year}.csv')

    stats_snaps = pd.merge(left=stats,right=snaps,how='outer',on=['Player','Year','Week'])
    # Team: keep the stats value and fall back to snaps. Pos: keep the snaps
    # value and fall back to stats.
    stats_snaps['Team_x'] = stats_snaps['Team_x'].fillna(stats_snaps['Team_y'])
    stats_snaps['Pos_x'] = stats_snaps['Pos_y'].fillna(stats_snaps['Pos_x'])
    stats_snaps.drop(['Team_y','Pos_y'],axis=1,inplace=True)
    stats_snaps.rename(columns={'Team_x':'Team','Pos_x':'Pos'},inplace=True)

    # The injury frame has no 'Week' column, so join on the game date instead.
    stats_snaps_injury = pd.merge(left=stats_snaps,right=injury,how='outer',on=['Player','Date'])

    return stats_snaps_injury

In [2]:
# Season range shared by the scraper calls (Start/End) and the cleaning cells
# (Start_Year/End_Year); both pairs hold the same values.
Start_Year, End_Year = 2017, 2019
Start, End = 2017, 2019

In [None]:
# reserve = injury['Status']=='reserve/future'
# injury_reserve = injury[reserve]
# injury_reserve.head()

In [None]:
# team_data_scrape(Start,End,1)

In [None]:
# player_snap_scrape(Start,End)

In [None]:
# injury_reports_scrape(Start,End)

In [None]:
# player_stats_scape(Start,End)

In [None]:
# data_clean(Start,End)

In [None]:
# Load the snap-count CSV and reshape it to one row per player, year and week.
snaps = pd.read_csv(rf'C:\Users\cudde\OneDrive\Podcasting\Fantasy Sidelines\Injury Data Python\Data_Collect_Clean\nfl_data\snapcounts_{Start_Year}_{End_Year}.csv')
# 'Unnamed: 0' only exists in files that were saved with their index;
# player_snap_scrape() writes index=False, so tolerate its absence.
snaps.drop(columns=['Unnamed: 0'],errors='ignore',inplace=True)
snaps.drop(['TTL','AVG'],axis=1,inplace=True)
snaps = pd.melt(snaps, id_vars=['Player','Pos','Team','Year'], var_name='Week', value_name='Snaps')
snaps['Year'] = snaps['Year'].astype(str)
snaps['Player'] = snaps['Player'].astype(str)
snaps['Week'] = snaps['Week'].astype(str)
# Align team codes with the ones used in the stats data and turn
# 'Week 1'..'Week 17' into '1'..'17'.
snaps.replace({'Team':{'FA':'','GB':'GNB','JAC':'JAX','KC':'KAN','Multi':'','NE':'NWE','NO':'NOR','SF':'SFO','TB':'TAM'},
               'Week':{f'Week {i}':str(i) for i in range(1,18)}},inplace=True)
# Pad with a trailing space so a suffix at the very end of a name still matches
# the space-delimited patterns, then strip the padding again.
snaps['Player'] = snaps['Player']+' '
snaps.replace({'Player':{' Jr. ':'',' Jr ':'',' Sr. ':'',' Sr ':'',' III ':'',' II ':'',' IV ':'',' V ':''}},regex=True,inplace=True)
snaps['Player'] = snaps['Player'].str.strip(' ')

for col in snaps.columns:
    print(col)


In [None]:
# Load the player game-log CSV, rename its columns and convert the stat columns to floats.
stats = pd.read_csv(rf'C:\Users\cudde\OneDrive\Podcasting\Fantasy Sidelines\Injury Data Python\Data_Collect_Clean\nfl_data\player_stats_{Start_Year}_{End_Year}.csv')
stats = stats.dropna(how='all')
# Rows whose Player cell is the literal text 'Player' are repeated header lines.
stats = stats.drop(stats.index[stats['Player'] == 'Player'])
stats = stats.drop(columns='Lg')
stats = stats.rename(columns={'Tm':'Team','Unnamed: 6':'Away_Home','Cmp':'IPass_Cmp','Att':'IPass_Att','Cmp%':'IPass_Cmp%','Yds':'IPass_Yds',\
             'TD':'IPass_TD','Int':'IPass_Int','Rate':'IQB_Rate','Sk':'I_Sk','Yds.1':'ISk_Yds','Y/A':'IPass_Y/A',\
             'AY/A':'IPass_AdjY/A','Att.1':'IRush_Att','Yds.2':'IRush_Yds','Y/A.1':'IRush_Y/A','TD.1':'IRush_TD',\
             'Tgt':'IRec_Tgt','Rec':'IRec_Rec','Yds.3':'IRec_Yds','Y/R':'IRec_Y/R','TD.2':'IRec_TD','Ctch%':'IRec_Ctch%',\
             'Y/Tgt':'IRec_Y/Tgt','XPM':'IXP_Made','XPA':'IXP_Att','XP%':'IXP%','FGM':'IFG_Made','FGA':'IFG_Att',\
             'FG%':'IFG%','2PM':'I2pt_Made','Sfty':'ISfty','TD.3':'ITot_TD','Pts':'ITot_Pts','Rt':'IKR_Rt','Yds.4':'IKR_Yds',\
             'Y/Rt':'IKR_Y/Rt','TD.4':'IKR_TD','Ret':'IPR_Rt','Yds.5':'IPR_Yds','Y/R.1':'IPR_Y/Rt','TD.5':'IPR_TD',\
             'Sk.1':'ITack_Sk','Solo':'ITack_Solo','Ast':'ITack_Ast','Comb':'ITack_Tot','TFL':'ITack_TFL',\
             'QBHits':'ITack_QBHits','Int.1':'IDef_Int','Yds.6':'IDef_IntYds','TD.6':'IDef_IntTD','PD':'IDef_PD',\
             'Fmb':'IFmb_Fmb','FL':'IFmb_Lost','FF':'IFmb_Forced','FR':'IFmb_Recov','Yds.7':'IFmb_Yds','TD.7':'IFmb_TD'})

# Snapshot of the column order before 'Year' is inserted; the [11:] slices rely on it.
stats_cols = list(stats.columns)

stats = stats.replace({'Away_Home':{'@':'Away',None:'Home'}})
stats[['IPass_Cmp','IPass_Att','IRec_Rec','IRec_Tgt','IXP_Made','IXP_Att','IFG_Made','IFG_Att']] = stats[['IPass_Cmp','IPass_Att','IRec_Rec','IRec_Tgt','IXP_Made','IXP_Att','IFG_Made','IFG_Att']].astype(float)
# Recompute each percentage column as made / attempted.
for pct_col, made_col, att_col in (('IPass_Cmp%','IPass_Cmp','IPass_Att'),
                                   ('IRec_Ctch%','IRec_Rec','IRec_Tgt'),
                                   ('IXP%','IXP_Made','IXP_Att'),
                                   ('IFG%','IFG_Made','IFG_Att')):
    stats[pct_col] = stats[made_col]/stats[att_col]
stats[stats_cols[11:]] = stats[stats_cols[11:]].astype(float).fillna(value=0)
stats['Date'] = pd.to_datetime(stats['Date'],errors='coerce',format='%Y-%m-%d')
stats.insert(loc=8,column='Year',value=stats['Date'].dt.year)
for key_col in ('Player','Week','Year'):
    stats[key_col] = stats[key_col].astype(str)
# Pad with a trailing space so a suffix at the very end of a name still matches
# the space-delimited patterns, then strip the padding again.
stats['Player'] = stats['Player']+' '
stats = stats.replace({'Player':{' Jr. ':'',' Jr ':'',' Sr. ':'',' Sr ':'',' III ':'',' II ':'',' IV ':'',' V ':''}},regex=True)
stats['Player'] = stats['Player'].str.strip(' ')

print(*stats.columns, sep='\n')

In [None]:
# Load both team game-log CSVs, rename their columns and merge them into one frame.
team1 = pd.read_csv(rf'C:\Users\cudde\OneDrive\Podcasting\Fantasy Sidelines\Injury Data Python\Data_Collect_Clean\nfl_data\nflteam_data_1_{Start_Year}_{End_Year}.csv')
team2 = pd.read_csv(rf'C:\Users\cudde\OneDrive\Podcasting\Fantasy Sidelines\Injury Data Python\Data_Collect_Clean\nfl_data\nflteam_data_2_{Start_Year}_{End_Year}.csv')
# Keep only rows with at least 10 non-missing values.
team1.dropna(thresh=10,inplace=True)
team2.dropna(thresh=10,inplace=True)
team1.drop('LTime',axis=1,inplace=True)
team2.drop(['LTime'],axis=1,inplace=True)
# Raw column names kept for inspection.
test1 = team1.columns
test2 = team2.columns
team1.rename(columns={'Tm':'Team','Unnamed: 5':'Away_Home','PF':'Points_For','PA':'Points_Against','PC':'Points_Comb',\
                     'vs. Line':'Vs_Line','Cmp':'TPass_Cmp','Att':'TPass_Att','Cmp%':'TPass_Cmp%','Yds':'TPass_Yds',\
                      'TD':'TPass_TD','Int':'TPass_Int','Sk':'TSack','Yds.1':'TSack_Yds','Rate':'TQB_Rate',\
                      'Att.1':'TRush_Att','Yds.2':'TRush_Yds','Y/A':'TRush_Y/A','TD.1':'TRush_TD','Tot':'TTot_Yds',\
                      'Ply':'TO_Play#','Y/P':'TO_Y/P','DPly':'TD_Play#','DY/P':'TD_Y/P','TO':'TTot_TO','ToP':'TO_ToP',\
                      'Time.1':'TGame_Dur','Yds.3':'TPen_Yds','OppPen':'TOpp_Pen','OppYds':'TOpp_Pen_Yds',\
                      'CombPen':'TComb_Pen','CombPenYds':'TComb_Pen_Yds','1stD':'T1st_Downs','Rsh':'T1st_by_Rsh',\
                      'Pass':'T1st_by_Pass','Pen.1':'T1st_by_Pen','3DAtt':'T3rd_Down_Att','3DConv':'T3rd_Down_Conv',\
                      '3D%':'T3rd_Down%','4DAtt':'T4th_Down_Att','4DConv':'T4th_Down_Conv','4D%':'T4th_Down%',\
                      'TD.2':'TTot_TD','XPA':'TXP_Att','XPM':'TXP_Made','FGA':'TFG_Att','FGM':'TFG_Made','2PA':'T2Pt_Att',\
                      '2PM':'T2Pt_Made','Sfty':'TSfty','Pnt':'TTimes_Punted','Yds.4':'TPunt_Yds','Y/P.1':'TPunt_Yds_Avg'},inplace=True)
team2.rename(columns={'Tm':'Team','Unnamed: 5':'Away_Home','TD':'TOpp_Tot_TD','XPA':'TOpp_XP_Att','XPM':'TOpp_XP_Made',\
                      'Att':'TOpp_FG_Att','Md':'TOpp_FG_Made','Sfty':'TOpp_Sfty','Cmp':'TOpp_Pass_Cmp','Att.1':'TOpp_Pass_Att',\
                      'Cmp%':'TOpp_Pass_Cmp%','Yds':'TOpp_Pass_Yds','TD.1':'TOpp_Pass_TD','Int':'TOpp_Pass_Int','Sk':'TOpp_Sk',\
                      'Yds.1':'TOpp_Sk_Yds','Rate':'TOpp_QB_Rate','Att.2':'TOpp_Rush_Att','Yds.2':'TOpp_Rush_Yds',\
                      'Y/A':'TOpp_Rush_Y/A','TD.2':'TOpp_Rush_TD','Tot':'TOpp_Tot_Yds','TO':'TOpp_Tot_TO',\
                      '1stDOpp':'TOpp_1st_Downs','Rush':'TOpp_1st_by_Rsh','Pass':'TOpp_1st_by_Pass','Pen':'TOpp_1st_by_Pen',\
                      'Opp3DAtt':'TOpp_3rd_Down_Att','Opp3DConv':'TOpp_3rd_Down_Conv','Opp3D%':'TOpp_3rd_Down%',\
                      'Opp4DAtt':'TOpp_4th_Down_Att','Opp4DConv':'TOpp_4th_Down_Conv','Opp4D%':'TOpp_4th_Down%',\
                      'Rush.1':'TMargin_Rush','Pass.1':'TMargin_Pass','Tot.1':'TMargin_TotYds','TO.1':'TTO_TD',\
                      'KR':'TKR_TD','PR':'TPR_TD','IR':'TInt_TD','FR':'TFmb_TD','OR':'TOtherRet_TD',\
                      'RetTD':'TAll_Ret_TD','Q1':'TMar_Thru_Q1','Q2':'TMar_Thru_Q2','Q3':'TMar_Thru_Q3',\
                      'Q1.1':'TScore_Diff_Q1','Q2.1':'TScore_Diff_Q2','Q3.1':'TScore_Diff_Q3',\
                      'Q4':'TScore_Diff_Q4','1stHalf':'TScore_Diff_1stHalf','2ndHalf':'TScore_Diff_2ndHalf'},inplace=True)

team = pd.merge(left=team1,right=team2,\
                 how='outer',\
                 on=['Team','Year','Date','Time','Away_Home','Opp','Week','G#','Day','Result','OT'])

# Remove rows whose Team value is the literal header text 'Tm' (repeated header
# lines in the CSVs). errors='ignore' keeps this from raising a KeyError when
# the files contain no such rows.
team.set_index('Team',inplace=True)
team.drop('Tm',errors='ignore',inplace=True)
team.reset_index(inplace=True)

# Snapshot of the column order before 'Month' is inserted; the positional
# slices below rely on it.
team_cols = []

for col in team.columns:
    team_cols.append(col)

team.replace({'Away_Home':{'@':'Away',None:'Home'}},inplace=True)
team[team_cols[11:]] = team[team_cols[11:]].fillna(value=0)
team[['TPass_Cmp','TPass_Att','T3rd_Down_Att','T3rd_Down_Conv','T4th_Down_Att',\
      'T4th_Down_Conv','TOpp_Pass_Cmp','TOpp_Pass_Att','TOpp_3rd_Down_Att',\
      'TOpp_3rd_Down_Conv','TOpp_4th_Down_Att','TOpp_4th_Down_Conv']] = team[['TPass_Cmp','TPass_Att','T3rd_Down_Att',\
                                                                              'T3rd_Down_Conv','T4th_Down_Att',\
                                                                              'T4th_Down_Conv','TOpp_Pass_Cmp',\
                                                                              'TOpp_Pass_Att','TOpp_3rd_Down_Att',\
                                                                              'TOpp_3rd_Down_Conv','TOpp_4th_Down_Att',\
                                                                              'TOpp_4th_Down_Conv']].astype(float)
# Recompute the percentage columns from their numerator/denominator columns.
team['TPass_Cmp%'] = team['TPass_Cmp']/team['TPass_Att']
team['T3rd_Down%'] = team['T3rd_Down_Conv']/team['T3rd_Down_Att']
team['T4th_Down%'] = team['T4th_Down_Conv']/team['T4th_Down_Att']
team['TOpp_Pass_Cmp%'] = team['TOpp_Pass_Cmp']/team['TOpp_Pass_Att']
team['TOpp_3rd_Down%'] = team['TOpp_3rd_Down_Conv']/team['TOpp_3rd_Down_Att']
team['TOpp_4th_Down%'] = team['TOpp_4th_Down_Conv']/team['TOpp_4th_Down_Att']
team['Date'] = pd.to_datetime(team['Date'],errors='coerce',format='%Y-%m-%d')
# Pad both duration strings to a three-field form before timedelta parsing,
# then store them as seconds.
team['TGame_Dur'] = team['TGame_Dur']+':00'
team['TO_ToP'] = '00:'+team['TO_ToP']
team['TGame_Dur'] = pd.to_timedelta(team['TGame_Dur'],errors='coerce')
team['TGame_Dur'] = team['TGame_Dur'].dt.total_seconds()
team['TO_ToP'] = pd.to_timedelta(team['TO_ToP'],errors='coerce')
team['TO_ToP'] = team['TO_ToP'].dt.total_seconds()
# Columns 16 and 18 are deliberately left out of the float conversion.
team[team_cols[11:16]] = team[team_cols[11:16]].astype(float)
team[team_cols[17]] = team[team_cols[17]].astype(float)
team[team_cols[19:]] = team[team_cols[19:]].astype(float)
team.insert(loc=9,column='Month',value=team['Date'].dt.month)


for col in team.columns:
    print(col)


In [16]:
# Load the injury-report CSV and reshape it to one row per player and game date.
injury = pd.read_csv(rf'C:\Users\cudde\OneDrive\Podcasting\Fantasy Sidelines\Injury Data Python\Data_Collect_Clean\nfl_data\nfl_injuryreport_{Start_Year}_{End_Year}.csv',low_memory=False)
# 'Unnamed: 0' only exists in files that were saved with their index;
# injury_reports_scrape() writes index=False, so tolerate its absence.
injury.drop(columns=['Unnamed: 0'],errors='ignore',inplace=True)
injury = pd.melt(injury,id_vars=['Player','Team','Year'],var_name='Date', value_name='Status')
# The melted column names are split on 'vs. ' and then '/', i.e. they are
# expected to look like '<month>/<day>vs. <opponent>'.
injury[['Date','Opp']] = injury.Date.str.split('vs. ',expand=True)
injury[['Month','Day']] = injury.Date.str.split('/',expand=True)
# n=1 keeps any further ':' inside the injury text instead of producing a
# third column that cannot be assigned.
injury[['Status','Injury']] = injury.Status.str.split(":",n=1,expand=True)
injury.dropna(axis=0,subset=['Status','Injury'],how='all',inplace=True)
injury[['Year','Month','Day']] = injury[['Year','Month','Day']].astype(int)
# January/February dates get Year+1 so the parsed date lands in the following calendar year.
injury['Date'] = injury['Date']+'/'+(np.where(injury['Month']<=2,injury['Year']+1,injury['Year'])).astype(str)
injury['Date'] = pd.to_datetime(injury['Date'])
# Drop rows dated January 4 or later and all of February.
past_week17 = injury[(injury['Month']==1)&(injury['Day']>=4)].index
playoffs = injury[(injury['Month']==2)].index
injury.drop(past_week17,inplace = True)
injury.drop(playoffs,inplace=True)
injury.replace({'Team':\
                   {'crd':'ARI', 'atl':'ATL', 'rav':'BAL', 'buf':'BUF', 'car':'CAR', 'chi':'CHI', 'cin':'CIN',\
                    'cle':'CLE', 'dal':'DAL', 'den':'DEN', 'det':'DET', 'gnb':'GNB','htx':'HOU','clt':'IND',\
                    'jax':'JAX','kan':'KAN','sdg':'LAC','ram':'LAR','mia':'MIA','min':'MIN','nor':'NOR','nwe':'NWE',\
                    'nyg':'NYG','nyj':'NYJ','rai':'OAK','phi':'PHI','pit':'PIT','sea':'SEA','sfo':'SFO','tam':'TAM',\
                    'oti':'TEN','was':'WAS'}},inplace=True)
injury['Injury'] = injury['Injury'].str.strip(' ')
# Normalise injury labels (the duplicated 'Triceps' entry was removed; it mapped
# to the same value).
injury.replace({'Injury':{'right':'','left':'','Right':'','Left':'','Biceps':'Bicep',\
                          'Triceps':'Tricep','Ankles':'Ankle','hip':'Hip','Hips':'Hip','Knees':'Knee',\
                          'Virus':'Illness','Oblique':'Abdomen',\
                          'NotInjuryRelated':'Not Injury Related','MedicalIllness':'Illness',\
                          'LowerLeg':'Lower Leg','CoreMuscle':'Abdomen','Abdominal':'Abdomen'}},\
                           regex=True,inplace=True)
# Pad with a trailing space so a suffix at the very end of a name still matches
# the space-delimited patterns, then strip the padding again.
injury['Player'] = injury['Player']+' '
injury.replace({'Player':{' Jr. ':'',' Jr ':'',' Sr. ':'',' Sr ':'',' III ':'',' II ':'',' IV ':'',' V ':''}},regex=True,inplace=True)
injury['Player'] = injury['Player'].str.strip(' ')
injury.sort_values(by=['Month','Day'],ascending=[True,False]).head(1500)
# for col in injury.columns:
#     print(col)


Unnamed: 0,Player,Team,Year,Date,Status,Opp,Month,Day,Injury
2063939,Bryson Albright,ARI,2018,2018-09-30,Injured Reserve,SEA,9,30,Undisclosed
2063946,Christian Campbell,ARI,2018,2018-09-30,Injured Reserve,SEA,9,30,Ankle
2063947,Jeremy Cash,ARI,2018,2018-09-30,Injured Reserve,SEA,9,30,Knee
2063951,Larry Fitzgerald,ARI,2018,2018-09-30,Questionable,SEA,9,30,Hamstring
2063952,D.J. Foster,ARI,2018,2018-09-30,Injured Reserve,SEA,9,30,Knee
2063962,Gabe Martin,ARI,2018,2018-09-30,Injured Reserve,SEA,9,30,Achilles
2063963,Praise Martin-Oguike,ARI,2018,2018-09-30,Injured Reserve,SEA,9,30,Undisclosed
2063965,Arthur Moats,ARI,2018,2018-09-30,Injured Reserve,SEA,9,30,Knee
2063967,Robert Nkemdiche,ARI,2018,2018-09-30,Questionable,SEA,9,30,Knee
2063968,Jonathan Owens,ARI,2018,2018-09-30,Injured Reserve,SEA,9,30,Knee


In [None]:
# Outer-join player stats with snap counts on player, year and week.
stats_snaps = stats.merge(snaps, how='outer', on=['Player','Year','Week'])
# Team: keep the stats value and fall back to snaps. Pos: keep the snaps value
# and fall back to stats.
stats_snaps['Team_x'] = stats_snaps['Team_x'].fillna(stats_snaps['Team_y'])
stats_snaps['Pos_x'] = stats_snaps['Pos_y'].fillna(stats_snaps['Pos_x'])
stats_snaps = (
    stats_snaps
    .drop(columns=['Team_y','Pos_y'])
    .rename(columns={'Team_x':'Team','Pos_x':'Pos'})
)

print(*stats_snaps.columns, sep='\n')

In [None]:
# Outer-join the stats/snaps frame with the injury reports on player and game
# date, order by injury status, and write the result to test.csv.
stats_snaps_injury = (
    stats_snaps
    .merge(injury, how='outer', on=['Player','Date'])
    .sort_values(by='Status', ascending=True)
)
stats_snaps_injury.to_csv('test.csv')