In [None]:
import dash
import dash_core_components as dcc
import dash_html_components as html
import plotly.express as px
import pandas as pd
import datetime as dt
from bs4 import BeautifulSoup
import requests
import os
from dotenv import load_dotenv
# Load settings from a .env file (python-dotenv); the Stathead scrapers below read
# 'statheadusername' and 'statheadpassword' through os.environ.get.
load_dotenv()
# Display every column when a DataFrame is shown (None removes the column limit).
pd.set_option('display.max_columns', None)

def team_data_scrape(Start_Year,End_Year,url):
    """Scrape a Stathead team game-log query page by page into a CSV file.

    Posts the ``statheadusername`` / ``statheadpassword`` environment
    variables to the Stathead login URL, then requests the selected query at
    offsets 0, 100, 200, ... and appends the rows of each page's ``results``
    table to ``nflteam_data_<1|2>_<Start_Year>_<End_Year>.csv``.

    Args:
        Start_Year: Substituted into the query as ``year_min``.
        End_Year: Substituted into the query as ``year_max``.
        url: ``1`` or ``2``; selects which of the two query URLs is scraped
            and which output file is written.

    Returns:
        None. For any other ``url`` a message is printed and nothing is
        requested or written.
    """
    if url == 1:
        stat_url = 'https://stathead.com/football/tgl_finder.cgi?request=1&temperature_gtlt=lt&game_num_max=99&week_num_max=99&order_by=points&match=game&year_max={End_Year}&order_by_asc=0&week_num_min=0&game_type=E&game_num_min=0&year_min={Start_Year}&cstat[1]=all_td_team&ccomp[1]=gt&cval[1]=0&cstat[2]=third_down_att&ccomp[2]=gt&cval[2]=0&cstat[3]=vegas_line&ccomp[3]=gt&cval[3]=-50&cstat[4]=penalties&ccomp[4]=gt&cval[4]=0&cstat[5]=rush_att&ccomp[5]=gt&cval[5]=0&cstat[6]=tot_yds&ccomp[6]=gt&cval[6]=0&cstat[7]=first_down&ccomp[7]=gt&cval[7]=0&cstat[8]=punt&ccomp[8]=gt&cval[8]=0&cstat[9]=pass_cmp&ccomp[9]=gt&cval[9]=0&offset={page}'
        file_number = 1
    elif url == 2:
        stat_url = 'https://stathead.com/football/tgl_finder.cgi?request=1&temperature_gtlt=lt&game_num_max=99&week_num_max=99&order_by=all_td_opp&match=game&year_max={End_Year}&order_by_asc=0&week_num_min=0&game_type=R&game_num_min=0&year_min={Start_Year}&cstat[1]=tot_yds_opp&ccomp[1]=gt&cval[1]=0&cstat[2]=rush_yds_diff&ccomp[2]=gt&cval[2]=-500&cstat[3]=score_diff_thru_1&ccomp[3]=gt&cval[3]=-500&cstat[4]=rush_att_opp&ccomp[4]=gt&cval[4]=0&cstat[5]=kick_ret_td_tgl&ccomp[5]=gt&cval[5]=0&cstat[6]=pass_cmp_opp&ccomp[6]=gt&cval[6]=0&cstat[7]=first_down_opp&ccomp[7]=gt&cval[7]=0&cstat[8]=score_diff_1_qtr&ccomp[8]=gt&cval[8]=-500&cstat[9]=third_down_att_opp&ccomp[9]=gt&cval[9]=0&offset={page}'
        file_number = 2
    else:
        # Without this early return the function would continue with no
        # query URL bound and fail later with a NameError.
        print("Please select 1 or 2.")
        return

    page = 0
    stat_login_url = "https://stathead.com/users/login.cgi"
    stat_payload = {
        'username': os.environ.get('statheadusername'),
        'password': os.environ.get('statheadpassword')
    }
    out_path = rf'C:\Users\cudde\OneDrive\Podcasting\Fantasy Sidelines\Injury Data Python\Data_Collect_Clean\snapcounts\nflteam_data_{file_number}_{Start_Year}_{End_Year}.csv'

    with requests.Session() as session:

        # Log in once; the session keeps whatever cookies the login sets.
        session.post(stat_login_url, data=stat_payload)

        while page < 100000:

            website = session.get(stat_url.format(Start_Year=Start_Year,End_Year=End_Year,page=page)).text
            soup = BeautifulSoup(website, 'html')
            table = soup.find('table', attrs={'class': 'sortable', 'id': 'results'})

            if table is None:
                # No results table on this page -- presumably the offset ran
                # past the last result (or the login failed). Stop paging.
                break

            table_headers = [header.text for header in table.find('thead').find_all('th')]

            # One list of cell texts per <tr>; rows made only of <th> cells
            # come out as empty lists.
            rows = [[cell.text for cell in tr.find_all('td')] for tr in table.find_all('tr')]
            body = rows[1:]

            if not any(body):
                # Table present but without a single data cell: nothing to save.
                break

            # The first 12 <th> entries are skipped so the remaining labels
            # line up with the <td> cells -- TODO confirm against the page layout.
            df = pd.DataFrame(body, columns=table_headers[12:])

            print(page)

            # NOTE: mode='a' with the default header writes the column header
            # line again for every page appended to the file.
            df.to_csv(out_path,mode='a',index=False)

            page += 100

        print('Done')
        print(page)

def player_snap_scrape(Start_Year,End_Year):
    """Download FantasyPros snap-count tables and save them as one CSV.

    For every year in ``Start_Year..End_Year`` (inclusive) both report pages
    (the default page and ``defense.php``) are fetched, the table with id
    ``data`` is parsed, a ``Year`` column is added, and all tables are written
    to ``snapcounts_<Start_Year>_<End_Year>.csv`` without the index.

    Args:
        Start_Year: First year requested.
        End_Year: Last year requested (inclusive).

    Returns:
        None.
    """
    # Local import keeps this function self-contained.
    from io import StringIO

    sides = ['','defense.php']

    ENDPOINT = "https://www.fantasypros.com/nfl/reports/snap-counts/{side}?year={year}"

    # Collect per-page frames and concatenate once at the end instead of
    # re-copying the growing frame on every iteration.
    frames = []

    for year in range(Start_Year, End_Year+1):

        for side in sides:

            res = requests.get(ENDPOINT.format(year=year,side=side))

            soup = BeautifulSoup(res.content, 'html.parser')

            table = soup.find('table', {'id': 'data'})

            if table is None:
                # Without this guard read_html would be handed the text "None"
                # and raise, discarding everything scraped so far.
                print(f'No snap-count table found for {year} {side}; skipping')
                continue

            # Wrap the markup in StringIO: newer pandas deprecates passing
            # literal HTML strings to read_html.
            df = pd.read_html(StringIO(str(table)))[0]

            # Keep the first three and last two labels; prefix the ones in
            # between with "Week ".
            df.columns = df.columns[:3].tolist() + [f'Week {i}' for i in df.columns[3:-2]] + df.columns[-2:].tolist()

            df['Year'] = year

            # Move the new Year column to fourth position.
            cols = df.columns[:3].tolist() + df.columns[-1:].tolist() + df.columns[3:-1].tolist()
            df = df[cols]

            frames.append(df)
            print(year,side)

    final_df = pd.concat(frames) if frames else pd.DataFrame()

    final_df.to_csv(rf'C:\Users\cudde\OneDrive\Podcasting\Fantasy Sidelines\Injury Data Python\Data_Collect_Clean\snapcounts\snapcounts_{Start_Year}_{End_Year}.csv',index=False)
    print('Done')

def injury_reports_scrape(Start_Year,End_Year):
    """Scrape team injury tables from pro-football-reference.com into one CSV.

    For every year in ``Start_Year..End_Year`` (inclusive) and every team code
    in the list below, the ``team_injuries`` table is read. A cell's
    ``data-tip`` attribute is used when present, otherwise its text. ``Team``
    and ``Year`` columns are inserted and all tables are written to
    ``nlf_injuryreport_<Start_Year>_<End_Year>.csv`` without the index.

    Args:
        Start_Year: First year requested.
        End_Year: Last year requested (inclusive).

    Returns:
        None.
    """
    teams = ['crd', 'atl', 'rav', 'buf', 'car', 'chi', 'cin', 'cle', 'dal', 'den', 'det', 'gnb','htx','clt','jax','kan',
             'sdg','ram','mia','min','nor','nwe','nyg','nyj','rai','phi','pit','sea','sfo','tam','oti','was']

    ENDPOINT = 'https://www.pro-football-reference.com/teams/{team}/{year}_injuries.htm'

    # Collected per team/year and concatenated once after the loops.
    frames = []

    for year in range(Start_Year, End_Year+1):

        for team in teams:

            res = requests.get(ENDPOINT.format(year=year,team=team))

            soup = BeautifulSoup(res.content, 'lxml')

            table = soup.find('table', attrs={'class': 'sortable', 'id': 'team_injuries'})

            if table is None:
                # Page has no injury table; skip it rather than abort the run.
                print(f'No injury table found for {team} {year}; skipping')
                continue

            rows = [
                [cell['data-tip'] if cell.has_attr("data-tip") else cell.text
                 for cell in tr.find_all(['th','td'])]
                for tr in table.find_all('tr')
            ]

            if len(rows) < 2:
                # Header only (or nothing at all): no player rows to keep.
                print(f'Empty injury table for {team} {year}; skipping')
                continue

            # First row holds the column labels, the rest are player rows.
            header, body = rows[0], rows[1:]
            df = pd.DataFrame(body, columns=header)

            df.insert(loc=1,column='Team',value=team)
            df.insert(loc=2,column='Year',value=year)

            frames.append(df)
            print(team,year)

    final_df = pd.concat(frames) if frames else pd.DataFrame()

    final_df.to_csv(rf'C:\Users\cudde\OneDrive\Podcasting\Fantasy Sidelines\Injury Data Python\Data_Collect_Clean\injury_reports\nlf_injuryreport_{Start_Year}_{End_Year}.csv',index=False)
    print('Done')

def player_stats_scape(Start_Year,End_Year):
    """Scrape a Stathead player game-log query page by page into a CSV file.

    Posts the ``statheadusername`` / ``statheadpassword`` environment
    variables to the Stathead login URL, then requests the query at offsets
    0, 100, 200, ... and appends the rows of each page's ``results`` table to
    ``player_stats_<Start_Year>_<End_Year>.csv``. The current offset is
    printed each time it passes another multiple of 5000.

    Args:
        Start_Year: Substituted into the query as ``year_min``.
        End_Year: Substituted into the query as ``year_max``.

    Returns:
        None.
    """
    page = 0
    location = 5000
    stat_login_url = "https://stathead.com/users/login.cgi"
    stat_payload = {
        'username': os.environ.get('statheadusername'),
        'password': os.environ.get('statheadpassword')
    }
    stat_url = "https://stathead.com/football/pgl_finder.cgi?request=1&game_num_max=99&week_num_max=99&order_by=all_td&season_start=1&qb_gwd=0&order_by_asc=0&qb_comeback=0&week_num_min=0&game_num_min=0&year_min={Start_Year}&match=game&year_max={End_Year}&season_end=-1&age_min=0&game_type=R&age_max=99&positions[]=qb&positions[]=rb&positions[]=wr&positions[]=te&positions[]=e&positions[]=t&positions[]=g&positions[]=c&positions[]=ol&positions[]=dt&positions[]=de&positions[]=dl&positions[]=ilb&positions[]=olb&positions[]=lb&positions[]=cb&positions[]=s&positions[]=db&positions[]=k&positions[]=p&cstat[1]=punt_ret&ccomp[1]=gt&cval[1]=0&cstat[2]=sacks&ccomp[2]=gt&cval[2]=0&cstat[3]=fumbles&ccomp[3]=gt&cval[3]=0&cstat[4]=rush_att&ccomp[4]=gt&cval[4]=0&cstat[5]=pass_defended&ccomp[5]=gt&cval[5]=0&cstat[6]=pass_cmp&ccomp[6]=gt&cval[6]=0&cstat[7]=targets&ccomp[7]=gt&cval[7]=0&cstat[8]=kick_ret&ccomp[8]=gt&cval[8]=0&offset={page}"
    out_path = rf'C:\Users\cudde\OneDrive\Podcasting\Fantasy Sidelines\Injury Data Python\Data_Collect_Clean\player_stats\player_stats_{Start_Year}_{End_Year}.csv'

    with requests.Session() as session:

        # Log in once; the session keeps whatever cookies the login sets.
        session.post(stat_login_url, data=stat_payload)

        while page < 100000:

            website = session.get(stat_url.format(Start_Year=Start_Year,End_Year=End_Year,page=page)).text
            soup = BeautifulSoup(website, 'html')
            table = soup.find('table', attrs={'class': 'sortable', 'id': 'results'})

            if table is None:
                # No results table on this page -- presumably the offset ran
                # past the last result (or the login failed). Stop paging.
                break

            table_headers = [header.text for header in table.find('thead').find_all('th')]

            # One list of cell texts per <tr>; rows made only of <th> cells
            # come out as empty lists.
            rows = [[cell.text for cell in tr.find_all('td')] for tr in table.find_all('tr')]
            body = rows[1:]

            if not any(body):
                # Table present but without a single data cell: nothing to save.
                break

            # The first 11 <th> entries are skipped so the remaining labels
            # line up with the <td> cells -- TODO confirm against the page layout.
            df = pd.DataFrame(body, columns=table_headers[11:])

            # NOTE: mode='a' with the default header writes the column header
            # line again for every page appended to the file.
            df.to_csv(out_path,mode='a',index=False)

            # Progress output roughly every 5000 rows.
            if page > location:
                print(page)
                location += 5000

            page += 100

        print('Done')
        print(page)

def player_snap_stats_clean(Start_Year,End_Year):
    """Reshape the snap-count CSV to long form and merge it with player stats.

    Reads ``snapcounts_<Start_Year>_<End_Year>.csv``, melts the weekly columns
    into ``Week`` / ``Snaps`` rows, normalises some team codes and the week
    labels, outer-merges the result with
    ``player_stats_<Start_Year>_<End_Year>.csv`` on Team/Year/Week and writes
    ``player_team_snaps_<Start_Year>_<End_Year>.csv``.

    Args:
        Start_Year: First year, used only to build the file names.
        End_Year: Last year, used only to build the file names.

    Returns:
        None.
    """
    snaps = pd.read_csv(rf'C:\Users\cudde\OneDrive\Podcasting\Fantasy Sidelines\Injury Data Python\Data_Collect_Clean\snapcounts\snapcounts_{Start_Year}_{End_Year}.csv')

    # Drop only the helper columns that are actually present. The snap-count
    # file is written with index=False, so 'Unnamed: 0' normally does not
    # exist and an unconditional drop raises KeyError.
    unwanted = [name for name in ['Unnamed: 0','TTL','AVG'] if name in snaps.columns]
    snaps = snaps.drop(columns=unwanted)

    snaps = pd.melt(snaps, id_vars=['Player','Pos','Team','Year'], var_name='Week', value_name='Snaps')
    snaps['Year'] = snaps['Year'].astype(str)

    team_codes = {'FA':'','GB':'GNB','JAC':'JAX','KC':'KAN','Multi':'','NE':'NWE','NO':'NOR','SF':'SFO','TB':'TAM'}
    # 'Week 1' -> '1' ... 'Week 17' -> '17'
    week_labels = {f'Week {number}': str(number) for number in range(1, 18)}
    snaps.replace({'Team': team_codes, 'Week': week_labels},inplace=True)

    stats = pd.read_csv(rf'C:\Users\cudde\OneDrive\Podcasting\Fantasy Sidelines\Injury Data Python\Data_Collect_Clean\player_stats\player_stats_{Start_Year}_{End_Year}.csv')

    # NOTE(review): the merge keys carry no player identifier, so every stats
    # row is paired with every snap row of the same team/year/week -- confirm
    # this is intended.
    combined = pd.merge(left=stats,right=snaps,how='outer',on=['Team','Year','Week'])

    combined.to_csv(rf'C:\Users\cudde\OneDrive\Podcasting\Fantasy Sidelines\Injury Data Python\Data_Collect_Clean\snapcounts\player_team_snaps_{Start_Year}_{End_Year}.csv',index=False)

    print("Done")

def injury_reports_clean(Start_Year,End_Year):
    """Combine per-team, per-year injury report CSVs into one long-form file.

    Reads ``<team>_<year>_injuryreport.csv`` from the working directory for
    every team code and every year in ``Start_Year..End_Year`` (inclusive),
    melts the game columns into ``Date`` / ``Status`` rows, splits out
    ``Opp`` and ``Injury``, converts ``Date`` to datetime, maps team codes to
    upper-case abbreviations and writes
    ``NFL_<Start_Year>_<End_Year>_Injuryreport.csv``.

    Args:
        Start_Year: First year to load.
        End_Year: Last year to load (inclusive).

    Returns:
        None.
    """
    teams = ['crd','atl','rav','buf','car','chi','cin','cle','dal','den','det','gnb','htx','clt','jax','kan',
             'sdg','ram','mia','min','nor','nwe','nyg','nyj','rai','phi','pit','sea','sfo','tam','oti','was']

    team_codes = {'crd':'ARI', 'atl':'ATL', 'rav':'BAL', 'buf':'BUF', 'car':'CAR', 'chi':'CHI', 'cin':'CIN',
                  'cle':'CLE', 'dal':'DAL', 'den':'DEN', 'det':'DET', 'gnb':'GNB','htx':'HOU','clt':'IND',
                  'jax':'JAX','kan':'KAN','sdg':'LAC','ram':'LAR','mia':'MIA','min':'MIN','nor':'NOR','nwe':'NWE',
                  'nyg':'NYG','nyj':'NYJ','rai':'OAK','phi':'PHI','pit':'PIT','sea':'SEA','sfo':'SFO','tam':'TAM',
                  'oti':'TEN','was':'WAS'}

    dfname = [f'{team}_{year}_injuryreport' for team in teams for year in range(Start_Year,End_Year+1)]

    # Read each file exactly once, after the full list of names is known,
    # rather than re-reading every file collected so far on each iteration.
    data = {key: pd.read_csv(f'{key}.csv') for key in dfname}

    for key, report in data.items():
        # Tolerate files saved without an index column.
        report = report.drop(columns='Unnamed: 0',errors='ignore')
        report = pd.melt(report,id_vars=['Player','Team','Year'],var_name='Date', value_name='Status')
        report[['Date','Opp']] = report.Date.str.split("vs. ",expand=True)
        # Split on the first ':' only, so a description that itself contains
        # ':' cannot produce extra columns.
        report[['Status','Injury']] = report.Status.str.split(":",n=1,expand=True)
        report['Date'] = report['Date'].astype(str)+'/'+report['Year'].astype(str)
        report['Date'] = pd.to_datetime(report['Date'])
        report = report.replace({'Team': team_codes})
        # Keep rows that have at least three non-missing values.
        report = report.dropna(thresh=3)
        data[key] = report

    nfl_injury = pd.concat(data.values(),ignore_index=True)
    nfl_injury.to_csv(f'NFL_{Start_Year}_{End_Year}_Injuryreport.csv',index=False)
    print('Done')

In [None]:
# Season range for this run. Both spellings are kept: the commented-out calls
# in the cells below use Start/End, while the file paths further down use
# Start_Year/End_Year.
Start = Start_Year = 2017
End = End_Year = 2019

In [None]:
# team_data_scrape(Start,End,1)

In [None]:
# player_snap_scrape(Start,End)

In [None]:
# injury_reports_scrape(Start,End)

In [None]:
# player_stats_scape(Start,End)

In [None]:
# player_snap_stats_clean(Start,End)

In [None]:
# Bare string literal, not a call: injury_reports_clean is NOT executed by this cell.
'injury_reports_clean(Start,End)'

In [None]:
# Load the two team game-log CSVs (the files team_data_scrape writes for url 1 and url 2).
team1 = pd.read_csv(rf'C:\Users\cudde\OneDrive\Podcasting\Fantasy Sidelines\Injury Data Python\Data_Collect_Clean\snapcounts\nflteam_data_1_{Start_Year}_{End_Year}.csv')
team2 = pd.read_csv(rf'C:\Users\cudde\OneDrive\Podcasting\Fantasy Sidelines\Injury Data Python\Data_Collect_Clean\snapcounts\nflteam_data_2_{Start_Year}_{End_Year}.csv')
# Keep only rows with at least 10 non-missing values
# (presumably removes the blank rows the scraper emits for header-only <tr> rows -- confirm).
team1.dropna(thresh=10,inplace=True)
team2.dropna(thresh=10,inplace=True)
# The 'LTime' column is not used further.
team1.drop('LTime',axis=1,inplace=True)
team2.drop(['LTime'],axis=1,inplace=True)
# Column labels captured before the renames below (not referenced again in the visible code).
test1 = team1.columns
test2 = team2.columns
# Give team1's columns descriptive names. Labels such as 'Yds.1' or 'Unnamed: 5' are
# presumably the names pandas.read_csv generated for repeated/blank headers -- confirm.
# NOTE(review): the key '3DATT' differs in case from '3DAtt', the label `cols` further
# down still expects; if the CSV header is '3DAtt' this entry never matches.
team1.rename(columns={'Tm':'Team','Unnamed: 5':'Away_Home','PF':'Points_For','PA':'Points_Against','PC':'Points_Comb',\
                     'vs. Line':'Vs_Line','Cmp':'Pass_Cmp','Att':'Pass_Att','Cmp%':'Pass_Cmp%','Yds':'Pass_Yds',\
                      'TD':'Pass_TD','Int':'Pass_Int','Sk':'Sack','Yds.1':'Sack_Yds','Rate':'QB_Rate',\
                      'Att.1':'Rush_Att','Yds.2':'Rush_Yds','Y/A':'Rush_Y/A','TD.1':'Rush_TD','Tot':'Tot_Yds',\
                      'Ply':'O_Play#','Y/P':'O_Y/P','DPly':'D_Play#','DY/P':'D_Y/P','TO':'Tot_TO','ToP':'O_ToP',\
                      'Time.1':'Game_Dur','Yds.3':'Pen_Yds','OppPen':'Opp_Pen','OppYds':'Opp_Pen_Yds',\
                      'CombPen':'Comb_Pen','CombPenYds':'Comb_Pen_Yds','1stD':'1st_Downs','Rsh':'1st_by_Rsh',\
                      'Pass':'1st_by_Pass','Pen.1':'1st_by_Pen','3DATT':'3rd_Down_Att','3DConv':'3rd_Down_Conv',\
                      '3D%':'3rd_Down%','4DAtt':'4th_Down_Att','4DConv':'4th_Down_Conv','4D%':'4th_Down%',\
                      'TD.2':'Tot_TD','XPA':'XP_Att','XPM':'XP_Made','FGA':'FG_Att','FGM':'FG_Made','2PA':'2Pt_Att',\
                      '2PM':'2Pt_Made','Pnt':'Times_Punted','Yds.4':'Punt_Yds','Y/P.1':'Punt_Yds_Avg'},inplace=True)
# Same for team2; its statistics get an 'Opp_' / 'Margin_' / 'Score_Diff_' style prefix.
# NOTE(review): 'Opp3DATT' likewise differs in case from 'Opp3DAtt' used in `cols` -- confirm.
team2.rename(columns={'Tm':'Team','Unnamed: 5':'Away_Home','TD':'Opp_Tot_TD','XPA':'Opp_XP_Att','XPM':'Opp_XP_Made',\
                      'Att':'Opp_FG_Att','Md':'Opp_FG_Made','Sfty':'Opp_Sfty','Cmp':'Opp_Pass_Cmp','Att.1':'Opp_Pass_Att',\
                      'Cmp%':'Opp_Pass_Cmp%','Yds':'Opp_Pass_Yds','TD.1':'Opp_Pass_TD','Int':'Opp_Pass_Int','Sk':'Opp_Sk',\
                      'Yds.1':'Opp_Sk_Yds','Rate':'Opp_QB_Rate','Att.2':'Opp_Rush_Att','Yds.2':'Opp_Rush_Yds',\
                      'Y/A':'Opp_Rush_Y/A','TD.2':'Opp_Rush_TD','Tot':'Opp_Tot_Yds','TO':'Opp_Tot_TO',\
                      '1stDOpp':'Opp_1st_Downs','Rush':'Opp_1st_by_Rsh','Pass':'Opp_1st_by_Pass','Pen':'Opp_1st_by_Pen',\
                      'Opp3DATT':'Opp_3rd_Down_Att','Opp3DConv':'Opp_3rd_Down_Conv','Opp3D%':'Opp_3rd_Down%',\
                      'Opp4DAtt':'Opp_4th_Down_Att','Opp4DConv':'Opp_4th_Down_Conv','Opp4D%':'Opp_4th_Down%',\
                      'Rush.1':'Margin_Rush','Pass.1':'Margin_Pass','Tot.1':'Margin_TotYds','TO.1':'TO_TD',\
                      'KR':'KR_TD','PR':'PR_TD','IR':'Int_TD','FR':'Fmb_TD','OR':'OtherRet_TD',\
                      'RetTD':'All_Ret_TD','Q1':'Mar_Thru_Q1','Q2':'Mar_Thru_Q2','Q3':'Mar_Thru_Q3',\
                      'Q1.1':'Score_Diff_Q1','Q2.1':'Score_Diff_Q2','Q3.1':'Score_Diff_Q3',\
                      'Q4':'Score_Diff_Q4','1stHalf':'Score_Diff_1stHalf','2ndHalf':'Score_Diff_2ndHalf'},inplace=True)

# Outer-join the two frames on the columns that identify a single team-game,
# so rows present in only one file are kept.
team = pd.merge(left=team1,right=team2,\
                 how='outer',\
                 on=['Team','Year','Date','Time','Away_Home','Opp','Week','G#','Day','Result','OT'])

# Statistic columns whose missing values are filled with 0 further down.
# Note '3DAtt' and 'Opp3DAtt' appear here under their un-renamed labels.
cols = ['Points_For','Points_Against',\
        'PD','Points_Comb','Spread','Vs_Line','Over/Under','OU Result','Pass_Cmp','Pass_Att','Pass_Cmp%','Pass_Yds',\
        'Pass_TD','Pass_Int','Sack','Sack_Yds','QB_Rate','Rush_Att','Rush_Yds','Rush_Y/A','Rush_TD','Tot_Yds','O_Play#',\
        'O_Y/P','D_Play#','D_Y/P','Tot_TO','O_ToP','Game_Dur','Pen','Pen_Yds','Opp_Pen','Opp_Pen_Yds','Comb_Pen','Comb_Pen_Yds',\
        '1st_Downs','1st_by_Rsh','1st_by_Pass','1st_by_Pen','3DAtt','3rd_Down_Conv','3rd_Down%','4th_Down_Att','4th_Down_Conv',\
        '4th_Down%','Tot_TD','XP_Att','XP_Made','FG_Att','FG_Made','2Pt_Att','2Pt_Made','Sfty','Times_Punted','Punt_Yds',\
        'Punt_Yds_Avg','Opp_Tot_TD','Opp_XP_Att','Opp_XP_Made','Opp_FG_Att','Opp_FG_Made','Opp_Sfty','Opp_Pass_Cmp',\
        'Opp_Pass_Att','Opp_Pass_Cmp%','Opp_Pass_Yds','Opp_Pass_TD','Opp_Pass_Int','Opp_Sk','Opp_Sk_Yds','Opp_QB_Rate',\
        'Opp_Rush_Att','Opp_Rush_Yds','Opp_Rush_Y/A','Opp_Rush_TD','Opp_Tot_Yds','Opp_Tot_TO','Opp_1st_Downs','Opp_1st_by_Rsh',\
        'Opp_1st_by_Pass','Opp_1st_by_Pen','Opp3DAtt','Opp_3rd_Down_Conv','Opp_3rd_Down%','Opp_4th_Down_Att',\
        'Opp_4th_Down_Conv','Opp_4th_Down%','Margin_Rush','Margin_Pass','Margin_TotYds','TO_TD','KR_TD','PR_TD','Int_TD',\
        'Fmb_TD','OtherRet_TD','All_Ret_TD','Mar_Thru_Q1','Mar_Thru_Q2','Mar_Thru_Q3','Score_Diff_Q1','Score_Diff_Q2',\
        'Score_Diff_Q3','Score_Diff_Q4','Score_Diff_1stHalf','Score_Diff_2ndHalf']

# Spell out the venue marker: '@' becomes 'Away'; the None entry is meant to turn
# missing markers into 'Home' -- TODO confirm pandas matches NaN for the None key.
team.replace({'Away_Home':{'@':'Away',None:'Home'}},inplace=True)
# Missing statistics become 0.
team[cols] = team[cols].fillna(value=0)
# Parse the game date; values that do not match YYYY-MM-DD become NaT.
team['Date'] = pd.to_datetime(team['Date'],errors='coerce',format='%Y-%m-%d')
# Pad both duration strings to three colon-separated fields before parsing:
# ':00' is appended to Game_Dur and '00:' is prepended to O_ToP
# (presumably H:MM and MM:SS respectively -- confirm against the CSV).
team['Game_Dur'] = team['Game_Dur']+':00'
team['O_ToP'] = '00:'+team['O_ToP']
# Convert to timedeltas (unparseable values become NaT), then to seconds.
team['Game_Dur'] = pd.to_timedelta(team['Game_Dur'],errors='coerce')
team['Game_Dur'] = team['Game_Dur'].dt.total_seconds()
team['O_ToP'] = pd.to_timedelta(team['O_ToP'],errors='coerce')
team['O_ToP'] = team['O_ToP'].dt.total_seconds()
# Add the calendar month of the game as a new column at position 9.
team.insert(loc=9,column='Month',value=team['Date'].dt.month)