In [None]:
import dash
import dash_core_components as dcc
import dash_html_components as html
import plotly.express as px
import pandas as pd
import datetime as dt
from bs4 import BeautifulSoup
import requests
import os
from dotenv import load_dotenv
load_dotenv()

In [None]:
def team_snaps_scrape(Start_Year,End_Year):
    """Scrape Stathead team game-finder results into ``nflteamsnaps.csv``.

    Posts the credentials found in the ``statheadusername`` and
    ``statheadpassword`` environment variables to the Stathead login page,
    then requests the team game finder (ordered by ``plays_offense``) at
    offsets 0, 100, ..., 1500. The data rows of every page are appended to
    ``nflteamsnaps.csv`` in the working directory; a header row is written
    only when that file does not exist yet (or is empty).

    Paging stops early when a page has no results table or no data rows.

    Args:
        Start_Year: Value sent as the ``year_min`` query parameter.
        End_Year: Value sent as the ``year_max`` query parameter.
    """
    out_path = 'nflteamsnaps.csv'
    page_size = 100
    max_offset = 1600

    login_url = "https://stathead.com/users/login.cgi"
    payload = {
        'username': os.environ.get('statheadusername'),
        'password': os.environ.get('statheadpassword')
    }
    url = (
        "https://stathead.com/football/tgl_finder.cgi?request=1&temperature_gtlt=lt&game_num_max=99"
        f"&week_num_max=99&order_by=plays_offense&match=game&year_max={End_Year}&order_by_asc=0"
        f"&week_num_min=0&game_type=R&game_num_min=0&year_min={Start_Year}&offset="
    )

    with requests.Session() as session:
        # The session keeps the login cookies for the paged requests below.
        session.post(login_url, data=payload)

        for offset in range(0, max_offset, page_size):
            website = session.get(url + str(offset)).text
            soup = BeautifulSoup(website, 'html')
            table = soup.find('table', attrs={'class': 'sortable', 'id': 'results'})
            if table is None:
                print(f'No results table at offset {offset}; stopping '
                      '(check the Stathead credentials if this is the first page).')
                break

            table_headers = [header.text for header in table.find('thead').find_all('th')]

            # Rows made only of <th> cells (header rows) have no <td> and are skipped,
            # otherwise they would end up as all-empty lines in the CSV.
            final_data = []
            for tr in table.find_all('tr'):
                row = [cell.text for cell in tr.find_all('td')]
                if row:
                    final_data.append(row)
            if not final_data:
                print(f'No data rows at offset {offset}; stopping.')
                break

            df = pd.DataFrame(final_data, columns=table_headers[3:])

            # player_snap_clean() reads this file by column name, so the first
            # write to a new file must carry the header row.
            write_header = not os.path.exists(out_path) or os.path.getsize(out_path) == 0
            df.to_csv(out_path, mode='a', header=write_header)
            print(offset + page_size)

def player_snap_scrape(Start_Year,End_Year):
    """Download FantasyPros snap-count reports, one CSV per season and side.

    For every year from ``Start_Year`` to ``End_Year`` (inclusive) the
    offense and defense snap-count report pages are fetched, the table with
    id ``data`` is parsed, the ``TTL`` and ``AVG`` columns are dropped, and
    the result is written to
    ``FantasyPros_Fantasy_Football_{year}_{Offense|Defense}_Snap_Counts.csv``
    in the working directory.

    Args:
        Start_Year: First season to download.
        End_Year: Last season to download (inclusive).

    Raises:
        RuntimeError: If a report page does not contain the expected table.
    """
    # (URL path suffix, label used in the output file name) for each side of the ball.
    sides = [('', 'Offense'), ('defense.php', 'Defense')]

    # No login is performed: the report pages are requested anonymously.
    for year in range(Start_Year, End_Year + 1):
        for page, label in sides:
            url = f"https://www.fantasypros.com/nfl/reports/snap-counts/{page}?year={year}"

            r = requests.get(url)
            soup = BeautifulSoup(r.content, 'html')

            table = soup.find('table', attrs={'id': 'data','class': 'table'})
            if table is None:
                raise RuntimeError(f'No snap-count table found at {url}')

            table_headers = [header.text for header in table.find('thead').find_all('th')]
            rows = [[cell.text for cell in tr.find_all('td')] for tr in table.find_all('tr')]

            # The first <tr> is the header row (no <td> cells), so it is skipped.
            snaps = pd.DataFrame(rows[1:], columns=table_headers)
            snaps = snaps.drop(['TTL','AVG'], axis=1)
            snaps.to_csv(f'FantasyPros_Fantasy_Football_{year}_{label}_Snap_Counts.csv', index=False)

            print(label, year)
    print('done')

def player_snap_clean(Start_Year,End_Year):
    """Combine the FantasyPros snap-count CSVs with the team game log.

    Reads ``FantasyPros_Fantasy_Football_{year}_{Offense|Defense}_Snap_Counts.csv``
    for every year in ``Start_Year..End_Year`` (inclusive), tags each file with
    its ``Year``, reshapes the weekly columns into ``Week`` / ``Snaps`` rows,
    normalises the team abbreviations and stacks everything into one frame.
    ``nflteamsnaps.csv`` is then cleaned (dates parsed, home/away labelled,
    time of possession converted to seconds) and outer-merged with the player
    rows on team, year and week. The result is written to
    ``player_snaps_{Start_Year}_{End_Year}.csv``.

    Any error is reported on stdout instead of being raised.

    Args:
        Start_Year: First season to include.
        End_Year: Last season to include (inclusive).
    """
    # FantasyPros abbreviation -> abbreviation used in the team game log.
    team_codes = {'GB':'GNB','JAC':'JAX','KC':'KAN','NE':'NWE','NO':'NOR','SF':
                  'SFO','TB':'TAM','Multi':None,'LV':'OAK',}

    try:
        frames = []
        for year in range(Start_Year, End_Year + 1):
            for side in ('Offense', 'Defense'):
                snaps = pd.read_csv(f'FantasyPros_Fantasy_Football_{year}_{side}_Snap_Counts.csv')
                snaps.insert(loc=3,column='Year',value=year,allow_duplicates=True)
                snaps['Year'] = snaps['Year'].astype(int)

                # Wide (one column per week) -> long (one row per player-week).
                snaps = pd.melt(snaps,id_vars=['Player','Pos','Team','Year'],var_name='Week', value_name='Snaps')
                snaps = snaps.replace({'Team': team_codes, 'Snaps': {'bye': None}})
                snaps = snaps.dropna(thresh=3)
                frames.append(snaps)

        player = pd.concat(frames, ignore_index=True)

        team = pd.read_csv('nflteamsnaps.csv')
        team = team.drop(['LTime','Y/P'],axis=1)
        team = team.rename(columns={'Unnamed: 6':'Away_Home','Time.1':'ToG'})
        team['Date'] = pd.to_datetime(team['Date'])
        team['ToG'] = pd.to_datetime(team['ToG'])
        # A blank marker means a home game, '@' an away game.
        team['Away_Home'] = team['Away_Home'].fillna('Home').replace({'@': 'Away', 'nan': 'Home'})
        team['TO'] = team['TO'].fillna(0)
        team.insert(loc=2,column='Month',value=team['Date'].dt.month)
        # "MM:SS" -> total seconds.
        df_bridge = team['ToP'].str.split(":",expand=True)
        team['ToP'] = (df_bridge[0].astype(int)*60)+df_bridge[1].astype(int)
        # Player weeks come from column names (strings), so match on strings.
        team['Week'] = team['Week'].astype(str)

        player_snaps = pd.merge(left=player,right=team,how='outer',left_on=['Team','Year','Week'],right_on=['Tm','Year','Week'])

        print(player)
        print(team)
        print(player_snaps)
        player_snaps.to_csv(f'player_snaps_{Start_Year}_{End_Year}.csv',index=False)

    except Exception as exc:
        print('Please make sure the labels are "Starting Year" and "Ending Year" in that order and they are not \
\nout of range of what is in the data. If you are still receiving and error, please consult the code or \
\nmake sure the columns are identical. All data must be pulled from FantasyPros.')
        # Surface the real cause so the failure can actually be diagnosed.
        print(f'Underlying error: {type(exc).__name__}: {exc}')


def injury_reports_scrape(Start_Year,End_Year):
    """Scrape Pro-Football-Reference team injury tables, one CSV per team and year.

    For every team code and every year from ``Start_Year`` to ``End_Year``
    (inclusive) the ``team_injuries`` table of the team's injuries page is
    parsed. Cells carrying a ``data-tip`` attribute contribute that attribute
    instead of their text. ``Team`` and ``Year`` columns are inserted after
    the first column and the frame is written, index included, to
    ``{team}_{year}_injuryreport.csv`` in the working directory.

    Args:
        Start_Year: First season to download.
        End_Year: Last season to download (inclusive).

    Raises:
        RuntimeError: If a page does not contain the injuries table.
    """
    #list of the NFL teams for url purposes
    teams = ['crd', 'atl', 'rav', 'buf', 'car', 'chi', 'cin', 'cle', 'dal', 'den', 'det', 'gnb','htx','clt','jax','kan',
             'sdg','ram','mia','min','nor','nwe','nyg','nyj','rai','phi','pit','sea','sfo','tam','oti','was']

    for team in teams:
        for year in range(Start_Year, End_Year + 1):
            key = f'{team}_{year}_injuryreport'

            #url for web scraping
            url = f'https://www.pro-football-reference.com/teams/{team}/{year}_injuries.htm'

            #opening website
            r = requests.get(url)
            soup = BeautifulSoup(r.content, 'lxml')

            #finding table
            table = soup.find('table', attrs={'class': 'sortable', 'id': 'team_injuries'})
            if table is None:
                raise RuntimeError(f'No injuries table found at {url}')

            #scraping the data: prefer the data-tip attribute over the cell text
            rows = []
            for tr in table.find_all('tr'):
                cells = tr.find_all(['th','td'])
                rows.append([cell['data-tip'] if cell.has_attr("data-tip") else cell.text for cell in cells])

            #creating dataframe to save: first row is the header, the rest is the body
            report = pd.DataFrame(rows[1:], columns=rows[0])
            report.insert(loc=1,column='Team',value=team,allow_duplicates=True)
            report.insert(loc=2,column='Year',value=year,allow_duplicates=True)
            report.to_csv(f'{key}.csv',index=True)

            # update of location of web scrape
            print(team,year)
    print('done')

def injury_reports_clean(Start_Year,End_Year):
    """Merge the per-team injury report CSVs into one long-format file.

    Reads ``{team}_{year}_injuryreport.csv`` for every team code and every
    year from ``Start_Year`` to ``End_Year`` (inclusive). Each file is reshaped
    so that every game column becomes a row, the game column name is split
    into ``Date`` and ``Opp`` on ``"vs. "``, and the status text is split into
    ``Status`` and ``Injury`` on the first ``":"``. Team codes are mapped to
    upper-case abbreviations and rows with fewer than three non-null values
    are dropped. The stacked result is written to
    ``NFL_{Start_Year}_{End_Year}_Injuryreport.csv``.

    Args:
        Start_Year: First season to include.
        End_Year: Last season to include (inclusive).
    """
    teams = ['crd','atl','rav','buf','car','chi','cin','cle','dal','den','det','gnb','htx','clt','jax','kan',
                     'sdg','ram','mia','min','nor','nwe','nyg','nyj','rai','phi','pit','sea','sfo','tam','oti','was']
    # URL team code -> abbreviation used in the cleaned output.
    team_codes = {'crd':'ARI', 'atl':'ATL', 'rav':'BAL', 'buf':'BUF', 'car':'CAR', 'chi':'CHI', 'cin':'CIN',
                  'cle':'CLE', 'dal':'DAL', 'den':'DEN', 'det':'DET', 'gnb':'GNB','htx':'HOU','clt':'IND',
                  'jax':'JAX','kan':'KAN','sdg':'LAC','ram':'LAR','mia':'MIA','min':'MIN','nor':'NOR','nwe':'NWE',
                  'nyg':'NYG','nyj':'NYJ','rai':'OAK','phi':'PHI','pit':'PIT','sea':'SEA','sfo':'SFO','tam':'TAM',
                  'oti':'TEN','was':'WAS'}

    reports = []
    for team in teams:
        for year in range(Start_Year,End_Year+1):
            # Each file is read exactly once.
            report = pd.read_csv(f'{team}_{year}_injuryreport.csv')
            report = report.drop('Unnamed: 0',axis=1)
            report = pd.melt(report,id_vars=['Player','Team','Year'],var_name='Date', value_name='Status')
            report[['Date','Opp']] = report['Date'].str.split("vs. ",expand=True)
            # n=1: split on the first colon only, so a colon inside the injury
            # description does not produce a third column.
            report[['Status','Injury']] = report['Status'].str.split(":",n=1,expand=True)
            # NOTE(review): the season year is appended to every date; games played
            # in January would get the wrong calendar year - confirm whether such
            # columns occur in the scraped tables.
            report['Date'] = report['Date'].astype(str)+'/'+report['Year'].astype(str)
            report['Date'] = pd.to_datetime(report['Date'])
            report = report.replace({'Team': team_codes})
            report = report.dropna(thresh=3)
            reports.append(report)

    nfl_injury = pd.concat(reports,ignore_index=True)
    nfl_injury.to_csv(f'NFL_{Start_Year}_{End_Year}_Injuryreport.csv',index=False)
    print('done')

def player_stats_scape(Start_Year,End_Year):
    """Scrape Stathead player game-finder results into one CSV.

    Posts the credentials found in the ``statheadusername`` and
    ``statheadpassword`` environment variables to the Stathead login page,
    then requests the player game finder (ordered by ``all_td``) at offsets
    0, 100, ..., 61400. The first page creates
    ``player_stats_{Start_Year}_{End_Year}.csv`` (with header, replacing any
    existing file); every later page is appended to it.

    Paging stops early when a page has no results table or no data rows.

    Args:
        Start_Year: Value sent as the ``year_min`` query parameter.
        End_Year: Value sent as the ``year_max`` query parameter.
    """
    out_path = f'player_stats_{Start_Year}_{End_Year}.csv'
    page_size = 100
    max_offset = 61500

    login_url = "https://stathead.com/users/login.cgi"
    payload = {
        'username': os.environ.get('statheadusername'),
        'password': os.environ.get('statheadpassword')
    }
    url = (
        "https://stathead.com/football/pgl_finder.cgi?request=1&game_num_max=99&week_num_max=99&order_by=all_td"
        "&season_start=1&qb_gwd=0&order_by_asc=0&qb_comeback=0&week_num_min=0&game_num_min=0"
        f"&year_min={Start_Year}&match=game"
        f"&year_max={End_Year}&season_end=-1&age_min=0&game_type=R&age_max=99"
        "&positions[]=qb&positions[]=rb&positions[]=wr"
        "&positions[]=te&positions[]=e&positions[]=t&positions[]=g&positions[]=c&positions[]=ol&positions[]=dt&positions[]=de"
        "&positions[]=dl&positions[]=ilb&positions[]=olb&positions[]=lb&positions[]=cb&positions[]=s&positions[]=db&positions[]=k"
        "&positions[]=p&cstat[1]=two_pt_md&ccomp[1]=gt&cval[1]=0&cstat[2]=rush_att&ccomp[2]=gt&cval[2]=0&cstat[3]=pass_att"
        "&ccomp[3]=gt&cval[3]=0&cstat[4]=punt_ret_td&ccomp[4]=gt&cval[4]=0&cstat[5]=kick_ret_td&ccomp[5]=gt&cval[5]=0"
        "&cstat[6]=targets&ccomp[6]=gt&cval[6]=0&cstat[7]=touches&ccomp[7]=gt&cval[7]=0&cstat[8]=fumbles&ccomp[8]=gt&cval[8]=0&offset="
    )

    with requests.Session() as session:
        # The session keeps the login cookies for the paged requests below.
        session.post(login_url, data=payload)

        first_page = True
        for offset in range(0, max_offset, page_size):
            website = session.get(url + str(offset)).text
            soup = BeautifulSoup(website, 'html')

            table = soup.find('table', attrs={'class': 'sortable', 'id': 'results'})
            if table is None:
                print(f'No results table at offset {offset}; stopping '
                      '(check the Stathead credentials if this is the first page).')
                break

            table_headers = [header.text for header in table.find('thead').find_all('th')]

            # Rows made only of <th> cells (header rows) have no <td> and are skipped.
            final_data = []
            for tr in table.find_all('tr'):
                row = [cell.text for cell in tr.find_all('td')]
                if row:
                    final_data.append(row)
            if not final_data:
                print(f'No data rows at offset {offset}; stopping.')
                break

            df = pd.DataFrame(final_data, columns=table_headers[10:])
            # First page starts the file with a header; later pages append so
            # earlier pages are not overwritten.
            df.to_csv(out_path, mode='w' if first_page else 'a', header=first_page, index=False)
            first_page = False
            print(offset + page_size)

In [None]:
# Scrape the Stathead team game logs for 2017-2019 into nflteamsnaps.csv.
# Requires the statheadusername / statheadpassword environment variables.
team_snaps_scrape(2017,2019)

In [None]:
# Download the FantasyPros offense/defense snap-count tables for 2017-2019
# (one FantasyPros_Fantasy_Football_{year}_{side}_Snap_Counts.csv per season and side).
player_snap_scrape(2017,2019)

In [None]:
# Download the Pro-Football-Reference injury tables for every team, 2017-2019
# (one {team}_{year}_injuryreport.csv per team and season).
injury_reports_scrape(2017,2019)

In [None]:
# Merge the FantasyPros snap-count CSVs with nflteamsnaps.csv.
# Both scrape cells for snaps must have been run first so those files exist.
player_snap_clean(2017,2019)

In [None]:
# Combine the per-team injury report CSVs into NFL_2017_2019_Injuryreport.csv.
# The injury scrape cell must have been run first so the per-team files exist.
injury_reports_clean(2017,2019)

In [None]:
# Scrape the Stathead player game logs for 2017-2019 into player_stats_2017_2019.csv.
# Requires the statheadusername / statheadpassword environment variables.
player_stats_scape(2017,2019)