In [12]:
import dash
import dash_core_components as dcc
import dash_html_components as html
import plotly.express as px
import pandas as pd
import datetime as dt
from bs4 import BeautifulSoup
import requests
import os
from dotenv import load_dotenv
load_dotenv()

def team_snaps_scrape(Start_Year,End_Year):
    """Scrape Stathead team game-finder results into one CSV file.

    Posts the credentials found in the ``statheadusername`` and
    ``statheadpassword`` environment variables to the Stathead login URL,
    then requests the game-finder query built below at offsets
    0, 100, ..., 9900. The ``<td>`` cells of every results row are collected
    across all pages and written once, after paging finishes, to
    ``nflteamsnaps_{Start_Year}_{End_Year}.csv`` in the current working
    directory.

    Args:
        Start_Year: Value sent as the query's ``year_min``.
        End_Year: Value sent as the query's ``year_max``.

    Paging stops at the first response that has no ``results`` table or no
    data rows. If no page yields any rows, nothing is written.
    """
    stat_login_url = "https://stathead.com/users/login.cgi"
    stat_user_name = os.environ.get('statheadusername')
    stat_password = os.environ.get('statheadpassword')
    stat_payload = {
        'username': stat_user_name,
        'password': stat_password
    }
    stat_url = f"https://stathead.com/football/tgl_finder.cgi?request=1&temperature_gtlt=lt&game_num_max=99&week_num_max=99&order_by=vegas_line&match=game&year_max={str(End_Year)}&order_by_asc=0&week_num_min=0&game_type=R&game_num_min=0&year_min={str(Start_Year)}&offset="

    page_frames = []

    with requests.Session() as session:

        # Log in first so the following GETs reuse the session's cookies
        session.post(stat_login_url, data=stat_payload)

        for page in range(0, 10000, 100):

            website = session.get(stat_url + str(page)).text
            soup = BeautifulSoup(website, 'html')
            table = soup.find('table', attrs={'class': 'sortable', 'id': 'results'})

            # No results table on this page: there is nothing more to read
            if table is None:
                break

            table_headers = [header.text for header in table.find('thead').find_all('th')]

            final_data = []
            for tr in table.find_all('tr'):
                row = [cell.text for cell in tr.find_all('td')]
                # Rows without any <td> cell carry no data
                if row:
                    final_data.append(row)

            if not final_data:
                break

            # NOTE(review): the first three header cells are skipped so the
            # remaining names line up with the <td> cells - confirm against
            # the live table layout.
            page_frames.append(pd.DataFrame(final_data, columns=table_headers[3:]))
            print(page + 100)

    if not page_frames:
        print('No results table found; nothing written')
        return

    # Write once at the end so later pages do not overwrite earlier ones
    all_pages = pd.concat(page_frames, ignore_index=True)
    all_pages.to_csv(f'nflteamsnaps_{Start_Year}_{End_Year}.csv',index=False)

def player_snap_scrape(Start_Year,End_Year):
    """Download FantasyPros snap-count tables and save them as one CSV.

    For each year from ``Start_Year`` through ``End_Year`` (inclusive) the
    table with id ``data`` is read from the snap-count report page. The
    columns between the first three and the last two are renamed to
    ``Week <original name>``, and a ``Year`` column is added and moved to the
    fourth position. All years are concatenated and written to the
    hard-coded ``snapcounts_{Start_Year}_{End_Year}.csv`` path.
    """
    url_template = "https://www.fantasypros.com/nfl/reports/snap-counts/?year={year}"
    out_path = rf'C:\Users\cudde\OneDrive\Podcasting\Fantasy Sidelines\Injury Data Python\Data_Collect_Clean\snapcounts\snapcounts_{Start_Year}_{End_Year}.csv'

    # Seeded with an empty frame so the concatenation always has an input
    yearly_frames = [pd.DataFrame()]

    for season in range(Start_Year, End_Year + 1):
        response = requests.get(url_template.format(year=season))
        markup = BeautifulSoup(response.content, 'html.parser')
        snap_table = markup.find('table', {'id': 'data'})
        season_df = pd.read_html(str(snap_table))[0]

        # Prefix the middle columns with "Week "
        labels = season_df.columns.tolist()
        season_df.columns = labels[:3] + [f'Week {i}' for i in labels[3:-2]] + labels[-2:]

        # Append Year, then move it right after the first three columns
        season_df['Year'] = season
        labels = season_df.columns.tolist()
        season_df = season_df[labels[:3] + labels[-1:] + labels[3:-1]]

        yearly_frames.append(season_df)

    pd.concat(yearly_frames).to_csv(out_path)
    print('Done')

def player_snap_clean(Start_Year,End_Year):
    """Combine player snap-count CSVs with the team game log and save the result.

    For every year from ``Start_Year`` through ``End_Year`` (inclusive) this
    reads ``FantasyPros_Fantasy_Football_{year}_Offense_Snap_Counts.csv`` and
    the matching ``..._Defense_...`` file from the current working directory,
    tags the rows with that year, reshapes the remaining columns into
    ``Week`` / ``Snaps`` pairs and normalises a few team codes. The player
    rows are then outer-merged on ``Team``, ``Year`` and ``Week`` with
    ``nflteamsnaps_{Start_Year}_{End_Year}.csv`` and written to
    ``player_snaps_{Start_Year}_{End_Year}.csv``.

    Each CSV is read exactly once, and the year comes straight from the loop
    variable. Team columns are updated by assignment rather than by
    ``inplace=True`` on a selected column, which is not guaranteed to write
    back to the parent frame.
    """
    team_codes = {'GB':'GNB','JAC':'JAX','KC':'KAN','NE':'NWE','NO':'NOR','SF':
                  'SFO','TB':'TAM','Multi':None,'LV':'OAK'}

    player_frames = []
    for year in range(Start_Year, End_Year + 1):
        for side in ('Offense', 'Defense'):
            frame = pd.read_csv(f'FantasyPros_Fantasy_Football_{year}_{side}_Snap_Counts.csv')
            frame.insert(loc=3,column='Year',value=year,allow_duplicates=True)
            # One row per player per remaining column; the column name becomes Week
            frame = pd.melt(frame,id_vars=['Player','Pos','Team','Year'],var_name='Week', value_name='Snaps')
            frame.replace({'Team': team_codes, 'Snaps': {'bye': None}},inplace=True)
            frame.dropna(thresh=3,inplace=True)
            player_frames.append(frame)

    player = pd.concat(player_frames, ignore_index=True)

    team = pd.read_csv(f'nflteamsnaps_{Start_Year}_{End_Year}.csv')
    # Keep only rows with at least five populated cells
    team.dropna(thresh=5,inplace=True)
    team.drop(['LTime','Y/P'],axis=1,inplace=True)
    team.rename(columns={'Unnamed: 5':'Away_Home','Time.1':'ToG','Tm':'Team'},inplace=True)
    team['Date'] = pd.to_datetime(team['Date'])
    team['ToG'] = pd.to_datetime(team['ToG'])
    # A missing marker means Home and '@' means Away
    team['Away_Home'] = team['Away_Home'].fillna('Home').replace({'@': 'Away', 'nan': 'Home'})
    team['TO'] = team['TO'].fillna(0)
    team.insert(loc=2,column='Month',value=team['Date'].dt.month)
    # "MM:SS" text -> MM * 60 + SS
    top_parts = team['ToP'].str.split(":",expand=True)
    team['ToP'] = (top_parts[0].astype(int)*60)+top_parts[1].astype(int)
    # Week labels on the player side are strings taken from column names
    team['Week'] = team['Week'].astype(str)

    player_snaps = pd.merge(left=player,right=team,how='outer',on=['Team','Year','Week'])

    player_snaps.to_csv(f'player_snaps_{Start_Year}_{End_Year}.csv',index=False)
    print("Done")

def injury_reports_scrape(Start_Year,End_Year):
    """Scrape per-team injury report tables and save one CSV per team and year.

    For every team code below and every year from ``Start_Year`` through
    ``End_Year`` (inclusive) the ``team_injuries`` table is read from
    pro-football-reference.com. Each cell contributes its ``data-tip``
    attribute when present, otherwise its text. The first table row supplies
    the column names. ``Team`` and ``Year`` columns are inserted and the
    frame is written (index included) to
    ``{team code}_{year}_injuryreport.csv`` inside the hard-coded
    ``injury_reports`` folder.

    The file name is built from the team code and the season, which is the
    naming that ``injury_reports_clean`` reads back.
    """
    #list of the NFL teams for url purposes
    teams = ['crd', 'atl', 'rav', 'buf', 'car', 'chi', 'cin', 'cle', 'dal', 'den', 'det', 'gnb','htx','clt','jax','kan',
             'sdg','ram','mia','min','nor','nwe','nyg','nyj','rai','phi','pit','sea','sfo','tam','oti','was']

    for team in teams:
        for year in range(Start_Year, End_Year + 1):

            #url for web scraping
            url = f'https://www.pro-football-reference.com/teams/{team}/{year}_injuries.htm'

            #opening website
            r = requests.get(url)
            soup = BeautifulSoup(r.content, 'lxml')

            #finding table
            table = soup.find('table', attrs={'class': 'sortable', 'id': 'team_injuries'})
            table_rows = table.find_all('tr')

            #scraping the data: prefer the data-tip attribute over the cell text
            final_data = [
                [cell['data-tip'] if cell.has_attr("data-tip") else cell.text
                 for cell in tr.find_all(['th','td'])]
                for tr in table_rows
            ]

            #creating dataframe to save: first row is the header, the rest are data
            report = pd.DataFrame(final_data[1:], columns=final_data[0])
            report.insert(loc=1,column='Team',value=team,allow_duplicates=True)
            report.insert(loc=2,column='Year',value=year,allow_duplicates=True)
            report.to_csv(rf'C:\Users\cudde\OneDrive\Podcasting\Fantasy Sidelines\Injury Data Python\Data_Collect_Clean\injury_reports\{team}_{year}_injuryreport.csv',index=True)

    print('Done')

def injury_reports_clean(Start_Year,End_Year):
    """Merge the per-team injury report CSVs into one long-format CSV.

    Reads ``{team}_{year}_injuryreport.csv`` from the current working
    directory for every team code below and every year from ``Start_Year``
    through ``End_Year`` (inclusive). Each file is reshaped so that every
    column other than ``Player``, ``Team`` and ``Year`` becomes one row with:

    * ``Date`` - the part of the column name before ``"vs. "``, with the
      year appended and parsed as a datetime,
    * ``Opp`` - the part after ``"vs. "``,
    * ``Status`` / ``Injury`` - the cell value split at the first ``":"``.

    Team codes are mapped to upper-case abbreviations and everything is
    written to ``NFL_{Start_Year}_{End_Year}_Injuryreport.csv``.

    Each CSV is read exactly once. Both splits are limited to one split so
    that a value containing the separator more than once still yields
    exactly two columns.
    """
    teams = ['crd','atl','rav','buf','car','chi','cin','cle','dal','den','det','gnb','htx','clt','jax','kan',
                     'sdg','ram','mia','min','nor','nwe','nyg','nyj','rai','phi','pit','sea','sfo','tam','oti','was']
    team_codes = {'crd':'ARI', 'atl':'ATL', 'rav':'BAL', 'buf':'BUF', 'car':'CAR', 'chi':'CHI', 'cin':'CIN',
                  'cle':'CLE', 'dal':'DAL', 'den':'DEN', 'det':'DET', 'gnb':'GNB','htx':'HOU','clt':'IND',
                  'jax':'JAX','kan':'KAN','sdg':'LAC','ram':'LAR','mia':'MIA','min':'MIN','nor':'NOR','nwe':'NWE',
                  'nyg':'NYG','nyj':'NYJ','rai':'OAK','phi':'PHI','pit':'PIT','sea':'SEA','sfo':'SFO','tam':'TAM',
                  'oti':'TEN','was':'WAS'}

    reports = []
    for team in teams:
        for year in range(Start_Year,End_Year+1):
            report = pd.read_csv(f'{team}_{year}_injuryreport.csv')
            # The saved index comes back as an unnamed first column
            report.drop('Unnamed: 0',axis=1,inplace=True)
            report = pd.melt(report,id_vars=['Player','Team','Year'],var_name='Date', value_name='Status')
            report[['Date','Opp']] = report['Date'].str.split("vs. ",n=1,expand=True)
            report[['Status','Injury']] = report['Status'].str.split(":",n=1,expand=True)
            # Column names hold no year, so append it before parsing
            report['Date'] = report['Date'].astype(str)+'/'+report['Year'].astype(str)
            report['Date'] = pd.to_datetime(report['Date'])
            report.replace({'Team': team_codes},inplace=True)
            report.dropna(thresh=3,inplace=True)
            reports.append(report)

    nfl_injury = pd.concat(reports,ignore_index=True)
    nfl_injury.to_csv(f'NFL_{Start_Year}_{End_Year}_Injuryreport.csv',index=False)
    print('Done')

def player_stats_scape(Start_Year,End_Year):
    """Scrape FantasyPros weekly stat tables per position into DataFrames.

    Posts the credentials from the ``fantasyprosusername`` and
    ``fantasyprospassword`` environment variables to the FantasyPros login
    URL, then requests the weekly stats page for every position, week and
    year combination through that same session. Each table is stored under
    the key ``player_stats_{pos}_{year}_{week}_scrape``. The frames are only
    kept in memory and printed; nothing is written to disk.

    Args:
        Start_Year: First season to request.
        End_Year: Last season to request (inclusive).

    The environment variable names carry no trailing spaces, and pages are
    fetched with the logged-in session rather than a fresh request.
    """
    pro_login_url = "https://secure.fantasypros.com/accounts/login/"
    pro_user_name = os.environ.get('fantasyprosusername')
    pro_password = os.environ.get('fantasyprospassword')
    pro_payload = {
        'username': pro_user_name,
        'password': pro_password
    }

    position = ['qb','rb','wr','te','dl','lb','db']
    # Only week 1 is requested
    weeks = list(range(1,2))
    years = list(range(Start_Year, End_Year + 1))

    data = {}

    with requests.Session() as session:

        # Log in first so the following GETs reuse the session's cookies
        session.post(pro_login_url, data=pro_payload)

        for pos in position:
            for week in weeks:
                for year in years:

                    key = f'player_stats_{pos}_{year}_{week}_scrape'

                    url = f"https://www.fantasypros.com/nfl/stats/{pos}.php?league=3836944&year={year}&week={week}&range=week"

                    r = session.get(url)
                    soup = BeautifulSoup(r.content, 'html')

                    table = soup.find('table', attrs={'id': 'data','class': 'table'})
                    table_headers = [header.text for header in table.find('thead').find_all('th')]
                    table_rows = table.find_all('tr')

                    final_data = [[cell.text for cell in tr.find_all('td')] for tr in table_rows]

                    # The first collected row is skipped, as it is not a data row
                    data[key] = pd.DataFrame(final_data[1:], columns=table_headers)

                    print(key)
        print(data)
        print('Done')

In [13]:
Start = 2017
End = 2019

In [None]:
#team_snaps_scrape(Start,End)

In [14]:
"player_snap_scrape(Start,End)""

Done


In [None]:
injury_reports_scrape(Start,End)

In [None]:
#player_snap_clean(Start,End)

In [None]:
#injury_reports_clean(Start,End)

In [None]:
#player_stats_scape(Start,End)

In [None]:
# Stand-alone version of the snap-count scrape, run directly in the notebook.
# It uses the notebook-level Start / End range.
ENDPOINT = "https://www.fantasypros.com/nfl/reports/snap-counts/?year={year}"

final_df = pd.DataFrame()

for year in range(Start, End+1):
    res = requests.get(ENDPOINT.format(year=year))

    soup = BeautifulSoup(res.content, 'html.parser')

    table = soup.find('table', {'id': 'data'})

    df = pd.read_html(str(table))[0]

    # Prefix the columns between the first three and the last two with "Week "
    df.columns = df.columns[:3].tolist() + [f'Week {i}' for i in df.columns[3:-2]] + df.columns[-2:].tolist()

    df['Year'] = year

    # Move the new Year column right after the first three columns
    cols = df.columns[:3].tolist() + df.columns[-1:].tolist() + df.columns[3:-1].tolist()
    df = df[cols]

    final_df = pd.concat([final_df, df])

final_df.to_csv(rf'C:\Users\cudde\OneDrive\Podcasting\Fantasy Sidelines\Injury Data Python\Data_Collect_Clean\snapcounts\snapcounts_{Start}_{End}.csv')
print('Done')

In [None]:
# Stand-alone version of the injury report scrape, run directly in the
# notebook. It uses the notebook-level Start / End range.
teams = ['crd', 'atl', 'rav', 'buf', 'car', 'chi', 'cin', 'cle', 'dal', 'den', 'det', 'gnb','htx','clt','jax','kan',
         'sdg','ram','mia','min','nor','nwe','nyg','nyj','rai','phi','pit','sea','sfo','tam','oti','was']
ENDPOINT = 'https://www.pro-football-reference.com/teams/{team}/{year}_injuries.htm'

for year in range(Start, End+1):
    for team in teams:
        res = requests.get(ENDPOINT.format(year=year,team=team))

        soup = BeautifulSoup(res.content, 'lxml')

        table = soup.find('table', attrs={'class': 'sortable', 'id': 'team_injuries'})
        table_rows = table.find_all('tr')

        # Prefer a cell's data-tip attribute over its text
        final_data = []
        for tr in table_rows:
            cells = tr.find_all(['th','td'])
            row = [cell['data-tip'] if cell.has_attr("data-tip") else cell.text for cell in cells]
            final_data.append(row)

        # First row is the header, the remaining rows are data
        report = pd.DataFrame(final_data[1:], columns=final_data[0])

        # team is the team code and year the season, so they are used directly
        report.insert(loc=1,column='Team',value=team,allow_duplicates=True)
        report.insert(loc=2,column='Year',value=year,allow_duplicates=True)
        report.to_csv(rf'C:\Users\cudde\OneDrive\Podcasting\Fantasy Sidelines\Injury Data Python\Data_Collect_Clean\injury_reports\{team}_{year}_injuryreport.csv',index=True)

print('Done')