In [3]:

from urllib.parse import urlparse, unquote # to process a URL and get the filename
from bs4 import BeautifulSoup # to parse the HTML
from tqdm import tqdm # to create the progress bar
import pandas as pd # to read in the spreadsheet
import requests # to make the request to the website
import os # to get directory information
import subprocess # to run shell commands
import json 
import nflscraPy
import time

In [None]:
# Scrape per-game statistics and metadata for the opening week(s) of each
# configured season and write one CSV per week to
#   <data_folder>/<season>/Week <week>/week_<week>_data.csv
with open('config.json', 'r') as f:
    config = json.load(f)

data_folder = config['data_folder']
start_year = config['start_year']
end_year   = config['end_year']

# The season loop stops as soon as it reaches this year, so this season and
# every later one in the configured range are not scraped.
STOP_SEASON = 2017
# Number of distinct weeks (in gamelog row order) to scrape per season.
# The same value drives both the scraping loop and the gamelog filter so the
# written CSVs never contain weeks whose boxscores were not fetched.
MAX_WEEKS_PER_SEASON = 1
# Pause after each game's pair of requests, in seconds.
REQUEST_DELAY_SECONDS = 1

for season in range(start_year, end_year + 1):
    if season == STOP_SEASON:
        break

    season_folder = os.path.join(data_folder, str(season))
    os.makedirs(season_folder, exist_ok=True)

    # One row per game; the code below relies on the 'week' and
    # 'boxscore_stats_link' columns being present.
    gamelogs = nflscraPy._gamelogs(season)
    gamelog_stats_list = []
    gamelog_meta_list = []
    scraped_weeks = []

    for _, row in gamelogs.iterrows():
        current_week = row['week']
        if current_week not in scraped_weeks:
            # A new week label: stop once the per-season week budget is used.
            if len(scraped_weeks) >= MAX_WEEKS_PER_SEASON:
                break
            scraped_weeks.append(current_week)

        link = row['boxscore_stats_link']
        game_stats = nflscraPy._gamelog_statistics(link)
        gamelog_meta = nflscraPy._gamelog_metadata(link)
        time.sleep(REQUEST_DELAY_SECONDS)

        gamelog_stats_list.append(game_stats)
        gamelog_meta_list.append(gamelog_meta)

    # pd.concat raises ValueError on an empty list, so skip seasons for which
    # nothing was scraped instead of crashing the whole run.
    if not gamelog_stats_list:
        print(f"No games scraped for season {season}; skipping.")
        continue

    # Keep only the gamelog rows for weeks that were actually scraped.
    # (Previously the first two sorted week labels were kept while only one
    # week was scraped, producing rows/CSVs with all-NaN statistics.)
    filtered_gamelogs = gamelogs[gamelogs['week'].isin(scraped_weeks)]

    stats_df = pd.concat(gamelog_stats_list, ignore_index=True)
    meta_df = pd.concat(gamelog_meta_list, ignore_index=True)

    # Left joins keep every selected gamelog row even if a scrape returned
    # no matching statistics/metadata row for its boxscore link.
    season_data = pd.merge(filtered_gamelogs, stats_df, on='boxscore_stats_link', how='left')
    season_data = pd.merge(season_data, meta_df, on='boxscore_stats_link', how='left')
    season_data['season_year'] = season

    # Write one CSV per week into its own "Week <week>" sub-folder.
    for week in season_data['week'].unique():
        week_folder = os.path.join(season_folder, f"Week {week}")
        os.makedirs(week_folder, exist_ok=True)

        week_data = season_data[season_data['week'] == week]
        week_file = os.path.join(week_folder, f"week_{week}_data.csv")
        week_data.to_csv(week_file, index=False)


~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Processing Season Gamelogs For: https://www.pro-football-reference.com/years/2014/games.htm
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Processing Gamelog Statistics For: https://www.pro-football-reference.com/boxscores/201409040sea.htm
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Processing Gamelog Metadata For: https://www.pro-football-reference.com/boxscores/201409040sea.htm
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
P

  meta_df = pd.concat(gamelog_meta_list, ignore_index=True)


~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Processing Season Gamelogs For: https://www.pro-football-reference.com/years/2015/games.htm
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Processing Gamelog Statistics For: https://www.pro-football-reference.com/boxscores/201509100nwe.htm
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Processing Gamelog Metadata For: https://www.pro-football-reference.com/boxscores/201509100nwe.htm
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
P

  meta_df = pd.concat(gamelog_meta_list, ignore_index=True)


~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Processing Season Gamelogs For: https://www.pro-football-reference.com/years/2016/games.htm
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Processing Gamelog Statistics For: https://www.pro-football-reference.com/boxscores/201609080den.htm
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Processing Gamelog Metadata For: https://www.pro-football-reference.com/boxscores/201609080den.htm
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
P

  meta_df = pd.concat(gamelog_meta_list, ignore_index=True)
