In [2]:
# Video: https://www.youtube.com/watch?v=2JDR6jv0fGA
import pandas as pd
import numpy as np

# Seasons to scrape, kept as strings because they are concatenated into URLs below.
# NOTE(review): range(2013, 2023) stops at 2022, while the cache files written later are
# named '*_2013-2023.csv' -- confirm whether 2023 is meant to be included.
seasons = [str(season) for season in range(2013, 2023)]

# Pro-Football-Reference team URL code -> full team name.
# Fixes: 'kan' was mapped to 'San Diego Chargers' (so the Chiefs were missing and the
# Chargers appeared twice), and 'rai' was missing the trailing "s" in "Raiders".
full_team_names = {
    'crd': 'Arizona Cardinals',
    'atl': 'Atlanta Falcons',
    'rav': 'Baltimore Ravens',
    'buf': 'Buffalo Bills',
    'car': 'Carolina Panthers',
    'chi': 'Chicago Bears',
    'cin': 'Cincinnati Bengals',
    'cle': 'Cleveland Browns',
    'dal': 'Dallas Cowboys',
    'den': 'Denver Broncos',
    'det': 'Detroit Lions',
    'gnb': 'Green Bay Packers',
    'htx': 'Houston Texans',
    'clt': 'Indianapolis Colts',
    'jax': 'Jacksonville Jaguars',
    'kan': 'Kansas City Chiefs',
    'sdg': 'Los Angeles Chargers',
    'ram': 'Los Angeles Rams',
    'rai': 'Las Vegas Raiders',
    'mia': 'Miami Dolphins',
    'min': 'Minnesota Vikings',
    'nwe': 'New England Patriots',
    'nor': 'New Orleans Saints',
    'nyg': 'New York Giants',
    'nyj': 'New York Jets',
    'phi': 'Philadelphia Eagles',
    'pit': 'Pittsburgh Steelers',
    'sea': 'Seattle Seahawks',
    'sfo': 'San Francisco 49ers',
    'tam': 'Tampa Bay Buccaneers',
    'oti': 'Tennessee Titans',
    'was': 'Washington Redskins',
}
# URL codes in dictionary order; this list drives the scraping loops.
teams = list(full_team_names.keys())
print(len(seasons))

10


In [3]:
import random
import time
import os

# Build (or load from cache) the per-game boxscores for every team and season.
# For each team/season the 'gamelog<season>' and 'gamelog_opp<season>' tables of the
# Pro-Football-Reference gamelog page are joined side by side. The combined table is
# cached as a CSV so the site is only scraped when the cache file is missing.
import io
import urllib.request

folder_path = 'csv_files'
file_name = 'gamelogs_2013-2023.csv'

# exist_ok avoids the separate "does the folder exist" check.
os.makedirs(folder_path, exist_ok=True)

file_path = os.path.join(folder_path, file_name)

if not os.path.exists(file_path):
    # One frame per (season, team); concatenated once at the end instead of
    # re-copying the growing frame on every iteration.
    team_frames = []
    for season in seasons:
        for team in teams:
            # getting the gamelog for every team in every year
            url = "https://www.pro-football-reference.com/teams/" + team + "/" + season + "/gamelog/"
            print(url)

            # Download the page once and parse both tables from the same HTML;
            # previously the identical URL was requested twice per team.
            with urllib.request.urlopen(url) as response:
                page_html = response.read().decode('utf-8', errors='replace')
            off_df = pd.read_html(io.StringIO(page_html), header=1, attrs={'id': 'gamelog' + season})[0]
            def_df = pd.read_html(io.StringIO(page_html), header=1, attrs={'id': 'gamelog_opp' + season})[0]
            team_df = pd.concat([off_df, def_df], axis=1)

            team_df.insert(loc=0, column='Season', value=season)
            team_df.insert(loc=2, column='Team', value=full_team_names[team])

            team_frames.append(team_df)

            # Pause between teams to stay polite to the site.
            time.sleep(random.randint(4, 5))

        # Short progress line instead of dumping the whole accumulated frame.
        print(season, 'done -', len(team_frames), 'team gamelogs collected so far')

    # pd.concat raises ValueError on an empty list, so an empty cache file is never written.
    pd.concat(team_frames, ignore_index=True).to_csv(file_path, index=False)

# Always load from the cache, even right after scraping: read_csv de-duplicates the
# repeated column names of the two joined tables (e.g. 'Week' / 'Week.1') and infers
# dtypes, so a fresh scrape and a cached run hand the same frame to the cells below.
boxscore_df = pd.read_csv(file_path)

In [4]:
# Give the unnamed / ambiguous scraped columns descriptive names.
# 'Unnamed: 4' and 'Unnamed: 6' become the Win and Home columns that are
# encoded numerically in the next cell.
new_columns = dict([
    ('Unnamed: 4', 'Win'),
    ('Unnamed: 6', 'Home'),
    ('Tm', 'Off_Pts'),
    ('Opp.1', 'Def_Pts'),
])
boxscore_df = boxscore_df.rename(new_columns, axis='columns')

In [5]:
# Encode the Win, Home and OT text columns as numbers.
# Each encoder passes already-encoded values through unchanged, so re-running this cell
# is harmless. Previously a second run turned every Win into 0.5, every Home into 1 and
# every OT into 0, because the numeric values no longer matched 'W' / 'L' / '@' / 'OT'.
def _encode_win(value):
    """Return 1 for 'W', 0 for 'L', and 0.5 for anything else."""
    if value == 'W':
        return 1
    if value == 'L':
        return 0
    if value in (0, 1):  # already encoded by an earlier run of this cell
        return value
    # NOTE(review): every value that is neither 'W' nor 'L' (including missing results)
    # is scored 0.5 -- confirm that rows without a result should be treated like ties.
    return 0.5


def _encode_home(value):
    """Return 0 when the value is the away marker '@' (or an already-encoded 0), else 1."""
    return 0 if value == '@' or value == 0 else 1


def _encode_ot(value):
    """Return 1 when the value is 'OT' (or an already-encoded 1), else 0."""
    return 1 if value == 'OT' or value == 1 else 0


boxscore_df['Win'] = boxscore_df['Win'].apply(_encode_win)
boxscore_df['Home'] = boxscore_df['Home'].apply(_encode_home)
boxscore_df['OT'] = boxscore_df['OT'].apply(_encode_ot)

In [6]:
# Create a new dataframe to work on. .copy() makes it independent of boxscore_df;
# a bare [:] slice does not guarantee that later edits stay out of the original frame.
matchup_df = boxscore_df.copy()
print(matchup_df.columns)

Index(['Season', 'Week', 'Team', 'Day', 'Date', 'Unnamed: 3', 'Win', 'OT',
       'Home', 'Opp', 'Off_Pts', 'Def_Pts', 'Cmp', 'Att', 'Yds', 'TD', 'Int',
       'Sk', 'Yds.1', 'Y/A', 'NY/A', 'Cmp%', 'Rate', 'Att.1', 'Yds.2', 'Y/A.1',
       'TD.1', 'FGM', 'FGA', 'XPM', 'XPA', 'Pnt', 'Yds.3', '3DConv', '3DAtt',
       '4DConv', '4DAtt', 'ToP', 'Week.1', 'Day.1', 'Date.1', 'Unnamed: 3.1',
       'Unnamed: 4.1', 'OT.1', 'Unnamed: 6.1', 'Opp.2', 'Tm.1', 'Opp.1.1',
       'Cmp.1', 'Att.2', 'Yds.4', 'TD.2', 'Int.1', 'Sk.1', 'Yds.1.1', 'Y/A.2',
       'NY/A.1', 'Cmp%.1', 'Rate.1', 'Att.1.1', 'Yds.2.1', 'Y/A.1.1', 'TD.1.1',
       'FGM.1', 'FGA.1', 'XPM.1', 'XPA.1', 'Pnt.1', 'Yds.3.1', '3DConv.1',
       '3DAtt.1', '4DConv.1', '4DAtt.1', 'ToP.1'],
      dtype='object')


In [7]:
# Drop the columns that would only be available after the game (scores, box-score
# stats, OT flag) together with the unnamed filler column.
# Columns are selected by name rather than dropped by position: the positional version
# removed 'Win' and 'Opp' when the cell was run a second time, and silently picked the
# wrong columns if the column order ever changed. A missing column now raises KeyError.
pregame_columns = ['Season', 'Week', 'Team', 'Day', 'Date', 'Win', 'Home', 'Opp']
matchup_df = matchup_df[pregame_columns]

In [8]:
# Show which columns remain in matchup_df after the drop above.
print(matchup_df.columns)

Index(['Season', 'Week', 'Team', 'Day', 'Date', 'Win', 'Home', 'Opp'], dtype='object')


In [9]:
# Preview matchup_df as plain text (pandas abbreviates the middle rows with "...").
print(matchup_df)

      Season  Week                 Team  Day          Date  Win  Home  \
0       2013     1    Arizona Cardinals  Sun   September 8  0.0     0   
1       2013     2    Arizona Cardinals  Sun  September 15  1.0     1   
2       2013     3    Arizona Cardinals  Sun  September 22  0.0     0   
3       2013     4    Arizona Cardinals  Sun  September 29  1.0     0   
4       2013     5    Arizona Cardinals  Sun     October 6  1.0     1   
...      ...   ...                  ...  ...           ...  ...   ...   
5721    2023    13  Washington Redskins  Sun    December 3  0.5     1   
5722    2023    15  Washington Redskins  Sun   December 17  0.5     0   
5723    2023    16  Washington Redskins  Sun   December 24  0.5     0   
5724    2023    17  Washington Redskins  Sun   December 31  0.5     1   
5725    2023    18  Washington Redskins  Sun     January 7  0.5     1   

                       Opp  
0           St. Louis Rams  
1            Detroit Lions  
2       New Orleans Saints  
3     T

In [12]:
# Will need to implement so it updates weekly

# https://www.pro-football-reference.com/teams/mia/2023.htm#team_stats
# Example link for team rankings

import random
import time
import os

# Build (or load from cache) the team rankings (Offense/Defense) for every team and season.
# For each team page the 'team_stats' and 'team_conversions' tables are joined side by
# side and the rows labelled 0 and 1 are discarded (presumably leaving the league-rank
# rows -- TODO confirm against the page layout).
#
# Fixes relative to the earlier version of this cell:
#   * every team's rows are accumulated; before, each iteration overwrote the previous
#     frame, so only the last team of the last season reached the CSV;
#   * the page is downloaded once per team instead of twice;
#   * the unused, hard-coded 'cvn_url' was removed.
# NOTE: a cache file written by the earlier version contains a single team only --
# delete it so it is rebuilt.
import io
import urllib.request

folder_path = 'csv_files'
file_name = 'rankings_2013-2023.csv'

os.makedirs(folder_path, exist_ok=True)

file_path = os.path.join(folder_path, file_name)

if not os.path.exists(file_path):
    # One frame per (season, team); concatenated once after the loops.
    ranking_frames = []
    for season in seasons:
        for team in teams:
            rks_url = "https://www.pro-football-reference.com/teams/" + team + "/" + season +".htm#team_stats"
            print(rks_url)

            # Both tables live on the same page, so fetch it once and parse it twice.
            with urllib.request.urlopen(rks_url) as response:
                page_html = response.read().decode('utf-8', errors='replace')
            tm_rk_df = pd.read_html(io.StringIO(page_html), header=1, attrs={'id': 'team_stats'})[0]
            cvn_df = pd.read_html(io.StringIO(page_html), header=1, attrs={'id': 'team_conversions'})[0]

            team_rankings = pd.concat([tm_rk_df, cvn_df], axis=1)
            team_rankings.insert(loc=0, column='Season', value=season)
            team_rankings.insert(loc=1, column='Team', value=full_team_names[team])
            team_rankings = team_rankings.drop([0, 1])

            ranking_frames.append(team_rankings)

            # Pause between teams to stay polite to the site.
            time.sleep(random.randint(4, 5))

        # Short progress line instead of printing a whole frame.
        print(season, 'done -', len(ranking_frames), 'team pages collected so far')

    # pd.concat raises ValueError on an empty list, so an empty cache file is never written.
    pd.concat(ranking_frames, ignore_index=True).to_csv(file_path, index=False)

# Always load from the cache so a fresh scrape and a cached run produce the same frame.
rankings_df = pd.read_csv(file_path)
# Backward-compatible alias: the earlier version of this cell left its result in rkgs_df.
rkgs_df = rankings_df

https://www.pro-football-reference.com/teams/crd/2013.htm#team_stats
https://www.pro-football-reference.com/teams/atl/2013.htm#team_stats
https://www.pro-football-reference.com/teams/rav/2013.htm#team_stats
https://www.pro-football-reference.com/teams/buf/2013.htm#team_stats
https://www.pro-football-reference.com/teams/car/2013.htm#team_stats
https://www.pro-football-reference.com/teams/chi/2013.htm#team_stats
https://www.pro-football-reference.com/teams/cin/2013.htm#team_stats
https://www.pro-football-reference.com/teams/cle/2013.htm#team_stats
https://www.pro-football-reference.com/teams/dal/2013.htm#team_stats
https://www.pro-football-reference.com/teams/den/2013.htm#team_stats
https://www.pro-football-reference.com/teams/det/2013.htm#team_stats
https://www.pro-football-reference.com/teams/gnb/2013.htm#team_stats
https://www.pro-football-reference.com/teams/htx/2013.htm#team_stats
https://www.pro-football-reference.com/teams/clt/2013.htm#team_stats
https://www.pro-football-reference

KeyboardInterrupt: 