In [1]:
import requests
import pandas as pd
import json
import config
import time
import os.path
from pathlib import Path

In [2]:
# Subscription key for the api.sportsdata.io requests below; it is read from the
# local `config` module and sent in the 'Ocp-Apim-Subscription-Key' header.
key = config.sports_key

### Define the functions required to execute the get requests and loop over the weeks of the season to build the dataframe

Fetch the data for each week of a season, then save it as a .csv file named after the season parameter passed to the API (e.g. `Data/2018REG.csv`).

In [3]:

def get_players_by_week(season, week):
    
    """
    Fetch every player's stat line for one week of a season.

    Function takes in two parameters:
    - Season: Input in the format '2020REG' or '2020POST' (string)
    - Week: Week of play (int)
    
    Outputs the decoded JSON body of the response.

    Raises:
    - requests.HTTPError if the API answers with a 4xx/5xx status
    - requests.Timeout if the server does not answer within 30 seconds
    """

    # Define the URL being used to access the data
    URL = f'https://api.sportsdata.io/api/nfl/fantasy/json/PlayerGameStatsByWeek/{season}/{week}'

    # The subscription key is sent as a request header.  A timeout keeps a stalled
    # connection from hanging the notebook forever.
    r = requests.get(URL, headers = {'Ocp-Apim-Subscription-Key': f'{key}'}, timeout = 30)

    # Fail loudly on an error status (bad key, rate limit, unknown season) instead of
    # returning the error payload as if it were player data
    r.raise_for_status()

    # Convert response to JSON object
    return r.json()

def get_scores(season, week):
    
    """
    Fetch the game scores for one week of a season.

    Function takes in two parameters:
    - Season: Input in the format '2020REG' or '2020POST' (string)
    - Week: Week of play (int)
    
    Outputs the decoded JSON body of the response.

    Raises:
    - requests.HTTPError if the API answers with a 4xx/5xx status
    - requests.Timeout if the server does not answer within 30 seconds
    """

    # Define the URL being used to access the data
    URL = f'https://api.sportsdata.io/api/nfl/odds/json/ScoresByWeek/{season}/{week}'

    # The subscription key is sent as a request header.  A timeout keeps a stalled
    # connection from hanging the notebook forever.
    r = requests.get(URL, headers = {'Ocp-Apim-Subscription-Key': f'{key}'}, timeout = 30)

    # Fail loudly on an error status (bad key, rate limit, unknown season) instead of
    # returning the error payload as if it were score data
    r.raise_for_status()

    # Convert response to JSON object
    return r.json()

def get_season_stats(season, weeks_to_get):
    
    """
    Download weekly player stats for one season and save them as a CSV.

    Take in two parameters:
    - season: Input in the format '2018REG'
    - weeks_to_get: Number of weeks of data to get.  Will always start at week 1
    
    Sends a request for player stats for each week of that season.  Converts each
    response to a dataframe using json_normalize() and stacks the weekly dataframes
    in week order.  The result is written to 'Data/{season}.csv' (the 'Data'
    directory is created if it is missing) and returned.

    The function sleeps 300 seconds after every request, including the last one
    (presumably so that back-to-back calls stay rate-limited as well).
    """
    
    # Collect one dataframe per week and concatenate once at the end.
    # DataFrame.append was removed in pandas 2.0 and re-copied the frame on every call.
    weekly_frames = []
    
    # Loop over the weeks in the season, turning the response to a dataframe object
    for i in range(1, weeks_to_get + 1):
        print(f'Getting stats for week {i} of {season}.')
        
        # Store JSON object as temporary variable
        temp = get_players_by_week(season, i)
        # Convert JSON object to dataframe and keep it for the final concat
        weekly_frames.append(pd.json_normalize(temp))
        print('Sleeping for 300 seconds')
        
        # Respect the 5 minute interval between requests
        time.sleep(300)
    
    # pd.concat raises on an empty list, so fall back to an empty frame when no
    # weeks were requested.  Row indices of the weekly frames are kept as-is.
    season_data = pd.concat(weekly_frames) if weekly_frames else pd.DataFrame()
    
    # Write the data to CSV for safe keeping
    print('Done looping.  Saving dataframe to a csv file.')
    # Make sure the target directory exists so the download is not lost at the very end
    Path('Data').mkdir(parents = True, exist_ok = True)
    season_data.to_csv(f'Data/{season}.csv')
    print(f'.csv saved at ".../Data/{season}.csv"')
    return season_data


def get_weekly_scores(seasons, weeks_to_get):
    
    """
    Download weekly game scores for several seasons and save them as one CSV.

    Take in two parameters:
    - seasons: Input a list of seasons in the format ['2018REG', '2019REG']
    - weeks_to_get: Number of weeks of data to get.  Will always start at week 1
    
    Sends a request for scores for each week in each season listed.  Converts each
    response to a dataframe using json_normalize() and stacks the weekly dataframes
    in request order.  The result is written to 'Data/weekly_scores.csv' (the
    'Data' directory is created if it is missing) and returned.

    The function sleeps 30 seconds after every request, including the last one.
    """
    # Collect one dataframe per request and concatenate once at the end.
    # DataFrame.append was removed in pandas 2.0 and re-copied the frame on every call.
    weekly_frames = []
    
    for season in seasons:
        for i in range(1, weeks_to_get + 1):
            print(f'Getting stats for week {i} of {season}.')
            
            # Store JSON object as temporary variable
            temp = get_scores(season, i)
            # Convert JSON object to a pandas dataframe and keep it for the final concat
            weekly_frames.append(pd.json_normalize(temp))
            print('Sleeping for 30 Seconds')
            
            # Respect 30s interval between requests
            time.sleep(30)
            
    # pd.concat raises on an empty list, so fall back to an empty frame when
    # nothing was requested.  Row indices of the weekly frames are kept as-is.
    scores = pd.concat(weekly_frames) if weekly_frames else pd.DataFrame()
            
    # Write the data to CSV for safe keeping
    print('Done looping.  Saving dataframe to a csv file.')
    # Make sure the target directory exists so the download is not lost at the very end
    Path('Data').mkdir(parents = True, exist_ok = True)
    scores.to_csv('Data/weekly_scores.csv')
    print('.csv saved at ".../Data/weekly_scores.csv"')
    return scores

### Get Weekly Player Data

In [4]:
# Load the cached 2018 regular-season player stats if the CSV exists;
# otherwise fetch all 17 weeks from the API (get_season_stats also writes the CSV).
# The path gets its own name so df_2018 only ever refers to a dataframe.
csv_2018 = Path('Data/2018REG.csv')
if csv_2018.is_file():
    df_2018 = pd.read_csv(csv_2018)
    print('Data loaded successfully from .csv')
else:
    print('Data does not exist at the current directory.')
    print('Fetching data...')
    df_2018 = get_season_stats('2018REG', 17)

Data loaded successfully from .csv


In [5]:
# Load the cached 2019 regular-season player stats if the CSV exists;
# otherwise fetch all 17 weeks from the API (get_season_stats also writes the CSV).
# The path gets its own name so df_2019 only ever refers to a dataframe.
csv_2019 = Path('Data/2019REG.csv')
if csv_2019.is_file():
    df_2019 = pd.read_csv(csv_2019)
    print('Data loaded successfully from .csv')
else:
    print('Data does not exist at the current directory.')
    print('Fetching data...')
    df_2019 = get_season_stats('2019REG', 17)

Data loaded successfully from .csv


In [6]:
# Load the cached 2020 regular-season player stats if the CSV exists;
# otherwise fetch all 17 weeks from the API (get_season_stats also writes the CSV).
# The path gets its own name so df_2020 only ever refers to a dataframe.
csv_2020 = Path('Data/2020REG.csv')
if csv_2020.is_file():
    df_2020 = pd.read_csv(csv_2020)
    print('Data loaded successfully from .csv')
else:
    print('Data does not exist at the current directory.')
    print('Fetching data...')
    df_2020 = get_season_stats('2020REG', 17)

Data loaded successfully from .csv


In [7]:
# Load the cached 2021 regular-season player stats if the CSV exists;
# otherwise fetch the first 10 weeks from the API (get_season_stats also writes the CSV).
# The path gets its own name so df_2021 only ever refers to a dataframe.
csv_2021 = Path('Data/2021REG.csv')
if csv_2021.is_file():
    df_2021 = pd.read_csv(csv_2021)
    print('Data loaded successfully from .csv')
else:
    print('Data does not exist at the current directory.')
    print('Fetching data...')
    df_2021 = get_season_stats('2021REG', 10)

Data loaded successfully from .csv


In [8]:
# Distinct values of the 2021 'Team' column, in order of first appearance
teams = df_2021['Team'].drop_duplicates().tolist()

### Get weekly score data

Weekly score data that also includes total yardage per team is available elsewhere, at https://www.pro-football-reference.com. <br>
As such, I downloaded the data from there.

In [9]:
# One score file per regular season
scores_2018 = pd.read_csv('Data/2018REG_scores.csv')
scores_2019 = pd.read_csv('Data/2019REG_scores.csv')
scores_2020 = pd.read_csv('Data/2020REG_scores.csv')
scores_2021 = pd.read_csv('Data/2021REG_scores.csv')

# Stack the seasons in chronological order.  DataFrame.append was removed in
# pandas 2.0; pd.concat gives the same result and, like append, keeps each
# file's own row index.
df_scores = pd.concat([scores_2018, scores_2019, scores_2020, scores_2021])

In [10]:
def home_away(df):
    """
    Label the two teams of a game as the home team and the away team.
    
    Input:
        - df: A single game record (for example a row handed over by
          DataFrame.apply with axis = 1)
    Output:
        - The same record with two new entries, 'HomeTeam' and 'AwayTeam'
    """
    
    # When the 'Unnamed: 5' marker is '@' the loser is treated as the home team;
    # for any other value the winner is treated as the home team.
    winner_was_away = df['Unnamed: 5'] == '@'
    home_col, away_col = (('Loser/tie', 'Winner/tie') if winner_was_away
                          else ('Winner/tie', 'Loser/tie'))
    
    df['HomeTeam'] = df[home_col]
    df['AwayTeam'] = df[away_col]
    return df

def match_string(df):
    """
    Build the key used to match team game data to player game data.
    
    Input:
    - df: A single game record with 'Season', 'Week', 'AwayTeam' and 'HomeTeam' entries
     
    Output:
    - A string of format '20181TBNO'
        - i.e. Season, Week, AWAY and HOME joined together without separators
    """
    key_fields = ('Season', 'Week', 'AwayTeam', 'HomeTeam')
    return ''.join(str(df[field]) for field in key_fields)

Score data needs to be formatted such that it can be merged with player data.


In [11]:
# Team abbreviation -> full team name.
# The Raiders appear under both their Las Vegas ('LV') and Oakland ('OAK') names.
abbrev_to_team = {
    'ARI': 'Arizona Cardinals',
    'KC': 'Kansas City Chiefs',
    'NO': 'New Orleans Saints',
    'DET': 'Detroit Lions',
    'PHI': 'Philadelphia Eagles',
    'DAL': 'Dallas Cowboys',
    'TB': 'Tampa Bay Buccaneers',
    'SEA': 'Seattle Seahawks',
    'LAR': 'Los Angeles Rams',
    'LV': 'Las Vegas Raiders',
    'OAK': 'Oakland Raiders',
    'HOU': 'Houston Texans',
    'SF': 'San Francisco 49ers',
    'MIN': 'Minnesota Vikings',
    'NYJ': 'New York Jets',
    'NYG': 'New York Giants',
    'CIN': 'Cincinnati Bengals',
    'DEN': 'Denver Broncos',
    'CLE': 'Cleveland Browns',
    'JAX': 'Jacksonville Jaguars',
    'CAR': 'Carolina Panthers',
    'IND': 'Indianapolis Colts',
    'BAL': 'Baltimore Ravens',
    'CHI': 'Chicago Bears',
    'BUF': 'Buffalo Bills',
    'PIT': 'Pittsburgh Steelers',
    'MIA': 'Miami Dolphins',
    'NE': 'New England Patriots',
    'LAC': 'Los Angeles Chargers',
    'TEN': 'Tennessee Titans',
    'WAS': 'Washington Football Team',
    'ATL': 'Atlanta Falcons',
    'GB': 'Green Bay Packers',
}

# Full team name -> abbreviation, obtained by inverting the mapping above
team_to_abbrev = {full_name: abbrev for abbrev, full_name in abbrev_to_team.items()}
# Washington's earlier name maps to the same abbreviation
team_to_abbrev['Washington Redskins'] = 'WAS'

In [12]:
# Tag every game row with its home and away team (full team names at this point)
df_scores = df_scores.apply(home_away, axis = 1)

# Swap full team names for abbreviations.  The result is assigned back because
# calling replace(..., inplace = True) on a selected column is not guaranteed to
# update the parent dataframe (it silently does nothing under Copy-on-Write).
df_scores['HomeTeam'] = df_scores['HomeTeam'].replace(team_to_abbrev)
df_scores['AwayTeam'] = df_scores['AwayTeam'].replace(team_to_abbrev)

# An NFL season is named after the year it starts in, so regular-season games
# played in January (or February) belong to the previous calendar year's season.
# Using the raw calendar year would give those games the wrong MatchString.
game_dates = pd.DatetimeIndex(df_scores['Date'])
df_scores['Season'] = game_dates.year - (game_dates.month <= 2).astype(int)

# Season-Week-AWAY-HOME key used to match game rows to player rows
df_scores['MatchString'] = df_scores.apply(match_string, axis = 1)
df_scores.head()

Unnamed: 0,Week,Day,Date,Time,Winner/tie,Unnamed: 5,Loser/tie,Unnamed: 7,PtsW,PtsL,YdsW,TOW,YdsL,TOL,HomeTeam,AwayTeam,Season,MatchString
0,1,Thu,2018-09-06,8:20PM,Philadelphia Eagles,,Atlanta Falcons,boxscore,18.0,12.0,232.0,2.0,299.0,1.0,PHI,ATL,2018,20181ATLPHI
1,1,Sun,2018-09-09,1:00PM,Tampa Bay Buccaneers,@,New Orleans Saints,boxscore,48.0,40.0,529.0,0.0,475.0,2.0,NO,TB,2018,20181TBNO
2,1,Sun,2018-09-09,1:00PM,Baltimore Ravens,,Buffalo Bills,boxscore,47.0,3.0,369.0,1.0,153.0,2.0,BAL,BUF,2018,20181BUFBAL
3,1,Sun,2018-09-09,1:00PM,Cincinnati Bengals,@,Indianapolis Colts,boxscore,34.0,23.0,330.0,2.0,380.0,2.0,IND,CIN,2018,20181CININD
4,1,Sun,2018-09-09,1:00PM,Pittsburgh Steelers,@,Cleveland Browns,boxscore,21.0,21.0,472.0,6.0,327.0,1.0,CLE,PIT,2018,20181PITCLE


In [14]:
# Show the frame without the scheduling columns.  drop() returns a new dataframe
# and the result is not stored, so df_scores itself still has 'Week', 'Day' and 'Date'.
# NOTE(review): if the columns were meant to be removed before saving, assign the result back.
df_scores.drop(['Week', 'Day', 'Date'], axis = 'columns')

33

In [15]:
# Save the formatted game scores; the row index is written as the first column
df_scores.to_csv(path_or_buf = 'Data/game_scores.csv')