### Below are two sets of code. The second set is more representative of my project task, so feel free to ignore the first.

#### the first is a series of operations that ends with the dataframe on line 19 and has:
#### 1. player attributes including an id that is consistent across years
#### 2. final fantasy numbers for 2023, actual statistics for 2023, and projected statistics for 2024 (not projected fantasy #s)

### The second set starts at line 28 and is a more general api request
#### 1. extracts rushing, passing, or receiving leaders by year 
#### The goal is to compile each individual player's statistics across their whole career
#### I'm not sure how to do this yet, since it would require a hierarchical (multi-level) index with either the year or the player as the top level


In [53]:
import requests
import pandas as pd
pd.options.mode.chained_assignment = None  # default='warn'
season_id = 2024

In [54]:
# ESPN fantasy API URL for the season in `season_id` (kona_player_info view)
url = "https://lm-api-reads.fantasy.espn.com/apis/v3/games/ffl/seasons/{}/segments/0/leaguedefaults/3?view=kona_player_info".format(season_id)

# Cookies sent with every request so ESPN can tell it's you making the call.
# Both values are placeholders; replace them with your own swid / espn_s2 cookies.
espn_cookies = {"swid" : "{your cookies}", 
               "espn_s2" : "your cookies"}

# Headers passed to the GET request. 'X-fantasy-filter' is a JSON document sent
# as a string; its "limit" is null, which per the original author's note makes
# the endpoint return all players rather than a capped page.
# NOTE(review): the filter hard-codes the stat ids "002023", "002024", "102024"
# and "022024", so it does not follow `season_id` — confirm before changing seasons.
headers = {
 'Connection': 'keep-alive',
 'Accept': 'application/json, text/plain, */*',
 'User-Agent': 'your system',
'X-fantasy-filter': '{"players":{"filterSlotIds":{"value":[0,2,23,4,6]},"sortAppliedStatTotal":{"sortAsc":false,"sortPriority":2,"value":"002023"},"sortAppliedStatTotalForScoringPeriodId":null,"sortStatId":null,"sortStatIdForScoringPeriodId":null,"sortPercOwned":{"sortPriority":3,"sortAsc":false},"limit":null,"filterRanksForSlotIds":{"value":[0,2,4,6,17,16,8,9,10,12,13,24,11,14,15]},"filterStatsForTopScoringPeriodIds":{"value":2,"additionalValue":["002024","102024","002023","022024"]}}}',
 'X-fantasy-platform': 'kona-PROD-18c32c6242c08d37606f52ca971782183e620e50',
 'X-fantasy-source': 'kona'
}

In [55]:
# Fetch the player payload. Fail loudly on an HTTP error (e.g. expired
# cookies) instead of trying to parse an error page as JSON.
r = requests.get(url, headers=headers, cookies=espn_cookies)
r.raise_for_status()
espn_raw_data = r.json()

# create DF with desired columns
espn_data_detail = espn_raw_data['players']
df = pd.DataFrame(espn_data_detail)
# .copy() makes `players` an independent frame, so the columns assigned to it
# further down are written to real data rather than to a slice of `df`
# (which is what triggers pandas' chained-assignment warning).
players = df[['player', 'ratings', 'id']].copy()

In [56]:
players

Unnamed: 0,player,ratings,id
0,"{'active': True, 'defaultPositionId': 3, 'draf...","{'0': {'positionalRanking': 1, 'totalRanking':...",4241389
1,"{'active': True, 'defaultPositionId': 1, 'draf...","{'0': {'positionalRanking': 1, 'totalRanking':...",3918298
2,"{'active': True, 'defaultPositionId': 2, 'draf...","{'0': {'positionalRanking': 1, 'totalRanking':...",3117251
3,"{'active': True, 'defaultPositionId': 3, 'draf...","{'0': {'positionalRanking': 2, 'totalRanking':...",3116406
4,"{'active': True, 'defaultPositionId': 1, 'draf...","{'0': {'positionalRanking': 2, 'totalRanking':...",4040715
...,...,...,...
944,"{'active': True, 'defaultPositionId': 1, 'draf...","{'0': {'positionalRanking': 0, 'totalRanking':...",4361653
945,"{'active': True, 'defaultPositionId': 4, 'draf...","{'0': {'positionalRanking': 0, 'totalRanking':...",4361112
946,"{'active': True, 'defaultPositionId': 4, 'draf...","{'0': {'positionalRanking': 0, 'totalRanking':...",4384171
947,"{'active': True, 'defaultPositionId': 3, 'draf...","{'0': {'positionalRanking': 0, 'totalRanking':...",4569559


In [57]:
#espn_data_detail

In [58]:
#The following set of functions might be unnecessarily complicated, but that's because ESPN's API nests so many dictionaries inside one another
# for example: 
#players['player'][0]

In [59]:
def extract_inner_dict(row, col_name):
    """Return the first value stored in the mapping at ``row[col_name]``.

    An empty (falsy) entry yields an empty dict, so callers can always
    call ``.get`` on the result.
    """
    nested = row[col_name]
    if nested:
        # Only the first value of the outer mapping is of interest.
        for first_inner in nested.values():
            return first_inner
    return {}

# Function to extract player details with default keys for missing values
def extract_player(dictionary):
    """Pull name, injury status, position id and team id out of a player dict.

    Returns a four-element ``pd.Series`` in that order; any field that is
    absent (or every field, when the dict itself is empty) is 'Unknown'.
    """
    wanted_keys = ('fullName', 'injuryStatus', 'defaultPositionId', 'proTeamId')
    if not dictionary:
        return pd.Series(['Unknown'] * len(wanted_keys))
    return pd.Series([dictionary.get(key, 'Unknown') for key in wanted_keys])

# Function to extract ranking details
def extract_rank_23(dictionary):
    """Pull total ranking, positional ranking and total rating from a ratings dict.

    Returns a three-element ``pd.Series`` in that order; missing entries
    (or an empty dict) come back as ``None``.
    """
    if not dictionary:
        return pd.Series([None] * 3)
    wanted_keys = ('totalRanking', 'positionalRanking', 'totalRating')
    return pd.Series([dictionary.get(key) for key in wanted_keys])

def extract_inner_dict(row, col_name):
    """Return the first inner dictionary stored under ``row[col_name]``.

    Parameters:
        row: a mapping-like row (e.g. a DataFrame row) indexed by column name.
        col_name: the column holding a dict of dicts.

    Returns:
        The first value of the outer dict, or ``{}`` when the entry is
        missing, not a dict, or empty.
    """
    outer_dict = row[col_name]
    # This definition replaces the earlier one in the file, so it must keep
    # the empty-payload guard: without it an empty dict raises StopIteration
    # and a None/NaN entry raises AttributeError.
    if not isinstance(outer_dict, dict) or not outer_dict:
        return {}
    # Get the inner dictionary by accessing the first key of the outer dictionary
    return next(iter(outer_dict.values()), {})

# Function to extract player details with default keys for missing values
def extract_player(dictionary):
    """Extract basic player attributes from a player dictionary.

    Parameters:
        dictionary: the per-player dict; may be empty, ``None`` or NaN.

    Returns:
        ``pd.Series([name, status, pos_id, team_id])`` read from the keys
        'fullName', 'injuryStatus', 'defaultPositionId' and 'proTeamId'.
        Any missing field is 'Unknown'.
    """
    # A missing payload (None / NaN) is treated like an empty dict so one bad
    # row yields 'Unknown' values instead of aborting the whole apply().
    if not isinstance(dictionary, dict):
        dictionary = {}
    name = dictionary.get('fullName', 'Unknown')
    status = dictionary.get('injuryStatus', 'Unknown')
    pos_id = dictionary.get('defaultPositionId', 'Unknown')
    team_id = dictionary.get('proTeamId', 'Unknown')
    return pd.Series([name, status, pos_id, team_id])

# Function to extract ranking details
def extract_rank_23(dictionary):
    """Extract ranking details from a ratings dictionary.

    Parameters:
        dictionary: the inner ratings dict; may be empty, ``None`` or NaN.

    Returns:
        ``pd.Series([rank_23, positional_rank_23, total_points_23])`` read
        from the keys 'totalRanking', 'positionalRanking' and 'totalRating'.
        Missing values are ``None``.
    """
    # A missing payload (None / NaN) is treated like an empty dict so the
    # result always has three entries instead of raising AttributeError.
    if not isinstance(dictionary, dict):
        dictionary = {}
    rank_23 = dictionary.get('totalRanking')
    positional_rank_23 = dictionary.get('positionalRanking')
    total_points_23 = dictionary.get('totalRating')
    return pd.Series([rank_23, positional_rank_23, total_points_23])

# Replace each 'ratings' entry with its first inner dictionary
players['ratings'] = players.apply(extract_inner_dict, axis=1, col_name='ratings')

# Expand the nested dictionaries into flat, named columns
player_columns = ['name', 'status', 'pos_id', 'team_id']
players[player_columns] = players['player'].apply(extract_player)
ranking_columns = ['rank_23', 'positional_rank_23', 'total_points_23']
players[ranking_columns] = players['ratings'].apply(extract_rank_23)

# Per-game average, assuming a 17-game season
games_per_season = 17
players['avg_points_23'] = players['total_points_23'] / games_per_season

players.head()

Unnamed: 0,player,ratings,id,name,status,pos_id,team_id,rank_23,positional_rank_23,total_points_23,avg_points_23
0,"{'active': True, 'defaultPositionId': 3, 'draf...","{'positionalRanking': 1, 'totalRanking': 1, 't...",4241389,CeeDee Lamb,ACTIVE,3,6,1.0,1.0,403.2,23.717647
1,"{'active': True, 'defaultPositionId': 1, 'draf...","{'positionalRanking': 1, 'totalRanking': 2, 't...",3918298,Josh Allen,ACTIVE,1,2,2.0,1.0,392.64,23.096471
2,"{'active': True, 'defaultPositionId': 2, 'draf...","{'positionalRanking': 1, 'totalRanking': 3, 't...",3117251,Christian McCaffrey,ACTIVE,2,25,3.0,1.0,391.3,23.017647
3,"{'active': True, 'defaultPositionId': 3, 'draf...","{'positionalRanking': 2, 'totalRanking': 4, 't...",3116406,Tyreek Hill,ACTIVE,3,15,4.0,2.0,376.4,22.141176
4,"{'active': True, 'defaultPositionId': 1, 'draf...","{'positionalRanking': 2, 'totalRanking': 5, 't...",4040715,Jalen Hurts,ACTIVE,1,21,5.0,2.0,356.82,20.989412


In [60]:
#players['ratings'] = players.apply(lambda row: extract_inner_dict(row, 'ratings'), axis=1)

#players[['rank_23', 'positional_rank_23', 'total_points_23']] = players['ratings'].apply(extract_rank_23)

#players[['name', 'status', 'pos_id', 'team_id']] = players['player'].apply(extract_player)



# create average point column
#players['avg_points_23'] = players['total_points_23']/17


#players.head()

In [61]:
# some dictionaries that will help map variables
# ESPN pro-team id -> team name. Id 0 is mapped to 'FA'; ids 31 and 32 map to
# empty strings (placeholders with no team name in this table).
teams = {0:'FA', 1:'Atlanta Falcons', 2:'Buffalo Bills', 3:'Chicago Bears', 4:'Cincinnati Bengals', 5:'Cleveland Browns', 
         6:'Dallas Cowboys', 7:'Denver Broncos', 8:'Detroit Lions', 9:'Green Bay Packers',10:'Tennessee Titans',
         11:'Indianapolis Colts', 12:'Kansas City Chiefs', 13:'Las Vegas Raiders', 14:'Los Angeles Rams', 15:'Miami Dolphins', 
         16:'Minnesota Vikings', 17:'New England Patriots', 18:'New Orleans Saints', 19:'New York Giants', 20:'New York Jets',
         21:'Philadelphia Eagles', 22:'Arizona Cardinals', 23:'Pittsburgh Steelers', 24:'Los Angeles Chargers', 25:'San Francisco 49ers',
         26:'Seattle Seahawks', 27:'Tampa Bay Buccaneers', 28:'Washington Commanders', 29:'Carolina Panthers', 30:'Jacksonville Jaguars', 31:'', 32:'', 33:'Baltimore Ravens', 34:'Houston Texans'}

# Position id (the 'pos_id' / defaultPositionId value) -> position abbreviation
positions = {1:'QB', 2:'RB', 3:'WR', 4:'TE', 5:'K', 16:'D/ST'}

# Stat id -> readable stat name. The stats extraction functions in this file
# look the same ids up as strings ('0', '1', '3', ...).
espn_stat_key = {0:'attempts', 1:'completions', 3:'pass_yds', 4:'pass_td', 20: 'interceptions', 23:'carries', 24:'rush_yds', 25:'rush_td', 42:'rec_yds', 43:'rec_td', 53:'receptions'}

#mapping the first two dicts onto the df
#players['team'] = players['team_id'].map(teams) # changes teamid to the actual team name
#players['position'] = players['pos_id'].map(positions) # changes pos_id to actual position


#Maintenance/cleaning
#players = players[['id', 'name', 'status', 'rank_23', 'positional_rank_23', 'total_points_23', 'avg_points_23', 'team', 'position', 'pos_id','team_id', 'player', 'ratings']]

# changing rank & pos_rank to integers, rounding off points 
#players[['rank_23', 'positional_rank_23']] = players[['rank_23', 'positional_rank_23']].astype(int)
#players = players.round({'total_points_23':1, 'avg_points_23':2})

### now I should be able to import historical data and join it by the ID

In [62]:
# create a new df
players_copy = players.copy()

In [63]:
# this function goes into the player dictionary and extracts the mess of nested dictionaries to get the actual stats that I want
def extract_stats_2023(dictionary):
    """Extract a player's 2023 stat line from the nested player dictionary.

    Searches ``dictionary['stats']`` for the first entry whose 'id' is
    "002023" and reads eleven values from that entry's own 'stats' mapping.

    Parameters:
        dictionary: the per-player dict (the 'player' column).

    Returns:
        ``pd.Series`` of eleven values, in this order: attempts, completions,
        pass_yds, pass_td, interceptions, carries, rush_yds, rush_td,
        rec_yds, rec_td, receptions. A stat that is absent is 0, and the whole
        series is zeros when no matching entry exists or the input is malformed.
    """
    # Stat ids in output-column order; the payload keys them as strings.
    stat_ids = ('0', '1', '3', '4', '20', '23', '24', '25', '42', '43', '53')
    # Default line. Its length is tied to stat_ids so the error path can never
    # return fewer values than the success path (it used to return 10, not 11).
    zeros = [0] * len(stat_ids)

    try:
        stats_list = dictionary.get('stats', [])

        # Loop through stats_list to find the correct set of stats
        for stats_dict in stats_list:
            # NOTE(review): "002023" is presumably statSplitTypeId 0 for the
            # 2023 season, per the original comment — confirm.
            if stats_dict.get('id') == "002023":
                season_stats = stats_dict.get('stats', {})
                # Stop at the first match
                return pd.Series([season_stats.get(stat_id, 0) for stat_id in stat_ids])

        return pd.Series(zeros)

    except (AttributeError, TypeError) as e:
        # Malformed input (None, NaN, non-dict entries): report and fall back.
        print(f"Error processing dictionary: {dictionary}")
        print(f"Exception: {e}")
        return pd.Series(zeros)

#players_copy['player'] contains the dictionaries you want to process
# One output column per value returned by extract_stats_2023, in the same order
stat_columns_23 = [
    'attempts_23', 'completions_23', 'pass_yds_23', 'pass_td_23', 'interceptions_23',
    'carries_23', 'rush_yds_23', 'rush_td_23', 'rec_yds_23', 'rec_td_23', 'receptions_23',
]
players_copy[stat_columns_23] = players_copy['player'].apply(extract_stats_2023)
#players_copy

### Next... bring in 2024 projections same as for 2023 stats --- ID changes in function but nothing else

In [64]:
def extract_proj_24(dictionary):
    """Extract a player's 2024 projected stat line from the nested player dictionary.

    Searches ``dictionary['stats']`` for the first entry whose 'id' is
    "102024" and reads eleven values from that entry's own 'stats' mapping.

    Parameters:
        dictionary: the per-player dict (the 'player' column).

    Returns:
        ``pd.Series`` of eleven values, in this order: attempts, completions,
        pass_yds, pass_td, interceptions, carries, rush_yds, rush_td,
        rec_yds, rec_td, receptions. A stat that is absent is 0, and the whole
        series is zeros when no matching entry exists or the input is malformed.
    """
    # Stat ids in output-column order; the payload keys them as strings.
    stat_ids = ('0', '1', '3', '4', '20', '23', '24', '25', '42', '43', '53')
    # Default line. Its length is tied to stat_ids so the error path can never
    # return fewer values than the success path (it used to return 10, not 11).
    zeros = [0] * len(stat_ids)

    try:
        stats_list = dictionary.get('stats', [])

        # Loop through stats_list to find the correct set of stats
        for stats_dict in stats_list:
            # NOTE(review): "102024" is presumably the projection split for
            # the 2024 season — confirm against the ESPN payload.
            if stats_dict.get('id') == "102024":
                projected = stats_dict.get('stats', {})
                # Stop at the first match
                return pd.Series([projected.get(stat_id, 0) for stat_id in stat_ids])

        return pd.Series(zeros)

    except (AttributeError, TypeError) as e:
        # Malformed input (None, NaN, non-dict entries): report and fall back.
        print(f"Error processing dictionary: {dictionary}")
        print(f"Exception: {e}")
        return pd.Series(zeros)

#players_copy['player'] contains the dictionaries you want to process
# One output column per value returned by extract_proj_24, in the same order
proj_columns_24 = [
    'attempts_proj_24', 'completions_proj_24', 'pass_yds_proj_24', 'pass_td_proj_24',
    'interceptions_proj_24', 'carries_proj_24', 'rush_yds_proj_24', 'rush_td_proj_24',
    'rec_yds_proj_24', 'rec_td_proj_24', 'receptions_proj_24',
]
players_copy[proj_columns_24] = players_copy['player'].apply(extract_proj_24)
players_copy

Unnamed: 0,player,ratings,id,name,status,pos_id,team_id,rank_23,positional_rank_23,total_points_23,...,completions_proj_24,pass_yds_proj_24,pass_td_proj_24,interceptions_proj_24,carries_proj_24,rush_yds_proj_24,rush_td_proj_24,rec_yds_proj_24,rec_td_proj_24,receptions_proj_24
0,"{'active': True, 'defaultPositionId': 3, 'draf...","{'positionalRanking': 1, 'totalRanking': 1, 't...",4241389,CeeDee Lamb,ACTIVE,3,6,1.0,1.0,403.20,...,0.000000,0.000000,0.000000,0.000000,12.586774,85.778283,0.783793,1462.949998,8.856855,116.865698
1,"{'active': True, 'defaultPositionId': 1, 'draf...","{'positionalRanking': 1, 'totalRanking': 2, 't...",3918298,Josh Allen,ACTIVE,1,2,2.0,1.0,392.64,...,341.283481,3715.846647,23.077842,12.369920,94.695354,498.032762,8.545350,0.000000,0.000000,0.000000
2,"{'active': True, 'defaultPositionId': 2, 'draf...","{'positionalRanking': 1, 'totalRanking': 3, 't...",3117251,Christian McCaffrey,ACTIVE,2,25,3.0,1.0,391.30,...,0.000000,0.000000,0.000000,0.000000,262.906740,1242.305467,12.047149,526.774494,3.842288,67.130255
3,"{'active': True, 'defaultPositionId': 3, 'draf...","{'positionalRanking': 2, 'totalRanking': 4, 't...",3116406,Tyreek Hill,ACTIVE,3,15,4.0,2.0,376.40,...,0.000000,0.000000,0.000000,0.000000,4.099647,24.787526,0.261915,1437.745826,7.513137,106.342361
4,"{'active': True, 'defaultPositionId': 1, 'draf...","{'positionalRanking': 2, 'totalRanking': 5, 't...",4040715,Jalen Hurts,ACTIVE,1,21,5.0,2.0,356.82,...,320.052976,3626.080713,20.997223,11.216677,124.534329,536.613050,9.982034,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
944,"{'active': True, 'defaultPositionId': 1, 'draf...","{'positionalRanking': 0, 'totalRanking': 0, 't...",4361653,Devin Leary,ACTIVE,1,33,0.0,0.0,0.00,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
945,"{'active': True, 'defaultPositionId': 4, 'draf...","{'positionalRanking': 0, 'totalRanking': 0, 't...",4361112,Devin Culp,ACTIVE,4,27,0.0,0.0,0.00,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,19.006215,0.137946,1.895157
946,"{'active': True, 'defaultPositionId': 4, 'draf...","{'positionalRanking': 0, 'totalRanking': 0, 't...",4384171,Tanner McLachlan,ACTIVE,4,4,0.0,0.0,0.00,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
947,"{'active': True, 'defaultPositionId': 3, 'draf...","{'positionalRanking': 0, 'totalRanking': 0, 't...",4569559,Devaughn Vele,ACTIVE,3,7,0.0,0.0,0.00,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000


In [None]:
#result_dfs = extract_proj_by_year(2023)
#result_dfs

In [65]:
# a fn letting me find players
def player_stats(name):
    """Return the rows of ``players_copy`` whose 'name' column equals *name*."""
    name_matches = players_copy['name'] == name
    return players_copy[name_matches]

# Second dataset -- trying to end up with player-level time series data to make projections with
## The plan is to write a set of functions that take historical data entries, attach them to the player data via the primary key ('id'), and store the result in a new player df

In [66]:
def retrieve_historical_stats(group, groupYards, year):
    """Fetch one season of ESPN leader statistics and flatten them into a DataFrame.

    Parameters:
        group: offense category used in the request, e.g. 'rushing' or 'receiving'.
        groupYards: stat within that category to sort by (descending),
            e.g. 'rushingYards'.
        year: season to request; also written to the 'year' column.

    Returns:
        DataFrame with columns id, name, rush_att, rush_yd, rush_td, rec,
        rec_yd, rec_td, pass_att, pass_comp, pass_yd, pass_td, year, athlete,
        categories. The request asks for page 1 with a limit of 100. When the
        response has no 'athletes', an empty DataFrame with those columns is
        returned.
    """
    # (output column, index into `categories`, index into that category's `totals`)
    # NOTE(review): these positions assume categories[1]/[2]/[3] are
    # passing/rushing/receiving and that `totals` keeps a fixed order — confirm
    # against a live response. In particular, check that passing totals[0] and
    # totals[1] really are attempts and completions, in that order.
    stat_positions = [
        ('rush_att', 2, 0), ('rush_yd', 2, 1), ('rush_td', 2, 5),
        ('rec', 3, 0), ('rec_yd', 3, 2), ('rec_td', 3, 6),
        ('pass_att', 1, 0), ('pass_comp', 1, 1), ('pass_yd', 1, 3), ('pass_td', 1, 7),
    ]
    stat_columns = [column for column, _, _ in stat_positions]
    output_columns = ['id', 'name'] + stat_columns + ['year', 'athlete', 'categories']

    #set request url
    url_flex = (
        "https://site.web.api.espn.com/apis/common/v3/sports/football/nfl/statistics/byathlete"
        "?region=us&lang=en&contentorigin=espn&isqualified=false&page=1&limit=100"
        f"&category=offense%3A{group}&sort={group}.{groupYards}%3Adesc"
        f"&season={year}&seasontype=2"
    )

    #set headers
    flex_headers = {
        'Connection': 'keep-alive',
        'Accept': 'application/json, text/plain, */*',
        'User-Agent': 'your system'
    }

    #grab data (the timeout keeps a stalled connection from hanging the notebook)
    r_flex = requests.get(url_flex, headers=flex_headers, cookies=espn_cookies, timeout=30)
    espn_historical_data = r_flex.json()

    # Without 'athletes' there is nothing to flatten. Return an empty frame
    # with the expected columns instead of failing on the 'athlete' lookup.
    athletes = espn_historical_data.get('athletes') if isinstance(espn_historical_data, dict) else None
    if not athletes:
        return pd.DataFrame(columns=output_columns)
    athletes_df = pd.DataFrame(athletes)

    # this function grabs the id and name of the player from each row under dictionary 'athlete'
    def extract_attributes(athlete_dictionary):
        return pd.Series([athlete_dictionary.get('id'), athlete_dictionary.get('displayName')])

    # this function extracts individual statistics for each player from the categories list of dicts
    def extract_season_stats(nums):
        return pd.Series([nums[cat].get('totals')[pos] for _, cat, pos in stat_positions])

    # apply both functions across the data
    athletes_df[['id', 'name']] = athletes_df['athlete'].apply(extract_attributes)
    athletes_df[stat_columns] = athletes_df['categories'].apply(extract_season_stats)
    athletes_df['year'] = year
    return athletes_df[output_columns]


In [None]:
rbs_df_2022 = retrieve_historical_stats('rushing', 'rushingYards', 2022)

In [None]:
#rbs_df_2022

In [None]:
wrs_df_2023 = retrieve_historical_stats('receiving', 'receivingYards', 2023)
wrs_df_2022 = retrieve_historical_stats('receiving', 'receivingYards', 2022)
wrs_df_2021 = retrieve_historical_stats('receiving', 'receivingYards', 2021)
wrs_df_2020 = retrieve_historical_stats('receiving', 'receivingYards', 2020)

# would probably have to set the index as date and then multiindex the concatenated dfs


In [None]:
wrs_df_2023.head()


In [None]:
def retrieve_wr_stats(player_id):
    """Return one player's rows from the 2020-2023 receiving frames, indexed by year.

    Stacks the four module-level ``wrs_df_*`` frames (2023 first), keeps the
    rows whose 'id' equals *player_id*, and uses 'year' as the index.
    """
    season_frames = [wrs_df_2023, wrs_df_2022, wrs_df_2021, wrs_df_2020]
    all_seasons = pd.concat(season_frames)

    # Boolean mask selecting this player's rows
    belongs_to_player = all_seasons['id'] == player_id
    return all_seasons[belongs_to_player].set_index('year')

# Example usage
stefon_diggs_id = '2976212'
stefon_diggs_stats = retrieve_wr_stats(stefon_diggs_id)
stefon_diggs_stats.head()

In [None]:
# wrs_df_2020.head()

## NEXT IS FOUR FUNCTIONS:
#### 1. retrieve_player_gamelog takes player id, year, position string and returns relevant statistics for that year
#### 2. retrieve_game_results takes the team's results for all 16/17 games of the season and puts them into a df by week
#### 3. retrieve_all combines these two prior dataframes and selects important columns to keep
#### 4. get_player_id grabs an individual player's id when given a name, can be used as input for retrieve_all


In [67]:
def retrieve_player_gamelog(id_no, year, pos): # function takes player id and year to extract 16-week game log (hopefully)
    """Fetch a player's game log for one season and add named stat columns.

    Parameters:
        id_no: ESPN athlete id, interpolated into the request URL.
        year: season to request.
        pos: accepted for call compatibility but not used here; the stat
            layout is chosen from the response's first category name.

    Returns:
        DataFrame of game-log events sorted by 'id' ('eventId' renamed to
        'id'), with extra columns depending on the category: rushing adds
        car/rush_yd/rush_td; receiving adds rec/tar/rec_yd/rec_td; anything
        else adds att/comp/pass_yd/pass_td. When the response has no
        'seasonTypes' or no events, an empty DataFrame with only an 'id'
        column is returned.
    """
    #set request url
    money_url = f"https://site.web.api.espn.com/apis/common/v3/sports/football/nfl/athletes/{id_no}/gamelog?region=us&lang=en&contentorigin=espn&season={year}"
    #set headers
    money_headers = {
        'Connection': 'keep-alive',
        'Accept': 'application/json, text/plain, */*',
        'User-Agent': 'your system'
    }

    #grab data (the timeout keeps a stalled connection from hanging the notebook)
    r_money = requests.get(money_url, headers=money_headers, cookies=espn_cookies, timeout=30)
    espn_gamelog_data = r_money.json()

    # Nothing to report: return an empty frame that still has the 'id' column,
    # rather than failing later in sort_values(by='id').
    if 'seasonTypes' not in espn_gamelog_data:
        return pd.DataFrame(columns=['id'])

    # NOTE(review): index 1 is assumed to be the regular season and only the
    # first category's events are read — confirm this covers every game.
    reg_season_gamelog = espn_gamelog_data['seasonTypes'][1].get('categories')[0].get('events')
    gamelog_df = pd.DataFrame(reg_season_gamelog)
    if gamelog_df.empty:
        return pd.DataFrame(columns=['id'])

    # (column name, index into each event's 'stats' list) per category.
    layouts = {
        'rushing': [('car', 0), ('rush_yd', 1), ('rush_td', 3)],
        'receiving': [('rec', 0), ('tar', 1), ('rec_yd', 2), ('rec_td', 4)],
    }
    # NOTE(review): verify that stats[0]/stats[1] really are attempts and
    # completions, in that order, for passers.
    passing_layout = [('att', 0), ('comp', 1), ('pass_yd', 2), ('pass_td', 5)]

    # The category name is the same for every row, so read it once.
    category_name = espn_gamelog_data['categories'][0].get('name')
    layout = layouts.get(category_name, passing_layout)
    columns = [column for column, _ in layout]

    def add_stat_columns(row):
        return pd.Series([row['stats'][position] for _, position in layout], index=columns)

    new_columns = gamelog_df.apply(add_stat_columns, axis=1)

    # Concatenate the new columns to the original DataFrame
    gamelog_df = pd.concat([gamelog_df, new_columns], axis=1)

    #rename eventId column
    gamelog_df = gamelog_df.rename(columns={'eventId': 'id'})

    return gamelog_df.sort_values(by='id', ascending=True)


In [68]:
# retrieve_player_gamelog(3139477, 2022, 'QB')

In [69]:
# ESPN pro-team id -> team name, used below to label home and away teams.
# This repeats the `teams` mapping declared earlier in the file; id 0 is
# mapped to 'FA' and ids 31 and 32 map to empty strings.
teams = {0:'FA', 1:'Atlanta Falcons', 2:'Buffalo Bills', 3:'Chicago Bears', 4:'Cincinnati Bengals', 5:'Cleveland Browns', 
         6:'Dallas Cowboys', 7:'Denver Broncos', 8:'Detroit Lions', 9:'Green Bay Packers',10:'Tennessee Titans',
         11:'Indianapolis Colts', 12:'Kansas City Chiefs', 13:'Las Vegas Raiders', 14:'Los Angeles Rams', 15:'Miami Dolphins', 
         16:'Minnesota Vikings', 17:'New England Patriots', 18:'New Orleans Saints', 19:'New York Giants', 20:'New York Jets',
         21:'Philadelphia Eagles', 22:'Arizona Cardinals', 23:'Pittsburgh Steelers', 24:'Los Angeles Chargers', 25:'San Francisco 49ers',
         26:'Seattle Seahawks', 27:'Tampa Bay Buccaneers', 28:'Washington Commanders', 29:'Carolina Panthers', 30:'Jacksonville Jaguars', 31:'', 32:'', 33:'Baltimore Ravens', 34:'Houston Texans'}
# 'atVs' marker -> location label ('vs' is a home game, '@' an away game)
loc = {'vs':'home', '@':'away'}

In [70]:
def retrieve_game_results(id_no, year): # function takes player id and year to extract 16-week game log (hopefully)
    """Fetch the game results attached to a player's game log for one season.

    Parameters:
        id_no: ESPN athlete id, interpolated into the request URL.
        year: season to request.

    Returns:
        DataFrame with one row per game, sorted by 'week'. Only games with no
        'eventNote' are kept. The link/opponent/league/team columns are
        dropped, the team ids and scores are cast to int, and 'home_team',
        'away_team' (via the module-level ``teams``) and 'loc' (via ``loc``)
        are added. When the response has no 'events', an empty DataFrame with
        only an 'id' column is returned.
    """
    #set request url
    money_url = f"https://site.web.api.espn.com/apis/common/v3/sports/football/nfl/athletes/{id_no}/gamelog?region=us&lang=en&contentorigin=espn&season={year}"
    #set headers
    money_headers = {
        'Connection': 'keep-alive',
        'Accept': 'application/json, text/plain, */*',
        'User-Agent': 'your system'
    }

    #grab data (the timeout keeps a stalled connection from hanging the notebook)
    r_money = requests.get(money_url, headers=money_headers, cookies=espn_cookies, timeout=30)
    espn_gamelog_data = r_money.json()

    # No events: return an empty frame rather than failing in sort_values(by='week').
    game_results = espn_gamelog_data.get('events') if isinstance(espn_gamelog_data, dict) else None
    if not game_results:
        return pd.DataFrame(columns=['id'])

    # The events arrive with one *column* per game (presumably keyed by event
    # id), so transpose to get one row per game. `.T` replaces the deprecated
    # DataFrame.swapaxes("index", "columns").
    game_results_df = pd.DataFrame(game_results).T

    # fill na, sort by week ascending
    game_results_df_clean = game_results_df.fillna('NA').sort_values(by='week', ascending=True)

    # Step 2: Keep only rows with no 'eventNote' and drop specified columns.
    # If no event carries a note the column does not exist at all, which means
    # every row qualifies.
    if 'eventNote' in game_results_df_clean.columns:
        game_results_df_clean = game_results_df_clean[game_results_df_clean['eventNote'] == 'NA']
    columns_to_drop = ['links', 'opponent', 'leagueName', 'leagueAbbreviation', 'leagueShortName', 'team']
    # .copy() so the assignments below write to an independent frame, not a slice.
    game_results_df_clean_filtered = game_results_df_clean.drop(columns=columns_to_drop, errors='ignore').copy()

    # Step 3: Map team ids to actual team names
    int_columns = ['homeTeamId', 'awayTeamId', 'homeTeamScore', 'awayTeamScore']
    game_results_df_clean_filtered[int_columns] = game_results_df_clean_filtered[int_columns].astype(int)
    game_results_df_clean_filtered['home_team'] = game_results_df_clean_filtered['homeTeamId'].map(teams)
    game_results_df_clean_filtered['away_team'] = game_results_df_clean_filtered['awayTeamId'].map(teams)
    game_results_df_clean_filtered['loc'] = game_results_df_clean_filtered['atVs'].map(loc)
    return game_results_df_clean_filtered
    

### Function is updated so that it will take flexible positional arguments

In [71]:
def retrieve_all(id_no, year, pos):
    """Merge a player's game log with the game results and keep the key columns.

    The two frames are joined on 'id'. The stat columns kept depend on *pos*:
    'WR' keeps the receiving columns, 'RB' the rushing columns, and any other
    value the passing columns. The result is sorted by 'week'.
    """
    gamelog = retrieve_player_gamelog(id_no, year, pos)
    results = retrieve_game_results(id_no, year)
    combined = pd.merge(gamelog, results, on="id")

    # Position-specific stat columns
    if pos == 'WR':
        stat_columns = ['rec', 'tar', 'rec_yd', 'rec_td']
    elif pos == 'RB':
        stat_columns = ['car', 'rush_yd', 'rush_td']
    else:
        stat_columns = ['att', 'comp', 'pass_yd', 'pass_td']

    # Columns shared by every position, placed before and after the stats
    game_columns = ['id', 'gameDate', 'week', 'loc', 'gameResult', 'score']
    team_columns = ['home_team', 'away_team', 'homeTeamId', 'awayTeamId',
                    'homeTeamScore', 'awayTeamScore', 'atVs']

    selected = combined[game_columns + stat_columns + team_columns]
    return selected.sort_values(by="week")

In [72]:
def get_player_id(name):
    """Return the 'id' of the first ``players_copy`` row named *name*, or None."""
    matching_ids = players_copy.loc[players_copy['name'] == name, 'id'].values
    return matching_ids[0] if matching_ids.size > 0 else None

### Three example uses:

In [73]:
tyreek_hill_22 = retrieve_all(get_player_id('Tyreek Hill'), 2022, 'WR')

In [97]:
print(get_player_id('Tyreek Hill'))

3116406


In [93]:
tyreek_hill_23 = retrieve_all(get_player_id('Tyreek Hill'), 2023, 'WR')

In [77]:
tyreek_hill_23.head()


Unnamed: 0,id,gameDate,week,loc,gameResult,score,rec,tar,rec_yd,rec_td,home_team,away_team,homeTeamId,awayTeamId,homeTeamScore,awayTeamScore,atVs
1,401547401,2023-09-10T20:25:00.000+00:00,1,away,W,36-34,11,15,215,2,Los Angeles Chargers,Miami Dolphins,24,15,34,36,@
2,401547423,2023-09-18T00:20:00.000+00:00,2,away,W,24-17,5,9,40,1,New England Patriots,Miami Dolphins,17,15,17,24,@
3,401547429,2023-09-24T17:00:00.000+00:00,3,home,W,70-20,9,11,157,1,Miami Dolphins,Denver Broncos,15,7,70,20,vs
4,401547443,2023-10-01T17:00:00.000+00:00,4,away,L,48-20,3,5,58,0,Buffalo Bills,Miami Dolphins,2,15,48,20,@
5,401547465,2023-10-08T17:00:00.000+00:00,5,home,W,31-16,8,9,181,1,Miami Dolphins,New York Giants,15,19,31,16,vs


In [94]:
# datetime conversion from string to datetime

import datetime

# Keep only the leading YYYY-MM-DD of each timestamp string, then parse it.
# NOTE(review): the raw values end in '+00:00', so this looks like the UTC
# calendar date; a late-evening US kickoff would then land on the following
# day — confirm before deriving rest days from it.
date_part = tyreek_hill_23['gameDate'].map(lambda stamp: stamp[:10])
tyreek_hill_23['gameDate'] = date_part.map(lambda day: datetime.datetime.strptime(day, '%Y-%m-%d'))
#tyreek_hill_23['rest_days'] = gamedate - gamedate
#rolling W/L record 
#opponent rolling W/L record
#physical attributes, years of experience, draft position
# dummy variables for historical injuries
# weather conditions, venue

In [95]:
tyreek_hill_23

Unnamed: 0,id,gameDate,week,loc,gameResult,score,rec,tar,rec_yd,rec_td,home_team,away_team,homeTeamId,awayTeamId,homeTeamScore,awayTeamScore,atVs
1,401547401,2023-09-10,1,away,W,36-34,11,15,215,2,Los Angeles Chargers,Miami Dolphins,24,15,34,36,@
2,401547423,2023-09-18,2,away,W,24-17,5,9,40,1,New England Patriots,Miami Dolphins,17,15,17,24,@
3,401547429,2023-09-24,3,home,W,70-20,9,11,157,1,Miami Dolphins,Denver Broncos,15,7,70,20,vs
4,401547443,2023-10-01,4,away,L,48-20,3,5,58,0,Buffalo Bills,Miami Dolphins,2,15,48,20,@
5,401547465,2023-10-08,5,home,W,31-16,8,9,181,1,Miami Dolphins,New York Giants,15,19,31,16,vs
6,401547473,2023-10-15,6,home,W,42-21,6,10,163,1,Miami Dolphins,Carolina Panthers,15,29,42,21,vs
7,401547494,2023-10-23,7,away,L,31-17,11,15,88,1,Philadelphia Eagles,Miami Dolphins,21,15,31,17,@
8,401547497,2023-10-29,8,home,W,31-17,8,13,112,1,Miami Dolphins,New England Patriots,15,17,31,17,vs
9,401547542,2023-11-19,11,home,W,20-13,10,11,146,1,Miami Dolphins,Las Vegas Raiders,15,13,20,13,vs
0,401547242,2023-11-24,12,away,W,34-13,9,12,102,1,New York Jets,Miami Dolphins,20,15,13,34,@


In [None]:
patrick_mahomes_2022 = retrieve_all(get_player_id('Patrick Mahomes'), 2022, 'QB')

In [None]:
christian_mccaffrey_2022 = retrieve_all(get_player_id('Christian McCaffrey'), 2022, 'RB')

In [None]:
# Running backs to look up. Each name is used both as the dictionary key and
# as the lookup value, so the two can no longer drift apart (the key for
# D'Andre Swift used to be spelled "D'andre Swift").
rb_leader_names = [
    'Christian McCaffrey',
    'Breece Hall',
    'Travis Etienne Jr.',
    'Rachaad White',
    'Raheem Mostert',
    'Joe Mixon',
    'Kyren Williams',
    'Derrick Henry',
    'Bijan Robinson',
    'Jahmyr Gibbs',
    'Alvin Kamara',
    'James Cook',
    'Saquon Barkley',
    'Tony Pollard',
    'Isiah Pacheco',
    'Jerome Ford',
    'David Montgomery',
    'James Conner',
    'Kenneth Walker III',
    "D'Andre Swift",
    'Brian Robinson Jr.',
    'Jaylen Warren',
    'Najee Harris',
    "De'Von Achane",
    'Austin Ekeler',
    'Chuba Hubbard',
    'Josh Jacobs',
    'Javonte Williams',
    'Rhamondre Stevenson',
    'Tyler Allgeier',
]

# name -> ESPN player id (None when get_player_id finds no match)
rb_leaders_dict = {name: get_player_id(name) for name in rb_leader_names}
