In [267]:
from nba_api.stats.static import players, teams
from nba_api.stats.endpoints import commonplayerinfo,leaguegamefinder, playergamelog, playercareerstats, commonteamroster
from nba_api.stats.library.parameters import SeasonAll
import requests as re
import pandas as pd
import datetime
import time

# Get a date of the birthday for current season
def get_date(x, season_starting_year):
    now = datetime.datetime.now()
    
    current_month = now.month
    current_day = now.day
    if x.month == 2 and x.day == 29:
        if (season_starting_year + 1) % 4 == 0 and (season_starting_year + 1) % 100 != 0 or (season_starting_year + 1) % 400 == 0:
            x = datetime.datetime(season_starting_year + 1, x.month, x.day)
        else:
            x = datetime.datetime(season_starting_year + 1, x.month, x.day - 1)
    else:
        if x.month >= 10:
            x = datetime.datetime(season_starting_year, x.month, x.day)
        elif x.month < 7:
            x = datetime.datetime(season_starting_year + 1, x.month, x.day)
        else:
            x = None
        
    return x

def get_team_ids():
    """Build a lookup from team full name to team id.

    Returns:
        dict: ``{full_name: id}`` with one entry per record returned by
        ``teams.get_teams()``.
    """
    return {entry["full_name"]: entry['id'] for entry in teams.get_teams()}

def get_list_players(year):
    """Fetch every team's roster for one season and stack them into one frame.

    Args:
        year: Calendar year in which the season starts (e.g. ``2021`` for
            the ``'2021-22'`` season).

    Returns:
        pandas.DataFrame: The first data frame of each team's
        ``CommonTeamRoster`` response, concatenated with a fresh integer
        index. An empty frame when ``get_team_ids()`` yields no teams.
    """
    # Season label in 'YYYY-YY' form, e.g. 2021 -> '2021-22'.
    season = str(year) + '-' + str(year + 1)[2:4]

    # Collect the per-team frames and concatenate once at the end instead of
    # re-copying the accumulated frame on every iteration.
    rosters = []
    for team_id in get_team_ids().values():
        print(f'Fetching team id: {team_id}')
        roster = commonteamroster.CommonTeamRoster(team_id, season)
        rosters.append(roster.get_data_frames()[0])
        time.sleep(1)  # one-second pause between consecutive requests

    if not rosters:
        # pd.concat raises on an empty list; keep the "empty frame" result.
        return pd.DataFrame()
    return pd.concat(rosters, ignore_index=True)


# Roster rows for a season, annotated with each player's in-season birthday
def get_player_info(year):
    """Return the season's roster rows whose in-season birthday is over a week old.

    Args:
        year: Calendar year in which the season starts; forwarded to
            ``get_list_players`` and ``get_date``.

    Returns:
        pandas.DataFrame: The roster with ``BIRTH_DATE`` parsed using the
        ``'%b %d, %Y'`` format and an added ``Date`` column (the result of
        ``get_date`` for each birth date), filtered to rows where ``Date``
        is earlier than seven days before the current time.
    """
    roster = get_list_players(year)

    roster['BIRTH_DATE'] = pd.to_datetime(roster['BIRTH_DATE'], format='%b %d, %Y')
    roster['Date'] = roster['BIRTH_DATE'].apply(get_date, args=(year,))

    # TODO: find a way to handle birthdays that have no game after the date.
    cutoff = datetime.datetime.now() - datetime.timedelta(days=7)
    return roster[roster['Date'] < cutoff]

# Find the first game a team played after a given date
def get_game_id(team_id, date):
    """Look up the team's earliest game dated strictly after ``date``.

    Args:
        team_id: Team identifier, passed to ``LeagueGameFinder`` as
            ``team_id_nullable``.
        date: Exclusive lower bound compared against the parsed
            ``GAME_DATE`` column.

    Returns:
        The values at column positions 0 and 4 of the earliest matching row
        (the caller in this file unpacks them as season id and game id).
        When no game is later than ``date``, prints a message and returns
        ``None``.
    """
    games = leaguegamefinder.LeagueGameFinder(team_id_nullable=team_id).get_data_frames()[0]
    games['GAME_DATE'] = pd.to_datetime(games['GAME_DATE'])

    later_games = games[games['GAME_DATE'] > date].sort_values('GAME_DATE', ascending=True)

    if later_games.empty:
        print("DataFrame is empty")
        return None

    return later_games.iloc[0, [0, 4]]

# Difference between one game's stat line and the season average (game - season)
def get_stat_diff(player_name, player_id, game_id, season_id):
    """Compare a player's stats in one game against their season means.

    Args:
        player_name: Name written into the ``Player_Name`` column.
        player_id: Player identifier used to fetch the game log; also
            written into the ``Player_ID`` column of the result.
        game_id: Value matched against the log's ``Game_ID`` column.
        season_id: Value matched against the log's ``SEASON_ID`` column.

    Returns:
        pandas.DataFrame: The row-wise difference (game row minus season
        mean row) over the columns common to both, with rows containing
        any NaN removed, ``Player_ID`` set to ``player_id`` and
        ``Player_Name`` inserted at column position 1.
    """
    logs = playergamelog.PlayerGameLog(
        player_id=player_id,
        season=SeasonAll.all,
        season_type_all_star='Regular Season',
    ).get_data_frames()[0]

    # One-row frame holding the mean of every int64/float64 column for the season.
    in_season = logs['SEASON_ID'] == season_id
    numeric_means = logs[in_season].select_dtypes(include=['int64', 'float64']).mean()
    season_row = pd.DataFrame(numeric_means).T

    game_row = logs[logs['Game_ID'] == game_id]

    # Season means first, game second, so diff() yields game - season.
    stacked = pd.concat([season_row, game_row], join='inner', ignore_index=True)
    delta = stacked.diff()
    delta.dropna(inplace=True)  # removes the all-NaN first row produced by diff()

    delta['Player_ID'] = player_id
    delta.insert(loc=1, column='Player_Name', value=player_name)

    return delta

def create_stat_diff_df(year):
    """Build the per-player stat differences for one season.

    For every row returned by ``get_player_info(year)``, finds the team's
    first game after the player's ``Date`` and computes that game's stats
    minus the season average via ``get_stat_diff``.

    Args:
        year: Calendar year in which the season starts.

    Returns:
        pandas.DataFrame: All ``get_stat_diff`` results concatenated with a
        fresh integer index; an empty frame when there is nothing to
        concatenate.
    """
    all_player_info = get_player_info(year)
    total_count = len(all_player_info)

    stat_diffs = []
    for count, (_, player) in enumerate(all_player_info.iterrows(), start=1):
        player_id = player['PLAYER_ID']
        print(f'{count}/{total_count} Fetching player with id: {player_id}')

        game = get_game_id(player['TeamID'], pd.to_datetime(player['Date']))
        if game is not None:
            season_id, game_id = game
            stat_diffs.append(
                get_stat_diff(player['PLAYER'], player_id, game_id, season_id)
            )
        # else: get_game_id found no game after the date and returned None.
        # Unpacking that would raise TypeError, so the player is skipped.

        time.sleep(1)  # one-second pause between players

    if not stat_diffs:
        # pd.concat raises on an empty list; keep the "empty frame" result.
        return pd.DataFrame()
    return pd.concat(stat_diffs, ignore_index=True)

# Combine the per-season CSV files into one dataframe
def combine_df(
    year_start,
    year_end,
    data_dir=r"C:\\Users\\tuke-\\OneDrive - Cal Poly\\2_Winter_2024\\GSB521\\",
):
    """Load ``<year>_data.csv`` for each season in a range and stack them.

    Args:
        year_start: First season starting year (inclusive).
        year_end: Last season starting year (inclusive).
        data_dir: Directory containing the ``<year>_data.csv`` files.
            Defaults to the directory this notebook writes to.

    Returns:
        pandas.DataFrame: The rows of every file, each prefixed with a
        ``Season`` column in ``'YYYY-YY'`` form, concatenated with a fresh
        integer index. An empty frame when the year range is empty.

    Raises:
        FileNotFoundError: If a season's CSV file does not exist (raised by
            ``pandas.read_csv``).
    """
    import os  # function-scope import: ``os`` is not in the file's import block

    frames = []
    for year in range(year_start, year_end + 1):
        print(f'Creating Year: {year}')
        season = str(year) + '-' + str(year + 1)[2:4]
        data = pd.read_csv(os.path.join(data_dir, f'{year}_data.csv'))
        data.insert(loc=0, column='Season', value=season)
        frames.append(data)

    if not frames:
        # pd.concat raises on an empty list; keep the "empty frame" result.
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)

In [255]:
# Build the stat-difference frame for each season and save it as <year>_data.csv
for year in range(2021, 2023):
    print(f'Creating Year: {year}')
    data = create_stat_diff_df(year)
    data.to_csv(
        r"C:\\Users\\tuke-\\OneDrive - Cal Poly\\2_Winter_2024\\GSB521\\" + f'{year}_data.csv',
        index=False,
    )

Creating Year: 2021
Fetching team id: 1610612737
Fetching team id: 1610612738
Fetching team id: 1610612739
Fetching team id: 1610612740
Fetching team id: 1610612741
Fetching team id: 1610612742
Fetching team id: 1610612743
Fetching team id: 1610612744
Fetching team id: 1610612745
Fetching team id: 1610612746
Fetching team id: 1610612747
Fetching team id: 1610612748
Fetching team id: 1610612749
Fetching team id: 1610612750
Fetching team id: 1610612751
Fetching team id: 1610612752
Fetching team id: 1610612753
Fetching team id: 1610612754
Fetching team id: 1610612755
Fetching team id: 1610612756
Fetching team id: 1610612757
Fetching team id: 1610612758
Fetching team id: 1610612759
Fetching team id: 1610612760
Fetching team id: 1610612761
Fetching team id: 1610612762
Fetching team id: 1610612763
Fetching team id: 1610612764
Fetching team id: 1610612765
Fetching team id: 1610612766
1/353 Fetching player with id: 1626153
2/353 Fetching player with id: 1630552
3/353 Fetching player with id: 1

184/353 Fetching player with id: 202689
185/353 Fetching player with id: 1629628
186/353 Fetching player with id: 203095
187/353 Fetching player with id: 1629011
188/353 Fetching player with id: 203944
189/353 Fetching player with id: 1630579
190/353 Fetching player with id: 1627853
191/353 Fetching player with id: 201959
192/353 Fetching player with id: 1628371
193/353 Fetching player with id: 1630591
194/353 Fetching player with id: 1628964
195/353 Fetching player with id: 1629626
196/353 Fetching player with id: 1630181
197/353 Fetching player with id: 1629649
198/353 Fetching player with id: 1628365
199/353 Fetching player with id: 1629021
200/353 Fetching player with id: 1629678
201/353 Fetching player with id: 1629962
202/353 Fetching player with id: 203082
203/353 Fetching player with id: 201577
204/353 Fetching player with id: 1628976
205/353 Fetching player with id: 1630175
206/353 Fetching player with id: 1630169
207/353 Fetching player with id: 1630537
208/353 Fetching playe

11/355 Fetching player with id: 1627759
12/355 Fetching player with id: 1630202
13/355 Fetching player with id: 1629684
14/355 Fetching player with id: 1627763
15/355 Fetching player with id: 1631120
16/355 Fetching player with id: 1630573
17/355 Fetching player with id: 203935
18/355 Fetching player with id: 201143
19/355 Fetching player with id: 1629057
20/355 Fetching player with id: 201933
21/355 Fetching player with id: 1630551
22/355 Fetching player with id: 1630596
23/355 Fetching player with id: 1629636
24/355 Fetching player with id: 201937
25/355 Fetching player with id: 201980
26/355 Fetching player with id: 1626224
27/355 Fetching player with id: 203526
28/355 Fetching player with id: 1630241
29/355 Fetching player with id: 1629603
30/355 Fetching player with id: 1628386
31/355 Fetching player with id: 1629731
32/355 Fetching player with id: 201577
33/355 Fetching player with id: 1630171
34/355 Fetching player with id: 1631220
35/355 Fetching player with id: 1630529
36/355 

215/355 Fetching player with id: 1630188
216/355 Fetching player with id: 1628464
217/355 Fetching player with id: 1626167
218/355 Fetching player with id: 1630178
219/355 Fetching player with id: 1626149
220/355 Fetching player with id: 1629667
221/355 Fetching player with id: 1629001
222/355 Fetching player with id: 1630644
223/355 Fetching player with id: 200782
224/355 Fetching player with id: 1627777
225/355 Fetching player with id: 203954
226/355 Fetching player with id: 1629663
227/355 Fetching player with id: 1627863
228/355 Fetching player with id: 1630194
229/355 Fetching player with id: 1628470
230/355 Fetching player with id: 1626164
231/355 Fetching player with id: 101108
232/355 Fetching player with id: 203082
233/355 Fetching player with id: 1627814
234/355 Fetching player with id: 1629111
235/355 Fetching player with id: 1629013
236/355 Fetching player with id: 1630240
237/355 Fetching player with id: 1629647
238/355 Fetching player with id: 1629014
239/355 Fetching pla

In [292]:
# Number of rows per player in `df`, as a two-column frame.
grouped_count = (
    df['Player_Name']
    .value_counts()
    .rename_axis('Player_Name')
    .reset_index(name='Count')
)

# Per-player mean of the numeric columns, limited to players with at least
# three rows, rounded to two decimals and ordered by PTS ascending.
grouped = (
    df.groupby('Player_Name')
    .mean(numeric_only=True)
    .merge(grouped_count, on='Player_Name')
    .loc[lambda frame: frame['Count'] >= 3]
    .round(decimals=2)
    .sort_values('PTS', ascending=True)
)

Creating Year: 2010
Creating Year: 2011
Creating Year: 2012
Creating Year: 2013
Creating Year: 2014
Creating Year: 2015
Creating Year: 2016
Creating Year: 2017
Creating Year: 2018
Creating Year: 2019
Creating Year: 2020
Creating Year: 2021
Creating Year: 2022
Creating Year: 2023


In [293]:
# Display the aggregated per-player frame as the cell's output.
grouped

Unnamed: 0,Player_Name,Player_ID,MIN,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,...,REB,AST,STL,BLK,TOV,PF,PTS,PLUS_MINUS,VIDEO_AVAILABLE,Count
74,Chandler Parsons,202718.0,-0.32,-2.70,-2.80,-0.16,-1.06,-1.12,-0.18,-0.93,...,-0.97,-1.46,0.42,0.67,-1.12,-0.29,-7.39,1.66,-0.06,3
543,Tyrese Maxey,1630178.0,3.23,-1.57,-1.83,-0.02,-1.49,-1.44,-0.27,-2.23,...,-0.29,3.60,-0.18,0.65,-0.41,-1.16,-6.86,2.94,0.00,3
54,Brian Roberts,203148.0,-8.92,-2.19,-3.63,-0.05,-0.84,-1.70,-0.31,-1.18,...,-0.22,-1.77,-0.51,-0.07,-1.01,-1.42,-6.39,-5.57,0.01,3
345,Kyrie Irving,202681.0,-0.37,-2.03,-1.03,-0.09,-1.11,-0.24,-0.13,-1.18,...,0.36,0.39,0.45,-0.17,-0.10,-0.33,-6.34,-6.38,-0.00,7
7,Al Jefferson,2744.0,0.19,-2.68,-2.62,-0.11,-0.02,0.10,-0.01,-0.73,...,-1.51,-0.42,-0.71,-0.40,0.03,0.61,-6.12,-12.49,0.01,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
177,Frank Kaminsky,1626163.0,6.86,2.56,5.11,0.05,0.81,2.59,0.01,0.61,...,2.07,-0.34,-0.45,-0.12,-0.36,0.24,6.55,6.61,-0.03,4
94,D'Angelo Russell,1626156.0,4.32,2.57,3.63,0.04,0.71,2.02,0.03,0.72,...,0.16,1.72,0.08,-0.11,-0.21,-0.07,6.57,1.76,-0.02,6
480,Sasha Vujacic,2756.0,6.62,2.57,2.78,0.13,1.72,1.66,0.13,0.29,...,0.13,-0.20,-0.22,-0.07,0.64,0.90,7.13,3.35,0.00,3
519,Toney Douglas,201962.0,6.12,2.76,3.52,0.16,2.04,2.23,0.21,0.00,...,-0.00,1.49,0.10,0.18,0.01,0.26,7.56,-0.32,-0.00,4


In [277]:
# Save `df` as combined_data.csv, omitting the index column.
df.to_csv(r"C:\\Users\\tuke-\\OneDrive - Cal Poly\\2_Winter_2024\\GSB521\\combined_data.csv", index=False)