In [14]:
# !pip install nba_api

In [5]:
from nba_api.stats.static import players
from nba_api.stats.endpoints import commonplayerinfo
from nba_api.stats.endpoints import playergamelog
from datetime import datetime


import pandas as pd

All players and their birthdate

In [24]:
# Pull every NBA Player
all_players = players.get_players()
print(f"Total players found: {len(all_players)}")

# Empty dataframe for each NBA Player
player_data = []

# For loop that gets DOB for each NBA player
for player in all_players[:4900]:
    player_info = commonplayerinfo.CommonPlayerInfo(player_id=player['id'])
    player_info_df = player_info.get_data_frames()[0]
    birthdate = player_info_df['BIRTHDATE'].iloc[0]
    player_data.append({
        'Name': player['full_name'],
        'Birthdate': birthdate
    })

# Pandas dataframe instead
player_df = pd.DataFrame(player_data)
player_df

# player_df.to_csv('nba_dob.csv', index = False)

4900

Adding ID column

In [28]:
nba_dob_df = pd.read_csv("/content/nba_dob.csv")

def get_player_id(name):
    player = players.find_players_by_full_name(name)
    if player:
        return player[0]['id']
    else:
        return None

# Add a new column for player IDs, applying the function to each player's name
nba_dob_df['id'] = nba_dob_df['Name'].apply(get_player_id)

# Display the updated DataFrame
nba_dob_df

Unnamed: 0,Name,Birthdate,id
0,Alaa Abdelnaby,1968-06-24T00:00:00,76001
1,Zaid Abdul-Aziz,1946-04-07T00:00:00,76002
2,Kareem Abdul-Jabbar,1947-04-16T00:00:00,76003
3,Mahmoud Abdul-Rauf,1969-03-09T00:00:00,51
4,Tariq Abdul-Wahad,1974-11-03T00:00:00,1505
...,...,...,...
4895,Ante Zizic,1997-01-04T00:00:00,1627790
4896,Jim Zoet,1953-12-30T00:00:00,78647
4897,Bill Zopf,1948-06-07T00:00:00,78648
4898,Ivica Zubac,1997-03-18T00:00:00,1627826


Filtering to only keep players with birthdays during the regular season

In [29]:
nba_dob_df['Birthdate'] = pd.to_datetime(nba_dob_df['Birthdate'])

# Getting month and day from their birthdate
nba_dob_df['Month'] = nba_dob_df['Birthdate'].dt.month
nba_dob_df['Day'] = nba_dob_df['Birthdate'].dt.day
# only in season birthdays
filtered_df = nba_dob_df[((nba_dob_df['Month'] == 4) & (nba_dob_df['Day'] < 14)) |
                 ((nba_dob_df['Month'] == 10) & (nba_dob_df['Day'] > 24)) |
                 (nba_dob_df['Month'] < 4) |
                 (nba_dob_df['Month'] > 10)]

filtered_df = filtered_df.drop(columns=['Month', 'Day'])

filtered_df

Unnamed: 0,Name,Birthdate,id
1,Zaid Abdul-Aziz,1946-04-07,76002
3,Mahmoud Abdul-Rauf,1969-03-09,51
4,Tariq Abdul-Wahad,1974-11-03,1505
5,Shareef Abdur-Rahim,1976-12-11,949
8,John Abramovic,1919-02-09,76007
...,...,...,...
4894,Paul Zipser,1994-02-18,1627835
4895,Ante Zizic,1997-01-04,1627790
4896,Jim Zoet,1953-12-30,78647
4898,Ivica Zubac,1997-03-18,1627826


In [31]:
filtered_df.to_csv("NBA_DOB_InSeason.csv", index = False)

Comparing LeBron stats after birthday compared to the game before their birthday

In [50]:
player_id = '2544'
season = '2020-21'

# game history
gamelog = playergamelog.PlayerGameLog(player_id=player_id, season=season)
gamelog_df = gamelog.get_data_frames()[0]

In [51]:
gamelog_df['GAME_DATE'] = pd.to_datetime(gamelog_df['GAME_DATE'])

# LeBron's birthday in the season
birthday = pd.Timestamp(year=2020, month=12, day=30)

# Find the game just before or on his birthday and the first game after. I say that NBA players party after their game if there is one on their bday.
before_birthday = gamelog_df[gamelog_df['GAME_DATE'] <= birthday].iloc[-1]
after_birthday = gamelog_df[gamelog_df['GAME_DATE'] > birthday].iloc[0]


In [53]:
print(f"Before Birthday: {before_birthday['PTS']} points")
print(f"After Birthday: {after_birthday['PTS']} points")

Before Birthday: 22 points
After Birthday: 25 points


In [60]:
LeBron_seasons = ['2003-04', '2004-05', '2005-06', '2006-07', '2007-08',
           '2008-09', '2009-10', '2010-11', '2011-12', '2012-13',
           '2013-14', '2014-15', '2015-16', '2016-17', '2017-18',
           '2018-19', '2019-20', '2020-21', '2021-22', '2022-23']

In [61]:
# LeBron info
player_id = '2544'
birthday_month = 12
birthday_day = 30

stats_before = []
stats_after = []

for season in LeBron_seasons:
    gamelog = playergamelog.PlayerGameLog(player_id=player_id, season=season)
    gamelog_df = gamelog.get_data_frames()[0]
    gamelog_df['GAME_DATE'] = pd.to_datetime(gamelog_df['GAME_DATE'])
# split to get difference in each season then average it all
    season_years = season.split('-')
    season_start_year = int(season_years[0])
    season_end_year = int(season_years[1])
    birthday_year = season_start_year if birthday_month > 9 else season_end_year
    birthday = pd.Timestamp(year=birthday_year, month=birthday_month, day=birthday_day)

    before_birthday = gamelog_df[gamelog_df['GAME_DATE'] <= birthday].iloc[-1]
    after_birthday = gamelog_df[gamelog_df['GAME_DATE'] > birthday].iloc[0]

    stats_before.append(before_birthday['PTS'])
    stats_after.append(after_birthday['PTS'])

avg_pts_before = sum(stats_before) / len(stats_before)
avg_pts_after = sum(stats_after) / len(stats_after)

print(f"Average points scored before birthday: {avg_pts_before}")
print(f"Average points scored after birthday: {avg_pts_after}")

Average points scored before birthday: 25.6
Average points scored after birthday: 26.25
