In [1]:
from nba_api.stats.static import players
import pandas as pd

# Tous les joueurs actifs
players_list = players.get_active_players()
df_players = pd.DataFrame(players_list)
df_players.to_csv('players.csv', index=False)

print(f"✅ players.csv sauvegardé : {df_players.shape[0]} joueurs")


✅ players.csv sauvegardé : 572 joueurs


In [11]:
import pandas as pd
import time
from nba_api.stats.static import players
from nba_api.stats.endpoints import playergamelog
from requests.exceptions import ReadTimeout

# Saison 2020-21 à 2023-24
seasons = [f"{y}-{str(y+1)[-2:]}" for y in range(2020, 2024)]
players_list = players.get_active_players()
sleep_time = 2.5
all_logs = []

def safe_get_log(player_id, season, retries=3):
    for i in range(retries):
        try:
            logs = playergamelog.PlayerGameLog(player_id=player_id, season=season, season_type_all_star='Regular Season')
            return logs.get_data_frames()[0]
        except ReadTimeout:
            time.sleep(5 * (i + 1))
    return pd.DataFrame()

for season in seasons:
    for i, p in enumerate(players_list):
        player_id = p['id']
        player_name = p['full_name']
        df = safe_get_log(player_id, season)
        if not df.empty:
            df['player_id'] = player_id
            df['player_name'] = player_name
            df['season'] = season
            all_logs.append(df)
            print(f"✅ {player_name} - {season} : {df.shape[0]} lignes")
        time.sleep(sleep_time)

df_stats = pd.concat(all_logs, ignore_index=True)
df_stats.to_csv('player_game_stats.csv', index=False)
print(f"\n✅ player_game_stats.csv sauvegardé : {df_stats.shape}")


✅ Precious Achiuwa - 2020-21 : 61 lignes
✅ Steven Adams - 2020-21 : 58 lignes
✅ Bam Adebayo - 2020-21 : 64 lignes
✅ Nickeil Alexander-Walker - 2020-21 : 46 lignes
✅ Grayson Allen - 2020-21 : 50 lignes
✅ Jarrett Allen - 2020-21 : 63 lignes
✅ Kyle Anderson - 2020-21 : 69 lignes
✅ Giannis Antetokounmpo - 2020-21 : 61 lignes
✅ Cole Anthony - 2020-21 : 47 lignes
✅ OG Anunoby - 2020-21 : 43 lignes
✅ Deni Avdija - 2020-21 : 54 lignes
✅ Deandre Ayton - 2020-21 : 69 lignes
✅ Marvin Bagley III - 2020-21 : 43 lignes
✅ LaMelo Ball - 2020-21 : 51 lignes
✅ Lonzo Ball - 2020-21 : 55 lignes
✅ Mo Bamba - 2020-21 : 46 lignes
✅ Desmond Bane - 2020-21 : 68 lignes
✅ Harrison Barnes - 2020-21 : 58 lignes
✅ RJ Barrett - 2020-21 : 72 lignes
✅ Nicolas Batum - 2020-21 : 67 lignes
✅ Bradley Beal - 2020-21 : 60 lignes
✅ Malik Beasley - 2020-21 : 37 lignes
✅ Saddiq Bey - 2020-21 : 70 lignes
✅ Goga Bitadze - 2020-21 : 45 lignes
✅ Bismack Biyombo - 2020-21 : 66 lignes
✅ Bogdan Bogdanović - 2020-21 : 44 lignes
✅ Bol 

In [4]:
print(df_game_logs.columns)
print(df_game_logs.head())


Index(['SEASON_ID', 'Player_ID', 'Game_ID', 'GAME_DATE', 'MATCHUP', 'WL',
       'MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA',
       'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'STL', 'BLK', 'TOV', 'PF',
       'PTS', 'PLUS_MINUS', 'VIDEO_AVAILABLE', 'PLAYER_NAME'],
      dtype='object')
  SEASON_ID  Player_ID     Game_ID     GAME_DATE      MATCHUP WL  MIN  FGM  \
0     22023       2544  0022301195  APR 14, 2024    LAL @ NOP  W   38   11   
1     22023       2544  0022301177  APR 12, 2024    LAL @ MEM  W   41   13   
2     22023       2544  0022301155  APR 09, 2024  LAL vs. GSW  L   36   14   
3     22023       2544  0022301127  APR 06, 2024  LAL vs. CLE  W   36   10   
4     22023       2544  0022301103  APR 03, 2024    LAL @ WAS  W   36    9   

   FGA  FG_PCT  ...  REB  AST  STL  BLK  TOV  PF  PTS  PLUS_MINUS  \
0   20   0.550  ...   11   17    5    1    4   0   28          19   
1   20   0.650  ...    9    5    2    0    8   1   37          -3   
2   22   0.6

In [4]:
import pandas as pd

df_stats = pd.read_csv('player_game_stats.csv')
df_games = df_stats[['Game_ID', 'GAME_DATE', 'MATCHUP', 'WL', 'season']].drop_duplicates()
df_games.rename(columns={
    'Game_ID': 'game_id',
    'GAME_DATE': 'game_date',
    'MATCHUP': 'matchup',
    'WL': 'win_loss',
    'SEASON': 'season'
}, inplace=True)
df_games.to_csv('games.csv', index=False)
print(f"✅ games.csv sauvegardé : {df_games.shape}")


✅ games.csv sauvegardé : (9538, 5)


In [13]:
# Charger les IDs
df_stats = pd.read_csv("player_game_stats.csv")
unique_ids = df_stats[['player_id', 'Game_ID']].drop_duplicates()
game_ids = unique_ids['Game_ID'].astype(str).unique()


In [14]:
import pandas as pd
import time
from nba_api.stats.endpoints import boxscoretraditionalv2
from requests.exceptions import ReadTimeout

sleep_time = 2.5
retries = 3

# Charger les combinaisons uniques de player_id et game_id
df_stats = pd.read_csv('player_game_stats.csv')
unique_ids = df_stats[['player_id', 'Game_ID']].drop_duplicates()
boxscore_dict = {}

# Stocker les boxscores
rows = []

def get_box(game_id, retries=3):
    for i in range(retries):
        try:
            bs = boxscoretraditionalv2.BoxScoreTraditionalV2(game_id=game_id)
            dataframes = bs.get_data_frames()
            if dataframes and not dataframes[0].empty:
                return dataframes[0]
            else:
                print(f"⚠️ Match {game_id} — Aucun boxscore retourné (réponse vide)")
                return pd.DataFrame()
        except ReadTimeout:
            print(f"⏱️ Timeout pour {game_id}, tentative {i+1}/{retries}")
            time.sleep(5 * (i + 1))
        except Exception as e:
            print(f"❌ Erreur inattendue pour {game_id} : {e}")
            return pd.DataFrame()
    return pd.DataFrame()

# Optimiser en récupérant chaque game_id une seule fois
game_ids = unique_ids['Game_ID'].unique()

for gid in game_ids:
    df_box = get_box(gid)
    if not df_box.empty:
        for _, row in df_box.iterrows():
            rows.append({
                'game_id': gid,
                'player_id': row['PLAYER_ID'],
                'plus_minus': row['PLUS_MINUS'],
                'team_id': row['TEAM_ID'],
                'team_abbreviation': row['TEAM_ABBREVIATION']
            })
    print(f"📦 Match {gid} traité")
    time.sleep(sleep_time)

df_boxscores = pd.DataFrame(rows)
df_boxscores.to_csv('boxscores.csv', index=False)
print(f"\n✅ boxscores.csv sauvegardé : {df_boxscores.shape}")


❌ Erreur inattendue pour 22001069 : 'resultSet'
📦 Match 22001069 traité
❌ Erreur inattendue pour 22001062 : 'resultSet'
📦 Match 22001062 traité
❌ Erreur inattendue pour 22001050 : 'resultSet'
📦 Match 22001050 traité
❌ Erreur inattendue pour 22000986 : 'resultSet'
📦 Match 22000986 traité
❌ Erreur inattendue pour 22000969 : 'resultSet'
📦 Match 22000969 traité
❌ Erreur inattendue pour 22000960 : 'resultSet'
📦 Match 22000960 traité
❌ Erreur inattendue pour 22000888 : 'resultSet'
📦 Match 22000888 traité
❌ Erreur inattendue pour 22000871 : 'resultSet'
📦 Match 22000871 traité
❌ Erreur inattendue pour 22000852 : 'resultSet'
📦 Match 22000852 traité
❌ Erreur inattendue pour 22000838 : 'resultSet'
📦 Match 22000838 traité
❌ Erreur inattendue pour 22000827 : 'resultSet'
📦 Match 22000827 traité
❌ Erreur inattendue pour 22000814 : 'resultSet'
📦 Match 22000814 traité
❌ Erreur inattendue pour 22000784 : 'resultSet'
📦 Match 22000784 traité
❌ Erreur inattendue pour 22000773 : 'resultSet'
📦 Match 22000773