# Machine Learning Pipeline

#

In [151]:
import pandas as pd
import numpy as np
from pathlib import Path
import os

import seaborn as sns
import matplotlib.pyplot as plt
from datetime import datetime

from nba_api.stats.static import players
from nba_api.stats.endpoints import playergamelog
from nba_api.stats.endpoints import playercareerstats
from nba_api.stats.endpoints import teamestimatedmetrics
from nba_api.stats.endpoints import playernextngames
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV

#

#

### Return a list of dictionnary

In [4]:
def get_active_players():
    active_players = players.get_active_players()
    return active_players

active_players = get_active_players()
print(active_players)

[{'id': 1630173, 'full_name': 'Precious Achiuwa', 'first_name': 'Precious', 'last_name': 'Achiuwa', 'is_active': True}, {'id': 203500, 'full_name': 'Steven Adams', 'first_name': 'Steven', 'last_name': 'Adams', 'is_active': True}, {'id': 1628389, 'full_name': 'Bam Adebayo', 'first_name': 'Bam', 'last_name': 'Adebayo', 'is_active': True}, {'id': 1630534, 'full_name': 'Ochai Agbaji', 'first_name': 'Ochai', 'last_name': 'Agbaji', 'is_active': True}, {'id': 1630583, 'full_name': 'Santi Aldama', 'first_name': 'Santi', 'last_name': 'Aldama', 'is_active': True}, {'id': 1629638, 'full_name': 'Nickeil Alexander-Walker', 'first_name': 'Nickeil', 'last_name': 'Alexander-Walker', 'is_active': True}, {'id': 1628960, 'full_name': 'Grayson Allen', 'first_name': 'Grayson', 'last_name': 'Allen', 'is_active': True}, {'id': 1628386, 'full_name': 'Jarrett Allen', 'first_name': 'Jarrett', 'last_name': 'Allen', 'is_active': True}, {'id': 1630631, 'full_name': 'Jose Alvarado', 'first_name': 'Jose', 'last_name

In [5]:
player_id = [d.get('id') for d in active_players]
print(player_id)

[1630173, 203500, 1628389, 1630534, 1630583, 1629638, 1628960, 1628386, 1630631, 203937, 203507, 203648, 1630175, 1628384, 1627853, 1630166, 1629028, 1628962, 1628963, 1641735, 1631116, 1630163, 1628366, 1628964, 1631094, 1630217, 1630625, 1631230, 203084, 1630567, 1629628, 1629646, 1641734, 1628966, 201587, 203078, 1627736, 1630699, 1641777, 202722, 201976, 1630180, 1629048, 1641931, 202687, 1641710, 1641778, 203992, 202711, 1629626, 1629716, 1626164, 1630527, 1628449, 1630547, 1631103, 1631128, 1628969, 1628970, 1629052, 1627763, 1629717, 1628415, 1628971, 1627759, 1631112, 1641738, 1629650, 1630535, 1629718, 1628972, 1628973, 1628418, 1641723, 203493, 202692, 1630215, 202710, 1631219, 1631288, 203484, 1641739, 1628427, 203991, 1628975, 1628976, 1627936, 1630658, 1630608, 1630577, 1631108, 1630528, 1631321, 1641740, 1629634, 203903, 1629651, 1641730, 1629599, 1628381, 1628380, 201144, 1626192, 1641731, 1641741, 203496, 1628470, 203109, 1630595, 203552, 201939, 1630700, 203076, 163109

In [9]:
def fetch_nba_player_stats(season, existing_player_ids=[]):
    all_players_stats_list = []

    for player_id in existing_player_ids:
        try:
            gamelog = playergamelog.PlayerGameLog(player_id=str(player_id), season=season)
            df = gamelog.get_data_frames()[0]
            all_players_stats_list.append(df)
        except Exception as e:
            print(f"Erreur lors de la récupération des données pour le joueur (ID: {player_id}): {e}")

    if all_players_stats_list:
        all_players_stats = pd.concat(all_players_stats_list, ignore_index=True)
        return all_players_stats
    else:
        return pd.DataFrame() 

In [65]:
def get_perf_players_each_season(list_of_seasons):
    active_players = players.get_active_players()
    player_ids = [d.get('id') for d in active_players]
    
    all_seasons_stats_list = []

    for season in list_of_seasons:
        season_stats = fetch_nba_player_stats(season, player_ids)
        season_stats['Season'] = season  # Ajoute une colonne pour identifier la saison des stats
        all_seasons_stats_list.append(season_stats)

    # Concaténer tous les DataFrames de chaque saison en un seul DataFrame
    all_seasons_stats = pd.concat(all_seasons_stats_list, ignore_index=True)
    
    return all_seasons_stats

# Liste des saisons pour lesquelles récupérer les stats
list_of_seasons = ['2022-23', "2023-24"]

In [66]:
all_players_stats= get_perf_players_each_season(list_of_seasons)

  all_players_stats = pd.concat(all_players_stats_list, ignore_index=True)
  all_players_stats = pd.concat(all_players_stats_list, ignore_index=True)


In [67]:
all_players_stats.head()

Unnamed: 0,SEASON_ID,Player_ID,Game_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,FGA,FG_PCT,...,REB,AST,STL,BLK,TOV,PF,PTS,PLUS_MINUS,VIDEO_AVAILABLE,Season
0,22022,1630173,22201221,"APR 09, 2023",TOR vs. MIL,W,28,6,11,0.545,...,13,1,2,0,2,2,14,9,1,2022-23
1,22022,1630173,22201206,"APR 07, 2023",TOR @ BOS,L,24,6,10,0.6,...,7,0,1,0,0,3,16,-9,1,2022-23
2,22022,1630173,22201192,"APR 05, 2023",TOR @ BOS,L,20,7,11,0.636,...,9,0,0,1,1,2,16,-4,1,2022-23
3,22022,1630173,22201176,"APR 04, 2023",TOR @ CHA,W,15,4,8,0.5,...,4,1,1,0,0,1,12,9,1,2022-23
4,22022,1630173,22201163,"APR 02, 2023",TOR @ CHA,W,13,4,7,0.571,...,3,0,0,0,1,0,8,6,1,2022-23


In [68]:
all_players_stats.shape

(39815, 28)

In [69]:
all_players_stats['GAME_DATE'] = pd.to_datetime(all_players_stats['GAME_DATE'], format='%b %d, %Y')

col_type = all_players_stats["GAME_DATE"].dtypes
print(col_type)

datetime64[ns]


In [70]:
player_id_full_name = [{'Player_ID': d.get('id'), 'full_name': d.get('full_name')} for d in active_players]

In [71]:
player_id_full_name_df = pd.DataFrame(player_id_full_name)
player_id_full_name_df.head()

Unnamed: 0,Player_ID,full_name
0,1630173,Precious Achiuwa
1,203500,Steven Adams
2,1628389,Bam Adebayo
3,1630534,Ochai Agbaji
4,1630583,Santi Aldama


In [73]:
stat_player_game_with_names = pd.merge(all_players_stats, player_id_full_name_df, on='Player_ID', how='inner')

columns_order = [col for col in player_id_full_name_df.columns if col != 'id'] + [col for col in all_players_stats.columns if col != 'id']
stat_player_game_with_names = stat_player_game_with_names[columns_order]

stat_player_game_with_names.tail(10)

Unnamed: 0,Player_ID,full_name,SEASON_ID,Player_ID.1,Game_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,...,REB,AST,STL,BLK,TOV,PF,PTS,PLUS_MINUS,VIDEO_AVAILABLE,Season
39805,1627826,Ivica Zubac,22023,1627826,22300024,2023-11-14,LAC @ DEN,L,26,2,...,13,3,0,1,0,4,9,-3,1,2023-24
39806,1627826,Ivica Zubac,22023,1627826,22300179,2023-11-12,LAC vs. MEM,L,23,2,...,6,1,1,0,1,3,6,-6,1,2023-24
39807,1627826,Ivica Zubac,22023,1627826,22300014,2023-11-10,LAC @ DAL,L,22,5,...,8,1,0,2,1,3,12,1,1,2023-24
39808,1627826,Ivica Zubac,22023,1627826,22300160,2023-11-08,LAC @ BKN,L,29,5,...,10,1,0,3,1,5,10,-11,1,2023-24
39809,1627826,Ivica Zubac,22023,1627826,22300151,2023-11-06,LAC @ NYK,L,30,3,...,7,0,1,2,5,3,8,-18,1,2023-24
39810,1627826,Ivica Zubac,22023,1627826,22300127,2023-11-01,LAC @ LAL,L,29,3,...,9,0,0,0,1,5,7,-6,1,2023-24
39811,1627826,Ivica Zubac,22023,1627826,22300114,2023-10-31,LAC vs. ORL,W,23,4,...,9,1,0,1,1,1,9,19,1,2023-24
39812,1627826,Ivica Zubac,22023,1627826,22300099,2023-10-29,LAC vs. SAS,W,22,6,...,8,0,0,1,1,4,16,23,1,2023-24
39813,1627826,Ivica Zubac,22023,1627826,22300085,2023-10-27,LAC @ UTA,L,19,3,...,2,0,0,2,1,2,6,1,1,2023-24
39814,1627826,Ivica Zubac,22023,1627826,22300074,2023-10-25,LAC vs. POR,W,26,8,...,12,0,0,4,2,3,20,25,1,2023-24


In [75]:
stat_player_game_with_names.columns

Index(['Player_ID', 'full_name', 'SEASON_ID', 'Player_ID', 'Game_ID',
       'GAME_DATE', 'MATCHUP', 'WL', 'MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M',
       'FG3A', 'FG3_PCT', 'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB', 'AST',
       'STL', 'BLK', 'TOV', 'PF', 'PTS', 'PLUS_MINUS', 'VIDEO_AVAILABLE',
       'Season'],
      dtype='object')

In [89]:
stat_player_game_with_names.head()

Unnamed: 0,Player_ID,full_name,SEASON_ID,Player_ID.1,Game_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,...,REB,AST,STL,BLK,TOV,PF,PTS,PLUS_MINUS,VIDEO_AVAILABLE,Season
0,1630173,Precious Achiuwa,22022,1630173,22201221,2023-04-09,TOR vs. MIL,W,28,6,...,13,1,2,0,2,2,14,9,1,2022-23
1,1630173,Precious Achiuwa,22022,1630173,22201206,2023-04-07,TOR @ BOS,L,24,6,...,7,0,1,0,0,3,16,-9,1,2022-23
2,1630173,Precious Achiuwa,22022,1630173,22201192,2023-04-05,TOR @ BOS,L,20,7,...,9,0,0,1,1,2,16,-4,1,2022-23
3,1630173,Precious Achiuwa,22022,1630173,22201176,2023-04-04,TOR @ CHA,W,15,4,...,4,1,1,0,0,1,12,9,1,2022-23
4,1630173,Precious Achiuwa,22022,1630173,22201163,2023-04-02,TOR @ CHA,W,13,4,...,3,0,0,0,1,0,8,6,1,2022-23


#

#

In [115]:
all_players_stats_1 = pd.read_csv("../dataset/player_stats_all_2024-02-09 17-06-12.csv")

In [116]:
all_players_stats_1.head()

Unnamed: 0,Player_ID,full_name,SEASON_ID,Player_ID.1,Game_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,...,REB,AST,STL,BLK,TOV,PF,PTS,PLUS_MINUS,VIDEO_AVAILABLE,Season
0,203500,Steven Adams,22019,203500,21901317,2020-08-14,OKC @ LAC,L,6,0,...,4,0,0,0,1,0,0,2,1,2019-20
1,203500,Steven Adams,22019,203500,21901306,2020-08-12,OKC vs. MIA,W,20,4,...,8,0,1,0,0,0,8,-7,1,2019-20
2,203500,Steven Adams,22019,203500,21901265,2020-08-05,OKC @ LAL,W,28,7,...,7,2,0,0,1,2,18,13,1,2019-20
3,203500,Steven Adams,22019,203500,21901251,2020-08-03,OKC vs. DEN,L,34,3,...,10,1,0,0,2,6,9,-18,1,2019-20
4,203500,Steven Adams,22019,203500,21901240,2020-08-01,OKC vs. UTA,W,28,7,...,11,2,0,2,4,3,16,15,1,2019-20


In [117]:
def concat_dataframe(df1, df2):

    df_concat = pd.concat([df1, df2], axis=0)
    return df_concat

In [None]:
all_players_stats_full = concat_dataframe(all_players_stats_1, stat_player_game_with_names)

In [119]:
print("shape = ", all_players_stats_full.shape)
all_players_stats_full.head()

shape =  (89923, 30)


Unnamed: 0,full_name,SEASON_ID,Game_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,FGA,FG_PCT,...,STL,BLK,TOV,PF,PTS,PLUS_MINUS,VIDEO_AVAILABLE,Season,Player_ID,Player_ID.1
0,Steven Adams,22019,21901317,2020-08-14,OKC @ LAC,L,6,0,2,0.0,...,0,0,1,0,0,2,1,2019-20,203500,203500
1,Steven Adams,22019,21901306,2020-08-12,OKC vs. MIA,W,20,4,7,0.571,...,1,0,0,0,8,-7,1,2019-20,203500,203500
2,Steven Adams,22019,21901265,2020-08-05,OKC @ LAL,W,28,7,10,0.7,...,0,0,1,2,18,13,1,2019-20,203500,203500
3,Steven Adams,22019,21901251,2020-08-03,OKC vs. DEN,L,34,3,6,0.5,...,0,0,2,6,9,-18,1,2019-20,203500,203500
4,Steven Adams,22019,21901240,2020-08-01,OKC vs. UTA,W,28,7,10,0.7,...,0,2,4,3,16,15,1,2019-20,203500,203500


In [122]:
columns_to_keep = all_players_stats_full.columns.drop('Player_ID', 1).tolist() + ['Player_ID']
# Sélectionnez ces colonnes, ce qui aura pour effet de ne garder que la première occurrence de "age"
all_players_stats_full = all_players_stats_full.loc[:, columns_to_keep]

In [124]:
all_players_stats_full = all_players_stats_full.iloc[:, :29]

In [125]:
all_players_stats_full.head()

Unnamed: 0,full_name,SEASON_ID,Game_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,FGA,FG_PCT,...,AST,STL,BLK,TOV,PF,PTS,PLUS_MINUS,VIDEO_AVAILABLE,Season,Player_ID
0,Steven Adams,22019,21901317,2020-08-14,OKC @ LAC,L,6,0,2,0.0,...,0,0,0,1,0,0,2,1,2019-20,203500
1,Steven Adams,22019,21901306,2020-08-12,OKC vs. MIA,W,20,4,7,0.571,...,0,1,0,0,0,8,-7,1,2019-20,203500
2,Steven Adams,22019,21901265,2020-08-05,OKC @ LAL,W,28,7,10,0.7,...,2,0,0,1,2,18,13,1,2019-20,203500
3,Steven Adams,22019,21901251,2020-08-03,OKC vs. DEN,L,34,3,6,0.5,...,1,0,0,2,6,9,-18,1,2019-20,203500
4,Steven Adams,22019,21901240,2020-08-01,OKC vs. UTA,W,28,7,10,0.7,...,2,0,2,4,3,16,15,1,2019-20,203500


In [126]:
new_order = ["Player_ID"] + [col for col in all_players_stats_full.columns if col != "Player_ID"]

all_players_stats_full = all_players_stats_full[new_order]

In [134]:
all_players_stats_full.tail()

Unnamed: 0,Player_ID,full_name,SEASON_ID,Game_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,FGA,...,DREB,REB,AST,STL,BLK,TOV,PF,PTS,PLUS_MINUS,Season
39810,1627826,Ivica Zubac,22023,22300127,2023-11-01 00:00:00,LAC @ LAL,L,29,3,8,...,7,9,0,0,0,1,5,7,-6,2023-24
39811,1627826,Ivica Zubac,22023,22300114,2023-10-31 00:00:00,LAC vs. ORL,W,23,4,8,...,7,9,1,0,1,1,1,9,19,2023-24
39812,1627826,Ivica Zubac,22023,22300099,2023-10-29 00:00:00,LAC vs. SAS,W,22,6,7,...,6,8,0,0,1,1,4,16,23,2023-24
39813,1627826,Ivica Zubac,22023,22300085,2023-10-27 00:00:00,LAC @ UTA,L,19,3,6,...,2,2,0,0,2,1,2,6,1,2023-24
39814,1627826,Ivica Zubac,22023,22300074,2023-10-25 00:00:00,LAC vs. POR,W,26,8,10,...,7,12,0,0,4,2,3,20,25,2023-24


In [128]:
all_players_stats_full.drop("VIDEO_AVAILABLE", axis = 1, inplace=True)

In [135]:
all_players_stats_full['GAME_DATE'] = pd.to_datetime(all_players_stats_full['GAME_DATE'])

col_type = all_players_stats_full["GAME_DATE"].dtypes
print(col_type)

datetime64[ns]


In [139]:
all_players_stats_full = all_players_stats_full.sort_values(by='GAME_DATE', ascending=False)

In [None]:
all_players_stats_full.reset_index()

In [147]:
all_players_stats_full.head()

Unnamed: 0,Player_ID,full_name,SEASON_ID,Game_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,FGA,...,DREB,REB,AST,STL,BLK,TOV,PF,PTS,PLUS_MINUS,Season
33919,203488,Mike Muscala,22023,22300741,2024-02-08,DET @ POR,W,17,2,5,...,1,3,2,0,1,1,0,6,-15,2023-24
27256,1630245,Ayo Dosunmu,22023,22300737,2024-02-08,CHI @ MEM,W,27,4,5,...,0,0,2,0,0,2,2,10,-1,2023-24
30519,1641748,Andre Jackson Jr.,22023,22300738,2024-02-08,MIL vs. MIN,L,18,0,2,...,1,1,3,1,0,1,0,0,-11,2023-24
30922,1630553,Keon Johnson,22023,22300735,2024-02-08,BKN vs. CLE,L,14,1,3,...,0,2,0,2,0,1,2,3,-5,2023-24
31310,1631117,Walker Kessler,22023,22300739,2024-02-08,UTA @ PHX,L,27,4,6,...,4,9,2,2,4,0,1,10,-5,2023-24


In [146]:
all_players_stats_full.dtypes

Player_ID             object
full_name             object
SEASON_ID             object
Game_ID               object
GAME_DATE     datetime64[ns]
MATCHUP               object
WL                    object
MIN                   object
FGM                   object
FGA                   object
FG_PCT               float64
FG3M                  object
FG3A                  object
FG3_PCT              float64
FTM                   object
FTA                   object
FT_PCT               float64
OREB                  object
DREB                  object
REB                   object
AST                   object
STL                   object
BLK                   object
TOV                   object
PF                    object
PTS                   object
PLUS_MINUS            object
Season                object
dtype: object

# Drop unseful data

#

#
#
#
#
#
#
#
#
#

#

# Next player games

In [154]:
def fetch_next_player_games(existing_player_ids=[]):
    all_players_stats_list = []

    for player_id in existing_player_ids:
        try:
            # Récupération des données de jeux suivants pour le joueur
            gamelog = playernextngames.PlayerNextNGames(player_id=str(player_id))
            df = gamelog.get_data_frames()[0]
            
            # Ajout de l'ID du joueur dans une nouvelle colonne du DataFrame
            df['Player_ID'] = player_id  # Ajoute l'ID du joueur dans une nouvelle colonne
            
            all_players_stats_list.append(df)
        except Exception as e:
            print(f"Erreur lors de la récupération des données pour le joueur (ID: {player_id}): {e}")

    if all_players_stats_list:
        all_players_stats = pd.concat(all_players_stats_list, ignore_index=True)
        return all_players_stats
    else:
        return pd.DataFrame()

In [158]:
player_id = [d.get('id') for d in active_players]

next_players_games = fetch_next_player_games(player_id)

Erreur lors de la récupération des données pour le joueur (ID: 1641777): Expecting value: line 1 column 1 (char 0)
Erreur lors de la récupération des données pour le joueur (ID: 202687): Expecting value: line 1 column 1 (char 0)
Erreur lors de la récupération des données pour le joueur (ID: 1629716): Expecting value: line 1 column 1 (char 0)
Erreur lors de la récupération des données pour le joueur (ID: 1629717): Expecting value: line 1 column 1 (char 0)
Erreur lors de la récupération des données pour le joueur (ID: 1631219): Expecting value: line 1 column 1 (char 0)
Erreur lors de la récupération des données pour le joueur (ID: 1630528): Expecting value: line 1 column 1 (char 0)
Erreur lors de la récupération des données pour le joueur (ID: 1641731): Expecting value: line 1 column 1 (char 0)
Erreur lors de la récupération des données pour le joueur (ID: 1630700): Expecting value: line 1 column 1 (char 0)
Erreur lors de la récupération des données pour le joueur (ID: 1641926): Expectin

In [159]:
player_id_full_name = [{'Player_ID': d.get('id'), 'full_name': d.get('full_name')} for d in active_players]
player_id_full_name_df = pd.DataFrame(player_id_full_name)

In [162]:
next_players_games.head()

Unnamed: 0,GAME_ID,GAME_DATE,HOME_TEAM_ID,VISITOR_TEAM_ID,HOME_TEAM_NAME,VISITOR_TEAM_NAME,HOME_TEAM_ABBREVIATION,VISITOR_TEAM_ABBREVIATION,HOME_TEAM_NICKNAME,VISITOR_TEAM_NICKNAME,GAME_TIME,HOME_WL,VISITOR_WL,Player_ID
0,22300755,"FEB 10, 2024",1610612752,1610612754,New York,Indiana,NYK,IND,Knicks,Pacers,07:30 PM,33-19,29-24,1630173
1,22300765,"FEB 12, 2024",1610612745,1610612752,Houston,New York,HOU,NYK,Rockets,Knicks,08:00 PM,23-27,33-19,1630173
2,22300778,"FEB 14, 2024",1610612753,1610612752,Orlando,New York,ORL,NYK,Magic,Knicks,07:00 PM,28-24,33-19,1630173
3,22300794,"FEB 22, 2024",1610612755,1610612752,Philadelphia,New York,PHI,NYK,76ers,Knicks,07:00 PM,30-20,33-19,1630173
4,22300816,"FEB 24, 2024",1610612752,1610612738,New York,Boston,NYK,BOS,Knicks,Celtics,08:30 PM,33-19,39-12,1630173


In [174]:
stat_player_game_with_names = pd.merge(next_players_games, player_id_full_name_df, on='Player_ID', how='right')

In [175]:
stat_player_game_with_names.head()

Unnamed: 0,GAME_ID,GAME_DATE,HOME_TEAM_ID,VISITOR_TEAM_ID,HOME_TEAM_NAME,VISITOR_TEAM_NAME,HOME_TEAM_ABBREVIATION,VISITOR_TEAM_ABBREVIATION,HOME_TEAM_NICKNAME,VISITOR_TEAM_NICKNAME,GAME_TIME,HOME_WL,VISITOR_WL,Player_ID,full_name
0,22300755,"FEB 10, 2024",1610613000.0,1610613000.0,New York,Indiana,NYK,IND,Knicks,Pacers,07:30 PM,33-19,29-24,1630173,Precious Achiuwa
1,22300765,"FEB 12, 2024",1610613000.0,1610613000.0,Houston,New York,HOU,NYK,Rockets,Knicks,08:00 PM,23-27,33-19,1630173,Precious Achiuwa
2,22300778,"FEB 14, 2024",1610613000.0,1610613000.0,Orlando,New York,ORL,NYK,Magic,Knicks,07:00 PM,28-24,33-19,1630173,Precious Achiuwa
3,22300794,"FEB 22, 2024",1610613000.0,1610613000.0,Philadelphia,New York,PHI,NYK,76ers,Knicks,07:00 PM,30-20,33-19,1630173,Precious Achiuwa
4,22300816,"FEB 24, 2024",1610613000.0,1610613000.0,New York,Boston,NYK,BOS,Knicks,Celtics,08:30 PM,33-19,39-12,1630173,Precious Achiuwa


In [176]:
# Supposons que df est votre DataFrame

# Identifier les deux dernières colonnes
last_two_columns = stat_player_game_with_names.columns[-2:]

# Identifier les autres colonnes
other_columns = stat_player_game_with_names.columns[:-2]

# Créer une nouvelle liste d'ordre des colonnes
new_column_order = list(last_two_columns) + list(other_columns)

# Réorganiser le DataFrame avec le nouvel ordre des colonnes
stat_player_game_with_names = stat_player_game_with_names[new_column_order]

# Afficher le DataFrame réorganisé
stat_player_game_with_names.head()

Unnamed: 0,Player_ID,full_name,GAME_ID,GAME_DATE,HOME_TEAM_ID,VISITOR_TEAM_ID,HOME_TEAM_NAME,VISITOR_TEAM_NAME,HOME_TEAM_ABBREVIATION,VISITOR_TEAM_ABBREVIATION,HOME_TEAM_NICKNAME,VISITOR_TEAM_NICKNAME,GAME_TIME,HOME_WL,VISITOR_WL
0,1630173,Precious Achiuwa,22300755,"FEB 10, 2024",1610613000.0,1610613000.0,New York,Indiana,NYK,IND,Knicks,Pacers,07:30 PM,33-19,29-24
1,1630173,Precious Achiuwa,22300765,"FEB 12, 2024",1610613000.0,1610613000.0,Houston,New York,HOU,NYK,Rockets,Knicks,08:00 PM,23-27,33-19
2,1630173,Precious Achiuwa,22300778,"FEB 14, 2024",1610613000.0,1610613000.0,Orlando,New York,ORL,NYK,Magic,Knicks,07:00 PM,28-24,33-19
3,1630173,Precious Achiuwa,22300794,"FEB 22, 2024",1610613000.0,1610613000.0,Philadelphia,New York,PHI,NYK,76ers,Knicks,07:00 PM,30-20,33-19
4,1630173,Precious Achiuwa,22300816,"FEB 24, 2024",1610613000.0,1610613000.0,New York,Boston,NYK,BOS,Knicks,Celtics,08:30 PM,33-19,39-12


# Store Curated Data

In [58]:
def get_now(for_files:bool=False):
    """Return today's date.

    Args:
        for_files (bool): if True, return now's date for files (without - between them)

    """
    now = datetime.now()
    if for_files:
        return now.strftime("%Y%m%d__%H%M%S")
    return now.strftime("%Y-%m-%d %H-%M-%S")

In [None]:
def get_today():
    now = datetime.now()
    return now.strftime("%Y-%m-%d")

In [177]:
def store_dataframe_next_games(df):
    today = get_now()
    file_path = f"../dataset/player_next_games_{today}.csv"
    try:
        df.to_csv(file_path, index=False)
        print(f"DataFrame sauvegardé avec succès en tant que fichier CSV à : {file_path}")
    except Exception as e:
        print(f"Erreur lors de la sauvegarde du DataFrame : {e}")

In [61]:
def store_dataframe_player_stat(df):
    today = get_now()
    file_path = f"../dataset/player_stats_all_{today}.csv"
    try:
        df.to_csv(file_path, index=False)
        print(f"DataFrame sauvegardé avec succès en tant que fichier CSV à : {file_path}")
    except Exception as e:
        print(f"Erreur lors de la sauvegarde du DataFrame : {e}")

In [178]:
store_dataframe_next_games(stat_player_game_with_names)

DataFrame sauvegardé avec succès en tant que fichier CSV à : ../dataset/player_next_games_2024-02-09 19-18-07.csv


In [None]:
def concat_dataframe(df1, df2):
    df_concatene = pd.concat([df1, df2], axis=0)
    return df_concatene

In [None]:
all_players_stats_1 = pd.DataFrame("dataset/player_stats_all_2024-02-09 17-06-12.csv")