In [667]:
# Importation des bibliothèques
import numpy as np
import pandas as pd

# Importation de l'API
from nba_api.stats.endpoints import leaguegamefinder

In [668]:
# Récupération du jeu de données
path = 'nba odds 2021-22.xlsx'
odds_df = pd.read_excel(path, usecols = ['Date', 'Team', 'ML'])
odds_df.tail()

Unnamed: 0,Date,Team,ML
2641,610,Boston,-165
2642,613,Boston,145
2643,613,Golden State,-165
2644,616,Golden State,155
2645,616,Boston,-175


In [669]:
# Suppression des espaces dans les noms des équipes
odds_df.Team = odds_df.Team.str.replace(' ', '')
odds_df.tail()

Unnamed: 0,Date,Team,ML
2641,610,Boston,-165
2642,613,Boston,145
2643,613,GoldenState,-165
2644,616,GoldenState,155
2645,616,Boston,-175


In [670]:
# Création d'un dictionnaire pour remplacer les équipes par leurs noms officiels
team_name = {'Detroit': 'DetroitPistons', 'Washington': 'WashingtonWizards',
            'Dallas': 'DallasMavericks', 'Phoenix': 'PhoenixSuns',
            'NewOrleans': 'NewOrleansPelicans', 'LAClippers': 'LAClippers',
            'OklahomaCity': 'OklahomaCityThunder', 'GoldenState': 'GoldenStateWarriors',
            'Philadelphia': 'Philadelphia76ers', 'Indiana': 'IndianaPacers',
            'Miami': 'MiamiHeat', 'Toronto': 'TorontoRaptors',
            'Orlando': 'OrlandoMagic', 'NewYork': 'NewYorkKnicks',
            'Boston': 'BostonCeltics', 'Chicago': 'ChicagoBulls',
            'SanAntonio': 'SanAntonioSpurs', 'Portland': 'PortlandTrailBlazers',
            'Denver': 'DenverNuggets', 'Memphis': 'MemphisGrizzlies',
            'Brooklyn': 'BrooklynNets', 'Houston': 'HoustonRockets',
            'Utah': 'UtahJazz', 'Minnesota':'MinnesotaTimberwolves',
            'LALakers': 'LosAngelesLakers', 'Atlanta': 'AtlantaHawks',
            'Charlotte': 'CharlotteHornets', 'Cleveland': 'ClevelandCavaliers',
            'Sacramento': 'SacramentoKings', 'Milwaukee': 'MilwaukeeBucks'}
odds_df.replace({'Team': team_name}, inplace = True)

In [671]:
def format(date):
    'Fonction pour changer le format de l\'heure de notre dataframe'
    'Elle prend notre dataframe en entrée'
    'Elle nous retourne une date qui respecte le format AAAAMMJJ'
    
    year = path.split(' ')[2].split('.')[0].split('-')[0]
    
    if odds_df['Date'][0] <= date:
        return year + str(date)
    else:
        return str(int(year) + 1) + '0' + str(date)

In [672]:
# Formattage des données pour la création de notre identifiant
odds_df['Date'] = list(map(format, odds_df['Date']))
odds_df['ID'] = list(map(str, odds_df['Date'])) + odds_df['Team']
odds_df.tail()

Unnamed: 0,Date,Team,ML,ID
2641,20220610,BostonCeltics,-165,20220610BostonCeltics
2642,20220613,BostonCeltics,145,20220613BostonCeltics
2643,20220613,GoldenStateWarriors,-165,20220613GoldenStateWarriors
2644,20220616,GoldenStateWarriors,155,20220616GoldenStateWarriors
2645,20220616,BostonCeltics,-175,20220616BostonCeltics


In [673]:
def change_odds(x):
    'Fonction pour changer le format des cotes de notre dataframe'
    'Elle prend une cote américaine en entrée'
    'Elle nous retourne une cote qui correspond à la cote française'

    return round(1 + x / 100 , 2) if x > 0 else round(1 + 100 / np.abs(x),2)

In [674]:
# Modification du format des cotes
odds_df['ML'] = odds_df['ML'].astype(int)
odds_df['ODDS'] = odds_df['ML'].apply(lambda x : change_odds(x))
odds_df.head()

Unnamed: 0,Date,Team,ML,ID,ODDS
0,20211019,BrooklynNets,105,20211019BrooklynNets,2.05
1,20211019,MilwaukeeBucks,-125,20211019MilwaukeeBucks,1.8
2,20211019,GoldenStateWarriors,140,20211019GoldenStateWarriors,2.4
3,20211019,LosAngelesLakers,-160,20211019LosAngelesLakers,1.62
4,20211020,IndianaPacers,-125,20211020IndianaPacers,1.8


In [675]:
# Récupération du deuxième jeu de données
score_df = leaguegamefinder.LeagueGameFinder().get_data_frames()[0]
score_df.drop(['SEASON_ID', 'TEAM_ABBREVIATION', 'MIN'], axis = 1, inplace = True)
score_df.head()

Unnamed: 0,TEAM_ID,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,PTS,FGM,FGA,FG_PCT,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
0,1610612739,Cleveland Cavaliers,22200614,2023-01-10,CLE @ UTA,L,114,42,90,0.467,...,0.833,7,30,37,18,7,7,5,25,-2.0
1,1612709920,Raptors 905,2022200089,2023-01-10,RAP vs. CCG,L,116,40,83,0.482,...,0.789,6,31,37,23,4,4,9,29,-10.4
2,1612709919,Westchester Knicks,2022200088,2023-01-10,WES vs. GBO,W,125,49,96,0.51,...,0.778,12,47,59,31,6,8,16,17,30.2
3,1610612742,Dallas Mavericks,22200617,2023-01-10,DAL @ LAC,L,101,30,69,0.435,...,0.806,3,29,32,15,5,6,11,17,-12.0
4,1610612756,Phoenix Suns,22200615,2023-01-10,PHX @ GSW,W,125,41,90,0.456,...,0.935,19,38,57,26,5,7,21,22,12.0


In [676]:
# Suppression des tirets dans les dates et des espaces dans les noms des équipes
score_df.GAME_DATE = score_df.GAME_DATE.str.replace('-', '')
score_df.TEAM_NAME = score_df.TEAM_NAME.str.replace(' ', '')
score_df.head()

Unnamed: 0,TEAM_ID,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,PTS,FGM,FGA,FG_PCT,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
0,1610612739,ClevelandCavaliers,22200614,20230110,CLE @ UTA,L,114,42,90,0.467,...,0.833,7,30,37,18,7,7,5,25,-2.0
1,1612709920,Raptors905,2022200089,20230110,RAP vs. CCG,L,116,40,83,0.482,...,0.789,6,31,37,23,4,4,9,29,-10.4
2,1612709919,WestchesterKnicks,2022200088,20230110,WES vs. GBO,W,125,49,96,0.51,...,0.778,12,47,59,31,6,8,16,17,30.2
3,1610612742,DallasMavericks,22200617,20230110,DAL @ LAC,L,101,30,69,0.435,...,0.806,3,29,32,15,5,6,11,17,-12.0
4,1610612756,PhoenixSuns,22200615,20230110,PHX @ GSW,W,125,41,90,0.456,...,0.935,19,38,57,26,5,7,21,22,12.0


In [677]:
# Suppression de la colonne MATCHUP et création des colonnes HOME et AWAY
score_df['HOME'] = [1 if '@' in score_df.MATCHUP[index] else 0 for index, row in score_df.iterrows()]
score_df.drop(['MATCHUP'], axis = 1, inplace = True)
score_df.head()

Unnamed: 0,TEAM_ID,TEAM_NAME,GAME_ID,GAME_DATE,WL,PTS,FGM,FGA,FG_PCT,FG3M,...,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS,HOME
0,1610612739,ClevelandCavaliers,22200614,20230110,L,114,42,90,0.467,10,...,7,30,37,18,7,7,5,25,-2.0,1
1,1612709920,Raptors905,2022200089,20230110,L,116,40,83,0.482,11,...,6,31,37,23,4,4,9,29,-10.4,0
2,1612709919,WestchesterKnicks,2022200088,20230110,W,125,49,96,0.51,16,...,12,47,59,31,6,8,16,17,30.2,0
3,1610612742,DallasMavericks,22200617,20230110,L,101,30,69,0.435,12,...,3,29,32,15,5,6,11,17,-12.0,1
4,1610612756,PhoenixSuns,22200615,20230110,W,125,41,90,0.456,14,...,19,38,57,26,5,7,21,22,12.0,1


In [678]:
# Suppression de la colonne WL et création des colonnes W 
score_df['WL'] = score_df['WL'].astype(str)
score_df['WIN'] = [1 if 'W' in score_df.WL[index] else 0 for index, row in score_df.iterrows()]
score_df.drop(['WL'], axis = 1, inplace = True)
score_df.head()

Unnamed: 0,TEAM_ID,TEAM_NAME,GAME_ID,GAME_DATE,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,...,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS,HOME,WIN
0,1610612739,ClevelandCavaliers,22200614,20230110,114,42,90,0.467,10,37,...,30,37,18,7,7,5,25,-2.0,1,0
1,1612709920,Raptors905,2022200089,20230110,116,40,83,0.482,11,34,...,31,37,23,4,4,9,29,-10.4,0,0
2,1612709919,WestchesterKnicks,2022200088,20230110,125,49,96,0.51,16,37,...,47,59,31,6,8,16,17,30.2,0,1
3,1610612742,DallasMavericks,22200617,20230110,101,30,69,0.435,12,38,...,29,32,15,5,6,11,17,-12.0,1,0
4,1610612756,PhoenixSuns,22200615,20230110,125,41,90,0.456,14,31,...,38,57,26,5,7,21,22,12.0,1,1


In [679]:
# On supprime toutes les lignes contenant une équipe qui ne fait pas partie des 30 équipes de la NBA 
teams = ['AtlantaHawks','BrooklynNets','BostonCeltics', 'CharlotteHornets', 'ChicagoBulls', 'ClevelandCavaliers', 
'DallasMavericks', 'DenverNuggets', 'DetroitPistons', 'GoldenStateWarriors', 'HoustonRockets', 'IndianaPacers', 
'LAClippers', 'LosAngelesLakers', 'MemphisGrizzlies', 'MiamiHeat', 'MilwaukeeBucks', 'MinnesotaTimberwolves',
'NewOrleansPelicans', 'NewYorkKnicks', 'OklahomaCityThunder', 'OrlandoMagic', 'Philadelphia76ers', 'PhoenixSuns', 
'PortlandTrailBlazers', 'SacramentoKings', 'SanAntonioSpurs', 'TorontoRaptors', 'UtahJazz', 'WashingtonWizards']

team_serie = pd.Series(score_df["TEAM_NAME"])
score_df = score_df[team_serie.str.contains('|'.join(teams))]

In [680]:
# On supprime toutes les lignes qui possède un GAME_ID unique
game_id_counts = score_df["GAME_ID"].value_counts()
single_occurrence_game_ids = game_id_counts[game_id_counts == 1].index

score_df = score_df[~score_df["GAME_ID"].isin(single_occurrence_game_ids)]
score_df.head()

Unnamed: 0,TEAM_ID,TEAM_NAME,GAME_ID,GAME_DATE,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,...,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS,HOME,WIN
0,1610612739,ClevelandCavaliers,22200614,20230110,114,42,90,0.467,10,37,...,30,37,18,7,7,5,25,-2.0,1,0
3,1610612742,DallasMavericks,22200617,20230110,101,30,69,0.435,12,38,...,29,32,15,5,6,11,17,-12.0,1,0
4,1610612756,PhoenixSuns,22200615,20230110,125,41,90,0.456,14,31,...,38,57,26,5,7,21,22,12.0,1,1
5,1610612748,MiamiHeat,22200611,20230110,112,31,79,0.392,10,33,...,28,41,18,10,4,19,19,1.0,0,1
6,1610612760,OklahomaCityThunder,22200611,20230110,111,44,91,0.484,9,35,...,30,44,27,8,3,18,27,-1.0,1,0


In [681]:
# Création d'un indice d'efficaicité lors des matchs et suppression des colonnes inutiles
score_df['EFF'] = (score_df['PTS'] + score_df['REB'] + score_df['AST'] + score_df['STL'] + score_df['BLK'] - ((score_df['FGA'] - score_df['FGM'])+(score_df['FTA']-score_df['FTM'])+score_df['TOV']))
score_df.drop(['PTS', 'REB', 'AST', 'STL', 'BLK', 'FGA', 'FGM', 'FTA', 'FTM', 'TOV', 'FG3M', 'FG3A', 'OREB', 'DREB'], axis = 1, inplace = True)
score_df.head()

Unnamed: 0,TEAM_ID,TEAM_NAME,GAME_ID,GAME_DATE,FG_PCT,FG3_PCT,FT_PCT,PF,PLUS_MINUS,HOME,WIN,EFF
0,1610612739,ClevelandCavaliers,22200614,20230110,0.467,0.27,0.833,25,-2.0,1,0,126
3,1610612742,DallasMavericks,22200617,20230110,0.435,0.316,0.806,17,-12.0,1,0,102
4,1610612756,PhoenixSuns,22200615,20230110,0.456,0.452,0.935,22,12.0,1,1,148
5,1610612748,MiamiHeat,22200611,20230110,0.392,0.303,1.0,19,1.0,0,1,118
6,1610612760,OklahomaCityThunder,22200611,20230110,0.484,0.257,0.667,27,-1.0,1,0,121


In [682]:
# Création de notre identifiant pour le merge de nos deux dataframes
score_df['ID'] = score_df['GAME_DATE'] + score_df['TEAM_NAME']
score_df.head()

Unnamed: 0,TEAM_ID,TEAM_NAME,GAME_ID,GAME_DATE,FG_PCT,FG3_PCT,FT_PCT,PF,PLUS_MINUS,HOME,WIN,EFF,ID
0,1610612739,ClevelandCavaliers,22200614,20230110,0.467,0.27,0.833,25,-2.0,1,0,126,20230110ClevelandCavaliers
3,1610612742,DallasMavericks,22200617,20230110,0.435,0.316,0.806,17,-12.0,1,0,102,20230110DallasMavericks
4,1610612756,PhoenixSuns,22200615,20230110,0.456,0.452,0.935,22,12.0,1,1,148,20230110PhoenixSuns
5,1610612748,MiamiHeat,22200611,20230110,0.392,0.303,1.0,19,1.0,0,1,118,20230110MiamiHeat
6,1610612760,OklahomaCityThunder,22200611,20230110,0.484,0.257,0.667,27,-1.0,1,0,121,20230110OklahomaCityThunder


In [683]:
# Merge des deux dataframes
df_merge = score_df.merge(odds_df, left_on = 'ID', right_on='ID')
df_merge.head()

Unnamed: 0,TEAM_ID,TEAM_NAME,GAME_ID,GAME_DATE,FG_PCT,FG3_PCT,FT_PCT,PF,PLUS_MINUS,HOME,WIN,EFF,ID,Date,Team,ML,ODDS
0,1610612744,GoldenStateWarriors,42100406,20220616,0.413,0.413,1.0,20,13.0,1,1,125,20220616GoldenStateWarriors,20220616,GoldenStateWarriors,155,2.55
1,1610612738,BostonCeltics,42100406,20220616,0.425,0.393,0.917,16,-13.0,0,0,105,20220616BostonCeltics,20220616,BostonCeltics,-175,1.57
2,1610612738,BostonCeltics,42100405,20220613,0.413,0.344,0.677,16,-10.0,1,0,91,20220613BostonCeltics,20220613,BostonCeltics,145,2.45
3,1610612744,GoldenStateWarriors,42100405,20220613,0.466,0.225,0.867,28,10.0,0,1,122,20220613GoldenStateWarriors,20220613,GoldenStateWarriors,-165,1.61
4,1610612738,BostonCeltics,42100404,20220610,0.4,0.395,0.737,17,-10.0,0,0,104,20220610BostonCeltics,20220610,BostonCeltics,-165,1.61


In [684]:
# Suppression des dernières colonnes inutiles
df_merge.drop(['PF', 'ID', 'Date', 'Team', 'ML'], axis = 1, inplace = True)
df_merge.head()

Unnamed: 0,TEAM_ID,TEAM_NAME,GAME_ID,GAME_DATE,FG_PCT,FG3_PCT,FT_PCT,PLUS_MINUS,HOME,WIN,EFF,ODDS
0,1610612744,GoldenStateWarriors,42100406,20220616,0.413,0.413,1.0,13.0,1,1,125,2.55
1,1610612738,BostonCeltics,42100406,20220616,0.425,0.393,0.917,-13.0,0,0,105,1.57
2,1610612738,BostonCeltics,42100405,20220613,0.413,0.344,0.677,-10.0,1,0,91,2.45
3,1610612744,GoldenStateWarriors,42100405,20220613,0.466,0.225,0.867,10.0,0,1,122,1.61
4,1610612738,BostonCeltics,42100404,20220610,0.4,0.395,0.737,-10.0,0,0,104,1.61
