In [1]:
# Importation des bibliothèques
import numpy as np
import pandas as pd

# Importation de l'API
from nba_api.stats.endpoints import leaguegamefinder

In [2]:
# Récupération du jeu de données
path = 'nba odds 2021-22.xlsx'
dataframe = pd.read_excel(path, usecols = ['Date', 'Team', '1st', '2nd', '3rd', '4th', 'Final', 'ML'])
dataframe.tail(10)

Unnamed: 0,Date,Team,1st,2nd,3rd,4th,Final,ML
2636,606,Boston,30,20,14,24,88,175
2637,606,GoldenState,31,21,35,20,107,-200
2638,608,GoldenState,22,34,33,11,100,135
2639,608,Boston,33,35,25,23,116,-155
2640,610,GoldenState,27,22,30,28,107,145
2641,610,Boston,28,26,24,19,97,-165
2642,613,Boston,16,23,35,20,94,145
2643,613,Golden State,27,24,24,29,104,-165
2644,616,Golden State,27,27,22,27,103,155
2645,616,Boston,22,17,27,24,90,-175


In [3]:
# Suppression des espaces dans les noms des équipes
dataframe.Team = dataframe.Team.str.replace(' ', '')
dataframe.tail(10)

Unnamed: 0,Date,Team,1st,2nd,3rd,4th,Final,ML
2636,606,Boston,30,20,14,24,88,175
2637,606,GoldenState,31,21,35,20,107,-200
2638,608,GoldenState,22,34,33,11,100,135
2639,608,Boston,33,35,25,23,116,-155
2640,610,GoldenState,27,22,30,28,107,145
2641,610,Boston,28,26,24,19,97,-165
2642,613,Boston,16,23,35,20,94,145
2643,613,GoldenState,27,24,24,29,104,-165
2644,616,GoldenState,27,27,22,27,103,155
2645,616,Boston,22,17,27,24,90,-175


In [4]:
# Création d'un dictionnaire pour remplacer les équipes par leurs noms officiels
team_name = {'Detroit': 'DetroitPistons', 'Washington': 'WashingtonWizards',
            'Dallas': 'DallasMavericks', 'Phoenix': 'PhoenixSuns',
            'NewOrleans': 'NewOrleansPelicans', 'LAClippers': 'LAClippers',
            'OklahomaCity': 'OklahomaCityThunder', 'GoldenState': 'GoldenStateWarriors',
            'Philadelphia': 'Philadelphia76ers', 'Indiana': 'IndianaPacers',
            'Miami': 'MiamiHeat', 'Toronto': 'TorontoRaptors',
            'Orlando': 'OrlandoMagic', 'NewYork': 'NewYorkKnicks',
            'Boston': 'BostonCeltics', 'Chicago': 'ChicagoBulls',
            'SanAntonio': 'SanAntonioSpurs', 'Portland': 'PortlandTrailBlazers',
            'Denver': 'DenverNuggets', 'Memphis': 'MemphisGrizzlies',
            'Brooklyn': 'BrooklynNets', 'Houston': 'HoustonRockets',
            'Utah': 'UtahJazz', 'Minnesota':'MinnesotaTimberwolves',
            'LALakers': 'LosAngelesLakers', 'Atlanta': 'AtlantaHawks',
            'Charlotte': 'CharlotteHornets', 'Cleveland': 'ClevelandCavaliers',
            'Sacramento': 'SacramentoKings', 'Milwaukee': 'MilwaukeeBucks'}
dataframe.replace({'Team': team_name}, inplace = True)

In [5]:
def format(date):
    'Fonction pour changer le format de l\'heure de notre dataframe'
    'Elle prend notre dataframe en entrée'
    'Elle nous retourne une date qui respecte le format AAAAMMJJ'
    
    year = path.split(' ')[2].split('.')[0].split('-')[0]
    
    if dataframe['Date'][0] <= date:
        return year + str(date)
    else:
        return str(int(year) + 1) + '0' + str(date)

In [6]:
# Formattage des données pour la création de notre identifiant
dataframe['Date'] = list(map(format, dataframe['Date']))
dataframe['ID'] = list(map(str, dataframe['Date'])) + dataframe['Team']
dataframe.tail(10)

Unnamed: 0,Date,Team,1st,2nd,3rd,4th,Final,ML,ID
2636,20220606,BostonCeltics,30,20,14,24,88,175,20220606BostonCeltics
2637,20220606,GoldenStateWarriors,31,21,35,20,107,-200,20220606GoldenStateWarriors
2638,20220608,GoldenStateWarriors,22,34,33,11,100,135,20220608GoldenStateWarriors
2639,20220608,BostonCeltics,33,35,25,23,116,-155,20220608BostonCeltics
2640,20220610,GoldenStateWarriors,27,22,30,28,107,145,20220610GoldenStateWarriors
2641,20220610,BostonCeltics,28,26,24,19,97,-165,20220610BostonCeltics
2642,20220613,BostonCeltics,16,23,35,20,94,145,20220613BostonCeltics
2643,20220613,GoldenStateWarriors,27,24,24,29,104,-165,20220613GoldenStateWarriors
2644,20220616,GoldenStateWarriors,27,27,22,27,103,155,20220616GoldenStateWarriors
2645,20220616,BostonCeltics,22,17,27,24,90,-175,20220616BostonCeltics


In [7]:
dataframe['Date'].max()

'20220616'

In [8]:
df = leaguegamefinder.LeagueGameFinder().get_data_frames()[0]

df.head()

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
0,22022,1610612755,PHI,Philadelphia 76ers,22200409,2022-12-13,PHI vs. SAC,W,241,123,...,0.778,4,38,42,34,10,4,11,21,14.2
1,22022,1610612756,PHX,Phoenix Suns,22200411,2022-12-13,PHX @ HOU,L,240,97,...,0.947,18,26,44,22,13,7,9,24,-14.0
2,52022,1612709921,LIN,Long Island Nets,2052200214,2022-12-13,LIN vs. WES,W,241,116,...,0.778,10,32,42,29,8,2,25,24,7.8
3,52022,1612709903,SLC,Salt Lake City Stars,2052200216,2022-12-13,SLC vs. OKL,W,240,119,...,0.778,11,32,43,28,8,6,13,23,14.0
4,52022,1612709925,LAK,Lakeland Magic,2052200215,2022-12-13,LAK @ MHU,L,276,130,...,0.647,16,28,44,26,7,3,20,25,-2.0


In [9]:
df['GAME_DATE'] = df['GAME_DATE'].str.replace('-', '')
df['TEAM_NAME'] = df['TEAM_NAME'].str.replace(' ', '')


In [10]:
df['HOME'] = df['AWAY'] = 0
df.head()

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,...,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS,HOME,AWAY
0,22022,1610612755,PHI,Philadelphia76ers,22200409,20221213,PHI vs. SAC,W,241,123,...,38,42,34,10,4,11,21,14.2,0,0
1,22022,1610612756,PHX,PhoenixSuns,22200411,20221213,PHX @ HOU,L,240,97,...,26,44,22,13,7,9,24,-14.0,0,0
2,52022,1612709921,LIN,LongIslandNets,2052200214,20221213,LIN vs. WES,W,241,116,...,32,42,29,8,2,25,24,7.8,0,0
3,52022,1612709903,SLC,SaltLakeCityStars,2052200216,20221213,SLC vs. OKL,W,240,119,...,32,43,28,8,6,13,23,14.0,0,0
4,52022,1612709925,LAK,LakelandMagic,2052200215,20221213,LAK @ MHU,L,276,130,...,28,44,26,7,3,20,25,-2.0,0,0


In [11]:
for i in range(len(df)):
    if df['MATCHUP'][i][4:5] == '@':
        df['AWAY'][i] = 1
    else:
        df['HOME'][i] = 1

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['HOME'][i] = 1
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['AWAY'][i] = 1


In [12]:
df

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,...,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS,HOME,AWAY
0,22022,1610612755,PHI,Philadelphia76ers,0022200409,20221213,PHI vs. SAC,W,241,123,...,38,42,34,10,4,11,21,14.2,1,0
1,22022,1610612756,PHX,PhoenixSuns,0022200411,20221213,PHX @ HOU,L,240,97,...,26,44,22,13,7,9,24,-14.0,0,1
2,52022,1612709921,LIN,LongIslandNets,2052200214,20221213,LIN vs. WES,W,241,116,...,32,42,29,8,2,25,24,7.8,1,0
3,52022,1612709903,SLC,SaltLakeCityStars,2052200216,20221213,SLC vs. OKL,W,240,119,...,32,43,28,8,6,13,23,14.0,1,0
4,52022,1612709925,LAK,LakelandMagic,2052200215,20221213,LAK @ MHU,L,276,130,...,28,44,26,7,3,20,25,-2.0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29995,22016,1610612759,SAS,SanAntonioSpurs,0021600957,20170308,SAS vs. SAC,W,240,114,...,37,43,33,9,5,12,15,10.0,1,0
29996,22016,1612709921,LIN,LongIslandNets,2021600417,20170308,LIN @ OKL,L,241,92,...,35,47,25,11,9,21,19,-6.0,0,1
29997,22016,1612709920,RAP,Raptors905,2021600420,20170308,RAP @ SCW,L,241,122,...,31,44,25,6,3,17,25,-6.0,0,1
29998,22016,1610612748,MIA,MiamiHeat,0021600951,20170308,MIA vs. CHA,W,240,108,...,33,46,22,8,7,12,18,7.0,1,0


In [13]:
for i in range(len(df)):
    df['MATCHUP'][i] = df['MATCHUP'][i][-3:]

df

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['MATCHUP'][i] = df['MATCHUP'][i][-3:]


Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,...,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS,HOME,AWAY
0,22022,1610612755,PHI,Philadelphia76ers,0022200409,20221213,SAC,W,241,123,...,38,42,34,10,4,11,21,14.2,1,0
1,22022,1610612756,PHX,PhoenixSuns,0022200411,20221213,HOU,L,240,97,...,26,44,22,13,7,9,24,-14.0,0,1
2,52022,1612709921,LIN,LongIslandNets,2052200214,20221213,WES,W,241,116,...,32,42,29,8,2,25,24,7.8,1,0
3,52022,1612709903,SLC,SaltLakeCityStars,2052200216,20221213,OKL,W,240,119,...,32,43,28,8,6,13,23,14.0,1,0
4,52022,1612709925,LAK,LakelandMagic,2052200215,20221213,MHU,L,276,130,...,28,44,26,7,3,20,25,-2.0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29995,22016,1610612759,SAS,SanAntonioSpurs,0021600957,20170308,SAC,W,240,114,...,37,43,33,9,5,12,15,10.0,1,0
29996,22016,1612709921,LIN,LongIslandNets,2021600417,20170308,OKL,L,241,92,...,35,47,25,11,9,21,19,-6.0,0,1
29997,22016,1612709920,RAP,Raptors905,2021600420,20170308,SCW,L,241,122,...,31,44,25,6,3,17,25,-6.0,0,1
29998,22016,1610612748,MIA,MiamiHeat,0021600951,20170308,CHA,W,240,108,...,33,46,22,8,7,12,18,7.0,1,0


## Conservation des équipes NBA


In [14]:
teams = ['AtlantaHawks','BrooklynNets','BostonCeltics', 'CharlotteHornets', 'ChicagoBulls', 'ClevelandCavaliers', 'DallasMavericks', 'DenverNuggets', 'DetroitPistons', 'GoldenStateWarriors', 'HoustonRockets', 'IndianaPacers', 'LAClippers', 'LosAngelesLakers', 'MemphisGrizzlies', 'MiamiHeat','MilwaukeeBucks','MinnesotaTimberwolves','NewOrleansPelicans', 'NewYorkKnicks', 'OklahomaCityThunder', 'OrlandoMagic', 'Philadelphia76ers', 'PhoenixSuns', 'PortlandTrailBlazers', 'SacramentoKings', 'SanAntonioSpurs', 'TorontoRaptors', 'UtahJazz', 'WashingtonWizards']


In [15]:
to_delete = df.drop(df[df['TEAM_NAME'].isin(teams)].index)
to_delete.head()

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,...,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS,HOME,AWAY
2,52022,1612709921,LIN,LongIslandNets,2052200214,20221213,WES,W,241,116,...,32,42,29,8,2,25,24,7.8,1,0
3,52022,1612709903,SLC,SaltLakeCityStars,2052200216,20221213,OKL,W,240,119,...,32,43,28,8,6,13,23,14.0,1,0
4,52022,1612709925,LAK,LakelandMagic,2052200215,20221213,MHU,L,276,130,...,28,44,26,7,3,20,25,-2.0,0,1
5,52022,1612709926,MHU,MemphisHustle,2052200215,20221213,LAK,W,276,132,...,27,46,26,13,5,17,20,1.4,1,0
6,52022,1612709889,OKL,OklahomaCityBlue,2052200216,20221213,SLC,L,241,105,...,29,35,29,8,3,16,13,-14.0,0,1


In [16]:
game_to_delete = to_delete['GAME_ID'].unique()
game_to_delete

array(['2052200214', '2052200216', '2052200215', ..., '2021600418',
       '2021600420', '2021600417'], dtype=object)

In [17]:
df.drop(df[~df['TEAM_NAME'].isin(teams)].index, inplace=True)
df.drop(df[df['GAME_ID'].isin(game_to_delete)].index, inplace=True)

In [18]:
df

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,...,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS,HOME,AWAY
0,22022,1610612755,PHI,Philadelphia76ers,0022200409,20221213,SAC,W,241,123,...,38,42,34,10,4,11,21,14.2,1,0
1,22022,1610612756,PHX,PhoenixSuns,0022200411,20221213,HOU,L,240,97,...,26,44,22,13,7,9,24,-14.0,0,1
7,22022,1610612747,LAL,LosAngelesLakers,0022200413,20221213,BOS,,180,87,...,30,36,19,6,6,9,9,-6.0,1,0
9,22022,1610612744,GSW,GoldenStateWarriors,0022200410,20221213,MIL,L,239,111,...,25,37,25,7,2,18,22,-17.0,0,1
10,22022,1610612738,BOS,BostonCeltics,0022200413,20221213,LAL,,180,88,...,27,39,23,4,2,11,17,6.0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29992,22016,1610612744,GSW,GoldenStateWarriors,0021600959,20170308,BOS,L,240,86,...,34,46,23,8,12,17,20,-13.0,1,0
29994,22016,1610612745,HOU,HoustonRockets,0021600954,20170308,UTA,L,241,108,...,21,31,22,9,4,8,20,-7.0,1,0
29995,22016,1610612759,SAS,SanAntonioSpurs,0021600957,20170308,SAC,W,240,114,...,37,43,33,9,5,12,15,10.0,1,0
29998,22016,1610612748,MIA,MiamiHeat,0021600951,20170308,CHA,W,240,108,...,33,46,22,8,7,12,18,7.0,1,0


In [19]:
temp = df
df.head()


Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,...,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS,HOME,AWAY
0,22022,1610612755,PHI,Philadelphia76ers,22200409,20221213,SAC,W,241,123,...,38,42,34,10,4,11,21,14.2,1,0
1,22022,1610612756,PHX,PhoenixSuns,22200411,20221213,HOU,L,240,97,...,26,44,22,13,7,9,24,-14.0,0,1
7,22022,1610612747,LAL,LosAngelesLakers,22200413,20221213,BOS,,180,87,...,30,36,19,6,6,9,9,-6.0,1,0
9,22022,1610612744,GSW,GoldenStateWarriors,22200410,20221213,MIL,L,239,111,...,25,37,25,7,2,18,22,-17.0,0,1
10,22022,1610612738,BOS,BostonCeltics,22200413,20221213,LAL,,180,88,...,27,39,23,4,2,11,17,6.0,0,1


In [20]:
#df = temp

In [21]:
df = pd.get_dummies(df,prefix='', prefix_sep='', columns=['WL','MATCHUP'])
df

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MIN,PTS,FGM,FGA,...,OKC,ORL,PHI,PHX,POR,SAC,SAS,TOR,UTA,WAS
0,22022,1610612755,PHI,Philadelphia76ers,0022200409,20221213,241,123,43,84,...,0,0,0,0,0,1,0,0,0,0
1,22022,1610612756,PHX,PhoenixSuns,0022200411,20221213,240,97,33,103,...,0,0,0,0,0,0,0,0,0,0
7,22022,1610612747,LAL,LosAngelesLakers,0022200413,20221213,180,87,32,68,...,0,0,0,0,0,0,0,0,0,0
9,22022,1610612744,GSW,GoldenStateWarriors,0022200410,20221213,239,111,38,95,...,0,0,0,0,0,0,0,0,0,0
10,22022,1610612738,BOS,BostonCeltics,0022200413,20221213,180,88,34,80,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29992,22016,1610612744,GSW,GoldenStateWarriors,0021600959,20170308,240,86,37,84,...,0,0,0,0,0,0,0,0,0,0
29994,22016,1610612745,HOU,HoustonRockets,0021600954,20170308,241,108,35,79,...,0,0,0,0,0,0,0,0,1,0
29995,22016,1610612759,SAS,SanAntonioSpurs,0021600957,20170308,240,114,42,84,...,0,0,0,0,0,1,0,0,0,0
29998,22016,1610612748,MIA,MiamiHeat,0021600951,20170308,240,108,37,85,...,0,0,0,0,0,0,0,0,0,0


In [22]:
df['ID'] = df['GAME_DATE'] + df['TEAM_NAME']

In [23]:
df.head()

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MIN,PTS,FGM,FGA,...,ORL,PHI,PHX,POR,SAC,SAS,TOR,UTA,WAS,ID
0,22022,1610612755,PHI,Philadelphia76ers,22200409,20221213,241,123,43,84,...,0,0,0,0,1,0,0,0,0,20221213Philadelphia76ers
1,22022,1610612756,PHX,PhoenixSuns,22200411,20221213,240,97,33,103,...,0,0,0,0,0,0,0,0,0,20221213PhoenixSuns
7,22022,1610612747,LAL,LosAngelesLakers,22200413,20221213,180,87,32,68,...,0,0,0,0,0,0,0,0,0,20221213LosAngelesLakers
9,22022,1610612744,GSW,GoldenStateWarriors,22200410,20221213,239,111,38,95,...,0,0,0,0,0,0,0,0,0,20221213GoldenStateWarriors
10,22022,1610612738,BOS,BostonCeltics,22200413,20221213,180,88,34,80,...,0,0,0,0,0,0,0,0,0,20221213BostonCeltics


In [24]:
df_merge = df.merge(dataframe, left_on = 'ID', right_on='ID')

In [25]:
df_merge = df_merge.astype({"ML": int})
df_merge

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MIN,PTS,FGM,FGA,...,WAS,ID,Date,Team,1st,2nd,3rd,4th,Final,ML
0,42021,1610612738,BOS,BostonCeltics,0042100406,20220616,239,90,34,80,...,0,20220616BostonCeltics,20220616,BostonCeltics,22,17,27,24,90,-175
1,42021,1610612744,GSW,GoldenStateWarriors,0042100406,20220616,241,103,38,92,...,0,20220616GoldenStateWarriors,20220616,GoldenStateWarriors,27,27,22,27,103,155
2,42021,1610612738,BOS,BostonCeltics,0042100405,20220613,238,94,31,75,...,0,20220613BostonCeltics,20220613,BostonCeltics,16,23,35,20,94,145
3,42021,1610612744,GSW,GoldenStateWarriors,0042100405,20220613,240,104,41,88,...,0,20220613GoldenStateWarriors,20220613,GoldenStateWarriors,27,24,24,29,104,-165
4,42021,1610612744,GSW,GoldenStateWarriors,0042100404,20220610,241,107,40,91,...,0,20220610GoldenStateWarriors,20220610,GoldenStateWarriors,27,22,30,28,107,145
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2639,22021,1610612743,DEN,DenverNuggets,0022100012,20211020,241,110,44,83,...,0,20211020DenverNuggets,20211020,DenverNuggets,26,25,34,25,110,200
2640,22021,1610612747,LAL,LosAngelesLakers,0022100002,20211019,241,114,45,95,...,0,20211019LosAngelesLakers,20211019,LosAngelesLakers,34,25,26,29,114,-160
2641,22021,1610612749,MIL,MilwaukeeBucks,0022100001,20211019,239,127,48,105,...,0,20211019MilwaukeeBucks,20211019,MilwaukeeBucks,37,29,31,30,127,-125
2642,22021,1610612744,GSW,GoldenStateWarriors,0022100002,20211019,240,121,41,93,...,0,20211019GoldenStateWarriors,20211019,GoldenStateWarriors,32,21,30,38,121,140


In [26]:
df_merge['Team'].unique()

array(['BostonCeltics', 'GoldenStateWarriors', 'MiamiHeat',
       'DallasMavericks', 'MilwaukeeBucks', 'PhoenixSuns',
       'MemphisGrizzlies', 'Philadelphia76ers', 'MinnesotaTimberwolves',
       'NewOrleansPelicans', 'TorontoRaptors', 'UtahJazz', 'ChicagoBulls',
       'DenverNuggets', 'AtlantaHawks', 'BrooklynNets',
       'ClevelandCavaliers', 'LAClippers', 'CharlotteHornets',
       'SanAntonioSpurs', 'DetroitPistons', 'PortlandTrailBlazers',
       'WashingtonWizards', 'SacramentoKings', 'HoustonRockets',
       'LosAngelesLakers', 'OrlandoMagic', 'IndianaPacers',
       'OklahomaCityThunder', 'NewYorkKnicks'], dtype=object)

Changement côte US -> EUROPE

In [27]:
def CHANGE_ODDS(x):
    return round(1 + x / 100 , 2) if x > 0 else round(1 + 100 / np.abs(x),2)

In [28]:
df_merge['New_ML'] = df_merge['ML'].apply(lambda x : CHANGE_ODDS(x))
df_merge

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MIN,PTS,FGM,FGA,...,ID,Date,Team,1st,2nd,3rd,4th,Final,ML,New_ML
0,42021,1610612738,BOS,BostonCeltics,0042100406,20220616,239,90,34,80,...,20220616BostonCeltics,20220616,BostonCeltics,22,17,27,24,90,-175,1.57
1,42021,1610612744,GSW,GoldenStateWarriors,0042100406,20220616,241,103,38,92,...,20220616GoldenStateWarriors,20220616,GoldenStateWarriors,27,27,22,27,103,155,2.55
2,42021,1610612738,BOS,BostonCeltics,0042100405,20220613,238,94,31,75,...,20220613BostonCeltics,20220613,BostonCeltics,16,23,35,20,94,145,2.45
3,42021,1610612744,GSW,GoldenStateWarriors,0042100405,20220613,240,104,41,88,...,20220613GoldenStateWarriors,20220613,GoldenStateWarriors,27,24,24,29,104,-165,1.61
4,42021,1610612744,GSW,GoldenStateWarriors,0042100404,20220610,241,107,40,91,...,20220610GoldenStateWarriors,20220610,GoldenStateWarriors,27,22,30,28,107,145,2.45
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2639,22021,1610612743,DEN,DenverNuggets,0022100012,20211020,241,110,44,83,...,20211020DenverNuggets,20211020,DenverNuggets,26,25,34,25,110,200,3.00
2640,22021,1610612747,LAL,LosAngelesLakers,0022100002,20211019,241,114,45,95,...,20211019LosAngelesLakers,20211019,LosAngelesLakers,34,25,26,29,114,-160,1.62
2641,22021,1610612749,MIL,MilwaukeeBucks,0022100001,20211019,239,127,48,105,...,20211019MilwaukeeBucks,20211019,MilwaukeeBucks,37,29,31,30,127,-125,1.80
2642,22021,1610612744,GSW,GoldenStateWarriors,0022100002,20211019,240,121,41,93,...,20211019GoldenStateWarriors,20211019,GoldenStateWarriors,32,21,30,38,121,140,2.40


In [29]:
df_merge.columns

Index(['SEASON_ID', 'TEAM_ID', 'TEAM_ABBREVIATION', 'TEAM_NAME', 'GAME_ID',
       'GAME_DATE', 'MIN', 'PTS', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A',
       'FG3_PCT', 'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'STL',
       'BLK', 'TOV', 'PF', 'PLUS_MINUS', 'HOME', 'AWAY', 'L', 'W', 'ATL',
       'BKN', 'BOS', 'CHA', 'CHI', 'CLE', 'DAL', 'DEN', 'DET', 'GSW', 'HOU',
       'IND', 'LAC', 'LAL', 'MEM', 'MIA', 'MIL', 'MIN', 'NOP', 'NYK', 'OKC',
       'ORL', 'PHI', 'PHX', 'POR', 'SAC', 'SAS', 'TOR', 'UTA', 'WAS', 'ID',
       'Date', 'Team', '1st', '2nd', '3rd', '4th', 'Final', 'ML', 'New_ML'],
      dtype='object')

colonnes à supprimer ?? 
TEAM_ID - TEAM_ABREVIATION - GAME_ID - MIN - ID - DATE - TEAM - ML - FINAL

colonnes à catégoriser ?? TEAM_NAME