## **NBA Betting Model**

### **The goal here is to see if we can create a base NBA betting model**

#### **First we will get the 2023 NBA Season Data**

##### Import libaries used

In [147]:
import requests
import pandas as pd
import json
import numpy as np
import warnings
warnings.filterwarnings("ignore")

In [25]:
def game_logs(season):
    # season: '2022-23'
    url = 'https://stats.nba.com/stats/leaguegamelog'
    headers= {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36', 'Referer': 'https://www.nba.com/'}
    payload = {
        'Counter': '1000',
        'DateFrom': '',
        'DateTo': '',
        'Direction': 'DESC',
        'LeagueID': '00',
        'PlayerOrTeam': 'T',
        'Season': season,
        'SeasonType': 'Regular Season',
        'Sorter': 'DATE'}
    
    jsonData = requests.get(url, headers=headers, params=payload).json()
    
    rows = jsonData['resultSets'][0]['rowSet']
    columns = jsonData['resultSets'][0]['headers']
    
    df = pd.DataFrame(rows, columns=columns)
    
    df['DATE'] = pd.to_datetime(df['GAME_DATE'])
    df['bookie_date'] = df['DATE'].dt.strftime('%Y%m%d')
    df['home'] = np.where(df['MATCHUP'].str.contains('@'), 0, 1)
    del df['SEASON_ID']
    
    df['bet_date_id'] = df['TEAM_ABBREVIATION'] + df['bookie_date']
    
    return df 

#### Here will get the 2023 NBA season 

In [26]:
nba_games = game_logs('2022-23')    

In [28]:
nba_games.head()

Unnamed: 0,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,PLUS_MINUS,VIDEO_AVAILABLE,DATE,bookie_date,home,bet_date_id
0,1610612764,WAS,Washington Wizards,22201222,2023-04-09,WAS vs. HOU,L,240,41,97,0.423,14,45,0.311,13,21,0.619,14,37,51,28,11,5,13,21,109,-5,1,2023-04-09,20230409,1,WAS20230409
1,1610612756,PHX,Phoenix Suns,22201229,2023-04-09,PHX vs. LAC,L,240,42,99,0.424,14,37,0.378,16,22,0.727,12,35,47,29,4,3,7,21,114,-5,1,2023-04-09,20230409,1,PHX20230409
2,1610612757,POR,Portland Trail Blazers,22201230,2023-04-09,POR vs. GSW,L,240,38,95,0.4,9,35,0.257,16,24,0.667,6,24,30,25,5,3,14,9,101,-56,1,2023-04-09,20230409,1,POR20230409
3,1610612758,SAC,Sacramento Kings,22201227,2023-04-09,SAC @ DEN,L,240,37,85,0.435,11,43,0.256,10,15,0.667,10,24,34,24,7,6,15,18,95,-14,1,2023-04-09,20230409,0,SAC20230409
4,1610612737,ATL,Atlanta Hawks,22201216,2023-04-09,ATL @ BOS,L,240,44,97,0.454,11,39,0.282,15,19,0.789,16,34,50,27,11,5,9,12,114,-6,1,2023-04-09,20230409,0,ATL20230409


### **Now we will get the betting odds**

##### **The site we are scrapping the data from is https://www.actionnetwork.com/nba/odds and it has the odds from the most commom bookies, Draftking, MGM, Fanduel ect..**


In [29]:
def book_odds(date):
    
    '''
    date => eg;20221018
    '''
    url = 'https://api.actionnetwork.com/web/v1/scoreboard/nba?bookIds=15,30,76,75,123,69,68,972,71,247,79&date=' + date
    
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:101.0) Gecko/20100101 Firefox/101.0'}
    
    r = requests.get(url, headers=headers)

    tb = json.loads(r.content.decode('utf-8'))
    
    games = tb['games']
    
    game_odds = []
    
    for i in games:
        odds = i['odds']
        
        dfs = []
        
        for o in odds:
            dfs.append(pd.DataFrame([o]))
        df = pd.concat(dfs)    
        
        t1 = pd.DataFrame([i['teams'][0]])
        t2 = pd.DataFrame([i['teams'][1]])
    
        t1 = t1[['id','full_name','display_name','short_name','location','abbr', 'standings']].add_suffix('_home')
        t2 = t2[['id','full_name','display_name','short_name','location','abbr', 'standings']].add_suffix('_away')
        
        t = pd.concat([t1,t2], axis = 1)
        dt =pd.concat([t,df], axis = 1)
        
        game_odds.append(dt)    
        
    df = pd.concat(game_odds)      
    
    book_ids = {15:'open', 30: 'best odds', 76:'Pointsbet',75:'betmgm',  123:'caesars', 69:'fanduel', 68:'draftking', 972:'betrivers', 71: 'playsugarhouse', 274:'unibet', 79: 'bet365' }
    
    df['bookie'] = df['book_id'].map(book_ids)
    
    df['bookie_date'] = date
    
    df['bet_date_id'] = df['abbr_home'] +  df['bookie_date']

    return df  


In [30]:
bet_odds = book_odds('20221018')

#### **Here we will get the betting odds for each team that played on that date** 

In [31]:
bet_odds.head()

Unnamed: 0,id_home,full_name_home,display_name_home,short_name_home,location_home,abbr_home,standings_home,id_away,full_name_away,display_name_away,short_name_away,location_away,abbr_away,standings_away,ml_away,ml_home,spread_away,spread_home,spread_away_line,spread_home_line,over,under,draw,total,away_total,away_over,away_under,home_total,home_over,home_under,ml_home_public,ml_away_public,spread_home_public,spread_away_public,total_under_public,total_over_public,ml_home_money,ml_away_money,spread_home_money,spread_away_money,total_over_money,total_under_money,meta,num_bets,book_id,type,inserted,line_status,bookie,bookie_date,bet_date_id
0,164,Boston Celtics,Celtics,Celtics,Boston,BOS,"{'win': 0, 'loss': 0, 'ties': None, 'overtime_...",163,Philadelphia 76ers,76ers,76ers,Philadelphia,PHI,"{'win': 0, 'loss': 0, 'ties': None, 'overtime_...",123,-148,3.0,-3.0,-107,-112,-112,-109,,216.0,106.5,-125.0,-105.0,110.5,-105.0,-125.0,30.0,70.0,25.0,75.0,19.0,81.0,45.0,55.0,25.0,75.0,31.0,69.0,"{'over': None, 'under': None, 'ml_away': None,...",152954.0,15,game,2022-10-18T23:42:04.958108+00:00,"{'over': 0, 'under': 0, 'ml_away': 0, 'ml_draw...",open,20221018,BOS20221018
0,164,Boston Celtics,Celtics,Celtics,Boston,BOS,"{'win': 0, 'loss': 0, 'ties': None, 'overtime_...",163,Philadelphia 76ers,76ers,76ers,Philadelphia,PHI,"{'win': 0, 'loss': 0, 'ties': None, 'overtime_...",106,-134,1.5,-1.5,-120,-107,-119,-111,,110.0,54.5,-113.0,-118.0,55.5,-114.0,-117.0,,,,,,,,,,,,,"{'over': None, 'under': None, 'ml_away': None,...",0.0,15,firsthalf,2022-10-18T23:42:20.481784+00:00,"{'over': 0, 'under': 0, 'ml_away': 0, 'ml_draw...",open,20221018,BOS20221018
0,164,Boston Celtics,Celtics,Celtics,Boston,BOS,"{'win': 0, 'loss': 0, 'ties': None, 'overtime_...",163,Philadelphia 76ers,76ers,76ers,Philadelphia,PHI,"{'win': 0, 'loss': 0, 'ties': None, 'overtime_...",120,-140,2.5,-2.5,-112,-107,-110,-110,,107.0,,,,,,,,,,,,,,,,,,,"{'over': None, 'under': None, 'ml_away': None,...",,15,secondhalf,2022-10-19T00:58:32.702673+00:00,"{'over': 0, 'under': 0, 'ml_away': 0, 'ml_draw...",open,20221018,BOS20221018
0,164,Boston Celtics,Celtics,Celtics,Boston,BOS,"{'win': 0, 'loss': 0, 'ties': None, 'overtime_...",163,Philadelphia 76ers,76ers,76ers,Philadelphia,PHI,"{'win': 0, 'loss': 0, 'ties': None, 'overtime_...",102,-130,1.0,-1.0,-121,-105,-114,-115,,55.0,27.5,-111.0,-121.0,27.5,-125.0,-107.0,,,,,,,,,,,,,"{'over': None, 'under': None, 'ml_away': None,...",0.0,15,firstquarter,2022-10-18T23:42:20.844206+00:00,"{'over': 0, 'under': 0, 'ml_away': 0, 'ml_draw...",open,20221018,BOS20221018
0,164,Boston Celtics,Celtics,Celtics,Boston,BOS,"{'win': 0, 'loss': 0, 'ties': None, 'overtime_...",163,Philadelphia 76ers,76ers,76ers,Philadelphia,PHI,"{'win': 0, 'loss': 0, 'ties': None, 'overtime_...",-103,-124,1.0,-1.0,-129,100,-125,-109,,54.5,27.5,-105.0,-125.0,27.5,-125.0,-105.0,,,,,,,,,,,,,"{'over': None, 'under': None, 'ml_away': None,...",,15,secondquarter,2022-10-18T23:42:20.742035+00:00,"{'over': 0, 'under': 0, 'ml_away': 0, 'ml_draw...",open,20221018,BOS20221018


#### **The next step is get the betting odds for all the games of the season**

###### **We already have a column that has the beting dates in the nba_games dataframe, we then use it to get a list of those betting dates**

In [32]:
bookie_dates = list(set(nba_games['bookie_date'].tolist()))

##### **Now we will use bookie_dates list to run a loop on book_odds function, it will return a list of betting odds for each date**

In [33]:
# List where will store the betting odds dataframe results
nba_game_odds = []

# List to store what dates had an error and what the error was
error_notes = []

n = 0

# for each bookie date in the list bookie_dates, we will run the function book_odds
for i in bookie_dates:
    try:
        # Attempt some operation that might raise an error
        nba_game_odds.append(book_odds(i))  # This will raise a ValueError for non-integer elements

    except ValueError as e:
        # If an error occurs, catch it, add a note, and continue to the next element
        error_notes.append([i,e])
        print(e)  # to note any errors
        continue
        
    n = n + 1
    print(n)  # to note how far we are along the loop
    

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
No objects to concatenate
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
No objects to concatenate
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162


##### **Now we will check to see the size of nba_game_odds and error_notes lists, with the hope have having are error's list being empty**

In [34]:
print(len(nba_game_odds))
print(len(error_notes))

162
2


###### **We will now rerun the two errors we have again and update it to the nba_game_odds**  

In [41]:
if len(error_notes) > 0:
    for i in error_notes:
        nba_game_odds.append(book_odds(i[0]))

##### **Now we will concat our list of dataframes into one**

In [43]:
game_odds = pd.concat(nba_game_odds)
game_odds.sample(5)

Unnamed: 0,id_home,full_name_home,display_name_home,short_name_home,location_home,abbr_home,standings_home,id_away,full_name_away,display_name_away,short_name_away,location_away,abbr_away,standings_away,ml_away,ml_home,spread_away,spread_home,spread_away_line,spread_home_line,over,under,draw,total,away_total,away_over,away_under,home_total,home_over,home_under,ml_home_public,ml_away_public,spread_home_public,spread_away_public,total_under_public,total_over_public,ml_home_money,ml_away_money,spread_home_money,spread_away_money,total_over_money,total_under_money,meta,num_bets,book_id,type,inserted,line_status,bookie,bookie_date,bet_date_id
0,168,Indiana Pacers,Pacers,Pacers,Indiana,IND,"{'win': 0, 'loss': 0, 'ties': None, 'overtime_...",185,Sacramento Kings,Kings,Kings,Sacramento,SAC,"{'win': 0, 'loss': 0, 'ties': None, 'overtime_...",-120,-106,-1.0,1.0,102,-132,-112,-120,,57.0,,,,,,,,,,,,,,,,,,,"{'draw': None, 'over': {'line_id': 3222066899,...",,71,fourthquarter,2023-02-03T23:39:12.912066+00:00,"{'draw': None, 'over': 0, 'under': 0, 'ml_away...",playsugarhouse,20230203,IND20230203
0,172,Milwaukee Bucks,Bucks,Bucks,Milwaukee,MIL,"{'win': 0, 'loss': 0, 'ties': None, 'overtime_...",163,Philadelphia 76ers,76ers,76ers,Philadelphia,PHI,"{'win': 0, 'loss': 0, 'ties': None, 'overtime_...",120,-150,0.5,-0.5,105,-135,-115,-115,,58.5,28.5,-115.0,-115.0,29.5,-125.0,-105.0,,,,,,,,,,,,,,0.0,30,firstquarter,2023-03-04T05:03:57.532148+00:00,"{'draw': 0, 'over': 0, 'under': 0, 'ml_away': ...",best odds,20230304,MIL20230304
0,168,Indiana Pacers,Pacers,Pacers,Indiana,IND,"{'win': 0, 'loss': 0, 'ties': None, 'overtime_...",182,Golden State Warriors,Warriors,Warriors,Golden State,GSW,"{'win': 0, 'loss': 0, 'ties': None, 'overtime_...",-118,-102,-0.5,0.5,-106,-114,-114,-112,,61.5,,,,,,,,,,,,,,,,,,,"{'draw': None, 'over': None, 'under': None, 'm...",,15,secondquarter,2022-12-14T23:52:34.318675+00:00,"{'draw': None, 'over': 0, 'under': 0, 'ml_away...",open,20221214,IND20221214
0,164,Boston Celtics,Celtics,Celtics,Boston,BOS,"{'win': 0, 'loss': 0, 'ties': None, 'overtime_...",163,Philadelphia 76ers,76ers,76ers,Philadelphia,PHI,"{'win': 0, 'loss': 0, 'ties': None, 'overtime_...",140,-170,4.5,-4.5,-115,-112,-115,-112,,226.0,112.5,-110.0,-120.0,114.5,-125.0,-105.0,,,,,,,,,,,,,,0.0,30,live,2023-04-05T00:05:17.744057+00:00,"{'draw': 0, 'over': 0, 'under': 0, 'ml_away': ...",best odds,20230404,BOS20230404
0,174,Denver Nuggets,Nuggets,Nuggets,Denver,DEN,"{'win': 0, 'loss': 0, 'ties': None, 'overtime_...",185,Sacramento Kings,Kings,Kings,Sacramento,SAC,"{'win': 0, 'loss': 0, 'ties': None, 'overtime_...",-145,115,-1.0,1.0,-115,-115,-115,-115,,61.5,,,,,,,,,,,,,,,,,,,"{'draw': None, 'over': {'selection_id': 'ad839...",,123,firstquarter,2022-12-28T03:13:31.192688+00:00,"{'draw': None, 'over': 2, 'under': 2, 'ml_away...",caesars,20221227,DEN20221227


#### **Now that we have the NBA betting odds, we are going to use betting odds set by the market as a feature for our nba_games dataframe**

##### **We will use the best money line odds as our feature**

In [44]:
nba_game_odds = game_odds[(game_odds['book_id'] == 30) & (game_odds['type'] == 'game')] #filter for best odds

In [45]:
nba_game_odds.head()

Unnamed: 0,id_home,full_name_home,display_name_home,short_name_home,location_home,abbr_home,standings_home,id_away,full_name_away,display_name_away,short_name_away,location_away,abbr_away,standings_away,ml_away,ml_home,spread_away,spread_home,spread_away_line,spread_home_line,over,under,draw,total,away_total,away_over,away_under,home_total,home_over,home_under,ml_home_public,ml_away_public,spread_home_public,spread_away_public,total_under_public,total_over_public,ml_home_money,ml_away_money,spread_home_money,spread_away_money,total_over_money,total_under_money,meta,num_bets,book_id,type,inserted,line_status,bookie,bookie_date,bet_date_id
0,161,Orlando Magic,Magic,Magic,Orlando,ORL,"{'win': 0, 'loss': 0, 'ties': None, 'overtime_...",167,Cleveland Cavaliers,Cavaliers,Cavaliers,Cleveland,CLE,"{'win': 0, 'loss': 0, 'ties': None, 'overtime_...",-190,160,-4.5,4.5,-110,-110,-110,-110,,223.0,112.5,-130,-100,110.5,-105,-125,50,50,67,33,64,36,81,19,92,8,1,99,,0,30,game,2023-04-06T16:02:47.151062+00:00,"{'draw': 0, 'over': 0, 'under': 0, 'ml_away': ...",best odds,20230406,ORL20230406
0,160,Miami Heat,Heat,Heat,Miami,MIA,"{'win': 0, 'loss': 0, 'ties': None, 'overtime_...",163,Philadelphia 76ers,76ers,76ers,Philadelphia,PHI,"{'win': 0, 'loss': 0, 'ties': None, 'overtime_...",180,-210,5.5,-5.5,-110,-110,-112,-108,,221.5,108.5,-115,-115,112.5,-110,-120,43,57,33,67,33,67,9,91,13,87,13,87,,0,30,game,2023-04-06T13:44:04.392115+00:00,"{'draw': 0, 'over': 0, 'under': 0, 'ml_away': ...",best odds,20230406,MIA20230406
0,181,San Antonio Spurs,Spurs,Spurs,San Antonio,SAS,"{'win': 0, 'loss': 0, 'ties': None, 'overtime_...",176,Portland Trail Blazers,Trail Blazers,Trail Blazers,Portland,POR,"{'win': 0, 'loss': 0, 'ties': None, 'overtime_...",115,-135,2.5,-2.5,-110,-110,-110,-110,,229.5,114.5,-115,-115,116.5,-110,-120,50,50,75,25,40,60,63,37,32,68,97,3,,0,30,game,2023-04-06T13:50:53.160478+00:00,"{'draw': 0, 'over': 0, 'under': 0, 'ml_away': ...",best odds,20230406,SAS20230406
0,173,Utah Jazz,Jazz,Jazz,Utah,UTA,"{'win': 0, 'loss': 0, 'ties': None, 'overtime_...",175,Oklahoma City Thunder,Thunder,Thunder,Oklahoma City,OKC,"{'win': 0, 'loss': 0, 'ties': None, 'overtime_...",-200,170,-5.0,5.0,-110,-110,-110,-110,,234.5,120.5,-125,-105,116.5,-105,-125,11,89,8,92,67,33,91,9,1,99,85,15,,0,30,game,2023-04-06T16:44:46.979073+00:00,"{'draw': 0, 'over': 0, 'under': 0, 'ml_away': ...",best odds,20230406,UTA20230406
0,174,Denver Nuggets,Nuggets,Nuggets,Denver,DEN,"{'win': 0, 'loss': 0, 'ties': None, 'overtime_...",183,Phoenix Suns,Suns,Suns,Phoenix,PHX,"{'win': 0, 'loss': 0, 'ties': None, 'overtime_...",160,-190,4.5,-4.5,-110,-110,-110,-110,,226.5,110.5,-105,-125,116.5,-115,-115,88,12,50,50,33,67,93,7,85,15,47,53,,0,30,game,2023-04-06T16:26:03.715123+00:00,"{'draw': 0, 'over': 0, 'under': 0, 'ml_away': ...",best odds,20230406,DEN20230406


#### **We will now create ID columns so that we can map the moneyline to nba_games**

In [60]:
nba_game_odds['home_date_id'] = nba_game_odds['abbr_home'] + nba_game_odds['bookie_date']
nba_game_odds['away_date_id'] = nba_game_odds['abbr_away'] + nba_game_odds['bookie_date']

#### **We will now create a dictionary for moneyline and map it**

In [59]:
home_dic_ml = dict(zip(nba_game_odds.home_date_id, nba_game_odds.ml_home))
away_dic_ml = dict(zip(nba_game_odds.away_date_id, nba_game_odds.ml_away))
ml_dic = {**home_dic_ml, **away_dic_ml}
nba_games['ml_line'] = nba_games['bet_date_id'].map(ml_dic)

#### **We can see we have the moneyline column to our Nba games dataframe, called ml_line**

In [57]:
nba_games.head()

Unnamed: 0,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,PLUS_MINUS,VIDEO_AVAILABLE,DATE,bookie_date,home,bet_date_id,ml_line
0,1610612764,WAS,Washington Wizards,22201222,2023-04-09,WAS vs. HOU,L,240,41,97,0.423,14,45,0.311,13,21,0.619,14,37,51,28,11,5,13,21,109,-5,1,2023-04-09,20230409,1,WAS20230409,-130
1,1610612756,PHX,Phoenix Suns,22201229,2023-04-09,PHX vs. LAC,L,240,42,99,0.424,14,37,0.378,16,22,0.727,12,35,47,29,4,3,7,21,114,-5,1,2023-04-09,20230409,1,PHX20230409,-900
2,1610612757,POR,Portland Trail Blazers,22201230,2023-04-09,POR vs. GSW,L,240,38,95,0.4,9,35,0.257,16,24,0.667,6,24,30,25,5,3,14,9,101,-56,1,2023-04-09,20230409,1,POR20230409,-1800
3,1610612758,SAC,Sacramento Kings,22201227,2023-04-09,SAC @ DEN,L,240,37,85,0.435,11,43,0.256,10,15,0.667,10,24,34,24,7,6,15,18,95,-14,1,2023-04-09,20230409,0,SAC20230409,130
4,1610612737,ATL,Atlanta Hawks,22201216,2023-04-09,ATL @ BOS,L,240,44,97,0.454,11,39,0.282,15,19,0.789,16,34,50,27,11,5,9,12,114,-6,1,2023-04-09,20230409,0,ATL20230409,-260


##### **Now we sort the columns to have a clean dataframe where string columns are next to each other.**

In [122]:
move_columns = ['TEAM_ABBREVIATION','TEAM_NAME','GAME_DATE','MATCHUP','GAME_ID','bookie_date','DATE', 'bet_date_id', 'MIN','TEAM_ID','home', 'ml_line','WL' ]
df = nba_games[move_columns + [col for col in nba_games.columns if col not in move_columns]]
df = df.drop(columns= ['VIDEO_AVAILABLE'])

In [123]:
df.head(5)

Unnamed: 0,TEAM_ABBREVIATION,TEAM_NAME,GAME_DATE,MATCHUP,GAME_ID,bookie_date,DATE,bet_date_id,MIN,TEAM_ID,home,ml_line,WL,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,PLUS_MINUS
0,WAS,Washington Wizards,2023-04-09,WAS vs. HOU,22201222,20230409,2023-04-09,WAS20230409,240,1610612764,1,-130,L,41,97,0.423,14,45,0.311,13,21,0.619,14,37,51,28,11,5,13,21,109,-5
1,PHX,Phoenix Suns,2023-04-09,PHX vs. LAC,22201229,20230409,2023-04-09,PHX20230409,240,1610612756,1,-900,L,42,99,0.424,14,37,0.378,16,22,0.727,12,35,47,29,4,3,7,21,114,-5
2,POR,Portland Trail Blazers,2023-04-09,POR vs. GSW,22201230,20230409,2023-04-09,POR20230409,240,1610612757,1,-1800,L,38,95,0.4,9,35,0.257,16,24,0.667,6,24,30,25,5,3,14,9,101,-56
3,SAC,Sacramento Kings,2023-04-09,SAC @ DEN,22201227,20230409,2023-04-09,SAC20230409,240,1610612758,0,130,L,37,85,0.435,11,43,0.256,10,15,0.667,10,24,34,24,7,6,15,18,95,-14
4,ATL,Atlanta Hawks,2023-04-09,ATL @ BOS,22201216,20230409,2023-04-09,ATL20230409,240,1610612737,0,-260,L,44,97,0.454,11,39,0.282,15,19,0.789,16,34,50,27,11,5,9,12,114,-6


In [124]:
df['WL'] = np.where(df['WL'] == 'W', 1, 0)  #numericate WL where wins are 1 and 0 for loses
df = df.sort_values(by='DATE') #sort by date

#### **We will add the rolling averages for all the statistical columns by team** 

In [125]:
dt = df.join(df.groupby('TEAM_ID')[list(df)[12:]].expanding().mean().reset_index(level=0, drop=True).groupby(df['TEAM_ID']).shift().add_prefix('Rolling_Avg_'))

In [126]:
dt.head(50)

Unnamed: 0,TEAM_ABBREVIATION,TEAM_NAME,GAME_DATE,MATCHUP,GAME_ID,bookie_date,DATE,bet_date_id,MIN,TEAM_ID,home,ml_line,WL,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,PLUS_MINUS,Rolling_Avg_WL,Rolling_Avg_FGM,Rolling_Avg_FGA,Rolling_Avg_FG_PCT,Rolling_Avg_FG3M,Rolling_Avg_FG3A,Rolling_Avg_FG3_PCT,Rolling_Avg_FTM,Rolling_Avg_FTA,Rolling_Avg_FT_PCT,Rolling_Avg_OREB,Rolling_Avg_DREB,Rolling_Avg_REB,Rolling_Avg_AST,Rolling_Avg_STL,Rolling_Avg_BLK,Rolling_Avg_TOV,Rolling_Avg_PF,Rolling_Avg_PTS,Rolling_Avg_PLUS_MINUS
2459,PHI,Philadelphia 76ers,2022-10-18,PHI @ BOS,22200001,20221018,2022-10-18,PHI20221018,240,1610612755,0,150,0,40,80,0.5,13,34,0.382,24,28,0.857,4,27,31,16,8,3,14,25,117,-9,,,,,,,,,,,,,,,,,,,,
2456,LAL,Los Angeles Lakers,2022-10-18,LAL @ GSW,22200002,20221018,2022-10-18,LAL20221018,240,1610612747,0,-250,0,40,94,0.426,10,40,0.25,19,25,0.76,9,39,48,23,12,4,22,18,109,-14,,,,,,,,,,,,,,,,,,,,
2458,BOS,Boston Celtics,2022-10-18,BOS vs. PHI,22200001,20221018,2022-10-18,BOS20221018,240,1610612738,1,-186,1,46,82,0.561,12,35,0.343,22,28,0.786,6,30,36,24,8,3,11,24,126,9,,,,,,,,,,,,,,,,,,,,
2457,GSW,Golden State Warriors,2022-10-18,GSW vs. LAL,22200002,20221018,2022-10-18,GSW20221018,240,1610612744,1,200,1,45,99,0.455,16,45,0.356,17,23,0.739,11,37,48,31,11,4,18,23,123,14,,,,,,,,,,,,,,,,,,,,
2455,ORL,Orlando Magic,2022-10-19,ORL @ DET,22200003,20221019,2022-10-19,ORL20221019,240,1610612753,0,150,0,42,86,0.488,11,30,0.367,14,19,0.737,10,38,48,21,5,5,18,24,109,-4,,,,,,,,,,,,,,,,,,,,
2432,WAS,Washington Wizards,2022-10-19,WAS @ IND,22200004,20221019,2022-10-19,WAS20221019,240,1610612764,0,-136,1,42,92,0.457,11,31,0.355,19,24,0.792,14,39,53,21,5,10,17,19,114,7,,,,,,,,,,,,,,,,,,,,
2433,HOU,Houston Rockets,2022-10-19,HOU @ ATL,22200005,20221019,2022-10-19,HOU20221019,240,1610612745,0,370,0,42,98,0.429,9,35,0.257,14,15,0.933,15,39,54,25,4,3,16,20,107,-10,,,,,,,,,,,,,,,,,,,,
2434,ATL,Atlanta Hawks,2022-10-19,ATL vs. HOU,22200005,20221019,2022-10-19,ATL20221019,240,1610612737,1,-500,1,45,90,0.5,7,25,0.28,20,24,0.833,4,34,38,30,12,5,9,18,117,10,,,,,,,,,,,,,,,,,,,,
2435,DET,Detroit Pistons,2022-10-19,DET vs. ORL,22200003,20221019,2022-10-19,DET20221019,240,1610612765,1,-190,1,40,94,0.426,14,38,0.368,19,24,0.792,12,29,41,31,11,4,13,21,113,4,,,,,,,,,,,,,,,,,,,,
2437,CHI,Chicago Bulls,2022-10-19,CHI @ MIA,22200007,20221019,2022-10-19,CHI20221019,240,1610612741,0,188,1,41,93,0.441,13,36,0.361,21,28,0.75,12,34,46,23,12,6,15,26,116,8,,,,,,,,,,,,,,,,,,,,


#### **Now we will prepare the dataframe to run a Logistic Regression model**

In [137]:
# Select all numerical columns
data = dt.select_dtypes(include=[np.number])
data.dropna(inplace=True) #drop NAN values
data.head()

Unnamed: 0,MIN,TEAM_ID,home,ml_line,WL,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,PLUS_MINUS,Rolling_Avg_WL,Rolling_Avg_FGM,Rolling_Avg_FGA,Rolling_Avg_FG_PCT,Rolling_Avg_FG3M,Rolling_Avg_FG3A,Rolling_Avg_FG3_PCT,Rolling_Avg_FTM,Rolling_Avg_FTA,Rolling_Avg_FT_PCT,Rolling_Avg_OREB,Rolling_Avg_DREB,Rolling_Avg_REB,Rolling_Avg_AST,Rolling_Avg_STL,Rolling_Avg_BLK,Rolling_Avg_TOV,Rolling_Avg_PF,Rolling_Avg_PTS,Rolling_Avg_PLUS_MINUS
2428,240,1610612747,1,-165,0,33,94,0.351,9,45,0.2,22,26,0.846,8,30,38,23,13,5,10,24,97,-6,0.0,40.0,94.0,0.426,10.0,40.0,0.25,19.0,25.0,0.76,9.0,39.0,48.0,23.0,12.0,4.0,22.0,18.0,109.0,-14.0
2430,240,1610612755,1,116,0,36,83,0.434,5,24,0.208,11,13,0.846,6,38,44,17,6,5,10,14,88,-2,0.0,40.0,80.0,0.5,13.0,34.0,0.382,24.0,28.0,0.857,4.0,27.0,31.0,16.0,8.0,3.0,14.0,25.0,117.0,-9.0
2414,240,1610612740,0,191,1,42,89,0.472,8,22,0.364,32,37,0.865,15,38,53,23,9,7,16,14,124,12,1.0,50.0,102.0,0.49,12.0,26.0,0.462,18.0,25.0,0.72,21.0,40.0,61.0,31.0,9.0,4.0,13.0,21.0,130.0,22.0
2406,240,1610612752,1,185,1,50,95,0.526,16,38,0.421,14,22,0.636,11,39,50,29,4,6,12,26,130,24,0.0,44.0,109.0,0.404,9.0,37.0,0.243,15.0,18.0,0.833,18.0,36.0,54.0,28.0,7.0,5.0,12.0,26.0,112.0,-3.0
2407,240,1610612763,0,165,1,45,98,0.459,19,42,0.452,20,29,0.69,17,33,50,26,8,10,12,21,129,7,1.0,42.0,108.0,0.389,17.0,50.0,0.34,14.0,23.0,0.609,20.0,44.0,64.0,27.0,6.0,7.0,14.0,17.0,115.0,3.0


##### **We will now set up X (feature dataframe) and y (prediction dataframe)**

In [141]:
X = data[list(data)[24:] + ['home', 'ml_line']]
y = data['WL']

In [145]:
# Import libraries 
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

In [146]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features 
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Create a logistic regression classifier 
clf = LogisticRegression()

# Train the classifier on the training data
clf.fit(X_train, y_train)

# Make predictions on the testing data
y_pred = clf.predict(X_test)

# Evaluate the model's performance
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f'Accuracy: {accuracy}')
print(f'Classification Report:\n{report}')

Accuracy: 0.5905349794238683
Classification Report:
              precision    recall  f1-score   support

           0       0.61      0.60      0.61       253
           1       0.57      0.58      0.57       233

    accuracy                           0.59       486
   macro avg       0.59      0.59      0.59       486
weighted avg       0.59      0.59      0.59       486



## **Conclusion**
#### **What we can see is a base betting models that uses the most common NBA statistical data combined with the market's view (ie: moneyline) results in 59% accuracy.**


### **Next steps to improve the model's accuracy**
* Improved feature engineering, including rolling averages for 3, 5, and 10 games.
* Integration of ELO ratings
* Incorporation of team's player statistics
* Integration of historical match-up data