In [1]:
from nba_api.stats.endpoints import leaguegamelog
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from termcolor import colored
from config import conn_host, conn_database, conn_user, conn_password
from functools import reduce
import datetime
import mysql.connector
from IPython.display import clear_output

In [2]:
def connect_to_db():
    """Open and return a new MySQL connection.

    The host, database, user and password come from the ``conn_*`` values
    imported from ``config``. Every call creates a fresh connection; the
    caller is responsible for closing it.
    """
    connection_settings = {
        'host': conn_host,
        'database': conn_database,
        'user': conn_user,
        'password': conn_password,
    }
    return mysql.connector.connect(**connection_settings)

def execute_query(query, read_only = True):
    """Run a single SQL statement against the database.

    Parameters
    ----------
    query : str
        The SQL text to execute.
    read_only : bool, default True
        When True the statement is run through ``pd.read_sql_query`` and the
        result is returned as a DataFrame. When False the statement is run
        on a cursor and committed; nothing is returned.

    Returns
    -------
    pandas.DataFrame or None
        The query result for ``read_only=True``; ``None`` for write
        statements or when any error occurred.

    Notes
    -----
    Errors are printed rather than raised, so callers must be prepared for a
    ``None`` result. The connection is always closed, including when the
    statement fails.
    """
    resp = None
    try:
        db = connect_to_db()
        try:
            if read_only:
                resp = pd.read_sql_query(query, db)
            else:
                mycursor = db.cursor()
                mycursor.execute(query)

                db.commit()
        finally:
            # Close on success *and* failure so a bad query does not leak
            # the connection.
            db.close()
    except Exception as e:
        print(e)
    return resp

def execute_multiple_queries(queries):
    """Run several write statements on one connection and commit them together.

    Parameters
    ----------
    queries : iterable of str
        SQL statements executed in order on a single cursor.

    Notes
    -----
    The commit happens only after every statement succeeded. If any
    statement (or the commit itself) fails, the pending work is rolled back
    and the error is printed rather than raised. The connection is always
    closed.
    """
    try:
        db = connect_to_db()
        try:
            mycursor = db.cursor()
            for query in queries:
                mycursor.execute(query)

            db.commit()
        except Exception:
            # Discard the partial batch explicitly, then let the outer
            # handler report the error.
            db.rollback()
            raise
        finally:
            db.close()
    except Exception as e:
        print(e)

In [3]:
# One-off schema migration: add the betting-line columns to `games`.
# ALTER TABLE returns no result set, so it must go through the cursor/commit
# path (read_only=False) rather than pd.read_sql_query.
# Re-running this cell after the columns exist prints a
# "Duplicate column name" error from MySQL and changes nothing.
execute_query("ALTER TABLE games " +
              "ADD over_under_line FLOAT NULL," +
              "ADD spread_line FLOAT NULL," +
              "ADD home_odds FLOAT NULL," +
              "ADD away_odds FLOAT NULL;",
              read_only=False)

Execution failed on sql 'ALTER TABLE games ADD over_under_line FLOAT NULL,ADD spread_line FLOAT NULL,ADD home_odds FLOAT NULL,ADD away_odds FLOAT NULL;': 1060 (42S21): Duplicate column name 'over_under_line'




In [4]:
# Inclusive range of seasons to process; both bounds are used in the games
# query filter and in the odds-workbook loading loop.
first_season, last_season = 2022, 2022

In [5]:
# Box-score rows for every game in the selected seasons, joined to `teams`
# twice to pick up the home and away team names, ordered by game date.
games_query = (
    "SELECT g.id, g.date, g.season, g.is_playoff, g.winner, "
    "g.home_id, ht.name as home_name, g.home_pts, g.home_fgm, g.home_fga, g.home_fg_pct, "
    "g.home_fg3m, g.home_fg3a, g.home_fg3_pct, g.home_ftm, g.home_fta, g.home_ft_pct, "
    "g.home_oreb, g.home_dreb, g.home_reb, g.home_ast, g.home_stl, g.home_blk, g.home_tov, g.home_pf, "
    "g.away_id, at.name as away_name, g.away_pts, g.away_fgm, g.away_fga, g.away_fg_pct, "
    "g.away_fg3m, g.away_fg3a, g.away_fg3_pct, g.away_ftm, g.away_fta, g.away_ft_pct, "
    "g.away_oreb, g.away_dreb, g.away_reb, g.away_ast, g.away_stl, g.away_blk, g.away_tov, g.away_pf, "
    "g.home_odds, g.away_odds "
    "FROM games AS g "
    "LEFT JOIN teams as ht ON g.home_id = ht.id "
    "LEFT JOIN teams as at ON g.away_id = at.id "
    f"WHERE g.season >= {first_season} and g.season <= {last_season} "
    "ORDER BY g.date ASC"
)
season_games = execute_query(games_query)

# Full team lookup table (id, name, abbreviation).
teams = execute_query("SELECT * FROM teams")



In [6]:
# Preview the first rows of the team lookup table.
teams.head(5)

Unnamed: 0,id,name,abbreviation
0,1610612737,Atlanta Hawks,ATL
1,1610612738,Boston Celtics,BOS
2,1610612739,Cleveland Cavaliers,CLE
3,1610612740,New Orleans Pelicans,NOP
4,1610612741,Chicago Bulls,CHI


In [7]:
# Add an 'MMDD' string key for each game; it is compared against the
# zero-padded 'Date' column of the odds sheets when matching games.
game_dates = season_games['date']
season_games['date_converted'] = game_dates.dt.strftime('%m%d')
season_games.head(5)

Unnamed: 0,id,date,season,is_playoff,winner,home_id,home_name,home_pts,home_fgm,home_fga,...,away_dreb,away_reb,away_ast,away_stl,away_blk,away_tov,away_pf,home_odds,away_odds,date_converted
0,22200001,2022-10-18,2022,0,H,1610612738,Boston Celtics,126,46,82,...,27,31,16,8,3,14,25,,,1018
1,22200002,2022-10-18,2022,0,H,1610612744,Golden State Warriors,123,45,99,...,39,48,23,12,4,22,18,,,1018
2,22200003,2022-10-19,2022,0,H,1610612765,Detroit Pistons,113,40,94,...,38,48,21,5,5,18,23,,,1019
3,22200004,2022-10-19,2022,0,A,1610612754,Indiana Pacers,107,39,97,...,39,53,21,5,10,17,19,,,1019
4,22200005,2022-10-19,2022,0,H,1610612737,Atlanta Hawks,117,45,90,...,39,54,25,4,3,16,20,,,1019


In [8]:
# Maps a team abbreviation (as stored in `teams`) to the space-free team
# label that the odds sheets use in their 'Team' column.
teams_dict = {
    'CLE': 'Cleveland',
    'CHI': 'Chicago',
    'DET': 'Detroit',
    'ATL': 'Atlanta',
    'NOP': 'NewOrleans',
    'GSW': 'GoldenState',
    'WAS': 'Washington',
    'ORL': 'Orlando',
    'PHI': 'Philadelphia',
    'BOS': 'Boston',
    'IND': 'Indiana',
    'TOR': 'Toronto',
    'BKN': 'Brooklyn',
    'CHA': 'Charlotte',
    'MIA': 'Miami',
    'UTA': 'Utah',
    'SAS': 'SanAntonio',
    'OKC': 'OklahomaCity',
    'NYK': 'NewYork',
    'MIL': 'Milwaukee',
    'DEN': 'Denver',
    'HOU': 'Houston',
    'MEM': 'Memphis',
    'POR': 'Portland',
    'DAL': 'Dallas',
    'PHX': 'Phoenix',
    'LAC': 'LAClippers',
    'SAC': 'Sacramento',
    'MIN': 'Minnesota',
    'LAL': 'LALakers',
}
print(teams_dict)

{'CLE': 'Cleveland', 'CHI': 'Chicago', 'DET': 'Detroit', 'ATL': 'Atlanta', 'NOP': 'NewOrleans', 'GSW': 'GoldenState', 'WAS': 'Washington', 'ORL': 'Orlando', 'PHI': 'Philadelphia', 'BOS': 'Boston', 'IND': 'Indiana', 'TOR': 'Toronto', 'BKN': 'Brooklyn', 'CHA': 'Charlotte', 'MIA': 'Miami', 'UTA': 'Utah', 'SAS': 'SanAntonio', 'OKC': 'OklahomaCity', 'NYK': 'NewYork', 'MIL': 'Milwaukee', 'DEN': 'Denver', 'HOU': 'Houston', 'MEM': 'Memphis', 'POR': 'Portland', 'DAL': 'Dallas', 'PHX': 'Phoenix', 'LAC': 'LAClippers', 'SAC': 'Sacramento', 'MIN': 'Minnesota', 'LAL': 'LALakers'}


In [11]:
# Load one odds workbook per season and normalise it so rows can be matched
# against `season_games` on (Season, Date, Team).
games_totals = []

for i in range(first_season, last_season + 1):
    # Workbooks are named "<start year>-<two-digit end year>",
    # e.g. 'lines/nba odds 2022-23.xlsx'.
    season_suffix = f"{(i + 1) % 100:02d}"
    sheets = pd.read_excel('lines/nba odds {}-{}.xlsx'.format(i, season_suffix), sheet_name=None)

    # The worksheet is named either 'Sheet1' or 'Sheet 1'; a KeyError on
    # 'Sheet 1' is raised if neither exists.
    season_totals = sheets['Sheet1'] if 'Sheet1' in sheets else sheets['Sheet 1']

    # Dates are read as numbers (e.g. 1018); left-pad to a 4-character 'MMDD' key.
    season_totals['Date'] = season_totals['Date'].astype(str).str.zfill(4)
    # Strip spaces so team labels match the values in `teams_dict`.
    season_totals['Team'] = season_totals['Team'].str.replace(' ', '')

    if season_totals['Close'].dtype == 'object':
        # A text 'Close' column may contain 'pk' (presumably "pick'em" - confirm);
        # map it to 0 before the float conversion. Assign the result back
        # instead of calling replace(inplace=True) on a column selection,
        # which does not reliably update the frame.
        season_totals['Close'] = (
            season_totals['Close'].astype(str).str.lower().replace('pk', '0')
        )
    season_totals['Close'] = season_totals['Close'].astype(float)

    season_totals['ML'] = season_totals['ML'].astype(float)
    season_totals['Season'] = i
    games_totals.append(season_totals)

# Stack the per-season frames; every row keeps its 'Season' tag.
lines_df = pd.concat(games_totals, ignore_index=True)

In [12]:
# Preview the first rows of the combined odds table.
lines_df.head(5)

Unnamed: 0,Date,Rot,VH,Team,1st,2nd,3rd,4th,Final,Open,Close,ML,2H,Season
0,1018,501,V,Philadelphia,29,34,25,29,117,229.0,216.0,135.0,107.0,2022
1,1018,502,H,Boston,24,39,35,28,126,7.0,3.0,-155.0,2.0,2022
2,1018,503,V,LALakers,22,30,19,38,109,229.5,223.5,260.0,114.0,2022
3,1018,504,H,GoldenState,25,34,32,32,123,6.5,7.5,-310.0,2.0,2022
4,1019,505,V,Washington,36,24,27,27,114,2.0,2.5,-135.0,111.5,2022


In [13]:
def convert_to_decimal_odds(american_odds):
    """Convert an American moneyline price to decimal odds.

    Positive prices map to ``odds / 100 + 1``; negative prices map to
    ``100 / |odds| + 1``. The result is rounded to 3 decimals.

    Parameters
    ----------
    american_odds : float or None
        The moneyline price, e.g. ``135.0`` or ``-155.0``.

    Returns
    -------
    float or None
        The decimal odds, or ``None`` when the input is missing
        (``None``/NaN) or 0, neither of which can be converted.
    """
    # Missing prices would otherwise propagate as NaN and a zero price would
    # raise ZeroDivisionError; report both as "no odds".
    if american_odds is None or pd.isna(american_odds) or american_odds == 0:
        return None
    if american_odds > 0:
        return round(((american_odds/100) + 1), 3)
    return round((abs(100/american_odds) + 1), 3)

def get_lines(home_game, away_game):
    """Extract the total, the spread and the decimal odds for one game.

    Parameters
    ----------
    home_game, away_game : mapping
        The odds-sheet rows of the home and the away team; the 'Close' and
        'ML' entries are read from each.

    Returns
    -------
    tuple
        ``(totals_line, spread_line, home_odds, away_odds)``. Either line
        may be ``None`` when it cannot be identified.

    Notes
    -----
    One of the two 'Close' values is the over/under total and the other the
    spread. When one of them is 0, the other is classified by the threshold
    50 (below: spread, above: total) and the remaining line stays ``None``.
    Otherwise the larger value is taken as the total and the smaller as the
    spread.
    """
    home_odds = convert_to_decimal_odds(home_game['ML'])
    away_odds = convert_to_decimal_odds(away_game['ML'])

    line_1 = home_game['Close']
    line_2 = away_game['Close']

    # Check the home row first, then the away row, for a zero close value.
    for zero_candidate, counterpart in ((line_1, line_2), (line_2, line_1)):
        if zero_candidate == 0:
            if counterpart < 50:
                return None, counterpart, home_odds, away_odds
            if counterpart > 50:
                return counterpart, None, home_odds, away_odds

    # Neither special case applied: split by magnitude.
    return max(line_1, line_2), min(line_1, line_2), home_odds, away_odds

def find_game_get_lines(game):
    """Find the odds-sheet rows for a game and return its lines.

    Parameters
    ----------
    game : mapping
        A row of ``season_games``; 'home_id', 'away_id', 'season', 'date'
        and 'date_converted' are read.

    Returns
    -------
    tuple or None
        The ``(totals_line, spread_line, home_odds, away_odds)`` tuple from
        ``get_lines``, or ``None`` when no pair of rows is found on the game
        date or on the following day.
    """
    def first_row(date_key, team_label):
        # First odds row for this season/date/team; IndexError when absent.
        matches = lines_df.loc[(lines_df['Season'] == game['season']) & (lines_df['Date'] == date_key) & (lines_df['Team'] == team_label)]
        return matches.iloc[0]

    # Translate team ids into the labels used by the odds sheets.
    home_abbreviation = teams.loc[teams['id'] == game['home_id'], 'abbreviation'].iloc[0]
    away_abbreviation = teams.loc[teams['id'] == game['away_id'], 'abbreviation'].iloc[0]
    host = teams_dict[home_abbreviation]
    visitor = teams_dict[away_abbreviation]

    try:
        away_game = first_row(game['date_converted'], visitor)
        home_game = first_row(game['date_converted'], host)
    except IndexError:
        # Not listed under the game date: retry with the next calendar day.
        try:
            date = (game['date'] + datetime.timedelta(days=1)).strftime('%m%d')
            away_game = first_row(date, visitor)
            home_game = first_row(date, host)
        except IndexError:
            return None

    return get_lines(home_game, away_game)

In [14]:
# Build the UPDATE statements that write the matched lines and odds back to
# `games`; they are collected here and executed together in a later cell.
insert_queries = []

for index, game in season_games.iterrows():
    clear_output(wait=True)
    print("{}/{}".format(index, len(season_games.index)))
    print(f"{game['id']} {game['season']} {game['date']}: {game['home_name']} x {game['away_name']}")
    response = find_game_get_lines(game)
    if not response: continue
    totals_line, spread_line, home_odds, away_odds = response
    game_id = game['id']

    # Only values above 50 are stored as totals and only values below 50 as
    # spreads. NaN fails both comparisons, so missing lines are skipped.
    if totals_line and totals_line > 50:
        insert_queries.append(f"UPDATE games SET over_under_line = {totals_line} WHERE id = {game_id}")
    # NOTE(review): a spread of exactly 0 is falsy and therefore never
    # written - confirm that this is intended.
    if spread_line and spread_line < 50:
        insert_queries.append(f"UPDATE games SET spread_line = {spread_line} WHERE id = {game_id}")
    # NaN is truthy, so check it explicitly: a literal `nan` in the SQL text
    # would make the statement fail and abort the whole batch.
    if home_odds and away_odds and pd.notna(home_odds) and pd.notna(away_odds):
        insert_queries.append(f"UPDATE games SET home_odds = {home_odds}, away_odds = {away_odds} WHERE id = {game_id}")
        

1313/1314
42200405 2022 2023-06-12 00:00:00: Denver Nuggets x Miami Heat


In [15]:
# Send all collected UPDATE statements to the database in a single batch;
# anything raised by the call is printed instead of stopping the notebook.
try:
    execute_multiple_queries(insert_queries)
except Exception as batch_error:
    print(batch_error)