In [23]:
from nba_api.stats.endpoints import leaguegamelog
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from termcolor import colored
from config import conn_host, conn_database, conn_user, conn_password
from functools import reduce
import datetime
import mysql.connector
from IPython.display import clear_output

In [24]:
def connect_to_db():
    return mysql.connector.connect(host=conn_host, 
                                     database=conn_database,
                                     user=conn_user,
                                     password=conn_password)

def execute_query(query, read_only = True):
    resp = None
    try:
        db = connect_to_db()
        if read_only:
            resp = pd.read_sql_query(query, db)
        else:
            mycursor = db.cursor()
            mycursor.execute(query)

            db.commit()
        db.close()
    except Exception as e:
        print(e)
    return resp

def execute_multiple_queries(queries):
    try:
        db = connect_to_db()
        mycursor = db.cursor()
        for query in queries:
            mycursor.execute(query)

        db.commit()
        db.close()
    except Exception as e:
        print(e)

In [25]:
execute_query("ALTER TABLE games " +
              "ADD over_under_line FLOAT NULL;")
execute_query("ALTER TABLE games " +
              "ADD spread_line FLOAT NULL;")

Execution failed on sql 'ALTER TABLE games ADD over_under_line FLOAT NULL;': 1060 (42S21): Duplicate column name 'over_under_line'
Execution failed on sql 'ALTER TABLE games ADD spread_line FLOAT NULL;': 1060 (42S21): Duplicate column name 'spread_line'




In [26]:
first_season = 2008
last_season = 2021

In [28]:
season_games = execute_query(f"SELECT g.id, g.date, g.season, g.is_playoff, g.winner, g.home_id, ht.name as home_name, g.home_pts, g.home_fgm, g.home_fga, g.home_fg_pct, g.home_fg3m, g.home_fg3a, g.home_fg3_pct, g.home_ftm, g.home_fta, g.home_ft_pct, g.home_oreb, g.home_dreb, g.home_reb, g.home_ast, g.home_stl, g.home_blk, g.home_tov, g.home_pf, g.away_id, at.name as away_name, g.away_pts, g.away_fgm, g.away_fga, g.away_fg_pct, g.away_fg3m, g.away_fg3a, g.away_fg3_pct, g.away_ftm, g.away_fta, g.away_ft_pct, g.away_oreb, g.away_dreb, g.away_reb, g.away_ast, g.away_stl, g.away_blk, g.away_tov, g.away_pf, g.home_odds, g.away_odds FROM games AS g LEFT JOIN teams as ht ON g.home_id = ht.id LEFT JOIN teams as at ON g.away_id = at.id WHERE g.season >= {first_season} and g.season <= {last_season} ORDER BY g.date ASC")
teams = execute_query(f"SELECT * FROM teams")



In [29]:
teams.head()

Unnamed: 0,id,name,abbreviation
0,1610612737,Atlanta Hawks,ATL
1,1610612738,Boston Celtics,BOS
2,1610612739,Cleveland Cavaliers,CLE
3,1610612740,New Orleans Pelicans,NOP
4,1610612741,Chicago Bulls,CHI


In [31]:
season_games['date_converted'] = season_games['date'].dt.strftime('%m%d')
season_games.tail()

Unnamed: 0,id,date,season,is_playoff,winner,home_id,home_name,home_pts,home_fgm,home_fga,...,away_dreb,away_reb,away_ast,away_stl,away_blk,away_tov,away_pf,home_odds,away_odds,date_converted
17824,42100402,2022-06-05,2021,1,H,1610612744,Golden State Warriors,107,39,86,...,37,43,24,5,7,19,18,1.5,2.71,605
17825,42100403,2022-06-08,2021,1,H,1610612738,Boston Celtics,116,43,89,...,25,31,22,7,5,17,23,1.66,2.3,608
17826,42100404,2022-06-10,2021,1,A,1610612738,Boston Celtics,97,34,85,...,39,55,20,10,5,16,21,1.61,2.41,610
17827,42100405,2022-06-13,2021,1,H,1610612744,Golden State Warriors,104,41,88,...,39,47,18,2,2,18,16,1.6,2.44,613
17828,42100406,2022-06-16,2021,1,A,1610612738,Boston Celtics,90,34,80,...,29,44,27,13,7,17,20,1.59,2.47,616


In [8]:
teams_dict = {'CLE': 'Cleveland', 'CHI': 'Chicago', 'DET': 'Detroit', 'ATL': 'Atlanta', 'NOP': 'NewOrleans', 'GSW': 'GoldenState', 'WAS': 'Washington', 'ORL': 'Orlando', 'PHI': 'Philadelphia', 'BOS': 'Boston', 'IND': 'Indiana', 'TOR': 'Toronto', 'BKN': 'Brooklyn', 'CHA': 'Charlotte', 'MIA': 'Miami', 'UTA': 'Utah', 'SAS': 'SanAntonio', 'OKC': 'OklahomaCity', 'NYK': 'NewYork', 'MIL': 'Milwaukee', 'DEN': 'Denver', 'HOU': 'Houston', 'MEM': 'Memphis', 'POR': 'Portland', 'DAL': 'Dallas', 'PHX': 'Phoenix', 'LAC': 'LAClippers', 'SAC': 'Sacramento', 'MIN': 'Minnesota', 'LAL': 'LALakers'}
print(teams_dict)

{'CLE': 'Cleveland', 'CHI': 'Chicago', 'DET': 'Detroit', 'ATL': 'Atlanta', 'NOP': 'NewOrleans', 'GSW': 'GoldenState', 'WAS': 'Washington', 'ORL': 'Orlando', 'PHI': 'Philadelphia', 'BOS': 'Boston', 'IND': 'Indiana', 'TOR': 'Toronto', 'BKN': 'Brooklyn', 'CHA': 'Charlotte', 'MIA': 'Miami', 'UTA': 'Utah', 'SAS': 'SanAntonio', 'OKC': 'OklahomaCity', 'NYK': 'NewYork', 'MIL': 'Milwaukee', 'DEN': 'Denver', 'HOU': 'Houston', 'MEM': 'Memphis', 'POR': 'Portland', 'DAL': 'Dallas', 'PHX': 'Phoenix', 'LAC': 'LAClippers', 'SAC': 'Sacramento', 'MIN': 'Minnesota', 'LAL': 'LALakers'}


In [32]:
games_totals = []

for i in range(first_season,last_season+1):
    season_totals = pd.read_excel('lines/nba odds {}-{}.xlsx'.format(i, f"{0 if i+1 - 2000 < 10 else ''}{i+1 - 2000}"), sheet_name=None)
    season_totals = season_totals['Sheet1']
    season_totals['Date'] = season_totals['Date'].astype(str).str.zfill(4)
    season_totals['Team'] = season_totals['Team'].str.replace(' ', '')
    season_totals['Season'] = i
    games_totals.append(season_totals)

lines_df = reduce(lambda  left,right: pd.merge(left,right, how='outer'), games_totals).reset_index(drop=True)

  key_col = Index(lvals).where(~mask_left, rvals)


In [35]:
lines_df.tail()

Unnamed: 0,Date,Rot,VH,Team,1st,2nd,3rd,4th,Final,Open,Close,ML,2H,Season
35679,610,524,H,Boston,28,26,24,19,97,2.5,4.0,-165,1.0,2021
35680,613,525,V,Boston,16,23,35,20,94,212.5,211.0,145,2.5,2021
35681,613,526,H,GoldenState,27,24,24,29,104,3.5,4.0,-165,107.5,2021
35682,616,527,V,GoldenState,27,27,22,27,103,212.5,211.5,155,103.5,2021
35683,616,528,H,Boston,22,17,27,24,90,2.5,4.0,-175,5.5,2021


In [38]:
def get_lines(line_1, line_2):
    totals_line, spread_line = None, None
    if str(line_1).lower() == 'pk' and float(line_2) < 50:
        spread_line = line_2
    elif str(line_1).lower() == 'pk' and float(line_2) > 50:
        totals_line = line_2
    elif str(line_2).lower() == 'pk' and float(line_1) < 50:
        spread_line = line_1
    elif str(line_2).lower() == 'pk' and float(line_1) > 50:
        totals_line = line_1
    else:
        totals_line = max(float(line_1), float(line_2))
        spread_line = min(float(line_1), float(line_2))
    return totals_line, spread_line

def find_game_get_lines(game):
    host = teams_dict[teams[teams['id'] == game['home_id']].iloc[0,:]['abbreviation']]
    visitor = teams_dict[teams[teams['id'] == game['away_id']].iloc[0,:]['abbreviation']]

    try:
        lines_game = lines_df.loc[(lines_df['Season'] == game['season']) & (lines_df['Date'] == game['date_converted']) & (lines_df['Team'] == visitor)]
        line_1 = lines_df.loc[(lines_df['Season'] == game['season']) & (lines_df['Date'] == game['date_converted']) & (lines_df['Team'] == visitor)].iloc[0]['Close']
        line_2 = lines_df.loc[(lines_df['Season'] == game['season']) & (lines_df['Date'] == game['date_converted']) & (lines_df['Team'] == host)].iloc[0]['Close']
    except IndexError:
        try:
            date = (game['date'] + datetime.timedelta(days=1)).strftime('%m%d')
            lines_game = lines_df.loc[(lines_df['Season'] == game['season']) & (lines_df['Date'] == date) & (lines_df['Team'] == visitor)]
            line_1 = lines_df.loc[(lines_df['Season'] == game['season']) & (lines_df['Date'] == date) & (lines_df['Team'] == visitor)].iloc[0]['Close']
            line_2 = lines_df.loc[(lines_df['Season'] == game['season']) & (lines_df['Date'] == date) & (lines_df['Team'] == host)].iloc[0]['Close']
        except IndexError:
            return None
        
    totals_line, spread_line = get_lines(line_1, line_2)
    return totals_line, spread_line

In [39]:
insert_queries = []

for index, game in season_games.iterrows():
    clear_output(wait=True)
    print("{}/{}".format(index, len(season_games.index)))
    print(f"{game['id']} {game['season']} {game['date']}: {game['home_name']} x {game['away_name']}")
    response = find_game_get_lines(game)
    if not response: continue
    totals_line, spread_line = response
    if totals_line and totals_line != 'pk' and totals_line > 50:
        update_query = f"UPDATE games SET over_under_line = {totals_line} WHERE id = {game['id']}"
        insert_queries.append(update_query)
    if spread_line and spread_line != 'pk' and spread_line < 50:
        update_query = f"UPDATE games SET spread_line = {spread_line} WHERE id = {game['id']}"
        insert_queries.append(update_query)
        

17828/17829
42100406 2021 2022-06-16 00:00:00: Boston Celtics x Golden State Warriors


In [45]:
try:
    execute_multiple_queries(insert_queries)
except Exception as e:
    print(e)