In [32]:
from nba_api.stats.endpoints import leaguegamelog
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from termcolor import colored
from config import conn_host, conn_database, conn_user, conn_password
from functools import reduce
import datetime
import mysql.connector
from IPython.display import clear_output

In [33]:
def connect_to_db():
    return mysql.connector.connect(host=conn_host, 
                                     database=conn_database,
                                     user=conn_user,
                                     password=conn_password)

def execute_query(query, read_only = True):
    resp = None
    try:
        db = connect_to_db()
        if read_only:
            resp = pd.read_sql_query(query, db)
        else:
            mycursor = db.cursor()
            mycursor.execute(query)

            db.commit()
        db.close()
    except Exception as e:
        print(e)
    return resp

def execute_multiple_queries(queries):
    try:
        db = connect_to_db()
        mycursor = db.cursor()
        for query in queries:
            mycursor.execute(query)

        db.commit()
        db.close()
    except Exception as e:
        print(e)

In [34]:
execute_query("ALTER TABLE games " +
              "ADD over_under_line FLOAT NULL;")

Execution failed on sql 'ALTER TABLE games ADD over_under_line FLOAT NULL;': 1060 (42S21): Duplicate column name 'over_under_line'




In [35]:
first_season = 2015
last_season = 2021

In [36]:
season_games = execute_query(f"SELECT g.id, g.date, g.season, g.is_playoff, g.winner, g.home_id, ht.name as home_name, g.home_pts, g.home_fgm, g.home_fga, g.home_fg_pct, g.home_fg3m, g.home_fg3a, g.home_fg3_pct, g.home_ftm, g.home_fta, g.home_ft_pct, g.home_oreb, g.home_dreb, g.home_reb, g.home_ast, g.home_stl, g.home_blk, g.home_tov, g.home_pf, g.away_id, at.name as away_name, g.away_pts, g.away_fgm, g.away_fga, g.away_fg_pct, g.away_fg3m, g.away_fg3a, g.away_fg3_pct, g.away_ftm, g.away_fta, g.away_ft_pct, g.away_oreb, g.away_dreb, g.away_reb, g.away_ast, g.away_stl, g.away_blk, g.away_tov, g.away_pf, g.home_odds, g.away_odds FROM games AS g LEFT JOIN teams as ht ON g.home_id = ht.id LEFT JOIN teams as at ON g.away_id = at.id WHERE g.season >= {first_season} and g.season <= {last_season} ORDER BY g.date ASC")
teams = execute_query(f"SELECT * FROM teams")



In [37]:
teams.head()

Unnamed: 0,id,name,abbreviation
0,1610612737,Atlanta Hawks,ATL
1,1610612738,Boston Celtics,BOS
2,1610612739,Cleveland Cavaliers,CLE
3,1610612740,New Orleans Pelicans,NOP
4,1610612741,Chicago Bulls,CHI


In [38]:
season_games['date_converted'] = season_games['date'].dt.strftime('%m%d')
season_games.head()

Unnamed: 0,id,date,season,is_playoff,winner,home_id,home_name,home_pts,home_fgm,home_fga,...,away_dreb,away_reb,away_ast,away_stl,away_blk,away_tov,away_pf,home_odds,away_odds,date_converted
0,21500001,2015-10-27,2015,0,A,1610612737,Atlanta Hawks,94,37,82,...,36,59,23,5,3,15,15,1.32,3.5,1027
1,21500002,2015-10-27,2015,0,H,1610612741,Chicago Bulls,97,37,87,...,39,50,26,5,7,11,21,1.56,2.49,1027
2,21500003,2015-10-27,2015,0,H,1610612744,Golden State Warriors,111,41,96,...,25,33,21,9,3,19,26,1.14,6.07,1027
3,21500004,2015-10-28,2015,0,A,1610612753,Orlando Magic,87,37,100,...,34,49,17,8,9,18,14,2.58,1.53,1028
4,21500005,2015-10-28,2015,0,H,1610612738,Boston Celtics,112,39,85,...,32,46,12,11,6,24,22,1.09,8.16,1028


In [39]:
teams_dict = {'CLE': 'Cleveland', 'CHI': 'Chicago', 'DET': 'Detroit', 'ATL': 'Atlanta', 'NOP': 'NewOrleans', 'GSW': 'GoldenState', 'WAS': 'Washington', 'ORL': 'Orlando', 'PHI': 'Philadelphia', 'BOS': 'Boston', 'IND': 'Indiana', 'TOR': 'Toronto', 'BKN': 'Brooklyn', 'CHA': 'Charlotte', 'MIA': 'Miami', 'UTA': 'Utah', 'SAS': 'SanAntonio', 'OKC': 'OklahomaCity', 'NYK': 'NewYork', 'MIL': 'Milwaukee', 'DEN': 'Denver', 'HOU': 'Houston', 'MEM': 'Memphis', 'POR': 'Portland', 'DAL': 'Dallas', 'PHX': 'Phoenix', 'LAC': 'LAClippers', 'SAC': 'Sacramento', 'MIN': 'Minnesota', 'LAL': 'LALakers'}
print(teams_dict)

{'CLE': 'Cleveland', 'CHI': 'Chicago', 'DET': 'Detroit', 'ATL': 'Atlanta', 'NOP': 'NewOrleans', 'GSW': 'GoldenState', 'WAS': 'Washington', 'ORL': 'Orlando', 'PHI': 'Philadelphia', 'BOS': 'Boston', 'IND': 'Indiana', 'TOR': 'Toronto', 'BKN': 'Brooklyn', 'CHA': 'Charlotte', 'MIA': 'Miami', 'UTA': 'Utah', 'SAS': 'SanAntonio', 'OKC': 'OklahomaCity', 'NYK': 'NewYork', 'MIL': 'Milwaukee', 'DEN': 'Denver', 'HOU': 'Houston', 'MEM': 'Memphis', 'POR': 'Portland', 'DAL': 'Dallas', 'PHX': 'Phoenix', 'LAC': 'LAClippers', 'SAC': 'Sacramento', 'MIN': 'Minnesota', 'LAL': 'LALakers'}


In [40]:
games_totals = []

for i in range(first_season,last_season+1):
    season_totals = pd.read_excel('lines/nba odds {}-{}.xlsx'.format(i, i+1 - 2000), sheet_name=None)
    season_totals = season_totals['Sheet1']
    season_totals['Date'] = season_totals['Date'].astype(str).str.zfill(4)
    season_totals['Team'] = season_totals['Team'].str.replace(' ', '')
    games_totals.append(season_totals)

lines_df = reduce(lambda  left,right: pd.merge(left,right, how='outer'), games_totals).reset_index(drop=True)

In [54]:
lines_df.head()

Unnamed: 0,Date,Rot,VH,Team,1st,2nd,3rd,4th,Final,Open,Close,ML,2H
0,1027,501,V,Cleveland,17,23,28,27,95,197.5,198.5,160,97
1,1027,502,H,Chicago,26,20,25,26,97,2.0,4.0,-180,pk
2,1027,503,V,Detroit,25,23,34,24,106,197.5,196.5,255,98
3,1027,504,H,Atlanta,25,18,23,28,94,7.0,7.0,-320,7
4,1027,505,V,NewOrleans,35,14,26,20,95,214.5,215.0,510,108


In [None]:
def compare_lines(line_1, line_2):
    if str(line_1).lower() == 'pk':
        return line_2
    elif str(line_2).lower() == 'pk':
        return line_1
    return max(float(line_1), float(line_2))

def find_game_get_line(game):
    host = teams_dict[teams[teams['id'] == game['home_id']].iloc[0,:]['abbreviation']]
    visitor = teams_dict[teams[teams['id'] == game['away_id']].iloc[0,:]['abbreviation']]

    try:
        line_1 = lines_df.loc[(lines_df['Date'] == game['date_converted']) & (lines_df['Team'] == visitor)].iloc[0]['Close']
        line_2 = lines_df.loc[(lines_df['Date'] == game['date_converted']) & (lines_df['Team'] == host)].iloc[0]['Close']
    except IndexError:
        try:
            date = (game['date'] + datetime.timedelta(days=1)).strftime('%m%d')
            line_1 = lines_df.loc[(lines_df['Date'] == date) & (lines_df['Team'] == visitor)].iloc[0]['Close']
            line_2 = lines_df.loc[(lines_df['Date'] == date) & (lines_df['Team'] == host)].iloc[0]['Close']
        except IndexError:
            return None
        
    selected_line = compare_lines(line_1, line_2)
    return selected_line

insert_queries = []

for index, game in season_games.iterrows():
    clear_output(wait=True)
    print("{}/{}".format(index, len(season_games.index)))
    print(f"{game['id']} {game['season']} {game['date']}: {game['home_name']} x {game['away_name']}")
    line = find_game_get_line(game)
    if line:
        update_query = f"UPDATE games SET over_under_line = {line} WHERE id = {game['id']}"
        insert_queries.append(update_query)
        

480/8873
21500481 2015 2015-12-30 00:00:00: San Antonio Spurs x Phoenix Suns


In [51]:
print(insert_queries[0])

IndexError: list index out of range

In [50]:
try:
    execute_multiple_queries(insert_queries)
except Exception as e:
    print(e)