In [1]:
from nba_api.stats.endpoints import leaguegamelog
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from termcolor import colored
from config import conn_host, conn_database, conn_user, conn_password
from functools import reduce
import datetime
import mysql.connector
from IPython.display import clear_output

In [2]:
def connect_to_db():
    return mysql.connector.connect(host=conn_host, 
                                     database=conn_database,
                                     user=conn_user,
                                     password=conn_password)

def execute_query(query, read_only = True):
    resp = None
    try:
        db = connect_to_db()
        if read_only:
            resp = pd.read_sql_query(query, db)
        else:
            mycursor = db.cursor()
            mycursor.execute(query)

            db.commit()
        db.close()
    except Exception as e:
        print(e)
    return resp

def execute_multiple_queries(queries):
    try:
        db = connect_to_db()
        mycursor = db.cursor()
        for query in queries:
            mycursor.execute(query)

        db.commit()
        db.close()
    except Exception as e:
        print(e)

In [3]:
execute_query("ALTER TABLE games " +
              "ADD over_under_line FLOAT NULL;")

Execution failed on sql 'ALTER TABLE games ADD over_under_line FLOAT NULL;': 1060 (42S21): Duplicate column name 'over_under_line'




In [4]:
first_season = 2008
last_season = 2015

In [5]:
season_games = execute_query(f"SELECT g.id, g.date, g.season, g.is_playoff, g.winner, g.home_id, ht.name as home_name, g.home_pts, g.home_fgm, g.home_fga, g.home_fg_pct, g.home_fg3m, g.home_fg3a, g.home_fg3_pct, g.home_ftm, g.home_fta, g.home_ft_pct, g.home_oreb, g.home_dreb, g.home_reb, g.home_ast, g.home_stl, g.home_blk, g.home_tov, g.home_pf, g.away_id, at.name as away_name, g.away_pts, g.away_fgm, g.away_fga, g.away_fg_pct, g.away_fg3m, g.away_fg3a, g.away_fg3_pct, g.away_ftm, g.away_fta, g.away_ft_pct, g.away_oreb, g.away_dreb, g.away_reb, g.away_ast, g.away_stl, g.away_blk, g.away_tov, g.away_pf, g.home_odds, g.away_odds FROM games AS g LEFT JOIN teams as ht ON g.home_id = ht.id LEFT JOIN teams as at ON g.away_id = at.id WHERE g.season >= {first_season} and g.season <= {last_season} ORDER BY g.date ASC")
teams = execute_query(f"SELECT * FROM teams")



In [6]:
teams.head()

Unnamed: 0,id,name,abbreviation
0,1610612737,Atlanta Hawks,ATL
1,1610612738,Boston Celtics,BOS
2,1610612739,Cleveland Cavaliers,CLE
3,1610612740,New Orleans Pelicans,NOP
4,1610612741,Chicago Bulls,CHI


In [7]:
season_games['date_converted'] = season_games['date'].dt.strftime('%m%d')
season_games.head()

Unnamed: 0,id,date,season,is_playoff,winner,home_id,home_name,home_pts,home_fgm,home_fga,...,away_dreb,away_reb,away_ast,away_stl,away_blk,away_tov,away_pf,home_odds,away_odds,date_converted
0,20800001,2008-10-28,2008,0,H,1610612738,Boston Celtics,90,33,74,...,33,41,16,8,2,22,26,1.36,3.15,1028
1,20800002,2008-10-28,2008,0,H,1610612741,Chicago Bulls,108,35,69,...,19,32,21,5,3,19,30,1.36,3.2,1028
2,20800003,2008-10-28,2008,0,H,1610612747,Los Angeles Lakers,96,37,79,...,31,44,19,8,3,14,17,1.27,3.85,1028
3,20800004,2008-10-29,2008,0,A,1610612753,Orlando Magic,85,32,87,...,35,53,17,10,7,15,23,1.25,3.95,1029
4,20800005,2008-10-29,2008,0,A,1610612755,Philadelphia 76ers,84,29,84,...,23,33,24,8,5,10,23,1.46,2.72,1029


In [8]:
teams_dict = {'CLE': 'Cleveland', 'CHI': 'Chicago', 'DET': 'Detroit', 'ATL': 'Atlanta', 'NOP': 'NewOrleans', 'GSW': 'GoldenState', 'WAS': 'Washington', 'ORL': 'Orlando', 'PHI': 'Philadelphia', 'BOS': 'Boston', 'IND': 'Indiana', 'TOR': 'Toronto', 'BKN': 'Brooklyn', 'CHA': 'Charlotte', 'MIA': 'Miami', 'UTA': 'Utah', 'SAS': 'SanAntonio', 'OKC': 'OklahomaCity', 'NYK': 'NewYork', 'MIL': 'Milwaukee', 'DEN': 'Denver', 'HOU': 'Houston', 'MEM': 'Memphis', 'POR': 'Portland', 'DAL': 'Dallas', 'PHX': 'Phoenix', 'LAC': 'LAClippers', 'SAC': 'Sacramento', 'MIN': 'Minnesota', 'LAL': 'LALakers'}
print(teams_dict)

{'CLE': 'Cleveland', 'CHI': 'Chicago', 'DET': 'Detroit', 'ATL': 'Atlanta', 'NOP': 'NewOrleans', 'GSW': 'GoldenState', 'WAS': 'Washington', 'ORL': 'Orlando', 'PHI': 'Philadelphia', 'BOS': 'Boston', 'IND': 'Indiana', 'TOR': 'Toronto', 'BKN': 'Brooklyn', 'CHA': 'Charlotte', 'MIA': 'Miami', 'UTA': 'Utah', 'SAS': 'SanAntonio', 'OKC': 'OklahomaCity', 'NYK': 'NewYork', 'MIL': 'Milwaukee', 'DEN': 'Denver', 'HOU': 'Houston', 'MEM': 'Memphis', 'POR': 'Portland', 'DAL': 'Dallas', 'PHX': 'Phoenix', 'LAC': 'LAClippers', 'SAC': 'Sacramento', 'MIN': 'Minnesota', 'LAL': 'LALakers'}


In [10]:
games_totals = []

for i in range(first_season,last_season+1):
    season_totals = pd.read_excel('lines/nba odds {}-{}.xlsx'.format(i, f"{0 if i+1 - 2000 < 10 else ''}{i+1 - 2000}"), sheet_name=None)
    season_totals = season_totals['Sheet1']
    season_totals['Date'] = season_totals['Date'].astype(str).str.zfill(4)
    season_totals['Team'] = season_totals['Team'].str.replace(' ', '')
    games_totals.append(season_totals)

lines_df = reduce(lambda  left,right: pd.merge(left,right, how='outer'), games_totals).reset_index(drop=True)



In [11]:
lines_df.head()

Unnamed: 0,Date,Rot,VH,Team,1st,2nd,3rd,4th,Final,Open,Close,ML,2H
0,1028,501,V,Cleveland,28,22,13,22,85,179.5,182.5,220.0,93.5
1,1028,502,H,Boston,22,21,24,23,90,7.0,6.0,-260.0,5.0
2,1028,503,V,Milwaukee,26,26,23,20,95,196.0,198.0,230.0,101.5
3,1028,504,H,Chicago,26,29,27,26,108,6.0,6.0,-270.0,3.0
4,1028,505,V,Portland,14,20,19,23,76,196.5,195.0,305.0,1.0


In [20]:
def compare_lines(line_1, line_2):
    if str(line_1).lower() == 'pk':
        return line_2
    elif str(line_2).lower() == 'pk':
        return line_1
    return max(float(line_1), float(line_2))

def find_game_get_line(game):
    host = teams_dict[teams[teams['id'] == game['home_id']].iloc[0,:]['abbreviation']]
    visitor = teams_dict[teams[teams['id'] == game['away_id']].iloc[0,:]['abbreviation']]

    try:
        line_1 = lines_df.loc[(lines_df['Date'] == game['date_converted']) & (lines_df['Team'] == visitor)].iloc[0]['Close']
        line_2 = lines_df.loc[(lines_df['Date'] == game['date_converted']) & (lines_df['Team'] == host)].iloc[0]['Close']
    except IndexError:
        try:
            date = (game['date'] + datetime.timedelta(days=1)).strftime('%m%d')
            line_1 = lines_df.loc[(lines_df['Date'] == date) & (lines_df['Team'] == visitor)].iloc[0]['Close']
            line_2 = lines_df.loc[(lines_df['Date'] == date) & (lines_df['Team'] == host)].iloc[0]['Close']
        except IndexError:
            return None
        
    selected_line = compare_lines(line_1, line_2)
    return selected_line

insert_queries = []

for index, game in season_games.iterrows():
    clear_output(wait=True)
    print("{}/{}".format(index, len(season_games.index)))
    print(f"{game['id']} {game['season']} {game['date']}: {game['home_name']} x {game['away_name']}")
    line = find_game_get_line(game)
    if line and line != 'pk' and float(line) > 80:
        update_query = f"UPDATE games SET over_under_line = {line} WHERE id = {game['id']}"
        insert_queries.append(update_query)
        

10271/10272
41500407 2015 2016-06-19 00:00:00: Golden State Warriors x Cleveland Cavaliers


In [21]:
try:
    execute_multiple_queries(insert_queries)
except Exception as e:
    print(e)