In [16]:
from services import MySQLService, NBAService
from joblib import load
import pandas as pd
import numpy as np
import requests
from datetime import datetime as dt
from bs4 import BeautifulSoup as soup
from termcolor import colored
from IPython.display import clear_output
from helpers.elo import update_elo
from helpers.per import get_realtime_team_per
from helpers.feature_engineer import (
    current_streak,
    get_team_defensive_rating_game,
    get_team_offensive_rating_game,
    get_wl_pct,
)
from helpers.stats import get_match_info, get_game_data

# Turn off pandas' chained-assignment (SettingWithCopy) warning for this session.
pd.set_option('mode.chained_assignment', None)

import warnings
# NOTE(review): DataConversionWarning is not referenced in any visible cell — confirm it is still needed.
from sklearn.exceptions import DataConversionWarning

# Hide the feature-name UserWarning emitted when the fitted StandardScaler is handed
# input without column names (the prediction cell calls `scaler.transform([row])`).
warnings.filterwarnings("ignore", message="X does not have valid feature names, but StandardScaler was fitted with feature names", category=UserWarning)

In [17]:
# Bookmaker odds must be above this value for a pick to be printed as "GOOD BET".
min_odds = 1.8

# Season passed to NBAService and used to bound the SQL queries (season - 4 .. season).
season = 2023

# File stem of the persisted estimator, later loaded from ../models/<model>.joblib.
# NOTE: the same name is rebound to the loaded estimator object further down.
model = 'RF'

# Window sizes forwarded unchanged to get_game_data.
# NOTE(review): presumably "last N games" and "last N games in the same context" — confirm in helpers.stats.
n_last_games = 15
n_last_specific_games = 5

# Service handles used by the cells below for database access and NBA data retrieval.
mysql = MySQLService()
nba = NBAService(season)

In [18]:
# --- Persisted artefacts -----------------------------------------------------
# Elo ratings plus the two column lists used to build and then trim the feature frame.
elo_dict = load('../elo/elo_dict.joblib')
original_columns, filtered_columns = (
    load('../columns/original.joblib'),
    load('../columns/filtered.joblib'),
)

# --- Fresh data from the NBA service ----------------------------------------
season_games, season_players = nba.get_season(season_type='Regular Season')
teams_df = nba.get_teams_df()

# --- Keep only games dated on/after the newest game already stored ----------
last_date_query = "SELECT date FROM `nba-data`.games order by date desc limit 1"
newest_stored_row = mysql.execute_query(last_date_query).iloc[0]
last_date = newest_stored_row['date']

# NOTE(review): GAME_DATE and the DB `date` value must be mutually comparable — confirm dtypes.
is_new_game = season_games["GAME_DATE"] >= last_date
# reset_index() (without drop) keeps the old row labels in an "index" column.
season_games = season_games.loc[is_new_game].reset_index()

# Quick visual sanity check of both frames.
for preview_frame in (teams_df, season_games):
    display(preview_frame.head(2))



Unnamed: 0,id,full_name,abbreviation,nickname,city,state,year_founded
0,1610612737,Atlanta Hawks,ATL,Hawks,Atlanta,Atlanta,1949
1,1610612738,Boston Celtics,BOS,Celtics,Boston,Massachusetts,1946


Unnamed: 0,index,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,...,REB,AST,STL,BLK,TOV,PF,PTS,PLUS_MINUS,VIDEO_AVAILABLE,IS_PLAYOFFS
0,1520,22023,1610612744,GSW,Golden State Warriors,22300733,2024-02-08,GSW @ IND,W,240,...,49,33,7,7,17,22,131,22,1,False
1,1521,22023,1610612754,IND,Indiana Pacers,22300733,2024-02-08,IND vs. GSW,L,240,...,30,31,11,6,14,17,109,-22,1,False


In [19]:
def add_match_info_to_db(home_game, away_game, winner):
    """Build the ``INSERT IGNORE`` statement that stores one game in ``games``.

    Parameters
    ----------
    home_game, away_game : mapping (e.g. one pandas Series row per team)
        Box-score rows read through the upper-case keys used below. The
        game-level fields (``GAME_ID``, ``GAME_DATE``, ``SEASON_ID``,
        ``IS_PLAYOFFS``) are taken from ``home_game`` only.
    winner : str
        Value written (as a quoted SQL string) into the ``winner`` column.

    Returns
    -------
    str
        The SQL text. Nothing is executed here.

    Notes
    -----
    String values are escaped before being embedded, so a quote inside a team
    name can no longer terminate the SQL literal. For values without quotes or
    backslashes the statement is identical to the previous hand-written one.
    Numeric values are still interpolated as-is; a parameterised query would be
    safer but would change what this function returns.
    """
    # Single source of truth for the per-team stat columns: the DB column is
    # "<home|away>_<key.lower()>" and the value is read from row[key].
    stat_keys = (
        'PTS', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT',
        'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB',
        'AST', 'STL', 'BLK', 'TOV', 'PF',
    )

    def quoted(value):
        # Escape backslashes and single quotes (MySQL default string syntax).
        text = str(value).replace("\\", "\\\\").replace("'", "''")
        return f"'{text}'"

    def team_columns(prefix):
        names = ['id', 'team'] + [key.lower() for key in stat_keys]
        return ", ".join(f"{prefix}_{name}" for name in names)

    def team_values(game):
        cells = [f"{game['TEAM_ID']}", quoted(game['TEAM_NAME'])]
        cells.extend(f"{game[key]}" for key in stat_keys)
        return ", ".join(cells)

    # SEASON_ID carries a one-character prefix before the year (e.g. '22023' -> 2023).
    season_year = int(home_game['SEASON_ID'][1:])

    query = (
        "INSERT IGNORE INTO games (id, date, season, is_playoff, winner, "
        + team_columns('home') + "," + team_columns('away')
        + f") VALUES ({home_game['GAME_ID']}, {quoted(home_game['GAME_DATE'])}, "
        + f"{season_year}, {home_game['IS_PLAYOFFS']}, {quoted(winner)}, "
        + team_values(home_game) + ", " + team_values(away_game)
        + ")"
    )
    return query

def add_player_to_db(player_id, name):
    """Build the ``INSERT IGNORE`` statement that registers one player.

    Parameters
    ----------
    player_id : int-like
        Interpolated unquoted into the ``id`` column.
    name : str
        Player name. Backslashes and single quotes are escaped so that names
        such as ``De'Aaron Fox`` produce valid SQL (MySQL default string
        syntax). Names without those characters are embedded unchanged.

    Returns
    -------
    str
        The SQL text. Nothing is executed here.
    """
    safe_name = str(name).replace("\\", "\\\\").replace("'", "''")
    query = f"INSERT IGNORE INTO players (id, name) VALUES ({player_id}, '{safe_name}')"
    return query
    
def add_player_game_to_db(game):
    """Compose the ``INSERT IGNORE`` statement for one player's line in a game.

    ``game`` is any mapping (the caller passes a pandas row) exposing the
    upper-case keys listed in ``source_keys``. Each value is formatted with
    its default string form and placed, in order, next to the matching
    ``player_games`` column. Returns the SQL text without executing it.
    """
    # Row keys, in the same order as the target columns below.
    source_keys = (
        'TEAM_ID', 'PLAYER_ID', 'GAME_ID', 'MIN', 'PTS',
        'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT',
        'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB',
        'AST', 'STL', 'BLK', 'TOV', 'PF', 'PLUS_MINUS',
    )
    target_columns = (
        "team_id, player_id, game_id, minutes, pts, fgm, fga, fg_pct, "
        "fg3m, fg3a, fg3_pct, ftm, fta, ft_pct, oreb, dreb, reb, "
        "ast, stl, blk, tov, pf, plus_minus"
    )
    rendered_values = ", ".join(f"{game[key]}" for key in source_keys)
    return f"INSERT IGNORE INTO player_games ({target_columns}) VALUES ({rendered_values})"

In [20]:
# SQL statements queued here are sent to the database by the next cell.
games_to_insert_queries = []
players_to_insert_queries = []
player_games_to_insert_queries = []

# Not used by any visible cell; kept so the notebook namespace is unchanged.
season_id = ''

# Row pairs that failed the sanity check below. Reported after the loop because
# clear_output() would wipe anything printed inside it.
skipped_pairs = []
total_pairs = len(season_games.index) // 2

# season_games holds one row per team per game; consecutive rows (0-1, 2-3, ...)
# are treated as the two sides of the same game.
for i, g in season_games.groupby(season_games.index // 2):
    clear_output(wait=True)
    print("{}/{}".format(i+1, total_pairs))

    # A lone trailing row or two rows from different games would otherwise
    # raise IndexError or silently merge two unrelated games into one record.
    if len(g) != 2 or g['GAME_ID'].nunique() != 1:
        skipped_pairs.append(g['GAME_ID'].tolist())
        continue

    first_row, second_row = g.iloc[0], g.iloc[1]

    # Stop at the first game without a result. pd.isna covers both None and
    # NaN, whereas `== None` let NaN through and recorded a bogus 'A' winner.
    if pd.isna(first_row['WL']):
        break

    # "AAA @ BBB" marks the visiting team's row; "AAA vs. BBB" the home team's.
    if '@' in first_row['MATCHUP']:
        away_game, home_game = first_row, second_row
    else:
        home_game, away_game = first_row, second_row
    winner = 'H' if home_game['WL'] == 'W' else 'A'

    game_players = season_players.loc[season_players['GAME_ID'] == home_game['GAME_ID']]
    # Missing stats are stored as 0 so the generated SQL never contains "nan".
    game_players = game_players.replace({np.nan: 0})

    games_to_insert_queries.append(add_match_info_to_db(home_game, away_game, winner))

    for _, player in game_players.iterrows():
        # Apostrophes are stripped from names before they are stored.
        players_to_insert_queries.append(add_player_to_db(player['PLAYER_ID'], player['PLAYER_NAME'].replace("'", "")))
        player_games_to_insert_queries.append(add_player_game_to_db(player))

if skipped_pairs:
    print(f"Skipped {len(skipped_pairs)} malformed row pair(s); GAME_IDs: {skipped_pairs}")

9/9


In [21]:
# Send the queued statements in the same order as before: games, players, player_games.
# NOTE(review): presumably parent rows must exist before player_games — confirm against the schema.
for pending_queries in (
    games_to_insert_queries,
    players_to_insert_queries,
    player_games_to_insert_queries,
):
    mysql.execute_multiple_queries(pending_queries)

In [22]:
# Reload the last five seasons (season - 4 .. season) from the database, oldest first.
# NOTE: `season_games` is rebound here from the NBA-service frame to the DB frame
# (lower-case columns), so the insert-building cell above cannot be re-run after this one.
season_games = mysql.execute_query(f"SELECT g.id, g.date, g.season, g.is_playoff, g.winner, g.home_id, ht.name as home_name, g.home_pts, g.home_fgm, g.home_fga, g.home_fg_pct, g.home_fg3m, g.home_fg3a, g.home_fg3_pct, g.home_ftm, g.home_fta, g.home_ft_pct, g.home_oreb, g.home_dreb, g.home_reb, g.home_ast, g.home_stl, g.home_blk, g.home_tov, g.home_pf, g.away_id, at.name as away_name, g.away_pts, g.away_fgm, g.away_fga, g.away_fg_pct, g.away_fg3m, g.away_fg3a, g.away_fg3_pct, g.away_ftm, g.away_fta, g.away_ft_pct, g.away_oreb, g.away_dreb, g.away_reb, g.away_ast, g.away_stl, g.away_blk, g.away_tov, g.away_pf, g.home_odds, g.away_odds FROM games AS g LEFT JOIN teams as ht ON g.home_id = ht.id LEFT JOIN teams as at ON g.away_id = at.id WHERE g.season >= {season - 4} and g.season <= {season} ORDER BY g.date ASC")
# Per-player box scores for the same seasons.
season_games_plyrs = mysql.execute_query(f"SELECT g.id as game_id, g.date, g.season, g.is_playoff, g.winner, g.home_id, g.away_id, pg.team_id, p.name as player_name , pg.player_id, pg.minutes, pg.pts, pg.fgm, pg.fga, pg.fg_pct, pg.fg3m, pg.fg3a, pg.fg3_pct, pg.ftm, pg.fta, pg.ft_pct, pg.oreb, pg.dreb, pg.reb, pg.ast, pg.stl, pg.blk, pg.tov, pg.pf, pg.plus_minus FROM player_games AS pg LEFT JOIN games as g on pg.game_id = g.id LEFT JOIN players as p on pg.player_id = p.id WHERE g.season >= {season - 4} and g.season <= {season} ORDER BY g.date ASC")
# Drop the literal "Jr." suffix and surrounding whitespace from player names.
# The .str accessor leaves missing names (possible with the LEFT JOIN on players)
# untouched instead of raising AttributeError like a per-element str.replace would.
season_games_plyrs['player_name'] = (
    season_games_plyrs['player_name']
    .str.replace('Jr.', '', regex=False)
    .str.strip()
)



In [23]:
# One derived column per (team side, rating kind). Each entry is
# (target column, per-row helper, side flag passed to the helper).
# The columns are created in the listed order.
rating_columns = (
    ('home_off_rtg', get_team_offensive_rating_game, 'H'),
    ('home_def_rtg', get_team_defensive_rating_game, 'H'),
    ('away_off_rtg', get_team_offensive_rating_game, 'A'),
    ('away_def_rtg', get_team_defensive_rating_game, 'A'),
)

for column_name, rating_fn, side in rating_columns:
    # Defaults bind the current helper/side to the lambda for this iteration.
    season_games[column_name] = season_games.apply(
        lambda game_row, fn=rating_fn, team_side=side: fn(game_row, team_side),
        axis=1,
    )

In [24]:
# update_elo is called for its effect on `elo_dict` (its return value is not used),
# so running this cell twice would apply every game of the season twice. Reloading
# the persisted ratings first makes the cell safe to re-run.
elo_dict = load('../elo/elo_dict.joblib')

current_season_games = season_games[season_games["season"] == season].reset_index()
total_current_games = len(current_season_games)

# Replay the current season's games in the frame's order (the query sorts by date).
for index, row in current_season_games.iterrows():
    # wait=True swaps the progress line without flicker, as in the insert-building loop.
    clear_output(wait=True)
    print(f"Updating ELOs: {index+1}/{total_current_games}")

    update_elo(row["winner"], elo_dict, row["home_id"], row["away_id"], row["home_pts"], row["away_pts"])

Updating ELOs: 769/769


In [25]:
# Fitted feature scaler used before every prediction.
scaler = load('../scalers/scaler.joblib')

# `model` starts as the file stem set in the config cell and is replaced here by the
# loaded estimator. The guard keeps a second run of this cell from formatting the
# estimator object into the path and failing on a file that does not exist.
if isinstance(model, str):
    model = load(f'../models/{model}.joblib')

In [26]:
def _american_to_decimal(american_odds):
    """Convert an American moneyline price to decimal odds (rounded to 2 places).

    Negative prices (stake |price| to win 100) become ``100/|price| + 1``.
    Positive prices (stake 100 to win price) become ``price/100 + 1``.
    The previous inline formula only handled the negative case correctly.
    """
    if american_odds > 0:
        decimal_odds = american_odds / 100 + 1
    else:
        decimal_odds = 100 / abs(american_odds) + 1
    return round(decimal_odds, 2)


def get_games_info(bookie="composite", timeout=30):
    """Scrape the Rotowire NBA lineups page and return one dict per listed game.

    Parameters
    ----------
    bookie : str, default "composite"
        CSS class of the ``<span>`` holding the odds to read in each odds item.
    timeout : float, default 30
        Seconds to wait for the HTTP request before giving up.

    Returns
    -------
    list[dict]
        Each dict has the keys ``season``, ``date``, ``away_abbv``, ``home_abbv``,
        ``away_id``, ``home_id``, ``home_per``, ``away_per``, ``home_odds``,
        ``home_probs``, ``away_odds``, ``away_probs``, ``spread_line`` and
        ``totals_line``. The last two are ``None`` when they cannot be parsed.
        Games that fail to parse are reported with ``print`` and left out.

    Raises
    ------
    requests.RequestException
        If the page cannot be fetched or the server answers with an error status.
        Previously an error page was parsed silently and produced an empty list.
    """
    url = "https://www.rotowire.com/basketball/nba-lineups.php"
    result = requests.get(url, timeout=timeout)
    result.raise_for_status()
    doc = soup(result.text, "html.parser")
    games = doc.find_all("div", {"class": "lineup__box"})

    # The two implied probabilities are made to add up to this value (in percent).
    # NOTE(review): looks like a fixed 3% bookmaker margin is assumed — confirm.
    total_implied_pct = 103

    def bookie_tokens(odds_item):
        # Whitespace-separated tokens of the selected bookie's span in one odds item.
        return odds_item.find_all("span", {"class": bookie})[0].text.split()

    games_info = []

    for g in games:
        game_dict = {
            'season': season,
            'date': dt.now()
        }
        try:
            teams = [x.text for x in g.find_all("div", {"class": "lineup__abbr"})]
            # Boxes without both abbreviations cannot describe a game.
            if len(teams) < 2:
                continue

            # The page lists the visiting team first.
            game_dict["away_abbv"] = teams[0]
            game_dict["home_abbv"] = teams[1]

            home = teams_df.loc[teams_df['abbreviation'] == game_dict["home_abbv"]].iloc[0]
            away = teams_df.loc[teams_df['abbreviation'] == game_dict["away_abbv"]].iloc[0]

            game_dict["away_id"] = away["id"]
            game_dict["home_id"] = home["id"]

            # Lineup-based PER is currently disabled; both sides get 0.
#             away_lineup = g.find("ul", {"class": "lineup__list is-visit"})
#             home_lineup = g.find("ul", {"class": "lineup__list is-home"})

#             away_lineup = [x.text.split('\n')[-2] for x in away_lineup.find_all("li", {"class": "lineup__player"})][:5]
#             home_lineup = [x.text.split('\n')[-2] for x in home_lineup.find_all("li", {"class": "lineup__player"})][:5]

#             game_dict["home_per"] = get_realtime_team_per(season_games_plyrs, home_lineup, home)
#             game_dict["away_per"] = get_realtime_team_per(season_games_plyrs, away_lineup, away)
            game_dict["home_per"] = 0
            game_dict["away_per"] = 0

            odds_info = g.find_all("div", {"class": "lineup__odds-item"})

            # First odds item: "<team abbreviation> <American moneyline>".
            moneyline = bookie_tokens(odds_info[0])
            listed_team = moneyline[0]
            listed_odds = _american_to_decimal(float(moneyline[1]))

            # Only one side's price is read; the other side is derived from it.
            if listed_team == home['abbreviation']:
                listed_side, other_side = "home", "away"
            else:
                listed_side, other_side = "away", "home"

            game_dict[f"{listed_side}_odds"] = listed_odds
            game_dict[f"{listed_side}_probs"] = (1/listed_odds)*100
            game_dict[f"{other_side}_probs"] = total_implied_pct - game_dict[f"{listed_side}_probs"]
            game_dict[f"{other_side}_odds"] = 1/(game_dict[f"{other_side}_probs"]/100)

            # Spread and totals are optional: a missing or unparsable value becomes None.
            try:
                game_dict["spread_line"] = float(bookie_tokens(odds_info[1])[1])
            except (IndexError, ValueError, AttributeError):
                game_dict["spread_line"] = None

            try:
                game_dict["totals_line"] = float(bookie_tokens(odds_info[2])[0])
            except (IndexError, ValueError, AttributeError):
                game_dict["totals_line"] = None

            games_info.append(game_dict)
        except Exception as e:
            # Best effort: one malformed box must not abort the whole scrape,
            # but say which game was dropped and why.
            matchup = f"{game_dict.get('away_abbv', '?')} @ {game_dict.get('home_abbv', '?')}"
            print(f"Skipping {matchup}: {e!r}")
            continue

    return games_info

In [27]:
# Scrape the lineups page once; the resulting list of game dicts feeds the feature-building cell.
games_info = get_games_info()

In [31]:
# Feature rows (one per game) and, in the same order, the scraped game dicts they came from.
games_agg = []
games_metadata_agg = []


def _collect_side_stats(game, team_key, rival_key, per_key, side):
    """Call get_game_data for one side of `game`.

    `team_key` / `rival_key` name the id fields of the team and its opponent,
    `per_key` names the PER field of the team, and `side` is the 'H'/'A' flag
    forwarded to get_game_data.
    """
    team_id = game[team_key]
    return get_game_data(
        season_games,
        season_games_plyrs,
        elo_dict,
        game,
        team_id,
        game[rival_key],
        {team_id: game[per_key]},
        n_last_games,
        n_last_specific_games,
        side,
        False,
    )


for game in games_info:
    home_side = _collect_side_stats(game, "home_id", "away_id", "home_per", "H")
    if home_side:
        # The away side is only computed when the home side produced data.
        away_side = _collect_side_stats(game, "away_id", "home_id", "away_per", "A")
        if away_side:
            # Home features first, then away features, joined with `+`.
            games_agg.append(home_side + away_side)
            games_metadata_agg.append(game)

In [32]:
# Build the feature frame with the full column list, then keep only the model's columns.
games_df = pd.DataFrame(games_agg, columns=original_columns)
games_df = games_df[filtered_columns]

# Odds closer than this to `min_odds` get the "Good if odds > ..." hint.
near_miss_margin = 0.3

for index, row in games_df.iterrows():
    match_info_scaled = scaler.transform([row])

    prediction = model.predict(match_info_scaled)[0]

    # predict_proba columns follow model.classes_; look the labels up by name
    # instead of assuming the (away, home) column order.
    class_probs = dict(zip(model.classes_, model.predict_proba(match_info_scaled)[0]))
    home_probs, away_probs = class_probs['H'], class_probs['A']

    # Fair decimal odds implied by the model's probabilities.
    home_pred_odds = 1/home_probs
    away_pred_odds = 1/away_probs

    # games_metadata_agg was filled in lockstep with games_agg, so positions match.
    g = games_metadata_agg[index]

    selected_odds = g["home_odds"] if prediction == 'H' else g["away_odds"]
    selected_pred_odds = home_pred_odds if prediction == 'H' else away_pred_odds

    print(f'\nMatchup: {g["home_abbv"]} x {g["away_abbv"]}')
    print('Prediction', g["home_abbv"] if prediction == 'H' else g["away_abbv"])

    # A pick is "good" when the bookmaker pays more than the model's fair odds
    # and more than the configured minimum.
    has_value = selected_odds > selected_pred_odds
    if selected_odds > min_odds and has_value:
        print(colored(f"GOOD BET @ {round(selected_odds, 2)}", 'green'))
    elif has_value and abs(selected_odds - min_odds) < near_miss_margin:
        print(colored(f"BAD BET @ {round(selected_odds, 2)} (Good if odds > {min_odds})", 'red'))
    else:
        print(colored("BAD BET", 'red'))


Matchup: PHI x ATL
Prediction PHI
[32mGOOD BET @ 2.32[0m

Matchup: BOS x WAS
Prediction BOS
[31mBAD BET[0m

Matchup: TOR x HOU
Prediction HOU
[32mGOOD BET @ 2.11[0m

Matchup: MIL x CHA
Prediction MIL
[31mBAD BET[0m

Matchup: SAC x DEN
Prediction SAC
[32mGOOD BET @ 1.83[0m

Matchup: LAL x NOP
Prediction NOP
[31mBAD BET[0m
