In [111]:
import pandas as pd
import numpy as np
from functools import reduce
from nba_api.stats.endpoints import teamplayerdashboard, leaguestandings, teamplayerdashboard, leagueleaders, teamestimatedmetrics, teamgamelog, teamgamelogs, leaguegamelog
from nba_api.stats.static import teams 
import urllib.request
import json
from bs4 import BeautifulSoup
from IPython.display import clear_output
import requests
from statistics import mean
import pickle
from termcolor import colored

In [4]:
def get_season_year(season_id):
    """Return the year encoded in a season id.

    The id is converted to text, its first character is discarded and the
    remainder is parsed as an integer (e.g. ``'22020'`` -> ``2020``).
    """
    season_text = str(season_id)
    year_digits = season_text[1:]
    return int(year_digits)

def get_wl_pct(previous_games):
    """Return the ``(win_pct, loss_pct)`` shares of the ``WL`` column.

    Parameters
    ----------
    previous_games : pandas.DataFrame
        Game rows with a ``WL`` column holding ``'W'`` / ``'L'`` markers.

    Returns
    -------
    tuple
        Normalised frequency of ``'W'`` and of ``'L'``. A marker that never
        occurs contributes ``0``; an empty frame, or one whose ``WL`` column
        holds neither marker (e.g. only missing values), yields ``(0, 0)``.
    """
    if len(previous_games.index) == 0:
        return 0, 0

    wl = previous_games['WL'].value_counts(normalize=True)
    # Series.get falls back to 0 for a missing marker, which also covers the
    # case where neither 'W' nor 'L' is present (previously an unbound local).
    return wl.get('W', 0), wl.get('L', 0)

def get_season(season, season_type):
    """Download the team and player game logs for one season.

    Parameters
    ----------
    season : str or int
        Season identifier passed to ``LeagueGameLog`` (converted with ``str``).
    season_type : str
        Value for ``season_type_all_star``, e.g. ``'Regular Season'`` or
        ``'Playoffs'``.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(team_log, player_log)``; both carry an ``IS_PLAYOFFS`` column that
        is ``True`` exactly when ``season_type == 'Playoffs'``.
    """
    # Use the `season` argument itself rather than a module-level loop
    # variable, so the function works no matter where it is called from.
    season = str(season)
    is_playoffs = season_type == 'Playoffs'

    season_i_teams = leaguegamelog.LeagueGameLog(
        season=season, season_type_all_star=season_type
    ).get_data_frames()[0]
    season_i_players = leaguegamelog.LeagueGameLog(
        season=season, player_or_team_abbreviation='P', season_type_all_star=season_type
    ).get_data_frames()[0]

    season_i_teams['IS_PLAYOFFS'] = is_playoffs
    season_i_players['IS_PLAYOFFS'] = is_playoffs
    return season_i_teams, season_i_players

In [5]:
def get_team_elo(team):
    """Fetch a team's rating from the FiveThirtyEight NBA history data.

    Parameters
    ----------
    team : str
        Team nickname. It is lower-cased and stripped of spaces to build the
        file name; ``'76ers'`` is requested as ``'sixers'``.

    Returns
    -------
    The ``'y'`` field of the last entry under the ``'value'`` key of the
    downloaded JSON document (presumably the most recent Elo rating; verify
    against the data source).

    Raises
    ------
    urllib.error.URLError
        If the document cannot be downloaded.
    """
    team = team.lower().replace(" ", "")
    if team == '76ers':
        team = 'sixers'
    url = "https://projects.fivethirtyeight.com/complete-history-of-the-nba/data/{}.json".format(team)
    # Close the HTTP response deterministically instead of relying on GC.
    with urllib.request.urlopen(url) as response:
        contents = json.loads(response.read())
    return contents['value'][-1]['y']

In [21]:
# Build the combined team-level and player-level game logs used by every
# later cell (season_games, season_games_plyrs).
pd.options.mode.chained_assignment = None  # default='warn'
teams_list = teams.get_teams()

seasons_teams, seasons_players = [], []
first_season, last_season = 2020, 2022


def _merge_outer(frames):
    """Combine ``frames`` left to right with successive outer ``pd.merge`` calls."""
    return reduce(lambda left, right: pd.merge(left, right, how='outer'), frames)


print("Getting NBA Seasons Information...")
# range() excludes last_season, so seasons first_season .. last_season - 1 are fetched.
for i in range(first_season, last_season):
    season_i_teams, season_i_players = get_season(str(i), 'Regular Season')
    season_i_teams_playoffs, season_i_players_playoffs = get_season(str(i), 'Playoffs')

    # Regular season and playoffs of the same season become one frame each.
    season_i_teams = _merge_outer([season_i_teams, season_i_teams_playoffs])
    season_i_players = _merge_outer([season_i_players, season_i_players_playoffs])

    seasons_teams.append(season_i_teams)
    seasons_players.append(season_i_players)
    print("{}/{}".format(i, last_season))
    clear_output(wait=True)


dfs = []

# All seasons together, one frame for teams and one for players.
season_games = _merge_outer(seasons_teams)
season_games_plyrs = _merge_outer(seasons_players)
# Team rows without shooting percentages are discarded.
season_games.dropna(subset=['FG_PCT', 'FT_PCT', 'FG3_PCT'], inplace=True)

# Normalise the key columns: numeric game ids, datetime game dates.
for frame in (season_games_plyrs, season_games):
    frame['GAME_ID'] = pd.to_numeric(frame['GAME_ID'])
for frame in (season_games, season_games_plyrs):
    frame['GAME_DATE'] = pd.to_datetime(frame['GAME_DATE'])

# Only the team log is put into chronological order here.
season_games = season_games.sort_values(['GAME_DATE', 'GAME_ID']).reset_index(drop=True)

print('size', len(season_games.index))

size 4790


In [22]:
# Preview the first rows of the combined player game log as a sanity check.
season_games_plyrs.head()

Unnamed: 0,SEASON_ID,PLAYER_ID,PLAYER_NAME,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,...,AST,STL,BLK,TOV,PF,PTS,PLUS_MINUS,FANTASY_PTS,VIDEO_AVAILABLE,IS_PLAYOFFS
0,22020,1629617,Reggie Perry,1610612751,BKN,Brooklyn Nets,22000001,2020-12-22,BKN vs. GSW,W,...,1,0,0,0,0,2,-8,5.9,1,False
1,22020,203925,Joe Harris,1610612751,BKN,Brooklyn Nets,22000001,2020-12-22,BKN vs. GSW,W,...,2,0,1,2,2,10,21,22.4,1,False
2,22020,201939,Stephen Curry,1610612744,GSW,Golden State Warriors,22000001,2020-12-22,GSW @ BKN,L,...,10,2,0,3,1,20,-23,42.8,1,False
3,22020,204020,Tyler Johnson,1610612751,BKN,Brooklyn Nets,22000001,2020-12-22,BKN vs. GSW,W,...,0,1,0,0,0,0,-12,4.2,1,False
4,22020,1628386,Jarrett Allen,1610612751,BKN,Brooklyn Nets,22000001,2020-12-22,BKN vs. GSW,W,...,2,1,2,2,0,8,12,26.4,1,False


In [6]:
# Download today's lineup page and collect one element per listed game.
url = "https://www.rotowire.com/basketball/nba-lineups.php"
# A timeout keeps a stalled connection from hanging the kernel indefinitely.
result = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of parsing an error page into zero games.
result.raise_for_status()
doc = BeautifulSoup(result.text, "html.parser")
games = doc.find_all("div", {"class": "lineup__box"})

In [98]:
# Bounds on the offered odds, read as globals by check_bet_worth: a bet is only
# considered when the odds are strictly between these two values.
min_threshold = 1.75  # exclusive lower bound
max_threshold = 3  # exclusive upper bound

In [75]:
# Load the fitted feature scaler (`sc`) from disk.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
train_season = '2012-2021.pkl'
scaler_path = 'scalers/{}'.format(train_season)
with open(scaler_path, 'rb') as scaler_file:
    sc = pickle.load(scaler_file)

In [78]:
# Load the trained classifier (`vc`) from disk.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
model_path = 'models/VotingClassifier.pkl'
with open(model_path, 'rb') as model_file:
    model_bundle = pickle.load(model_file)
# The pickled object is indexable; the element at position 2 is the one used
# for prediction (what positions 0 and 1 hold is not visible here).
vc = model_bundle[2]

In [95]:
def check_bet_worth(prediction, odds_home, odds_away, pred_odds_home, pred_odds_away):
    """Decide whether betting on the predicted side is worthwhile.

    The side is chosen by ``prediction`` (``1`` = home, ``0`` = away). The bet
    is worthwhile when the model's odds for that side are lower than the
    offered odds and the offered odds lie strictly between the module-level
    ``min_threshold`` and ``max_threshold``. Any other ``prediction`` value is
    never worthwhile.
    """
    if prediction == 1:
        offered, predicted = odds_home, pred_odds_home
    elif prediction == 0:
        offered, predicted = odds_away, pred_odds_away
    else:
        return False
    return predicted < offered and min_threshold < offered < max_threshold

In [96]:
def get_pred_odds(prob_home):
    """Convert a home-win probability into odds for both sides.

    Parameters
    ----------
    prob_home : float
        Probability of the home team winning.

    Returns
    -------
    tuple of float
        ``(pred_odds_home, pred_odds_away)`` computed as ``1 / prob_home`` and
        ``1 / (1 - prob_home)``. A side whose probability is not positive gets
        ``inf`` instead of triggering a division by zero.
    """
    prob_away = 1 - prob_home
    # A probability of exactly 0 (or 1 for the other side) has no finite odds.
    pred_odds_home = 1 / prob_home if prob_home > 0 else float('inf')
    pred_odds_away = 1 / prob_away if prob_away > 0 else float('inf')
    return pred_odds_home, pred_odds_away

In [66]:
def get_player_mean_per(last_games):
    """Return the mean per-minute efficiency rating over the given games.

    For every row a linear-weights rating is computed from the box-score
    columns (FGM, STL, FG3M, FTM, BLK, OREB, AST, DREB, PF, FTA, FGA, TOV)
    and divided by the minutes played (``MIN``).

    Parameters
    ----------
    last_games : pandas.DataFrame
        One row per game with the columns listed above plus ``MIN``.

    Returns
    -------
    float or int
        Mean of the per-game ratings, or ``0`` when no row has positive
        minutes (including an empty frame).
    """
    per_values = []
    for _, game in last_games.iterrows():
        minutes = game['MIN']
        # Rows with zero or missing minutes would divide by zero and turn the
        # whole mean into inf/NaN, so they are left out of the average.
        if not minutes > 0:
            continue
        weighted_total = (
            game['FGM'] * 85.910 + game['STL'] * 53.897 + game['FG3M'] * 51.757
            + game['FTM'] * 46.845 + game['BLK'] * 39.190 + game['OREB'] * 39.190
            + game['AST'] * 34.677 + game['DREB'] * 14.707
            - game['PF'] * 17.174
            - (game['FTA'] - game['FTM']) * 20.091   # missed free throws
            - (game['FGA'] - game['FGM']) * 39.190   # missed field goals
            - game['TOV'] * 53.897
        )
        per_values.append(weighted_total * (1 / minutes))
    if len(per_values) > 0:
        return mean(per_values)
    return 0

In [138]:
def _find_player_rows(team_abbv, player):
    """Return the rows of ``season_games_plyrs`` for ``team_abbv`` whose name matches ``player``."""
    names = season_games_plyrs['PLAYER_NAME']
    # regex=False: the scraped name is matched literally, so characters such
    # as '.' in it are not interpreted as a regular expression.
    name_match = names.str.contains(player, regex=False) | (names == player)
    parts = player.split(' ')
    # The "first letter + second word" match only applies when the name has
    # at least two words; single-word names rely on the direct match above.
    if len(parts) > 1 and player:
        name_match = name_match | (names.str.startswith(player[0]) & names.str.endswith(parts[1]))
    return season_games_plyrs.loc[(season_games_plyrs['TEAM_ABBREVIATION'] == team_abbv) & name_match]


def get_team_stats(team_abbr, team_players):
    """Build the model feature vector for one team and ask for its odds.

    Parameters
    ----------
    team_abbr : str
        Team abbreviation, looked up in ``teams_list``.
    team_players : list of str
        Names of the players in the expected lineup.

    Returns
    -------
    tuple
        ``(features, odds, team)`` where ``features`` is the list
        ``[PTS mean, points conceded mean, FG_PCT mean, FG3_PCT mean,
        FT_PCT mean, REB mean, TOV mean, season win pct, lineup PER, odds,
        elo]`` (means over the team's last 10 games), ``odds`` is the value
        typed by the user and ``team`` is the matching ``teams_list`` entry.

    Notes
    -----
    Performs a network request (``get_team_elo``) and blocks on ``input()``.
    Players that cannot be resolved are reported and skipped. When no
    opponent rows are found the conceded-points mean is ``NaN``, matching
    what the other ``.mean()`` features yield on empty data.
    """
    team = [x for x in teams_list if x['abbreviation'] == team_abbr][0]

    elo = get_team_elo(team['nickname'])

    team_id = team['id']
    team_abbv = team['abbreviation']

    previous_games = season_games.loc[(season_games['TEAM_ID'] == team_id) & (season_games['MIN'] > 0)]
    team_season_games = previous_games.loc[previous_games['SEASON_ID'].apply(get_season_year) == (last_season - 1)]

    # Season Win Percentage
    season_pct = get_wl_pct(team_season_games)[0]

    # Getting teams last 10 games
    previous_games = previous_games.iloc[-10:, :]
    opponent_points = []
    for _, game in previous_games.iterrows():
        opp_rows = season_games.loc[(season_games['GAME_ID'] == game['GAME_ID']) & (season_games['TEAM_ID'] != game['TEAM_ID'])]
        # The opponent's row can be absent (e.g. removed by an earlier NaN
        # filter); skip that game rather than crash on .iloc[0].
        if opp_rows.empty:
            continue
        opponent_points.append(opp_rows.iloc[0]['PTS'])
    if opponent_points:
        pts_conceded = sum(opponent_points) / len(opponent_points)
    else:
        pts_conceded = float('nan')

    # Getting players PER
    chronological = ['GAME_DATE', 'GAME_ID']
    per_values = []
    for player in team_players:
        try:
            # Sort explicitly: the player log is not guaranteed to be in
            # chronological order, and "last" must mean "most recent".
            matches = _find_player_rows(team_abbv, player).sort_values(chronological)
            player_id = matches.iloc[-1]['PLAYER_ID']
            player_games = season_games_plyrs.loc[season_games_plyrs['PLAYER_ID'] == player_id]
            last_ten_games = player_games.sort_values(chronological).iloc[-10:]
            per_values.append(get_player_mean_per(last_ten_games))
        except Exception:
            # Best effort per player, but let KeyboardInterrupt/SystemExit through.
            print('Error when trying to get the games for {} of the {}'.format(player, team['nickname']))
            continue

    if len(per_values) > 0:
        per = mean(per_values)
    else:
        per = 0

    odds = float(input("Type the odds for the {} to win: ".format(team['nickname'])))

    return [previous_games['PTS'].mean(), pts_conceded, previous_games['FG_PCT'].mean(), previous_games['FG3_PCT'].mean(), previous_games['FT_PCT'].mean(),
           previous_games['REB'].mean(), previous_games['TOV'].mean(), season_pct, per, odds, elo], odds, team

In [139]:
# For every scraped game: build both teams' features, run the model and report
# whether the predicted winner is worth betting on.
for game in games:
    # Named team_abbrs (not `teams`) so the imported nba_api `teams` module is
    # not overwritten and the data-loading cell can still be re-run.
    team_abbrs = [x.text for x in game.find_all("div", {"class": "lineup__abbr"})]
    # Both an away and a home abbreviation are required.
    if len(team_abbrs) < 2:
        continue

    away_abbr = team_abbrs[0]
    home_abbr = team_abbrs[1]

    away_lineup = game.find("ul", {"class": "lineup__list is-visit"})
    home_lineup = game.find("ul", {"class": "lineup__list is-home"})
    # find() returns None when a lineup list is missing from the box.
    if away_lineup is None or home_lineup is None:
        continue

    # First five listed players of each side.
    away_lineup = [x.text.split('\n')[-2] for x in away_lineup.find_all("li", {"class": "lineup__player"})][:5]
    home_lineup = [x.text.split('\n')[-2] for x in home_lineup.find_all("li", {"class": "lineup__player"})][:5]

    a, odds_away, away_team = get_team_stats(away_abbr, away_lineup)
    h, odds_home, home_team = get_team_stats(home_abbr, home_lineup)

    # Feature order is home features followed by away features.
    game_stats = sc.transform([h + a])

    prediction = vc.predict(game_stats)[0]
    probability = vc.predict_proba(game_stats)[0,1]
    pred_home, pred_away = get_pred_odds(probability)
    should_bet = check_bet_worth(prediction, odds_home, odds_away, pred_home, pred_away)
    if should_bet:
        print(colored('GOOD BET', 'green'))
    else:
        print(colored('BAD BET', 'red'))
    print('{} should beat the {}\n'.format(home_team['nickname'] if prediction == 1 else away_team['nickname'], away_team['nickname'] if prediction == 1 else home_team['nickname']))
    

Type the odds for the Cavaliers to win: 4.5
Type the odds for the Nets to win: 1.22
[31mBAD BET[0m
Nets should beat the Cavaliers

Type the odds for the Clippers to win: 2.3
Type the odds for the Timberwolves to win: 1.6
[31mBAD BET[0m
Timberwolves should beat the Clippers

