# Generate picks for today's games
This code will likely be used in the web application

In [21]:
import pickle
import pandas as pd
import requests
from datetime import datetime
from zoneinfo import ZoneInfo

In [22]:
nba_games_model = pickle.load(open('data/2024_random_forest_model.pkl', 'rb'))
nba_games = pd.read_csv('data/04_improved_model_with_moneylines.csv', dtype={'GAME_ID': str}, parse_dates=['GAME_DATE_EST'], date_format='%Y-%m-%d')
nba_games = nba_games.sort_values(by='GAME_DATE_EST')

In [23]:
nba_games.info()

<class 'pandas.core.frame.DataFrame'>
Index: 6133 entries, 0 to 6132
Data columns (total 27 columns):
 #   Column                Non-Null Count  Dtype         
---  ------                --------------  -----         
 0   GAME_DATE_EST         6133 non-null   datetime64[ns]
 1   GAME_ID               6133 non-null   object        
 2   HOME_TEAM_ID          6133 non-null   float64       
 3   AWAY_TEAM_ID          6133 non-null   float64       
 4   SEASON                6133 non-null   float64       
 5   HOME_TEAM_POINTS      6133 non-null   float64       
 6   AWAY_TEAM_POINTS      6133 non-null   float64       
 7   HOME_WIN_PCT          6133 non-null   float64       
 8   HOME_HOME_WIN_PCT     6133 non-null   float64       
 9   AWAY_WIN_PCT          6133 non-null   float64       
 10  AWAY_AWAY_WIN_PCT     6133 non-null   float64       
 11  HOME_TEAM_WINS        6133 non-null   bool          
 12  HOME_TEAM_B2B         6133 non-null   bool          
 13  AWAY_TEAM_B2B         6

In [24]:

HEADERS = {
  "Referer": "stats.nba.com",
  "Content-Type": "application/json",
  "Accept": "*/*",
  "Accept-Encoding": "gzip, deflate, br",
  "Connection": "keep-alive",
  "Host": "stats.nba.com",
  "Origin": "https://stats.nba.com",
  "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:70.0) Gecko/20100101 Firefox/70.0",
}
eastern = ZoneInfo('US/Eastern')
current_date = datetime.now(eastern).strftime('%m/%d/%Y')
url = f"https://stats.nba.com/stats/scoreboardV2?DayOffset=0&LeagueID=00&gameDate={current_date}"

res = requests.get(url, headers=HEADERS).json()

In [25]:
def calc_last_n_win_pct(team_id, n):
    _game = nba_games[(nba_games['HOME_TEAM_ID'] == team_id) | (nba_games['AWAY_TEAM_ID'] == team_id)]
    _game['IS_HOME'] = _game['HOME_TEAM_ID'] == team_id
    _game['WIN_PRCT'] = _game['IS_HOME'] == _game['HOME_TEAM_WINS']
    return _game["WIN_PRCT"].tail(n).mean()


In [26]:
nba_games_model.feature_names_in_

array(['GAME_DATETIME', 'HOME_TEAM_ID', 'AWAY_TEAM_ID', 'HOME_WIN_PCT',
       'HOME_HOME_WIN_PCT', 'AWAY_WIN_PCT', 'AWAY_AWAY_WIN_PCT',
       'HOME_TEAM_B2B', 'AWAY_TEAM_B2B', 'HOME_LAST_10_WIN_PCT',
       'AWAY_LAST_10_WIN_PCT'], dtype=object)

In [27]:
results = res['resultSets']
game_headers = results[0]
eastern_standings = results[4]
western_standings = results[5]

games = []
for game in game_headers.get('rowSet'):
  game_id = game[2]
  home_team_id = game[6]
  away_team_id = game[7]
  season = game[8]

  if home_team_id in [r[0] for r in eastern_standings.get("rowSet")]:
      home_team_rank = next((team for team in eastern_standings.get("rowSet") if team[0] == home_team_id))
  else:
      home_team_rank = next((team for team in western_standings.get("rowSet") if team[0] == home_team_id))

  if away_team_id in [r[0] for r in eastern_standings.get("rowSet")]:
      away_team_rank = next((team for team in eastern_standings.get("rowSet") if team[0] == away_team_id))
  else:
      away_team_rank = next((team for team in western_standings.get("rowSet") if team[0] == away_team_id))

  home_win_pct = home_team_rank[9]
  home_home_wins, home_home_losses = home_team_rank[10].split("-")
  home_home_win_pct = int(home_home_wins) / (int(home_home_wins) + int(home_home_losses)) if int(home_home_wins) + int(home_home_losses) > 0 else 0

  away_win_pct = away_team_rank[9]
  away_away_wins, away_away_losses = away_team_rank[11].split("-")
  away_away_win_pct = int(away_away_wins) / (int(away_away_wins) + int(away_away_losses)) if int(away_away_wins) + int(away_away_losses) > 0 else 0

  game_date = datetime.strptime(game[0], "%Y-%m-%dT%H:%M:%S")
  yesterday = game_date - pd.Timedelta(days=1)

  home_yesterday_game_count = len(nba_games[(nba_games['GAME_DATE_EST'] == yesterday) & ((nba_games['HOME_TEAM_ID'] == home_team_id) | (nba_games["AWAY_TEAM_ID"] == home_team_id))])
  home_b2b = home_yesterday_game_count > 0

  away_yesterday_game_count = len(nba_games[(nba_games['GAME_DATE_EST'] == yesterday) & ((nba_games['HOME_TEAM_ID'] == away_team_id) | (nba_games["AWAY_TEAM_ID"] == away_team_id))])
  away_b2b = away_yesterday_game_count > 0

  home_last_10_win_pct = calc_last_n_win_pct(home_team_id, 10)
  away_last_10_win_pct = calc_last_n_win_pct(away_team_id, 10)


  games.append({
      "GAME_DATETIME": int(game_date.timestamp()) * 10**9,
      "HOME_TEAM_ID": home_team_id,
      "AWAY_TEAM_ID": away_team_id,
      "HOME_WIN_PCT": home_win_pct,
      "HOME_HOME_WIN_PCT": home_home_win_pct,
      "AWAY_WIN_PCT": away_win_pct,
      "AWAY_AWAY_WIN_PCT": away_away_win_pct,
      "HOME_TEAM_B2B": home_b2b,
      "AWAY_TEAM_B2B": away_b2b,
      "HOME_LAST_10_WIN_PCT": home_last_10_win_pct,
      "AWAY_LAST_10_WIN_PCT": away_last_10_win_pct
  })

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  _game['IS_HOME'] = _game['HOME_TEAM_ID'] == team_id
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  _game['WIN_PRCT'] = _game['IS_HOME'] == _game['HOME_TEAM_WINS']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  _game['IS_HOME'] = _game['HOME_TEAM_ID'] == team_id
A value is trying to be set on a copy

In [28]:
games

[{'GAME_DATETIME': 1733205600000000000,
  'HOME_TEAM_ID': 1610612766,
  'AWAY_TEAM_ID': 1610612755,
  'HOME_WIN_PCT': 0.286,
  'HOME_HOME_WIN_PCT': 0.38461538461538464,
  'AWAY_WIN_PCT': 0.263,
  'AWAY_AWAY_WIN_PCT': 0.3,
  'HOME_TEAM_B2B': False,
  'AWAY_TEAM_B2B': False,
  'HOME_LAST_10_WIN_PCT': np.float64(0.2),
  'AWAY_LAST_10_WIN_PCT': np.float64(0.3)},
 {'GAME_DATETIME': 1733205600000000000,
  'HOME_TEAM_ID': 1610612739,
  'AWAY_TEAM_ID': 1610612764,
  'HOME_WIN_PCT': 0.864,
  'HOME_HOME_WIN_PCT': 0.9230769230769231,
  'AWAY_WIN_PCT': 0.105,
  'AWAY_AWAY_WIN_PCT': 0.1,
  'HOME_TEAM_B2B': False,
  'AWAY_TEAM_B2B': False,
  'HOME_LAST_10_WIN_PCT': np.float64(0.7),
  'AWAY_LAST_10_WIN_PCT': np.float64(0.0)},
 {'GAME_DATETIME': 1733205600000000000,
  'HOME_TEAM_ID': 1610612765,
  'AWAY_TEAM_ID': 1610612749,
  'HOME_WIN_PCT': 0.391,
  'HOME_HOME_WIN_PCT': 0.36363636363636365,
  'AWAY_WIN_PCT': 0.55,
  'AWAY_AWAY_WIN_PCT': 0.3333333333333333,
  'HOME_TEAM_B2B': False,
  'AWAY_TEAM_B2B'

In [29]:
games_df = pd.DataFrame(games)
games_df["PREDICTION"] = nba_games_model.predict(games_df)

In [30]:
teams = pd.read_csv('data/raw/nba_teams.csv')

In [31]:
games_df = games_df.merge(teams[["TEAM_ID", "ABBREVIATION", "NICKNAME"]], left_on='HOME_TEAM_ID', right_on='TEAM_ID')
games_df = games_df.merge(teams[["TEAM_ID", "ABBREVIATION", "NICKNAME"]], left_on='AWAY_TEAM_ID', right_on='TEAM_ID', suffixes=('_HOME', '_AWAY'))
games_df.drop(columns=["TEAM_ID_HOME", "TEAM_ID_AWAY"], inplace=True)
games_df

Unnamed: 0,GAME_DATETIME,HOME_TEAM_ID,AWAY_TEAM_ID,HOME_WIN_PCT,HOME_HOME_WIN_PCT,AWAY_WIN_PCT,AWAY_AWAY_WIN_PCT,HOME_TEAM_B2B,AWAY_TEAM_B2B,HOME_LAST_10_WIN_PCT,AWAY_LAST_10_WIN_PCT,PREDICTION,ABBREVIATION_HOME,NICKNAME_HOME,ABBREVIATION_AWAY,NICKNAME_AWAY
0,1733205600000000000,1610612766,1610612755,0.286,0.384615,0.263,0.3,False,False,0.2,0.3,False,CHA,Hornets,PHI,76ers
1,1733205600000000000,1610612739,1610612764,0.864,0.923077,0.105,0.1,False,False,0.7,0.0,True,CLE,Cavaliers,WAS,Wizards
2,1733205600000000000,1610612765,1610612749,0.391,0.363636,0.55,0.333333,False,False,0.4,0.8,False,DET,Pistons,MIL,Bucks
3,1733205600000000000,1610612752,1610612753,0.619,0.777778,0.652,0.428571,False,False,0.7,0.9,True,NYK,Knicks,ORL,Magic
4,1733205600000000000,1610612761,1610612754,0.318,0.6,0.409,0.166667,False,False,0.4,0.4,True,TOR,Raptors,IND,Pacers
5,1733205600000000000,1610612760,1610612762,0.75,0.8,0.2,0.222222,False,False,0.7,0.2,True,OKC,Thunder,UTA,Jazz
6,1733205600000000000,1610612742,1610612763,0.619,0.7,0.667,0.5,False,False,0.8,0.7,True,DAL,Mavericks,MEM,Grizzlies
7,1733205600000000000,1610612756,1610612759,0.579,0.636364,0.55,0.375,False,False,0.3,0.7,True,PHX,Suns,SAS,Spurs
8,1733205600000000000,1610612743,1610612744,0.556,0.555556,0.632,0.636364,False,False,0.5,0.5,False,DEN,Nuggets,GSW,Warriors
9,1733205600000000000,1610612758,1610612745,0.429,0.363636,0.714,0.666667,False,False,0.3,0.8,False,SAC,Kings,HOU,Rockets


In [32]:
def prettify_winner(row):
    if row["PREDICTION"] == 1:
        return f"{row['ABBREVIATION_HOME']} {row['NICKNAME_HOME']}"
    else:
        return f"{row['ABBREVIATION_AWAY']} {row['NICKNAME_AWAY']}"

games_df["WINNER"] = games_df.apply(prettify_winner, axis=1)

In [33]:
games_df["GAME_DATE_EST"] = pd.to_datetime(games_df["GAME_DATETIME"], unit='ns').dt.strftime('%Y-%m-%d')

In [34]:
games_df[["GAME_DATE_EST", "ABBREVIATION_HOME", "NICKNAME_HOME", "ABBREVIATION_AWAY", "NICKNAME_AWAY", "WINNER"]].to_json(orient='records')

'[{"GAME_DATE_EST":"2024-12-03","ABBREVIATION_HOME":"CHA","NICKNAME_HOME":"Hornets","ABBREVIATION_AWAY":"PHI","NICKNAME_AWAY":"76ers","WINNER":"PHI 76ers"},{"GAME_DATE_EST":"2024-12-03","ABBREVIATION_HOME":"CLE","NICKNAME_HOME":"Cavaliers","ABBREVIATION_AWAY":"WAS","NICKNAME_AWAY":"Wizards","WINNER":"CLE Cavaliers"},{"GAME_DATE_EST":"2024-12-03","ABBREVIATION_HOME":"DET","NICKNAME_HOME":"Pistons","ABBREVIATION_AWAY":"MIL","NICKNAME_AWAY":"Bucks","WINNER":"MIL Bucks"},{"GAME_DATE_EST":"2024-12-03","ABBREVIATION_HOME":"NYK","NICKNAME_HOME":"Knicks","ABBREVIATION_AWAY":"ORL","NICKNAME_AWAY":"Magic","WINNER":"NYK Knicks"},{"GAME_DATE_EST":"2024-12-03","ABBREVIATION_HOME":"TOR","NICKNAME_HOME":"Raptors","ABBREVIATION_AWAY":"IND","NICKNAME_AWAY":"Pacers","WINNER":"TOR Raptors"},{"GAME_DATE_EST":"2024-12-03","ABBREVIATION_HOME":"OKC","NICKNAME_HOME":"Thunder","ABBREVIATION_AWAY":"UTA","NICKNAME_AWAY":"Jazz","WINNER":"OKC Thunder"},{"GAME_DATE_EST":"2024-12-03","ABBREVIATION_HOME":"DAL","NICKN