<a href="https://colab.research.google.com/github/TrevBot17/NBA_Predict/blob/main/Predict_NBA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Imports

In [None]:
from datetime import datetime, timedelta
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
import time

In [None]:
# Full team name (as it appears in the schedule table) -> the abbreviation
# that is substituted into the team game-log URL in predict_points.
team_dict = {
    'Atlanta Hawks': 'ATL',
    'Boston Celtics': 'BOS',
    'Brooklyn Nets': 'BRK',
    'Charlotte Hornets': 'CHO',
    'Chicago Bulls': 'CHI',
    'Cleveland Cavaliers': 'CLE',
    'Dallas Mavericks': 'DAL',
    'Denver Nuggets': 'DEN',
    'Detroit Pistons': 'DET',
    'Golden State Warriors': 'GSW',
    'Houston Rockets': 'HOU',
    'Indiana Pacers': 'IND',
    'Los Angeles Clippers': 'LAC',
    'Los Angeles Lakers': 'LAL',
    'Memphis Grizzlies': 'MEM',
    'Miami Heat': 'MIA',
    'Milwaukee Bucks': 'MIL',
    'Minnesota Timberwolves': 'MIN',
    'New Orleans Pelicans': 'NOP',
    'New York Knicks': 'NYK',
    'Oklahoma City Thunder': 'OKC',
    'Orlando Magic': 'ORL',
    'Philadelphia 76ers': 'PHI',
    'Phoenix Suns': 'PHO',
    'Portland Trail Blazers': 'POR',
    'Sacramento Kings': 'SAC',
    'San Antonio Spurs': 'SAS',
    'Toronto Raptors': 'TOR',
    'Utah Jazz': 'UTA',
    'Washington Wizards': 'WAS',
}

## Defensive Ranks

In [None]:
def defensive_rank(season):
  """Fetch every team's adjusted defensive rating (DRtg/A) for one season.

  season: value substituted into the basketball-reference ratings URL
          (ex: 2023).

  Returns a DataFrame holding only the 'Team' and 'DRtg/A' columns of the
  first table found on that page.
  """
  ratings_url = 'https://www.basketball-reference.com/leagues/NBA_{}_ratings.html'.format(str(season))
  # Skip the table's first row and take column names from the row after it
  # (presumably the first row is a grouped header -- confirm against the page)
  tables = pd.read_html(ratings_url, skiprows = 1, header = 0)
  ratings = tables[0]
  return ratings[['Team', 'DRtg/A']]

## Get Data And Predict

In [None]:
# Make it all a function
def predict_points(season, team, def_ranks):
  """Predict how many points `team` scores in the game it plays today.

  Downloads the team's game log for `season` from basketball-reference,
  derives one feature row per game (season-to-date scoring average,
  last-three-games average, home flag, days of rest, opponent's adjusted
  defensive rating), fits a random forest on the games that already have a
  score, and evaluates it on the row dated today.

  Parameters
  ----------
  season : int
      Season value substituted into the URL (ex: 2022).
  team : str
      Team abbreviation substituted into the URL (ex: 'BOS').
  def_ranks : pandas.DataFrame
      Frame with 'Team' and 'DRtg/A' columns, as returned by
      defensive_rank. Passed in to limit calls to basketball-reference.

  Returns
  -------
  numpy.ndarray
      One-element array with the predicted points.

  Raises
  ------
  IndexError
      If the game log contains no game dated today for `team`.
  """
  feature_cols = ['Avg_Pts', 'Avg_Pts_Last_3', 'Home', 'Days_Rest', 'DRtg/A']

  url = 'https://www.basketball-reference.com/teams/{}/{}_games.html'.format(team, str(season))
  data = pd.read_html(url)[0]
  data = data.rename(columns = {'Unnamed: 5' : 'Home',
                              'Unnamed: 7' : 'Result',
                              'Unnamed: 8' : 'Overtime',
                              'Tm' : 'Points_For',
                              'Opp' : 'Points_Against'})

  # Filter out the header rows repeated inside the table. Copy so the column
  # assignments below act on an independent frame (no SettingWithCopyWarning).
  data = data.loc[data['G'] != 'G'].copy()

  # Convert Date to datetime objects
  data['Date'] = pd.to_datetime(data['Date'])

  # "Today" is the local clock shifted back 8 hours.
  # NOTE(review): looks like a UTC -> US Pacific adjustment -- confirm.
  today = pd.Timestamp((datetime.today() - timedelta(hours = 8)).date())

  # Filter out games scheduled after today
  data = data[data['Date'] <= today].copy()

  # Make Points_For numeric. Rows without a parsable score (e.g. a game not
  # played yet) become NaN instead of leaving the whole column as strings;
  # they are excluded from training by dropna() further down.
  data['Points_For'] = pd.to_numeric(data['Points_For'], errors = 'coerce')

  # Home/Away flag: '@' marks an away game (0), anything else is home (1)
  data['Home'] = data['Home'].ne('@').astype('int')

  # Days of rest = gap to the previous game minus one day; the first game has
  # no previous game and is filled with the median gap
  rest = data['Date'].diff() - timedelta(days = 1)
  rest = rest.fillna(rest.median())
  data['Days_Rest'] = pd.to_numeric(rest.dt.days, downcast = 'integer')

  # Join data and def_ranks to add DRtg/A (adjusted defensive rating) of the opponent
  data = data.merge(def_ranks, left_on = 'Opponent', right_on = 'Team').sort_values(by = ['Date'])

  # Set index to Date
  data = data.set_index('Date')

  # Scoring averages are shifted by one game so each row only sees earlier games
  data['Avg_Pts'] = data['Points_For'].rolling(window = 82, min_periods = 1).mean().shift(periods = 1)
  data['Avg_Pts_Last_3'] = data['Points_For'].rolling(window = 3, min_periods = 1).mean().shift(periods = 1)

  # data keeps every row (including NaN); df holds only complete rows for regression
  df = data[['Points_For'] + feature_cols].dropna()
  X = df[feature_cols]
  y = df['Points_For']

  # The 20% hold-out is not evaluated in this function; the split is kept so
  # the model is fitted on the same rows as before
  X_train, _, y_train, _ = train_test_split(X, y, test_size = 0.2, random_state = 17)

  # RF consistently performs best. Not sure of real life applications of model choice
  rf = RandomForestRegressor(n_estimators = 500, max_depth = 2, random_state = 17).fit(X_train, y_train)

  # Look up today's feature row once, positionally, and fail with a clear
  # message when the team has no game today
  todays_row = data.loc[data.index == today, feature_cols]
  if todays_row.empty:
    raise IndexError('no game dated {} found for {}'.format(today.strftime('%Y-%m-%d'), team))

  inputs = todays_row.iloc[[0]]
  return rf.predict(inputs)

## Today's Games

In [None]:
# Make it all a function
def todays_games(season = 2023):
  """Return the games scheduled for today from basketball-reference.

  Parameters
  ----------
  season : int, optional
      Season value substituted into the schedule URL. Defaults to 2023,
      the season this function previously had hard-coded.

  Returns
  -------
  pandas.DataFrame
      Columns 'Date', 'Away' and 'Home', one row per game dated today
      (empty when no game matches).
  """
  # Read the clock once so the month page and the date filter always agree.
  # NOTE(review): the 8-hour shift looks like a UTC -> US Pacific adjustment -- confirm.
  now = datetime.now() - timedelta(hours = 8)
  schedule = 'https://www.basketball-reference.com/leagues/NBA_{}_games-{}.html'.format(season, now.strftime('%B').lower())

  sch = pd.read_html(schedule)[0]
  sch = sch.rename(columns = {'Visitor/Neutral' : 'Away',
                              'Home/Neutral' : 'Home'})

  # Convert Date to datetime objects; any row whose Date cell is not a date
  # becomes NaT and therefore never matches today
  sch['Date'] = pd.to_datetime(sch['Date'], errors = 'coerce')

  # Filter df to today's date
  today = pd.Timestamp(now.date())
  sch = sch[sch['Date'] == today]

  return sch[['Date', 'Away', 'Home']]

# Fetch today's schedule and pair each game's (away, home) full team names
sch = todays_games()
games = [(away_name, home_name) for away_name, home_name in zip(sch['Away'], sch['Home'])]

In [None]:
# Season value passed to both the ratings page and the team game logs; kept in
# one place so the two requests cannot drift apart
SEASON = 2023

print((datetime.today() - timedelta(hours = 8)).strftime('%Y-%m-%d'))

# Fetch the defensive ratings once and reuse them for every team
def_ranks = defensive_rank(SEASON)

for i, (a, h) in enumerate(games, start = 1):
  # Schedule rows carry full team names; the game-log URL needs abbreviations
  away = team_dict[a]
  home = team_dict[h]

  # Pause before each request (presumably to stay gentle on basketball-reference)
  time.sleep(2)
  away_pts = np.round(predict_points(SEASON, away, def_ranks), 2)
  time.sleep(2)
  home_pts = np.round(predict_points(SEASON, home, def_ranks), 2)

  print('Game {}'.format(i))
  print('{}: {}'.format(away, away_pts))
  print('{}: {}'.format(home, home_pts))
  print()

2023-02-02
Game 1
LAL: [118.69]
IND: [121.6]

Game 2
MEM: [107.15]
CLE: [112.09]

Game 3
MIA: [110.95]
NYK: [109.65]

Game 4
CHO: [111.67]
CHI: [117.65]

Game 5
NOP: [114.6]
DAL: [105.63]

Game 6
GSW: [113.83]
DEN: [116.67]

Game 7
LAC: [107.51]
MIL: [116.55]

