In [4]:
import pandas as pd
import numpy as np
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support.ui import Select
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.action_chains import ActionChains
from sklearn.linear_model import LinearRegression
import pickle
import unidecode
from bs4 import BeautifulSoup
import requests
import time
import matplotlib.pyplot as plt

# Season label: later cells select the teamrankings.com column named str(current_year - 1)
# and build the basketball-reference URL from current_year.
current_year = 2022

In [2]:
# Retrieving active rosters

# The depth-chart page is loaded through a real Chrome session and its rendered
# HTML is handed to pandas for table extraction.
driver = webdriver.Chrome(ChromeDriverManager().install())
try:
    driver.get('https://www.lineups.com/nba/depth-charts')
    driver.refresh()
    # Fixed pause before reading the page source
    # (presumably to let the tables finish rendering -- TODO confirm).
    time.sleep(5)
    html = driver.page_source
finally:
    # Always shut the browser down, even if the page load fails, so repeated
    # runs do not leave Chrome / chromedriver processes behind.
    driver.quit()
tables = pd.read_html(html)
# Full team names, in alphabetical order. The roster-building loop pairs this
# list positionally (via zip) with the scraped tables, so the order matters.
teams = [
    'Atlanta Hawks',
    'Boston Celtics',
    'Brooklyn Nets',
    'Charlotte Hornets',
    'Chicago Bulls',
    'Cleveland Cavaliers',
    'Dallas Mavericks',
    'Denver Nuggets',
    'Detroit Pistons',
    'Golden State Warriors',
    'Houston Rockets',
    'Indiana Pacers',
    'Los Angeles Clippers',
    'Los Angeles Lakers',
    'Memphis Grizzlies',
    'Miami Heat',
    'Milwaukee Bucks',
    'Minnesota Timberwolves',
    'New Orleans Pelicans',
    'New York Knicks',
    'Oklahoma City Thunder',
    'Orlando Magic',
    'Philadelphia 76ers',
    'Phoenix Suns',
    'Portland Trailblazers',
    'Sacramento Kings',
    'San Antonio Spurs',
    'Toronto Raptors',
    'Utah Jazz',
    'Washington Wizards',
]
def name_adjustment(x):
    """Reduce a depth-chart cell to just the player's name.

    The cell text is split on single spaces and trimmed by token count:

    * 4 or 5 tokens -> the first two tokens are kept
    * 6 or 7 tokens -> the first three tokens are kept

    (Presumably the trailing tokens are position/rating metadata appended by
    the site -- TODO confirm against the scraped page.)

    Args:
        x: A table cell; normally a string, but may be NaN/None for empty slots.

    Returns:
        The trimmed name, or ``x`` unchanged when it is not a string or when
        its token count is not one of the handled sizes.
    """
    # Empty depth-chart slots arrive as NaN (a float); pass them through as-is.
    if not isinstance(x, str):
        return x

    names = x.split(' ')
    if len(names) in (4, 5):
        return ' '.join(names[:2])
    if len(names) in (6, 7):
        return ' '.join(names[:3])
    # Any other token count previously left the result unassigned and raised
    # UnboundLocalError; returning the input keeps the roster build running.
    return x

# Build {team name: [player, ...]} from the scraped tables, one table per team.
# Players are listed row by row across the '1', '2' and '3' columns.
team_dict = {}
for team, table in zip(teams, tables):
    table.columns = table.columns.droplevel(0)
    for depth_col in ('1', '2', '3'):
        table[depth_col] = table[depth_col].apply(name_adjustment)
    table = table[['1', '2', '3']]
    n_rows, n_cols = table.shape
    team_dict[team] = [
        table.iloc[row_pos, col_pos]
        for row_pos in range(n_rows)
        for col_pos in range(n_cols)
    ]
active_rosters = team_dict



Current google-chrome version is 97.0.4692
Get LATEST driver version for 97.0.4692
Get LATEST driver version for 97.0.4692
Trying to download new driver from https://chromedriver.storage.googleapis.com/97.0.4692.71/chromedriver_mac64.zip
Driver has been saved in cache [/Users/hsinger24/.wdm/drivers/chromedriver/mac64/97.0.4692.71]


In [5]:
# Retrieving games played

# Maps the short team names used by teamrankings.com to the full team names
# used as keys throughout this notebook.
games_played_map = {
    'Milwaukee' : 'Milwaukee Bucks',
    'Boston' : 'Boston Celtics',
    'Memphis' : 'Memphis Grizzlies',
    'LA Clippers' : 'Los Angeles Clippers',
    'Sacramento' : 'Sacramento Kings',
    'Indiana' : 'Indiana Pacers',
    'Golden State' : 'Golden State Warriors',
    'LA Lakers' : 'Los Angeles Lakers',
    'San Antonio' : 'San Antonio Spurs',
    'Utah' : 'Utah Jazz',
    'Houston' : 'Houston Rockets',
    'Dallas' : 'Dallas Mavericks',
    'Charlotte' : 'Charlotte Hornets',
    'Cleveland' : 'Cleveland Cavaliers',
    'Orlando' : 'Orlando Magic',
    'Washington' : 'Washington Wizards',
    'New York' : 'New York Knicks',
    'Brooklyn' : 'Brooklyn Nets',
    'Philadelphia' : 'Philadelphia 76ers',
    'Minnesota' : 'Minnesota Timberwolves',
    'Miami' : 'Miami Heat',
    'New Orleans' : 'New Orleans Pelicans',
    'Detroit' : 'Detroit Pistons',
    'Okla City' : 'Oklahoma City Thunder',
    'Portland' : 'Portland Trailblazers',
    'Chicago' : 'Chicago Bulls',
    'Denver' : 'Denver Nuggets',
    'Phoenix' : 'Phoenix Suns',
    'Atlanta' : 'Atlanta Hawks',
    'Toronto' : 'Toronto Raptors'
}

# Reading in table and adjusting columns
tables = pd.read_html('https://www.teamrankings.com/nba/stat/games-played')
# .copy() gives an independent frame, so the 'Team' assignment below no longer
# writes to a slice of tables[0] (which raised SettingWithCopyWarning).
games_played_table = tables[0][['Team', str(current_year - 1)]].copy()
games_played_table.columns = ['Team', 'Games_Played']
# Direct dict indexing (rather than .map) so an unknown team name raises
# KeyError instead of silently becoming NaN.
games_played_table['Team'] = games_played_table.Team.apply(lambda x: games_played_map[x])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [6]:
# Getting current day win %

# Getting overall fraction of season from average of games played table
frac_season = np.mean(games_played_table.Games_Played)/82.0

# Retrieving BOY projected VORPs
boy_vorps = pd.read_csv('In_Season/Data/opening_day_vorps_player.csv', index_col = 0)
# Changing VORP_projection column to be a float: string values have any
# surrounding '[' / ']' stripped before conversion, other values pass through.
boy_vorps['VORP_projection'] = boy_vorps.VORP_projection.apply(
    lambda x: float(x.strip('[]')) if isinstance(x, str) else x
)

# Retrieving current year VORPs
tables = pd.read_html(f'https://www.basketball-reference.com/leagues/NBA_{current_year}_advanced.html')
table = tables[0][['Player', 'Tm', 'G', 'VORP']]
table.columns = ['Player', 'Team', 'Games', 'VORP']
# Drop rows whose team cell is the literal 'Tm' (presumably repeated header
# rows -- TODO confirm) and the 'TOT' rows. .copy() makes the result an
# independent frame so the column assignment below does not raise
# SettingWithCopyWarning.
table = table[(table.Team != 'Tm') & (table.Team != 'TOT')].copy()
table['VORP'] = pd.to_numeric(table.VORP)

# NOTE(review): this aggregate is not read by any later visible cell; the
# roster loop further down rebinds `player_vorp`. Kept so the name stays defined.
player_vorp = table.groupby(['Player', 'Games'])['VORP'].sum().reset_index()
player_vorp.columns = ['Player', 'Games', 'VORP']
# Adjusting naming conventions of current year VORP table to be consistent w/ BOY vorps
def name_exceptions(x):
    """Translate a handful of known alternate player names.

    Returns the replacement when ``x`` equals one of the listed names,
    otherwise returns ``x`` unchanged.
    """
    replacements = (
        ('cam thomas', 'cameron thomas'),
        ('herbert jones', 'herb jones'),
        ('charlie brown', 'charles brown'),
        ('ish wainright', 'ishmail wainright'),
        ('enes freedom', 'enes kanter'),
    )
    for alternate, replacement in replacements:
        if x == alternate:
            return replacement
    return x

def _normalize_player_name(raw_name):
    """Return `raw_name` in the key format used to match players across tables.

    Steps, in order: lower-case, transliterate to ASCII with unidecode, drop
    apostrophes and periods, keep at most the first two space-separated
    tokens, strip leading/trailing '*' and ',' characters, then apply
    name_exceptions.
    """
    cleaned = unidecode.unidecode(raw_name.lower())
    cleaned = cleaned.replace("'", '').replace('.', '')
    # Slicing (instead of indexing [0] and [1]) keeps a single-token name from
    # raising IndexError.
    cleaned = ' '.join(cleaned.split(' ')[:2])
    cleaned = cleaned.strip('*,')
    return name_exceptions(cleaned)


# One shared normaliser for both the stats table and the rosters, so the two
# sides of the match cannot drift apart. .assign returns a new frame, which
# avoids writing into a filtered slice.
table = table.assign(Player = table.Player.apply(_normalize_player_name))

# Merging BOY VORPS w/ current VORPs
vorp_df = pd.merge(boy_vorps, table, on = 'Player', how = 'inner')
vorp_df['Games'] = vorp_df.Games.apply(float)
# Keep only the first row per player, so each name matches at most one row below.
vorp_df.drop_duplicates(subset = ['Player'], inplace = True)

# Getting each player's current year annualized VORP, adjusting for games played
if frac_season < 0.25:
    # Early in the season: use the pre-season projection for everyone.
    vorp_df['VORP_82'] = vorp_df.VORP_projection
else:
    # `else` (rather than a second `if frac_season > 0.25`) so that a value of
    # exactly 0.25 is handled instead of leaving VORP_82 at zero.
    # Minimum games a player needs before their current-season VORP is blended in.
    if frac_season <= 0.5:
        min_games = 12
    elif frac_season <= 0.75:
        min_games = 25
    else:
        min_games = 40
    games_frac = vorp_df.Games / 82.0
    blended = (vorp_df.VORP_projection * (1.0 - games_frac)) + (vorp_df.VORP * games_frac)
    # Players below the games threshold keep the projection; all others get the blend.
    vorp_df['VORP_82'] = vorp_df.VORP_projection.where(vorp_df.Games < min_games, blended)

# Iterating through active rosters to get active roster annualized VORP
team_vorp_records = []
missed_players = []
for team, roster in active_rosters.items():
    team_vorp = 0.0
    for player in roster:
        # Empty depth-chart slots come through as NaN.
        if str(player) == 'nan':
            continue

        # Aligning naming conventions with vorp_df. (Previously the result of
        # player.strip('*,') was discarded here.)
        player = _normalize_player_name(player)

        # Adding player's VORP to team VORP total
        player_rows = vorp_df[vorp_df.Player == player]
        # Was `len(player_vorp == 1)`, which measured the length of a boolean frame.
        if len(player_rows) == 1:
            team_vorp += player_rows.VORP_82.iloc[0]
        else:
            missed_players.append(player)

    team_vorp_records.append({'Team': team, 'VORP_Today': team_vorp})

# Build the frame once from the collected rows; DataFrame.append is deprecated
# and unavailable in pandas 2.
team_vorp_df = pd.DataFrame(team_vorp_records, columns = ['Team', 'VORP_Today'])

In [53]:
boy_win_pct_df = pd.read_csv('In_Season/Data/BOY_projected_win_pct.csv', index_col = 0)

# Possessions table team map for merging: teamrankings.com short names -> the
# full names used in the ratings table's 'Team' column.
team_map = {
    'LA Lakers' : 'Los Angeles Lakers',
    'Charlotte' : 'Charlotte Hornets',
    'Sacramento' : 'Sacramento Kings',
    'San Antonio' : 'San Antonio Spurs',
    'Houston' : 'Houston Rockets',
    'Minnesota' : 'Minnesota Timberwolves',
    'Phoenix' : 'Phoenix Suns',
    'Brooklyn' : 'Brooklyn Nets',
    'Detroit' : 'Detroit Pistons',
    'Memphis' : 'Memphis Grizzlies',
    'Milwaukee' : 'Milwaukee Bucks',
    'Boston' : 'Boston Celtics',
    'Utah' : 'Utah Jazz',
    'Golden State' : 'Golden State Warriors',
    'LA Clippers' : 'Los Angeles Clippers',
    'Orlando' : 'Orlando Magic',
    'Chicago' : 'Chicago Bulls',
    'Portland' : 'Portland Trail Blazers',
    'Okla City': 'Oklahoma City Thunder',
    'New Orleans' : 'New Orleans Pelicans',
    'Indiana' : 'Indiana Pacers',
    'Washington' : 'Washington Wizards',
    'Cleveland' : 'Cleveland Cavaliers',
    'Atlanta' : 'Atlanta Hawks',
    'Toronto' : 'Toronto Raptors',
    'Denver' : 'Denver Nuggets',
    'New York' : 'New York Knicks',
    'Philadelphia' : 'Philadelphia 76ers',
    'Miami' : 'Miami Heat',
    'Dallas' : 'Dallas Mavericks'
}

# Getting net adjusted rating table (season taken from current_year rather
# than a hard-coded year, matching the other scrapes in this notebook)
rating_tables = pd.read_html(f'https://www.basketball-reference.com/leagues/NBA_{current_year}_ratings.html')
rating_table = rating_tables[0]
rating_table.columns = rating_table.columns.droplevel(0)
rating_table['Games'] = rating_table.W + rating_table.L

# Getting possessions table
possesions_table = pd.read_html('https://www.teamrankings.com/nba/stat/possessions-per-game')
possesions_table = possesions_table[0]
possesions_table['Team'] = possesions_table.Team.apply(lambda x: team_map[x])
# Same season column convention as the games-played scrape: str(current_year - 1)
possesions_table = possesions_table[['Team', str(current_year - 1)]]
possesions_table.columns = ['Team', 'Possesions/Game']

# Merging tables
merged = pd.merge(rating_table, possesions_table, on = 'Team', how = 'inner')
merged['Possesions'] = merged.Games*merged['Possesions/Game']
# NRtg is scaled by possessions/100 to get a point differential to date,
# which is then extrapolated to an 82-game season.
merged['Adj_Point_Differential'] = merged.Possesions/100 * merged.NRtg
merged['Adj_Point_Differential_82'] = merged.Adj_Point_Differential*(82/merged.Games)

# .copy() so the rename below edits an independent frame instead of a column
# slice of `merged` (which raised SettingWithCopyWarning).
point_diff_df = merged[['Team', 'Adj_Point_Differential_82', 'Games']].copy()
# Align Portland's spelling with the team names used by the roster/VORP tables.
point_diff_df['Team'] = point_diff_df.Team.replace('Portland Trail Blazers', 'Portland Trailblazers')

boy_win_pct_df = boy_win_pct_df[['Team', 'Projected_Point_Differential', 'VORP_Projection']]
# NOTE: frac_season is deliberately NOT reassigned here. A hard-coded
# `frac_season = 0.6` used to override the value computed from the
# games-played table in the earlier cell.

# Retrieving VORP/point model
# NOTE(security): pickle.load executes arbitrary code from the file; only load
# model files produced by this project.
file_name = 'Model_Build/Data/vorp_regression.pickle'
with open(file_name, 'rb') as f:
    model = pickle.load(f)

# Retrieving point/win_pct model
file_name = 'Model_Build/Data/win_pct_regression.pickle'
with open(file_name, 'rb') as f:
    win_pct_model = pickle.load(f)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s


In [54]:
# Merging tables

merged_1 = pd.merge(point_diff_df, team_vorp_df, on = 'Team')
merged_2 = pd.merge(merged_1, boy_win_pct_df, on = 'Team')

# Calculating team's point differential considering inputs
if frac_season < 0.25:
    # Early season: rely on the pre-season projection only.
    merged_2['Point_Differential'] = merged_2.Projected_Point_Differential
else:
    # Weight the observed (82-game-scaled) differential by the share of games
    # played and the pre-season projection by the remainder.
    merged_2['Point_Differential'] = merged_2.Projected_Point_Differential * (1 - merged_2.Games/82.0)\
        + merged_2.Adj_Point_Differential_82 * merged_2.Games/82.0

# Making adjustment for VORP
# model.coef_ is an array; take its single coefficient as a plain float so the
# column receives scalars rather than one-element arrays.
# (assumes a single-feature regression -- TODO confirm against the model build)
vorp_coef = float(np.ravel(model.coef_)[0])
# astype(float) guards against VORP_Today arriving as an object-dtype column.
vorp_difference = merged_2.VORP_Today.astype(float) - merged_2.VORP_Projection
merged_2['VORP_Adjustment'] = vorp_coef * vorp_difference

merged_2['Point_Differential_Final'] = merged_2.Point_Differential + merged_2.VORP_Adjustment

# Adding projected win % column: one predict call for all teams instead of one
# call per row. The feature frame keeps the same single column as before.
merged_2['Projected_Win_Pct'] = np.ravel(
    win_pct_model.predict(merged_2[['Point_Differential_Final']])
)

In [55]:
# Display the final per-team table built in the previous cell
merged_2

Unnamed: 0,Team,Adj_Point_Differential_82,Games,VORP_Today,Projected_Point_Differential,VORP_Projection,Point_Differential,VORP_Adjustment,Point_Differential_Final,Projected_Win_Pct
0,Golden State Warriors,704.93924,49,14.543054,318.227773,15.42501,549.311454,-48.178637,501.132817,0.691101
1,Phoenix Suns,666.32544,48,12.683604,411.199466,14.869291,560.541499,-119.397575,441.143924,0.668126
2,Utah Jazz,526.4277,50,13.52801,581.893157,16.36667,548.072757,-155.067516,393.00524,0.649689
3,Cleveland Cavaliers,414.07704,49,7.866574,-234.911721,8.983557,152.898636,-61.017419,91.881217,0.534361
4,Miami Heat,396.95872,49,11.235986,212.957286,13.841309,322.909363,-142.321018,180.588344,0.568335
5,Memphis Grizzlies,288.0004,51,12.935916,304.516841,14.95418,294.24442,-110.251738,183.992683,0.569639
6,Boston Celtics,243.14312,50,11.956791,290.536943,14.740166,261.638271,-152.047502,109.590769,0.541144
7,Milwaukee Bucks,308.52336,51,12.984381,421.095464,15.039785,351.081106,-112.280626,238.800481,0.59063
8,Dallas Mavericks,204.78024,49,12.22858,342.897294,15.510615,260.363933,-179.287784,81.076149,0.530223
9,Philadelphia 76ers,201.72984,48,12.955001,528.410462,16.409473,337.182781,-188.707501,148.475279,0.556036
