In [1]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LinearRegression
import tensorflow as tf

In [2]:
# Import dependencies
import os
import pandas as pd
import requests
import re
from bs4 import BeautifulSoup
from datetime import datetime

In [3]:
from nba_api.stats.static import teams
from nba_api.stats.endpoints import leaguegamefinder

In [4]:
# Remove pandas' display limits so DataFrame outputs show every row and column.
# NOTE(review): with no row limit, displaying a large frame prints it in full.
pd.set_option("display.max_rows", None, "display.max_columns", None)

In [5]:
def createPlayerDF(stat, year):
    """Scrape a league-wide player table from basketball-reference.com.

    Parameters
    ----------
    stat : str
        Page suffix placed in the URL (the notebook passes 'totals').
    year : str or int
        Season identifier placed in the URL.

    Returns
    -------
    pandas.DataFrame
        One row per ``full_table`` row on the page, indexed by "Player".
        Every value is the cell text exactly as scraped (strings).

    Raises
    ------
    requests.HTTPError
        If the page request comes back with an error status.
    ValueError
        If the page contains no element with class ``thead`` to take the
        column names from.
    """
    # Set url for given year
    url = f'https://www.basketball-reference.com/leagues/NBA_{year}_{stat}.html'
    page = requests.get(url)
    # Fail here with a clear HTTP error instead of later on a missing table
    page.raise_for_status()

    # Convert the page html to a soup object
    soup = BeautifulSoup(page.content, 'html.parser')

    # Find the sought after rows of data
    rows = soup.find_all(class_="full_table")

    # Store the headers/column names
    head = soup.find(class_="thead")
    if head is None:
        raise ValueError(f"No header row (class 'thead') found at {url}")

    # The header text is newline-separated; the slice drops the first two
    # pieces and the trailing empty piece so the names line up with the
    # <td> cells collected below.
    column_names = head.text.replace("\n", ",").split(",")[2:-1]

    # One list of cell texts per player row
    players = [[td.text for td in row.find_all("td")] for row in rows]

    df = pd.DataFrame(players, columns=column_names).set_index("Player")

    # Strip the occasional '*' marker from player names. This must be a literal
    # replacement: as a regular expression, '*' alone is invalid.
    df.index = df.index.str.replace('*', "", regex=False)

    return df

In [131]:
def createRosters(team):
    """Return the player names from a team's roster table on basketball-reference.com.

    Parameters
    ----------
    team : str
        Team abbreviation as used in basketball-reference.com URLs.

    Returns
    -------
    list of str
        Text of each player cell in the roster table, with the
        two-non-breaking-spaces + "(TW)" suffix removed where present.
    """
    # The page requested is the one for the current calendar year.
    # NOTE(review): presumably the site keys seasons by their ending year, so
    # late in a calendar year this may point at the previous season - confirm.
    url = f"https://www.basketball-reference.com/teams/{team}/{datetime.now().year}.html"
    response = requests.get(url)

    # Parse the page and collect every element tagged data-stat="player"
    # inside the element whose id is "roster"
    parsed = BeautifulSoup(response.content, 'html.parser')
    name_cells = parsed.find(id="roster").find_all(attrs={"data-stat" : "player"})

    # The first match is skipped (presumably the column header cell); the
    # "(TW)" suffix is stripped from every remaining name that carries it.
    return [cell.text.replace("\xa0\xa0(TW)", "") for cell in name_cells[1:]]

In [208]:
def createPlayerAverages(df):
    """Reduce a season table to its numeric counting-stat columns.

    Despite the name, nothing is averaged here: the function only removes the
    categorical, percentage and derived columns and converts what is left to
    numeric dtypes. (An earlier draft sketched a per-game rescaling in
    comments; it is not performed.)

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing at least the columns that get removed below.

    Returns
    -------
    pandas.DataFrame
        A new frame without those columns, every remaining column passed
        through ``pd.to_numeric``. The input frame is not modified.
    """
    # Categorical and unnecessary columns
    unwanted = [
        "Pos", "Age", "Tm", "GS",
        "FG", "FG%", "3P", "3P%", "2P", "2PA", "2P%", "eFG%",
        "FT", "FT%", "TRB", "PTS",
    ]

    # Scraped values arrive as object/str; convert them column by column
    return df.drop(columns=unwanted).apply(pd.to_numeric)

In [None]:
def convertToPerGameStats(df):
    """Convert season totals to per-game values.

    Every column other than "G" is divided by the player's games played, and
    the resulting minutes column is renamed from "MP" to "MPG".

    Parameters
    ----------
    df : pandas.DataFrame
        Season totals with a games-played column "G". Values may be numbers
        or numeric strings.

    Returns
    -------
    pandas.DataFrame
        A new frame without "G"; rows with zero games become NaN.
        The input frame is not modified.

    Raises
    ------
    KeyError
        If ``df`` has no "G" column.
    """
    stats = df.apply(pd.to_numeric)

    # Zero games would divide to inf; mask them so the result is NaN instead
    games = stats["G"].where(stats["G"] != 0)

    per_game = stats.drop(columns="G").div(games, axis=0)
    return per_game.rename(columns={"MP": "MPG"})

In [7]:
# Scrape one season-totals table per year, from startYear through the current
# calendar year, and keep them in a dict keyed by the integer year.
currentYear = datetime.now().year
startYear = 2016
year_totals = {}

for year in range(startYear, currentYear+1):
    # createPlayerDF formats the year into the page URL, so it is passed as text
    year_totals[year] = createPlayerDF('totals', str(year))

In [132]:
# Get NBA team rosters, keyed by basketball-reference.com team abbreviation
nba_teams = teams.get_teams()

# basketball-reference.com uses different abbreviations than nba_api for
# Brooklyn, Phoenix, and Charlotte. Translating by abbreviation (instead of by
# list position) stays correct whatever order get_teams() returns the teams in.
bbref_abbreviations = {"BKN": "BRK", "PHX": "PHO", "CHA": "CHO"}
nba_team_abr = [
    bbref_abbreviations.get(team['abbreviation'], team['abbreviation'])
    for team in nba_teams
]
team_rosters = {}

for team in nba_team_abr:
    team_rosters[team] = createRosters(team)

In [210]:
# Sum each rostered player's season totals over the current year and the five
# years before it.
currentYear = datetime.now().year
player_totals = {}

# Clean each season table once, rather than once per player per season.
# Seasons are looked up in year_totals (the dict keyed by year); years that
# were never scraped are skipped.
season_stats = {
    season: createPlayerAverages(year_totals[season])
    for season in range(currentYear - 5, currentYear + 1)
    if season in year_totals
}

for team in team_rosters:
    for player in team_rosters[team]:
        # Start from scalar 0 so `+` adds element-wise to the first stat line.
        # (A list accumulator with `+=` would be extended by the Series values
        # instead of being added to.) Players found in no season stay at 0.
        total = 0

        for stats in season_stats.values():
            if player in stats.index:
                total = total + stats.loc[player]

        player_totals[player] = total

In [212]:
# Create each team's combined stat line by summing the stat lines of the
# players on its current roster
team_averages = {}

for team in team_rosters:
    # Start from scalar 0 so `+` adds element-wise to the first player's line.
    # (A list accumulator with `+=` would be extended by the Series values
    # instead of being added to.)
    team_total = 0

    for player in team_rosters[team]:
        # NOTE(review): player_averages is not defined in any visible cell of
        # this notebook - confirm where it is built before running this.
        team_total = team_total + player_averages[player]

    team_averages[team] = team_total

In [215]:
# Inspect the 2020 season table. Season tables live in year_totals (keyed by
# year); player_totals is keyed by player name and has no 2020 entry.
year_totals[2020]

Unnamed: 0_level_0,Pos,Age,Tm,G,GS,MP,FG,FGA,FG%,3P,3PA,3P%,2P,2PA,2P%,eFG%,FT,FTA,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS
Player,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1
Steven Adams,C,26,OKC,63,63,1680,283,478,0.592,1,3,0.333,282,475,0.594,0.593,117,201,0.582,207,376,583,146,51,67,94,122,684
Bam Adebayo,PF,22,MIA,72,72,2417,440,790,0.557,2,14,0.143,438,776,0.564,0.558,264,382,0.691,176,559,735,368,82,93,204,182,1146
LaMarcus Aldridge,C,34,SAS,53,53,1754,391,793,0.493,61,157,0.389,330,636,0.519,0.532,158,191,0.827,103,289,392,129,36,87,74,128,1001
Kyle Alexander,C,23,MIA,2,0,13,1,2,0.5,0,0,,1,2,0.5,0.5,0,0,,2,1,3,0,0,0,1,1,2
Nickeil Alexander-Walker,SG,21,NOP,47,1,591,98,266,0.368,46,133,0.346,52,133,0.391,0.455,25,37,0.676,9,75,84,89,17,8,54,57,267
Grayson Allen,SG,24,MEM,38,0,718,117,251,0.466,57,141,0.404,60,110,0.545,0.58,39,45,0.867,8,77,85,52,10,2,33,53,330
Jarrett Allen,C,21,BRK,70,64,1852,302,465,0.649,0,6,0.0,302,459,0.658,0.649,171,270,0.633,216,455,671,110,40,92,77,162,775
Kadeem Allen,PG,27,NYK,10,0,117,19,44,0.432,5,16,0.313,14,28,0.5,0.489,7,11,0.636,2,7,9,21,5,2,8,7,50
Al-Farouq Aminu,PF,29,ORL,18,2,380,25,86,0.291,9,36,0.25,16,50,0.32,0.343,19,29,0.655,24,63,87,21,18,8,17,27,78
Justin Anderson,SG,26,BRK,10,1,107,10,38,0.263,6,29,0.207,4,9,0.444,0.342,2,4,0.5,1,20,21,8,0,6,4,13,28


In [203]:
# Inspect the roster list stored under the "TOR" key.
team_rosters["TOR"]

['Chris Boucher',
 'Fred VanVleet',
 'Svi Mykhailiuk',
 'Scottie Barnes',
 'Gary Trent Jr.',
 'Precious Achiuwa',
 'Dalano Banton',
 'Pascal Siakam',
 'OG Anunoby',
 'Malachi Flynn',
 'Justin Champagnie',
 'Khem Birch',
 'Yuta Watanabe',
 'Isaac Bonga',
 'Goran Dragić',
 'David Johnson']

In [213]:
# Inspect the accumulated stat line stored for a single player.
player_totals["Precious Achiuwa"]

G        94
MP     1579
FGA     500
3PA      42
FTA     164
ORB     154
DRB     310
AST      72
STL      40
BLK      50
TOV      79
PF      167
Name: Precious Achiuwa, dtype: int64

In [200]:
# Inspect the combined stat line stored under the "TOR" key.
team_averages["TOR"]

FGA     431.654051
3PA     169.532297
FTA     104.762231
ORB      52.629501
DRB     148.217007
AST     114.550021
STL      41.797467
BLK      23.080149
TOV      58.868088
PF       99.462307
MPG    1179.119931
Name: Chris Boucher, dtype: float64

In [202]:
# Inspect the combined stat line stored under the "ATL" key.
team_averages["ATL"]

FGA     605.587168
3PA     217.993439
FTA     177.309522
ORB      72.296756
DRB     230.984193
AST     158.272178
STL      46.515357
BLK      32.909554
TOV      89.686558
PF      122.981939
MPG    1591.818104
Name: Trae Young, dtype: float64

In [137]:
# Display the current value of `test`.
# NOTE(review): in this notebook `test` is only assigned further down (as a
# DataFrame of game rows), so this cell relies on earlier kernel state - confirm.
test    

FGA    24.369313
3PA     8.400138
FTA     5.500112
ORB     3.445033
DRB    10.178161
AST     6.257533
STL     2.050025
BLK     1.741741
TOV     4.264333
PF      6.184418
Name: Jayson Tatum, dtype: float64

In [142]:
# Inspect the 2021 season table. Season tables live in year_totals (keyed by
# year); player_totals is keyed by player name and has no 2021 entry.
year_totals[2021]

Unnamed: 0_level_0,Pos,Age,Tm,G,GS,MP,FG,FGA,FG%,3P,3PA,3P%,2P,2PA,2P%,eFG%,FT,FTA,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS
Player,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1
Precious Achiuwa,PF,21,MIA,61,4,737,124,228,0.544,0,1,0.0,124,227,0.546,0.544,56,110,0.509,73,135,208,29,20,28,43,91,304
Jaylen Adams,PG,24,MIL,7,0,18,1,8,0.125,0,2,0.0,1,6,0.167,0.125,0,0,,0,3,3,2,0,0,0,1,2
Steven Adams,C,27,NOP,58,58,1605,189,308,0.614,0,3,0.0,189,305,0.62,0.614,60,135,0.444,213,301,514,111,54,38,78,113,438
Bam Adebayo,C,23,MIA,64,64,2143,456,800,0.57,2,8,0.25,454,792,0.573,0.571,283,354,0.799,142,431,573,346,75,66,169,145,1197
LaMarcus Aldridge,C,35,TOT,26,23,674,140,296,0.473,31,80,0.388,109,216,0.505,0.525,41,47,0.872,19,99,118,49,11,29,27,47,352
Ty-Shon Alexander,SG,22,PHO,15,0,47,3,12,0.25,2,9,0.222,1,3,0.333,0.333,1,2,0.5,2,8,10,6,0,1,3,2,9
Nickeil Alexander-Walker,SG,22,NOP,46,13,1007,192,458,0.419,76,219,0.347,116,239,0.485,0.502,48,66,0.727,13,131,144,102,47,22,69,88,508
Grayson Allen,SG,25,MEM,50,38,1259,173,414,0.418,107,274,0.391,66,140,0.471,0.547,79,91,0.868,19,141,160,108,46,8,48,71,532
Jarrett Allen,C,22,TOT,63,45,1864,298,482,0.618,6,19,0.316,292,463,0.631,0.624,204,290,0.703,196,435,631,106,32,90,100,96,806
Al-Farouq Aminu,PF,30,TOT,23,14,434,38,99,0.384,8,37,0.216,30,62,0.484,0.424,18,22,0.818,22,88,110,31,19,9,28,29,102


In [5]:
# Take the entry stored under key 2021 as the player table used below.
# NOTE(review): players_combined is not defined in any visible cell of this
# notebook - confirm where it comes from.
player_df = players_combined[2021]

In [9]:
# Fetch one month of the schedule table through the sports-reference widget
# endpoint and parse it with pandas.
month = "january"
year = "2021"

# month and year are substituted into the URL-encoded page path
url = f'https://widgets.sports-reference.com/wg.fcgi?css=1&site=bbr&url=%2Fleagues%2FNBA_{year}_games-{month}.html&div=div_schedule'
html = requests.get(url).content
# read_html returns a list of every table it finds; the last one is kept
df_list = pd.read_html(html)
games_df = df_list[-1]

In [10]:
# Preview the first rows of the parsed schedule table.
games_df.head()

Unnamed: 0,Date,Start (ET),Visitor/Neutral,PTS,Home/Neutral,PTS.1,Unnamed: 6,Unnamed: 7,Attend.,Notes
0,"Fri, Jan 1, 2021",7:00p,Memphis Grizzlies,108,Charlotte Hornets,93,Box Score,,0,
1,"Fri, Jan 1, 2021",7:00p,Miami Heat,83,Dallas Mavericks,93,Box Score,,0,
2,"Fri, Jan 1, 2021",7:00p,Boston Celtics,93,Detroit Pistons,96,Box Score,,0,
3,"Fri, Jan 1, 2021",7:30p,Atlanta Hawks,114,Brooklyn Nets,96,Box Score,,0,
4,"Fri, Jan 1, 2021",8:00p,Chicago Bulls,96,Milwaukee Bucks,126,Box Score,,0,


In [31]:
# Hand-written player-name lists; they are used below to select rows from
# player_df by index label.
roster_home = ["Devonte' Graham", "Terry Rozier", "Gordon Hayward", "P.J. Washington", "Bismack Biyombo", "Miles Bridges", "LaMelo Ball", "Cody Martin", "Caleb Martin", "Jalen McDaniels", "Malik Monk", "Nick Richards", "Vernon Carey Jr.", "Nate Darling"]
roster_away = ["Tyus Jones", "Dillon Brooks", "Kyle Anderson", "Brandon Clarke", "Jonas Valančiūnas", "Desmond Bane", "John Konchar", "Gorgui Dieng", "Sean McDermott"]

In [64]:
# Select each roster's rows from player_df by index label. Label-based
# selection with a list raises KeyError if any listed name is missing.
team_home = player_df.loc[roster_home]
team_away = player_df.loc[roster_away]

In [28]:
# Display the rows selected for the home roster.
team_home

Unnamed: 0_level_0,Pos,Age,Tm,G,GS,MP,FG,FGA,FG%,3P,3PA,3P%,2P,2PA,2P%,eFG%,FT,FTA,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,PER,TS%,3PAr,FTr,ORB%,DRB%,TRB%,AST%,STL%,BLK%,TOV%,USG%,Unnamed: 41_level_0,OWS,DWS,WS,WS/48,Unnamed: 46_level_0,OBPM,DBPM,BPM,VORP
Player,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1
Devonte' Graham,PG,25,CHO,55,44,1659,252,669,0.377,179,477,0.375,73,192,0.38,0.51,133,158,0.842,22,126,148,295,48,6,84,77,816,14.6,0.552,0.713,0.236,1.4,8.4,4.8,26.3,1.4,0.4,10.2,21.4,,2.5,0.9,3.4,0.099,,1.8,-1.1,0.7,1.1
Terry Rozier,SG,26,CHO,69,69,2383,510,1134,0.45,222,571,0.389,288,563,0.512,0.548,165,202,0.817,47,255,302,293,87,26,128,119,1407,17.1,0.575,0.504,0.178,2.1,11.8,6.9,20.0,1.8,1.1,9.5,24.4,,3.3,2.0,5.2,0.105,,2.2,-0.5,1.6,2.2
Gordon Hayward,SF,30,CHO,44,44,1496,311,658,0.473,85,205,0.415,226,453,0.499,0.537,156,185,0.843,37,221,258,181,52,14,91,74,863,17.6,0.584,0.312,0.281,2.6,16.3,9.3,19.5,1.7,0.9,11.0,23.9,,2.2,1.4,3.5,0.114,,1.6,-0.1,1.6,1.4
P.J. Washington,PF,22,CHO,64,61,1954,302,686,0.44,112,290,0.386,190,396,0.48,0.522,108,145,0.745,93,325,418,161,69,79,128,171,824,13.4,0.549,0.423,0.211,5.1,18.3,11.6,12.2,1.7,4.0,14.6,19.4,,0.3,2.4,2.7,0.066,,-1.5,0.6,-0.9,0.6
Bismack Biyombo,C,28,CHO,66,36,1349,142,242,0.587,0,1,0.0,142,241,0.589,0.587,47,105,0.448,137,210,347,81,17,74,71,141,331,11.7,0.574,0.004,0.434,10.8,17.1,13.9,8.3,0.6,5.4,19.8,11.5,,0.9,1.4,2.3,0.081,,-3.4,0.4,-3.0,-0.4
Miles Bridges,PF,22,CHO,66,19,1932,313,622,0.503,116,290,0.4,197,332,0.593,0.596,98,113,0.867,81,316,397,147,44,52,106,143,840,14.7,0.625,0.466,0.182,4.5,18.0,11.1,11.4,1.1,2.7,13.6,17.3,,2.4,1.9,4.2,0.105,,0.2,0.1,0.2,1.1
LaMelo Ball,PG,19,CHO,51,31,1469,293,672,0.436,92,261,0.352,201,411,0.489,0.504,125,165,0.758,63,239,302,313,81,18,145,136,803,17.5,0.539,0.388,0.246,4.6,17.9,11.1,33.9,2.7,1.2,16.3,26.1,,1.0,1.9,2.8,0.093,,1.1,0.7,1.8,1.4
Cody Martin,SF,25,CHO,52,10,849,83,188,0.441,16,58,0.276,67,130,0.515,0.484,25,43,0.581,52,109,161,86,37,12,40,52,207,10.6,0.5,0.309,0.229,6.5,14.1,10.3,13.9,2.1,1.4,16.2,12.5,,0.2,0.9,1.1,0.061,,-2.7,0.7,-2.0,0.0
Caleb Martin,SF,25,CHO,53,3,818,96,256,0.375,31,125,0.248,65,131,0.496,0.436,41,64,0.641,31,110,141,67,37,12,33,62,264,9.5,0.465,0.488,0.25,4.0,14.8,9.3,11.5,2.2,1.5,10.4,16.7,,-0.4,0.9,0.5,0.03,,-3.6,0.2,-3.4,-0.3
Jalen McDaniels,SF,23,CHO,47,18,904,133,284,0.468,35,105,0.333,98,179,0.547,0.53,45,64,0.703,45,124,169,53,28,19,45,95,346,11.4,0.554,0.37,0.225,5.3,15.1,10.1,8.6,1.5,2.1,12.6,17.0,,0.3,0.9,1.2,0.062,,-2.8,-0.4,-3.2,-0.3


In [34]:
# Display the rows selected for the away roster.
team_away

Unnamed: 0_level_0,Pos,Age,Tm,G,GS,MP,FG,FGA,FG%,3P,3PA,3P%,2P,2PA,2P%,eFG%,FT,FTA,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,PER,TS%,3PAr,FTr,ORB%,DRB%,TRB%,AST%,STL%,BLK%,TOV%,USG%,Unnamed: 41_level_0,OWS,DWS,WS,WS/48,Unnamed: 46_level_0,OBPM,DBPM,BPM,VORP
Player,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1
Tyus Jones,PG,24,MEM,70,9,1222,178,413,0.431,45,140,0.321,133,273,0.487,0.485,41,45,0.911,21,119,140,259,64,6,48,31,442,15.0,0.511,0.339,0.109,1.8,10.4,6.1,28.6,2.5,0.4,10.0,16.6,,1.5,1.3,2.8,0.11,,-0.1,0.9,0.8,0.9
Dillon Brooks,SF,25,MEM,67,67,1997,432,1031,0.419,128,372,0.344,304,659,0.461,0.481,159,195,0.815,53,143,196,157,78,26,119,237,1151,12.1,0.515,0.361,0.189,2.8,7.7,5.2,11.7,1.9,1.2,9.6,26.1,,-0.5,1.8,1.4,0.033,,-2.6,-1.1,-3.7,-0.9
Kyle Anderson,PF,27,MEM,69,69,1887,308,658,0.468,94,261,0.36,214,397,0.539,0.54,144,184,0.783,52,344,396,250,84,57,86,120,854,17.2,0.578,0.397,0.28,2.9,19.5,11.1,18.3,2.1,2.7,10.4,18.5,,2.9,2.7,5.6,0.143,,1.4,1.9,3.3,2.5
Brandon Clarke,PF,24,MEM,59,16,1415,256,495,0.517,20,77,0.26,236,418,0.565,0.537,78,113,0.69,97,231,328,95,60,51,33,84,610,17.3,0.56,0.156,0.228,7.2,17.5,12.3,9.5,2.0,3.2,5.7,17.2,,2.2,2.0,4.2,0.142,,0.4,1.2,1.5,1.3
Jonas Valančiūnas,C,28,MEM,62,61,1755,440,743,0.592,21,57,0.368,419,686,0.611,0.606,157,203,0.773,253,523,776,112,35,57,100,179,1058,24.4,0.636,0.077,0.273,15.1,31.9,23.4,10.0,1.0,2.9,10.7,22.4,,5.1,2.6,7.8,0.213,,3.2,-0.3,3.0,2.2
Desmond Bane,SG,22,MEM,68,17,1519,234,499,0.469,117,271,0.432,117,228,0.513,0.586,40,49,0.816,31,179,210,118,41,16,59,125,625,12.2,0.6,0.543,0.098,2.1,12.6,7.3,10.6,1.3,0.9,10.2,16.1,,1.6,1.3,2.9,0.093,,-0.6,-0.2,-0.8,0.5
John Konchar,SG,24,MEM,43,0,575,69,138,0.5,21,56,0.375,48,82,0.585,0.576,25,30,0.833,35,93,128,47,30,9,18,40,184,14.8,0.608,0.406,0.217,6.4,17.3,11.8,10.7,2.5,1.4,10.6,12.4,,0.9,0.8,1.7,0.145,,-0.2,1.4,1.2,0.5
Gorgui Dieng,C,31,TOT,38,1,552,85,163,0.521,30,70,0.429,55,93,0.591,0.613,58,67,0.866,40,100,140,48,26,16,32,56,258,19.2,0.67,0.429,0.411,7.6,19.5,13.5,12.0,2.3,2.6,14.3,17.4,,1.3,0.8,2.1,0.185,,1.8,1.5,3.4,0.7
Sean McDermott,SF,24,MEM,18,0,158,13,33,0.394,5,22,0.227,8,11,0.727,0.47,8,8,1.0,6,13,19,4,2,3,4,8,39,7.2,0.534,0.667,0.242,4.0,8.8,6.4,3.1,0.6,1.7,9.9,10.8,,0.1,0.1,0.2,0.05,,-3.1,-1.2,-4.3,-0.1


In [90]:
# Replace the index with a default integer index; drop=True discards the old
# index values instead of keeping them as a column.
team_home_fin = team_home.reset_index(drop=True)

In [91]:
# Remove the listed columns by name, then remove any column whose values are
# all missing.
drop_columns = ["Pos", "Age", "Tm", "G", "GS", "FG%", "3P%", "2P", "2PA", "2P%", "eFG%", "FT%", "ORB", "DRB", "TS%", "3PAr", "FTr", "ORB%", "DRB%", "TRB%", "AST%", "STL%", "BLK%", "TOV%", "USG%", "WS/48", "OBPM", "DBPM", "BPM", "VORP"]
team_home_fin = team_home_fin.drop(columns=drop_columns)
team_home_fin = team_home_fin.dropna(axis=1, how="all")

In [92]:
# Drop the column at position 19.
# NOTE(review): this is position-based, so which column it removes depends on
# what survived the previous cell; re-running this cell drops another column.
# Confirm the intended column and prefer dropping it by name.
team_home_fin = team_home_fin.drop(team_home_fin.columns[19], axis=1)

In [101]:
# Display the current state of team_home_fin.
team_home_fin

Unnamed: 0,FG,FGA,3P,3PA,FT,FTA,TRB,AST,STL,BLK,TOV,PF,PTS,PER,OWS,DWS,WS
0,0.151899,0.403255,0.107896,0.287523,0.080169,0.095238,0.08921,0.177818,0.028933,0.003617,0.050633,0.046414,0.491863,0.0088,0.001507,0.000542,0.002049
1,0.214016,0.475871,0.09316,0.239614,0.06924,0.084767,0.126731,0.122954,0.036509,0.010911,0.053714,0.049937,0.590432,0.007176,0.001385,0.000839,0.002182
2,0.207888,0.43984,0.056818,0.137032,0.104278,0.123663,0.17246,0.120989,0.034759,0.009358,0.060829,0.049465,0.576872,0.011765,0.001471,0.000936,0.00234
3,0.154555,0.351075,0.057318,0.148414,0.055271,0.074207,0.21392,0.082395,0.035312,0.04043,0.065507,0.087513,0.421699,0.006858,0.000154,0.001228,0.001382
4,0.105263,0.179392,0.0,0.000741,0.034841,0.077835,0.257228,0.060044,0.012602,0.054855,0.052632,0.104522,0.245367,0.008673,0.000667,0.001038,0.001705
5,0.162008,0.321946,0.060041,0.150104,0.050725,0.058489,0.205487,0.076087,0.022774,0.026915,0.054865,0.074017,0.434783,0.007609,0.001242,0.000983,0.002174
6,0.199455,0.457454,0.062628,0.177672,0.085092,0.112321,0.205582,0.21307,0.05514,0.012253,0.098707,0.09258,0.54663,0.011913,0.000681,0.001293,0.001906
7,0.097762,0.221437,0.018846,0.068316,0.029446,0.050648,0.189635,0.101296,0.043581,0.014134,0.047114,0.061249,0.243816,0.012485,0.000236,0.00106,0.001296
8,0.117359,0.312958,0.037897,0.152812,0.050122,0.07824,0.172372,0.081907,0.045232,0.01467,0.040342,0.075795,0.322738,0.011614,-0.000489,0.0011,0.000611
9,0.147124,0.314159,0.038717,0.11615,0.049779,0.070796,0.186947,0.058628,0.030973,0.021018,0.049779,0.105088,0.382743,0.012611,0.000332,0.000996,0.001327


In [94]:
# Cast every column to float, one column at a time (iterating a DataFrame
# yields its column labels).
for col in team_home_fin:
    team_home_fin[col] = team_home_fin[col].astype(float)

In [98]:
# Convert the stat columns to per-minute rates by dividing by minutes played.
# Columns are chosen by name, so the result does not depend on "MP" being the
# first column, and "PER" is left untouched rather than divided and then
# multiplied back (which only added rounding error).
# NOTE: this modifies team_home_fin in place; running the cell twice divides twice.
per_minute_cols = team_home_fin.columns.drop(["MP", "PER"])
team_home_fin[per_minute_cols] = team_home_fin[per_minute_cols].div(team_home_fin["MP"], axis=0)

In [100]:
# Remove the "MP" column from team_home_fin.
team_home_fin = team_home_fin.drop(columns="MP")

In [88]:
# Show the dtype of each column.
team_home_fin.dtypes

MP     float64
FG     float64
FGA    float64
3P     float64
3PA    float64
FT     float64
FTA    float64
TRB    float64
AST    float64
STL    float64
BLK    float64
TOV    float64
PF     float64
PTS    float64
PER    float64
OWS    float64
DWS    float64
WS     float64
dtype: object

In [102]:
# Collapse the per-player rows into one value per column by summing.
# The name is rebound from a DataFrame to a Series here, so re-running the
# cells above after this one will not behave the same way.
team_home_fin = team_home_fin.sum()

In [104]:
# Display the summed values.
team_home_fin

[FG     2.051305
 FGA    4.656960
 3P     0.715751
 3PA    2.065808
 FT     0.980918
 FTA    1.293849
 TRB    2.382452
 AST    1.281877
 STL    0.376151
 BLK    0.294657
 TOV    0.766322
 PF     1.062726
 PTS    5.799279
 PER    0.619418
 OWS    0.007754
 DWS    0.011455
 WS     0.019850
 dtype: float64]

In [105]:
!pip install nba_api



In [5]:
# Look up the Celtics' entry in nba_api's static team list and keep its id.
from nba_api.stats.static import teams

nba_teams = teams.get_teams()
# Select the dictionary for the Celtics, which contains their team ID
# ([0] takes the first match and raises IndexError if nothing matched)
celtics = [team for team in nba_teams if team['abbreviation'] == 'BOS'][0]
celtics_id = celtics['id']

In [6]:
# Download the game log for the team id found above.
from nba_api.stats.endpoints import leaguegamefinder

# Query for games where the Celtics were playing
gamefinder = leaguegamefinder.LeagueGameFinder(team_id_nullable=celtics_id)
# The first DataFrame of those returned is what we want.
games = gamefinder.get_data_frames()[0]
games.head()

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
0,22021,1610612738,BOS,Boston Celtics,22100654,2022-01-17,BOS vs. NOP,W,241,104,38,80,0.475,10,32,0.313,18,20,0.9,5,42,47,22,6,4,14,18,12.0
1,22021,1610612738,BOS,Boston Celtics,22100647,2022-01-15,BOS vs. CHI,W,240,114,44,90,0.489,9,26,0.346,17,19,0.895,17,33,50,27,6,6,11,15,2.0
2,22021,1610612738,BOS,Boston Celtics,22100635,2022-01-14,BOS @ PHI,L,238,99,37,78,0.474,11,31,0.355,14,18,0.778,7,37,44,20,4,4,20,21,-12.0
3,22021,1610612738,BOS,Boston Celtics,22100618,2022-01-12,BOS @ IND,W,239,119,40,78,0.513,18,38,0.474,21,25,0.84,7,30,37,23,6,5,11,22,19.0
4,22021,1610612738,BOS,Boston Celtics,22100609,2022-01-10,BOS vs. IND,W,265,101,35,88,0.398,8,28,0.286,23,28,0.821,12,41,53,17,10,6,20,19,3.0


In [7]:
# Display the full game-log frame.
games

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
0,22021,1610612738,BOS,Boston Celtics,22100654,2022-01-17,BOS vs. NOP,W,241,104,38,80,0.475,10,32,0.313,18,20,0.9,5,42,47,22,6,4,14,18,12.0
1,22021,1610612738,BOS,Boston Celtics,22100647,2022-01-15,BOS vs. CHI,W,240,114,44,90,0.489,9,26,0.346,17,19,0.895,17,33,50,27,6,6,11,15,2.0
2,22021,1610612738,BOS,Boston Celtics,22100635,2022-01-14,BOS @ PHI,L,238,99,37,78,0.474,11,31,0.355,14,18,0.778,7,37,44,20,4,4,20,21,-12.0
3,22021,1610612738,BOS,Boston Celtics,22100618,2022-01-12,BOS @ IND,W,239,119,40,78,0.513,18,38,0.474,21,25,0.84,7,30,37,23,6,5,11,22,19.0
4,22021,1610612738,BOS,Boston Celtics,22100609,2022-01-10,BOS vs. IND,W,265,101,35,88,0.398,8,28,0.286,23,28,0.821,12,41,53,17,10,6,20,19,3.0
5,22021,1610612738,BOS,Boston Celtics,22100595,2022-01-08,BOS vs. NYK,W,243,99,36,76,0.474,12,29,0.414,15,17,0.882,9,36,45,26,8,8,10,15,24.0
6,22021,1610612738,BOS,Boston Celtics,22100578,2022-01-06,BOS @ NYK,L,240,105,38,80,0.475,17,45,0.378,12,13,0.923,8,30,38,22,5,10,12,16,-3.0
7,22021,1610612738,BOS,Boston Celtics,22100570,2022-01-05,BOS vs. SAS,L,242,97,40,90,0.444,9,28,0.321,8,9,0.889,13,28,41,19,10,9,8,12,-2.0
8,22021,1610612738,BOS,Boston Celtics,22100546,2022-01-02,BOS vs. ORL,W,265,116,39,86,0.453,13,36,0.361,25,28,0.893,9,37,46,25,5,10,21,23,5.0
9,22021,1610612738,BOS,Boston Celtics,22100529,2021-12-31,BOS vs. PHX,W,239,123,45,83,0.542,10,27,0.37,23,26,0.885,13,38,51,28,6,7,17,18,15.0


In [20]:
# Keep only the columns not listed here: identifiers, dates, results,
# percentages, made-shot counts, total rebounds and plus/minus are removed.
test = games.drop(columns=["SEASON_ID", "TEAM_ID", "TEAM_ABBREVIATION", "TEAM_NAME", "GAME_ID", "GAME_DATE", "MATCHUP", "WL", "MIN", "FG_PCT", "FG3_PCT", "FT_PCT", "REB", "PLUS_MINUS", "FGM", "FG3M", "FTM"])

In [21]:
# Display the reduced game-log frame.
test

Unnamed: 0,PTS,FGA,FG3A,FTA,OREB,DREB,AST,STL,BLK,TOV,PF
0,104,80,32,20,5,42,22,6,4,14,18
1,114,90,26,19,17,33,27,6,6,11,15
2,99,78,31,18,7,37,20,4,4,20,21
3,119,78,38,25,7,30,23,6,5,11,22
4,101,88,28,28,12,41,17,10,6,20,19
5,99,76,29,17,9,36,26,8,8,10,15
6,105,80,45,13,8,30,22,5,10,12,16
7,97,90,28,9,13,28,19,10,9,8,12
8,116,86,36,28,9,37,25,5,10,21,23
9,123,83,27,26,13,38,28,6,7,17,18


In [64]:
# Split our preprocessed data into our features and target arrays.
# The column is dropped with `columns=` because pandas 2.x no longer accepts
# the axis as a second positional argument to DataFrame.drop.
X = test.drop(columns="PTS")
y = test["PTS"]

# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [65]:
# Fit an ordinary least-squares model on the training split.
model = LinearRegression()
model.fit(X_train, y_train)
# These predictions are for the training rows (not the held-out test rows)
y_pred = model.predict(X_train)
y_pred

array([108.80455795,  87.02704202,  92.63993704, ...,  80.92581497,
       114.062698  , 117.60170176])

In [66]:
# Report the model's score on each split (for LinearRegression, .score is R^2).
print(f'Training Score: {model.score(X_train, y_train)}')
print(f'Testing Score: {model.score(X_test, y_test)}')

Training Score: 0.6812653406570583
Testing Score: 0.7075634909190502


In [113]:
# Create a StandardScaler instance
scaler = StandardScaler()

# Fit the StandardScaler on the training features only, so the test split
# does not influence the scaling parameters
X_scaler = scaler.fit(X_train)

# Scale both splits with the parameters learned from the training split
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [121]:
# Define the model - deep neural net
# The input width is taken from the first scaled training row.
number_input_features = len(X_train_scaled[0])
hidden_nodes_layer1 = 40
hidden_nodes_layer2 = 10

nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu"))

# Second hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"))

# Output layer: a single unit. ReLU here means predictions cannot be negative.
# NOTE(review): a linear output is the more usual choice for regression - confirm.
nn.add(tf.keras.layers.Dense(units=1, activation="relu"))

# Check the structure of the model
nn.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_3 (Dense)             (None, 40)                560       
                                                                 
 dense_4 (Dense)             (None, 10)                410       
                                                                 
 dense_5 (Dense)             (None, 1)                 11        
                                                                 
Total params: 981
Trainable params: 981
Non-trainable params: 0
_________________________________________________________________


In [122]:
# Compile the model
nn.compile(loss="mean_squared_error", optimizer="adam", metrics=["accuracy"])

In [123]:
# Train the model
fit_model = nn.fit(X_train_scaled, y_train, epochs=100, validation_data=(X_test_scaled, y_test))

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100


Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [124]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

29/29 - 0s - loss: 0.0867 - accuracy: 0.0000e+00 - 38ms/epoch - 1ms/step
Loss: 0.08666856586933136, Accuracy: 0.0


In [26]:
# Create a fresh, unfitted linear regression model (rebinds `model`).
model = LinearRegression()

In [27]:
# Fit on the complete feature/target data rather than on a training split.
model.fit(X, y)

LinearRegression()

In [28]:
# Predict for the same rows the model was fitted on (in-sample predictions).
y_pred = model.predict(X)

In [29]:
# Display the predictions.
y_pred

array([100.04876065, 105.66373762,  94.37855633, ..., 109.70337735,
       107.90901682, 116.88866167])

In [133]:
# Display the target values.
y

array([104, 114,  99, ..., 119, 108, 121], dtype=int64)

In [135]:
# Split X and y again with the same random_state as the earlier split.
# (train_test_split is already imported in the first cell of the notebook.)
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [136]:
# Fit a BalancedRandomForestClassifier on the training split.
# NOTE(review): y was built from the PTS column earlier in this notebook, so
# the classifier would treat each distinct point total as its own class -
# confirm a classifier is intended for this target.
from imblearn.ensemble import BalancedRandomForestClassifier

clf = BalancedRandomForestClassifier(random_state=1, n_estimators=100).fit(X_train, y_train)

In [141]:
from sklearn.metrics import balanced_accuracy_score
from sklearn.metrics import confusion_matrix
from imblearn.metrics import classification_report_imbalanced

In [139]:
# Calculate the balanced accuracy score of the classifier on the test split
y_pred = clf.predict(X_test)
balanced_accuracy_score(y_test, y_pred)



0.026220816549474803

In [142]:
# Display the confusion matrix of test labels against the predictions above
confusion_matrix(y_test, y_pred)

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=int64)

In [143]:
# Refit `model` on the training split and record its score on each split.
# NOTE(review): the saved output for these scores is exactly 1.0, unlike the
# scores printed for the earlier linear regression - check what X contained
# when this was run (possible target leakage or stale kernel state).
model.fit(X_train, y_train)
training_score = model.score(X_train, y_train)
testing_score = model.score(X_test, y_test)

In [144]:
# Print the two scores computed in the previous cell.
print(f"Training Score: {training_score}")
print(f"Testing Score: {testing_score}")

Training Score: 1.0
Testing Score: 1.0
