<a href="https://colab.research.google.com/github/cartersmotherman/nba-predictions/blob/main/NBA_Algorithm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install nba_api

Collecting nba_api
  Downloading nba_api-1.4.1-py3-none-any.whl (261 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/261.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━[0m [32m112.6/261.7 kB[0m [31m3.4 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m261.7/261.7 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: nba_api
Successfully installed nba_api-1.4.1


In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# URL of the webpage
url = "https://hashtagbasketball.com/nba-defense-vs-position"

# Send a GET request to the URL
response = requests.get(url)

# Parse the HTML content using BeautifulSoup
soup = BeautifulSoup(response.content, "html.parser")

# Find all tables on the webpage
tables = soup.find_all("table")

# Read the table data into the DataFrame
DefenseVsPosition = pd.read_html(str(tables[2]))[0]

# Remove "Sort: " from column names
DefenseVsPosition.columns = DefenseVsPosition.columns.str.replace('Sort: ', '')

# Columns to convert to float (excluding 'Position' and 'Team')
columns_to_convert = DefenseVsPosition.columns.difference(['Position', 'Team'])

# Loop through specified columns, remove ranks, and convert to floats
for col in columns_to_convert:
    DefenseVsPosition[col] = DefenseVsPosition[col].apply(lambda x: float(x.split()[0]))

# Clean 'Team' column to remove ranks
DefenseVsPosition['Team'] = DefenseVsPosition['Team'].apply(lambda x: x.split()[0])

replacement_map = {'SA': 'SAS', 'PHO': 'PHX', 'NY': 'NYK', 'NO': 'NOP', 'GS': 'GSW'}

# Replace values in 'Team' column using the replacement_map
DefenseVsPosition['Team'].replace(replacement_map, inplace=True)


In [None]:
DefenseVsPosition.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 11 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Position  150 non-null    object 
 1   Team      150 non-null    object 
 2   PTS       150 non-null    float64
 3   FG%       150 non-null    float64
 4   FT%       150 non-null    float64
 5   3PM       150 non-null    float64
 6   REB       150 non-null    float64
 7   AST       150 non-null    float64
 8   STL       150 non-null    float64
 9   BLK       150 non-null    float64
 10  TO        150 non-null    float64
dtypes: float64(9), object(2)
memory usage: 13.0+ KB


In [None]:
from nba_api.stats.endpoints import leaguedashteamstats

# Fetch team opponent stats
from nba_api.stats.endpoints import leaguedashteamstats

# Fetch per game opponent stats
team_opponent_stats = leaguedashteamstats.LeagueDashTeamStats(per_mode_detailed='PerGame', measure_type_detailed_defense='Opponent',last_n_games=15)

# Convert the obtained data to a pandas DataFrame
team_opponent_stats_df = team_opponent_stats.get_data_frames()[0]


# Display the DataFrame containing team opponent stats
team_opponent_stats_df['TEAM_NAME'].unique()

team_abbreviations = {
    'Atlanta Hawks': 'ATL', 'Boston Celtics': 'BOS', 'Brooklyn Nets': 'BKN', 'Charlotte Hornets': 'CHA',
    'Chicago Bulls': 'CHI', 'Cleveland Cavaliers': 'CLE', 'Dallas Mavericks': 'DAL', 'Denver Nuggets': 'DEN',
    'Detroit Pistons': 'DET', 'Golden State Warriors': 'GSW', 'Houston Rockets': 'HOU', 'Indiana Pacers': 'IND',
    'LA Clippers': 'LAC', 'Los Angeles Lakers': 'LAL', 'Memphis Grizzlies': 'MEM', 'Miami Heat': 'MIA',
    'Milwaukee Bucks': 'MIL', 'Minnesota Timberwolves': 'MIN', 'New Orleans Pelicans': 'NOP',
    'New York Knicks': 'NYK', 'Oklahoma City Thunder': 'OKC', 'Orlando Magic': 'ORL', 'Philadelphia 76ers': 'PHI',
    'Phoenix Suns': 'PHX', 'Portland Trail Blazers': 'POR', 'Sacramento Kings': 'SAC',
    'San Antonio Spurs': 'SAS', 'Toronto Raptors': 'TOR', 'Utah Jazz': 'UTA', 'Washington Wizards': 'WAS'
}

# Replace the team names with abbreviations using the mapping
team_opponent_stats_df['TEAM'] = team_opponent_stats_df['TEAM_NAME'].map(team_abbreviations)

# Your DataFrame containing the columns
columns = [
    'TEAM', 'OPP_FGM', 'OPP_FGA', 'OPP_FG_PCT', 'OPP_FG3M', 'OPP_FG3A', 'OPP_FG3_PCT',
    'OPP_FTM', 'OPP_FTA', 'OPP_FT_PCT', 'OPP_OREB', 'OPP_DREB', 'OPP_REB', 'OPP_AST',
    'OPP_TOV', 'OPP_STL', 'OPP_BLK', 'OPP_BLKA', 'OPP_PF', 'OPP_PFD', 'OPP_PTS'
]

# Sample DataFrame (replace this with your actual DataFrame)
TeamOpponent = team_opponent_stats_df[columns]

# Define the desired column order
desired_order = [
    'TEAM', 'OPP_FGM', 'OPP_FGA', 'OPP_FG_PCT', 'OPP_FG3M', 'OPP_FG3A', 'OPP_FG3_PCT',
    'OPP_FTM', 'OPP_FTA', 'OPP_FT_PCT', 'OPP_OREB', 'OPP_DREB', 'OPP_REB', 'OPP_AST',
    'OPP_TOV', 'OPP_STL', 'OPP_BLK', 'OPP_BLKA', 'OPP_PF', 'OPP_PFD', 'OPP_PTS'
]

# Reorder columns
TeamOpponent = TeamOpponent.reindex(columns=desired_order)

# Display the updated DataFra


In [None]:
import pandas as pd
from nba_api.stats.endpoints import playergamelog
from nba_api.stats.static import players

# Read the CSV file containing player names
file_path = '/content/drive/MyDrive/PlayerList.csv'
df = pd.read_csv(file_path)

# List of player names from the CSV file
player_names = df['Player'].tolist()  # Assuming 'Player Name' is the column header in the CSV

# Get player IDs for the given player names
nba_players = players.get_players()
player_logs = []  # List to store player game logs

for player_name in player_names:
    player_info = [player for player in nba_players if player['full_name'] == player_name]

    if player_info:
        player_id = player_info[0]['id']
        # Get player's game log for a specific season
        season = '2023-24'  # Replace with the season you want to retrieve
        player_log = playergamelog.PlayerGameLog(player_id=player_id, season=season)
        player_data = player_log.get_data_frames()[0]  # Get player's game log DataFrame
        # Add a 'Player' column with the player's name
        player_data['Player'] = player_name

        # Modify 'MATCHUP' column to create 'OPP' column with the last 3 characters
        player_data['OPP'] = player_data['MATCHUP'].str[-3:]

        player_logs.append(player_data)  # Append DataFrame to the list
    else:
        print(f"Player '{player_name}' not found.")

# Concatenate all player game logs into a single DataFrame
master_df = pd.concat(player_logs, ignore_index=True)



In [None]:
selected_columns = ['Player', 'GAME_DATE','OPP', 'MIN', 'PTS', 'REB', 'AST', 'STL', 'BLK', 'TOV']
master_df = master_df[selected_columns]
playerlogs = master_df[master_df['MIN'] > 23]
playerlogs

Unnamed: 0,Player,GAME_DATE,OPP,MIN,PTS,REB,AST,STL,BLK,TOV
0,Joel Embiid,"JAN 05, 2024",NYK,36,30,10,3,1,3,6
1,Joel Embiid,"JAN 02, 2024",CHI,31,31,15,10,1,2,4
2,Joel Embiid,"DEC 22, 2023",TOR,35,31,10,9,2,4,6
3,Joel Embiid,"DEC 20, 2023",MIN,36,51,12,3,2,1,2
4,Joel Embiid,"DEC 18, 2023",CHI,37,40,14,6,0,2,4
...,...,...,...,...,...,...,...,...,...,...
6119,Bismack Biyombo,"NOV 08, 2023",MIA,30,9,10,0,1,0,1
6120,Bismack Biyombo,"NOV 05, 2023",POR,26,8,11,4,0,3,2
6123,Gabe Vincent,"OCT 30, 2023",ORL,25,9,0,2,1,0,0
6124,Gabe Vincent,"OCT 29, 2023",SAC,32,2,0,2,1,0,0


In [None]:
# Read the CSV file containing player names
file_path = '/content/drive/MyDrive/PlayerPositions.csv'
player_positions = pd.read_csv(file_path)

merged_df = pd.merge(master_df, player_positions, on='Player', how='left')

# Display the updated merged DataFrame
selected_columns = ['Player','Pos','GAME_DATE', 'OPP', 'MIN', 'PTS', 'REB', 'AST', 'STL', 'BLK', 'TOV']
playerlogs = merged_df[selected_columns]

playerlogs['Pos'].unique()
playerlogs

Unnamed: 0,Player,Pos,GAME_DATE,OPP,MIN,PTS,REB,AST,STL,BLK,TOV
0,Joel Embiid,C,"JAN 05, 2024",NYK,36,30,10,3,1,3,6
1,Joel Embiid,C,"JAN 02, 2024",CHI,31,31,15,10,1,2,4
2,Joel Embiid,C,"DEC 22, 2023",TOR,35,31,10,9,2,4,6
3,Joel Embiid,C,"DEC 20, 2023",MIN,36,51,12,3,2,1,2
4,Joel Embiid,C,"DEC 18, 2023",CHI,37,40,14,6,0,2,4
...,...,...,...,...,...,...,...,...,...,...,...
6122,Gabe Vincent,G,"DEC 20, 2023",CHI,14,3,1,3,0,0,0
6123,Gabe Vincent,G,"OCT 30, 2023",ORL,25,9,0,2,1,0,0
6124,Gabe Vincent,G,"OCT 29, 2023",SAC,32,2,0,2,1,0,0
6125,Gabe Vincent,G,"OCT 26, 2023",PHX,35,7,3,6,3,0,0


In [None]:
# Assuming 'playerlogs' is the DataFrame containing player logs

# Map positions accordingly
position_mapping = {'F': 'SF', 'G': 'PG', 'GF': 'SF', 'FC': 'C'}

# Replace positions with the updated ones
playerlogs['Pos'] = playerlogs['Pos'].replace(position_mapping)

# Filter positions to keep only 'PG', 'SG', 'SF', 'PF', 'C'
playerlogs = playerlogs[playerlogs['Pos'].isin(['PG', 'SG', 'SF', 'PF', 'C'])]

# Display unique positions after transformation
playerlogs.columns
playerlogs

Unnamed: 0,Player,Pos,GAME_DATE,OPP,MIN,PTS,REB,AST,STL,BLK,TOV
0,Joel Embiid,C,"JAN 05, 2024",NYK,36,30,10,3,1,3,6
1,Joel Embiid,C,"JAN 02, 2024",CHI,31,31,15,10,1,2,4
2,Joel Embiid,C,"DEC 22, 2023",TOR,35,31,10,9,2,4,6
3,Joel Embiid,C,"DEC 20, 2023",MIN,36,51,12,3,2,1,2
4,Joel Embiid,C,"DEC 18, 2023",CHI,37,40,14,6,0,2,4
...,...,...,...,...,...,...,...,...,...,...,...
6122,Gabe Vincent,PG,"DEC 20, 2023",CHI,14,3,1,3,0,0,0
6123,Gabe Vincent,PG,"OCT 30, 2023",ORL,25,9,0,2,1,0,0
6124,Gabe Vincent,PG,"OCT 29, 2023",SAC,32,2,0,2,1,0,0
6125,Gabe Vincent,PG,"OCT 26, 2023",PHX,35,7,3,6,3,0,0


In [None]:
DefenseVsPosition.columns

Index(['Position', 'Team', 'PTS', 'FG%', 'FT%', '3PM', 'REB', 'AST', 'STL',
       'BLK', 'TO'],
      dtype='object')

In [None]:

merged_data = playerlogs.merge(DefenseVsPosition, left_on=['OPP', 'Pos'], right_on=['Team', 'Position'], how='left')

merged_data


Unnamed: 0,Player,Pos,GAME_DATE,OPP,MIN,PTS_x,REB_x,AST_x,STL_x,BLK_x,...,Team,PTS_y,FG%,FT%,3PM,REB_y,AST_y,STL_y,BLK_y,TO
0,Joel Embiid,C,"JAN 05, 2024",NYK,36,30,10,3,1,3,...,NYK,20.8,54.5,73.3,1.3,13.5,2.4,1.2,2.3,2.4
1,Joel Embiid,C,"JAN 02, 2024",CHI,31,31,15,10,1,2,...,CHI,23.1,56.7,70.9,1.2,15.4,4.5,1.2,2.2,2.4
2,Joel Embiid,C,"DEC 22, 2023",TOR,35,31,10,9,2,4,...,TOR,22.6,58.2,78.0,0.9,14.8,4.5,1.0,2.4,2.4
3,Joel Embiid,C,"DEC 20, 2023",MIN,36,51,12,3,2,1,...,MIN,21.2,51.4,75.9,0.8,12.7,4.6,1.4,1.7,2.2
4,Joel Embiid,C,"DEC 18, 2023",CHI,37,40,14,6,0,2,...,CHI,23.1,56.7,70.9,1.2,15.4,4.5,1.2,2.2,2.4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6122,Gabe Vincent,PG,"DEC 20, 2023",CHI,14,3,1,3,0,0,...,CHI,21.6,42.6,86.0,2.8,5.3,9.0,1.2,0.6,3.4
6123,Gabe Vincent,PG,"OCT 30, 2023",ORL,25,9,0,2,1,0,...,ORL,26.3,46.1,81.8,3.1,5.6,8.1,1.6,0.5,3.2
6124,Gabe Vincent,PG,"OCT 29, 2023",SAC,32,2,0,2,1,0,...,SAC,24.7,46.2,84.5,3.1,6.1,7.9,1.6,0.6,3.0
6125,Gabe Vincent,PG,"OCT 26, 2023",PHX,35,7,3,6,3,0,...,PHX,26.2,46.9,84.7,3.1,6.4,8.7,2.0,0.6,2.6


In [None]:

playerlogs_columns = {
    'PTS_x': 'PTS',
    'REB_x': 'REB',
    'AST_x': 'AST',
    'STL_x': 'STL',
    'BLK_x': 'BLK',
}

# Rename the columns from DefenseVsPosition with 'OPP_' prefix
opp_columns = {
    'PTS_y': 'OPP_PTSvsPos',
    'REB_y': 'OPP_REBvsPos',
    'AST_y': 'OPP_ASTvsPos',
    'STL_y': 'OPP_STLvsPos',
    'BLK_y': 'OPP_BLKvsPos',
    'TO': 'OPP_TOVvsPos'
}

# Rename columns in the DataFrame
final_data = merged_data.rename(columns={**playerlogs_columns, **opp_columns})

# Select only the desired columns
selected_columns = ['Player', 'Pos','GAME_DATE', 'OPP', 'MIN','PTS', 'REB', 'AST', 'STL', 'BLK', 'TOV', 'OPP_PTSvsPos', 'OPP_REBvsPos', 'OPP_ASTvsPos', 'OPP_STLvsPos', 'OPP_BLKvsPos', 'OPP_TOVvsPos']
final_data = final_data[selected_columns]


# Display the final DataFrame
print(final_data.head())


        Player Pos     GAME_DATE  OPP  MIN  PTS  REB  AST  STL  BLK  TOV  \
0  Joel Embiid   C  JAN 05, 2024  NYK   36   30   10    3    1    3    6   
1  Joel Embiid   C  JAN 02, 2024  CHI   31   31   15   10    1    2    4   
2  Joel Embiid   C  DEC 22, 2023  TOR   35   31   10    9    2    4    6   
3  Joel Embiid   C  DEC 20, 2023  MIN   36   51   12    3    2    1    2   
4  Joel Embiid   C  DEC 18, 2023  CHI   37   40   14    6    0    2    4   

   OPP_PTSvsPos  OPP_REBvsPos  OPP_ASTvsPos  OPP_STLvsPos  OPP_BLKvsPos  \
0          20.8          13.5           2.4           1.2           2.3   
1          23.1          15.4           4.5           1.2           2.2   
2          22.6          14.8           4.5           1.0           2.4   
3          21.2          12.7           4.6           1.4           1.7   
4          23.1          15.4           4.5           1.2           2.2   

   OPP_TOVvsPos  
0           2.4  
1           2.4  
2           2.4  
3           2.2  
4 

In [None]:
final_data = pd.merge(final_data, TeamOpponent, left_on='OPP', right_on='TEAM')
final_data.columns

Index(['Player', 'Pos', 'GAME_DATE', 'OPP', 'MIN', 'PTS', 'REB', 'AST', 'STL',
       'BLK', 'TOV', 'OPP_PTSvsPos', 'OPP_REBvsPos', 'OPP_ASTvsPos',
       'OPP_STLvsPos', 'OPP_BLKvsPos', 'OPP_TOVvsPos', 'TEAM', 'OPP_FGM',
       'OPP_FGA', 'OPP_FG_PCT', 'OPP_FG3M', 'OPP_FG3A', 'OPP_FG3_PCT',
       'OPP_FTM', 'OPP_FTA', 'OPP_FT_PCT', 'OPP_OREB', 'OPP_DREB', 'OPP_REB',
       'OPP_AST', 'OPP_TOV', 'OPP_STL', 'OPP_BLK', 'OPP_BLKA', 'OPP_PF',
       'OPP_PFD', 'OPP_PTS'],
      dtype='object')

In [None]:
final_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 6127 entries, 0 to 6126
Data columns (total 38 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Player        6127 non-null   object 
 1   Pos           6127 non-null   object 
 2   GAME_DATE     6127 non-null   object 
 3   OPP           6127 non-null   object 
 4   MIN           6127 non-null   int64  
 5   PTS           6127 non-null   int64  
 6   REB           6127 non-null   int64  
 7   AST           6127 non-null   int64  
 8   STL           6127 non-null   int64  
 9   BLK           6127 non-null   int64  
 10  TOV           6127 non-null   int64  
 11  OPP_PTSvsPos  6127 non-null   float64
 12  OPP_REBvsPos  6127 non-null   float64
 13  OPP_ASTvsPos  6127 non-null   float64
 14  OPP_STLvsPos  6127 non-null   float64
 15  OPP_BLKvsPos  6127 non-null   float64
 16  OPP_TOVvsPos  6127 non-null   float64
 17  TEAM          6127 non-null   object 
 18  OPP_FGM       6127 non-null 

In [None]:


# Sort the dataframe by 'Player' and 'Game' columns
final_data = final_data.sort_values(by=['Player', 'GAME_DATE']).reset_index(drop=True)

# Calculate rolling averages for points, rebounds, and assists for the previous 5 games
final_data['Avg_Points_Prev_5'] = final_data.groupby('Player')['PTS'].rolling(window=5, min_periods=1).mean().reset_index(drop=True)
final_data['Avg_Rebounds_Prev_5'] = final_data.groupby('Player')['REB'].rolling(window=5, min_periods=1).mean().reset_index(drop=True)
final_data['Avg_Assists_Prev_5'] = final_data.groupby('Player')['AST'].rolling(window=5, min_periods=1).mean().reset_index(drop=True)

# Shift the rolling averages by one row to represent the previous 5 games excluding the current game
final_data['Avg_Points_Prev_5'] = final_data.groupby('Player')['Avg_Points_Prev_5'].shift(fill_value=0)
final_data['Avg_Rebounds_Prev_5'] = final_data.groupby('Player')['Avg_Rebounds_Prev_5'].shift(fill_value=0)
final_data['Avg_Assists_Prev_5'] = final_data.groupby('Player')['Avg_Assists_Prev_5'].shift(fill_value=0)
final_data.dropna(inplace=True)
# Display the updated DataFrame
final_data.head()


Unnamed: 0,Player,Pos,GAME_DATE,OPP,MIN,PTS,REB,AST,STL,BLK,...,OPP_TOV,OPP_STL,OPP_BLK,OPP_BLKA,OPP_PF,OPP_PFD,OPP_PTS,Avg_Points_Prev_5,Avg_Rebounds_Prev_5,Avg_Assists_Prev_5
0,Aaron Gordon,SF,"DEC 02, 2023",SAC,33,17,7,3,0,1,...,13.7,6.6,5.7,4.3,18.2,19.7,117.7,0.0,0.0,0.0
1,Aaron Gordon,SF,"DEC 06, 2023",LAC,31,8,6,2,2,0,...,12.6,7.5,4.0,5.1,19.0,19.1,114.6,17.0,7.0,3.0
2,Aaron Gordon,SF,"DEC 08, 2023",HOU,22,3,6,3,0,0,...,14.3,7.0,5.4,3.9,20.7,21.6,117.6,12.5,6.5,2.5
3,Aaron Gordon,SF,"DEC 11, 2023",ATL,29,18,12,1,0,0,...,12.7,7.6,4.9,4.9,21.2,19.8,126.1,9.333333,6.333333,2.666667
4,Aaron Gordon,SF,"DEC 12, 2023",CHI,32,14,7,6,0,0,...,13.7,6.6,5.0,5.5,18.5,18.5,109.1,11.5,7.75,2.25


In [None]:
pt_pwr = pd.read_csv('/content/drive/MyDrive/PointPower.csv')
data = pd.merge(final_data,pt_pwr,on=['OPP'])
print(pt_pwr)

    OPP  PT_PWR
0   BOS  329.52
1   DEN  329.01
2   MIL  338.76
3   MIN  321.09
4   OKC  332.52
5   PHI  329.97
6   LAC  329.94
7   DAL  334.35
8   NOP  326.93
9   NYK  330.34
10  SAC  332.27
11  CLE  325.13
12  IND  345.81
13  MIA  326.26
14  ORL  324.53
15  PHX  329.60
16  HOU  323.00
17  GSW  331.74
18  LAL  326.07
19  UTA  330.02
20  BKN  331.41
21  CHI  322.51
22  ATL  342.21
23  TOR  329.54
24  MEM  319.96
25  POR  323.15
26  CHA  329.96
27  WAS  336.47
28  SAS  329.11
29  DET  330.54


In [None]:
data = data.sort_values(by=['Player', 'GAME_DATE']).reset_index(drop=True)

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
import pandas as pd
import xgboost as xgb

# Your data (I'll assume it's in a DataFrame named 'data')
# ...

# Unique players in the dataset
unique_players = data['Player'].unique()

# Dictionary to store the best model for each player's target variable ('PTS', 'REB', 'AST')
player_best_models = {player: {} for player in unique_players}

# ... (previous code remains unchanged)

# Iterate over unique players
for player in unique_players:
    # Filter data for the current player
    player_data = data[data['Player'] == player]

    # Check if the player has at least 20 rows
    if len(player_data) < 20:
        continue

    # Target variables
    target_variables = ['PTS', 'REB', 'AST']

    for target_variable in target_variables:
        # Features: All numeric columns except the target variable and excluded columns
        excluded_columns = ['PTS', 'REB', 'AST', 'STL', 'BLK', 'TOV']
        features = player_data.select_dtypes(include='number').drop(columns=[target_variable] + excluded_columns).columns.tolist()

        # Conditionally drop columns based on the target variable
        if target_variable == 'PTS':
            features = [feature for feature in features if feature not in ['Avg_Assists_Prev_5', 'Avg_Rebounds_Prev_5']]
        elif target_variable in ['REB', 'AST']:
            features = [feature for feature in features if feature not in ['Avg_Assists_Prev_5', 'Avg_Points_Prev_5']]

        # Rest of the code remains unchanged...
        # ... (Continuation from the original code)

        average_value = player_data[target_variable].mean()
        relative_threshold_percentage = 10  # Adjust this percentage based on your preference

        # Convert the percentage to an absolute value
        range_threshold = average_value * (relative_threshold_percentage / 100)

        # Features and target variable
        X = player_data[features]
        y = player_data[target_variable]

        # Normalize features
        scaler = StandardScaler()
        X_normalized = scaler.fit_transform(X)

        # Split data into train and test sets (80% train, 20% test)
        X_train, X_test, y_train, y_test = train_test_split(X_normalized, y, test_size=0.2, random_state=42)

        # Creating different models
        models = [
            LinearRegression(),
            Lasso(),
            Ridge(),
            DecisionTreeRegressor(),
            RandomForestRegressor(),
            xgb.XGBRegressor()  # Adding XGBoost Regressor
        ]

        # Train and evaluate each model
        best_model = None
        best_accuracy = 0  # Initialize with 0, as higher accuracy is better
        for model in models:
            model.fit(X_train, y_train)
            predictions = model.predict(X_test)

            # Calculate accuracy based on the specified range (-2 to 2)
            correct_predictions = ((predictions >= y_test - range_threshold) & (predictions <= y_test + range_threshold)).sum()
            total_predictions = len(predictions)
            accuracy = correct_predictions / total_predictions

            if accuracy > best_accuracy:
                best_accuracy = accuracy
                print(player, target_variable, best_accuracy, range_threshold)
                best_model = model

        # Check if the best model is not None before proceeding
        if best_model is not None:
            # Get the top 10 coefficients from the best model
            if hasattr(best_model, 'coef_'):
                coef_abs = abs(best_model.coef_)
            elif hasattr(best_model, 'feature_importances_'):
                coef_abs = abs(best_model.feature_importances_)
            else:
                coef_abs = best_model.feature_importances_

            top_10_indices = coef_abs.argsort()[-15:][::-1]  # Indexes of top 10 coefficients
            top_10_features = [features[i] for i in top_10_indices]

            # Refit the best model with the top 10 features
            X_top_10 = X[top_10_features]
            scaler.fit(X_top_10)  # Refit scaler with the top 10 features
            X_top_10_normalized = scaler.transform(X_top_10)

            best_model.fit(X_top_10_normalized, y)  # Refit the best model with the top 10 features

            # Calculate accuracy for the best model using the top 10 features
            predictions_top_10 = best_model.predict(X_test[:, top_10_indices])
            correct_predictions_top_10 = (
                (predictions_top_10 >= y_test - range_threshold) & (predictions_top_10 <= y_test + range_threshold)).sum()
            total_predictions_top_10 = len(predictions_top_10)
            accuracy_top_10 = correct_predictions_top_10 / total_predictions_top_10

            # Store the best model, top 10 features, and accuracy for the player and target variable
            player_best_models[player][target_variable] = {'model': best_model,
                                                            'top_10_features': top_10_features,
                                                            'score': accuracy_top_10}



Aaron Gordon PTS 0.14285714285714285 1.3818181818181818
Aaron Gordon PTS 0.2857142857142857 1.3818181818181818
Aaron Gordon REB 0.14285714285714285 0.6575757575757577
Aaron Gordon AST 0.2857142857142857 0.3212121212121212
Aaron Nesmith PTS 0.42857142857142855 1.1714285714285715
Aaron Nesmith REB 0.14285714285714285 0.32571428571428573
Aaron Nesmith REB 0.2857142857142857 0.32571428571428573
Aaron Nesmith AST 0.14285714285714285 0.11142857142857143
Aaron Nesmith AST 0.2857142857142857 0.11142857142857143
Al Horford PTS 0.14285714285714285 0.7741935483870969
Al Horford PTS 0.2857142857142857 0.7741935483870969
Al Horford REB 0.14285714285714285 0.6870967741935484
Al Horford REB 0.2857142857142857 0.6870967741935484
Al Horford AST 0.2857142857142857 0.3032258064516129
Alex Caruso PTS 0.14285714285714285 0.9906250000000001
Alex Caruso REB 0.14285714285714285 0.353125
Alex Caruso AST 0.2857142857142857 0.25
Alperen Sengun PTS 0.125 2.155555555555556
Alperen Sengun PTS 0.25 2.155555555555556

In [None]:
t = {k: v for k, v in player_best_models.items() if v}
my_dict = {k: t[k] for k in set(t)}


# Remove duplicates using dictionary comprehension
player_best_models = {k: v for k, v in my_dict.items() if list(my_dict.keys()).count(k) == 1}

# Filter out entries with None values for 'PTS', 'REB', or 'AST'
player_best_models = {
    player: stats
    for player, stats in player_best_models.items()
    if all(stats.get(key) is not None for key in ['PTS', 'REB', 'AST'])
}

In [None]:
import pandas as pd

# Create empty lists to store player scores for each category
pts_scores = []
reb_scores = []
ast_scores = []

# Iterate through each player in the player_best_models dictionary
for player, stats in player_best_models.items():
    if 'PTS' in stats and stats['PTS'] is not None and 'score' in stats['PTS']:
        pts_score = stats['PTS']['score']  # Get the PTS score for each player
        pts_scores.append({'Player': player, 'PTS_Score': pts_score})

    if 'REB' in stats and stats['REB'] is not None and 'score' in stats['REB']:
        reb_score = stats['REB']['score']  # Get the REB score for each player
        reb_scores.append({'Player': player, 'REB_Score': reb_score})

    if 'AST' in stats and stats['AST'] is not None and 'score' in stats['AST']:
        ast_score = stats['AST']['score']  # Get the AST score for each player
        ast_scores.append({'Player': player, 'AST_Score': ast_score})

# Create DataFrames from the player scores for each category
pts_scores_df = pd.DataFrame(pts_scores)
reb_scores_df = pd.DataFrame(reb_scores)
ast_scores_df = pd.DataFrame(ast_scores)

# Find top 5 players based on PTS score
if not pts_scores_df.empty:
    top_5_pts_players = pts_scores_df.nlargest(5, 'PTS_Score')
    print("Top 5 Players based on PTS Score:")
    print(top_5_pts_players)
else:
    print("No available PTS scores for any player.")

# Find top 5 players based on REB score
if not reb_scores_df.empty:
    top_5_reb_players = reb_scores_df.nlargest(5, 'REB_Score')
    print("\nTop 5 Players based on REB Score:")
    print(top_5_reb_players)
else:
    print("\nNo available REB scores for any player.")

# Find top 5 players based on AST score
if not ast_scores_df.empty:
    top_5_ast_players = ast_scores_df.nlargest(5, 'AST_Score')
    print("\nTop 5 Players based on AST Score:")
    print(top_5_ast_players)
else:
    print("\nNo available AST scores for any player.")


Top 5 Players based on PTS Score:
             Player  PTS_Score
7       Jalen Green        1.0
11    Stephen Curry        1.0
12     LeBron James        1.0
13  Dejounte Murray        1.0
14        Max Strus        1.0

Top 5 Players based on REB Score:
                      Player  REB_Score
13           Dejounte Murray        1.0
14                 Max Strus        1.0
15           Zion Williamson        1.0
23             Aaron Nesmith        1.0
24  Kentavious Caldwell-Pope        1.0

Top 5 Players based on AST Score:
            Player  AST_Score
2    Devin Vassell        1.0
5      Ivica Zubac        1.0
9   Daniel Gafford        1.0
11   Stephen Curry        1.0
19   Kevin Huerter        1.0


In [None]:
import re

team_mapping = {
    'Atlanta': 'ATL', 'Boston': 'BOS', 'Brooklyn': 'BKN', 'Charlotte': 'CHA',
    'Chicago': 'CHI', 'Cleveland': 'CLE', 'Dallas': 'DAL', 'Denver': 'DEN',
    'Detroit': 'DET', 'Golden State': 'GSW', 'Houston': 'HOU', 'Indiana': 'IND',
    'LA': 'LAC', 'Los Angeles': 'LAL', 'Memphis': 'MEM', 'Miami': 'MIA',
    'Milwaukee': 'MIL', 'Minnesota': 'MIN', 'New Orleans': 'NOP', 'New York': 'NYK',
    'Oklahoma City': 'OKC', 'Orlando': 'ORL', 'Philadelphia': 'PHI', 'Phoenix': 'PHX',
    'Portland': 'POR', 'Sacramento': 'SAC', 'San Antonio': 'SAS', 'Toronto': 'TOR',
    'Utah': 'UTA', 'Washington': 'WAS'
}

text = """
MATCHUP
TIME
TV
TICKETS

Brooklyn
  @

Cleveland
2:00 PM
NBA TV
Tickets as low as $105

Boston
  @

Milwaukee
7:30 PM
TNT
Tickets as low as $28

Portland
  @

Oklahoma City
8:00 PM		Tickets as low as $8

New York
  @

Dallas
8:30 PM		Tickets as low as $18

Phoenix
  @

Los Angeles
10:00 PM
TNT
Tickets as low as $67
"""

lines = text.split('\n')

# Remove empty lines and header rows
relevant_lines = [line.strip() for line in lines if line.strip() and line.strip() != 'MATCHUP' and line.strip() != 'TIME' and line.strip() != 'TV' and line.strip() != 'TICKETS']

team_abbreviations = [team_mapping[line] for line in relevant_lines if line in team_mapping]
team_opponent_pairs = [(team_abbreviations[i], team_abbreviations[i + 1]) for i in range(0, len(team_abbreviations), 2)]

# Display team-opponent pairs
for pair in team_opponent_pairs:
    print(pair)


('BKN', 'CLE')
('BOS', 'MIL')
('POR', 'OKC')
('NYK', 'DAL')
('PHX', 'LAL')


In [None]:
player_teams = pd.read_csv('/content/drive/MyDrive/PlayerTeam.csv')

predict_data_temp = []
for team, opponent in team_opponent_pairs:
    # Filter players by the team
    team_players = player_teams[player_teams['Team'] == team]['Player']
    # Create rows for each player with their respective opponent
    for player in team_players:
        predict_data_temp.append({'Player': player, 'OPP': opponent})

    # Similarly, now get the players from the opponent team
    opponent_players = player_teams[player_teams['Team'] == opponent]['Player']
    # Create rows for each player with their respective opponent (opponent team)
    for player in opponent_players:
        predict_data_temp.append({'Player': player, 'OPP': team})

# Create DataFrame
predict_data = pd.DataFrame(predict_data_temp)
predict_data

Unnamed: 0,Player,OPP
0,Cam Thomas,CLE
1,Mikal Bridges,CLE
2,Cameron Johnson,CLE
3,Spencer Dinwiddie,CLE
4,Nic Claxton,CLE
...,...,...
63,D'Angelo Russell,PHX
64,Rui Hachimura,PHX
65,Taurean Prince,PHX
66,Cam Reddish,PHX


In [None]:
predict_data = pd.merge(predict_data, player_positions, on='Player', how='left')
predict_data['Pos'] = predict_data['Pos'].replace(position_mapping)

# Filter positions to keep only 'PG', 'SG', 'SF', 'PF', 'C'
predict_data = predict_data[predict_data['Pos'].isin(['PG', 'SG', 'SF', 'PF', 'C'])]
predict_data

Unnamed: 0,Player,OPP,Pos
0,Cam Thomas,CLE,PG
1,Mikal Bridges,CLE,SG
2,Cameron Johnson,CLE,PG
3,Spencer Dinwiddie,CLE,PG
4,Nic Claxton,CLE,C
...,...,...,...
63,D'Angelo Russell,PHX,SG
64,Rui Hachimura,PHX,SF
65,Taurean Prince,PHX,SF
66,Cam Reddish,PHX,PG


In [None]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

def preprocess_player_name(player_name):
    # Add your custom logic to handle variations in player names
    name_parts = player_name.split()
    if len(name_parts) == 2 and all(len(part) == 2 for part in name_parts):
        return f"{name_parts[0][0]}.{name_parts[1][0]}. {name_parts[1]}"
    else:
        return player_name

url = "https://www.numberfire.com/nba/daily-fantasy/daily-basketball-projections"
response = requests.get(url)

if response.status_code == 200:
    soup = BeautifulSoup(response.text, 'html.parser')

    # Find all rows in the table
    rows = soup.find_all('tr', class_='')

    # Create lists to store player names and projected minutes
    player_names = []
    projected_minutes = []

    # Loop through each row and extract player name and projected minutes
    for row in rows:
        # Extract player name
        player_name_elem = row.find('a', class_='full')
        player_name = player_name_elem.contents[0].strip() if player_name_elem else None

        # Preprocess player name
        if player_name:
          player_name = preprocess_player_name(player_name)

        # Extract projected minutes
        minutes_elem = row.find('td', class_='min')
        minutes = minutes_elem.get_text(strip=True) if minutes_elem else None

        # Append data to lists
        player_names.append(player_name)
        projected_minutes.append(minutes)

    # Create a DataFrame named PlayerMinutes
    PlayerMinutes = pd.DataFrame({'Player': player_names, 'MIN': projected_minutes})

    # Display the DataFrame
    print(PlayerMinutes)
else:
    print("Failed to retrieve the webpage. Status code:", response.status_code)


                      Player    MIN
0                       None   None
1                       None   None
2      Giannis Antetokounmpo  36.04
3              Anthony Davis  38.61
4    Shai Gilgeous-Alexander  35.77
..                       ...    ...
123              Luka Doncic   0.00
124              Maxi Kleber   0.00
125         Ryan Arcidiacono   0.10
126               Dante Exum   0.00
127              Robin Lopez   0.09

[128 rows x 2 columns]


In [None]:
PlayerMinutes.dropna(inplace=True)
PlayerMinutes.reset_index(drop=True,inplace=True)
predict_data = pd.merge(predict_data, PlayerMinutes, on='Player', how='left')
PlayerMinutes.head(25)

Unnamed: 0,Player,MIN
0,Giannis Antetokounmpo,36.04
1,Anthony Davis,38.61
2,Shai Gilgeous-Alexander,35.77
3,LeBron James,37.62
4,Kyrie Irving,36.87
5,Julius Randle,36.58
6,Jayson Tatum,38.17
7,Kevin Durant,35.41
8,Jalen Brunson,36.56
9,Damian Lillard,35.5


In [None]:
temp_df = predict_data.merge(DefenseVsPosition, left_on=['OPP', 'Pos'], right_on=['Team', 'Position'], how='left')

print(temp_df.shape)
opp_columns = {
    'PTS': 'OPP_PTSvsPos',
    'REB': 'OPP_REBvsPos',
    'AST': 'OPP_ASTvsPos',
    'STL': 'OPP_STLvsPos',
    'BLK': 'OPP_BLKvsPos',
    'TO': 'OPP_TOVvsPos'
}
# Rename columns in the DataFrame
today_data = temp_df.rename(columns={**opp_columns})
print(today_data.shape)
selected_columns = ['Player', 'Pos', 'OPP','MIN','OPP_PTSvsPos', 'OPP_REBvsPos', 'OPP_ASTvsPos', 'OPP_STLvsPos', 'OPP_BLKvsPos', 'OPP_TOVvsPos']
today_data = today_data[selected_columns]
today_data = pd.merge(today_data, TeamOpponent, left_on='OPP', right_on='TEAM')
print(today_data.head())
today_data = pd.merge(today_data,pt_pwr,on=['OPP']).dropna()
print(today_data.shape)

(68, 15)
(68, 15)
              Player Pos  OPP  MIN  OPP_PTSvsPos  OPP_REBvsPos  OPP_ASTvsPos  \
0         Cam Thomas  PG  CLE  NaN          24.5           5.9           7.2   
1      Mikal Bridges  SG  CLE  NaN          19.6           6.2           4.4   
2    Cameron Johnson  PG  CLE  NaN          24.5           5.9           7.2   
3  Spencer Dinwiddie  PG  CLE  NaN          24.5           5.9           7.2   
4        Nic Claxton   C  CLE  NaN          22.6          14.2           3.9   

   OPP_STLvsPos  OPP_BLKvsPos  OPP_TOVvsPos  ... OPP_DREB  OPP_REB  OPP_AST  \
0           2.0           0.6           3.1  ...     31.0     39.9     24.0   
1           1.5           0.7           2.6  ...     31.0     39.9     24.0   
2           2.0           0.6           3.1  ...     31.0     39.9     24.0   
3           2.0           0.6           3.1  ...     31.0     39.9     24.0   
4           1.3           2.5           2.8  ...     31.0     39.9     24.0   

   OPP_TOV  OPP_STL  OPP_B

In [None]:
today_data.dropna(inplace=True)

In [None]:
from nba_api.stats.static import players
from nba_api.stats.endpoints import playergamelog
import pandas as pd

# Get the list of NBA players
nba_players = players.get_players()

# Create a dictionary mapping player names to player IDs
player_name_to_id = {player['full_name']: player['id'] for player in nba_players}

# Function to fetch last 5 games' stats for a player
def get_last_5_games_stats(player_id, season):
    gamelog = playergamelog.PlayerGameLog(player_id=player_id, season=season)
    gamelog_data = gamelog.get_data_frames()[0]
    last_5_games = gamelog_data.head(6)  # Considering the data is already sorted by the latest games
    return last_5_games

# Create an empty DataFrame to store the results
all_players_last_5_stats = pd.DataFrame(columns=['Player', 'PTS', 'REB', 'AST', 'MIN'])

# Loop through each player in the DataFrame and fetch their last 5 games' stats
for player_name in today_data['Player']:
    if player_name in player_name_to_id:
        player_id = player_name_to_id[player_name]
        player_stats = get_last_5_games_stats(player_id, '2023-24')  # Replace '2023-24' with the desired season
        player_stats['Player'] = player_name  # Add a 'Player' column with player's name
        all_players_last_5_stats = pd.concat([all_players_last_5_stats, player_stats])


# Select only the desired columns
all_players_last_5_stats = all_players_last_5_stats[['Player', 'PTS', 'REB', 'AST']]

# Rename columns for clarity
all_players_last_5_stats.rename(columns={
    'Player': 'Player',
    'PTS': 'Avg_Points_Prev_5',
    'REB': 'Avg_Rebounds_Prev_5',
    'AST': 'Avg_Assists_Prev_5'
}, inplace=True)

# Calculate the average per-minute stats for each player
average_stats_per_minute = all_players_last_5_stats.groupby('Player').mean().reset_index()

print("\nAverage per-minute statistics for each player:")
print(average_stats_per_minute)



Average per-minute statistics for each player:
                     Player  Avg_Points_Prev_5  Avg_Rebounds_Prev_5  \
0                Al Horford           8.666667             5.833333   
1           Anfernee Simons          20.833333             4.000000   
2             Anthony Davis          29.333333            11.833333   
3             Austin Reaves          15.166667             3.166667   
4              Bobby Portis           7.500000             4.833333   
5              Bradley Beal          19.833333             4.500000   
6               Brook Lopez          12.500000             5.833333   
7               Cam Reddish           6.166667             1.666667   
8             Chet Holmgren          19.166667             5.833333   
9          D'Angelo Russell          11.333333             1.166667   
10           Damian Lillard          20.333333             3.333333   
11            Deandre Ayton          13.500000            10.666667   
12        Derrick Jones Jr.  

In [None]:
average_stats = average_stats_per_minute.copy()
today_data = pd.merge(today_data,average_stats,on = 'Player')

In [None]:
t = {k: v for k, v in player_best_models.items() if v}
my_dict = {k: t[k] for k in set(t)}


# Remove duplicates using dictionary comprehension
player_best_models = {k: v for k, v in my_dict.items() if list(my_dict.keys()).count(k) == 1}

# Filter out entries with None values for 'PTS', 'REB', or 'AST'
player_best_models = {
    player: stats
    for player, stats in player_best_models.items()
    if all(stats.get(key) is not None for key in ['PTS', 'REB', 'AST'])
}

player_best_models

{'Shai Gilgeous-Alexander': {'PTS': {'model': Lasso(),
   'top_10_features': ['MIN',
    'Avg_Points_Prev_5',
    'OPP_BLKA',
    'OPP_FT_PCT',
    'OPP_STL',
    'OPP_STLvsPos',
    'OPP_BLK',
    'OPP_ASTvsPos',
    'OPP_FG3A',
    'OPP_BLKvsPos',
    'OPP_TOVvsPos',
    'OPP_FGM',
    'OPP_FGA',
    'OPP_REBvsPos',
    'OPP_PTSvsPos'],
   'score': 0.7142857142857143},
  'REB': {'model': Lasso(),
   'top_10_features': ['PT_PWR',
    'OPP_FTM',
    'OPP_PTSvsPos',
    'OPP_REBvsPos',
    'OPP_ASTvsPos',
    'OPP_STLvsPos',
    'OPP_BLKvsPos',
    'OPP_TOVvsPos',
    'OPP_FGM',
    'OPP_FGA',
    'OPP_FG_PCT',
    'OPP_FG3M',
    'OPP_FG3A',
    'OPP_FG3_PCT',
    'OPP_FTA'],
   'score': 0.14285714285714285},
  'AST': {'model': Lasso(),
   'top_10_features': ['OPP_PTSvsPos',
    'OPP_FTM',
    'PT_PWR',
    'OPP_REBvsPos',
    'OPP_ASTvsPos',
    'OPP_STLvsPos',
    'OPP_BLKvsPos',
    'OPP_TOVvsPos',
    'OPP_FGM',
    'OPP_FGA',
    'OPP_FG_PCT',
    'OPP_FG3M',
    'OPP_FG3A',
    '

In [None]:
t = {k: v for k, v in player_best_models.items() if v}
my_dict = {k: t[k] for k in set(t)}


# Remove duplicates using dictionary comprehension
player_best_models = {k: v for k, v in my_dict.items() if list(my_dict.keys()).count(k) == 1}

# Filter out entries with None values for 'PTS', 'REB', or 'AST'
player_best_models = {
    player: stats
    for player, stats in player_best_models.items()
    if all(stats.get(key) is not None for key in ['PTS', 'REB', 'AST'])
}

player_best_models

{'Shai Gilgeous-Alexander': {'PTS': {'model': Lasso(),
   'top_10_features': ['MIN',
    'Avg_Points_Prev_5',
    'OPP_BLKA',
    'OPP_FT_PCT',
    'OPP_STL',
    'OPP_STLvsPos',
    'OPP_BLK',
    'OPP_ASTvsPos',
    'OPP_FG3A',
    'OPP_BLKvsPos',
    'OPP_TOVvsPos',
    'OPP_FGM',
    'OPP_FGA',
    'OPP_REBvsPos',
    'OPP_PTSvsPos'],
   'score': 0.7142857142857143},
  'REB': {'model': Lasso(),
   'top_10_features': ['PT_PWR',
    'OPP_FTM',
    'OPP_PTSvsPos',
    'OPP_REBvsPos',
    'OPP_ASTvsPos',
    'OPP_STLvsPos',
    'OPP_BLKvsPos',
    'OPP_TOVvsPos',
    'OPP_FGM',
    'OPP_FGA',
    'OPP_FG_PCT',
    'OPP_FG3M',
    'OPP_FG3A',
    'OPP_FG3_PCT',
    'OPP_FTA'],
   'score': 0.14285714285714285},
  'AST': {'model': Lasso(),
   'top_10_features': ['OPP_PTSvsPos',
    'OPP_FTM',
    'PT_PWR',
    'OPP_REBvsPos',
    'OPP_ASTvsPos',
    'OPP_STLvsPos',
    'OPP_BLKvsPos',
    'OPP_TOVvsPos',
    'OPP_FGM',
    'OPP_FGA',
    'OPP_FG_PCT',
    'OPP_FG3M',
    'OPP_FG3A',
    '

In [None]:
from sklearn.preprocessing import StandardScaler
import pandas as pd

# Create separate dictionaries for points, rebounds, assists, blocks, steals, and turnovers predictions
points_predictions = {'Player': [], 'Points': [], 'R2_Score': []}
rebounds_predictions = {'Player': [], 'Rebounds': [], 'R2_Score': []}
assists_predictions = {'Player': [], 'Assists': [], 'R2_Score': []}


# Iterate through each player in 'today_data'
for player, player_data in today_data.groupby('Player'):
    # Check if the player has a fitted model in player_best_models
    if player in player_best_models:
        # Get the selected models and their scores for the player
        selected_models = player_best_models[player]
        if selected_models:
            # Prepare predictions for each target variable
            for target_variable, model_info in selected_models.items():
                # Get the model for the target variable
                model = model_info['model']
                r_squared = model_info.get('score', 0)  # Default to 0 if R-squared is missing

                # Check if the model exists and R-squared is above 0.5 for the target variable
                if model and r_squared > 0.0:
                    # Features for the current player from 'today_data' based on the top features
                    top_features = model_info.get('top_10_features', [])
                    X_player_today = player_data[top_features]

                    # Check if X_player_today is not empty
                    if not X_player_today.empty:
                        scaler = StandardScaler()
                        X_player_today_normalized = scaler.fit_transform(X_player_today)  # Normalize features

                        # Make predictions using the selected model
                        prediction = model.predict(X_player_today_normalized)

                        # Round predictions to two decimal places and convert to float
                        rounded_prediction = round(prediction[0], 2)

                        # Append predictions and R-squared scores to the respective prediction dictionary based on the target variable
                        if target_variable == 'PTS':
                            points_predictions['Player'].append(player)
                            points_predictions['Points'].append(float(rounded_prediction))
                            points_predictions['R2_Score'].append(r_squared)
                        elif target_variable == 'REB':
                            rebounds_predictions['Player'].append(player)
                            rebounds_predictions['Rebounds'].append(float(rounded_prediction))
                            rebounds_predictions['R2_Score'].append(r_squared)
                        elif target_variable == 'AST':
                            assists_predictions['Player'].append(player)
                            assists_predictions['Assists'].append(float(rounded_prediction))
                            assists_predictions['R2_Score'].append(r_squared)

# Convert the lists of predictions to DataFrames
df_points_predictions = pd.DataFrame(points_predictions)
df_rebounds_predictions = pd.DataFrame(rebounds_predictions)
df_assists_predictions = pd.DataFrame(assists_predictions)

# Sort DataFrames by R2 Score in descending order
df_points_predictions = df_points_predictions.sort_values(by='R2_Score', ascending=False)
df_rebounds_predictions = df_rebounds_predictions.sort_values(by='R2_Score', ascending=False)
df_assists_predictions = df_assists_predictions.sort_values(by='R2_Score', ascending=False)

# Display predictions for points, rebounds, assists, blocks, steals, and turnovers
print("Points Predictions:")
print(df_points_predictions)

print("\nRebounds Predictions:")
print(df_rebounds_predictions)

print("\nAssists Predictions:")
print(df_assists_predictions)


Points Predictions:
                     Player     Points  R2_Score
0                Al Horford   6.600000  1.000000
14            Grayson Allen   9.000000  1.000000
35            Rui Hachimura  18.000000  1.000000
34               OG Anunoby  14.070000  1.000000
33         Matisse Thybulle   6.000000  1.000000
30            Luguentz Dort  12.000000  1.000000
29             LeBron James  18.090000  1.000000
26             Jusuf Nurkic  18.410000  1.000000
25            Julius Randle  19.959999  1.000000
21              Josh Giddey   7.000000  1.000000
41           Toumani Camara   4.000000  1.000000
32          Malcolm Brogdon  15.570000  0.833333
38  Shai Gilgeous-Alexander  31.400000  0.714286
37           Shaedon Sharpe  16.230000  0.714286
7            Damian Lillard  25.150000  0.714286
28       Kristaps Porzingis  19.190000  0.666667
12    Giannis Antetokounmpo  30.230000  0.625000
17           Jalen Williams  18.180000  0.571429
31              Luka Doncic  35.240000  0.571429


In [None]:
props_read = pd.read_csv('/content/nba-player-props-rotowire.csv',skiprows=1)
print(props_read.head())
props_read[props_read['Player'] == 'Derrick Jones']

                    Player Team   Opp   PTS   REB  AST  3PT  BLK  STL  PTS.1  \
0    Giannis Antetokounmpo  MIL   BOS  33.5  12.5  6.5  0.5  NaN  NaN   33.5   
1  Shai Gilgeous-Alexander  OKC   POR  31.5   6.5  5.5  1.5  NaN  NaN   31.5   
2             Kyrie Irving  DAL   NYK  30.5   6.5  6.5  2.5  NaN  NaN   30.5   
3            Anthony Davis  LAL   PHX  28.5  12.5  3.5  0.5  NaN  NaN   28.5   
4            Julius Randle  NYK  @DAL  27.5   8.5  4.5  1.5  NaN  NaN   27.5   

   ...  AST.2  3PT.2  BLK.2  STL.2  PTS.3  REB.3  AST.3  3PT.3  BLK.3  STL.3  
0  ...    6.5    0.5    0.5    1.5   33.5   13.5    6.5    NaN    NaN    NaN  
1  ...    5.5    1.5    0.5    2.5   32.5    6.5    5.5    NaN    NaN    NaN  
2  ...    6.5    2.5    0.5    1.5   30.5    6.5    6.5    NaN    NaN    NaN  
3  ...    3.5    0.5    2.5    1.5   29.5   12.5    3.5    NaN    NaN    NaN  
4  ...    4.5    1.5    0.5    0.5   28.5    9.5    4.5    NaN    NaN    NaN  

[5 rows x 27 columns]


Unnamed: 0,Player,Team,Opp,PTS,REB,AST,3PT,BLK,STL,PTS.1,...,AST.2,3PT.2,BLK.2,STL.2,PTS.3,REB.3,AST.3,3PT.3,BLK.3,STL.3
32,Derrick Jones,DAL,NYK,12.5,5.5,,1.5,,,12.5,...,1.5,1.5,0.5,0.5,12.5,4.5,,,,


In [None]:


# Finding all columns related to PTS, REB, and AST
cols_PTS = [col for col in props_read.columns if 'PTS' in col]
cols_REB = [col for col in props_read.columns if 'REB' in col]
cols_AST = [col for col in props_read.columns if 'AST' in col]

# Extracting columns related to PTS, REB, and AST
PTS_columns = props_read[cols_PTS]
REB_columns = props_read[cols_REB]
AST_columns = props_read[cols_AST]

# Calculating averages for PTS, REB, AST
props_read['Points'] = PTS_columns.mean(axis=1, skipna=True)
props_read['Rebounds'] = REB_columns.mean(axis=1, skipna=True)
props_read['Assists'] = AST_columns.mean(axis=1, skipna=True)

# Creating a new DataFrame with selected columns
player_props = props_read[['Player', 'Points', 'Rebounds', 'Assists']]


player_props

Unnamed: 0,Player,Points,Rebounds,Assists
0,Giannis Antetokounmpo,33.5,12.75,6.25
1,Shai Gilgeous-Alexander,31.75,6.25,5.5
2,Kyrie Irving,30.5,6.25,6.5
3,Anthony Davis,28.75,12.5,3.5
4,Julius Randle,27.75,9.0,4.5
5,Jalen Brunson,27.0,3.5,7.5
6,Kevin Durant,27.166667,6.5,5.0
7,Donovan Mitchell,27.0,5.5,6.5
8,Jayson Tatum,27.5,8.5,4.5
9,LeBron James,26.5,6.5,7.75


In [None]:
df_points_predictions['Player'] = df_points_predictions['Player'].str.replace(' Jr.', '')
df_rebounds_predictions['Player'] = df_rebounds_predictions['Player'].str.replace(' Jr.', '')
df_assists_predictions['Player'] = df_assists_predictions['Player'].str.replace(' Jr.', '')
player_props['Player'] = player_props['Player'].str.replace(' Jr.', '')

  df_points_predictions['Player'] = df_points_predictions['Player'].str.replace(' Jr.', '')
  df_rebounds_predictions['Player'] = df_rebounds_predictions['Player'].str.replace(' Jr.', '')
  df_assists_predictions['Player'] = df_assists_predictions['Player'].str.replace(' Jr.', '')
  player_props['Player'] = player_props['Player'].str.replace(' Jr.', '')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  player_props['Player'] = player_props['Player'].str.replace(' Jr.', '')


In [None]:
df_points_predictions['Player'].unique()

array(['Al Horford', 'Grayson Allen', 'Rui Hachimura', 'OG Anunoby',
       'Matisse Thybulle', 'Luguentz Dort', 'LeBron James',
       'Jusuf Nurkic', 'Julius Randle', 'Josh Giddey', 'Toumani Camara',
       'Malcolm Brogdon', 'Shai Gilgeous-Alexander', 'Shaedon Sharpe',
       'Damian Lillard', 'Kristaps Porzingis', 'Giannis Antetokounmpo',
       'Jalen Williams', 'Luka Doncic', 'Jerami Grant', 'Cam Reddish',
       'Josh Hart', 'Jalen Brunson', 'Devin Booker', 'Jrue Holiday',
       'Grant Williams', 'Jayson Tatum', 'Chet Holmgren', 'Derrick Jones',
       'Austin Reaves', 'Josh Green', 'Eric Gordon', 'Khris Middleton',
       'Derrick White', 'Jaylen Brown', 'Taurean Prince', 'Brook Lopez',
       'Bobby Portis', 'Anthony Davis', 'Scoot Henderson', 'Tim Hardaway',
       'Isaiah Hartenstein'], dtype=object)

In [None]:
prediction_dfs = [df_points_predictions, df_rebounds_predictions, df_assists_predictions]
prediction_columns = ['Points', 'Rebounds', 'Assists']
stat_names = ['PTS', 'REB', 'AST']
result_pts = pd.DataFrame(columns=['Player', 'Prediction', 'Prop', 'Difference', 'R2_Score'])
result_reb = pd.DataFrame(columns=['Player', 'Prediction', 'Prop', 'Difference', 'R2_Score'])
result_ast = pd.DataFrame(columns=['Player', 'Prediction', 'Prop', 'Difference', 'R2_Score'])

# ... (Previous code remains the same)

# Loop through each prediction DataFrame and compare with player_props
for pred_df, column, stat_name in zip(prediction_dfs, prediction_columns, stat_names):
    for index, row in pred_df.iterrows():
        player = row['Player']
        pred = row[column]
        r2_score = row['R2_Score']

        # Check if the player exists in both DataFrames
        if (player_props['Player'] == player).any() and (player in player_props['Player'].values):
            if stat_name == 'PTS' and 'Points' in player_props.columns:
                difference = pred - player_props[player_props['Player'] == player]['Points'].iloc[0]
                result_pts = result_pts.append({'Player': player, 'Prediction': pred, 'Prop': player_props[player_props['Player'] == player]['Points'].iloc[0], 'Difference': difference, 'R2_Score': r2_score}, ignore_index=True)
            elif stat_name == 'REB' and 'Rebounds' in player_props.columns:
                difference = pred - player_props[player_props['Player'] == player]['Rebounds'].iloc[0]
                result_reb = result_reb.append({'Player': player, 'Prediction': pred, 'Prop':player_props[player_props['Player'] == player]['Rebounds'].iloc[0], 'Difference': difference, 'R2_Score': r2_score}, ignore_index=True)
            elif stat_name == 'AST' and 'Assists' in player_props.columns:
                difference = pred - player_props[player_props['Player'] == player]['Assists'].iloc[0]
                result_ast = result_ast.append({'Player': player, 'Prediction': pred, 'Prop': player_props[player_props['Player'] == player]['Assists'].iloc[0], 'Difference': difference, 'R2_Score': r2_score}, ignore_index=True)



  result_pts = result_pts.append({'Player': player, 'Prediction': pred, 'Prop': player_props[player_props['Player'] == player]['Points'].iloc[0], 'Difference': difference, 'R2_Score': r2_score}, ignore_index=True)
  result_pts = result_pts.append({'Player': player, 'Prediction': pred, 'Prop': player_props[player_props['Player'] == player]['Points'].iloc[0], 'Difference': difference, 'R2_Score': r2_score}, ignore_index=True)
  result_pts = result_pts.append({'Player': player, 'Prediction': pred, 'Prop': player_props[player_props['Player'] == player]['Points'].iloc[0], 'Difference': difference, 'R2_Score': r2_score}, ignore_index=True)
  result_pts = result_pts.append({'Player': player, 'Prediction': pred, 'Prop': player_props[player_props['Player'] == player]['Points'].iloc[0], 'Difference': difference, 'R2_Score': r2_score}, ignore_index=True)
  result_pts = result_pts.append({'Player': player, 'Prediction': pred, 'Prop': player_props[player_props['Player'] == player]['Points'].iloc[0]

In [None]:
# Round numerical columns including 'Prediction', 'Prop', 'Difference', 'R2_Score' to 2 decimal points
columns_to_round = ['Prediction', 'Prop', 'Difference', 'R2_Score']
result_pts[columns_to_round] = result_pts[columns_to_round].round(2)
result_reb[columns_to_round] = result_reb[columns_to_round].round(2)
result_ast[columns_to_round] = result_ast[columns_to_round].round(2)

# Sort the DataFrames by the 'Difference' column
result_pts_sorted = result_pts.sort_values(by='R2_Score', ascending=False)
result_reb_sorted = result_reb.sort_values(by='R2_Score', ascending=False)
result_ast_sorted = result_ast.sort_values(by='R2_Score', ascending=False)


result_pts_sorted = result_pts_sorted.rename(columns={'R2_Score': 'Model Score'})
result_reb_sorted = result_reb_sorted.rename(columns={'R2_Score': 'Model Score'})
result_ast_sorted = result_ast_sorted.rename(columns={'R2_Score': 'Model Score'})

# Display the sorted and rounded DataFrames for each stat with modified column names
print("\033[1mResults for Points (Sorted by Difference):\033[0m")  # \033[1m and \033[0m for bold and reset
print(result_pts_sorted.to_string(index=False))

print("\n\033[1mResults for Rebounds (Sorted by Difference):\033[0m")
print(result_reb_sorted.to_string(index=False))

print("\n\033[1mResults for Assists (Sorted by Difference):\033[0m")
print(result_ast_sorted.to_string(index=False))



[1mResults for Points (Sorted by Difference):[0m
                 Player  Prediction  Prop  Difference  Model Score
          Grayson Allen        9.00 12.50       -3.50         1.00
          Luguentz Dort       12.00 10.50        1.50         1.00
           LeBron James       18.09 26.50       -8.41         1.00
           Jusuf Nurkic       18.41 11.75        6.66         1.00
          Julius Randle       19.96 27.75       -7.79         1.00
            Josh Giddey        7.00 12.00       -5.00         1.00
         Toumani Camara        4.00  5.50       -1.50         1.00
             OG Anunoby       14.07 13.50        0.57         1.00
Shai Gilgeous-Alexander       31.40 31.75       -0.35         0.71
         Damian Lillard       25.15 24.50        0.65         0.71
     Kristaps Porzingis       19.19 20.50       -1.31         0.67
  Giannis Antetokounmpo       30.23 33.50       -3.27         0.62
           Jerami Grant       20.65 18.50        2.15         0.57
           