In [1]:
import os
import json
import pandas as pd
from datetime import datetime, timedelta
from nba_api.live.nba.endpoints import scoreboard
import warnings

try:
    # Get Live NBA schedule and process
    print("Fetching live NBA data...")

    # Today's Score Board
    games = scoreboard.ScoreBoard()

    # Get the dictionary representation of the data
    games_data = games.get_dict()

    # Extract the 'games' key from the dictionary
    games_list = games_data['scoreboard']['games']

    # Convert the list of games to a DataFrame
    games_df = pd.DataFrame(games_list)

    # Check if games_df is empty
    if games_df.empty:
        print("No games today")
        # You can exit the script or add any other logic you need
        

    # Continue with the rest of your code

except Exception as e:
    print(f"An error occurred: {e}")


Fetching live NBA data...
No games today


In [2]:
games_df

In [5]:
processed_data_path = 'datasets/processed_data.csv'
read_file = os.path.join(os.path.dirname(os.getcwd()), processed_data_path)
# Read the processed data CSV file to get column headers
processed_data_df = pd.read_csv(read_file, index_col = 0)
processed_data_df

Unnamed: 0_level_0,mp_10_x,fg_10_x,fga_10_x,fg%_10_x,3p_10_x,3pa_10_x,3p%_10_x,ft_10_x,fta_10_x,ft%_10_x,...,home_10_y,won_10_y,team_10_y,season_10_y,team_opp_next_y,team_y,home_next,date_next,season,target
team_x,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
SAC,0.000,0.463636,0.420588,0.419378,0.286207,0.268182,0.456888,0.358140,0.363492,0.626721,...,0.4,1,TOR,2016,SAC,TOR,1.0,2015-11-15,2016,1
TOR,0.000,0.375000,0.323529,0.390431,0.224138,0.239394,0.382423,0.548837,0.471429,0.765111,...,0.8,1,SAC,2016,TOR,SAC,0.0,2015-11-15,2016,0
CLE,0.050,0.438636,0.355882,0.447608,0.317241,0.351515,0.401306,0.372093,0.373016,0.636639,...,0.3,0,DET,2016,CLE,DET,0.0,2015-11-17,2016,0
GSW,0.025,0.534091,0.416176,0.510526,0.431034,0.398485,0.499644,0.369767,0.338095,0.736756,...,0.3,0,TOR,2016,GSW,TOR,1.0,2015-11-17,2016,1
DEN,0.000,0.395455,0.367647,0.391627,0.265517,0.303030,0.389786,0.390698,0.365079,0.709218,...,0.4,0,NOP,2016,DEN,NOP,0.0,2015-11-17,2016,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
BOS,0.000,0.386364,0.283824,0.441627,0.455172,0.471212,0.439667,0.458140,0.393651,0.765694,...,0.6,0,GSW,2022,BOS,GSW,1.0,2022-06-10,2022,0
GSW,0.000,0.502273,0.388235,0.500478,0.493103,0.501515,0.458789,0.309302,0.276190,0.741657,...,0.5,0,BOS,2022,GSW,BOS,1.0,2022-06-13,2022,1
BOS,0.000,0.381818,0.292647,0.428230,0.468966,0.477273,0.448100,0.434884,0.373016,0.764177,...,0.6,1,GSW,2022,BOS,GSW,0.0,2022-06-13,2022,0
GSW,0.000,0.502273,0.364706,0.517703,0.455172,0.481818,0.440736,0.320930,0.282540,0.757993,...,0.5,0,BOS,2022,GSW,BOS,0.0,2022-06-16,2022,1


In [6]:
#Clean dataframe

# Columns to drop
columns_to_drop = ['gameCode','gameStatusText','gameId','gameStatus', 'period', 'gameClock', 'gameEt', 'regulationPeriods', 'ifNecessary', 'seriesGameNumber', 'seriesText', 'seriesConference', 'poRoundDesc', 'gameSubtype', 'gameLeaders', 'pbOdds']

# Drop specified columns and any columns with null values
games_df = games_df.drop(columns=columns_to_drop).dropna(axis=1, how='all')

# Extract 'teamName' from 'homeTeam' and 'awayTeam'
games_df['homeTeam'] = games_df['homeTeam'].apply(lambda x: x['teamName'] if isinstance(x, dict) and 'teamName' in x else None)
games_df['awayTeam'] = games_df['awayTeam'].apply(lambda x: x['teamName'] if isinstance(x, dict) and 'teamName' in x else None)

games_df

KeyError: "['gameCode', 'gameStatusText', 'gameId', 'gameStatus', 'period', 'gameClock', 'gameEt', 'regulationPeriods', 'ifNecessary', 'seriesGameNumber', 'seriesText', 'seriesConference', 'poRoundDesc', 'gameSubtype', 'gameLeaders', 'pbOdds'] not found in axis"

In [10]:
# Read abbreviations from the JSON file into a dictionary
json_path = os.path.join('datasets', 'teams.json')
file = os.path.join(os.path.dirname(os.getcwd()), json_path)
with open(file, 'r') as file:
    team_name_to_abbr = json.load(file)

# Function to replace team names with abbreviations
def replace_team_with_abbr(team_name):
    for abbr, full_name in team_name_to_abbr.items():
        if team_name in full_name:
            return abbr
    return team_name

# Replace team names with abbreviations in the homeTeam and awayTeam columns
games_df['homeTeam'] = games_df['homeTeam'].apply(replace_team_with_abbr)
games_df['awayTeam'] = games_df['awayTeam'].apply(replace_team_with_abbr)

games_df


KeyError: 'homeTeam'

In [11]:
# Split 'gameTimeUTC' into 'date' and 'time' manually
games_df['date'] = games_df['gameTimeUTC'].str.split('T').str[0]
games_df['time'] = games_df['gameTimeUTC'].str.split('T').str[1].str[:-1]  # Remove the 'Z' at the end

# Convert 'time' column to PST in AM/PM format
games_df['time'] = pd.to_datetime(games_df['time'], format='%H:%M:%S') - pd.Timedelta(hours=8)
games_df['time'] = games_df['time'].dt.strftime('%I:%M %p').str.lstrip('0')  # Remove leading zero

# Drop the 'gameTimeUTC' column
games_df = games_df.drop('gameTimeUTC', axis=1)

games_df


KeyError: 'gameTimeUTC'

In [12]:
# Specify the date and time format
date_time_format = "%Y-%m-%d %I:%M %p"

# Combine 'date' and 'time' columns and convert to datetime
games_df['datetime'] = pd.to_datetime(games_df['date'] + ' ' + games_df['time'], format=date_time_format)

# Sort DataFrame by date and time
games_df = games_df.sort_values(by=['datetime'])

# Reset the index
games_df = games_df.reset_index(drop=True)

# Drop the 'datetime' column if you don't need it in the final CSV
games_df = games_df.drop(columns=['datetime'])

# Specify the relative path
relative_path = 'datasets'

# Save the sorted and reset index DataFrame to a CSV file in the 'datasets' folder
games_df.to_csv(os.path.join(relative_path, 'games.csv'), index=False)
games_df

KeyError: 'date'

In [3]:
# Suppress FutureWarnings
warnings.filterwarnings("ignore", category=FutureWarning, module="pandas")

# Specify the relative paths to the CSV files
games_path = 'datasets/games.csv'
predict_data_path = 'datasets/predict.csv'
processed_data_path = 'datasets/processed_data.csv'

read_file = os.path.join(os.path.dirname(os.getcwd()), processed_data_path)
game_file = os.path.join(os.path.dirname(os.getcwd()), games_path)
predict_file = os.path.join(os.path.dirname(os.getcwd()), predict_data_path)

# Read the processed data CSV file to get column headers
processed_data_df = pd.read_csv(read_file)
column_headers = processed_data_df.columns.tolist()

# Create an empty DataFrame with the column headers
predict_df = pd.DataFrame(columns=column_headers)

# Read the games.csv file
games_df = pd.read_csv(game_file)

# # Ignore FutureWarnings related to setting items of incompatible dtype
# warnings.filterwarnings("ignore", category=FutureWarning, module="pandas")

# Iterate through each game in games_df
for index, row in games_df.iterrows():
    # Create a row for the home team
    home_row = pd.Series(index=column_headers)
    home_row['team_x'] = row['homeTeam']
    home_row['team_opp_next_x'] = row['awayTeam']
    home_row['team_y'] = row['awayTeam']
    home_row['team_opp_next_y'] = row['homeTeam']
    home_row['home_next'] = 1
    home_row['date_next'] = row['date']
    
    # Use df.loc to append the row to predict_df
    predict_df.loc[len(predict_df)] = home_row

    # Create a row for the away team
    away_row = pd.Series(index=column_headers)
    away_row['team_x'] = row['awayTeam']
    away_row['team_opp_next_x'] = row['homeTeam']
    away_row['home_next'] = 0
    away_row['team_y'] = row['homeTeam']
    away_row['team_opp_next_y'] = row['awayTeam']
    away_row['date_next'] = row['date']

    # Use df.loc to append the row to predict_df
    predict_df.loc[len(predict_df)] = away_row
    
    # Select columns containing 'season'
    season_columns = predict_df.filter(like='season')

    # Set all cells in the selected columns to 2023
    predict_df.loc[len(predict_df) - 1, season_columns.columns] = 2023
    predict_df.loc[len(predict_df) - 2, season_columns.columns] = 2023

# Display the resulting DataFrame
predict_df


  home_row['team_x'] = row['homeTeam']
  away_row['team_x'] = row['awayTeam']
  home_row['team_x'] = row['homeTeam']
  away_row['team_x'] = row['awayTeam']
  home_row['team_x'] = row['homeTeam']
  away_row['team_x'] = row['awayTeam']
  home_row['team_x'] = row['homeTeam']
  away_row['team_x'] = row['awayTeam']
  home_row['team_x'] = row['homeTeam']
  away_row['team_x'] = row['awayTeam']
  home_row['team_x'] = row['homeTeam']
  away_row['team_x'] = row['awayTeam']
  home_row['team_x'] = row['homeTeam']
  away_row['team_x'] = row['awayTeam']
  home_row['team_x'] = row['homeTeam']
  away_row['team_x'] = row['awayTeam']
  home_row['team_x'] = row['homeTeam']
  away_row['team_x'] = row['awayTeam']
  home_row['team_x'] = row['homeTeam']
  away_row['team_x'] = row['awayTeam']
  home_row['team_x'] = row['homeTeam']
  away_row['team_x'] = row['awayTeam']
  home_row['team_x'] = row['homeTeam']
  away_row['team_x'] = row['awayTeam']


Unnamed: 0,team_x,mp_10_x,fg_10_x,fga_10_x,fg%_10_x,3p_10_x,3pa_10_x,3p%_10_x,ft_10_x,fta_10_x,...,home_10_y,won_10_y,team_10_y,season_10_y,team_opp_next_y,team_y,home_next,date_next,season,target
0,LAC,,,,,,,,,,...,,,,2023,LAC,GSW,1,2023-12-02,2023,
1,GSW,,,,,,,,,,...,,,,2023,GSW,LAC,0,2023-12-02,2023,
2,CHA,,,,,,,,,,...,,,,2023,CHA,MIN,1,2023-12-02,2023,
3,MIN,,,,,,,,,,...,,,,2023,MIN,CHA,0,2023-12-02,2023,
4,BKN,,,,,,,,,,...,,,,2023,BKN,ORL,1,2023-12-03,2023,
5,ORL,,,,,,,,,,...,,,,2023,ORL,BKN,0,2023-12-03,2023,
6,DET,,,,,,,,,,...,,,,2023,DET,CLE,1,2023-12-03,2023,
7,CLE,,,,,,,,,,...,,,,2023,CLE,DET,0,2023-12-03,2023,
8,MIA,,,,,,,,,,...,,,,2023,MIA,IND,1,2023-12-03,2023,
9,IND,,,,,,,,,,...,,,,2023,IND,MIA,0,2023-12-03,2023,


In [166]:
for column in processed_data_df.columns:
    print(column)

team_x
mp_10_x
fg_10_x
fga_10_x
fg%_10_x
3p_10_x
3pa_10_x
3p%_10_x
ft_10_x
fta_10_x
ft%_10_x
orb_10_x
drb_10_x
trb_10_x
ast_10_x
stl_10_x
blk_10_x
tov_10_x
pf_10_x
pts_10_x
ts%_10_x
efg%_10_x
3par_10_x
ftr_10_x
orb%_10_x
drb%_10_x
trb%_10_x
ast%_10_x
stl%_10_x
blk%_10_x
tov%_10_x
usg%_10_x
ortg_10_x
drtg_10_x
fg_max_10_x
fga_max_10_x
fg%_max_10_x
3p_max_10_x
3pa_max_10_x
3p%_max_10_x
ft_max_10_x
fta_max_10_x
ft%_max_10_x
orb_max_10_x
drb_max_10_x
trb_max_10_x
ast_max_10_x
stl_max_10_x
blk_max_10_x
tov_max_10_x
pf_max_10_x
pts_max_10_x
+/-_max_10_x
ts%_max_10_x
efg%_max_10_x
3par_max_10_x
ftr_max_10_x
orb%_max_10_x
drb%_max_10_x
trb%_max_10_x
ast%_max_10_x
stl%_max_10_x
blk%_max_10_x
tov%_max_10_x
usg%_max_10_x
ortg_max_10_x
drtg_max_10_x
total_10_x
home_10_x
won_10_x
team_10_x
season_10_x
team_opp_next_x
mp_10_y
fg_10_y
fga_10_y
fg%_10_y
3p_10_y
3pa_10_y
3p%_10_y
ft_10_y
fta_10_y
ft%_10_y
orb_10_y
drb_10_y
trb_10_y
ast_10_y
stl_10_y
blk_10_y
tov_10_y
pf_10_y
pts_10_y
ts%_10_y
efg%_10_y

In [4]:
processed_data_df

Unnamed: 0,team_x,mp_10_x,fg_10_x,fga_10_x,fg%_10_x,3p_10_x,3pa_10_x,3p%_10_x,ft_10_x,fta_10_x,...,home_10_y,won_10_y,team_10_y,season_10_y,team_opp_next_y,team_y,home_next,date_next,season,target
0,SAC,0.000,0.463636,0.420588,0.419378,0.286207,0.268182,0.456888,0.358140,0.363492,...,0.4,1,TOR,2016,SAC,TOR,1.0,2015-11-15,2016,1
1,TOR,0.000,0.375000,0.323529,0.390431,0.224138,0.239394,0.382423,0.548837,0.471429,...,0.8,1,SAC,2016,TOR,SAC,0.0,2015-11-15,2016,0
2,CLE,0.050,0.438636,0.355882,0.447608,0.317241,0.351515,0.401306,0.372093,0.373016,...,0.3,0,DET,2016,CLE,DET,0.0,2015-11-17,2016,0
3,GSW,0.025,0.534091,0.416176,0.510526,0.431034,0.398485,0.499644,0.369767,0.338095,...,0.3,0,TOR,2016,GSW,TOR,1.0,2015-11-17,2016,1
4,DEN,0.000,0.395455,0.367647,0.391627,0.265517,0.303030,0.389786,0.390698,0.365079,...,0.4,0,NOP,2016,DEN,NOP,0.0,2015-11-17,2016,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15769,BOS,0.000,0.386364,0.283824,0.441627,0.455172,0.471212,0.439667,0.458140,0.393651,...,0.6,0,GSW,2022,BOS,GSW,1.0,2022-06-10,2022,0
15770,GSW,0.000,0.502273,0.388235,0.500478,0.493103,0.501515,0.458789,0.309302,0.276190,...,0.5,0,BOS,2022,GSW,BOS,1.0,2022-06-13,2022,1
15771,BOS,0.000,0.381818,0.292647,0.428230,0.468966,0.477273,0.448100,0.434884,0.373016,...,0.6,1,GSW,2022,BOS,GSW,0.0,2022-06-13,2022,0
15772,GSW,0.000,0.502273,0.364706,0.517703,0.455172,0.481818,0.440736,0.320930,0.282540,...,0.5,0,BOS,2022,GSW,BOS,0.0,2022-06-16,2022,1


In [168]:
for col in processed_data_df.columns:
    print(col)

team_x
mp_10_x
fg_10_x
fga_10_x
fg%_10_x
3p_10_x
3pa_10_x
3p%_10_x
ft_10_x
fta_10_x
ft%_10_x
orb_10_x
drb_10_x
trb_10_x
ast_10_x
stl_10_x
blk_10_x
tov_10_x
pf_10_x
pts_10_x
ts%_10_x
efg%_10_x
3par_10_x
ftr_10_x
orb%_10_x
drb%_10_x
trb%_10_x
ast%_10_x
stl%_10_x
blk%_10_x
tov%_10_x
usg%_10_x
ortg_10_x
drtg_10_x
fg_max_10_x
fga_max_10_x
fg%_max_10_x
3p_max_10_x
3pa_max_10_x
3p%_max_10_x
ft_max_10_x
fta_max_10_x
ft%_max_10_x
orb_max_10_x
drb_max_10_x
trb_max_10_x
ast_max_10_x
stl_max_10_x
blk_max_10_x
tov_max_10_x
pf_max_10_x
pts_max_10_x
+/-_max_10_x
ts%_max_10_x
efg%_max_10_x
3par_max_10_x
ftr_max_10_x
orb%_max_10_x
drb%_max_10_x
trb%_max_10_x
ast%_max_10_x
stl%_max_10_x
blk%_max_10_x
tov%_max_10_x
usg%_max_10_x
ortg_max_10_x
drtg_max_10_x
total_10_x
home_10_x
won_10_x
team_10_x
season_10_x
team_opp_next_x
mp_10_y
fg_10_y
fga_10_y
fg%_10_y
3p_10_y
3pa_10_y
3p%_10_y
ft_10_y
fta_10_y
ft%_10_y
orb_10_y
drb_10_y
trb_10_y
ast_10_y
stl_10_y
blk_10_y
tov_10_y
pf_10_y
pts_10_y
ts%_10_y
efg%_10_y

In [21]:
selected_columns = ['team_x', 'team_opp_next_x','team_y', 'team_opp_next_y', 'home_next', 'date_next', 'season']
predict_df[selected_columns]

Unnamed: 0,team_x,team_opp_next_x,team_y,team_opp_next_y,home_next,date_next,season
0,LAC,GSW,GSW,LAC,1,2023-12-02,2023
1,GSW,LAC,LAC,GSW,0,2023-12-02,2023
2,CHA,MIN,MIN,CHA,1,2023-12-02,2023
3,MIN,CHA,CHA,MIN,0,2023-12-02,2023
4,BKN,ORL,ORL,BKN,1,2023-12-03,2023
5,ORL,BKN,BKN,ORL,0,2023-12-03,2023
6,DET,CLE,CLE,DET,1,2023-12-03,2023
7,CLE,DET,DET,CLE,0,2023-12-03,2023
8,MIA,IND,IND,MIA,1,2023-12-03,2023
9,IND,MIA,MIA,IND,0,2023-12-03,2023


In [22]:
# Iterate through each row in predict_df
for index, row in predict_df.iterrows():
    # Iterate through processed_data_df starting from the last row for team_x
    found_x = False
    for _, team_x_row in processed_data_df[processed_data_df['team_x'].isin([row['team_x'], 'BNK', 'BRK'])].iloc[::-1].iterrows():
        # Find the columns with '10_x' suffix not including 'opp_10_x'
        columns_to_fill_x = team_x_row.index[team_x_row.index.str.endswith('10_x') & ~team_x_row.index.str.endswith('opp_10_x')]

        # Switch 'BRK' to 'BNK'
        if team_x_row['team_x'] == 'BRK':
            team_x_row['team_x'] = 'BNK'

        # Fill in the corresponding columns in predict_df for team_x
        for col_x in columns_to_fill_x:
            predict_df.at[index, col_x] = team_x_row[col_x]
        found_x = True
        break

    # If no matching row is found for team_x, you can handle this case accordingly
    if not found_x:
        print(f"No match found for team_x: {row['team_x']}")

    # Repeat the process for team_y
    found_y = False
    for _, team_y_row in processed_data_df[processed_data_df['team_y'].isin([row['team_y'], 'BNK', 'BRK'])].iloc[::-1].iterrows():
        # Find the columns with '10_y' suffix not including 'opp_10_y'
        columns_to_fill_y = team_y_row.index[team_y_row.index.str.endswith('10_y') & ~team_y_row.index.str.endswith('opp_10_y')]

        # Switch 'BRK' to 'BNK'
        if team_y_row['team_y'] == 'BRK':
            team_y_row['team_y'] = 'BNK'

        # Fill in the corresponding columns in predict_df for team_y
        for col_y in columns_to_fill_y:
            predict_df.at[index, col_y] = team_y_row[col_y]
        found_y = True
        break

    # If no matching row is found for team_y, you can handle this case accordingly
    if not found_y:
        print(f"No match found for team_y: {row['team_y']}")
        
        



In [23]:
# Drop certain columns from predict_df
columns_to_drop = ['season_10_x', 'season_10_y', 'team_10_x', 'team_10_y', 'won_10_y']
predict_df = predict_df.drop(columns=columns_to_drop, errors='ignore')

# Drop columns with NaN values
predict_df = predict_df.dropna(axis=1, how='all')

# Display the selected columns
predict_df

Unnamed: 0,team_x,mp_10_x,fg_10_x,fga_10_x,fg%_10_x,3p_10_x,3pa_10_x,3p%_10_x,ft_10_x,fta_10_x,...,usg%_max_10_y,ortg_max_10_y,drtg_max_10_y,total_10_y,home_10_y,team_opp_next_y,team_y,home_next,date_next,season
0,LAC,0.025,0.529545,0.386765,0.536364,0.434483,0.406061,0.486817,0.397674,0.339683,...,0.192555,0.5891,0.512941,0.444231,0.6,LAC,GSW,1,2023-12-02,2023
1,GSW,0.0,0.502273,0.364706,0.517703,0.455172,0.481818,0.440736,0.32093,0.28254,...,0.137869,0.404265,0.550588,0.493269,0.6,GSW,LAC,0,2023-12-02,2023
2,CHA,0.025,0.529545,0.386765,0.536364,0.434483,0.406061,0.486817,0.397674,0.339683,...,0.133119,0.459242,0.551765,0.498077,0.6,CHA,MIN,1,2023-12-02,2023
3,MIN,0.0,0.465909,0.375,0.459809,0.506897,0.521212,0.455344,0.490698,0.431746,...,0.137869,0.404265,0.550588,0.493269,0.6,MIN,CHA,0,2023-12-02,2023
4,BKN,0.025,0.529545,0.386765,0.536364,0.434483,0.406061,0.486817,0.397674,0.339683,...,0.137869,0.404265,0.550588,0.493269,0.6,BKN,ORL,1,2023-12-03,2023
5,ORL,0.025,0.529545,0.386765,0.536364,0.434483,0.406061,0.486817,0.397674,0.339683,...,0.137869,0.404265,0.550588,0.493269,0.6,ORL,BKN,0,2023-12-03,2023
6,DET,0.025,0.529545,0.386765,0.536364,0.434483,0.406061,0.486817,0.397674,0.339683,...,0.137869,0.404265,0.550588,0.493269,0.6,DET,CLE,1,2023-12-03,2023
7,CLE,0.025,0.529545,0.386765,0.536364,0.434483,0.406061,0.486817,0.397674,0.339683,...,0.137869,0.404265,0.550588,0.493269,0.6,CLE,DET,0,2023-12-03,2023
8,MIA,0.0,0.404545,0.367647,0.395694,0.348276,0.45303,0.357601,0.374419,0.315873,...,0.137869,0.404265,0.550588,0.493269,0.6,MIA,IND,1,2023-12-03,2023
9,IND,0.025,0.529545,0.386765,0.536364,0.434483,0.406061,0.486817,0.397674,0.339683,...,0.146727,0.384834,0.507059,0.353846,0.4,IND,MIA,0,2023-12-03,2023


In [24]:
print(len(predict_df.columns))

144


In [25]:
predict_df[['team_x', 'team_opp_next_x', 'team_y', 'team_opp_next_y']]

Unnamed: 0,team_x,team_opp_next_x,team_y,team_opp_next_y
0,LAC,GSW,GSW,LAC
1,GSW,LAC,LAC,GSW
2,CHA,MIN,MIN,CHA
3,MIN,CHA,CHA,MIN
4,BKN,ORL,ORL,BKN
5,ORL,BKN,BKN,ORL
6,DET,CLE,CLE,DET
7,CLE,DET,DET,CLE
8,MIA,IND,IND,MIA
9,IND,MIA,MIA,IND


In [27]:
# Save the resulting DataFrame to a new CSV file
predict_df.to_csv(predict_file, index=False)

In [28]:
# Specify the path to the prediction dataset
predict_path = 'datasets/predict.csv'
predict_file = os.path.join(os.path.dirname(os.getcwd()), predict_data_path)

# Load the prediction data
predict_df = pd.read_csv(predict_file)

In [29]:
# Extract features from the prediction dataset
removed_columns = list(predict_df.columns[predict_df.dtypes == "object"])
selected_columns = predict_df.columns[~predict_df.columns.isin(removed_columns)]

# Exclude columns with specific words
excluded_words = ["season", "date", "won", "target", "team", "team_opp"]
features_columns = [col for col in selected_columns if not any(word in col for word in excluded_words)]

predict_features = predict_df[features_columns].copy()
predict_features

Unnamed: 0,mp_10_x,fg_10_x,fga_10_x,fg%_10_x,3p_10_x,3pa_10_x,3p%_10_x,ft_10_x,fta_10_x,ft%_10_x,...,ast%_max_10_y,stl%_max_10_y,blk%_max_10_y,tov%_max_10_y,usg%_max_10_y,ortg_max_10_y,drtg_max_10_y,total_10_y,home_10_y,home_next
0,0.025,0.529545,0.386765,0.536364,0.434483,0.406061,0.486817,0.397674,0.339683,0.769778,...,0.393922,0.0569,0.1146,0.516352,0.192555,0.5891,0.512941,0.444231,0.6,1
1,0.0,0.502273,0.364706,0.517703,0.455172,0.481818,0.440736,0.32093,0.28254,0.757993,...,0.229817,0.0495,0.1045,0.445493,0.137869,0.404265,0.550588,0.493269,0.6,0
2,0.025,0.529545,0.386765,0.536364,0.434483,0.406061,0.486817,0.397674,0.339683,0.769778,...,0.329587,0.0491,0.0961,0.523899,0.133119,0.459242,0.551765,0.498077,0.6,1
3,0.0,0.465909,0.375,0.459809,0.506897,0.521212,0.455344,0.490698,0.431746,0.754026,...,0.229817,0.0495,0.1045,0.445493,0.137869,0.404265,0.550588,0.493269,0.6,0
4,0.025,0.529545,0.386765,0.536364,0.434483,0.406061,0.486817,0.397674,0.339683,0.769778,...,0.229817,0.0495,0.1045,0.445493,0.137869,0.404265,0.550588,0.493269,0.6,1
5,0.025,0.529545,0.386765,0.536364,0.434483,0.406061,0.486817,0.397674,0.339683,0.769778,...,0.229817,0.0495,0.1045,0.445493,0.137869,0.404265,0.550588,0.493269,0.6,0
6,0.025,0.529545,0.386765,0.536364,0.434483,0.406061,0.486817,0.397674,0.339683,0.769778,...,0.229817,0.0495,0.1045,0.445493,0.137869,0.404265,0.550588,0.493269,0.6,1
7,0.025,0.529545,0.386765,0.536364,0.434483,0.406061,0.486817,0.397674,0.339683,0.769778,...,0.229817,0.0495,0.1045,0.445493,0.137869,0.404265,0.550588,0.493269,0.6,0
8,0.0,0.404545,0.367647,0.395694,0.348276,0.45303,0.357601,0.374419,0.315873,0.770712,...,0.229817,0.0495,0.1045,0.445493,0.137869,0.404265,0.550588,0.493269,0.6,1
9,0.025,0.529545,0.386765,0.536364,0.434483,0.406061,0.486817,0.397674,0.339683,0.769778,...,0.236239,0.0655,0.0849,0.361111,0.146727,0.384834,0.507059,0.353846,0.4,0
