In [1]:
# import pandas as pd
# from sklearn.model_selection import train_test_split
# from sklearn.linear_model import LogisticRegression, LinearRegression
# from sklearn.metrics import accuracy_score, mean_squared_error
# from colorama import Fore, Style
# from tabulate import tabulate
# from prettytable import PrettyTable

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from colorama import Fore, Style
from prettytable import PrettyTable

In [3]:
df_existing = pd.read_excel('NBA2324.xlsx')

In [4]:
df_existing

Unnamed: 0,Away team,Away Pts,Home team,Home Pts,Overtime,Attend.,Arena,Win,Loss
0,Los Angeles Lakers,107,Denver Nuggets,119,No,19842,Ball Arena,Denver Nuggets,Los Angeles Lakers
1,Phoenix Suns,108,Golden State Warriors,104,No,18064,Chase Center,Phoenix Suns,Golden State Warriors
2,Houston Rockets,86,Orlando Magic,116,No,18846,Amway Center,Orlando Magic,Houston Rockets
3,Boston Celtics,108,New York Knicks,104,No,19812,Madison Square Garden (IV),Boston Celtics,New York Knicks
4,Washington Wizards,120,Indiana Pacers,143,No,16004,Gainbridge Fieldhouse,Indiana Pacers,Washington Wizards
...,...,...,...,...,...,...,...,...,...
711,Milwaukee Bucks,116,Portland Trail Blazers,119,No,17832,Moda Center,Portland Trail Blazers,Milwaukee Bucks
712,Los Angeles Lakers,114,Boston Celtics,105,No,17832,TD Garden,Los Angeles Lakers,Boston Celtics
713,Indiana Pacers,105,New York Knicks,109,No,17832,Madison Square Garden (IV),New York Knicks,Indiana Pacers
714,Cleveland Cavaliers,108,Memphis Grizzlies,101,No,17832,FedEx Forum,Cleveland Cavaliers,Memphis Grizzlies


In [5]:
#checking if the 'Win' column exists before proceeding
winning_team = 'Win'  # Update with the actual column name
if winning_team in df_existing.columns:
    #creating a binary target variable indicating whether the home team wins
    df_existing['HomeWin'] = (df_existing['Home team'] == df_existing[winning_team]).astype(int)

    #specifying the categorical columns for one-hot encoding
    categorical_columns = ['Away team', 'Home team', 'Overtime', 'Arena']

    #checking if categorical columns exist in the DataFrame
    missing_columns = [col for col in categorical_columns if col not in df_existing.columns]

    if not missing_columns:
        #extracting the necessary columns for the model
        X_completed = df_existing[categorical_columns]
        y_completed = df_existing['HomeWin']

        #one-hot encode categorical columns
        X_completed_encoded = pd.get_dummies(X_completed, drop_first=True)

        #splitting the completed data into training and testing sets
        X_train_completed, _, y_train_completed, _ = train_test_split(
            X_completed_encoded, y_completed, test_size=0.2, random_state=42
        )

        #choosing a model for completed games (Logistic Regression in this example)
        model_completed = LogisticRegression()

        #training the model on the completed data
        model_completed.fit(X_train_completed, y_train_completed)
    else:
        print(f"Error: Columns {missing_columns} not found in DataFrame.")
else:
    print(f"Error: Column '{winning_team}' not found in DataFrame.")

In [6]:
# upcoming game data
upcoming_game_data = {
    'Away team': ['Los Angeles Clippers', 'Miami Heat', 'Phoenix Suns', 'Sacramento Kings', 'Toronto Raptors', 'Golden State Warriors', 'Orlando Magic', 'Charlotte Hornets', 'New Orleans Pelicans', 'Portland Trail Blazers'],
    'Home team': ['Detroit Pistons', 'Washington Wizards', 'Atlanta Hawks', 'Indiana Pacers', 'Houston Rockets', 'Memphis Grizzlies', 'Minnesota Timberwolves', 'Oklahoma City Thunder', 'San Antonio Spurs', 'Denver Nuggets'],
    'Overtime': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],  # placeholder for upcoming games
    'Arena': ['Little Caesars Arena', 'Capital One Arena', 'State Farm Arena', 'Gainbridge Fieldhouse', 'Toyota Center', 'FedEx Forum', 'Target Center', 'Paycom Center', 'Frost Bank Center', 'Ball Arena'],
}

In [7]:
#creating a DataFrame for the upcoming game
df_upcoming_game = pd.DataFrame(upcoming_game_data)

In [8]:
#ensuring the columns in df_upcoming_game_encoded match the columns used during training
df_upcoming_game_encoded = pd.get_dummies(df_upcoming_game)
missing_columns = set(X_train_completed.columns) - set(df_upcoming_game_encoded.columns)
for column in missing_columns:
    df_upcoming_game_encoded[column] = 0

In [9]:
#reordering columns to match the order during training
df_upcoming_game_encoded = df_upcoming_game_encoded[X_train_completed.columns]

#making predictions using the trained model for binary outcome
predicted_probabilities = model_completed.predict_proba(df_upcoming_game_encoded)

In [10]:
prediction_date = 'Friday 2 Feb 2024'

In [11]:
#ANSI escape codes for blue color
blue_color = '\033[94m'
reset_color = '\033[0m'

#data for PrettyTable
table_new = PrettyTable()
table_new.field_names = [f"{blue_color}NBA, {prediction_date}{reset_color}", f"{blue_color}Projected Winners{reset_color}"]
table_new.align["Projected Winners"] = "l"
table_new.horizontal_char = '-'  # Use a horizontal line as a separator

#defining color codes for text
green_color = '\033[92m'
reset_color = '\033[0m'

In [12]:
for i, team in enumerate(df_upcoming_game.itertuples(), start=1):  #starting from index 1 to skip the header row
    team1_probability = predicted_probabilities[i-1][1] * 100
    team2_probability = 100 - team1_probability

    #determining the team with the higher win probability as the projected winner
    if team1_probability > 50:
        predicted_winner = team[1]
        projected_win_percentage = team1_probability
    else:
        predicted_winner = team[2]
        projected_win_percentage = team2_probability

    team1_info = f"{green_color}{team[1]} ({df_existing['Win'].eq(team[1]).sum()}-{df_existing['Loss'].eq(team[1]).sum()}){reset_color}"
    team2_info = f"{green_color}{team[2]} ({df_existing['Win'].eq(team[2]).sum()}-{df_existing['Loss'].eq(team[2]).sum()}){reset_color}"

    team_info = f"{team1_info} vs {team2_info}"

    table_new.add_row([team_info, green_color + f"{predicted_winner} ({projected_win_percentage:.2f}%) {reset_color}"])

In [13]:
print(table_new)

+------------------------------------------------------------+----------------------------------+
|                   [94mNBA, Friday 2 Feb 2024[0m                   |        [94mProjected Winners[0m         |
+------------------------------------------------------------+----------------------------------+
|   [92mLos Angeles Clippers (31-15)[0m vs [92mDetroit Pistons (6-41)[0m   |    [92mDetroit Pistons (88.91%) [0m     |
|      [92mMiami Heat (25-23)[0m vs [92mWashington Wizards (9-38)[0m       |   [92mWashington Wizards (86.77%) [0m   |
|       [92mPhoenix Suns (28-20)[0m vs [92mAtlanta Hawks (20-27)[0m        |     [92mAtlanta Hawks (85.01%) [0m      |
|     [92mSacramento Kings (27-19)[0m vs [92mIndiana Pacers (27-23)[0m     |    [92mSacramento Kings (55.65%) [0m    |
|     [92mToronto Raptors (17-30)[0m vs [92mHouston Rockets (22-25)[0m     |    [92mToronto Raptors (68.72%) [0m     |
| [92mGolden State Warriors (20-24)[0m vs [92mMemphis Grizzl