In [4]:

from google.colab import drive
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
import numpy as np
#Linear regression model is used because of the dependent nature of sports

# Mount Google Drive
drive.mount('/content/drive')

# Load datasets from Google Drive
elo_data_path = '/content/drive/MyDrive/Colab Notebooks/nba_elo.csv'

elo_data = pd.read_csv(elo_data_path)

# Filter data for seasons from 2018 to 2023
nba_data_filtered = elo_data[(elo_data['season'] == 2023)]

# Drop empty columns
nba_data_cleaned_filtered = nba_data_filtered.dropna(axis=1, how='all')

# Select features and target variables
features = ['elo_prob1', 'elo_prob2']
target = ['score1', 'score2']

# Drop rows with missing values in the selected columns
model_data_filtered = nba_data_cleaned_filtered.dropna(subset=features + target)

if len(model_data_filtered) == 0:
    print("No data available for the specified season.")
else:
    # Select only the first 1230 games
    model_data_subset = model_data_filtered.head(1230)

    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(
        model_data_subset[features],
        model_data_subset[target],
        test_size=0.2,
        random_state=42
    )

    if len(X_train) == 0:
        print("Not enough data for training.")
    else:
        # Initialize and train the model
        model = LinearRegression()
        model.fit(X_train, y_train)

        # Predict scores for the first 1230 games in 2023
        # Use the model to predict scores with added random element
        predictions_2023 = model.predict(model_data_subset[features])

        # Ensure no draw in the game
        predictions_2023[:, 0] = np.maximum(predictions_2023[:, 0], predictions_2023[:, 1] + 1)
        predictions_2023[:, 1] = np.maximum(predictions_2023[:, 1], predictions_2023[:, 0] + 1)

        # Increase the deviation by multiplying the standard deviation by 5
        predictions_2023 += np.random.normal(0, 5 * 2, predictions_2023.shape)

        # Round up the predicted scores and remove ".0"
        rounded_predictions_2023 = np.ceil(predictions_2023).astype(int)

        # Create a DataFrame for the predicted scores
        predicted_scores_2023 = pd.DataFrame({
            'team1': model_data_subset['team1'],
            'team2': model_data_subset['team2'],
            'predicted_score1': rounded_predictions_2023[:, 0],
            'predicted_score2': rounded_predictions_2023[:, 1],
        })

        # Create a new DataFrame to store the wins for each team
        team_wins_2023_subset = pd.DataFrame({
            'Team': pd.concat([predicted_scores_2023['team1'], predicted_scores_2023['team2']]).unique(),
            'Wins': 0  # Initialize all wins to zero
        })

        # Update the wins based on predicted scores
        for index, row in predicted_scores_2023.iterrows():
            if row['predicted_score1'] > row['predicted_score2']:
                team_wins_2023_subset.loc[team_wins_2023_subset['Team'] == row['team1'], 'Wins'] += 1
            else:
                team_wins_2023_subset.loc[team_wins_2023_subset['Team'] == row['team2'], 'Wins'] += 1

        # Display the wins for each team in the 2023 season for the first 1230 games
        print("\nPredicted Wins for Each Team in the First 1230 Games of 2023:")
        print(team_wins_2023_subset)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).

Predicted Wins for Each Team in the First 1230 Games of 2023:
   Team  Wins
0   BOS    47
1   GSW    46
2   IND    45
3   DET    46
4   ATL    43
5   MEM    50
6   MIA    36
7   BRK    49
8   TOR    37
9   SAS    50
10  MIN    41
11  UTA    35
12  PHO    51
13  SAC    38
14  PHI    43
15  LAL    39
16  WAS    36
17  CHO    40
18  NYK    44
19  HOU    35
20  POR    35
21  ORL    32
22  MIL    37
23  CHI    38
24  DAL    40
25  DEN    34
26  CLE    44
27  NOP    37
28  OKC    38
29  LAC    44


In [2]:

# Define the teams in each conference
eastern_conference = ['BOS', 'BRK', 'NYK', 'PHI', 'TOR', 'CHI', 'CLE', 'DET', 'IND', 'MIL', 'ATL', 'CHO', 'MIA', 'ORL', 'WAS']
western_conference = ['DEN', 'MIN', 'OKC', 'POR', 'UTA', 'GSW', 'LAC', 'LAL', 'PHO', 'SAC', 'DAL', 'HOU', 'MEM', 'NOP', 'SAS']

# Create a DataFrame for the teams and their predicted wins
teams_predicted_wins = team_wins_2023_subset[team_wins_2023_subset['Team'].isin(eastern_conference + western_conference)].reset_index(drop=True)

# Sort the teams by predicted wins
sorted_teams = teams_predicted_wins.sort_values(by='Wins', ascending=False)
# Display the sorted teams and their predicted wins
print("\nPredicted Wins for Each Team in the First 1230 Games of 2023:")
print(sorted_teams)

# Separate teams into conferences
eastern_teams = sorted_teams[sorted_teams['Team'].isin(eastern_conference)].reset_index(drop=True)
western_teams = sorted_teams[sorted_teams['Team'].isin(western_conference)].reset_index(drop=True)

# Display the sorted teams in each conference
print("\nEastern Conference Standings:")
print(eastern_teams)

print("\nWestern Conference Standings:")
print(western_teams)


Predicted Wins for Each Team in the First 1230 Games of 2023:
   Team  Wins
17  CHO    50
10  MIN    49
4   ATL    48
25  DEN    48
28  OKC    47
5   MEM    47
22  MIL    47
15  LAL    45
12  PHO    44
26  CLE    43
14  PHI    42
6   MIA    42
16  WAS    42
23  CHI    41
29  LAC    41
18  NYK    40
2   IND    40
20  POR    39
8   TOR    39
24  DAL    39
3   DET    39
1   GSW    38
9   SAS    38
13  SAC    36
19  HOU    36
21  ORL    36
27  NOP    36
11  UTA    35
0   BOS    32
7   BRK    31

Eastern Conference Standings:
   Team  Wins
0   CHO    50
1   ATL    48
2   MIL    47
3   CLE    43
4   PHI    42
5   MIA    42
6   WAS    42
7   CHI    41
8   NYK    40
9   IND    40
10  TOR    39
11  DET    39
12  ORL    36
13  BOS    32
14  BRK    31

Western Conference Standings:
   Team  Wins
0   MIN    49
1   DEN    48
2   OKC    47
3   MEM    47
4   LAL    45
5   PHO    44
6   LAC    41
7   POR    39
8   DAL    39
9   GSW    38
10  SAS    38
11  SAC    36
12  HOU    36
13  NOP    36
14  UTA

In [3]:

import random

# Generate a random number between 0 and 4
L = random.randint(0, 4)

# Print the random number

# Define odds decrease factor
odds_decrease_factor = 0.2

# Function to calculate decreased odds
def calculate_decreased_odds(base_odds, position):
    return base_odds - (position * odds_decrease_factor)

# Select the first 8 teams from each conference as playoffs
playoffs_east = eastern_teams.head(8)['Team'].tolist()
playoffs_west = western_teams.head(8)['Team'].tolist()

# Calculate initial odds for each team
initial_odds = 1.0

# Calculate odds for semifinalists in each conference
semifinalists_east = random.choices(playoffs_east,
                                    weights=[calculate_decreased_odds(initial_odds, i) for i in range(8)],
                                    k=len(playoffs_east) // 2)

semifinalists_west = random.choices(playoffs_west,
                                    weights=[calculate_decreased_odds(initial_odds, i) for i in range(8)],
                                    k=len(playoffs_west) // 2)

# Calculate odds for finalists in each conference
finalists_east = random.choices(semifinalists_east,
                                weights=[calculate_decreased_odds(initial_odds, i) for i in range(4)],
                                k=len(semifinalists_east) // 2)

finalists_west = random.choices(semifinalists_west,
                                weights=[calculate_decreased_odds(initial_odds, i) for i in range(4)],
                                k=len(semifinalists_west) // 2)

# Display the selected teams and the NBA Finals winner
print("\nPlayoff Teams:")
print(f"Eastern Conference: {playoffs_east}")
print(f"Western Conference: {playoffs_west}")

print("\nSemifinalists:")
print(f"Eastern Conference: {semifinalists_east}")
print(f"Western Conference: {semifinalists_west}")

print("\nFinalists:")
print(f"Eastern Conference: {finalists_east}")
print(f"Western Conference: {finalists_west}")

# Randomly select one team from each conference as NBA Finals winner
nba_finals_winner = {
    'East': random.choice(finalists_east),
    'West': random.choice(finalists_west),
}

# Display the selected NBA Finals teams
print("\nNBA Finals:")
print(f"{nba_finals_winner['East']} (East) vs {nba_finals_winner['West']} (West)")

# NBA Finals winner
overall_winner = random.choice([nba_finals_winner['East'], nba_finals_winner['West']])

# Display the NBA Finals winner
print("\nNBA Finals Winner:")
print(f"The overall NBA Finals winner is: {overall_winner}")


Playoff Teams:
Eastern Conference: ['CHO', 'ATL', 'MIL', 'CLE', 'PHI', 'MIA', 'WAS', 'CHI']
Western Conference: ['MIN', 'DEN', 'OKC', 'MEM', 'LAL', 'PHO', 'LAC', 'POR']

Semifinalists:
Eastern Conference: ['ATL', 'CHO', 'CHO', 'MIL']
Western Conference: ['OKC', 'DEN', 'OKC', 'MIN']

Finalists:
Eastern Conference: ['ATL', 'ATL']
Western Conference: ['OKC', 'OKC']

NBA Finals:
ATL (East) vs OKC (West)

NBA Finals Winner:
The overall NBA Finals winner is: ATL
