# NBA simulation central

In [17]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random

nba = pd.read_csv('2017-2018.csv', sep=';')

teams_abb = pd.read_csv('data/Team Abbrev.csv')
teams_abb = teams_abb[teams_abb['season'] == 2018]
teams_abb = teams_abb[['team', 'abbreviation']]

nba = nba.merge(teams_abb, how='left', left_on='HomeTeam', right_on='abbreviation')
nba['HomeTeam'] = nba['team']
nba.drop(['abbreviation', 'team'], axis=1, inplace=True)

nba = nba.merge(teams_abb, how='left', left_on='AwayTeam', right_on='abbreviation')
nba['AwayTeam'] = nba['team']
nba.drop(['abbreviation', 'team'], axis=1, inplace=True)

nba_regular = nba[nba['Type'] == 'Regular']

## Creating team coefficients

In [2]:
decimal_places = 2

# Create an empty dictionary to store team statistics
teams_dict = {
    'Team': [],
    'Wins': [],
    'Losses': [],
    'OFFH': [],
    'DEFH': [],
    'OFFA': [],
    'DEFA': [],
    'OFFHsd': [],
    'DEFHsd': [],
    'OFFAsd': [],
    'DEFAsd': []
}

# Calculate statistics for each team
for team in nba['HomeTeam'].unique():
    team_home = nba[nba['HomeTeam'] == team]
    team_away = nba[nba['AwayTeam'] == team]
    
    # Calculate Home and Away statistics
    home_offense_mean = team_home['HomePTS'].mean()
    home_defense_mean = team_home['AwayPTS'].mean()
    away_offense_mean = team_away['AwayPTS'].mean()
    away_defense_mean = team_away['HomePTS'].mean()
    
    home_offense_std = team_home['HomePTS'].std()
    home_defense_std = team_home['AwayPTS'].std()
    away_offense_std = team_away['AwayPTS'].std()
    away_defense_std = team_away['HomePTS'].std()
    
    # Calculate Wins and Losses
    wins = len(nba[(nba['HomeTeam'] == team) & (nba['HomePTS'] > nba['AwayPTS'])]) + len(nba[(nba['AwayTeam'] == team) & (nba['AwayPTS'] > nba['HomePTS'])])
    losses = len(nba[(nba['HomeTeam'] == team) & (nba['HomePTS'] < nba['AwayPTS'])]) + len(nba[(nba['AwayTeam'] == team) & (nba['AwayPTS'] < nba['HomePTS'])])

    # Append calculated values to the dictionary
    teams_dict['Team'].append(team)
    teams_dict['Wins'].append(wins)
    teams_dict['Losses'].append(losses)
    teams_dict['OFFH'].append(home_offense_mean)
    teams_dict['DEFH'].append(home_defense_mean)
    teams_dict['OFFA'].append(away_offense_mean)
    teams_dict['DEFA'].append(away_defense_mean)
    teams_dict['OFFHsd'].append(home_offense_std)
    teams_dict['DEFHsd'].append(home_defense_std)
    teams_dict['OFFAsd'].append(away_offense_std)
    teams_dict['DEFAsd'].append(away_defense_std)

# Create a dataframe from the dictionary
teams = pd.DataFrame(teams_dict).sort_values(by='Wins', ascending=False)

teams

Unnamed: 0,Team,Wins,Losses,OFFH,DEFH,OFFA,DEFA,OFFHsd,DEFHsd,OFFAsd,DEFAsd
23,Houston Rockets,76,23,113.156863,104.392157,108.875,103.0625,11.189946,12.045046,12.366091,11.071155
1,Golden State Warriors,74,29,113.5,104.153846,112.176471,107.980392,13.457996,12.432381,13.741479,11.432393
6,Boston Celtics,66,35,105.461538,99.730769,101.44898,101.265306,11.30584,11.710755,9.502327,9.264123
13,Toronto Raptors,63,29,112.456522,102.934783,109.913043,106.521739,12.012413,14.291259,11.204813,12.221546
0,Cleveland Cavaliers,62,42,109.788462,108.019231,107.826923,108.846154,12.760749,12.378809,13.348601,12.093402
18,Philadelphia 76ers,57,35,111.173913,103.043478,108.217391,107.478261,10.137509,10.757853,10.404941,11.370017
9,Utah Jazz,53,40,104.043478,97.130435,103.829787,103.595745,11.832078,11.29328,12.967047,11.259121
21,New Orleans Pelicans,53,38,113.511111,111.777778,109.630435,109.152174,11.218131,10.708724,11.898382,11.696091
3,Indiana Pacers,51,38,106.75,103.227273,103.622222,103.666667,11.899042,10.503297,11.493652,11.3838
14,Oklahoma City Thunder,50,38,108.772727,103.522727,106.022727,105.5,9.98339,11.997071,15.123505,11.163687


## Custom functions

In [22]:
def simulate_game_by_quarters(home, away, teams):
    max_overtimes = 10
    overtime_multiplier = 5/48

    home_stats = teams[teams['Team'] == home].iloc[0]
    away_stats = teams[teams['Team'] == away].iloc[0]

    homeOFFH = home_stats['OFFH']
    homeOFFHsd = home_stats['OFFHsd']
    homeDEFH = home_stats['DEFH']
    homeDEFHsd = home_stats['DEFHsd']

    awayOFFA = away_stats['OFFA']
    awayOFFAsd = away_stats['OFFAsd']
    awayDEFA = away_stats['DEFA']
    awayDEFAsd = away_stats['DEFAsd']

    # Simulating Quarters
    quarters = {'H_Q1': [], 'A_Q1': [], 'H_Q2': [], 'A_Q2': [], 'H_Q3': [], 'A_Q3': [], 'H_Q4': [], 'A_Q4': []}
    for i in range(1, 5):
        home_quarter = int(round(((random.gauss(homeOFFH, homeOFFHsd) + random.gauss(awayDEFA, awayDEFAsd)) / 2) / 4, 0))
        away_quarter = int(round(((random.gauss(awayOFFA, awayOFFAsd) + random.gauss(homeDEFH, homeDEFHsd)) / 2) / 4, 0))
        quarters[f'H_Q{i}'].append(home_quarter)
        quarters[f'A_Q{i}'].append(away_quarter)

    # Calculating scores
    home_scores = sum(quarters['H_Q1']) + sum(quarters['H_Q2']) + sum(quarters['H_Q3']) + sum(quarters['H_Q4'])
    away_scores = sum(quarters['A_Q1']) + sum(quarters['A_Q2']) + sum(quarters['A_Q3']) + sum(quarters['A_Q4'])

    # Handling overtime if necessary
    home_OT = 0
    away_OT = 0
    num_overtimes = 0
    if home_scores == away_scores:
        num_overtimes = 0
        while home_scores == away_scores and num_overtimes < max_overtimes:
            num_overtimes += 1
            home_OT += int(round(((random.gauss(homeOFFH, homeOFFHsd) + random.gauss(awayDEFA, awayDEFAsd)) / 2) * overtime_multiplier, 0))
            away_OT += int(round(((random.gauss(awayOFFA, awayOFFAsd) + random.gauss(homeDEFH, homeDEFHsd)) / 2) * overtime_multiplier, 0))
            home_scores += home_OT
            away_scores += away_OT

    # Creating the DataFrame
    data = {
        'HomeTeam': [home],
        'AwayTeam': [away],
        'HomePTS': [home_scores],
        'AwayPTS': [away_scores],
        'H_Q1': [sum(quarters['H_Q1'])],
        'A_Q1': [sum(quarters['A_Q1'])],
        'H_Q2': [sum(quarters['H_Q2'])],
        'A_Q2': [sum(quarters['A_Q2'])],
        'H_Q3': [sum(quarters['H_Q3'])],
        'A_Q3': [sum(quarters['A_Q3'])],
        'H_Q4': [sum(quarters['H_Q4'])],
        'A_Q4': [sum(quarters['A_Q4'])],
        'H_OT': [home_OT],
        'A_OT': [away_OT],
        '#_OT': [num_overtimes]
    }
    return pd.DataFrame(data)

def simulate_game(home, away, teams):
    max_overtimes = 10
    overtime_multiplier = 5/48

    home_stats = teams[teams['Team'] == home].iloc[0]
    away_stats = teams[teams['Team'] == away].iloc[0]

    homeOFFH = home_stats['OFFH']
    homeOFFHsd = home_stats['OFFHsd']
    homeDEFH = home_stats['DEFH']
    homeDEFHsd = home_stats['DEFHsd']

    awayOFFA = away_stats['OFFA']
    awayOFFAsd = away_stats['OFFAsd']
    awayDEFA = away_stats['DEFA']
    awayDEFAsd = away_stats['DEFAsd']

    # Calculating scores
    home_scores = int(round(((random.gauss(homeOFFH, homeOFFHsd) + random.gauss(awayDEFA, awayDEFAsd)) / 2), 0))
    away_scores = int(round(((random.gauss(awayOFFA, awayOFFAsd) + random.gauss(homeDEFH, homeDEFHsd)) / 2), 0))

    # Handling overtime if necessary
    home_OT = 0
    away_OT = 0
    num_overtimes = 0
    if home_scores == away_scores:
        num_overtimes = 0
        while home_scores == away_scores and num_overtimes < max_overtimes:
            num_overtimes += 1
            home_OT += int(round(((random.gauss(homeOFFH, homeOFFHsd) + random.gauss(awayDEFA, awayDEFAsd)) / 2) * overtime_multiplier, 0))
            away_OT += int(round(((random.gauss(awayOFFA, awayOFFAsd) + random.gauss(homeDEFH, homeDEFHsd)) / 2) * overtime_multiplier, 0))
            home_scores += home_OT
            away_scores += away_OT

    # Creating the DataFrame
    data = {
        'HomeTeam': [home],
        'AwayTeam': [away],
        'HomePTS': [home_scores],
        'AwayPTS': [away_scores],
        'H_OT': [home_OT],
        'A_OT': [away_OT],
        '#_OT': [num_overtimes]
    }
    return pd.DataFrame(data)

# Simulating a match

In [52]:
game = simulate_game('Golden State Warriors', 'Phoenix Suns', teams)
game

Unnamed: 0,HomeTeam,AwayTeam,HomePTS,AwayPTS,H_OT,A_OT,#_OT
0,Golden State Warriors,Phoenix Suns,119,115,0,0,0


# Simulating a match N times

In [54]:
games = []
iterations = 10000

for _ in range(iterations):
    game = simulate_game('Golden State Warriors', 'Phoenix Suns', teams)
    games.append(game)

montecarlo_game = pd.concat(games, ignore_index=True)

home_wins = montecarlo_game[montecarlo_game['HomePTS'] > montecarlo_game['AwayPTS']]
away_wins = montecarlo_game[montecarlo_game['AwayPTS'] > montecarlo_game['HomePTS']]

home_wins_count = len(home_wins)
away_wins_count = len(away_wins)

print("Home team wins:", home_wins_count)
print("Away team wins:", away_wins_count)

Home team wins: 7875
Away team wins: 2125


## Details

In [55]:
montecarlo_game.describe()

Unnamed: 0,HomePTS,AwayPTS,H_OT,A_OT,#_OT
count,10000.0,10000.0,10000.0,10000.0,10000.0
mean,113.8715,104.907,0.3983,0.3656,0.0337
std,9.225806,9.408167,2.716946,2.533886,0.231019
min,79.0,68.0,0.0,0.0,0.0
25%,108.0,99.0,0.0,0.0,0.0
50%,114.0,105.0,0.0,0.0,0.0
75%,120.0,110.0,0.0,0.0,0.0
max,267.0,265.0,57.0,55.0,5.0


# Simulating a season

In [56]:
game_dfs = []

# Loop through each row in the 'nba' DataFrame
for index, row in nba_regular.iterrows():
    home_team = row['HomeTeam']
    away_team = row['AwayTeam']
    
    # Call simulate_game function with HomeTeam, AwayTeam, and teams DataFrame as parameters
    game_result = simulate_game(home_team, away_team, teams)
    
    # Append the resulting DataFrame to the list
    game_dfs.append(game_result)

# Concatenate all individual game DataFrames into a single DataFrame
season = pd.concat(game_dfs, ignore_index=True)

# Display or use the concatenated DataFrame 'result_df'
season

Unnamed: 0,HomeTeam,AwayTeam,HomePTS,AwayPTS,H_OT,A_OT,#_OT
0,Cleveland Cavaliers,Boston Celtics,100,96,0,0,0
1,Golden State Warriors,Houston Rockets,98,102,0,0,0
2,Detroit Pistons,Charlotte Hornets,97,101,0,0,0
3,Indiana Pacers,Brooklyn Nets,103,95,0,0,0
4,Orlando Magic,Miami Heat,85,110,0,0,0
...,...,...,...,...,...,...,...
1225,Orlando Magic,Washington Wizards,119,108,0,0,0
1226,Philadelphia 76ers,Milwaukee Bucks,95,100,0,0,0
1227,Los Angeles Clippers,Los Angeles Lakers,110,98,0,0,0
1228,Portland Trail Blazers,Utah Jazz,103,99,0,0,0


# Simulating a season N times

In [59]:
# Initialize an empty list to store individual season DataFrames
seasons = []

# Number of times to simulate the season (change this to your desired value)
N = 50

for _ in range(N):
    game_dfs = []

    # Loop through each row in the 'nba' DataFrame
    for index, row in nba_regular.iterrows():
        home_team = row['HomeTeam']
        away_team = row['AwayTeam']
        
        # Call simulate_game function with HomeTeam, AwayTeam, and teams DataFrame as parameters
        game_result = simulate_game(home_team, away_team, teams)
        
        # Append the resulting DataFrame to the list
        game_dfs.append(game_result)

    # Concatenate all individual game DataFrames into a single DataFrame for the season
    season = pd.concat(game_dfs, ignore_index=True)
    
    # Append the season DataFrame to the list of seasons
    seasons.append(season)

# Concatenate all season DataFrames into a single DataFrame
montecarlo_seasons = pd.concat(seasons, ignore_index=True)

# Display or use the concatenated DataFrame 'all_seasons'
montecarlo_seasons

Unnamed: 0,HomeTeam,AwayTeam,HomePTS,AwayPTS,H_OT,A_OT,#_OT
0,Cleveland Cavaliers,Boston Celtics,112,104,0,0,0
1,Golden State Warriors,Houston Rockets,104,114,0,0,0
2,Detroit Pistons,Charlotte Hornets,115,97,0,0,0
3,Indiana Pacers,Brooklyn Nets,115,93,0,0,0
4,Orlando Magic,Miami Heat,109,90,0,0,0
...,...,...,...,...,...,...,...
61495,Orlando Magic,Washington Wizards,100,109,0,0,0
61496,Philadelphia 76ers,Milwaukee Bucks,103,97,0,0,0
61497,Los Angeles Clippers,Los Angeles Lakers,121,130,0,0,0
61498,Portland Trail Blazers,Utah Jazz,102,107,0,0,0


## Details

In [60]:
montecarlo_seasons.describe()

Unnamed: 0,HomePTS,AwayPTS,H_OT,A_OT,#_OT
count,61500.0,61500.0,61500.0,61500.0,61500.0
mean,108.183236,105.805382,0.528325,0.51535,0.047106
std,10.372625,10.710469,3.280949,3.223872,0.292914
min,73.0,66.0,0.0,0.0,0.0
25%,102.0,99.0,0.0,0.0,0.0
50%,108.0,105.0,0.0,0.0,0.0
75%,114.0,112.0,0.0,0.0,0.0
max,421.0,420.0,79.0,78.0,7.0
