In [None]:
# This is a very basic shot simulator: it takes the actual shot data for Celtics players from the 2022-2023 regular season and uses it to produce a naive prediction of the team's total scoring in a simulated game.
# The model is being actively improved for better accuracy and to account for more factors, such as the players in the game, rebounds, turnovers, etc.

In [1]:
# Import libraries
import pandas as pd
import numpy as np
import random
import scipy
import math

In [2]:
# Load the Celtics season files, then add 'Percent of Shots': each row's share
# of all attempts recorded in the 'FGA' column
df_team = pd.read_csv('celtics_shooting_2023.csv')
df_team_totals = pd.read_csv('celtics_totals_2023.csv').assign(
    **{'Percent of Shots': lambda totals: totals['FGA'] / totals['FGA'].sum()}
)
df_team_totals

Unnamed: 0,Rk,Player,Age,G,GS,MP,FG,FGA,FG%,3P,...,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,Percent of Shots
0,1.0,Jayson Tatum,24.0,74.0,74.0,2732.0,727.0,1559.0,0.466,240.0,...,78.0,571.0,649.0,342.0,78.0,51.0,213.0,160.0,2225.0,0.214207
1,2.0,Jaylen Brown,26.0,67.0,67.0,2405.0,679.0,1383.0,0.491,163.0,...,78.0,381.0,459.0,232.0,75.0,26.0,197.0,172.0,1784.0,0.190025
2,3.0,Derrick White,28.0,82.0,70.0,2319.0,350.0,758.0,0.462,149.0,...,52.0,241.0,293.0,321.0,54.0,76.0,95.0,177.0,1017.0,0.104149
3,4.0,Grant Williams,24.0,79.0,23.0,2045.0,216.0,476.0,0.454,115.0,...,87.0,276.0,363.0,131.0,41.0,31.0,82.0,192.0,641.0,0.065403
4,5.0,Marcus Smart,28.0,61.0,61.0,1957.0,250.0,602.0,0.415,115.0,...,46.0,145.0,191.0,382.0,93.0,23.0,143.0,172.0,703.0,0.082715
5,6.0,Al Horford,36.0,63.0,63.0,1922.0,228.0,479.0,0.476,145.0,...,73.0,317.0,390.0,189.0,30.0,61.0,37.0,121.0,616.0,0.065815
6,7.0,Malcolm Brogdon,30.0,67.0,0.0,1744.0,354.0,732.0,0.484,132.0,...,42.0,238.0,280.0,248.0,45.0,18.0,98.0,109.0,1000.0,0.100577
7,8.0,Sam Hauser,25.0,80.0,8.0,1290.0,180.0,396.0,0.455,140.0,...,35.0,169.0,204.0,71.0,29.0,21.0,30.0,99.0,512.0,0.054411
8,9.0,Robert Williams,25.0,35.0,20.0,824.0,127.0,170.0,0.747,0.0,...,104.0,188.0,292.0,50.0,22.0,48.0,34.0,68.0,279.0,0.023358
9,10.0,Luke Kornet,27.0,69.0,0.0,804.0,113.0,170.0,0.665,3.0,...,86.0,111.0,197.0,53.0,11.0,46.0,25.0,81.0,261.0,0.023358


In [3]:
# Import player shot data
# NOTE(review): despite the file name, the grouped output below lists players from
# other teams as well (e.g. Dallas, Atlanta), so this is not Celtics-only data.
df_shots = pd.read_csv('2023-celtics-shots.csv')
# 'Make Flag' is 1 when the 'Make or Miss' column equals 'Made', otherwise 0
df_shots['Make Flag'] = np.where(df_shots['Make or Miss']== 'Made', 1, 0)


# Make Dataframe for all players' shot data
# One row per (Player, Shot, Distance, Team). Aggregating with a list of functions
# produces two-level columns: ('Make Flag', 'count') and ('Make Flag', 'sum').
df_all_shots = df_shots[['Player','Team','Shot','Distance','Make Flag']].groupby(by=['Player','Shot','Distance','Team'] ).agg(['count','sum'])
# Makes = number of made shots in the group (sum of the 0/1 flag)
df_all_shots['Makes'] = df_shots[['Player','Shot','Distance','Make Flag','Team']].groupby(by=['Player','Shot','Distance','Team'] ).agg(['sum'])
# Taken = number of attempts in the group (row count)
df_all_shots['Taken'] = df_shots[['Player','Shot','Distance','Make Flag','Team']].groupby(by=['Player','Shot','Distance','Team'] ).agg(['count'])
# Fraction of the group's attempts that went in
df_all_shots['Shot Percentage'] = df_all_shots['Makes']/df_all_shots['Taken']
# Share of the player's total attempts that fall in this group; the per-player
# totals are aligned back onto the rows through the 'Player' index level, so the
# values for any one player add up to 1
df_all_shots['Shot Likelihood'] = df_all_shots['Taken']/df_all_shots['Taken'].groupby(by=['Player']).sum()
df_all_shots

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Make Flag,Make Flag,Makes,Taken,Shot Percentage,Shot Likelihood
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,count,sum,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Player,Shot,Distance,Team,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
A.J. Lawson,3-pointer,25,Dallas,1,0,0,1,0.000000,1.000000
AJ Griffin,2-pointer,0,Atlanta,1,1,1,1,1.000000,0.038462
AJ Griffin,2-pointer,4,Atlanta,1,1,1,1,1.000000,0.038462
AJ Griffin,2-pointer,5,Atlanta,1,0,0,1,0.000000,0.038462
AJ Griffin,2-pointer,7,Atlanta,2,1,1,2,0.500000,0.076923
...,...,...,...,...,...,...,...,...,...
Zach LaVine,3-pointer,28,Chicago,8,3,3,8,0.375000,0.111111
Zach LaVine,3-pointer,29,Chicago,3,1,1,3,0.333333,0.041667
Zach LaVine,3-pointer,31,Chicago,1,0,0,1,0.000000,0.013889
Zeke Nnaji,2-pointer,2,Denver,1,1,1,1,1.000000,0.500000


In [4]:
# Improved shot distance choice based on distribution
def shot_choice(player):
    """Randomly pick one shot profile for ``player``, weighted by how often they take it.

    Rows of ``df_all_shots`` are indexed by (Player, Shot, Distance, Team) and carry a
    'Shot Likelihood' column holding the share of the player's attempts in that row.

    Parameters
    ----------
    player : str
        Player name. An exact match on the 'Player' index level is used when one
        exists; otherwise the lookup falls back to a substring match against the
        stringified index entry (``DataFrame.filter(like=...)``).

    Returns
    -------
    tuple
        ``(index_entry, position)`` where ``index_entry`` is the chosen
        (Player, Shot, Distance, Team) tuple and ``position`` is its 0-based
        position among the player's rows.

    Raises
    ------
    ValueError
        If no rows match ``player``, or if the substring fallback matches rows
        belonging to more than one player.
    """
    likelihood_col = df_all_shots[['Shot Likelihood']]

    # Exact match first: a substring match can also pick up other players (or a
    # team / shot label) whose text merely contains the query.
    exact = likelihood_col.index.get_level_values('Player') == player
    if exact.any():
        player_shots = likelihood_col[exact]
    else:
        player_shots = likelihood_col.filter(like=player, axis=0)

    if player_shots.empty:
        raise ValueError(f"No shot data found for player {player!r}")

    matched_players = player_shots.index.get_level_values('Player').unique()
    if len(matched_players) > 1:
        # Likelihoods are normalised per player, so a multi-player match could
        # never form a valid probability vector.
        raise ValueError(
            f"Player {player!r} is ambiguous; matched: {sorted(matched_players)}"
        )

    shot_likelihood = player_shots.to_numpy().flatten()
    shot_value = player_shots.index.tolist()
    shot_value_index = list(range(len(shot_value)))
    final_choice = np.random.choice(shot_value_index, p=shot_likelihood)
    return (shot_value[final_choice], final_choice)

def shot_result(player):
    """Simulate a single shot attempt by ``player``.

    A shot profile is drawn with ``shot_choice``; the attempt is then resolved as a
    make or a miss using that profile's 'Shot Percentage' from ``df_all_shots``.

    Parameters
    ----------
    player : str
        Player name, passed through to ``shot_choice``.

    Returns
    -------
    tuple
        ``(shot_selection, result, make_probability)`` where ``shot_selection`` is
        the (Player, Shot, Distance, Team) index tuple, ``result`` is 1 for a make
        and 0 for a miss, and ``make_probability`` is the percentage used.
    """
    shot_selection = shot_choice(player)[0]

    # The lookup may yield a scalar or a one-element Series depending on the column
    # layout; pull the single value out explicitly rather than calling float() on a
    # Series, which newer pandas versions deprecate.
    percentage = df_all_shots.loc[shot_selection]['Shot Percentage']
    shot_sel_percent = float(np.asarray(percentage).item())

    make_or_miss = [1, 0]
    likelihood = [shot_sel_percent, 1 - shot_sel_percent]
    result = np.random.choice(make_or_miss, p=likelihood)
    return (shot_selection, result, likelihood[0])

# Quick check: simulate one shot for a single player and display the returned tuple
shot_result('Jayson Tatum')

(('Jayson Tatum', '3-pointer', 27, 'Celtics'), 0, 0.3269230769230769)

In [5]:
# Create List of all players and shot likelihood
players = df_team_totals['Player'].tolist()
shot_likelihood = df_team_totals['Percent of Shots'].tolist()

# Drop rows where either the name or the shot share is missing. The two lists are
# filtered together so that cleanedPlayers[i] always corresponds to cleanedShot[i];
# filtering them separately could shift or mis-size the weights.
_player_weight_pairs = [
    (name, share)
    for name, share in zip(players, shot_likelihood)
    if str(name) != 'nan' and str(share) != 'nan'
]
cleanedPlayers = [name for name, _ in _player_weight_pairs]
cleanedShot = [share for _, share in _player_weight_pairs]

# Draw the shooter for each attempt, weighted by each player's share of shots
def list_players(num_shots):
    """Return ``num_shots`` player names sampled with replacement.

    Names come from ``cleanedPlayers`` and are weighted by the matching entries
    of ``cleanedShot``.
    """
    return random.choices(cleanedPlayers, weights=cleanedShot, k=num_shots)

# Simulate one shot per listed shooter and tabulate the outcomes
def sim_shots(players_list):
    """Simulate a shot for every name in ``players_list`` and return a DataFrame.

    Each row describes one attempt with the columns 'Players', 'Shot' (always 1),
    'Make' (1 or 0), 'Possible Points', 'Made Points', 'Shot Type', 'Distance' and
    'Shot Likelihood'. The last column holds the third value returned by
    ``shot_result``, i.e. the make probability used for that attempt.
    """
    # Resolve the attempts in order so the random draws happen in list order.
    outcomes = [shot_result(shooter) for shooter in players_list]

    # The point value is read from the first character of the shot label
    # (labels look like '2-pointer' / '3-pointer').
    point_values = [float(selection[1][0]) for selection, _, _ in outcomes]
    makes = [made for _, made, _ in outcomes]

    data = {
        'Players': [selection[0] for selection, _, _ in outcomes],
        'Shot': [1 for _ in outcomes],
        'Make': makes,
        'Possible Points': point_values,
        'Made Points': [value * made for value, made in zip(point_values, makes)],
        'Shot Type': [selection[1] for selection, _, _ in outcomes],
        'Distance': [selection[2] for selection, _, _ in outcomes],
        'Shot Likelihood': [make_prob for _, _, make_prob in outcomes],
    }
    return pd.DataFrame(data)

In [6]:
# Divide the game up into 4 quarters with discrete plays for offense and defense
QUARTER_SECONDS = 12*60     # length of one quarter, in seconds
MEAN_PLAY_SECONDS = 17.6    # Poisson mean used for the length of a single play


def _count_plays(quarter_seconds, mean_play_seconds=MEAN_PLAY_SECONDS):
    """Count how many randomly sized plays it takes to use up one quarter.

    Play lengths are drawn from a Poisson distribution and subtracted from the
    clock until it reaches zero or below; the play that crosses zero is counted.
    """
    remaining = quarter_seconds
    plays = 0
    while remaining > 0:
        remaining -= np.random.poisson(lam=mean_play_seconds)
        plays += 1
    return plays


# Length of each quarter; the simulation works on a local copy, so these keep
# their configured value.
quarter_1 = quarter_2 = quarter_3 = quarter_4 = QUARTER_SECONDS

# Set the jump ball to be a random 50/50 toss up
# (drawn before the quarters so the sequence of random draws is jump ball, Q1..Q4)
jump_ball = np.random.binomial(n=1, p=0.5)

# Number of plays in each of the four quarters, simulated in order
game_plays = [
    _count_plays(length)
    for length in (quarter_1, quarter_2, quarter_3, quarter_4)
]
q1_plays, q2_plays, q3_plays, q4_plays = game_plays

# Which quarters the Celtics open: Q1 and Q4 when jump_ball is 1, otherwise Q2 and Q3
if jump_ball == 1:
    start_team = 'Celtics'
    celtics_open_quarter = (True, False, False, True)
else:
    start_team = 'Opponent'
    celtics_open_quarter = (False, True, True, False)

# Number of Celtics shots per quarter (TODO: update this to be for possessions).
# Each quarter's plays are split in half; the team that opens the quarter gets the
# extra play when the count is odd.
celtics_shots = [
    math.ceil(plays/2) if celtics_open else math.floor(plays/2)
    for plays, celtics_open in zip(game_plays, celtics_open_quarter)
]

print(start_team, celtics_shots)

Celtics [21, 20, 22, 22]


In [7]:
# Simulate every Celtics shot of the game and collect them in one dataframe
num_shots = sum(celtics_shots)

shooters = list_players(num_shots)
final_game_data = sim_shots(shooters)

final_game_data

Unnamed: 0,Players,Shot,Make,Possible Points,Made Points,Shot Type,Distance,Shot Likelihood
0,Derrick White,1,0,3.0,0.0,3-pointer,24,0.384615
1,Jaylen Brown,1,1,3.0,3.0,3-pointer,26,0.281690
2,Marcus Smart,1,0,2.0,0.0,2-pointer,13,0.571429
3,Jaylen Brown,1,0,3.0,0.0,3-pointer,27,0.408696
4,Jaylen Brown,1,0,3.0,0.0,3-pointer,26,0.281690
...,...,...,...,...,...,...,...,...
80,Al Horford,1,1,3.0,3.0,3-pointer,23,0.491803
81,Jayson Tatum,1,1,2.0,2.0,2-pointer,2,0.751724
82,Jaylen Brown,1,1,3.0,3.0,3-pointer,26,0.281690
83,Sam Hauser,1,0,3.0,0.0,3-pointer,25,0.400000


In [8]:
# Game summary: total points scored across all simulated shots
points = final_game_data.loc[:, 'Made Points'].sum()
print(points)

113.0


In [9]:
# Optional: uncomment to export the simulated game to a CSV file
#final_game_data.to_csv('game_test.csv')