In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import json
import math
import numpy as np

# from helpers import plot_basketball_court

locations = pd.read_csv('raw-data/train_locs.csv')
play_by_play = pd.read_csv('raw-data/train_pbp.csv')

#Feels like its cheating the problem at hand to predict based on teams or conferences
play_by_play = play_by_play.drop(['team', 'opponent', 'conference', 'opp_conference'], axis=1)

In [2]:
#Combine CSV data into one large table
merged_df = pd.merge(play_by_play, locations, on='id', how='inner')

print(merged_df)

             id  is_oreb    court_x    court_y annotation_code
0           2-2        0  72.807686  37.409669              d1
1           2-2        0  79.080643  31.477796              d2
2           2-2        0  69.956321  24.467300              d3
3           2-2        0  75.468933  13.861763              d4
4           2-2        0  84.973496  19.254422              d5
...         ...      ...        ...        ...             ...
308045  5020-52        0  70.669644   7.270347               s
308046  5020-52        0  86.958520   4.382292              t1
308047  5020-52        0  58.962087  26.764930              t2
308048  5020-52        0  76.777949  20.427222              t3
308049  5020-52        0  81.019852  44.975904              t4

[308050 rows x 5 columns]


In [3]:
#Convert coordinates to only reference one direction of the court
#For now lets assume that if the shooter is on the right side of the court, the right basket is the target
#Ignore half court shots+ for now
#If the shooter has x greater than 47, mirror all points
shooter_locations = locations[(locations['annotation_code'] == 's')]
merged_df_2 = pd.merge(merged_df, shooter_locations, on='id', how='inner', suffixes=('', '_shooter'))
#if merged_df_2['court_x_shooter'] > 47, then mirror court_x to be 94 - court_x
merged_df_2['court_x'] = merged_df_2.apply(lambda x: 94 - x['court_x'] if x['court_x_shooter'] > 47 else x['court_x'], axis=1)
cleaned_data = merged_df_2.drop(['court_x_shooter', 'court_y_shooter', 'annotation_code_shooter'], axis=1)
#Note - some plays don't have a shooter and get removed
print(cleaned_data)

             id  is_oreb    court_x    court_y annotation_code
0           2-2        0  21.192314  37.409669              d1
1           2-2        0  14.919357  31.477796              d2
2           2-2        0  24.043679  24.467300              d3
3           2-2        0  18.531067  13.861763              d4
4           2-2        0   9.026504  19.254422              d5
...         ...      ...        ...        ...             ...
308045  5020-52        0  23.330356   7.270347               s
308046  5020-52        0   7.041480   4.382292              t1
308047  5020-52        0  35.037913  26.764930              t2
308048  5020-52        0  17.222051  20.427222              t3
308049  5020-52        0  12.980148  44.975904              t4

[308050 rows x 5 columns]


In [4]:
#add a column for dtb (distance to basket)
cleaned_data['dtb'] = np.linalg.norm(cleaned_data[['court_x', 'court_y']].values - [4, 25], axis=1)
print(cleaned_data.sort_values(by=['dtb']))

             id  is_oreb    court_x    court_y annotation_code        dtb
50483    1194-4        1   4.130788  25.002102              d4   0.130805
136781   3138-2        0   3.855041  25.001898              d2   0.144971
135335   3105-3        0   3.907463  24.888103               s   0.145204
238528  4472-24        1   4.095318  24.882916              t3   0.150977
277578   4861-3        0   3.921562  24.868055              t3   0.153499
...         ...      ...        ...        ...             ...        ...
184033  4000-28        0  86.758694  30.361737              d4  82.932199
263521  4717-40        0  84.417729   4.123958              d2  83.083213
263527  4717-40        0  86.473980  12.642430              t2  83.394646
184036  4000-28        0  88.125000  30.878540              t1  84.330142
263524  4717-40        0  90.292729  45.420207              d5  88.675926

[308050 rows x 6 columns]


In [5]:
# Add rankings
cleaned_data['dtb_rank'] = cleaned_data.groupby(['id'])['dtb'].rank(method="first").astype(int)
cleaned_data['team'] = np.where(cleaned_data['annotation_code'].str.contains('d'), "Defense", "Offense")
cleaned_data['dtb_team_rank'] = cleaned_data.groupby(['id', 'team'])['dtb'].rank(method="first").astype(int)

In [6]:
# Pivot the dataframe for each play
pivot_df = cleaned_data.pivot_table(index=['id', 'is_oreb'], columns=['annotation_code'], values=['court_x', 'court_y', 'dtb'])
pivot_df.columns = ['{}_{}'.format(col[0], col[1]) for col in pivot_df.columns]
pivot_df = pivot_df.reset_index()

pivot_df_2 = cleaned_data.pivot_table(index=['id', 'is_oreb'], columns=['dtb_rank'], values=['dtb'])
pivot_df_2.columns = ['{}_{}'.format(col[0], col[1]) for col in pivot_df_2.columns]
pivot_df_2 = pivot_df_2.reset_index()

pivot_df_3 = cleaned_data.pivot_table(index=['id', 'is_oreb'], columns=['dtb_team_rank', 'team'], values=['dtb'])
pivot_df_3.columns = ['{}_{}_{}'.format(col[0], col[1], col[2]) for col in pivot_df_3.columns]
pivot_df_3 = pivot_df_3.reset_index()

pivot_df = pd.merge(pivot_df, pivot_df_2, on=['id', 'is_oreb'], how='inner')
pivot_df = pd.merge(pivot_df, pivot_df_3, on=['id', 'is_oreb'], how='inner')


In [7]:
#Add some aggregate columns
pivot_df['total_offense_dtb'] = pivot_df[["dtb_t1", "dtb_t2", "dtb_t3", "dtb_t4", "dtb_s"]].sum(axis=1)
pivot_df['total_defense_dtb'] = pivot_df[["dtb_d1", "dtb_d2", "dtb_d3", "dtb_d4", "dtb_d5"]].sum(axis=1)
pivot_df['top2_offense_dtb'] = pivot_df[["dtb_1_Offense", "dtb_2_Offense"]].sum(axis=1)
pivot_df['top3_offense_dtb'] = pivot_df[["dtb_1_Offense", "dtb_2_Offense", "dtb_3_Offense"]].sum(axis=1)
pivot_df['top4_offense_dtb'] = pivot_df[["dtb_1_Offense", "dtb_2_Offense", "dtb_3_Offense", "dtb_4_Offense"]].sum(axis=1)
pivot_df['top2_offense_dtb'] = pivot_df[["dtb_1_Offense", "dtb_2_Offense"]].sum(axis=1)
pivot_df['top3_offense_dtb'] = pivot_df[["dtb_1_Offense", "dtb_2_Offense", "dtb_3_Offense"]].sum(axis=1)
pivot_df['top4_offense_dtb'] = pivot_df[["dtb_1_Offense", "dtb_2_Offense", "dtb_3_Offense", "dtb_4_Offense"]].sum(axis=1)
pivot_df['top1_any_dtb'] = pivot_df[["dtb_1"]].sum(axis=1)
pivot_df['top2_any_dtb'] = pivot_df[["dtb_1", "dtb_2"]].sum(axis=1)
pivot_df['top3_any_dtb'] = pivot_df[["dtb_1", "dtb_2", "dtb_3"]].sum(axis=1)
pivot_df['top4_any_dtb'] = pivot_df[["dtb_1", "dtb_2", "dtb_3", "dtb_4"]].sum(axis=1)
pivot_df['top5_any_dtb'] = pivot_df[["dtb_1", "dtb_2", "dtb_3", "dtb_4", "dtb_5"]].sum(axis=1)
pivot_df['top6_any_dtb'] = pivot_df[["dtb_1", "dtb_2", "dtb_3", "dtb_4", "dtb_5", "dtb_6"]].sum(axis=1)
pivot_df['top7_any_dtb'] = pivot_df[["dtb_1", "dtb_2", "dtb_3", "dtb_4", "dtb_5", "dtb_6", "dtb_7"]].sum(axis=1)
pivot_df['top8_any_dtb'] = pivot_df[["dtb_1", "dtb_2", "dtb_3", "dtb_4", "dtb_5", "dtb_6", "dtb_7", "dtb_8"]].sum(axis=1)
pivot_df['top9_any_dtb'] = pivot_df[["dtb_1", "dtb_2", "dtb_3", "dtb_4", "dtb_5", "dtb_6", "dtb_7", "dtb_8", "dtb_9"]].sum(axis=1)
count_less_than_5 = lambda row: sum(row < 5)
pivot_df['count_less_than_5'] = pivot_df[["dtb_1", "dtb_2", "dtb_3", "dtb_4", "dtb_5", "dtb_6", "dtb_7", "dtb_8", "dtb_9", 'dtb_10']].iloc[:, 1:].apply(count_less_than_5, axis=1)
pivot_df['count_less_than_5_offense'] = pivot_df[["dtb_1_Offense", "dtb_2_Offense", "dtb_3_Offense", "dtb_4_Offense", "dtb_5_Offense"]].iloc[:, 1:].apply(count_less_than_5, axis=1)
pivot_df['count_less_than_5_defense'] = pivot_df[["dtb_1_Defense", "dtb_2_Defense", "dtb_3_Defense", "dtb_4_Defense", "dtb_5_Defense"]].iloc[:, 1:].apply(count_less_than_5, axis=1)
count_less_than_10 = lambda row: sum(row < 5)
pivot_df['count_less_than_10'] = pivot_df[["dtb_1", "dtb_2", "dtb_3", "dtb_4", "dtb_5", "dtb_6", "dtb_7", "dtb_8", "dtb_9", 'dtb_10']].iloc[:, 1:].apply(count_less_than_10, axis=1)
pivot_df['count_less_than_10_offense'] = pivot_df[["dtb_1_Offense", "dtb_2_Offense", "dtb_3_Offense", "dtb_4_Offense", "dtb_5_Offense"]].iloc[:, 1:].apply(count_less_than_10, axis=1)
pivot_df['count_less_than_10_defense'] = pivot_df[["dtb_1_Defense", "dtb_2_Defense", "dtb_3_Defense", "dtb_4_Defense", "dtb_5_Defense"]].iloc[:, 1:].apply(count_less_than_10, axis=1)


print(pivot_df)

          id  is_oreb  court_x_d1  court_x_d2  court_x_d3  court_x_d4  \
0      10-10        0   16.757017   20.172220   13.667048   11.552852   
1      10-12        0    7.229075    7.038597    9.324267   19.419485   
2      10-13        0   26.028072    8.849291   14.908529    8.849291   
3       10-3        0   16.427338   12.561111   20.535393    8.937342   
4       10-6        0   21.055481   34.011315   12.042747   15.140892   
...      ...      ...         ...         ...         ...         ...   
30800  998-6        0    6.761558   19.689568    5.915878   11.836204   
30801  999-1        0   19.336079   23.806515    8.220410    9.670299   
30802  999-3        0   12.275106    6.344312   17.065397   19.574608   
30803  999-4        0    7.416399    8.556974   22.243458   22.927847   
30804  999-5        0    6.546554    6.755028   20.097206   20.514160   

       court_x_d5  court_x_s  court_x_t1  court_x_t2  ...  top6_any_dtb  \
0       12.853857  27.490582   18.545910   28.30

In [8]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from scikeras.wrappers import KerasClassifier
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split

X = pivot_df.drop(['id', 'is_oreb'], axis=1)
Y = pivot_df['is_oreb']

# Create a SimpleImputer instance to replace NaN values with the mean of the column
imputer = SimpleImputer(strategy='mean')
scaler = StandardScaler()
X = imputer.fit_transform(X)
X = scaler.fit_transform(X)

training_images, testing_images, training_labels, testing_labels = train_test_split(X, 
                                                                   Y,
                                                                   test_size=0.2,
                                                                   shuffle=True,
                                                                   random_state=0)
from sklearn.model_selection import GridSearchCV
from scikeras.wrappers import KerasClassifier

# Custom wrapper function for your Keras model with two parameters
def create_model(batch_size=32):
    model = Sequential()
    model.add(Dense(40, input_shape=(70,), activation='relu'))
    model.add(Dense(80, activation='relu'))
    model.add(Dense(batch_size, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

# Define the batch sizes you want to try
batch_sizes = list(range(8, 257, 8))

# Create the KerasClassifier
keras_classifier = KerasClassifier(build_fn=create_model, verbose=1)

# Create a parameter grid to search over
param_grid = dict(batch_size=batch_sizes)

# Create the GridSearchCV instance
grid = GridSearchCV(estimator=keras_classifier, param_grid=param_grid, cv=3)

# Fit the grid search on your data
grid_result = grid.fit(training_images, training_labels)

# Print the best results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

  X, y = self._initialize(X, y)
2023-07-20 16:09:16.244518: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1
2023-07-20 16:09:16.244542: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 8.00 GB
2023-07-20 16:09:16.244548: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 2.67 GB
2023-07-20 16:09:16.244757: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:303] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-07-20 16:09:16.244771: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:269] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)
2023-07-20 16:09:16.582440: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-07-20 16:09:19.669690: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




  X, y = self._initialize(X, y)
2023-07-20 16:09:20.243477: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-07-20 16:09:23.244670: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




  X, y = self._initialize(X, y)
2023-07-20 16:09:23.777723: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-07-20 16:09:26.787207: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




  X, y = self._initialize(X, y)
2023-07-20 16:09:27.329808: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-07-20 16:09:29.768633: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




  X, y = self._initialize(X, y)
2023-07-20 16:09:30.249499: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-07-20 16:09:32.704056: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




  X, y = self._initialize(X, y)
2023-07-20 16:09:33.179008: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-07-20 16:09:35.692624: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




  X, y = self._initialize(X, y)
2023-07-20 16:09:36.166786: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-07-20 16:09:38.331357: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




  X, y = self._initialize(X, y)
2023-07-20 16:09:38.838656: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-07-20 16:09:41.109192: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




  X, y = self._initialize(X, y)
2023-07-20 16:09:41.561795: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-07-20 16:09:43.674454: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




  X, y = self._initialize(X, y)
2023-07-20 16:09:44.106837: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-07-20 16:09:45.984857: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




  X, y = self._initialize(X, y)
2023-07-20 16:09:46.395372: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-07-20 16:09:48.268451: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




  X, y = self._initialize(X, y)
2023-07-20 16:09:48.655580: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-07-20 16:09:50.547622: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




  X, y = self._initialize(X, y)
2023-07-20 16:09:50.944527: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-07-20 16:09:52.669734: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
  X, y = self._initialize(X, y)
2023-07-20 16:09:53.048445: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-07-20 16:09:54.730178: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
  X, y = self._initialize(X, y)


  1/257 [..............................] - ETA: 3:41 - loss: 0.8204 - accuracy: 0.4688

2023-07-20 16:09:55.669880: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-07-20 16:09:57.555539: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
  X, y = self._initialize(X, y)
2023-07-20 16:09:57.921733: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-07-20 16:09:59.511733: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
  X, y = self._initialize(X, y)
2023-07-20 16:09:59.876103: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-07-20 16:10:01.464199: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
  X, y = self._initialize(X, y)


  1/229 [..............................] - ETA: 1:04 - loss: 0.8160 - accuracy: 0.5139

2023-07-20 16:10:01.808642: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-07-20 16:10:03.377494: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
  X, y = self._initialize(X, y)


  1/206 [..............................] - ETA: 59s - loss: 0.7744 - accuracy: 0.5375

2023-07-20 16:10:03.724406: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-07-20 16:10:05.176278: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
  X, y = self._initialize(X, y)


  1/206 [..............................] - ETA: 58s - loss: 0.8363 - accuracy: 0.5375

2023-07-20 16:10:05.526696: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-07-20 16:10:06.983567: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
  X, y = self._initialize(X, y)


  1/206 [..............................] - ETA: 59s - loss: 0.7464 - accuracy: 0.5750

2023-07-20 16:10:07.314101: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-07-20 16:10:08.784085: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
  X, y = self._initialize(X, y)


  1/187 [..............................] - ETA: 58s - loss: 1.0018 - accuracy: 0.4205

2023-07-20 16:10:09.123707: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-07-20 16:10:10.540356: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
  X, y = self._initialize(X, y)


  1/187 [..............................] - ETA: 52s - loss: 0.7537 - accuracy: 0.5455

2023-07-20 16:10:10.871359: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-07-20 16:10:12.239045: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
  X, y = self._initialize(X, y)


  1/187 [..............................] - ETA: 53s - loss: 0.9015 - accuracy: 0.4091

2023-07-20 16:10:12.556112: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-07-20 16:10:13.966838: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
  X, y = self._initialize(X, y)
2023-07-20 16:10:14.285448: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-07-20 16:10:15.623743: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
  X, y = self._initialize(X, y)


  1/172 [..............................] - ETA: 55s - loss: 0.8211 - accuracy: 0.4688

2023-07-20 16:10:15.978999: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-07-20 16:10:17.299797: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
  X, y = self._initialize(X, y)


  1/172 [..............................] - ETA: 51s - loss: 1.0036 - accuracy: 0.4375

2023-07-20 16:10:17.607890: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-07-20 16:10:18.922946: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
  X, y = self._initialize(X, y)


  1/158 [..............................] - ETA: 48s - loss: 0.9546 - accuracy: 0.4808

2023-07-20 16:10:19.240419: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-07-20 16:10:20.527061: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
  X, y = self._initialize(X, y)


  1/158 [..............................] - ETA: 46s - loss: 0.8413 - accuracy: 0.4135

2023-07-20 16:10:20.840581: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-07-20 16:10:22.072580: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
  X, y = self._initialize(X, y)


  1/158 [..............................] - ETA: 46s - loss: 0.8122 - accuracy: 0.4615

2023-07-20 16:10:22.375280: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-07-20 16:10:23.616402: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
  X, y = self._initialize(X, y)
2023-07-20 16:10:24.728337: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-07-20 16:10:26.011611: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
  X, y = self._initialize(X, y)


  1/147 [..............................] - ETA: 42s - loss: 0.7326 - accuracy: 0.4911

2023-07-20 16:10:26.315880: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-07-20 16:10:27.506672: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
  X, y = self._initialize(X, y)


  1/147 [..............................] - ETA: 42s - loss: 0.8152 - accuracy: 0.5268

2023-07-20 16:10:27.795961: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-07-20 16:10:29.006700: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
  X, y = self._initialize(X, y)
2023-07-20 16:10:29.301617: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-07-20 16:10:30.521725: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
  X, y = self._initialize(X, y)


  1/137 [..............................] - ETA: 40s - loss: 0.7800 - accuracy: 0.5417

2023-07-20 16:10:30.815396: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-07-20 16:10:31.942382: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
  X, y = self._initialize(X, y)


  1/137 [..............................] - ETA: 41s - loss: 0.9051 - accuracy: 0.5250

2023-07-20 16:10:32.223089: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-07-20 16:10:33.372321: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
  X, y = self._initialize(X, y)
2023-07-20 16:10:33.657501: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-07-20 16:10:34.774429: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
  X, y = self._initialize(X, y)


  1/129 [..............................] - ETA: 37s - loss: 0.6812 - accuracy: 0.5938

2023-07-20 16:10:35.061431: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-07-20 16:10:36.170692: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
  X, y = self._initialize(X, y)


  1/129 [..............................] - ETA: 37s - loss: 0.7246 - accuracy: 0.5312

2023-07-20 16:10:36.450278: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-07-20 16:10:37.552144: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
  X, y = self._initialize(X, y)


  1/617 [..............................] - ETA: 3:01 - loss: 0.8433 - accuracy: 0.5500

2023-07-20 16:10:37.844745: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Best: 0.717538 using {'batch_size': 40}


In [9]:
# Generate 5 random indices to select samples from X
num_random_samples = 5
random_indices = np.random.choice(X.shape[0], num_random_samples, replace=False)
# Select random samples from X and corresponding labels from Y using the random indices
random_samples = X[random_indices]
random_labels = Y[random_indices]
# Get predictions for the random samples using the trained model
predictions = model.predict(random_samples)
# Print the random samples, their corresponding actual labels from Y, and the model predictions
for i in range(num_random_samples):
    print("Random Sample {}: Actual Label: {}  Prediction: {:.2f}".format(
        i+1, random_labels.iloc[i], predictions[i][0]))

NameError: name 'model' is not defined

In [None]:
# from helpers import plot_basketball_court
locations = pd.read_csv('raw-data/test_locs.csv')
play_by_play = pd.read_csv('raw-data/test_pbp.csv')
print(play_by_play.shape)
#Feels like its cheating the problem at hand to predict based on teams or conferences
play_by_play = play_by_play.drop(['team', 'opponent', 'conference', 'opp_conference'], axis=1)
#Combine CSV data into one large table
merged_df = pd.merge(play_by_play, locations, on='id', how='left')
print(locations.shape)
print(play_by_play.shape)
#If the shooter has x greater than 47, mirror all points
shooter_locations = locations[(locations['annotation_code'] == 's')]
merged_df_2 = pd.merge(merged_df, shooter_locations, on='id', how='left', suffixes=('', '_shooter'))
#if merged_df_2['court_x_shooter'] > 47, then mirror court_x to be 94 - court_x
merged_df_2['court_x'] = merged_df_2.apply(lambda x: 94 - x['court_x'] if x['court_x_shooter'] > 47 else x['court_x'], axis=1)
cleaned_data = merged_df_2.drop(['court_x_shooter', 'court_y_shooter', 'annotation_code_shooter'], axis=1)
#add a column for dtb (distance to basket)
cleaned_data['dtb'] = np.linalg.norm(cleaned_data[['court_x', 'court_y']].values - [4, 25], axis=1)
# Add rankings
cleaned_data['dtb_rank'] = cleaned_data.groupby(['id'])['dtb'].rank(method="first").astype(int)
cleaned_data['team'] = np.where(cleaned_data['annotation_code'].str.contains('d'), "Defense", "Offense")
cleaned_data['dtb_team_rank'] = cleaned_data.groupby(['id', 'team'])['dtb'].rank(method="first").astype(int)
# Pivot the dataframe for each play
pivot_df = cleaned_data.pivot_table(index=['id'], columns=['annotation_code'], values=['court_x', 'court_y', 'dtb'])
pivot_df.columns = ['{}_{}'.format(col[0], col[1]) for col in pivot_df.columns]
pivot_df = pivot_df.reset_index()
pivot_df_2 = cleaned_data.pivot_table(index=['id'], columns=['dtb_rank'], values=['dtb'])
pivot_df_2.columns = ['{}_{}'.format(col[0], col[1]) for col in pivot_df_2.columns]
pivot_df_2 = pivot_df_2.reset_index()
pivot_df_3 = cleaned_data.pivot_table(index=['id'], columns=['dtb_team_rank', 'team'], values=['dtb'])
pivot_df_3.columns = ['{}_{}_{}'.format(col[0], col[1], col[2]) for col in pivot_df_3.columns]
pivot_df_3 = pivot_df_3.reset_index()
pivot_df = pd.merge(pivot_df, pivot_df_2, on=['id'], how='left')
pivot_df = pd.merge(pivot_df, pivot_df_3, on=['id'], how='left')
#Add some aggregate columns
pivot_df['total_offense_dtb'] = pivot_df[["dtb_t1", "dtb_t2", "dtb_t3", "dtb_t4", "dtb_s"]].sum(axis=1)
pivot_df['total_defense_dtb'] = pivot_df[["dtb_d1", "dtb_d2", "dtb_d3", "dtb_d4", "dtb_d5"]].sum(axis=1)
pivot_df['top2_offense_dtb'] = pivot_df[["dtb_1_Offense", "dtb_2_Offense"]].sum(axis=1)
pivot_df['top3_offense_dtb'] = pivot_df[["dtb_1_Offense", "dtb_2_Offense", "dtb_3_Offense"]].sum(axis=1)
pivot_df['top4_offense_dtb'] = pivot_df[["dtb_1_Offense", "dtb_2_Offense", "dtb_3_Offense", "dtb_4_Offense"]].sum(axis=1)
pivot_df['top2_offense_dtb'] = pivot_df[["dtb_1_Offense", "dtb_2_Offense"]].sum(axis=1)
pivot_df['top3_offense_dtb'] = pivot_df[["dtb_1_Offense", "dtb_2_Offense", "dtb_3_Offense"]].sum(axis=1)
pivot_df['top4_offense_dtb'] = pivot_df[["dtb_1_Offense", "dtb_2_Offense", "dtb_3_Offense", "dtb_4_Offense"]].sum(axis=1)
pivot_df['top1_any_dtb'] = pivot_df[["dtb_1"]].sum(axis=1)
pivot_df['top2_any_dtb'] = pivot_df[["dtb_1", "dtb_2"]].sum(axis=1)
pivot_df['top3_any_dtb'] = pivot_df[["dtb_1", "dtb_2", "dtb_3"]].sum(axis=1)
pivot_df['top4_any_dtb'] = pivot_df[["dtb_1", "dtb_2", "dtb_3", "dtb_4"]].sum(axis=1)
pivot_df['top5_any_dtb'] = pivot_df[["dtb_1", "dtb_2", "dtb_3", "dtb_4", "dtb_5"]].sum(axis=1)
pivot_df['top6_any_dtb'] = pivot_df[["dtb_1", "dtb_2", "dtb_3", "dtb_4", "dtb_5", "dtb_6"]].sum(axis=1)
pivot_df['top7_any_dtb'] = pivot_df[["dtb_1", "dtb_2", "dtb_3", "dtb_4", "dtb_5", "dtb_6", "dtb_7"]].sum(axis=1)
pivot_df['top8_any_dtb'] = pivot_df[["dtb_1", "dtb_2", "dtb_3", "dtb_4", "dtb_5", "dtb_6", "dtb_7", "dtb_8"]].sum(axis=1)
pivot_df['top9_any_dtb'] = pivot_df[["dtb_1", "dtb_2", "dtb_3", "dtb_4", "dtb_5", "dtb_6", "dtb_7", "dtb_8", "dtb_9"]].sum(axis=1)
count_less_than_5 = lambda row: sum(row < 5)
pivot_df['count_less_than_5'] = pivot_df[["dtb_1", "dtb_2", "dtb_3", "dtb_4", "dtb_5", "dtb_6", "dtb_7", "dtb_8", "dtb_9", 'dtb_10']].iloc[:, 1:].apply(count_less_than_5, axis=1)
pivot_df['count_less_than_5_offense'] = pivot_df[["dtb_1_Offense", "dtb_2_Offense", "dtb_3_Offense", "dtb_4_Offense", "dtb_5_Offense"]].iloc[:, 1:].apply(count_less_than_5, axis=1)
pivot_df['count_less_than_5_defense'] = pivot_df[["dtb_1_Defense", "dtb_2_Defense", "dtb_3_Defense", "dtb_4_Defense", "dtb_5_Defense"]].iloc[:, 1:].apply(count_less_than_5, axis=1)
count_less_than_10 = lambda row: sum(row < 5)
pivot_df['count_less_than_10'] = pivot_df[["dtb_1", "dtb_2", "dtb_3", "dtb_4", "dtb_5", "dtb_6", "dtb_7", "dtb_8", "dtb_9", 'dtb_10']].iloc[:, 1:].apply(count_less_than_10, axis=1)
pivot_df['count_less_than_10_offense'] = pivot_df[["dtb_1_Offense", "dtb_2_Offense", "dtb_3_Offense", "dtb_4_Offense", "dtb_5_Offense"]].iloc[:, 1:].apply(count_less_than_10, axis=1)
pivot_df['count_less_than_10_defense'] = pivot_df[["dtb_1_Defense", "dtb_2_Defense", "dtb_3_Defense", "dtb_4_Defense", "dtb_5_Defense"]].iloc[:, 1:].apply(count_less_than_10, axis=1)

In [None]:
X = pivot_df.drop(['id'], axis=1)

# Create a SimpleImputer instance to replace NaN values with the mean of the column
imputer = SimpleImputer(strategy='mean')
scaler = StandardScaler()
X = imputer.fit_transform(X)
X = scaler.fit_transform(X)

Yhat = model.predict(X)

result_pivot_df = pivot_df
result_pivot_df['pred'] = Yhat

result_pivot_df['pred'] = result_pivot_df['pred'].apply(lambda x: round(x, 3))

# Print the resulting DataFrame with 'id' and 'Yhat' columns
print(result_pivot_df[['id', 'pred']])
result_pivot_df[['id', 'pred']].to_csv('entry1.csv', encoding='utf-8', index=False)