# Celtics Models

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import classification_report

## Model A
#### Categorizing offensive, defensive and dual threat players

##### Preprocessing

In [12]:

# Load the data
data = pd.read_csv('data/averages.csv')
print(data.head())

# Select relevant columns including 'Rk' for reference
data = data[['Rk', 'PTS', 'FT', 'TRB', 'BLK', 'ODtag']]

# Display the first few rows of the data
print(data.head())

# Check for infinite values and replace them with NaN
data.replace([np.inf, -np.inf], np.nan, inplace=True)

# Drop rows with missing values
data.dropna(inplace=True)

# Check for unique values in 'ODtag'
print("Unique values in 'ODtag':", data['ODtag'].unique())

# Ensure target labels are zero-based
data['ODtag'] = data['ODtag'] - 1

# Check again for unique values in 'ODtag'
print("Adjusted unique values in 'ODtag':", data['ODtag'].unique())

# Separate features and target
X = data.drop('ODtag', axis=1)
y = data['ODtag']

# Store the 'Rk' column separately for reference
player_ids = X['Rk']
X = X.drop('Rk', axis=1)

# Normalize the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the data into training and test sets
X_train, X_test, y_train, y_test, player_ids_train, player_ids_test = train_test_split(X_scaled, y, player_ids, test_size=0.2, random_state=42)

# Convert to TensorFlow datasets
train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train)).shuffle(len(X_train)).batch(32)
test_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test)).batch(32)



   Rk              Player  Age   G  GS    MP   FG   FGA    FG%   3P  ...  TRB  \
0   1        Jayson Tatum   25  74  74  35.7  9.1  19.3  0.471  3.1  ...  8.1   
1   2        Jaylen Brown   27  70  70  33.5  9.0  17.9  0.499  2.1  ...  5.5   
2   3        Jrue Holiday   33  69  69  32.8  4.8  10.0  0.480  2.0  ...  5.4   
3   4       Derrick White   29  73  73  32.6  5.3  11.5  0.461  2.7  ...  4.2   
4   5  Kristaps Porziņģis   28  57  57  29.6  6.8  13.2  0.516  1.9  ...  7.2   

   AST  STL  BLK  TOV   PF   PTS  First Name  Last Name  ODtag  
0  4.9  1.0  0.6  2.5  2.0  26.9      Jayson      Tatum      1  
1  3.6  1.2  0.5  2.4  2.6  23.0      Jaylen      Brown      3  
2  4.8  0.9  0.8  1.8  1.6  12.5        Jrue    Holiday      3  
3  5.2  1.0  1.2  1.5  2.1  15.2     Derrick      White      3  
4  2.0  0.7  1.9  1.6  2.7  20.1    Kristaps  Porziņģis      1  

[5 rows x 31 columns]
   Rk   PTS   FT  TRB  BLK  ODtag
0   1  26.9  5.6  8.1  0.6      1
1   2  23.0  3.0  5.5  0.5      

##### Model

In [15]:

# Build the model
model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(32, activation='relu'),
    Dense(16, activation='relu'),
    Dense(3, activation='softmax')  # Assuming 3 classes: Offensive (0), Defensive (1), Dual (2)
])

# Compile the model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(train_dataset, epochs=100, validation_data=test_dataset)

# Evaluate the model
loss, accuracy = model.evaluate(test_dataset)
print(f'Test Accuracy: {accuracy * 100:.2f}%')

# Make predictions
y_pred = model.predict(X_test)
y_pred_labels = np.argmax(y_pred, axis=1)

# Combine player IDs with predictions
predictions = pd.DataFrame({'Rk': player_ids_test, 'True_Label': y_test, 'Predicted_Label': y_pred_labels})

# Display classification report
print(classification_report(y_test, y_pred_labels, target_names=['Offensive', 'Defensive', 'Dual']))

# Print predictions for reference
print(predictions.head(20))  # Displaying first 20 predictions for reference

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
