In [780]:
# Import Packages

import pandas as pd
import numpy as np
import os
import pickle

from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from imblearn.over_sampling import SMOTE
from tcn import TCN
from tensorflow.keras import Model
from tensorflow.keras.layers import Input, Normalization, Conv1D, MaxPooling1D, Flatten, Dense, LSTM, GRU, concatenate, Dropout
from tensorflow.keras.initializers import HeNormal
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.models import Sequential

In [781]:
with open('all_batting_data.pkl', 'rb') as file:
    batting_data = pickle.load(file)

with open('all_pitching_data.pkl', 'rb') as file:
    pitching_data = pickle.load(file)

with open('all_fielding_data.pkl', 'rb') as file:
    fielding_data = pickle.load(file)

print("Data loaded from 'all_batting_data.pkl', 'all_pitching_data.pkl', and 'all_fielding_data.pkl'.")

Data loaded from 'all_batting_data.pkl', 'all_pitching_data.pkl', and 'all_fielding_data.pkl'.


In [782]:
print(batting_data.shape)
print(pitching_data.shape)
print(fielding_data.shape)

(266, 25, 19)
(266, 25, 16)
(266, 25, 10)


In [783]:
map = False

playoff_teams = pd.read_csv('playoff_teams.csv')
success = playoff_teams['Level of Success']
if map:
    mappings = {'WC': 'First Round', 'DS': 'First Round', 'LS': 'League Series', 'WS': 'World Series', 'C': 'World Series'}
    success = success.map(mappings)
success = success.to_numpy()

class_weights = compute_class_weight(class_weight='balanced', classes=np.unique(success), y=success)
class_weights = dict(enumerate(class_weights))

In [784]:
categories, inverse = np.unique(success, return_inverse=True)
y_all = np.zeros((success.size, categories.size))
y_all[np.arange(success.size), inverse] = 1

In [785]:
y_all.shape

(266, 5)

In [786]:
batting_data = batting_data.astype(np.float64)
batting_data = np.nan_to_num(batting_data, nan=0, posinf=20)
pitching_data = pitching_data.astype(np.float64)
pitching_data = np.nan_to_num(pitching_data, nan=0, posinf=20)
fielding_data = fielding_data.astype(np.float64)
fielding_data = np.nan_to_num(fielding_data, nan=0, posinf=20)

X_batting_train, X_batting_test, X_pitching_train, X_pitching_test, X_fielding_train, X_fielding_test, y_train, y_test = train_test_split(batting_data,
                                                                                                                                          pitching_data,
                                                                                                                                          fielding_data,
                                                                                                                                          y_all, test_size=0.2, random_state=98)

X_batting_train, X_batting_val, X_pitching_train, X_pitching_val, X_fielding_train, X_fielding_val, y_train, y_val = train_test_split(X_batting_train,
                                                                                                                                      X_pitching_train,
                                                                                                                                      X_fielding_train,
                                                                                                                                      y_train, test_size=0.2, random_state=98*2)

In [787]:
X_fielding_train.shape

(169, 25, 10)

In [788]:
combined_inputs = np.concatenate((X_batting_train, X_pitching_train, X_fielding_train), axis=-1)
n_samples, time_steps, features = combined_inputs.shape
X_flattened = combined_inputs.reshape(n_samples, -1)
smote = SMOTE(sampling_strategy='not majority')
X_resampled, y_resampled = smote.fit_resample(X_flattened, y_train)
X_resampled = X_resampled.reshape(-1, time_steps, features)

X_batting_resampled = X_resampled[:, :, :X_batting_train.shape[-1]]
print(0, X_batting_train.shape[-1])
X_pitching_resampled = X_resampled[:, :, X_batting_train.shape[-1]:X_batting_train.shape[-1]+X_pitching_train.shape[-1]]
print(X_batting_train.shape[-1], X_batting_train.shape[-1]+X_pitching_train.shape[-1])
X_fielding_resampled = X_resampled[:, :, X_batting_train.shape[-1]+X_pitching_train.shape[-1]:]
print(X_batting_train.shape[-1]+X_pitching_train.shape[-1], X_resampled.shape[-1])

0 19
19 35
35 45


In [789]:
X_fielding_resampled.shape

(375, 25, 10)

In [790]:
norm_batting_layer = Normalization()
norm_batting_layer.adapt(X_batting_resampled)

norm_pitching_layer = Normalization()
norm_pitching_layer.adapt(X_pitching_resampled)

norm_fielding_layer = Normalization()
norm_fielding_layer.adapt(X_fielding_resampled)

In [791]:
from tensorflow.keras.layers import LeakyReLU

batting_input = Input((X_batting_train.shape[1], X_batting_train.shape[2]))
pitching_input = Input((X_pitching_train.shape[1], X_pitching_train.shape[2]))
fielding_input = Input((X_fielding_train.shape[1], X_fielding_train.shape[2]))
batting_input = norm_batting_layer(batting_input)
pitching_input = norm_pitching_layer(pitching_input)
fielding_input = norm_fielding_layer(fielding_input)

x_b = Dense(256, activation=LeakyReLU(alpha=0.01), kernel_initializer=HeNormal())(batting_input)
x_p = Dense(256, activation=LeakyReLU(alpha=0.01), kernel_initializer=HeNormal())(pitching_input)
x_f = Dense(256, activation=LeakyReLU(alpha=0.01), kernel_initializer=HeNormal())(fielding_input)

# x_b = Dropout(0.1)(x_b)
# x_p = Dropout(0.1)(x_p)
# x_f = Dropout(0.1)(x_f)

for i in range(3):
    x_b = Conv1D(128, 3, activation=LeakyReLU(alpha=0.01))(x_b)
    x_p = Conv1D(128, 3, activation=LeakyReLU(alpha=0.01))(x_p)
    x_f = Conv1D(128, 3, activation=LeakyReLU(alpha=0.01))(x_f)

    # x_b = Dropout(0.1)(x_b)
    # x_p = Dropout(0.1)(x_p)
    # x_f = Dropout(0.1)(x_f)

x = concatenate((x_b, x_p, x_f), axis=-1)
# x = Conv1D(512, 3, activation=LeakyReLU(alpha=0.01))(x)
# # x = Dropout(0.1)(x)
# for i in range(2):
#     x = Conv1D(256, 3, activation=LeakyReLU(alpha=0.01))(x)
#     # x = Dropout(0.1)(x)

# x = Conv1D(128, 3, activation=LeakyReLU(alpha=0.01))(x)
x = Flatten()(x)

# x_b = Flatten()(x_b)
# x_p = Flatten()(x_p)
# x_f = Flatten()(x_f)
# x = concatenate([x_b, x_p, x_f])
x = Dense(128)(x)
# x = Dropout(0.1)(x)
x = Dense(64)(x)
x = Dense(32)(x)
out = Dense(y_all.shape[1], activation='softmax')(x)

player_model = Model([batting_input, pitching_input, fielding_input], out)

player_model.summary()



In [792]:
optimizer = Adam(learning_rate=0.00001, clipvalue=1.0)
early_stopping = EarlyStopping(monitor='val_accuracy', patience=10, restore_best_weights=True)

player_model.compile(optimizer=optimizer,
                     loss='categorical_crossentropy',
                     metrics=['accuracy'])

In [793]:
player_model.fit([X_batting_resampled, X_pitching_resampled, X_fielding_resampled], y_resampled, batch_size=2, epochs=10, validation_batch_size=16, validation_data=([X_batting_val, X_pitching_val, X_fielding_val], y_val), callbacks=early_stopping)

Epoch 1/10
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 22ms/step - accuracy: 0.1597 - loss: 91.1490 - val_accuracy: 0.2558 - val_loss: 46.4051
Epoch 2/10
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 19ms/step - accuracy: 0.3383 - loss: 40.2851 - val_accuracy: 0.1395 - val_loss: 50.9021
Epoch 3/10
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 19ms/step - accuracy: 0.3771 - loss: 29.0367 - val_accuracy: 0.4186 - val_loss: 38.3017
Epoch 4/10
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 20ms/step - accuracy: 0.4433 - loss: 23.4117 - val_accuracy: 0.1628 - val_loss: 38.6024
Epoch 5/10
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 21ms/step - accuracy: 0.4323 - loss: 17.6389 - val_accuracy: 0.1860 - val_loss: 33.8689
Epoch 6/10
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 21ms/step - accuracy: 0.5268 - loss: 12.7267 - val_accuracy: 0.1860 - val_loss: 36.1084
Epoch 7/1

<keras.src.callbacks.history.History at 0x195981450>

In [794]:
player_model.evaluate([X_batting_test, X_pitching_test, X_fielding_test], y_test)

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.3927 - loss: 38.6754


[38.398067474365234, 0.37037035822868347]

In [795]:
batting_all = batting_data.astype(np.float64)
batting_all = np.nan_to_num(batting_all)
pitching_all = pitching_data.astype(np.float64)
pitching_all = np.nan_to_num(pitching_all)
fielding_all = fielding_data.astype(np.float64)
fielding_all = np.nan_to_num(fielding_all)

X_all = [batting_all, pitching_all, fielding_all]
player_predictions = player_model.predict(X_all)
player_predictions = pd.DataFrame(player_predictions)
player_predictions.columns = categories
if map:
    player_predictions.to_csv('player_predictions_mapped.csv')
else:
    player_predictions.to_csv('player_predictions.csv')

[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 41ms/step


In [796]:
player_predictions = player_model.predict([X_batting_test, X_pitching_test, X_fielding_test])
player_predictions = pd.DataFrame(player_predictions)
player_predictions.columns = categories

y_test_temp = pd.DataFrame(y_test)
y_test_temp.columns = categories

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step 


In [797]:
player_predictions.head()

Unnamed: 0,C,DS,LS,WC,WS
0,0.0,1.888891e-38,0.0,0.0,0.9999999
1,0.0,0.9999999,3.0334930000000003e-29,0.0,3.9025110000000005e-33
2,0.0,0.9999999,1.9034370000000002e-23,3.073345e-26,1.9085490000000002e-22
3,0.0,0.9999999,8.398358000000001e-23,0.0,8.105898999999999e-19
4,5.080761999999999e-38,0.9999999,0.0,0.0,3.205643e-08


In [798]:
y_test_temp.head()

Unnamed: 0,C,DS,LS,WC,WS
0,1.0,0.0,0.0,0.0,0.0
1,0.0,1.0,0.0,0.0,0.0
2,0.0,1.0,0.0,0.0,0.0
3,0.0,0.0,1.0,0.0,0.0
4,0.0,1.0,0.0,0.0,0.0


In [799]:
if map:
    player_predictions['SUM'] = player_predictions['First Round'] + player_predictions['League Series'] + player_predictions['World Series']
else:
    player_predictions['SUM'] = player_predictions['C'] + player_predictions['DS'] + player_predictions['LS'] + player_predictions['WC'] + player_predictions['WS']

In [800]:
player_predictions.head()

Unnamed: 0,C,DS,LS,WC,WS,SUM
0,0.0,1.888891e-38,0.0,0.0,0.9999999,1.0
1,0.0,0.9999999,3.0334930000000003e-29,0.0,3.9025110000000005e-33,1.0
2,0.0,0.9999999,1.9034370000000002e-23,3.073345e-26,1.9085490000000002e-22,1.0
3,0.0,0.9999999,8.398358000000001e-23,0.0,8.105898999999999e-19,1.0
4,5.080761999999999e-38,0.9999999,0.0,0.0,3.205643e-08,1.0


In [801]:
y_test_temp.sum()

C      6.0
DS    21.0
LS    10.0
WC     8.0
WS     9.0
dtype: float64