In [12]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from bisect import bisect_left
from tensorflow.keras import regularizers
from tensorflow.keras.layers import LeakyReLU, Dropout, BatchNormalization, Softmax
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.text import Tokenizer

In [26]:
data = pd.read_csv('final_matches_with_records.csv')
data.fillna(0)

#Need to tokenize player names
# Player name columns
player_name_columns = [col for col in data.columns if 'player' in col and 'name' in col]

# Create the tokenizer
tokenizer = Tokenizer(filters='', split='\t')  # Disable any filtering or splitting

# Fit the tokenizer on the whole names without splitting them
all_names = []
for col in player_name_columns:
    all_names.extend(data[col].fillna('').values)  # Collect all names

tokenizer.fit_on_texts(all_names)

# Tokenize each player name column, treating the entire name as a single token
for col in player_name_columns:
    # Map each name to its token
    data[col] = data[col].fillna('').map(lambda name: tokenizer.texts_to_sequences([name])[0][0] if name else 0)

# Check the output
print(data[player_name_columns].head())

features = data[['home_away_indicator', 'real_api_id', 'other_team_id', 'real_winprob', 'tie_prob', 'other_winprob', 'real_player_1_name', 'real_player_1_rating', 'real_player_1_potential', 'real_player_2_name', 'real_player_2_rating', 'real_player_2_potential', 'real_player_3_name', 'real_player_3_rating', 'real_player_3_potential', 'real_player_4_name', 'real_player_4_rating', 'real_player_4_potential', 'real_player_5_name', 'real_player_5_rating', 'real_player_5_potential', 'real_player_6_name', 'real_player_6_rating', 'real_player_6_potential', 'real_player_7_name', 'real_player_7_rating', 'real_player_7_potential', 'real_player_8_name', 'real_player_8_rating', 'real_player_8_potential', 'real_player_9_name', 'real_player_9_rating', 'real_player_9_potential', 'real_player_10_name', 'real_player_10_rating', 'real_player_10_potential', 'real_player_11_name', 'real_player_11_rating', 'real_player_11_potential', 'other_player_1_name', 'other_player_1_rating', 'other_player_1_potential', 'other_player_2_name', 'other_player_2_rating', 'other_player_2_potential', 'other_player_3_name', 'other_player_3_rating', 'other_player_3_potential', 'other_player_4_name', 'other_player_4_rating', 'other_player_4_potential', 'other_player_5_name', 'other_player_5_rating', 'other_player_5_potential', 'other_player_6_name', 'other_player_6_rating', 'other_player_6_potential', 'other_player_7_name', 'other_player_7_rating', 'other_player_7_potential', 'other_player_8_name', 'other_player_8_rating', 'other_player_8_potential', 'other_player_9_name', 'other_player_9_rating', 'other_player_9_potential', 'other_player_10_name', 'other_player_10_rating', 'other_player_10_potential', 'other_player_11_name', 'other_player_11_rating', 'other_player_11_potential', 'real_wins', 'real_draws', 'real_losses',  'other_wins', 'other_draws', 'other_losses']]
label = data [['result']]
label = to_categorical(label, num_classes=3)
print("Features shape: ", features.shape)
print('Target shape: ', label.shape)

X_train, X_test, y_train, y_test = train_test_split(features, label, test_size=0.2, random_state=42)

scalar = StandardScaler()
X_train = scalar.fit_transform(X_train)
X_test = scalar.transform(X_test)

   real_player_1_name  real_player_2_name  real_player_3_name  \
0                   4                   1                   5   
1                   4                   1                  32   
2                   4                   1                  32   
3                   4                  42                   1   
4                   4                  42                   1   

   real_player_4_name  real_player_5_name  real_player_6_name  \
0                  36                   3                  66   
1                  36                   3                  28   
2                  36                   3                 386   
3                   5                   3                  43   
4                   5                   3                  43   

   real_player_7_name  real_player_8_name  real_player_9_name  \
0                  38                   9                  25   
1                  38                  31                  43   
2                  28  

In [27]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation

input_shape = X_train.shape[1]
model = Sequential([
    Dense(64, input_shape=(input_shape,)),  # First layer specifies input shape and has 64 neurons
    Activation('relu'),                     # ReLU activation function
    Dense(32),                              # Second layer with 32 neurons
    Activation('relu'),                     # ReLU activation function
    Dense(3),                               # Output layer: number of neurons equals number of classes (3 in your case)
    Activation('softmax')                   # Softmax activation function for multi-class classification
])

model.compile(optimizer = 'adam', loss='categorical_crossentropy', metrics=['accuracy'])

history = model.fit(X_train, y_train, epochs=200, validation_split=0.2, batch_size=32)

test_loss = model.evaluate(X_test, y_test)

Epoch 1/200


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 37ms/step - accuracy: 0.1620 - loss: nan - val_accuracy: 0.1224 - val_loss: nan
Epoch 2/200
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.1442 - loss: nan - val_accuracy: 0.1224 - val_loss: nan
Epoch 3/200
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.1565 - loss: nan - val_accuracy: 0.1224 - val_loss: nan
Epoch 4/200
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.1755 - loss: nan - val_accuracy: 0.1224 - val_loss: nan
Epoch 5/200
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.1684 - loss: nan - val_accuracy: 0.1224 - val_loss: nan
Epoch 6/200
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.1422 - loss: nan - val_accuracy: 0.1224 - val_loss: nan
Epoch 7/200
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy