In [1]:
from sklearn import tree
import pandas as pd
import os

# Read the CSV and Perform Basic Data Cleaning

In [2]:
df = pd.read_csv("../../data/chess.csv")
df.head()

Unnamed: 0,id,format,victory_status,book_moves,opening_name,winner,turns,white_id,white_rating,black_id,black_rating
0,l1NXvwaE,Blitz,resign,4,Nimzowitsch Defense: Kennedy Variation,black,16,a-00,1322,skinnerua,1261
1,mIICvQHh,Blitz,mate,3,King's Pawn Game: Leonardis Variation,white,61,ischia,1496,a-00,1500
2,kWKvrqYL,Classical,mate,3,Queen's Pawn Game: Zukertort Variation,white,61,daniamurashov,1439,adivanov2009,1454
3,9tXo1AUZ,Classical,mate,5,Philidor Defense,white,95,nik221107,1523,adivanov2009,1469
4,qwU9rasv,Rapid,resign,10,Blackmar-Diemer Gambit: Pietrowsky Defense,white,33,capa_jr,1520,daniel_likes_chess,1423


# Select your features (columns)

In [3]:
# Set features. This will also be used as your x values.
X = df.drop(columns=['id', 'winner'])
X.head()

Unnamed: 0,format,victory_status,book_moves,opening_name,turns,white_id,white_rating,black_id,black_rating
0,Blitz,resign,4,Nimzowitsch Defense: Kennedy Variation,16,a-00,1322,skinnerua,1261
1,Blitz,mate,3,King's Pawn Game: Leonardis Variation,61,ischia,1496,a-00,1500
2,Classical,mate,3,Queen's Pawn Game: Zukertort Variation,61,daniamurashov,1439,adivanov2009,1454
3,Classical,mate,5,Philidor Defense,95,nik221107,1523,adivanov2009,1469
4,Rapid,resign,10,Blackmar-Diemer Gambit: Pietrowsky Defense,33,capa_jr,1520,daniel_likes_chess,1423


In [4]:
# label encode categorical data
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()

from tensorflow.keras.utils import to_categorical

In [5]:
label_encoder.fit(df['format'])
label_encoder.classes_

X_format = label_encoder.transform(df['format'])
X_format = X_format.reshape(X.shape[0])

X['format'] = to_categorical(X_format)#.astype('int')

In [6]:
label_encoder.fit(df['victory_status'])
label_encoder.classes_

X_victory_status = label_encoder.transform(df['victory_status'])
X_victory_status = X_victory_status.reshape(X.shape[0])

X['victory_status'] = to_categorical(X_victory_status)#.astype('int')

In [7]:
label_encoder.fit(df['opening_name'])
label_encoder.classes_

X_opening_name = label_encoder.transform(df['opening_name'])
X_opening_name = X_opening_name.reshape(X.shape[0])

X['opening_name'] = to_categorical(X_opening_name)#.astype('int')

In [8]:
label_encoder.fit(df['white_id'])
label_encoder.classes_

X_white_id = label_encoder.transform(df['white_id'])
X_white_id = X_white_id.reshape(X.shape[0])

X['white_id'] = to_categorical(X_white_id)#.astype('int')

In [9]:
label_encoder.fit(df['black_id'])
label_encoder.classes_

X_black_id = label_encoder.transform(df['black_id'])
X_black_id = X_black_id.reshape(X.shape[0])

X['black_id'] = to_categorical(X_white_id)#.astype('int')

In [10]:
X.head()

Unnamed: 0,format,victory_status,book_moves,opening_name,turns,white_id,white_rating,black_id,black_rating
0,1.0,0.0,4,0.0,16,0.0,1322,0.0,1261
1,1.0,0.0,3,0.0,61,0.0,1496,0.0,1500
2,0.0,0.0,3,0.0,61,0.0,1439,0.0,1454
3,0.0,0.0,5,0.0,95,0.0,1523,0.0,1469
4,0.0,0.0,10,0.0,33,0.0,1520,0.0,1423


In [11]:
label_encoder.fit(df['winner'])
label_encoder.classes_

array(['black', 'draw', 'white'], dtype=object)

In [12]:
y_values = label_encoder.transform(df['winner'])#.astype('int')
y = y_values.reshape(-1, 1)

# y = to_categorical(y_values)
type(y)

numpy.ndarray

# Create a Train Test Split


In [13]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [14]:
X_train.head()

Unnamed: 0,format,victory_status,book_moves,opening_name,turns,white_id,white_rating,black_id,black_rating
7971,0.0,0.0,1,0.0,69,0.0,1695,0.0,1702
12751,0.0,0.0,6,0.0,93,0.0,1792,0.0,1688
14657,0.0,0.0,1,0.0,85,0.0,1486,0.0,1350
2347,0.0,0.0,1,0.0,25,0.0,1848,0.0,1186
8856,0.0,0.0,3,0.0,86,0.0,1576,0.0,1638


# Pre-processing

Scale the data using the MinMaxScaler and perform some feature selection

In [15]:
# Scale your data
from sklearn.preprocessing import MinMaxScaler
X_minmax = MinMaxScaler().fit(X_train)
y_minmax = MinMaxScaler().fit(y_train)

X_train_scaled = X_minmax.transform(X_train)
X_test_scaled = X_minmax.transform(X_test)

print(f"Y Shape: {y_train.shape}")
print(f"X Shape: {X_train_scaled.shape}")

Y Shape: (12116, 1)
X Shape: (12116, 9)


# Train the Model



In [16]:
from tensorflow.keras.models import Sequential

model = Sequential()
model

<tensorflow.python.keras.engine.sequential.Sequential at 0x21a749a7358>

In [17]:
from tensorflow.keras.layers import Dense
number_inputs = 9
number_hidden_nodes = 4
model.add(Dense(units=number_hidden_nodes, activation='relu', input_dim=number_inputs))

In [18]:
number_classes = 3
model.add(Dense(units=number_classes, activation='softmax'))

In [19]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 4)                 40        
_________________________________________________________________
dense_1 (Dense)              (None, 3)                 15        
Total params: 55
Trainable params: 55
Non-trainable params: 0
_________________________________________________________________


In [22]:
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

In [None]:
model.fit(
    X_train_scaled,
    y_train,
    epochs=500,
    shuffle=True,
    verbose=2
)

Train on 12116 samples
Epoch 1/500
12116/12116 - 1s - loss: 0.8910 - accuracy: 0.5906
Epoch 2/500
12116/12116 - 0s - loss: 0.7285 - accuracy: 0.6326
Epoch 3/500
12116/12116 - 0s - loss: 0.6662 - accuracy: 0.6514
Epoch 4/500


In [None]:
model_loss, model_accuracy = model.evaluate(
    X_test_scaled, y_test, verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

# Hyperparameter Tuning

Use `GridSearchCV` to tune the model's parameters

# Save the Model

In [None]:
# import joblib
# filename = 'LogisticRegression.sav'
# joblib.dump(grid, filename)