In [1]:
from sklearn import tree
import pandas as pd
import os

# Read the CSV and Perform Basic Data Cleaning

In [2]:
df = pd.read_csv("../../data/chess.csv")
df.head()

Unnamed: 0,id,format,victory_status,book_moves,opening_name,winner,turns,white_id,white_rating,black_id,black_rating
0,l1NXvwaE,Blitz,resign,4,Nimzowitsch Defense: Kennedy Variation,black,16,a-00,1322,skinnerua,1261
1,mIICvQHh,Blitz,mate,3,King's Pawn Game: Leonardis Variation,white,61,ischia,1496,a-00,1500
2,kWKvrqYL,Classical,mate,3,Queen's Pawn Game: Zukertort Variation,white,61,daniamurashov,1439,adivanov2009,1454
3,9tXo1AUZ,Classical,mate,5,Philidor Defense,white,95,nik221107,1523,adivanov2009,1469
4,qwU9rasv,Rapid,resign,10,Blackmar-Diemer Gambit: Pietrowsky Defense,white,33,capa_jr,1520,daniel_likes_chess,1423


# Select your features (columns)

In [3]:
# Set features. This will also be used as your x values.
X = df.drop(columns=['id', 'winner'])
X.head()

Unnamed: 0,format,victory_status,book_moves,opening_name,turns,white_id,white_rating,black_id,black_rating
0,Blitz,resign,4,Nimzowitsch Defense: Kennedy Variation,16,a-00,1322,skinnerua,1261
1,Blitz,mate,3,King's Pawn Game: Leonardis Variation,61,ischia,1496,a-00,1500
2,Classical,mate,3,Queen's Pawn Game: Zukertort Variation,61,daniamurashov,1439,adivanov2009,1454
3,Classical,mate,5,Philidor Defense,95,nik221107,1523,adivanov2009,1469
4,Rapid,resign,10,Blackmar-Diemer Gambit: Pietrowsky Defense,33,capa_jr,1520,daniel_likes_chess,1423


In [4]:
# label encode categorical data
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()

from tensorflow.keras.utils import to_categorical

In [5]:
label_encoder.fit(df['format'])
label_encoder.classes_

X_format = label_encoder.transform(df['format'])
X_format = X_format.reshape(X.shape[0])

X['format'] = to_categorical(X_format)#.astype('int')

In [6]:
label_encoder.fit(df['victory_status'])
label_encoder.classes_

X_victory_status = label_encoder.transform(df['victory_status'])
X_victory_status = X_victory_status.reshape(X.shape[0])

X['victory_status'] = to_categorical(X_victory_status)#.astype('int')

In [7]:
label_encoder.fit(df['opening_name'])
label_encoder.classes_

X_opening_name = label_encoder.transform(df['opening_name'])
X_opening_name = X_opening_name.reshape(X.shape[0])

X['opening_name'] = to_categorical(X_opening_name)#.astype('int')

In [8]:
label_encoder.fit(df['white_id'])
label_encoder.classes_

X_white_id = label_encoder.transform(df['white_id'])
X_white_id = X_white_id.reshape(X.shape[0])

X['white_id'] = to_categorical(X_white_id)#.astype('int')

In [9]:
label_encoder.fit(df['black_id'])
label_encoder.classes_

X_black_id = label_encoder.transform(df['black_id'])
X_black_id = X_black_id.reshape(X.shape[0])

X['black_id'] = to_categorical(X_white_id)#.astype('int')

In [10]:
X.head()

Unnamed: 0,format,victory_status,book_moves,opening_name,turns,white_id,white_rating,black_id,black_rating
0,1.0,0.0,4,0.0,16,0.0,1322,0.0,1261
1,1.0,0.0,3,0.0,61,0.0,1496,0.0,1500
2,0.0,0.0,3,0.0,61,0.0,1439,0.0,1454
3,0.0,0.0,5,0.0,95,0.0,1523,0.0,1469
4,0.0,0.0,10,0.0,33,0.0,1520,0.0,1423


In [11]:
label_encoder.fit(df['winner'])
label_encoder.classes_

array(['black', 'draw', 'white'], dtype=object)

In [12]:
y_values = label_encoder.transform(df['winner'])#.astype('int')
y = y_values.reshape(-1, 1)

# y = to_categorical(y_values)
type(y)

numpy.ndarray

# Create a Train Test Split


In [13]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [14]:
X_train.head()

Unnamed: 0,format,victory_status,book_moves,opening_name,turns,white_id,white_rating,black_id,black_rating
7971,0.0,0.0,1,0.0,69,0.0,1695,0.0,1702
12751,0.0,0.0,6,0.0,93,0.0,1792,0.0,1688
14657,0.0,0.0,1,0.0,85,0.0,1486,0.0,1350
2347,0.0,0.0,1,0.0,25,0.0,1848,0.0,1186
8856,0.0,0.0,3,0.0,86,0.0,1576,0.0,1638


# Pre-processing

Scale the data using the MinMaxScaler and perform some feature selection

In [15]:
# Scale your data
from sklearn.preprocessing import MinMaxScaler
X_minmax = MinMaxScaler().fit(X_train)
y_minmax = MinMaxScaler().fit(y_train)

X_train_scaled = X_minmax.transform(X_train)
X_test_scaled = X_minmax.transform(X_test)

print(f"Y Shape: {y_train.shape}")
print(f"X Shape: {X_train_scaled.shape}")

Y Shape: (12116, 1)
X Shape: (12116, 9)


# Train the Model



In [16]:
from tensorflow.keras.models import Sequential

model = Sequential()
model

<tensorflow.python.keras.engine.sequential.Sequential at 0x28a2b44f9e8>

In [None]:
from tensorflow.keras.layers import Dense
number_inputs = 9
number_hidden_nodes = 4
model.add(Dense(units=number_hidden_nodes, activation='relu', input_dim=number_inputs))

In [None]:
number_classes = 3
model.add(Dense(units=number_classes, activation='softmax'))

In [None]:
model.summary()

In [None]:
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

In [None]:
model.fit(
    X_train_scaled,
    y_train,
    epochs=500,
    shuffle=True,
    verbose=2
)

In [None]:
model_loss, model_accuracy = model.evaluate(
    X_test_scaled, y_test, verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

# Check 8 nodes and 12 nodes

In [24]:
from tensorflow.keras.layers import Dense
number_inputs = 9
# number_hidden_nodes = 8
number_hidden_nodes = 12
model.add(Dense(units=number_hidden_nodes, activation='relu', input_dim=number_inputs))

In [25]:
number_classes = 3
model.add(Dense(units=number_classes, activation='softmax'))

In [26]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 8)                 80        
_________________________________________________________________
dense_1 (Dense)              (None, 3)                 27        
_________________________________________________________________
dense_2 (Dense)              (None, 12)                48        
_________________________________________________________________
dense_3 (Dense)              (None, 3)                 39        
Total params: 107
Trainable params: 107
Non-trainable params: 0
_________________________________________________________________


In [27]:
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

In [28]:
model.fit(
    X_train_scaled,
    y_train,
    epochs=500,
    shuffle=True,
    verbose=2
)

Train on 12116 samples
Epoch 1/500
12116/12116 - 1s - loss: 0.8217 - accuracy: 0.5210
Epoch 2/500
12116/12116 - 0s - loss: 0.6901 - accuracy: 0.5447
Epoch 3/500
12116/12116 - 0s - loss: 0.6400 - accuracy: 0.6417
Epoch 4/500
12116/12116 - 0s - loss: 0.6082 - accuracy: 0.6635
Epoch 5/500
12116/12116 - 0s - loss: 0.6017 - accuracy: 0.6652
Epoch 6/500
12116/12116 - 0s - loss: 0.6001 - accuracy: 0.6640
Epoch 7/500
12116/12116 - 0s - loss: 0.5989 - accuracy: 0.6664
Epoch 8/500
12116/12116 - 0s - loss: 0.5987 - accuracy: 0.6632
Epoch 9/500
12116/12116 - 0s - loss: 0.5977 - accuracy: 0.6643
Epoch 10/500
12116/12116 - 0s - loss: 0.5977 - accuracy: 0.6666
Epoch 11/500
12116/12116 - 0s - loss: 0.5974 - accuracy: 0.6651
Epoch 12/500
12116/12116 - 0s - loss: 0.5973 - accuracy: 0.6641
Epoch 13/500
12116/12116 - 0s - loss: 0.5975 - accuracy: 0.6632
Epoch 14/500
12116/12116 - 0s - loss: 0.5975 - accuracy: 0.6623
Epoch 15/500
12116/12116 - 0s - loss: 0.5967 - accuracy: 0.6654
Epoch 16/500
12116/12116 -

Epoch 129/500
12116/12116 - 0s - loss: 0.5947 - accuracy: 0.6608
Epoch 130/500
12116/12116 - 0s - loss: 0.5946 - accuracy: 0.6647
Epoch 131/500
12116/12116 - 0s - loss: 0.5943 - accuracy: 0.6651
Epoch 132/500
12116/12116 - 0s - loss: 0.5936 - accuracy: 0.6611
Epoch 133/500
12116/12116 - 0s - loss: 0.5939 - accuracy: 0.6661
Epoch 134/500
12116/12116 - 0s - loss: 0.5940 - accuracy: 0.6649
Epoch 135/500
12116/12116 - 0s - loss: 0.5937 - accuracy: 0.6658
Epoch 136/500
12116/12116 - 0s - loss: 0.5940 - accuracy: 0.6669
Epoch 137/500
12116/12116 - 0s - loss: 0.5940 - accuracy: 0.6650
Epoch 138/500
12116/12116 - 0s - loss: 0.5940 - accuracy: 0.6647
Epoch 139/500
12116/12116 - 0s - loss: 0.5941 - accuracy: 0.6674
Epoch 140/500
12116/12116 - 0s - loss: 0.5936 - accuracy: 0.6642
Epoch 141/500
12116/12116 - 0s - loss: 0.5941 - accuracy: 0.6642
Epoch 142/500
12116/12116 - 0s - loss: 0.5937 - accuracy: 0.6649
Epoch 143/500
12116/12116 - 0s - loss: 0.5943 - accuracy: 0.6642
Epoch 144/500
12116/12116

12116/12116 - 0s - loss: 0.5939 - accuracy: 0.6652
Epoch 256/500
12116/12116 - 0s - loss: 0.5938 - accuracy: 0.6647
Epoch 257/500
12116/12116 - 0s - loss: 0.5936 - accuracy: 0.6633
Epoch 258/500
12116/12116 - 0s - loss: 0.5940 - accuracy: 0.6645
Epoch 259/500
12116/12116 - 0s - loss: 0.5942 - accuracy: 0.6634
Epoch 260/500
12116/12116 - 0s - loss: 0.5939 - accuracy: 0.6646
Epoch 261/500
12116/12116 - 0s - loss: 0.5937 - accuracy: 0.6679
Epoch 262/500
12116/12116 - 0s - loss: 0.5937 - accuracy: 0.6651
Epoch 263/500
12116/12116 - 0s - loss: 0.5939 - accuracy: 0.6615
Epoch 264/500
12116/12116 - 0s - loss: 0.5937 - accuracy: 0.6628
Epoch 265/500
12116/12116 - 0s - loss: 0.5938 - accuracy: 0.6642
Epoch 266/500
12116/12116 - 0s - loss: 0.5939 - accuracy: 0.6663
Epoch 267/500
12116/12116 - 0s - loss: 0.5945 - accuracy: 0.6678
Epoch 268/500
12116/12116 - 0s - loss: 0.5938 - accuracy: 0.6631
Epoch 269/500
12116/12116 - 0s - loss: 0.5938 - accuracy: 0.6652
Epoch 270/500
12116/12116 - 0s - loss: 

Epoch 382/500
12116/12116 - 0s - loss: 0.5938 - accuracy: 0.6664
Epoch 383/500
12116/12116 - 0s - loss: 0.5935 - accuracy: 0.6657
Epoch 384/500
12116/12116 - 0s - loss: 0.5937 - accuracy: 0.6644
Epoch 385/500
12116/12116 - 0s - loss: 0.5937 - accuracy: 0.6642
Epoch 386/500
12116/12116 - 0s - loss: 0.5945 - accuracy: 0.6638
Epoch 387/500
12116/12116 - 0s - loss: 0.5937 - accuracy: 0.6643
Epoch 388/500
12116/12116 - 0s - loss: 0.5934 - accuracy: 0.6624
Epoch 389/500
12116/12116 - 0s - loss: 0.5935 - accuracy: 0.6656
Epoch 390/500
12116/12116 - 0s - loss: 0.5941 - accuracy: 0.6666
Epoch 391/500
12116/12116 - 0s - loss: 0.5936 - accuracy: 0.6654
Epoch 392/500
12116/12116 - 0s - loss: 0.5939 - accuracy: 0.6647
Epoch 393/500
12116/12116 - 0s - loss: 0.5936 - accuracy: 0.6661
Epoch 394/500
12116/12116 - 0s - loss: 0.5936 - accuracy: 0.6641
Epoch 395/500
12116/12116 - 0s - loss: 0.5935 - accuracy: 0.6657
Epoch 396/500
12116/12116 - 0s - loss: 0.5939 - accuracy: 0.6677
Epoch 397/500
12116/12116

<tensorflow.python.keras.callbacks.History at 0x28a2b74fa90>

In [29]:
# print(grid.best_params_)
# print(grid.best_score_)
model_loss, model_accuracy = model.evaluate(
    X_test_scaled, y_test, verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

4039/4039 - 0s - loss: 0.5962 - accuracy: 0.6635
Loss: 0.5962027395589129, Accuracy: 0.6635305881500244


# Save the Model

In [None]:
# import joblib
# filename = 'LogisticRegression.sav'
# joblib.dump(grid, filename)