In [4]:
import pandas as pd # to load and manipulate data and for one-hot-encoding
import numpy as np # to calculate the mean and standard deviation
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn import svm
import pickle

df = pd.read_csv('connect-4.data', header = None) # pandas (pd) reads dataset and returns a data frame (df)

width = ['A', 'B', 'C', 'D', 'E', 'F', 'G'] # column names changed in A to G 
height = range(1, 7) # row number is added
cols = []

for i in width:
    for j in height:
        cols.append(i + str(j))
cols.append('res')
df.columns = cols

# converting the characters x, b and o into numerical 1, 0 and -1
df[df == 'x'] = 1 
df[df == 'b'] = 0
df[df == 'o'] = -1

# splits the original data set into three separate data sets "win", "loss" and "draw" 
df_w = df[df['res'] == 'win']
df_l = df[df['res'] == 'loss']
df_d = df[df['res'] == 'draw']

# calculates the number of games a player has won/loss/draw and stores it in the variable
w_number = len(df_w)
l_number = len(df_l)
d_number = len(df_d)

# divide datasets into training and test datasets
# 70% of the dataset is used as the training dataset (sample)
w_train = df_w.sample(n = (int)(w_number * 0.7))
l_train = df_l.sample(n = (int)(l_number * 0.7))
d_train = df_d.sample(n = (int)(d_number * 0.7))

# 30% is used as the test dataset (drop)
w_test = df_w.drop(w_train.index)
l_test = df_l.drop(l_train.index)
d_test = df_d.drop(d_train.index)

# the resulting training and test datasets are merged into a single dataset
train = pd.concat([w_train, l_train, d_train], axis = 0)
test = pd.concat([w_test, l_test, d_test], axis = 0)

# training the multilayer perceptron classification (MLP) model and predicting the results
x_features = list(df.columns)
y_feature = 'res'
x_features.remove(y_feature)

X_train = train[x_features].values
X_test = test[x_features].values

Y_train = train[y_feature].values
Y_test = test[y_feature].values

MLP_clf = MLPClassifier(hidden_layer_sizes = (25, 10), verbose = 1)
MLP_clf.fit(X_train, Y_train)

prediction = MLP_clf.predict(X_test)
MLP_clf.score(X_test, Y_test)

# prediction = Y_predict[0][2] - Y_predict[0][0]

# evaluate classifier
accuracy = accuracy_score(prediction, Y_test)
precision = precision_score(prediction, Y_test, average = 'weighted')
recall = recall_score(prediction, Y_test, average = 'weighted')
f1 = f1_score(prediction, Y_test, average = 'weighted')
print("Accuracy: " + str(accuracy))
print("Precision: " + str(precision))
print("Recall: " + str(recall))
print("F1 score: " + str(f1))

# serialize and save model
filename = 'MLP_clf.data'
pickle.dump(MLP_clf, open(filename, 'wb'))



Iteration 1, loss = 0.86298583
Iteration 2, loss = 0.62113627
Iteration 3, loss = 0.58507849
Iteration 4, loss = 0.56860185
Iteration 5, loss = 0.55726038
Iteration 6, loss = 0.54764096
Iteration 7, loss = 0.53872318
Iteration 8, loss = 0.53069612
Iteration 9, loss = 0.52369928
Iteration 10, loss = 0.51679232
Iteration 11, loss = 0.51030496
Iteration 12, loss = 0.50440629
Iteration 13, loss = 0.49916431
Iteration 14, loss = 0.49434510
Iteration 15, loss = 0.48987095
Iteration 16, loss = 0.48599049
Iteration 17, loss = 0.48263210
Iteration 18, loss = 0.47939368
Iteration 19, loss = 0.47644288
Iteration 20, loss = 0.47375694
Iteration 21, loss = 0.47099389
Iteration 22, loss = 0.46871423
Iteration 23, loss = 0.46695501
Iteration 24, loss = 0.46469321
Iteration 25, loss = 0.46282204
Iteration 26, loss = 0.46143608
Iteration 27, loss = 0.45896809
Iteration 28, loss = 0.45785087
Iteration 29, loss = 0.45610190
Iteration 30, loss = 0.45513750
Iteration 31, loss = 0.45363150
Iteration 32, los

Accuracy: 0.8214920071047958
Precision: 0.8702788141163886
Recall: 0.8214920071047958
F1 score: 0.8424197490860788
