- We are creating a very simple machine learning model.<br>
- Using dataset: tic-tac-toe.data.txt with user-defined columns.<br>
- We are treating this problem as a supervised learning problem.<br>

In [1]:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import Imputer
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier

In [2]:
# Loading data
data = pd.read_csv("../tic-tac-toe.data.txt", sep = ",")
data_copy = pd.read_csv("../tic-tac-toe.data.txt", sep = ",")

# Setting cols.
data.columns = ["first_row_left", "first_row_middle", "first_row_right", "center_row_left", "center_row_middle", "center_row_right", "bottom_row_left", "bottom_row_middle", "bottom_row_right", "is_win"]
data_copy.columns = ["first_row_left", "first_row_middle", "first_row_right", "center_row_left", "center_row_middle", "center_row_right", "bottom_row_left", "bottom_row_middle", "bottom_row_right", "is_win"]

In [3]:
# Viewing data
data.to_csv("data.csv")

In [4]:
# As we can see the the different move options, we perform label encoding.
mapping_for_moves = {'x':1, "o":0} # For b, we put mean of the data.
mapping_for_wins = {"positive":1, "negative":0} # Positive is win, negative is lose
data.is_win = data.is_win.map(mapping_for_wins)
data_copy.is_win = data_copy.is_win.map(mapping_for_wins)

data = data.drop(columns=["is_win"], axis=1)

In [5]:
data_copy.head()

Unnamed: 0,first_row_left,first_row_middle,first_row_right,center_row_left,center_row_middle,center_row_right,bottom_row_left,bottom_row_middle,bottom_row_right,is_win
0,x,x,x,x,o,o,o,x,o,1
1,x,x,x,x,o,o,o,o,x,1
2,x,x,x,x,o,o,o,b,b,1
3,x,x,x,x,o,o,b,o,b,1
4,x,x,x,x,o,o,b,b,o,1


In [6]:
for i in data.columns: # Applying map to all the columns except is_win.
    data[i] = data[i].map(mapping_for_moves)

In [7]:
# Extracting features and labels
features = data.values
labels = data_copy.is_win.values

In [8]:
# Filling missing values aka "b"
features = (Imputer().fit_transform(features))

In [9]:
len(features) 

957

In [10]:
len(labels)

957

In [11]:
# Changing type to int
features = features.astype(np.int)
labels = labels.astype(np.int)

In [12]:
features

array([[1, 1, 1, ..., 0, 1, 0],
       [1, 1, 1, ..., 0, 0, 1],
       [1, 1, 1, ..., 0, 0, 0],
       ...,
       [0, 1, 0, ..., 1, 0, 1],
       [0, 1, 0, ..., 1, 0, 1],
       [0, 0, 1, ..., 0, 1, 1]])

In [13]:
labels

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

- Preprocessing is done.

In [14]:
features_train, features_test, labels_train, labels_test = train_test_split(features, labels, random_state=3, shuffle=True)

In [15]:
data.corr()

Unnamed: 0,first_row_left,first_row_middle,first_row_right,center_row_left,center_row_middle,center_row_right,bottom_row_left,bottom_row_middle,bottom_row_right
first_row_left,1.0,-0.050132,-0.141443,-0.050132,-0.126583,-0.340581,-0.141443,-0.340581,-0.040388
first_row_middle,-0.050132,1.0,-0.050132,-0.259385,-0.137291,-0.255754,-0.3439,0.054459,-0.340492
first_row_right,-0.141443,-0.050132,1.0,-0.3439,-0.126583,-0.046933,-0.04345,-0.340581,-0.138409
center_row_left,-0.050132,-0.259385,-0.3439,1.0,-0.137291,0.054459,-0.050132,-0.255754,-0.340492
center_row_middle,-0.126583,-0.137291,-0.126583,-0.137291,1.0,-0.141296,-0.126583,-0.141296,-0.130312
center_row_right,-0.340581,-0.255754,-0.046933,0.054459,-0.141296,1.0,-0.340581,-0.259962,-0.050921
bottom_row_left,-0.141443,-0.3439,-0.04345,-0.050132,-0.126583,-0.340581,1.0,-0.046933,-0.138409
bottom_row_middle,-0.340581,0.054459,-0.340581,-0.255754,-0.141296,-0.259962,-0.046933,1.0,-0.050921
bottom_row_right,-0.040388,-0.340492,-0.138409,-0.340492,-0.130312,-0.050921,-0.138409,-0.050921,1.0


- Clearly it is a classification problem, we can use DecisionTree or SVC

In [16]:
# Trying different classifiers.
clf = DecisionTreeClassifier()
clf.fit(features_train, labels_train)
d_tree_score = clf.score(features_test, labels_test) # Good result!

In [17]:
clf2 = SVC() # Clearly the data is non linear.
clf2.fit(features_train, labels_train)
clf2.score(features_test, labels_test) # Not good!

0.8583333333333333

In [18]:
clf3 = KNeighborsClassifier(n_neighbors=1)
clf3.fit(features_train, labels_train)
k_score = clf3.score(features_test, labels_test)

In [19]:
d_tree_score > k_score

False

In [20]:
predictions = clf3.predict(features_test)

In [21]:
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(labels_test, predictions)

In [22]:
cm

array([[ 78,   3],
       [  2, 157]])

In [23]:
np.where(labels_test!=predictions)

(array([ 17,  74, 178, 212, 219]),)

In [24]:
d_tree_score


0.9791666666666666

In [25]:
k_score

0.9791666666666666

In [26]:
from sklearn.metrics import classification_report
c = classification_report(labels_test, predictions)

In [27]:
c

'             precision    recall  f1-score   support\n\n          0       0.97      0.96      0.97        81\n          1       0.98      0.99      0.98       159\n\navg / total       0.98      0.98      0.98       240\n'

In [28]:
from sklearn.ensemble import RandomForestClassifier
r = RandomForestClassifier(n_estimators=100) # With 100 decision tree
r.fit(features_train, labels_train)
r_forest = r.score(features_test, labels_test)
p = r.predict(features_test)
np.where(labels_test!=features_test) # Only one misclassified
r_forest

  


0.9833333333333333

In [29]:
cm  = confusion_matrix(labels_test, p)

In [30]:
cm

array([[ 79,   2],
       [  2, 157]])

In [31]:
from sklearn.externals import joblib

In [32]:
from sklearn.model_selection import GridSearchCV


In [33]:
param = [ {"n_estimators":[10,100 ,1000]}]
r = RandomForestClassifier()
clf = GridSearchCV(r, param)
clf.fit(features_train, labels_train)

GridSearchCV(cv=None, error_score='raise',
       estimator=RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False),
       fit_params=None, iid=True, n_jobs=1,
       param_grid=[{'n_estimators': [10, 100, 1000]}],
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [34]:
clf.best_score_

0.9748953974895398

In [35]:
data_copy.head()

Unnamed: 0,first_row_left,first_row_middle,first_row_right,center_row_left,center_row_middle,center_row_right,bottom_row_left,bottom_row_middle,bottom_row_right,is_win
0,x,x,x,x,o,o,o,x,o,1
1,x,x,x,x,o,o,o,o,x,1
2,x,x,x,x,o,o,o,b,b,1
3,x,x,x,x,o,o,b,o,b,1
4,x,x,x,x,o,o,b,b,o,1


In [36]:
import coremltools




In [43]:
coremltools.converters.sklearn.convert(clf, ,output_feature_names="is_win",input_features=["first_row_left","first_row_middle", "first_row_right", "center_row_left", "center_row_middle", "center_row_right", "bottom_row_left", "bottom_row_middle", "bottom_row_right"])

SyntaxError: invalid syntax (<ipython-input-43-b320d5dc12fe>, line 1)

In [46]:
import keras
from keras.layers import Dense
from keras import Sequential
model = Sequential()
model.add(Dense(100, input_dim=features_train.shape[1], activation="tanh"))
model.add(Dense(100, activation="relu"))
model.add(Dense(1, activation="softmax"))

model.compile(optimizer="adam", loss=keras.losses.binary_crossentropy, metrics=["accuracy"])
model.fit(features_train, labels_train, epochs=50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x1260ffe80>