## Tic Tac Toe Winner Classification 

Given *data about the end states of tic-tac-toe games*, let's try to predict the **winner** of a given game.

We will use various classification models to make our predictions.

Data source: https://www.kaggle.com/datasets/rsrishav/tictactoe-endgame-data-set

### Importing Libraries

In [1]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split

from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier

In [2]:
# Load the tic-tac-toe endgame CSV (path is relative to the notebook's working directory)
data = pd.read_csv('archive/tic-tac-toe.data.csv')
data

Unnamed: 0,top-left-square,top-middle-square,top-right-square,middle-left-square,middle-middle-square,middle-right-square,bottom-left-square,bottom-middle-square,bottom-right-square,Class
0,x,x,x,x,o,o,x,o,o,positive
1,x,x,x,x,o,o,o,x,o,positive
2,x,x,x,x,o,o,o,o,x,positive
3,x,x,x,x,o,o,o,b,b,positive
4,x,x,x,x,o,o,b,o,b,positive
...,...,...,...,...,...,...,...,...,...,...
953,o,x,x,x,o,o,o,x,x,negative
954,o,x,o,x,x,o,x,o,x,negative
955,o,x,o,x,o,x,x,o,x,negative
956,o,x,o,o,x,x,x,o,x,negative


In [4]:
# Rename the ten columns to V1..V10 (V1-V9 are the board squares, V10 is the class label)
data.columns = [f'V{i}' for i in range(1, 11)]

In [5]:
# Display the frame again to confirm the renamed columns
data

Unnamed: 0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10
0,x,x,x,x,o,o,x,o,o,positive
1,x,x,x,x,o,o,o,x,o,positive
2,x,x,x,x,o,o,o,o,x,positive
3,x,x,x,x,o,o,o,b,b,positive
4,x,x,x,x,o,o,b,o,b,positive
...,...,...,...,...,...,...,...,...,...,...
953,o,x,x,x,o,o,o,x,x,negative
954,o,x,o,x,x,o,x,o,x,negative
955,o,x,o,x,o,x,x,o,x,negative
956,o,x,o,o,x,x,x,o,x,negative


In [6]:
# List the distinct values of the label column (V10, originally named 'Class')
data['V10'].unique()

array(['positive', 'negative'], dtype=object)

In [7]:
# Print a concise summary of the frame (dtypes, non-null counts, memory usage).
# DataFrame.info() prints its report and returns None, so the result is not
# assigned to a variable.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 958 entries, 0 to 957
Data columns (total 10 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   V1      958 non-null    object
 1   V2      958 non-null    object
 2   V3      958 non-null    object
 3   V4      958 non-null    object
 4   V5      958 non-null    object
 5   V6      958 non-null    object
 6   V7      958 non-null    object
 7   V8      958 non-null    object
 8   V9      958 non-null    object
 9   V10     958 non-null    object
dtypes: object(10)
memory usage: 75.0+ KB


### Preprocessing

In [10]:
# Work on a copy so the raw `data` frame is left untouched by preprocessing
df = data.copy()

In [16]:
def onehot_encode(df, columns):
    """Return a copy of ``df`` with each listed column replaced by indicator columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; it is not modified.
    columns : iterable of column labels
        Columns to one-hot encode.

    Returns
    -------
    pandas.DataFrame
        The columns of ``df`` that were not encoded (in their original order),
        followed by integer 0/1 indicator columns named ``<column>_<value>``,
        grouped per encoded column in the order given.

    Raises
    ------
    KeyError
        If any entry of ``columns`` is not a column of ``df``.
    """
    columns = list(columns)
    dummies = [
        pd.get_dummies(df[column], prefix=column, dtype=int)
        for column in columns
    ]
    # Concatenate once instead of growing the frame inside a loop, which
    # re-copies every existing column on each iteration.
    return pd.concat([df.drop(columns=columns)] + dummies, axis=1)

In [18]:
# One-hot encode board space columns.
# Encode from `data` rather than `df` so this cell can be re-run safely: it
# always starts from the un-encoded V1..V9 columns instead of raising a
# KeyError on columns a previous run already dropped.
df = onehot_encode(data, [f'V{i}' for i in range(1, 10)])
df

Unnamed: 0,V10,V1_b,V1_o,V1_x,V2_b,V2_o,V2_x,V3_b,V3_o,V3_x,...,V6_x,V7_b,V7_o,V7_x,V8_b,V8_o,V8_x,V9_b,V9_o,V9_x
0,positive,0,0,1,0,0,1,0,0,1,...,0,0,0,1,0,1,0,0,1,0
1,positive,0,0,1,0,0,1,0,0,1,...,0,0,1,0,0,0,1,0,1,0
2,positive,0,0,1,0,0,1,0,0,1,...,0,0,1,0,0,1,0,0,0,1
3,positive,0,0,1,0,0,1,0,0,1,...,0,0,1,0,1,0,0,1,0,0
4,positive,0,0,1,0,0,1,0,0,1,...,0,1,0,0,0,1,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
953,negative,0,1,0,0,0,1,0,0,1,...,0,0,1,0,0,0,1,0,0,1
954,negative,0,1,0,0,0,1,0,1,0,...,0,0,0,1,0,1,0,0,0,1
955,negative,0,1,0,0,0,1,0,1,0,...,1,0,0,1,0,1,0,0,0,1
956,negative,0,1,0,0,0,1,0,1,0,...,1,0,0,1,0,1,0,0,0,1


In [19]:
# Encode label values as numbers (positive -> 1, negative -> 0).
# Series.map is used instead of Series.replace because replacing strings with
# ints triggers pandas' downcasting FutureWarning. Mapping from the raw `data`
# labels keeps this cell safe to re-run; a label other than 'positive' or
# 'negative' would show up as NaN instead of passing through unnoticed.
df['V10'] = data['V10'].map({'positive': 1, 'negative': 0})

  df['V10'] = df['V10'].replace({'positive': 1, "negative": 0})


In [20]:
# Inspect the frame after encoding the label column
df

Unnamed: 0,V10,V1_b,V1_o,V1_x,V2_b,V2_o,V2_x,V3_b,V3_o,V3_x,...,V6_x,V7_b,V7_o,V7_x,V8_b,V8_o,V8_x,V9_b,V9_o,V9_x
0,1,0,0,1,0,0,1,0,0,1,...,0,0,0,1,0,1,0,0,1,0
1,1,0,0,1,0,0,1,0,0,1,...,0,0,1,0,0,0,1,0,1,0
2,1,0,0,1,0,0,1,0,0,1,...,0,0,1,0,0,1,0,0,0,1
3,1,0,0,1,0,0,1,0,0,1,...,0,0,1,0,1,0,0,1,0,0
4,1,0,0,1,0,0,1,0,0,1,...,0,1,0,0,0,1,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
953,0,0,1,0,0,0,1,0,0,1,...,0,0,1,0,0,0,1,0,0,1
954,0,0,1,0,0,0,1,0,1,0,...,0,0,0,1,0,1,0,0,0,1
955,0,0,1,0,0,0,1,0,1,0,...,1,0,0,1,0,1,0,0,0,1
956,0,0,1,0,0,0,1,0,1,0,...,1,0,0,1,0,1,0,0,0,1


In [21]:
# Separate the feature columns (X) from the target column V10 (y)
X = df.drop(columns='V10').copy()
y = df['V10'].copy()

In [22]:
# Keep 70% of the rows for training; the fixed seed makes the shuffled split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.7,
    shuffle=True,
    random_state=42,
)

In [23]:
# Shapes of the train and test feature matrices as (rows, columns)
X_train.shape, X_test.shape

((670, 27), (288, 27))

### Training

In [24]:
# Candidate classifiers, otherwise left at their default hyperparameters.
# Keys are left-padded to a common width so the printed report lines up; other
# cells look models up by these exact strings, so keep them unchanged.
# The estimators with randomised training (decision tree, MLP) get a fixed
# seed so the reported accuracies are reproducible across runs.
models = {
    "   K-Nearest Neighbors": KNeighborsClassifier(),
    "   Logistic Regression": LogisticRegression(),
    "Support Vector Machine": SVC(),
    "         Decision Tree": DecisionTreeClassifier(random_state=42),
    "        Neural Network": MLPClassifier(random_state=42)
}

In [25]:
# Fit every candidate model on the training split
for label, estimator in models.items():
    estimator.fit(X_train, y_train)
    print(f"{label} trained.")

   K-Nearest Neighbors trained.
   Logistic Regression trained.
Support Vector Machine trained.
         Decision Tree trained.
        Neural Network trained.




### Results

In [27]:
# Report each model's accuracy on the held-out test split as a percentage
for label, estimator in models.items():
    accuracy_pct = estimator.score(X_test, y_test) * 100
    print(f"{label} Accuracy: {accuracy_pct:.3f}%")

   K-Nearest Neighbors Accuracy: 92.361%
   Logistic Regression Accuracy: 97.569%
Support Vector Machine Accuracy: 98.958%
         Decision Tree Accuracy: 92.708%
        Neural Network Accuracy: 98.611%


#### Examining the misclassified examples

In [28]:
# Hand-picked from the accuracy report; the key must match the `models` dict
# entry exactly. Update it if a different model ranks best on a re-run.
best_model = models["Support Vector Machine"]

In [46]:
# True labels of the test rows where the chosen model's prediction is wrong
is_wrong = best_model.predict(X_test) != y_test
hard_examples = y_test[is_wrong]
hard_examples

947    0
956    0
952    0
Name: V10, dtype: int64

In [49]:
# Look up the original (un-encoded) board squares for the misclassified rows
boards = data.drop(columns='V10')
hard_examples = boards.loc[hard_examples.index]
hard_examples

Unnamed: 0,V1,V2,V3,V4,V5,V6,V7,V8,V9
947,x,o,x,x,o,o,o,x,x
956,o,x,o,o,x,x,x,o,x
952,o,x,x,x,o,o,x,o,x


In [50]:
# Render each misclassified board as a 3x3 grid (V1-V3 top row, V4-V6 middle, V7-V9 bottom)
print("Difficult Examples: ")
for idx, board in hard_examples.iterrows():
    print(f"\nExample {idx}")
    squares = [board[f'V{k}'] for k in range(1, 10)]
    for start in (0, 3, 6):
        print(" ".join(squares[start:start + 3]))

Difficult Examples: 

Example 947
x o x
x o o
o x x

Example 956
o x o
o x x
x o x

Example 952
o x x
x o o
x o x
