# Final Boards Classification

In [137]:
import numpy as np

file_path = 'datasets-part1/tictac_final.txt'

# Assuming there are no missing values
dataset = np.loadtxt(file_path)

# If you need to separate the input features from the output feature:
X_final = dataset[:, :-1]  # All rows, all columns except the last one
y_final = dataset[:, -1]   # All rows, only the last column

# Extra Credit 2!
# Calculate the number of samples you want (10% of the full dataset)
num_samples = int(len(dataset) * 0.1)

# Generate random indices without replacement to sample 10% of the dataset
random_indices = np.random.choice(len(dataset), size=num_samples, replace=False)

# Use the generated indices to create the smaller dataset
small_dataset = dataset[random_indices, :]

# If you need to separate the input features from the output feature for the smaller dataset:
X_small = small_dataset[:, :-1]  # All rows, all columns except the last one for the small dataset
y_small = small_dataset[:, -1]   # All rows, only the last column for the small dataset


## Linear SVC

In [138]:
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC
X_train, X_test, y_train, y_test = train_test_split(X_final, y_final, test_size=0.2, random_state=42)

clf = LinearSVC(max_iter=10000)  # Increase max_iter if convergence warnings occur

# Train the model
clf.fit(X_train, y_train)

# Make predictions on the test set
predictions = clf.predict(X_test)

#### Cross-Validation

In [139]:
from sklearn.model_selection import cross_val_score, ShuffleSplit

# Define the cross-validation strategy and ensure shuffling
cv_strategy = ShuffleSplit(n_splits=10, test_size=0.2, random_state=42)

accuracy_scores_svc = cross_val_score(clf, X_final, y_final, cv=cv_strategy)

# Report cross-validation results
print("LinearSVC Cross-validation accuracy scores:", accuracy_scores_svc)
print("Mean accuracy:", accuracy_scores_svc.mean())
print("Standard deviation:", accuracy_scores_svc.std())

LinearSVC Cross-validation accuracy scores: [0.96875    0.984375   0.97395833 0.97395833 0.97916667 0.984375
 0.99479167 0.99479167 0.97916667 0.97395833]
Mean accuracy: 0.9807291666666668
Standard deviation: 0.008414320011147641


#### Confusion Matrix

In [140]:
from sklearn.metrics import confusion_matrix
confusion_matrix(y_test, predictions, normalize='true')

array([[0.91044776, 0.08955224],
       [0.        , 1.        ]])

#### Extra Credit 2: 1/10 of training set

In [141]:
X_train_small, X_test_small, y_train_small, y_test_small = train_test_split(X_small, y_small, test_size=0.2, random_state=42)

clf = LinearSVC(max_iter=10000)  # Increase max_iter if convergence warnings occur

# Train the model
clf.fit(X_train_small, y_train_small)

# Make predictions on the test set
predictions = clf.predict(X_test_small)

accuracy_scores_svc = cross_val_score(clf, X_small, y_small, cv=cv_strategy)

# Report cross-validation results
print("LinearSVC Cross-validation accuracy scores w/ 1/10 dataset:", accuracy_scores_svc)
print("Mean accuracy w/ 1/10 dataset:", accuracy_scores_svc.mean())
print("Standard deviation w/ 1/10 dataset:", accuracy_scores_svc.std())

LinearSVC Cross-validation accuracy scores w/ 1/10 dataset: [0.94736842 1.         1.         0.94736842 1.         0.94736842
 1.         1.         1.         0.94736842]
Mean accuracy w/ 1/10 dataset: 0.9789473684210526
Standard deviation w/ 1/10 dataset: 0.025784102555612427


To my surprise, the mean accuracy did not really change. There were also instances within the cross-validation where it was 100% accurate, but when the entire dataset was used to train the model, it was never 100% accurate. I wonder if this is an instance of overfitting, where the model is fitted too closely to the limited data it has access to, since when provided the rest of the data, the accuracy is not quite as high.

## K-Nearest Neighbors

In [142]:
from sklearn.neighbors import KNeighborsClassifier
k = 3  # Number of neighbors
knn_clf = KNeighborsClassifier(n_neighbors=k)

knn_clf.fit(X_train, y_train)
predictions_knn = knn_clf.predict(X_test)

In [143]:
accuracy_scores_knn = cross_val_score(knn_clf, X_final, y_final, cv=cv_strategy)

print("KNN Cross-validation accuracy scores:", accuracy_scores_knn)
print("Mean accuracy:", accuracy_scores_knn.mean())
print("Standard deviation:", accuracy_scores_knn.std())

KNN Cross-validation accuracy scores: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
Mean accuracy: 1.0
Standard deviation: 0.0


In [144]:
confusion_matrix(y_test, predictions_knn, normalize='true')

array([[1., 0.],
       [0., 1.]])

## Multilayer Perceptron

In [145]:
from sklearn.neural_network import MLPClassifier
mlp_clf = MLPClassifier(hidden_layer_sizes=(128, 64, 32), max_iter=1000, alpha=0.001, random_state=42)

mlp_clf.fit(X_train, y_train)
predictions_mlp = mlp_clf.predict(X_test)

In [146]:
accuracy_scores_mlp = cross_val_score(mlp_clf, X_final, y_final, cv=cv_strategy)

print("MLP Cross-validation accuracy scores:", accuracy_scores_mlp)
print("Mean accuracy:", accuracy_scores_mlp.mean())
print("Standard deviation:", accuracy_scores_mlp.std())

MLP Cross-validation accuracy scores: [0.98958333 0.984375   0.984375   0.98958333 0.98958333 0.99479167
 1.         1.         1.         0.98958333]
Mean accuracy: 0.9921875000000002
Standard deviation: 0.005823093691405694


In [147]:
mlp_cm = confusion_matrix(y_test, predictions_mlp, normalize='true')
mlp_cm

array([[0.97014925, 0.02985075],
       [0.        , 1.        ]])

In [148]:
print("MLP Confusion Matrix Accuracy: ", (mlp_cm[0][0] + mlp_cm[1][1]) / 2)

MLP Confusion Matrix Accuracy:  0.9850746268656716


# Intermediate Boards Optimal Play (Single Label)

In [149]:
file_path = 'datasets-part1/tictac_single.txt'

dataset = np.loadtxt(file_path)

# If you need to separate the input features from the output feature:
X_single = dataset[:, :-1]  # All rows, all columns except the last one
y_single = dataset[:, -1]   # All rows, only the last column

## Linear SVC

In [150]:
X_train, X_test, y_train, y_test = train_test_split(X_single, y_single, test_size=0.2, random_state=42)

clf = LinearSVC(max_iter=10000)  # Increase max_iter if convergence warnings occur

# Train the model
clf.fit(X_train, y_train)

# Make predictions on the test set
predictions = clf.predict(X_test)

#### Cross-Validation

In [151]:
cv_strategy = ShuffleSplit(n_splits=10, test_size=0.2, random_state=42)

accuracy_scores_svc = cross_val_score(clf, X_single, y_single, cv=cv_strategy, scoring="accuracy")

# Report cross-validation results
print("LinearSVC Cross-validation accuracy scores:", accuracy_scores_svc)
print("Mean accuracy:", accuracy_scores_svc.mean())
print("Standard deviation:", accuracy_scores_svc.std())

LinearSVC Cross-validation accuracy scores: [0.2402746  0.25629291 0.26010679 0.24637681 0.2402746  0.23646072
 0.24713959 0.23493516 0.23951182 0.23493516]
Mean accuracy: 0.24363081617086194
Standard deviation: 0.008329295923550695


#### Confusion Matrix

In [152]:
confusion_matrix(y_test, predictions, normalize='true')

array([[0.9380805 , 0.        , 0.00309598, 0.        , 0.05882353,
        0.        , 0.        , 0.        , 0.        ],
       [0.94047619, 0.00595238, 0.0297619 , 0.        , 0.02380952,
        0.        , 0.        , 0.        , 0.        ],
       [0.97860963, 0.        , 0.00534759, 0.        , 0.01604278,
        0.        , 0.        , 0.        , 0.        ],
       [0.92307692, 0.        , 0.04273504, 0.        , 0.03418803,
        0.        , 0.        , 0.        , 0.        ],
       [0.92574257, 0.0049505 , 0.01980198, 0.        , 0.04950495,
        0.        , 0.        , 0.        , 0.        ],
       [0.93421053, 0.        , 0.03947368, 0.        , 0.02631579,
        0.        , 0.        , 0.        , 0.        ],
       [0.90909091, 0.01010101, 0.02020202, 0.        , 0.06060606,
        0.        , 0.        , 0.        , 0.        ],
       [0.84      , 0.        , 0.02      , 0.        , 0.14      ,
        0.        , 0.        , 0.        , 0.        ],


## K-Nearest Neighbors

In [153]:
k = 3  # Number of neighbors
knn_clf = KNeighborsClassifier(n_neighbors=k)

knn_clf.fit(X_train, y_train)
predictions_knn = knn_clf.predict(X_test)

#### Cross Validation

In [154]:
accuracy_scores_knn = cross_val_score(knn_clf, X_final, y_final, cv=cv_strategy, scoring="accuracy")

print("KNN Cross-validation accuracy scores:", accuracy_scores_knn)
print("Mean accuracy:", accuracy_scores_knn.mean())
print("Standard deviation:", accuracy_scores_knn.std())

KNN Cross-validation accuracy scores: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
Mean accuracy: 1.0
Standard deviation: 0.0


#### Confusion Matrix

In [155]:
confusion_matrix(y_test, predictions_knn, normalize='true')

array([[0.86687307, 0.00309598, 0.03405573, 0.00928793, 0.05882353,
        0.        , 0.01547988, 0.0123839 , 0.        ],
       [0.07738095, 0.70238095, 0.04166667, 0.04166667, 0.06547619,
        0.01190476, 0.0297619 , 0.01190476, 0.01785714],
       [0.14438503, 0.04278075, 0.75935829, 0.01069519, 0.02139037,
        0.00534759, 0.01069519, 0.00534759, 0.        ],
       [0.11111111, 0.08547009, 0.06837607, 0.66666667, 0.03418803,
        0.00854701, 0.01709402, 0.        , 0.00854701],
       [0.0990099 , 0.05445545, 0.01980198, 0.01980198, 0.78217822,
        0.        , 0.00990099, 0.0049505 , 0.00990099],
       [0.10526316, 0.11842105, 0.02631579, 0.        , 0.01315789,
        0.69736842, 0.        , 0.        , 0.03947368],
       [0.12121212, 0.04040404, 0.07070707, 0.06060606, 0.04040404,
        0.01010101, 0.65656566, 0.        , 0.        ],
       [0.1       , 0.1       , 0.        , 0.06      , 0.02      ,
        0.02      , 0.        , 0.66      , 0.04      ],


## Multilayer Perceptron

In [156]:
mlp_clf = MLPClassifier(hidden_layer_sizes=(100,), max_iter=10000, alpha=0.001, solver='adam', random_state=42)

mlp_clf.fit(X_train, y_train)
predictions_mlp = mlp_clf.predict(X_test)

In [157]:
accuracy_scores_mlp = cross_val_score(mlp_clf, X_final, y_final, cv=cv_strategy, scoring="accuracy")

print("MLP Cross-validation accuracy scores:", accuracy_scores_mlp)
print("Mean accuracy:", accuracy_scores_mlp.mean())
print("Standard deviation:", accuracy_scores_mlp.std())

MLP Cross-validation accuracy scores: [0.97395833 0.984375   0.97395833 0.97916667 0.97916667 0.98958333
 0.99479167 1.         0.97916667 0.97916667]
Mean accuracy: 0.9833333333333332
Standard deviation: 0.00833333333333333


In [158]:
mlp_cm = confusion_matrix(y_test, predictions_mlp, normalize='true')
mlp_cm

array([[0.95975232, 0.        , 0.00619195, 0.00309598, 0.02167183,
        0.        , 0.00619195, 0.00309598, 0.        ],
       [0.01190476, 0.91666667, 0.01785714, 0.01190476, 0.02380952,
        0.        , 0.01190476, 0.        , 0.00595238],
       [0.04278075, 0.01069519, 0.89839572, 0.01604278, 0.01604278,
        0.        , 0.00534759, 0.00534759, 0.00534759],
       [0.02564103, 0.00854701, 0.        , 0.93162393, 0.00854701,
        0.        , 0.00854701, 0.00854701, 0.00854701],
       [0.04455446, 0.0049505 , 0.0049505 , 0.        , 0.93069307,
        0.00990099, 0.0049505 , 0.        , 0.        ],
       [0.        , 0.03947368, 0.01315789, 0.02631579, 0.        ,
        0.92105263, 0.        , 0.        , 0.        ],
       [0.01010101, 0.01010101, 0.01010101, 0.02020202, 0.        ,
        0.        , 0.93939394, 0.        , 0.01010101],
       [0.06      , 0.04      , 0.        , 0.06      , 0.02      ,
        0.        , 0.        , 0.82      , 0.        ],


# Intermediate boards optimal play (multi label)

In [159]:
from sklearn.multioutput import MultiOutputRegressor
from sklearn.metrics import mean_squared_error

file_path = 'datasets-part1/tictac_multi.txt'
dataset = np.loadtxt(file_path)

X = dataset[:, :9]  # Board configurations
Y = dataset[:, 9:]  # Optimal moves, multi-label

In [160]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

## KNN Regressor

In [161]:
from sklearn.neighbors import KNeighborsRegressor

knn_regressor = MultiOutputRegressor(KNeighborsRegressor(n_neighbors=3, weights='distance'))
knn_regressor.fit(X_train, Y_train)

In [162]:
Y_pred_knn = knn_regressor.predict(X_test)

rmse = mean_squared_error(Y_test, Y_pred_knn) ** 0.5
print(f"KNN Regressor Root Mean Squared Error: {rmse}")

KNN Regressor Root Mean Squared Error: 0.18973017599190165


## Linear Regression (implemented w/ Normal equations)

In [163]:
# from sklearn.linear_model import LinearRegression
# linear_regressor = MultiOutputRegressor(LinearRegression())
# linear_regressor.fit(X_train, Y_train)

X_lin = dataset[:, :9]  # Board configurations
Y_lin = dataset[:, 9:]  # Optimal moves, multi-label

# Add Bias Term
X_b = np.c_[np.ones((X_lin.shape[0], 1)), X_lin]  # Add 1s column

X_train_lin, X_test_lin, Y_train_lin, Y_test_lin = train_test_split(X_b, Y_lin, test_size=0.2, random_state=42)

# Applying Normal Equation for each output label
theta_best = np.linalg.inv(X_train_lin.T.dot(X_train_lin)).dot(X_train_lin.T).dot(Y_train_lin)

Y_pred_lin = X_test_lin.dot(theta_best)

# Calculating RMSE for each output label and then taking the mean
rmse = np.sqrt(mean_squared_error(Y_test_lin, Y_pred_lin, multioutput='raw_values'))
mean_rmse = np.mean(rmse)
print(f"Mean RMSE using Normal Equations: {mean_rmse}")

Mean RMSE using Normal Equations: 0.4096186209204892


## Multilayer Perceptron

In [164]:
from sklearn.neural_network import MLPRegressor

mlp_regressor = MultiOutputRegressor(MLPRegressor(hidden_layer_sizes=(128, 64, 32), alpha=0.01))
mlp_regressor.fit(X_train, Y_train)

In [165]:
Y_pred_mlp = mlp_regressor.predict(X_test)

rmse = mean_squared_error(Y_test, Y_pred_mlp) ** 0.5
print("Root Mean Squared Error (RMSE):", rmse)

Root Mean Squared Error (RMSE): 0.14349250306462075


In [167]:
# Save model for tictactoe .py file
from joblib import dump

# Assuming mlp_regressor is your trained model
dump(mlp_regressor, 'mlp_regressor.joblib')

['mlp_regressor.joblib']