In [1]:
#Import all libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report

# Load the chess game dataset

data = pd.read_csv('games.csv')
 
# Selecting data to use for classification
X = data[['black_rating', 'white_rating']]
y = data['winner']

# Define the test sizes
test_sizes = [0.2, 0.3, 0.4]

# Function to print separator
def print_separator():
    print("\n" + "=" * 80 + "\n")

# Iterate through different test sizes
for test_size in test_sizes:
    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=42)

    # K-Nearest Neighbors
    knn = KNeighborsClassifier(n_neighbors=3)
    knn.fit(X_train, y_train)
    y_pred_knn = knn.predict(X_test)

    # Naive Bayes (Gaussian)
    gnb = GaussianNB()
    gnb.fit(X_train, y_train)
    y_pred_gnb = gnb.predict(X_test)

    # Find mislabeled data points for K-Nearest Neighbors
    mislabel_knn = X_test[y_test != y_pred_knn]

    # Find mislabeled data points for Naive Bayes (Gaussian)
    mislabel_gnb = X_test[y_test != y_pred_gnb]

    # Calculate accuracy
    accuracy_knn = accuracy_score(y_test, y_pred_knn)
    accuracy_gnb = accuracy_score(y_test, y_pred_gnb)

    # Print the results for each test size
    print(f"\nTest Size: {test_size}")
    print_separator()
    
    #Prints the results for Naive Bayes K-Nearest Neighbours
    
    print("K-Nearest Neighbors:")
    print("Accuracy:", accuracy_knn)
    print("Total count of mislabeled data points for K-Nearest Neighbors:", len(mislabel_knn))
    print("Classification Report:")
    print(classification_report(y_test, y_pred_knn, zero_division=1))

    #Prints the results for Naive Bayes

    print("Naive Bayes (Gaussian):")
    print("Accuracy:", accuracy_gnb)
    print("Total count of mislabeled data points for Naive Bayes (Gaussian):", len(mislabel_gnb))
    print("Classification Report:")
    print(classification_report(y_test, y_pred_gnb, zero_division=1))

    print_separator()



Test Size: 0.2


K-Nearest Neighbors:
Accuracy: 0.5785144566301097
Total count of mislabeled data points for K-Nearest Neighbors: 1691
Classification Report:
              precision    recall  f1-score   support

       black       0.56      0.59      0.57      1816
        draw       0.17      0.05      0.08       192
       white       0.61      0.62      0.61      2004

    accuracy                           0.58      4012
   macro avg       0.44      0.42      0.42      4012
weighted avg       0.57      0.58      0.57      4012

Naive Bayes (Gaussian):
Accuracy: 0.6094217347956131
Total count of mislabeled data points for Naive Bayes (Gaussian): 1567
Classification Report:
              precision    recall  f1-score   support

       black       0.66      0.42      0.51      1816
        draw       1.00      0.00      0.00       192
       white       0.59      0.84      0.69      2004

    accuracy                           0.61      4012
   macro avg       0.75      0.42      0.