In [1]:
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import LinearSVC
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix
from sklearn.decomposition import PCA

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import numpy as np
warnings.filterwarnings('ignore')

In [None]:
from dataset import Dataset

In [None]:
import os

# Dataset location. The original local path stays the default, but it can be
# overridden with the CAT_DOG_DATA_DIR environment variable so the notebook
# also runs on machines where the data lives elsewhere.
path = os.environ.get('CAT_DOG_DATA_DIR', r'E:\knns\data\cat_dog')
# Dimensions handed to the Dataset loader (presumably the target image size in
# pixels — confirm against dataset.Dataset).
width = 64
height = 64

In [None]:
# Build the project-local loader, then pull the image array and its labels.
loader = Dataset(path=path, width=width, height=height)
X = loader.load_images(path=path)

# load_labels receives the same count twice — presumably the number of images
# per class; TODO confirm against Dataset.load_labels.
images_per_class = 12500
y = loader.load_labels(images_per_class, images_per_class)

In [None]:
# Hold out 20% of the samples for evaluation. A fixed random_state makes the
# split, and therefore every metric reported from it, reproducible after a
# kernel restart.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [None]:
# Flatten every sample into a single feature row (n_samples, n_features) so the
# scikit-learn estimators receive 2-D input.
# A float32 cast (X.astype('float32')) was tried here and is currently disabled.
X_train = X_train.reshape(len(X_train), -1)
X_test = X_test.reshape(len(X_test), -1)

In [None]:
def _confusion_matrix(y_true, y_pred):
    """Compute a confusion matrix, display it as a heatmap, and return it.

    Args:
        y_true: Ground-truth labels.
        y_pred: Labels predicted by a classifier.

    Returns:
        The matrix produced by ``confusion_matrix(y_true, y_pred)``.
    """
    matrix = confusion_matrix(y_true, y_pred)

    # Draw on an explicit 5x5-inch axes; integer annotations via fmt='d'.
    _, ax = plt.subplots(figsize=(5, 5))
    sns.heatmap(matrix, annot=True, fmt='d', cmap='Blues', ax=ax)
    ax.set_title('Confusion Matrix')
    ax.set_xlabel('Predicted')
    ax.set_ylabel('Actual')
    plt.show()

    return matrix

In [None]:
# pca = PCA(n_components=4096)
# X_train = pca.fit_transform(X_train)
# X_test = pca.transform(X_test)

In [None]:
# Global summary of the training features: mean, standard deviation, variance
# (each computed over the whole flattened array), one value per line.
print(np.mean(X_train), np.std(X_train), np.var(X_train), sep='\n')

In [None]:
def perform_grid_search_cv(model, param_grid, X_train, y_train, cv=5, verbose=2):
    """Run a cross-validated grid search and return the best fitted estimator.

    Args:
        model: Estimator instance to tune.
        param_grid: Mapping of parameter names to the candidate values to try.
        X_train: Training features passed to ``GridSearchCV.fit``.
        y_train: Training labels passed to ``GridSearchCV.fit``.
        cv: Cross-validation setting forwarded to ``GridSearchCV`` (default 5).
        verbose: Verbosity level forwarded to ``GridSearchCV`` (default 2).

    Returns:
        The ``best_estimator_`` of the fitted search. The best parameters and
        best score are printed as a side effect.
    """
    search = GridSearchCV(
        estimator=model,
        param_grid=param_grid,
        cv=cv,
        verbose=verbose,
    )
    search.fit(X_train, y_train)

    print("Best parameters: ", search.best_params_)
    print("Best score: ", search.best_score_)

    return search.best_estimator_

In [None]:
# KNN
# Best parameters:  {'metric': 'manhattan', 'n_neighbors': 9, 'weights': 'distance'}
# Best score:  0.62255
# SVM
# Best parameters:  {'C': 0.1, 'loss': 'hinge'}
# Best score:  0.53835


In [None]:
# Hyper-parameter grids, one per model family, in the format GridSearchCV expects.

# k-nearest neighbours: odd neighbourhood sizes 3..9, both weightings and metrics.
params_knn = {
    'n_neighbors': list(range(3, 10, 2)),
    'weights': ['uniform', 'distance'],
    'metric': ['euclidean', 'manhattan'],
}

# Linear SVM: regularisation strength and loss function.
params_svm = {
    'C': [0.1, 1, 10],
    'loss': ['hinge', 'squared_hinge'],
}

# Logistic regression: liblinear is the only solver listed; both penalties are tried.
params_lr = {
    'C': [0.1, 1, 10],
    'penalty': ['l1', 'l2'],
    'solver': ['liblinear'],
}

# Decision tree: split criterion plus depth / leaf-size limits.
params_dt = {
    'criterion': ['gini', 'entropy'],
    'max_depth': [None, 2, 4, 6],
    'min_samples_split': [2, 5],
    'min_samples_leaf': [1, 2, 4],
}

# Random forest: the tree grid plus the ensemble size. The value lists are
# copied so the two grids stay independent objects.
params_rf = {
    'n_estimators': [10, 50, 100],
    **{name: list(values) for name, values in params_dt.items()},
}

In [None]:
# perform_grid_search_cv(KNeighborsClassifier(), param_grid=params_knn, X_train=X_train, y_train=y_train)

In [None]:
# Fit KNN with the best configuration recorded from the grid search in this
# notebook ({'metric': 'manhattan', 'n_neighbors': 9, 'weights': 'distance'}).
# The metric previously used here was 'euclidean', which did not match that
# recorded result.
knn = KNeighborsClassifier(n_neighbors=9, metric='manhattan', weights='distance')
knn.fit(X_train, y_train)
y_pred_knn = knn.predict(X_test)

In [None]:
# Score the KNN predictions on the held-out split, report the three metrics as
# percentages, then plot the confusion matrix.
accuracy_knn = accuracy_score(y_test, y_pred_knn)
precision_knn = precision_score(y_test, y_pred_knn, average='binary')
recall_knn = recall_score(y_test, y_pred_knn, average='binary')
print(
    f'KNN Accuracy: {accuracy_knn * 100:.2f}%',
    f'KNN Precision: {precision_knn * 100:.2f}%',
    f'KNN Recall: {recall_knn * 100:.2f}%',
    sep='\n',
)
cm_knn = _confusion_matrix(y_test, y_pred_knn)

In [None]:
# perform_grid_search_cv(LinearSVC(), param_grid=params_svm, X_train=X_train, y_train=y_train)

In [None]:
# Fit the linear SVM with the best configuration recorded from the grid search
# in this notebook ({'C': 0.1, 'loss': 'hinge'}); C was previously 10, which
# did not match that result. random_state pins the solver's internal
# randomness so reruns give the same model.
svm = LinearSVC(loss='hinge', C=0.1, random_state=42)
svm.fit(X_train, y_train)
y_pred_svm = svm.predict(X_test)

In [None]:
# Score the SVM predictions on the held-out split (binary averaging for the
# two-class problem), report the metrics as percentages, then plot the
# confusion matrix.
accuracy_svm = accuracy_score(y_test, y_pred_svm)
precision_svm = precision_score(y_test, y_pred_svm, average='binary')
recall_svm = recall_score(y_test, y_pred_svm, average='binary')
print(
    f'SVM Accuracy: {accuracy_svm * 100:.2f}%',
    f'SVM Precision: {precision_svm * 100:.2f}%',
    f'SVM Recall: {recall_svm * 100:.2f}%',
    sep='\n',
)
cm_svm = _confusion_matrix(y_test, y_pred_svm)

In [None]:
# Gaussian Naive Bayes with default settings; fit() returns the estimator
# itself, so construction and training are chained.
gnb = GaussianNB().fit(X_train, y_train)
y_pred_gnb = gnb.predict(X_test)

In [None]:
# Score the Gaussian Naive Bayes predictions on the held-out split and report
# the metrics as percentages.
accuracy_gnb = accuracy_score(y_test, y_pred_gnb)
precision_gnb = precision_score(y_test, y_pred_gnb, average='binary')
recall_gnb = recall_score(y_test, y_pred_gnb, average='binary')
print(f'Gaussian Naive Bayes Accuracy: {accuracy_gnb * 100:.2f}%')
print(f'Gaussian Naive Bayes Precision: {precision_gnb * 100:.2f}%')
print(f'Gaussian Naive Bayes Recall: {recall_gnb * 100:.2f}%')
# Store the matrix under its own name; it was previously assigned to cm_svm,
# which silently overwrote the SVM confusion matrix.
cm_gnb = _confusion_matrix(y_test, y_pred_gnb)

In [None]:
# perform_grid_search_cv(LogisticRegression(), param_grid=params_lr, X_train=X_train, y_train=y_train)

In [None]:
# Logistic regression with scikit-learn defaults; fit() returns the estimator,
# so construction and training are chained.
# NOTE(review): warnings are globally silenced in this notebook, so a possible
# ConvergenceWarning from the default iteration limit would not be visible —
# confirm the solver converges on this data.
reg = LogisticRegression().fit(X_train, y_train)
y_pred_reg = reg.predict(X_test)

In [None]:
# Score the logistic-regression predictions on the held-out split (binary
# averaging for the two-class problem), report the metrics as percentages,
# then plot the confusion matrix.
accuracy_reg = accuracy_score(y_test, y_pred_reg)
precision_reg = precision_score(y_test, y_pred_reg, average='binary')
recall_reg = recall_score(y_test, y_pred_reg, average='binary')
print(
    f'Logistic Regression Accuracy: {accuracy_reg * 100:.2f}%',
    f'Logistic Regression Precision: {precision_reg * 100:.2f}%',
    f'Logistic Regression Recall: {recall_reg * 100:.2f}%',
    sep='\n',
)
cm_reg = _confusion_matrix(y_test, y_pred_reg)

In [None]:
# Grid-search the decision-tree hyper-parameters. random_state pins the
# estimator's internal randomness so repeated searches rank candidates the same
# way. The tuned estimator is kept in a variable (it was previously discarded)
# and echoed as the cell output.
best_dtc = perform_grid_search_cv(
    DecisionTreeClassifier(random_state=42),
    param_grid=params_dt,
    X_train=X_train,
    y_train=y_train,
)
best_dtc

In [None]:
# Decision tree with default hyper-parameters. random_state makes the fitted
# tree, and the metrics derived from it, reproducible across reruns.
# NOTE(review): this model does not use the parameters found by the decision
# tree grid search — confirm whether the tuned settings should be applied here.
dtc = DecisionTreeClassifier(random_state=42)
dtc.fit(X_train, y_train)
y_pred_dtc = dtc.predict(X_test)

In [None]:
# Score the decision-tree predictions on the held-out split (binary averaging
# for the two-class problem), report the metrics as percentages, then plot the
# confusion matrix.
accuracy_dtc = accuracy_score(y_test, y_pred_dtc)
precision_dtc = precision_score(y_test, y_pred_dtc, average='binary')
recall_dtc = recall_score(y_test, y_pred_dtc, average='binary')
print(
    f'Decision Tree Accuracy: {accuracy_dtc * 100:.2f}%',
    f'Decision Tree Precision: {precision_dtc * 100:.2f}%',
    f'Decision Tree Recall: {recall_dtc * 100:.2f}%',
    sep='\n',
)
cm_dtc = _confusion_matrix(y_test, y_pred_dtc)

In [None]:
# Grid-search the random-forest hyper-parameters. random_state pins the
# bootstrap / feature sampling so repeated searches rank candidates the same
# way. The tuned estimator is kept in a variable (it was previously discarded)
# and echoed as the cell output.
best_rfc = perform_grid_search_cv(
    RandomForestClassifier(random_state=42),
    param_grid=params_rf,
    X_train=X_train,
    y_train=y_train,
)
best_rfc

In [None]:
# Random forest with default hyper-parameters. random_state makes the fitted
# ensemble, and the metrics derived from it, reproducible across reruns.
# NOTE(review): this model does not use the parameters found by the random
# forest grid search — confirm whether the tuned settings should be applied here.
rfc = RandomForestClassifier(random_state=42)
rfc.fit(X_train, y_train)
y_pred_rfc = rfc.predict(X_test)

In [None]:
# Score the random-forest predictions on the held-out split (binary averaging
# for the two-class problem) and report the metrics as percentages.
accuracy_rfc = accuracy_score(y_test, y_pred_rfc)
precision_rfc = precision_score(y_test, y_pred_rfc, average='binary')
recall_rfc = recall_score(y_test, y_pred_rfc, average='binary')
print(f'Random Forests Accuracy: {accuracy_rfc * 100:.2f}%')
# Label fixed: it previously read 'Frandom Forests Precision'.
print(f'Random Forests Precision: {precision_rfc * 100:.2f}%')
print(f'Random Forests Recall: {recall_rfc * 100:.2f}%')
cm_rfc = _confusion_matrix(y_test, y_pred_rfc)