In [None]:
import os
from PIL import Image

import numpy as np

import torch.nn as nn
import torch.optim as optim
import torch.utils.data
import torchvision.datasets as datasets
from torchvision import transforms

from tqdm import tqdm

In [None]:
# Location of the Git repo checkout as seen from the Google Colab notebook.
path = './'

# Use the repo path when it exists on disk; otherwise fall back to an empty
# prefix so the data folders are resolved relative to the working directory.
if os.path.isdir(path):
    root_path = path
else:
    root_path = ''

# Folder holding the training images.
data_dir = os.path.join(root_path, 'training')

In [None]:
# Preliminary pipeline: resize every image to 224x224 and convert it to a
# tensor. No normalisation is applied yet, because this un-normalised view of
# the data is what the normalisation statistics are computed from.
initial_transforms = transforms.Compose(
    [transforms.Resize((224, 224)), transforms.ToTensor()]
)

# Training images, read from the class sub-folders of data_dir.
dataset = datasets.ImageFolder(root=data_dir, transform=initial_transforms)

def get_mean_std(loader):
    """Compute the per-channel mean and standard deviation of a dataset.

    Running sums of the pixel values and of their squares are accumulated for
    each channel over every batch, so the result is exact for the whole
    dataset and does not depend on batch size, batch order, or a smaller
    final batch.

    Args:
        loader: iterable yielding ``(images, labels)`` pairs, where ``images``
            is a 4-D tensor laid out as ``(batch, channels, height, width)``.
            The labels are ignored.

    Returns:
        ``(mean, std)``: two 1-D float32 tensors with one entry per channel.
        ``std`` is the population standard deviation (divides by N).

    Raises:
        ValueError: if the loader yields no images.
    """
    num_pixels = 0          # pixels seen per channel so far
    channel_sum = 0.0       # becomes a (channels,) tensor after the first batch
    channel_sq_sum = 0.0    # running sum of squared pixel values per channel

    for images, _ in loader:
        batch_size, _channels, height, width = images.shape
        num_pixels += batch_size * height * width

        # Accumulate in float64: summing millions of float32 values (and
        # their squares) would otherwise lose noticeable precision.
        images = images.double()
        channel_sum = channel_sum + images.sum(dim=(0, 2, 3))
        channel_sq_sum = channel_sq_sum + (images ** 2).sum(dim=(0, 2, 3))

    if num_pixels == 0:
        raise ValueError("loader yielded no images; cannot compute mean/std")

    mean = channel_sum / num_pixels
    # Var[x] = E[x^2] - E[x]^2; clamp guards against tiny negative values
    # produced by floating-point round-off on (near-)constant channels.
    variance = (channel_sq_sum / num_pixels - mean ** 2).clamp(min=0.0)
    std = variance.sqrt()

    return mean.float(), std.float()

# Iterate over the un-normalised training images in batches to measure the
# dataset statistics. Shuffling is deliberately off: it adds nothing when
# computing statistics and, with no random seed set, would make the
# normalisation constants differ from one run to the next.
batch_size = 32
loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=False)

mean, std = get_mean_std(loader)

# Final training pipeline: same resize/tensor conversion as before, followed
# by normalisation with the statistics measured above.
data_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
])

# Re-open the training folder so every sample now comes out normalised.
dataset = datasets.ImageFolder(data_dir, transform=data_transforms)

In [None]:
# Materialise the normalised training set as dense NumPy arrays so it can be
# handed to scikit-learn.
n = len(dataset)

# The image tensors are float32, so float32 storage loses nothing and halves
# the footprint of this array compared with NumPy's default float64.
X_train = np.zeros((n, 3, 224, 224), dtype=np.float32)

# ImageFolder targets are integer class indices; keep them as integers.
y = np.zeros(n, dtype=np.int64)

for i, (inputs, labels) in enumerate(tqdm(dataset)):
    X_train[i] = inputs.numpy()
    y[i] = labels

100%|██████████| 10601/10601 [00:17<00:00, 613.46it/s]


In [None]:
from sklearn.decomposition import PCA  # only used by the optional PCA step below
from sklearn.preprocessing import StandardScaler
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# --- Validation images: same preprocessing as training, reusing the
# --- normalisation statistics measured on the training set.
data_dir = os.path.join(root_path, 'validation')
data_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std),
])
val_dataset = datasets.ImageFolder(root=data_dir, transform=data_transforms)

# --- Copy the validation set into dense arrays.
n = len(val_dataset)
X_val, y_val = np.zeros((n, 3, 224, 224)), np.zeros(n)
for i, (inputs, labels) in enumerate(tqdm(val_dataset)):
    X_val[i], y_val[i] = inputs.numpy(), labels

# --- Flatten every image into a single feature row. The four-way unpack
# --- requires the arrays to still be 4-D (i.e. this cell has not already
# --- been run on them).
n, d1, d2, d3 = X_val.shape
X_val = X_val.reshape((n, d1 * d2 * d3))
n, d1, d2, d3 = X_train.shape
X_train = X_train.reshape((n, d1 * d2 * d3))

# --- Standardise each feature; the scaler is fitted on training data only.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_val = scaler.transform(X_val)

# Optional PCA step (currently disabled):
# pca = PCA(0.90)  # Retain 90% of variance

# X_train = pca.fit_transform(X_train)
# X_val = pca.transform(X_val)

# --- Supervised projection with LDA, fitted on training data only.
lda = LinearDiscriminantAnalysis().fit(X_train, y)
X_train = lda.transform(X_train)
X_val = lda.transform(X_val)


100%|██████████| 150/150 [00:00<00:00, 567.18it/s]


In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Candidate neighbourhood sizes: k = 1 .. 99.
neighbors_range = range(1,100)

# Running (accuracy, k) winners for the training and validation splits.
best_accuracy_training, best_n_neighbors_training = 0, 1
best_accuracy_validation, best_n_neighbors_validation = 0, 1

for n_neighbors in neighbors_range:
    # Fit a k-NN classifier on the LDA-projected training features.
    model = KNeighborsClassifier(n_neighbors=n_neighbors).fit(X_train, y)

    # Score the same model on both splits.
    training_accuracy = accuracy_score(y, model.predict(X_train))
    yp = model.predict(X_val)
    validation_accuracy = accuracy_score(y_val, yp)

    # Strict '>' means the smallest k wins whenever accuracies tie.
    if training_accuracy > best_accuracy_training:
        best_accuracy_training, best_n_neighbors_training = training_accuracy, n_neighbors
    if validation_accuracy > best_accuracy_validation:
        best_accuracy_validation, best_n_neighbors_validation = validation_accuracy, n_neighbors

    # One line per k: training accuracy followed by validation accuracy.
    print("Accuracy:",training_accuracy, validation_accuracy)

# Summary lines: best training result first, then best validation result,
# each printed as (accuracy, k).
print("Best Accuracy:",best_accuracy_training, best_n_neighbors_training)
print("Best Accuracy:",best_accuracy_validation, best_n_neighbors_validation)



Accuracy: 1.0 0.7466666666666667
Accuracy: 0.9999056692764834 0.7333333333333333
Accuracy: 0.9998113385529667 0.7466666666666667
Accuracy: 0.9998113385529667 0.7333333333333333
Accuracy: 0.99971700782945 0.7466666666666667
Accuracy: 0.99971700782945 0.74
Accuracy: 0.99971700782945 0.74
Accuracy: 0.9996226771059334 0.74
Accuracy: 0.9995283463824167 0.74
Accuracy: 0.9994340156589001 0.76
Accuracy: 0.9993396849353835 0.76
Accuracy: 0.9988680313178002 0.76
Accuracy: 0.9990566927648336 0.76
Accuracy: 0.9987737005942836 0.76
Accuracy: 0.9987737005942836 0.76
Accuracy: 0.9983020469767003 0.7533333333333333
Accuracy: 0.9984907084237337 0.76
Accuracy: 0.998113385529667 0.76
Accuracy: 0.9980190548061504 0.76
Accuracy: 0.9974530704650505 0.76
Accuracy: 0.9972644090180172 0.76
Accuracy: 0.9970757475709838 0.76
Accuracy: 0.9971700782945006 0.7666666666666667
Accuracy: 0.9971700782945006 0.76
Accuracy: 0.9971700782945006 0.76
Accuracy: 0.9967927554004339 0.76
Accuracy: 0.9966984246769173 0.76
Accura