FIRST STEP: dataset loading and preprocessing

In [3]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader


# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


# Preprocessing applied to every image: resize to 224x224, convert to a
# tensor, then normalize each of the three channels with the mean/std below.
# NOTE(review): these mean/std values look like the usual ImageNet statistics
# expected by torchvision's pretrained models — confirm.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Dataset wrapper that keeps only the first `num_per_class` samples of each class
class FilteredDataset(Dataset):
    """In-memory, class-balanced subset of a labelled dataset.

    The wrapped dataset is iterated once, in order, at construction time.
    For every class the first ``num_per_class`` samples encountered are kept;
    iteration stops as soon as every class has reached its quota.

    Args:
        dataset: Iterable yielding ``(sample, label)`` pairs, where ``label``
            is an integer class index (anything accepted by ``int()``).
        num_per_class: Maximum number of samples to keep for each class.
        num_classes: Number of distinct class indices, ``0 .. num_classes-1``.
            Defaults to 10, which preserves the original behavior.

    Raises:
        ValueError: If ``num_classes`` is smaller than 1.
        IndexError: If a label falls outside ``0 .. num_classes-1``.
    """

    def __init__(self, dataset, num_per_class, num_classes=10):
        if num_classes < 1:
            raise ValueError(f"num_classes must be >= 1, got {num_classes}")
        self.dataset = dataset
        self.num_per_class = num_per_class
        self.num_classes = num_classes
        # class_counts[c] is the number of samples kept so far for class c.
        self.class_counts = [0] * num_classes
        self.filtered_data = []
        self._filter_dataset()

    def _filter_dataset(self):
        """Fill ``filtered_data`` with up to ``num_per_class`` samples per class."""
        # A non-positive quota can never be filled by adding samples.
        if self.num_per_class <= 0:
            return

        full_classes = 0  # number of classes that have reached their quota
        for img, label in self.dataset:
            cls = int(label)
            # Reject out-of-range labels explicitly; a negative index would
            # otherwise wrap around silently in Python list indexing.
            if not 0 <= cls < self.num_classes:
                raise IndexError(
                    f"label {cls} is outside the range [0, {self.num_classes})"
                )
            if self.class_counts[cls] >= self.num_per_class:
                continue
            self.filtered_data.append((img, label))
            self.class_counts[cls] += 1
            if self.class_counts[cls] == self.num_per_class:
                full_classes += 1
                # Stop as soon as every class is full so the rest of the
                # wrapped dataset is never loaded.
                if full_classes == self.num_classes:
                    break

    def __len__(self):
        """Return the number of retained samples."""
        return len(self.filtered_data)

    def __getitem__(self, idx):
        """Return the retained ``(sample, label)`` pair at position ``idx``."""
        return self.filtered_data[idx]

# Load CIFAR-10 dataset + filtering
# Both splits are downloaded into ./data if needed and use the shared
# `transform` pipeline.
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

# Keep 500 training and 100 test images per class.
filtered_trainset = FilteredDataset(trainset, num_per_class=500)
filtered_testset = FilteredDataset(testset, num_per_class=100)

# Batches of 64, served in stored order (shuffle=False) from the main
# process (num_workers=0).
trainloader = DataLoader(filtered_trainset, batch_size=64, shuffle=False, num_workers=0)
testloader = DataLoader(filtered_testset, batch_size=64, shuffle=False, num_workers=0)

# Check dataset sizes
print(f"Filtered Train Dataset Size: {len(filtered_trainset)}")
print(f"Filtered Test Dataset Size: {len(filtered_testset)}")

# Sanity check: pull the first training batch and print its tensor shapes.
# `train_images` / `train_labels` hold that single batch only.
train_images, train_labels = next(iter(trainloader))
print(f"Sample Train Batch: Images Shape {train_images.shape}, Labels {train_labels.shape}")


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170M/170M [00:01<00:00, 100MB/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified
Filtered Train Dataset Size: 5000
Filtered Test Dataset Size: 1000
Sample Train Batch: Images Shape torch.Size([64, 3, 224, 224]), Labels torch.Size([64])


Feature Extraction with ResNet-18

In [4]:
import torch
import torchvision.models as models
from sklearn.decomposition import PCA
import numpy as np


# Load torchvision's ResNet-18 with pretrained weights.
# NOTE(review): recent torchvision releases deprecate `pretrained=` in favor
# of `weights=` — confirm against the installed version.
resnet18 = models.resnet18(pretrained=True)
# Rebuild the network from all of its child modules except the last one, so
# it emits features (presumably the dropped child is the classifier head —
# confirm).
resnet18 = torch.nn.Sequential(*list(resnet18.children())[:-1])
resnet18 = resnet18.to(device)
resnet18.eval()  # Set model to evaluation mode

# Run a model over a dataloader and collect its flattened outputs
def extract_features(dataloader, model, device):
    """Compute flattened model outputs for every batch of a dataloader.

    Args:
        dataloader: Iterable yielding ``(images, targets)`` tensor pairs.
        model: Callable applied to each image batch after it is moved to
            ``device``.
        device: Device the image batches are moved to before the forward pass.

    Returns:
        A ``(features, labels)`` tuple of NumPy arrays: the per-batch outputs,
        each reshaped to ``(batch_size, -1)`` and stacked vertically, and the
        per-batch targets concatenated into one array.
    """
    feature_chunks = []
    label_chunks = []
    # Gradients are not needed for a pure forward pass.
    with torch.no_grad():
        for batch_images, batch_targets in dataloader:
            batch_images = batch_images.to(device)
            batch_output = model(batch_images)
            # One row per sample, everything else flattened into columns.
            flattened = batch_output.view(batch_images.size(0), -1)
            feature_chunks.append(flattened.cpu().numpy())
            label_chunks.append(batch_targets.numpy())
    stacked_features = np.vstack(feature_chunks)
    stacked_labels = np.hstack(label_chunks)
    return stacked_features, stacked_labels

# features for training and test sets
# Note: this rebinds `train_labels` (previously one sample batch from the
# data-loading cell) to the labels of the whole filtered training set.
train_features, train_labels = extract_features(trainloader, resnet18, device)
test_features, test_labels = extract_features(testloader, resnet18, device)

# Report the (num_samples, feature_dim) shape of each feature matrix.
print(f"Train Features Shape: {train_features.shape}")
print(f"Test Features Shape: {test_features.shape}")


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 66.3MB/s]


Train Features Shape: (5000, 512)
Test Features Shape: (1000, 512)


Dimensionality Reduction with PCA

In [5]:
# PCA to make dimensionality 50
# random_state is pinned because scikit-learn's automatic solver selection may
# choose a randomized SVD for data of this size; without a seed the projection
# (and every downstream metric) can change between runs. 42 matches the seed
# used for the decision trees in this notebook.
pca = PCA(n_components=50, random_state=42)
# Fit the projection on the training features only, then apply the same
# projection to the test features so no test information leaks into PCA.
train_features_pca = pca.fit_transform(train_features)
test_features_pca = pca.transform(test_features)

print(f"Reduced Train Features Shape: {train_features_pca.shape}")
print(f"Reduced Test Features Shape: {test_features_pca.shape}")


Reduced Train Features Shape: (5000, 50)
Reduced Test Features Shape: (1000, 50)


Naive Bayes

In [6]:
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import numpy as np

# Gaussian Naive Bayes implemented from scratch with NumPy
class GaussianNaiveBayes:
    """Gaussian Naive Bayes classifier.

    Each feature is modelled as an independent normal distribution per class.
    After ``fit`` the following attributes are available:

    * ``classes``   - sorted array of the distinct labels seen in ``y``
    * ``means``     - dict mapping class -> per-feature mean vector
    * ``variances`` - dict mapping class -> per-feature variance vector
    * ``priors``    - dict mapping class -> fraction of training rows
    """

    def fit(self, X, y):
        """Estimate per-class feature means, variances and class priors.

        Args:
            X: Array-like of shape (n_samples, n_features).
            y: Array-like of shape (n_samples,) with the class labels.

        Returns:
            self, so calls can be chained (``clf.fit(X, y).predict(X)``).
        """
        # Accept plain Python sequences as well as ndarrays: boolean-mask
        # indexing below only works on arrays.
        X = np.asarray(X)
        y = np.asarray(y)

        self.classes = np.unique(y)
        self.means = {}
        self.variances = {}
        self.priors = {}

        for cls in self.classes:
            X_cls = X[y == cls]
            self.means[cls] = np.mean(X_cls, axis=0)
            # Small constant keeps the variance strictly positive so the
            # log and division in predict() stay finite.
            self.variances[cls] = np.var(X_cls, axis=0) + 1e-9
            self.priors[cls] = X_cls.shape[0] / X.shape[0]
        return self

    def predict(self, X):
        """Return the most probable class for every row of ``X``.

        Args:
            X: Array-like of shape (n_samples, n_features).

        Returns:
            ndarray of shape (n_samples,) holding the predicted labels.
        """
        X = np.asarray(X)
        posteriors = []
        for cls in self.classes:
            mean, var = self.means[cls], self.variances[cls]
            log_prior = np.log(self.priors[cls])
            # Log of a product of independent Gaussians: the normalization
            # term is shared by all rows, the quadratic term is per row.
            log_likelihood = (
                -0.5 * np.sum(np.log(2 * np.pi * var))
                - 0.5 * np.sum(((X - mean) ** 2) / var, axis=1)
            )
            posteriors.append(log_prior + log_likelihood)
        # posteriors is (n_classes, n_samples); choose the best class per column.
        return self.classes[np.argmax(posteriors, axis=0)]

# Train the from-scratch classifier on the PCA-reduced training features
gnb_scratch = GaussianNaiveBayes()
gnb_scratch.fit(train_features_pca, train_labels)

# Predict labels for the PCA-reduced test features
test_preds_scratch = gnb_scratch.predict(test_features_pca)

# Compute test metrics; precision, recall and F1 are macro-averaged
accuracy_scratch = accuracy_score(test_labels, test_preds_scratch)
precision_scratch = precision_score(test_labels, test_preds_scratch, average='macro')
recall_scratch = recall_score(test_labels, test_preds_scratch, average='macro')
f1_scratch = f1_score(test_labels, test_preds_scratch, average='macro')
conf_matrix_scratch = confusion_matrix(test_labels, test_preds_scratch)

# Print the metrics rounded to two decimals, followed by the confusion matrix
print("Custom Naive Bayes Results:")
print(f"Accuracy: {accuracy_scratch:.2f}")
print(f"Precision: {precision_scratch:.2f}")
print(f"Recall: {recall_scratch:.2f}")
print(f"F1 Score: {f1_scratch:.2f}")
print("Confusion Matrix:")
print(conf_matrix_scratch)








# Train scikit-learn's GaussianNB on the same PCA-reduced features, as a
# reference for the from-scratch implementation
gnb_sklearn = GaussianNB()
gnb_sklearn.fit(train_features_pca, train_labels)

# Predict labels for the PCA-reduced test features
test_preds_sklearn = gnb_sklearn.predict(test_features_pca)

# Compute test metrics; precision, recall and F1 are macro-averaged
accuracy_sklearn = accuracy_score(test_labels, test_preds_sklearn)
precision_sklearn = precision_score(test_labels, test_preds_sklearn, average='macro')
recall_sklearn = recall_score(test_labels, test_preds_sklearn, average='macro')
f1_sklearn = f1_score(test_labels, test_preds_sklearn, average='macro')
conf_matrix_sklearn = confusion_matrix(test_labels, test_preds_sklearn)

# Print the metrics rounded to two decimals, followed by the confusion matrix
print("\nScikit-learn Naive Bayes Results:")
print(f"Accuracy: {accuracy_sklearn:.2f}")
print(f"Precision: {precision_sklearn:.2f}")
print(f"Recall: {recall_sklearn:.2f}")
print(f"F1 Score: {f1_sklearn:.2f}")
print("Confusion Matrix:")
print(conf_matrix_sklearn)


Custom Naive Bayes Results:
Accuracy: 0.79
Precision: 0.80
Recall: 0.79
F1 Score: 0.79
Confusion Matrix:
[[80  1  1  1  0  0  1  0 12  4]
 [ 3 88  0  2  1  0  0  0  0  6]
 [ 7  0 62  8  7  4 11  0  1  0]
 [ 1  0  4 75  4 10  6  0  0  0]
 [ 1  0  4  7 77  3  1  7  0  0]
 [ 0  1  5 15  3 73  2  1  0  0]
 [ 2  0  4  6  6  3 78  1  0  0]
 [ 1  1  0  5  6  5  0 81  1  0]
 [ 8  0  1  0  1  0  0  0 87  3]
 [ 5  2  0  2  0  0  0  1  1 89]]

Scikit-learn Naive Bayes Results:
Accuracy: 0.79
Precision: 0.80
Recall: 0.79
F1 Score: 0.79
Confusion Matrix:
[[80  1  1  1  0  0  1  0 12  4]
 [ 3 88  0  2  1  0  0  0  0  6]
 [ 7  0 62  8  7  4 11  0  1  0]
 [ 1  0  4 75  4 10  6  0  0  0]
 [ 1  0  4  7 77  3  1  7  0  0]
 [ 0  1  5 15  3 73  2  1  0  0]
 [ 2  0  4  6  6  3 78  1  0  0]
 [ 1  1  0  5  6  5  0 81  1  0]
 [ 8  0  1  0  1  0  0  0 87  3]
 [ 5  2  0  2  0  0  0  1  1 89]]


Decision Trees + Experimenting with Different Depths

In [7]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# Baseline tree: Gini impurity as split criterion, depth capped at 50, and a
# fixed random_state so repeated runs build the same tree
decision_tree = DecisionTreeClassifier(criterion='gini', max_depth=50, random_state=42)
decision_tree.fit(train_features_pca, train_labels)

# Predict labels for the PCA-reduced test features
test_preds_tree = decision_tree.predict(test_features_pca)

# Compute test metrics; precision, recall and F1 are macro-averaged
accuracy_tree = accuracy_score(test_labels, test_preds_tree)
precision_tree = precision_score(test_labels, test_preds_tree, average='macro')
recall_tree = recall_score(test_labels, test_preds_tree, average='macro')
f1_tree = f1_score(test_labels, test_preds_tree, average='macro')
conf_matrix_tree = confusion_matrix(test_labels, test_preds_tree)

# Print the metrics rounded to two decimals, followed by the confusion matrix
print("Default Decision Tree Results (Max Depth = 50):")
print(f"Accuracy: {accuracy_tree:.2f}")
print(f"Precision: {precision_tree:.2f}")
print(f"Recall: {recall_tree:.2f}")
print(f"F1 Score: {f1_tree:.2f}")
print("Confusion Matrix:")
print(conf_matrix_tree)

# Depth sweep: refit the same tree configuration at several max_depth values
# and evaluate each one on the test features.
# NOTE(review): depths are compared directly on the test set; if a depth is
# picked from these numbers, a separate validation split would be safer.
depths = [10, 20, 30, 40, 50]
# One tuple per depth: (depth, accuracy, precision, recall, f1, confusion matrix)
depth_results = []

print("\nDecision Tree Depth Experiments:")
for depth in depths:
    # Same criterion and random_state each time, so only max_depth varies.
    tree = DecisionTreeClassifier(criterion='gini', max_depth=depth, random_state=42)
    tree.fit(train_features_pca, train_labels)
    preds = tree.predict(test_features_pca)

    # Macro-averaged metrics for this depth
    acc = accuracy_score(test_labels, preds)
    prec = precision_score(test_labels, preds, average='macro')
    rec = recall_score(test_labels, preds, average='macro')
    f1 = f1_score(test_labels, preds, average='macro')
    conf_matrix = confusion_matrix(test_labels, preds)

    depth_results.append((depth, acc, prec, rec, f1, conf_matrix))

    print(f"\nDepth: {depth}")
    print(f"Accuracy: {acc:.2f}")
    print(f"Precision: {prec:.2f}")
    print(f"Recall: {rec:.2f}")
    print(f"F1 Score: {f1:.2f}")
    print("Confusion Matrix:")
    print(conf_matrix)


Default Decision Tree Results (Max Depth = 50):
Accuracy: 0.58
Precision: 0.59
Recall: 0.59
F1 Score: 0.58
Confusion Matrix:
[[56  4  8  4  3  0  1  1 19  4]
 [ 7 68  0  4  0  1  0  0  7 13]
 [ 3  1 42 11  8 10 17  7  1  0]
 [ 1  0  9 44  3 23 13  4  1  2]
 [ 5  1 12  6 45  7  4 18  0  2]
 [ 0  0  8 14  6 60  6  4  2  0]
 [ 1  0 11  5  3  2 76  2  0  0]
 [ 1  1  6 11 10 14  0 55  1  1]
 [22  3  2  0  0  0  2  1 65  5]
 [ 5  8  0  1  1  0  0  1 10 74]]

Decision Tree Depth Experiments:

Depth: 10
Accuracy: 0.61
Precision: 0.62
Recall: 0.61
F1 Score: 0.61
Confusion Matrix:
[[55  7  7  6  3  1  0  0 17  4]
 [ 6 70  0  3  0  2  2  0  5 12]
 [ 4  1 46 19  4 10 11  4  1  0]
 [ 1  0  9 61  4 17  6  1  0  1]
 [ 6  0  5  6 57 10  3 12  0  1]
 [ 0  0  6 25  4 57  3  3  2  0]
 [ 3  0 11 11  4  1 69  1  0  0]
 [ 1  1  4 13 11 13  0 55  1  1]
 [18  5  2  1  0  0  1  1 68  4]
 [ 7  9  0  2  1  0  0  1  8 72]]

Depth: 20
Accuracy: 0.59
Precision: 0.59
Recall: 0.59
F1 Score: 0.59
Confusion Matrix:
[[5

MLP Model

In [7]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix


class MLP(nn.Module):
    """Three-layer perceptron: two hidden layers followed by a linear output.

    Batch normalization is applied to the second hidden layer only. The
    output is raw class scores (logits); no softmax is applied.

    Args:
        input_dim: Number of input features per sample. Defaults to 50.
        hidden_dim: Width of both hidden layers. Defaults to 512.
        num_classes: Number of output scores per sample. Defaults to 10.

    The defaults reproduce the original fixed 50 -> 512 -> 512 -> 10 network.
    """

    def __init__(self, input_dim=50, hidden_dim=512, num_classes=10):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.fc3 = nn.Linear(hidden_dim, num_classes)
        self.relu = nn.ReLU()
        self.batchnorm = nn.BatchNorm1d(hidden_dim)

    def forward(self, x):
        """Map a (batch, input_dim) tensor to (batch, num_classes) scores."""
        x = self.relu(self.fc1(x))
        # Normalize the second hidden layer before its activation.
        x = self.relu(self.batchnorm(self.fc2(x)))
        x = self.fc3(x)
        return x

# Model, loss function, and optimizer (SGD with momentum)
model = MLP().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

# Data prep: move the PCA-reduced features and the labels onto `device` as
# float32 / long tensors
train_tensor = torch.tensor(train_features_pca, dtype=torch.float32).to(device)
train_labels_tensor = torch.tensor(train_labels, dtype=torch.long).to(device)
test_tensor = torch.tensor(test_features_pca, dtype=torch.float32).to(device)
test_labels_tensor = torch.tensor(test_labels, dtype=torch.long).to(device)

# Training loop: every epoch walks the training tensor in consecutive slices
# of `batch_size` rows, in the same order each time (no shuffling)
num_epochs = 20
batch_size = 64
for epoch in range(num_epochs):
    model.train()
    for i in range(0, len(train_tensor), batch_size):
        inputs = train_tensor[i:i+batch_size]
        labels = train_labels_tensor[i:i+batch_size]

        # Forward
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Progress indicator: `loss` here is the loss of the last mini-batch of
    # the epoch, not an average over the epoch
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")


# Switch to evaluation mode and score the whole test set in one forward pass,
# without tracking gradients
model.eval()
with torch.no_grad():
    test_outputs = model(test_tensor)
    # Index of the largest score along dim 1 is the predicted class
    _, test_preds = torch.max(test_outputs, 1)

# The sklearn metric functions below are given NumPy arrays, so move the
# predictions and labels back to the CPU first
test_preds_np = test_preds.cpu().numpy()
test_labels_np = test_labels_tensor.cpu().numpy()

# Compute test metrics; precision, recall and F1 are macro-averaged
accuracy_mlp = accuracy_score(test_labels_np, test_preds_np)
precision_mlp = precision_score(test_labels_np, test_preds_np, average='macro')
recall_mlp = recall_score(test_labels_np, test_preds_np, average='macro')
f1_mlp = f1_score(test_labels_np, test_preds_np, average='macro')
conf_matrix_mlp = confusion_matrix(test_labels_np, test_preds_np)

# Print the metrics rounded to two decimals, followed by the confusion matrix
print(f"MLP Accuracy: {accuracy_mlp:.2f}")
print(f"Precision: {precision_mlp:.2f}")
print(f"Recall: {recall_mlp:.2f}")
print(f"F1 Score: {f1_mlp:.2f}")
print("Confusion Matrix:")
print(conf_matrix_mlp)


Epoch [1/20], Loss: 4.6075
Epoch [2/20], Loss: 2.7075
Epoch [3/20], Loss: 1.7726
Epoch [4/20], Loss: 1.1632
Epoch [5/20], Loss: 1.0814
Epoch [6/20], Loss: 0.7836
Epoch [7/20], Loss: 0.6552
Epoch [8/20], Loss: 0.5032
Epoch [9/20], Loss: 0.2960
Epoch [10/20], Loss: 0.1984
Epoch [11/20], Loss: 0.0992
Epoch [12/20], Loss: 0.0464
Epoch [13/20], Loss: 0.0360
Epoch [14/20], Loss: 0.0316
Epoch [15/20], Loss: 0.0285
Epoch [16/20], Loss: 0.0260
Epoch [17/20], Loss: 0.0240
Epoch [18/20], Loss: 0.0221
Epoch [19/20], Loss: 0.0206
Epoch [20/20], Loss: 0.0193
MLP Accuracy: 0.82
Precision: 0.83
Recall: 0.82
F1 Score: 0.83
Confusion Matrix:
[[80  0  5  1  0  0  2  0  8  4]
 [ 3 86  0  2  0  0  1  0  2  6]
 [ 4  0 76  4  3  4  6  1  2  0]
 [ 0  0  3 76  3  8  9  0  1  0]
 [ 2  0  2  7 80  3  0  6  0  0]
 [ 0  0  6 14  2 73  3  1  1  0]
 [ 1  0  0  3  5  1 89  1  0  0]
 [ 1  0  1  5  7  4  0 82  0  0]
 [ 5  1  1  0  0  0  0  1 92  0]
 [ 2  3  0  1  0  0  0  0  3 91]]


MLP Varying the Depth

In [8]:
# Depth experiment: MLP variant with one extra hidden layer

class MLP_Deep(nn.Module):
    """Four-layer perceptron: 50 -> 512 -> 512 -> 256 -> 10.

    Batch normalization is applied to the second and third hidden layers.
    The output is raw class scores (logits).
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(50, 512)
        self.fc2 = nn.Linear(512, 512)
        self.fc3 = nn.Linear(512, 256)
        self.fc4 = nn.Linear(256, 10)
        self.relu = nn.ReLU()
        self.batchnorm1 = nn.BatchNorm1d(512)
        self.batchnorm2 = nn.BatchNorm1d(256)

    def forward(self, x):
        """Map a (batch, 50) tensor to (batch, 10) class scores."""
        # First hidden layer: linear + ReLU, no normalization.
        hidden = self.fc1(x)
        hidden = self.relu(hidden)
        # Second hidden layer: linear -> batch norm -> ReLU.
        hidden = self.batchnorm1(self.fc2(hidden))
        hidden = self.relu(hidden)
        # Third (extra) hidden layer: linear -> batch norm -> ReLU.
        hidden = self.batchnorm2(self.fc3(hidden))
        hidden = self.relu(hidden)
        return self.fc4(hidden)

# train + eval
def train_and_evaluate(model, train_features, train_labels, test_features, test_labels, num_epochs=20,
                       batch_size=64, lr=0.01, momentum=0.9):
    """Train `model` with mini-batch SGD, then print macro-averaged test metrics.

    The model and all data are moved to the notebook-level `device`. Each
    epoch walks the training data in consecutive slices of `batch_size` rows,
    in the same order every time (no shuffling).

    Args:
        model: An `nn.Module` producing one score per class for each row.
        train_features: Array-like of training features, one row per sample.
        train_labels: Array-like of integer class labels for the training rows.
        test_features: Array-like of test features, one row per sample.
        test_labels: Array-like of integer class labels for the test rows.
        num_epochs: Number of passes over the training data.
        batch_size: Number of rows per mini-batch. Defaults to 64.
        lr: SGD learning rate. Defaults to 0.01.
        momentum: SGD momentum. Defaults to 0.9.

    Returns:
        None. Progress and metrics are printed.

    Raises:
        ValueError: If `train_features` contains no rows.
    """
    model = model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)

    train_tensor = torch.tensor(train_features, dtype=torch.float32).to(device)
    train_labels_tensor = torch.tensor(train_labels, dtype=torch.long).to(device)
    test_tensor = torch.tensor(test_features, dtype=torch.float32).to(device)
    test_labels_tensor = torch.tensor(test_labels, dtype=torch.long).to(device)

    # Fail early with a clear message: with no training rows the inner loop
    # never runs and the progress print below would hit an unbound `loss`.
    if len(train_tensor) == 0:
        raise ValueError("train_features is empty; nothing to train on")

    # epoch loops
    for epoch in range(num_epochs):
        model.train()
        for i in range(0, len(train_tensor), batch_size):
            inputs = train_tensor[i:i+batch_size]
            labels = train_labels_tensor[i:i+batch_size]

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Progress indicator: this is the loss of the last mini-batch of the
        # epoch, not an average over the epoch.
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")

    # Evaluate the whole test set in one forward pass without gradients.
    model.eval()
    with torch.no_grad():
        test_outputs = model(test_tensor)
        # Index of the largest score along dim 1 is the predicted class.
        _, test_preds = torch.max(test_outputs, 1)

    # The sklearn metric functions are given NumPy arrays.
    test_preds_np = test_preds.cpu().numpy()
    test_labels_np = test_labels_tensor.cpu().numpy()

    # Precision, recall and F1 are macro-averaged.
    accuracy = accuracy_score(test_labels_np, test_preds_np)
    precision = precision_score(test_labels_np, test_preds_np, average='macro')
    recall = recall_score(test_labels_np, test_preds_np, average='macro')
    f1 = f1_score(test_labels_np, test_preds_np, average='macro')
    conf_matrix = confusion_matrix(test_labels_np, test_preds_np)

    # See results and print
    print(f"Accuracy: {accuracy:.2f}")
    print(f"Precision: {precision:.2f}")
    print(f"Recall: {recall:.2f}")
    print(f"F1 Score: {f1:.2f}")
    print("Confusion Matrix:")
    print(conf_matrix)

# Build the deeper MLP, then train and evaluate it on the PCA-reduced
# features using train_and_evaluate's default number of epochs
print("Training the deeper MLP model...")
mlp_deep = MLP_Deep()
train_and_evaluate(mlp_deep, train_features_pca, train_labels, test_features_pca, test_labels)


Training the deeper MLP model...
Epoch [1/20], Loss: 4.2734
Epoch [2/20], Loss: 2.9002
Epoch [3/20], Loss: 1.9091
Epoch [4/20], Loss: 1.2495
Epoch [5/20], Loss: 0.8969
Epoch [6/20], Loss: 0.7998
Epoch [7/20], Loss: 0.4060
Epoch [8/20], Loss: 0.1655
Epoch [9/20], Loss: 0.0903
Epoch [10/20], Loss: 0.0706
Epoch [11/20], Loss: 0.0591
Epoch [12/20], Loss: 0.0517
Epoch [13/20], Loss: 0.0462
Epoch [14/20], Loss: 0.0421
Epoch [15/20], Loss: 0.0387
Epoch [16/20], Loss: 0.0359
Epoch [17/20], Loss: 0.0335
Epoch [18/20], Loss: 0.0315
Epoch [19/20], Loss: 0.0297
Epoch [20/20], Loss: 0.0282
Accuracy: 0.83
Precision: 0.83
Recall: 0.83
F1 Score: 0.83
Confusion Matrix:
[[81  0  5  2  1  0  0  0  8  3]
 [ 2 87  0  1  0  0  0  0  3  7]
 [ 4  0 80  5  2  3  4  1  1  0]
 [ 0  0  7 70  5  9  7  1  1  0]
 [ 2  0  5  4 78  3  1  7  0  0]
 [ 0  0  5 15  3 73  2  1  1  0]
 [ 1  0  5  1  2  2 88  1  0  0]
 [ 1  0  1  3  6  2  0 87  0  0]
 [ 4  0  1  1  0  0  0  0 93  1]
 [ 2  1  0  1  0  1  0  0  4 91]]


Vary Hidden Layer Sizes

In [10]:
# Width experiment: MLP variant with narrower hidden layers
class MLP_Smaller(nn.Module):
    """Three-layer perceptron with narrower hidden layers: 50 -> 256 -> 128 -> 10.

    Both hidden layers are batch-normalized before their ReLU activation.
    The output is raw class scores (logits).
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(50, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 10)
        self.relu = nn.ReLU()
        self.batchnorm1 = nn.BatchNorm1d(256)
        self.batchnorm2 = nn.BatchNorm1d(128)

    def forward(self, x):
        """Map a (batch, 50) tensor to (batch, 10) class scores."""
        # First hidden layer: linear -> batch norm -> ReLU.
        hidden = self.batchnorm1(self.fc1(x))
        hidden = self.relu(hidden)
        # Second hidden layer: linear -> batch norm -> ReLU.
        hidden = self.batchnorm2(self.fc2(hidden))
        hidden = self.relu(hidden)
        return self.fc3(hidden)

# Build the smaller MLP, then train and evaluate it on the PCA-reduced
# features using train_and_evaluate's default number of epochs
print("\nTraining the smaller MLP model...")
mlp_smaller = MLP_Smaller()
train_and_evaluate(mlp_smaller, train_features_pca, train_labels, test_features_pca, test_labels)



Training the smaller MLP model...
Epoch [1/20], Loss: 3.8533
Epoch [2/20], Loss: 2.6254
Epoch [3/20], Loss: 1.9595
Epoch [4/20], Loss: 1.5138
Epoch [5/20], Loss: 1.1935
Epoch [6/20], Loss: 0.9957
Epoch [7/20], Loss: 0.9294
Epoch [8/20], Loss: 0.8816
Epoch [9/20], Loss: 0.6595
Epoch [10/20], Loss: 0.5073
Epoch [11/20], Loss: 0.3525
Epoch [12/20], Loss: 0.3024
Epoch [13/20], Loss: 0.2333
Epoch [14/20], Loss: 0.1714
Epoch [15/20], Loss: 0.1269
Epoch [16/20], Loss: 0.1004
Epoch [17/20], Loss: 0.0857
Epoch [18/20], Loss: 0.0755
Epoch [19/20], Loss: 0.0683
Epoch [20/20], Loss: 0.0624
Accuracy: 0.82
Precision: 0.82
Recall: 0.82
F1 Score: 0.82
Confusion Matrix:
[[81  0  7  0  1  0  0  1  9  1]
 [ 2 85  1  1  0  0  1  0  3  7]
 [ 4  0 77  7  4  1  6  1  0  0]
 [ 0  0  3 72  5 12  7  1  0  0]
 [ 2  0  2  6 80  2  2  5  1  0]
 [ 0  0  4 19  2 70  2  2  1  0]
 [ 1  0  1  2  3  2 90  1  0  0]
 [ 0  0  2  5  9  6  0 78  0  0]
 [ 5  0  1  1  0  0  0  0 93  0]
 [ 2  1  0  1  0  0  1  0  4 91]]
