In [None]:
# Testing the feed-forward class-prediction biases of untrained, randomly initialized neural nets
# The test is run on the CIFAR-100 training set

# Setup

In [None]:
# This cell imports modules
import json
import os

import numpy as np
import torch
import torchvision.transforms as transforms
from matplotlib import pyplot as plt
from torch import nn
from torch.utils.data import DataLoader, Subset
from torchvision import datasets

In [None]:
### Load the CIFAR-100 dataset

# Convert images to tensors, then standardize each RGB channel with fixed
# per-channel mean / std constants.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5071, 0.4867, 0.4408), (0.2675, 0.2565, 0.2761))
])

cifar100 = datasets.CIFAR100(root='./data', train=True, download=True, transform=transform)

# Create separate dataloaders for each class.
# Sample indices are grouped by reading the stored integer labels
# (`cifar100.targets`) rather than by iterating the dataset: indexing the
# dataset loads and transforms every image, and doing that once per class
# meant 100 full passes over the data just to read the labels.
targets = np.asarray(cifar100.targets)
class_dataloaders = []
for class_idx in range(100):
    # flatnonzero yields ascending indices, i.e. the same order as a linear scan.
    class_indices = np.flatnonzero(targets == class_idx).tolist()
    class_subset = Subset(cifar100, class_indices)
    class_dataloader = DataLoader(class_subset, batch_size=100, shuffle=False)
    class_dataloaders.append(class_dataloader)
    print(f"Dataloader #{class_idx} done.")

In [None]:
# Human-readable names for the 100 classes. The list position is the integer
# class index, so a name is looked up as labels[label].
labels = [
    "apple", "aquarium_fish", "baby", "bear", "beaver",
    "bed", "bee", "beetle", "bicycle", "bottle",
    "bowl", "boy", "bridge", "bus", "butterfly",
    "camel", "can", "castle", "caterpillar", "cattle",
    "chair", "chimpanzee", "clock", "cloud", "cockroach",
    "couch", "crab", "crocodile", "cup", "dinosaur",
    "dolphin", "elephant", "flatfish", "forest", "fox",
    "girl", "hamster", "house", "kangaroo", "keyboard",
    "lamp", "lawn_mower", "leopard", "lion", "lizard",
    "lobster", "man", "maple_tree", "motorcycle", "mountain",
    "mouse", "mushroom", "oak_tree", "orange", "orchid",
    "otter", "palm_tree", "pear", "pickup_truck", "pine_tree",
    "plain", "plate", "poppy", "porcupine", "possum",
    "rabbit", "raccoon", "ray", "road", "rocket",
    "rose", "sea", "seal", "shark", "shrew",
    "skunk", "skyscraper", "snail", "snake", "spider",
    "squirrel", "streetcar", "sunflower", "sweet_pepper", "table",
    "tank", "telephone", "television", "tiger", "tractor",
    "train", "trout", "tulip", "turtle", "wardrobe",
    "whale", "willow_tree", "wolf", "woman", "worm",
]

In [None]:
# Show one randomly chosen image from each class

# The dataset returns normalized tensors; undo the Normalize step (the last
# entry of `transform`) so pixel values are back in [0, 1] and imshow does not
# have to clip them.
normalize = transform.transforms[-1]
channel_mean = torch.tensor(normalize.mean).view(3, 1, 1)
channel_std = torch.tensor(normalize.std).view(3, 1, 1)

for class_idx in range(100):
    # Reuse the per-class subset already built for the dataloaders instead of
    # scanning (and decoding) the whole dataset again for every class.
    class_subset = class_dataloaders[class_idx].dataset
    sample_idx = torch.randint(len(class_subset), size=(1,)).item()
    img, label = class_subset[sample_idx]
    img = (img * channel_std + channel_mean).clamp(0, 1)

    fig, ax = plt.subplots(figsize=(1, 1))
    ax.axis("off")
    # ToTensor yields (C, H, W) while imshow expects (H, W, C). `img.T` reverses
    # all axes to (W, H, C), which draws the picture transposed; permute keeps
    # rows and columns in place.
    ax.imshow(img.permute(1, 2, 0).numpy())
    print(labels[label])
    plt.show()
    # Release the figure so 100 of them do not stay open at once.
    plt.close(fig)

In [None]:
# Pick the compute device: prefer CUDA, then Apple MPS, and fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")

# Model

In [None]:
# Define a flexible neural network
class FlexibleNet(nn.Module):
    """Fully connected ReLU network with a configurable stack of hidden layers.

    The input is flattened, passed through one ``nn.Linear`` + ReLU per entry
    of ``hidden_layers``, and finally through a linear output layer (no
    activation), so the forward pass returns raw class scores.

    Args:
        hidden_layers: Sequence of hidden-layer widths, in order. May be empty,
            in which case the network is a single linear map from input to
            output.
        input_dim: Number of features per sample after flattening. Defaults to
            ``32*32*3``.
        num_classes: Number of output scores. Defaults to ``100``.
    """

    def __init__(self, hidden_layers, input_dim=32*32*3, num_classes=100):
        super().__init__()
        self.flatten = nn.Flatten()
        # Consecutive width pairs define the linear layers; building them from
        # one list also covers the empty-hidden-layer case.
        widths = [input_dim, *hidden_layers, num_classes]
        self.layers = nn.ModuleList(
            [nn.Linear(fan_in, fan_out) for fan_in, fan_out in zip(widths[:-1], widths[1:])]
        )
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return the output-layer scores for a batch ``x``."""
        x = self.flatten(x)
        # ReLU follows every layer except the last one.
        for layer in self.layers[:-1]:
            x = self.relu(layer(x))
        return self.layers[-1](x)

In [None]:
# Evaluation function
def evaluate(model, dataloader, device=None):
    """Return the top-1 accuracy of ``model`` over ``dataloader``.

    Args:
        model: Module whose output has one score per class along dim 1.
        dataloader: Iterable yielding ``(inputs, labels)`` batches of tensors.
        device: Device the batches are moved to. When omitted, the device of
            the model's first parameter is used, so the function does not
            depend on a notebook-level global; a model without parameters
            leaves the batches where they are.

    Returns:
        Fraction of samples whose highest-scoring class equals the label, or
        ``nan`` when the dataloader yields no samples.

    Note:
        The model is switched to eval mode and left in that mode.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        if first_param is not None:
            device = first_param.device

    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for X, Y in dataloader:
            if device is not None:
                X, Y = X.to(device), Y.to(device)
            outputs = model(X)
            _, predicted = torch.max(outputs, 1)
            total += Y.size(0)
            correct += (predicted == Y).sum().item()

    # Accuracy is undefined for an empty dataset; avoid a ZeroDivisionError.
    if total == 0:
        return float("nan")
    return correct / total

# Evaluate

In [None]:
# Evaluation method
def experiment(hidden_layers, num_iterations):
    """Measure the per-class accuracy of freshly initialized networks.

    For each iteration a new ``FlexibleNet(hidden_layers)`` is created (no
    training) and evaluated on every dataloader in ``class_dataloaders``.

    Args:
        hidden_layers: Hidden-layer widths passed to ``FlexibleNet``.
        num_iterations: Number of independently initialized networks.

    Returns:
        1-D numpy array with one entry per class dataloader: the accuracy on
        that class averaged over all iterations.
    """
    print("==========")
    print("Evaluating hidden layer architecture: {}".format(str(hidden_layers)))
    num_classes = len(class_dataloaders)
    class_accuracies = np.zeros((num_iterations, num_classes))

    # Report progress roughly ten times per run. The previous condition,
    # `i % num_iterations//10 == 0`, parses as `(i % num_iterations) // 10 == 0`
    # and therefore fired only for the first ten iterations. max(1, ...) keeps
    # the modulus non-zero when num_iterations < 10.
    log_every = max(1, num_iterations // 10)

    for i in range(num_iterations):
        model = FlexibleNet(hidden_layers).to(device)
        # Evaluate the model on each class
        for class_idx, dataloader in enumerate(class_dataloaders):
            class_accuracies[i, class_idx] = evaluate(model, dataloader)
        if i % log_every == 0:
            print(f"Iteration [{(i+1):03d}/{num_iterations:03d}]")

    return np.mean(class_accuracies, axis=0)

In [None]:
# Plotting method
def plot_results(class_accuracies, hidden_layers, num_iterations, classes):
    """Draw, save and show a bar chart of per-class accuracy.

    Args:
        class_accuracies: Sequence of accuracies, one per entry of ``classes``.
        hidden_layers: Architecture description; used only in the file name.
        num_iterations: Number of runs averaged; used only in the file name.
        classes: Class names used as bar labels on the x-axis.

    Side effects:
        Prints the mean of ``class_accuracies`` and writes the figure to
        ``results/cifar100/ff<hidden_layers>_<num_iterations>.png``, creating
        the directory if needed.
    """
    fig, ax = plt.subplots(figsize=(120, 6))
    ax.bar(classes, class_accuracies)
    ax.set_ylabel('Accuracy')
    ax.set_title('Feed-forward Prediction Accuracy for Each CIFAR-100 Class')

    # Add value labels on top of each bar
    for i, v in enumerate(class_accuracies):
        ax.text(i, v + 0.01, f'{v:.2f}', ha='center', va='bottom')

    # Mark the accuracy of a uniform random guess with a dashed line and a
    # y-axis tick. The tick used to be added to the x-axis, where 1/len(classes)
    # is not a meaningful position on the categorical scale.
    chance = 1 / len(classes)
    ax.axhline(y=chance, color='r', linestyle='--')
    ax.set_yticks(sorted(set(ax.get_yticks()) | {chance}))
    ax.set_ylim(0, 1)  # Set y-axis limit from 0 to 1 (after the ticks, which may widen the view)

    # Print overall accuracy
    overall_accuracy = sum(class_accuracies) / len(class_accuracies)
    print(f"Overall accuracy: {overall_accuracy:.4f}")

    figname = f"results/cifar100/ff{str(hidden_layers)}_{num_iterations:06d}.png"
    # savefig does not create missing directories.
    os.makedirs(os.path.dirname(figname), exist_ok=True)
    fig.savefig(figname)

    # Show the figure now, then release it: this function is called once per
    # architecture, and open figures would otherwise pile up in memory.
    plt.show()
    plt.close(fig)

In [None]:
# Output data to JSON
def save_results(class_accuracies, hidden_layers, num_iterations, classes):
    """Write the per-class accuracies to a JSON file.

    Args:
        class_accuracies: Sequence of accuracies, paired with ``classes`` by
            position.
        hidden_layers: Architecture description; used only in the file name.
        num_iterations: Number of runs averaged; used only in the file name.
        classes: Class names used as the JSON keys.

    Side effects:
        Writes ``results/cifar100/ff<hidden_layers>_<num_iterations>.json``,
        creating the directory if needed. This is the same directory the
        plotting function saves to; the path previously said ``cirfa100``.
    """
    # float() turns numpy scalars into plain Python floats, which json can
    # always serialize.
    data = {label: float(acc) for label, acc in zip(classes, class_accuracies)}

    out_path = f"results/cifar100/ff{str(hidden_layers)}_{num_iterations:06d}.json"
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    with open(out_path, 'w') as f:
        json.dump(data, f, indent=2)

In [None]:
# Experiment loop
# Test different hidden layer architectures: widths 16/32/64/128, each with 1 to 6 hidden layers
archs16 = [[16]*num_layers for num_layers in range(1,7)]
archs32 = [[32]*num_layers for num_layers in range(1,7)]
archs64 = [[64]*num_layers for num_layers in range(1,7)]
archs128 = [[128]*num_layers for num_layers in range(1,7)]

archs = archs16 + archs32 + archs64 + archs128
num_iterations = 100

# The results depend entirely on random weight initialization, so fix the seed
# to make a Restart & Run All reproduce the same numbers and figures.
torch.manual_seed(0)

for hidden_layers in archs:
    class_accuracies = experiment(hidden_layers, num_iterations)
    save_results(class_accuracies, hidden_layers, num_iterations, labels)
    plot_results(class_accuracies, hidden_layers, num_iterations, labels)