In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torchsummary import summary
import random

import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
from torch.utils.data import Dataset, DataLoader, TensorDataset, random_split
from torchvision import transforms

Import Datasets

In [None]:
#datasets and loaders - using torchvision; standard transformation

random.seed(28)

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)

trainloader = torch.utils.data.DataLoader(trainset, batch_size=4,
                                          shuffle=True, num_workers=2)


testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=4,
                                         shuffle=False, num_workers=2)




Files already downloaded and verified
Files already downloaded and verified


The provided CIFAR10 train and custom datasets were downloaded locally then uploaded to Google Drive for easier access. The code to download the train dataset has been commented out as we ultimately decided to use the torchvision datasets for training

In [None]:
# Connect to Google Drive

from google.colab import drive
drive.mount('/content/drive')


# Import data from Google Drive

# Read the test file, note that it has no labels and needs to be used with your model inference to predict outputs.

#Test Dataset (Custom for Kaggle)
def load_cifar_batch(file):
    with open(file, 'rb') as fo:
        batch = pickle.load(fo, encoding='bytes')
    return batch

# Load the batch
# The path has been modified to reflect the location on our personal drive;

cifar10_batch = load_cifar_batch('/content/drive/MyDrive/Colab Notebooks/cifar10/deep-learning-mini-project-spring-24-nyu/cifar_test_nolabels.pkl')
#cifar10_batch_kaggle = load_cifar_batch('/kaggle/input/deep-learning-mini-project-spring-24-nyu/cifar_test_nolabels')

# Extract images
images = cifar10_batch[b'data']
images = images.reshape(-1, 3, 32, 32)
images_permuted = np.transpose(images, (0, 2, 3, 1)) # Kaggle Testset

# plt.figure(figsize=(25, 8))
# for i in range(20):
#     plt.subplot(1, 20, i+1)
#     plt.imshow(images_permuted[i])
#     plt.axis('off')
# plt.show()


# # Train Dataset - uploaded to Google Drive after downloading from Kaggle

# def load_cifar_batch(file):
#     with open(file, 'rb') as fo:
#         dict = pickle.load(fo, encoding='bytes')
#     return dict

# # Specify the folder where the CIFAR-10 batch files are

# cifar10_dir = '/content/drive/MyDrive/Colab Notebooks/cifar10/deep-learning-mini-project-spring-24-nyu/cifar-10-python/cifar-10-batches-py'
# #cifar10_dir_kaggle = '/kaggle/input/deep-learning-mini-project-spring-24-nyu/cifar-10-python/cifar-10-batches-py'

# # Load the label names
# meta_data_dict = load_cifar_batch(os.path.join(cifar10_dir, 'batches.meta'))
# label_names = meta_data_dict[b'label_names']

# # Load one batch for demonstration (e.g., data_batch_1)
# batch_1_dict = load_cifar_batch(os.path.join(cifar10_dir, 'data_batch_1'))
# batch_2_dict = load_cifar_batch(os.path.join(cifar10_dir, 'data_batch_2'))
# batch_3_dict = load_cifar_batch(os.path.join(cifar10_dir, 'data_batch_3'))
# batch_4_dict = load_cifar_batch(os.path.join(cifar10_dir, 'data_batch_4'))
# batch_5_dict = load_cifar_batch(os.path.join(cifar10_dir, 'data_batch_5'))

# # Combine the databatches into one training set
# train_images = np.concatenate((batch_1_dict[b'data'], batch_2_dict[b'data'],batch_3_dict[b'data'],batch_4_dict[b'data'],batch_5_dict[b'data']), axis=0)
# train_labels = np.concatenate((batch_1_dict[b'labels'], batch_2_dict[b'labels'],batch_3_dict[b'labels'],batch_4_dict[b'labels'],batch_5_dict[b'labels']), axis=0)

# # Reshape the images
# train_images = train_images.reshape((50000, 3, 32, 32)).transpose(0, 2, 3, 1)

# # # Display 10 (random range) images and labels

# # plt.figure(figsize=(20, 4))
# # for i in range(23444,23454):
# #     plt.subplot(1, 10, i-23443)
# #     plt.imshow(train_images[i])
# #     plt.title(label_names[train_labels[i]].decode('utf-8'))  # Decoding from bytes to string
# #     plt.axis('off')
# # plt.show()


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Loader Functions for Custom Test Data

def create_test_loader(test_images, batch_size=64, shuffle=False):
    # Step 1: Normalize the pixel values to the range [0, 1]
    test_images = test_images / 255.0  # Assuming pixel values are in the range [0, 255]

    # Step 2: Convert Numpy Array to PyTorch Tensor
    test_images_tensor = torch.tensor(test_images, dtype=torch.float32)

    # Step 3: Permute the dimensions to match the expected format (if necessary)
    # For example, if the channel dimension is the first dimension, permute it to the last dimension
    test_images_tensor = test_images_tensor.permute(0, 3, 1, 2)  # Assuming channel dimension is the last dimension

    # Step 4: Create TensorDataset
    test_dataset = TensorDataset(test_images_tensor)

    # Step 5: Create DataLoader
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=shuffle)

    return test_loader

In [None]:
# Create Custom Test Loader
testloader_custom = create_test_loader(images_permuted, batch_size=10000, shuffle=False)

ResNet Model:

Input custom parameters as ints/lists in the order of number of layers (int), number of residual blocks in each layer(list), number of channels in each layer (list), kernel sizes in each layer (list), skip connection kernels in each layer (list), and pool size (int)

Our model is based on the ResNet implementation from the [pytorch-cifar](https://github.com/kuangliu/pytorch-cifar) repository. Modifications have been made to adapt it to the specific requirements of this project.

In [None]:
class BasicBlock(nn.Module):

    def __init__(self, in_planes, planes, kernel_size, skip_kernel, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=kernel_size, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=kernel_size, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, planes, kernel_size=skip_kernel, stride=stride, bias=False),
                nn.BatchNorm2d(planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class ResNet(nn.Module):

    def __init__(self, N, B, C, F, K, P, num_classes=10):
        super(ResNet, self).__init__()
        self.in_planes = C[0]
        self.block = BasicBlock
        self.N = N                # No. of Residual Layers
        self.B = B                # No. of Residual Blocks in Residual Layer i
        self.C = C                # No. of channels in Residual Layer i
        self.F = F                # Conv. kernel size in Residual Layer i
        self.K = K                # Skip connection kernel size in Residual Layer i
        self.P = P                # Average pool kernel size
        self.layers = []          # layers container
        self.S = [2] * N          # strides for layers
        self.S[0] = 1

        # Output Liner layer input dimension
        self.outLayerInSize = C[N-1]*(32//(P*2**(N-1)))*(32//(P*2**(N-1)))

        self.conv1 = nn.Conv2d(3, C[0], kernel_size=F[0], stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(C[0])

        for i in range(N):
            exec("self.layer{} = self._make_layer(self.block, self.C[{}], self.B[{}], self.F[{}], self.K[{}], self.S[{}])"\
                .format(i+1,i,i,i,i,i))
            exec("self.layers.append(self.layer{})".format(i+1))
        self.linear = nn.Linear(self.outLayerInSize, num_classes)


    def _make_layer(self, block, planes, num_blocks, kernel_size, skip_kernel, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, kernel_size, skip_kernel, stride))
            self.in_planes = planes
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        for layer in self.layers:
            out = layer(out)
        out = F.avg_pool2d(out, self.P)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out

Train and Test Functions - to be used with the torchvision datasets

In [None]:
def train(net, train_loader, criterion, optimizer, num_epochs=1):
    for epoch in range(num_epochs):
        net.train()  # Set the model to train mode
        running_loss = 0.0
        for inputs, labels in train_loader:

            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
            inputs, labels = inputs.to(device), labels.to(device)  # Move data to GPU if available
            optimizer.zero_grad()  # Zero the parameter gradients
            outputs = net(inputs)  # Forward pass
            loss = criterion(outputs, labels)  # Compute the loss
            loss.backward()  # Backward pass
            optimizer.step()  # Optimize
            running_loss += loss.item()
        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(train_loader)}")


In [None]:
def test(model, test_loader):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.eval()  # Set the model to evaluation mode
    model.to(device)  # Move model to the same device as the data

    correct = 0
    total = 0
    with torch.no_grad():
        for data in test_loader:
            images, labels = data[0].to(device), data[1].to(device)  # Move data to the same device as the model
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print('Accuracy of the network on the test images: %d %%' % (
        100 * correct / total))


Predict on Custom Test file and export to CSV

The csv file will save directly to the Google Drive folder as a single column csv file. Using Google Sheets, we simply add the ID from 0 to 9999 and export as another csv file to upload to Kaggle

In [None]:
def save_csv(model, test_loader, output_csv):
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model.eval()  # Set the model to evaluation mode
    model.to(device)  # Move model to the GPU if available

    predictions = []
    with torch.no_grad():
        for data in test_loader:
            images = data[0].to(device)  # Move data to GPU if available
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            predictions.extend(predicted.cpu().numpy())  # Append predicted labels to the list

    # Save predictions to a CSV file - in this case, path is directly to Google Drive
    df = pd.DataFrame({"Labels": predictions})
    df.to_csv(f'/content/drive/MyDrive/Colab Notebooks/{output_csv}.csv', index=False)

    print(f"Predictions saved to {output_csv}")


Main

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

We tested a few different combinations of hyperparameters by specifying variable like below. Due to limited time and resources, we only trained for 2 epochs each with other parameters kept fixed. We have a recorded a few manually for the higher total parameter counts. Table can be found in our report.

In [None]:
N=3
B=[3,3,3]
C=[64,128,256]
F=[3,3,3]
K=[1,1,1]
P=4

testnet = ResNet(N, B, C, F, K, P)
testnet = testnet.to(device)

In [None]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(testnet.parameters(), lr=0.001, momentum=0.9)


In [None]:
# Train the model
train(testnet, trainloader, criterion, optimizer, num_epochs=2)


  self.pid = os.fork()
  self.pid = os.fork()


Epoch 1/2, Loss: 1.4157421163621544
Epoch 2/2, Loss: 0.8524004436112195


In [None]:
test(testnet, testloader)


  self.pid = os.fork()


Accuracy of the network on the test images: 71 %


Due to limited time and resources, mainly from previous experimentation that we discarded, we chose the two most high performing hypoerparameter combinations with respect to training loss and accuracy to further train and observe. We initially trained for just 10 epochs added 5 more to make number of epochs 15. The accuracy improvement from 2 epochs to 10 was significant for both of our top parameters, but found improvement to be minor from 10 to 15; we expect similar results for even higher number of epochs.

In [None]:
B_1=[2,1,2,2]
C_1=[64,128,256,256]
F_1=[3,3,3,3]
K_1=[1,1,1,1]
P_1=4
N_1=4

first_resnet = ResNet(N_1, B_1, C_1, F_1, K_1, P_1)
first_resnet = first_resnet.to(device)


In [None]:
# Calculates the total number of parameters given hyperparameters; note that the summary is not present for all, but we have manually recorded results which can be found in our report.
summary(first_resnet, (3, 32, 32))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 32, 32]           1,728
       BatchNorm2d-2           [-1, 64, 32, 32]             128
            Conv2d-3           [-1, 64, 32, 32]          36,864
       BatchNorm2d-4           [-1, 64, 32, 32]             128
            Conv2d-5           [-1, 64, 32, 32]          36,864
       BatchNorm2d-6           [-1, 64, 32, 32]             128
        BasicBlock-7           [-1, 64, 32, 32]               0
            Conv2d-8           [-1, 64, 32, 32]          36,864
       BatchNorm2d-9           [-1, 64, 32, 32]             128
           Conv2d-10           [-1, 64, 32, 32]          36,864
      BatchNorm2d-11           [-1, 64, 32, 32]             128
       BasicBlock-12           [-1, 64, 32, 32]               0
           Conv2d-13          [-1, 128, 16, 16]          73,728
      BatchNorm2d-14          [-1, 128,

In [None]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(first_resnet.parameters(), lr=0.001, momentum=0.9)


In [None]:
train(first_resnet, trainloader, criterion, optimizer, num_epochs=10)


  self.pid = os.fork()
  self.pid = os.fork()


Epoch 1/10, Loss: 1.3429175821387769
Epoch 2/10, Loss: 0.8126889728840441
Epoch 3/10, Loss: 0.6086656902072206
Epoch 4/10, Loss: 0.4712365833407501
Epoch 5/10, Loss: 0.35921292084219864
Epoch 6/10, Loss: 0.2672442619249527
Epoch 7/10, Loss: 0.1942061690515018
Epoch 8/10, Loss: 0.13936921194193042
Epoch 9/10, Loss: 0.10031987528590668
Epoch 10/10, Loss: 0.06968409092166337


In [None]:
test(first_resnet, testloader)


Accuracy of the network on the test images: 82 %


In [None]:
file_name = "first_resnet_10_eps_csv_file_acc_82"

save_csv(first_resnet, testloader_custom,file_name ) #should be about 52% on Kaggle

Predictions saved to first_resnet_10_eps_csv_file_acc_82


In [None]:
B_2=[3,3,2,3]
C_2=[64,128,128,256]
F_2=[3,3,3,3]
K_2=[1,1,1,1]
P_2=4
N_2=4

second_resnet = ResNet(N_2, B_2, C_2, F_2, K_2, P_2)
second_resnet = second_resnet.to(device)

In [None]:
optimizer = torch.optim.SGD(second_resnet.parameters(), lr=0.001, momentum=0.9)


In [None]:
train(second_resnet, trainloader, criterion, optimizer, num_epochs=10)


  self.pid = os.fork()


Epoch 1/10, Loss: 1.4326070944470166
Epoch 2/10, Loss: 0.8585262776635587
Epoch 3/10, Loss: 0.6372525906962528
Epoch 4/10, Loss: 0.49743281851571053
Epoch 5/10, Loss: 0.38908295226437506
Epoch 6/10, Loss: 0.30441392069512163
Epoch 7/10, Loss: 0.2282999128790156
Epoch 8/10, Loss: 0.1725996220833392
Epoch 9/10, Loss: 0.12559819755841944
Epoch 10/10, Loss: 0.0938154758175362


In [None]:
test(second_resnet, testloader)


Accuracy of the network on the test images: 82 %


We purchasd 200 Compute Engines in total for Google Colab to use better performing GPUs. In this session alone, ran of out of memory per the error below, so we were unable to export the newer results as pdf.

In [None]:
file_name = "second_resnet_10_eps_csv_file_acc_82"

save_csv(first_resnet, testloader_custom,file_name ) #should be about 52%

OutOfMemoryError: CUDA out of memory. Tried to allocate 2.44 GiB. GPU 0 has a total capacity of 15.77 GiB of which 2.07 GiB is free. Process 87562 has 13.70 GiB memory in use. Of the allocated memory 10.23 GiB is allocated by PyTorch, and 3.07 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
train(first_resnet, trainloader, criterion, optimizer, num_epochs=5) # 15th epoch in total


Epoch 1/5, Loss: 0.054807147495246684
Epoch 2/5, Loss: 0.055825584951267195
Epoch 3/5, Loss: 0.05404582598539179
Epoch 4/5, Loss: 0.0544479376459989
Epoch 5/5, Loss: 0.056161193695629774


In [None]:
test(first_resnet, testloader)


Accuracy of the network on the test images: 82 %


In [None]:
train(second_resnet, trainloader, criterion, optimizer, num_epochs=5) #15th epoch in total


  self.pid = os.fork()
  self.pid = os.fork()


Epoch 1/5, Loss: 0.07381486251885665
Epoch 2/5, Loss: 0.05470524840512127
Epoch 3/5, Loss: 0.04191658935609079
Epoch 4/5, Loss: 0.031857568946714904
Epoch 5/5, Loss: 0.024241891359194797


In [None]:
test(second_resnet, testloader)


Accuracy of the network on the test images: 84 %


The 84% accuracy for the second_resnet after 15 epochs shows improvement -- vs 82% after 10 epochs. If this model was further trained and tested against the custom test set, we believe results would've been better, but that remains a hypothesis for now.

We address this in our report in greater detail, but the biggest problem we encountered was the high discrepancy in our accuracy of the torchvision dataset and custom dataset for Kaggle, ~84% vs ~53%. This level of discrepancy occured throughout our entire project, which led us to discarding a bulk of our previous efforts. (our number of submission on Kaggle is low because we think we figured out the true labels for the test set, which we used for reference to not exhaust submissions)

We saw other groups experience similar issues and tried suggestions in the Slack channels but were not successful. Some discrepancy is obviously expected -- and welcomed -- but a 30% gap warrants further exploration which we unfortunately did not get to.