In [2]:
# Import necessary libraries
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.cuda as torch_cuda
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.utils import make_grid

In [3]:
print('===VERIFY GPU===')
print('CUDA IS AVAILABLE:', torch_cuda.is_available())
print('DEVICE COUNT:', torch_cuda.device_count())

# Per-device queries raise on a machine without CUDA, so only issue them when
# a GPU is actually present; the notebook can then fall back to the CPU below.
if torch_cuda.is_available():
    print('DEVICE NAME:', torch_cuda.get_device_name(0))
    print(f"Number of GPUs: {torch_cuda.device_count()}")
    print(f"GPU Name: {torch_cuda.get_device_name(0)}")
    print(f"GPU Memory Allocated: {torch_cuda.memory_allocated(0)} bytes")
    print(f"GPU Memory Cached: {torch_cuda.memory_reserved(0)} bytes")

# Select device: GPU if available, otherwise CPU
device = torch.device('cuda:0' if torch_cuda.is_available() else 'cpu')

# Seed the CUDA and default generators so weight initialisation and shuffling
# are repeatable across runs.
torch_cuda.manual_seed(42)
torch.manual_seed(42)

===VERIFY GPU===
CUDA IS AVAILABLE: True
DEVICE COUNT: 1
DEVICE NAME: NVIDIA RTX A4000
Number of GPUs: 1
GPU Name: NVIDIA RTX A4000
GPU Memory Allocated: 0 bytes
GPU Memory Cached: 0 bytes


<torch._C.Generator at 0x76148412f7f0>

## Dataset

In [None]:
# Mini-batch size shared by the train and test loaders
batch_size = 32

# Preprocessing applied to every image: convert to a tensor, then normalise
# each of the three channels with mean 0.5 and std 0.5.
transformations = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ]
)

# CIFAR-10 train / test splits, both stored under ../Data/CIFAR-10
# (download=True lets torchvision fetch the data into that directory).
cifar_trainset = datasets.CIFAR10(
    '../Data/CIFAR-10', train=True, transform=transformations, download=True
)
cifar_testset = datasets.CIFAR10(
    '../Data/CIFAR-10', train=False, transform=transformations, download=True
)

# Training batches are reshuffled on every pass; test batches keep dataset order.
trainloader = DataLoader(cifar_trainset, batch_size=batch_size, shuffle=True)
testloader = DataLoader(cifar_testset, batch_size=batch_size, shuffle=False)

## CNN

In [5]:
# Optimiser settings (consumed by optim.SGD when the model is built)
learning_rate = 0.01
momentum_coeff = 0.9
learning_decay = 10**-9  # passed to SGD as weight_decay, not as a learning-rate schedule

# Number of full passes over the training set
epochs = 100

# Integer class indices of the ten CIFAR-10 categories
classes = tuple(range(10))

In [9]:
# Define a convolutional neural network (CNN) model

class CNN(nn.Module):
    """Two convolutional blocks followed by a three-layer fully connected head.

    Each block is conv -> ReLU -> conv -> ReLU -> 2x2 max-pool -> dropout. The
    3x3 convolutions use padding=1, so only the pooling layers change the
    spatial size (each halves it). The head expects 4096 flattened features
    per sample, which is what a 3-channel 32x32 input produces (64 * 8 * 8).
    """

    def __init__(self):
        super().__init__()
        # First convolutional block: 3 -> 32 -> 32 channels
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3, padding=1)
        self.pool1 = nn.MaxPool2d(kernel_size=2)
        self.dropout1 = nn.Dropout(0.25)

        # Second convolutional block: 32 -> 64 -> 64 channels
        self.conv3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, padding=1)
        self.pool2 = nn.MaxPool2d(kernel_size=2)
        self.dropout2 = nn.Dropout(0.25)

        # Fully connected head: 4096 -> 4096 -> 512 -> 10 logits
        self.fc1 = nn.Linear(4096, 4096)
        self.dropout3 = nn.Dropout(0.5)
        self.fc2 = nn.Linear(4096, 512)
        self.out = nn.Linear(512, 10)

    def forward(self, x, output = 3):
        """Run the network and return the representation selected by ``output``.

        Args:
            x: Batch of images with 3 channels; the head requires the
                flattened feature map to have 4096 values per sample.
            output: Which tensor to return.
                1 -> ReLU output of ``fc1`` (4096 features, before dropout3),
                2 -> ReLU output of ``fc2`` (512 features),
                any other value (default 3) -> the 10 raw class logits.

        Returns:
            A 2-D tensor with one row per input sample.

        Raises:
            RuntimeError: If the flattened feature map does not have 4096
                values per sample (raised by the ``fc1`` matrix multiply).
        """
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = self.pool1(x)
        x = self.dropout1(x)

        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        x = self.pool2(x)
        x = self.dropout2(x)

        # Flatten per sample. A view(-1, 4096) would silently fold surplus
        # features into the batch axis for inputs of the wrong spatial size;
        # keeping the batch axis fixed makes fc1 reject such inputs instead.
        x = x.flatten(start_dim=1)

        x = F.relu(self.fc1(x))
        if output == 1:
            return x
        x = self.dropout3(x)

        x = F.relu(self.fc2(x))
        if output == 2:
            return x

        x = self.out(x)
        return x


# Build the network and place its parameters on the selected device
cnn = CNN()
cnn = cnn.to(device)

# Cross-entropy is applied to the raw logits returned by the network
loss_function = nn.CrossEntropyLoss()

# SGD with momentum; learning_decay is supplied as the weight-decay coefficient
optimizer = optim.SGD(
    params=cnn.parameters(),
    lr=learning_rate,
    momentum=momentum_coeff,
    weight_decay=learning_decay,
)


def train(epoch):
    """Run one optimisation pass over ``trainloader`` with the global model.

    Puts ``cnn`` in training mode, performs one SGD step per mini-batch and
    prints the current batch loss every 25 batches.

    Args:
        epoch: Zero-based epoch index; shown one-based in the progress line.
    """
    cnn.train()

    n_samples = len(trainloader.dataset)
    n_batches = len(trainloader)

    for step, (inputs, labels) in enumerate(trainloader):
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Standard update: clear old gradients, forward, backward, step
        optimizer.zero_grad()
        logits = cnn(inputs, output=3)
        batch_loss = loss_function(logits, labels)
        batch_loss.backward()
        optimizer.step()

        # Only report on every 25th batch
        if step % 25 != 0:
            continue

        seen = step * len(inputs)
        percent = 100. * step / n_batches
        print('Train Epoch: {:>3}\t[{:>5}/{:>5} ({:>3.0f}%)]\tLoss: {:>7.6f}'.format(
            epoch + 1, seen, n_samples, percent, batch_loss.item()))


def test():
    """Evaluate the global model on ``testloader`` and print loss and accuracy.

    Puts ``cnn`` in evaluation mode, runs every test batch without gradient
    tracking and prints the per-sample average loss together with the number
    and percentage of correctly classified samples.
    """
    cnn.eval()
    total_loss = 0.0
    correct = 0
    n_samples = len(testloader.dataset)

    with torch.no_grad():
        for data, target in testloader:

            data, target = data.to(device), target.to(device)

            output = cnn(data, output=3)

            # loss_function returns the mean over the batch. Weight it by the
            # batch size so a short final batch is not over-represented in
            # the average (a plain mean of batch means would be biased).
            total_loss += loss_function(output, target).item() * target.size(0)

            # Predicted class = index of the largest logit
            correct += (output.argmax(dim=1) == target).sum().item()

    test_loss = total_loss / n_samples

    print('\nTest set: Average loss: {:>10.4f}, Accuracy: {:>5}/{:>5} ({:>3.3f}%)\n'.format(
        test_loss, correct, n_samples,
        100. * correct / n_samples))


# Full training schedule: for each epoch, one optimisation pass over the
# training set followed by an evaluation on the test set.
for epoch in range(epochs):
    train(epoch)
    test()


Test set: Average loss:     1.3339, Accuracy:  5171/10000 (51.710%)


Test set: Average loss:     1.0406, Accuracy:  6355/10000 (63.550%)


Test set: Average loss:     0.8330, Accuracy:  7085/10000 (70.850%)


Test set: Average loss:     0.7500, Accuracy:  7321/10000 (73.210%)


Test set: Average loss:     0.7138, Accuracy:  7522/10000 (75.220%)


Test set: Average loss:     0.6890, Accuracy:  7649/10000 (76.490%)


Test set: Average loss:     0.6602, Accuracy:  7703/10000 (77.030%)


Test set: Average loss:     0.6559, Accuracy:  7763/10000 (77.630%)


Test set: Average loss:     0.7025, Accuracy:  7716/10000 (77.160%)


Test set: Average loss:     0.6817, Accuracy:  7773/10000 (77.730%)


Test set: Average loss:     0.6621, Accuracy:  7823/10000 (78.230%)


Test set: Average loss:     0.7166, Accuracy:  7801/10000 (78.010%)


Test set: Average loss:     0.6856, Accuracy:  7819/10000 (78.190%)


Test set: Average loss:     0.7035, Accuracy:  7863/10000 (78.630%)


Test set: Average l

In [10]:
# Create the target directory first so a missing folder cannot cause the
# trained weights to be lost when saving.
checkpoint_path = '../Saved/CIFAR-10_CNN_TRAINED.pt'
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)
torch.save(cnn.state_dict(), checkpoint_path)

## Dimensionality reduction

In [None]:
def get_activations(layer = 2):
    """Collect hidden activations, labels and predictions for train + test data.

    Iterates ``trainloader`` and then ``testloader`` with the global ``cnn``
    in evaluation mode and without gradient tracking.

    Args:
        layer: Forwarded as ``output=layer`` to the model to select which
            representation is recorded.

    Returns:
        A tuple ``(activations, labels, predictions)``:
          * activations: list with one 1-D numpy array per sample.
          * labels: list with one numpy array per *batch*. Test batches that
            are shorter than the loader's batch size are right-padded with
            -1, so the flattened labels can be longer than ``activations``.
          * predictions: list with one predicted class index per sample
            (argmax of the model's final output).

    Note:
        ``trainloader`` is built with ``shuffle=True``, so the order of the
        training samples differs between calls; results of separate calls
        are therefore not row-aligned with each other.
    """
    activations = []
    labels = []
    predictions = []

    # (loader, pad_short_batches): only test batches are padded, which keeps
    # the returned label structure the same as before.
    passes = ((trainloader, False), (testloader, True))

    # Dropout must be disabled, otherwise the two forward passes per batch
    # would see different random masks. Restore the previous mode afterwards.
    was_training = cnn.training
    cnn.eval()
    try:
        with torch.no_grad():
            for loader, pad_short_batches in passes:
                full_batch = loader.batch_size
                for data, target in loader:
                    data = data.to(device)

                    # One device-to-host copy per batch; iterating the 2-D
                    # array yields one 1-D row per sample.
                    hidden = cnn(data, output=layer).cpu().numpy()
                    activations.extend(hidden)

                    target_arr = target.cpu().numpy()
                    if (pad_short_batches and full_batch is not None
                            and len(target_arr) < full_batch):
                        # Pad to the loader's batch size (not a fixed 32)
                        # using -1 as the "no sample" marker.
                        target_arr = np.pad(target_arr, (0, full_batch - len(target_arr)),
                                            mode='constant', constant_values=-1)
                    labels.append(target_arr)

                    logits = cnn(data, output=3).cpu().numpy()
                    predictions.extend(np.argmax(logits, axis=1))
    finally:
        cnn.train(was_training)

    return activations, labels, predictions

# Collect representations for every train and test image:
#   layer=2 -> ReLU output of fc2 (512 features per image)
#   layer=1 -> ReLU output of fc1 (4096 features per image)
# NOTE(review): trainloader shuffles on every pass, so the train rows of the
# two results are in different orders; use each result with its own labels.
activations_l2, labels_l2, predictions_l2 = get_activations(2)
activations_l1, labels_l1, predictions_l1 = get_activations(1)