In [None]:
import torch
import torch.nn as nn
from torchvision.datasets import CIFAR10
from torchvision.transforms import v2
from torch.utils.data import DataLoader

In [31]:
SET_ROOT = './data'
NUM_CHANNELS = 3 # RGB format
NUM_CLASSES_CIFAR10 = 10 # In CIFAR10 Dataset
EPOCHS = 5 #70
BATCH_SIZE = 50 # Required = 1, for now
ALLOW_CUDA_TF32_CORES = True # Enable tensor cores on nvidia gpus (ampere gen. and above)
ALLOW_CUDA = True # Overwrites all CUDA setup; enables standard CPU usage. 

In [32]:
def loadData():
    transf_test = v2.Compose([
        v2.RandomHorizontalFlip(),
        v2.RandomVerticalFlip(),
        v2.ToTensor()
    ])
    transf_train = v2.Compose([
        v2.ToTensor()
    ])

    testset = CIFAR10(root=SET_ROOT, 
                    train=False, 
                    download=True, 
                    transform=transf_test)

    trainset = CIFAR10(root=SET_ROOT, 
                    train=True, 
                    download=True, 
                    transform=transf_train)

    print(f"Len test: {len(testset)}, Len train: {len(trainset)}")
    train_size= int(0.8 * len(trainset))
    val_size = len(trainset) - train_size
    train_dataset, val_dataset = torch.utils.data.random_split(trainset, [train_size, val_size])

    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
    test_loader = DataLoader(testset, batch_size=BATCH_SIZE, shuffle=False)
    return train_loader, val_loader, test_loader

In [33]:
# CUDA setup

if ALLOW_CUDA:
    cuda_available = torch.cuda.is_available()
    if not cuda_available:
        device = torch.device("cpu")
        print(device)

    else:
        device = torch.device("cuda")
        print(device, "devices: ", torch.cuda.device_count())
        print(torch.cuda.current_device(), ", name:", torch.cuda.get_device_name(torch.cuda.current_device()))

        # For my home GPU, for matrix multiplication speedup
        # If you dont have a nvidia 30-series or above disable 
        # via the ALLOW_CUDA_TF32_CORES in the top of this file
        # -Hannes
        if ALLOW_CUDA_TF32_CORES:
            print("Enabling TF32 matmul")
            torch.backends.cuda.matmul.allow_tf32 = True
            torch.backends.cudnn.allow_tf32 = True
        else:
            # Revert to default
            torch.backends.cuda.matmul.allow_tf32 = False
            torch.backends.cudnn.allow_tf32 = False
else:
    device = torch.device("cpu")
    print(device)

cuda devices:  1
0 , name: NVIDIA GeForce RTX 3070
Enabling TF32 matmul


In [38]:
class CNN1(nn.Module):
    def __init__(self, act_func=nn.LeakyReLU):
        super().__init__()

        self.conv1 = nn.Conv2d(in_channels=NUM_CHANNELS, out_channels=10, kernel_size=2, device=device)
        self.af1 = act_func()

        self.conv2 = nn.Conv2d(in_channels=10, out_channels=20, kernel_size=2, device=device)
        self.af2 = act_func()

        self.pool1 = nn.MaxPool2d(kernel_size=2)

        # In-features is now hardcoded
        # Figure out how to calculate dynamically
        self.fc1 = nn.Linear(in_features=4500, out_features=NUM_CLASSES_CIFAR10, device=device)
        self.lsfm1 = nn.LogSoftmax(dim=0)

    def forward(self, x):
        x = self.conv1(x)
        x = self.af1(x)
        x = self.conv2(x)
        x = self.af2(x)
        x = self.pool1(x)
        x = torch.flatten(x, start_dim=1)
        x = self.fc1(x)
        x = self.lsfm1(x)
        return x

    def train_model(self, criterion, optimizer, train_loader):
        for epoch in range(EPOCHS):
            self.train() # training mode       
            for batch_i, (images, labels) in enumerate(train_loader):
                images, labels = images.float().to(device), labels.to(device) 
                loss = criterion(self(images), labels)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            if epoch % 1 == 0:
                print(epoch, loss.item(), batch_i+1)


    def test_model(self, test_loader):
        with torch.no_grad():
            self.eval() # evaluation mode
            correct, total = 0,0 
            for batch_i, (images, labels) in enumerate(test_loader):
                images, labels = images.float().to(device), labels.to(device)
                pred = torch.argmax(self(images), dim=1)
                correct += (pred == labels).sum()
                total += len(pred)

            print(f"Correct: {correct}, Total: {total}, Acc: {correct/total*100}%")

In [39]:
train_loader, val_loader, test_loader = loadData()



Files already downloaded and verified
Files already downloaded and verified
Len test: 10000, Len train: 50000


In [41]:
# Default: LeakyReLU, SGD
model = CNN1()
if torch.cuda.is_available() and ALLOW_CUDA: 
    print("Using cuda")
    model.cuda()

optimizer = torch.optim.SGD(model.parameters(), lr=0.0001)
criterion = nn.CrossEntropyLoss()
model.train_model(criterion, optimizer, train_loader)
model.test_model(test_loader)

Using cuda
0 2.300020456314087 800
1 2.3024566173553467 800
2 2.298409938812256 800
3 2.2984113693237305 800
4 2.2978098392486572 800
Correct: 1700, Total: 10000, Acc: 17.0%


In [None]:
# LeakyReLU, Adam
model2 = CNN1()
optimizer2 = torch.optim.Adam(model2.parameters(), lr=0.0001)
criterion2 = nn.CrossEntropyLoss()
model2.train_model(criterion2, optimizer2)
model2.test_model()

In [None]:
# Tanh, SGD
model3 = CNN1(act_func=nn.Tanh)
optimizer3 = torch.optim.SGD(model3.parameters(), lr=0.0001)
criterion3 = nn.CrossEntropyLoss()
model3.train_model(criterion3, optimizer3)
model3.test_model()