In [1]:
import torchvision.datasets as datasets
import torchvision.models as models
import torch
from torchvision import transforms
from keras.utils import to_categorical
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torch.nn.functional as F
import numpy as np
from PIL import Image
from torchvision.models.feature_extraction import create_feature_extractor, get_graph_node_names

2023-05-09 20:42:29.322883: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-05-09 20:42:29.522750: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-05-09 20:42:30.197339: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: usr/local/cuda-11.8/lib64
2023-05-09 20:42:30.197531: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cann

Config

In [2]:
# Hyperparameters for the notebook.
# NOTE(review): the active optimizer cell hard-codes lr=0.001, so
# 'learning_rate' is only referenced by commented-out alternatives.
config = dict(
    batch_size=64,
    learning_rate=0.1,
    num_epochs=200,
)

Load Datasets

In [3]:
# Mean / std triples passed to Normalize; both pipelines use the same values.
_NORM_MEAN = (0.4914, 0.4822, 0.4465)
_NORM_STD = (0.2023, 0.1994, 0.2010)

# Training pipeline: random padded crop and horizontal flip, then tensor
# conversion and normalisation.
_train_steps = [
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(_NORM_MEAN, _NORM_STD),
]
transform_train = transforms.Compose(_train_steps)

# Evaluation pipeline: no augmentation, only tensor conversion and normalisation.
_test_steps = [
    transforms.ToTensor(),
    transforms.Normalize(_NORM_MEAN, _NORM_STD),
]
transform_test = transforms.Compose(_test_steps)

In [4]:
# Training split, read from an existing local copy (download is disabled).
cifar10_trainset = datasets.CIFAR10(
    root='/home/user01/cifar10/train/',
    train=True,
    download=False,
    transform=transform_train,
)
# Shuffled mini-batches for training.
trainloader = DataLoader(
    dataset=cifar10_trainset,
    batch_size=config['batch_size'],
    shuffle=True,
    num_workers=7,
)

In [5]:
# Test split, read from an existing local copy (download is disabled).
cifar10_testset = datasets.CIFAR10(
    root='/home/user01/cifar10/test/',
    train=False,
    download=False,
    transform=transform_test,
)
# Fixed-order mini-batches for evaluation.
testloader = DataLoader(
    dataset=cifar10_testset,
    batch_size=config['batch_size'],
    shuffle=False,
    num_workers=7,
)

Model Definition

In [6]:
# get_graph_node_names(model)

In [7]:
class Model(torch.nn.Module):
    """Small convolutional classifier.

    Two convolution + ReLU + max-pool stages are followed by three linear
    layers; the first linear layer takes 16 * 5 * 5 flattened features and
    the last one produces 10 scores per sample.
    """

    def __init__(self):
        super().__init__()
        nn = torch.nn
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5)
        # A single pooling layer is reused after both convolutions.
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Return the raw output scores of the final linear layer for batch ``x``."""
        for conv in (self.conv1, self.conv2):
            x = self.pool(F.relu(conv(x)))
        # Merge every dimension except the batch dimension.
        hidden = torch.flatten(x, start_dim=1)
        for dense in (self.fc1, self.fc2):
            hidden = F.relu(dense(hidden))
        return self.fc3(hidden)

In [8]:
# Build a freshly initialised instance of the network defined above
# (no saved or pretrained weights are loaded here).
new_model = Model()

In [9]:
# Explicitly mark every parameter of the network as trainable.
for weight in new_model.parameters():
    weight.requires_grad = True

In [10]:
# Smoke test: run two random 3x32x32 inputs through the network on the CPU.
# `inp` stays a numpy array because a later cell slices it again.
inp = np.random.rand(2, 3, 32, 32)
sample_batch = torch.Tensor(inp[:2])
new_model(sample_batch)

tensor([[ 0.0960, -0.0022,  0.0923, -0.0065, -0.1348, -0.0568,  0.0930, -0.0711,
          0.0306,  0.0357],
        [ 0.0983, -0.0015,  0.0923, -0.0067, -0.1357, -0.0553,  0.0916, -0.0733,
          0.0281,  0.0430]], grad_fn=<AddmmBackward0>)

# Training on CIFAR10

In [11]:
def label_to_out(label, num_classes=10):
    """Encode a class index as a one-hot vector.

    Args:
        label: Index of the entry to set to 1.
        num_classes: Length of the returned vector. Defaults to 10, the
            value that used to be hard-coded.

    Returns:
        A numpy float array of length ``num_classes`` that is 0 everywhere
        except at ``label``, where it is 1.

    Raises:
        IndexError: If ``label`` is out of range for ``num_classes``.
    """
    out = np.zeros(num_classes)
    out[label] = 1
    return out

In [12]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
device

'cuda'

In [13]:
# Move the network to the device selected above.
new_model = new_model.to(device)

In [14]:
# Loss passed to both the training loop and the evaluation loop below.
loss_fn = torch.nn.CrossEntropyLoss()

In [15]:
# Alternative optimizer / scheduler setups are left commented out around the
# active choice.
# optimizer = torch.optim.SGD(new_model.parameters(), lr=config['learning_rate'],
#                       momentum=0.9, weight_decay=5e-4)
# Active choice: SGD with momentum. The learning rate is hard-coded here, so
# config['learning_rate'] is NOT used by this optimizer.
optimizer = torch.optim.SGD(new_model.parameters(), lr=0.001, momentum=0.9)
# optimizer = torch.optim.AdamW(new_model.parameters())
#optimizer = torch.optim.Adam(new_model.parameters(), lr=config['learning_rate'])
#scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)

In [16]:
def output_to_label(out):
    """Return ``out.argmax()``, i.e. the position of the largest entry of ``out``.

    No axis is passed, so for numpy arrays and torch tensors the position
    refers to the flattened data.
    """
    predicted = out.argmax()
    return predicted

In [17]:
# Predicted class index for each of the two random smoke-test inputs.
# The inputs are moved with .to(device) rather than a hard-coded .cuda(), so
# the cell also runs when `device` resolved to 'cpu'.
new_model(torch.Tensor(inp[:2]).to(device)).argmax(axis=1).cpu().numpy()

array([0, 0])

In [18]:
# Sanity check on one training batch: print the output, input and label shapes
# and the loss of the untrained network.
# The loop stops after the first batch; iterating the whole loader just to
# inspect batch 0 would load and augment the entire training set for nothing.
for features, labels in trainloader:
    out = new_model(features.to(device))
    print(out.shape)
    print(features.shape)
    print(labels.shape)
    print(loss_fn(out, labels.to(device)))
    break

torch.Size([64, 10])
torch.Size([64, 3, 32, 32])
torch.Size([64])
tensor(2.2879, device='cuda:0', grad_fn=<NllLossBackward0>)


In [27]:
def train_loop(dataloader, model, loss_fn, optimizer, epoch_num):
    """Run one optimisation pass over ``dataloader``.

    Args:
        dataloader: Iterable of ``(features, labels)`` batches; its
            ``dataset`` attribute must support ``len``.
        model: Network to train. It is switched to training mode.
        loss_fn: Callable ``loss_fn(pred, labels)`` returning a scalar tensor.
        optimizer: Optimizer that updates the parameters of ``model``.
        epoch_num: Epoch number, used only in the progress messages.

    Every 100th batch (starting with batch 0) prints the current loss and the
    number of samples processed so far.
    """
    num_points = len(dataloader.dataset)

    # Send batches to wherever the model lives instead of relying on notebook
    # state; the notebook-level `device` is only a fallback for a model that
    # has no parameters.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else device

    # Make sure mode-dependent layers (dropout, batch norm, ...) behave as in
    # training even if an evaluation pass left the model in eval mode.
    model.train()

    for batch, (features, labels) in enumerate(dataloader):
        # Compute prediction and loss
        features, labels = features.to(target_device), labels.to(target_device)
        pred = model(features)
        loss = loss_fn(pred, labels)

        # Backpropagation
        optimizer.zero_grad()  # clear gradients left over from the previous step
        loss.backward()        # compute gradients of the loss w.r.t. the parameters
        optimizer.step()       # update the parameters from those gradients

        if batch % 100 == 0:
            # A separate name keeps the loss tensor itself from being rebound.
            loss_value, current = loss.item(), batch * len(features)
            print(f"Epoch {epoch_num} - loss: {loss_value:>7f}  [{current:>5d}/{num_points:>5d}]\n")


def test_loop(dataloader, model, loss_fn, epoch_num, name):
    num_points = len(dataloader.dataset)
    sum_test_loss, correct = 0, 0

    with torch.no_grad():
        for batch, (features, labels) in enumerate(dataloader):
            features, labels = features.to(device), labels.to(device)
            pred = model(features)
            if batch == 0:
                global tmp_features
                global tmp_labels
                global tmp_pred
                tmp_features = features
                tmp_labels = labels
                tmp_pred = pred
            sum_test_loss += loss_fn(pred, labels).item() # add the current loss to the sum of the losses
            # convert the outputs of the model on the current batch to a numpy array
            pred_lst = list(pred.argmax(axis=1).cpu().numpy())
            # convert the original labels corresponding to the current batch to a numpy array
            true_lst = labels
            # determine the points for which the model is correctly predicting the label (add a 1 for each)
            match_lst = [1 if p==t else 0 for (p, t) in zip(pred_lst, true_lst)] 
            # count how many points are labeled correctly in this batch and add the number to the overall count of the correct labeled points
            correct += sum(match_lst) 
            
    sum_test_loss /= num_points
    correct /= num_points
    print(f"Epoch {epoch_num} - {name} Accuracy: {correct*100}%, Avg loss: {sum_test_loss}\n")

In [None]:
# Main loop: for each epoch (numbered from 1 to config['num_epochs']), run one
# training pass over the training loader and then evaluate on the test loader.
for epoch_num in range(1, config['num_epochs']+1):
    train_loop(trainloader, new_model, loss_fn, optimizer, epoch_num)
    test_loop(testloader, new_model, loss_fn, epoch_num, 'Test')

In [29]:
# Evaluate once more on the test loader. `epoch_num` is the value left behind
# by the epoch loop cell, so that cell must have run before this one.
test_loop(testloader, new_model, loss_fn, epoch_num, 'Test')

Epoch 200 - Test Accuracy: 73.07000000000001%, Avg loss: 0.012353103795647621

