In [4]:
import torch
import torchvision
import torchvision.models as models
import torchvision.transforms as transforms
from torch import nn
from torch.utils.data.sampler import SubsetRandomSampler

In [5]:
model = models.alexnet(pretrained=True)
for param in model.parameters():
    param.require_grad = False
model.classifier[6] = nn.Linear(4096, 10)


In [6]:
num_of_classes = 10
lr = 0.05
batch_size = 32
num_epochs = 10
criterion = nn.CrossEntropyLoss()

In [7]:
transform_train = transforms.Compose([
        transforms.Resize(224),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])

transform_val = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])
CIFAR10_train = torchvision.datasets.CIFAR10(".", train = True, transform = transform_train)
CIFAR10_val = torchvision.datasets.CIFAR10(".", train = True, transform = transform_train)
CIFAR10_test = torchvision.datasets.CIFAR10(".", train = False, transform = transform_val)

In [8]:
CIFAR10_train.data[0]

array([[[ 59,  62,  63],
        [ 43,  46,  45],
        [ 50,  48,  43],
        ...,
        [158, 132, 108],
        [152, 125, 102],
        [148, 124, 103]],

       [[ 16,  20,  20],
        [  0,   0,   0],
        [ 18,   8,   0],
        ...,
        [123,  88,  55],
        [119,  83,  50],
        [122,  87,  57]],

       [[ 25,  24,  21],
        [ 16,   7,   0],
        [ 49,  27,   8],
        ...,
        [118,  84,  50],
        [120,  84,  50],
        [109,  73,  42]],

       ...,

       [[208, 170,  96],
        [201, 153,  34],
        [198, 161,  26],
        ...,
        [160, 133,  70],
        [ 56,  31,   7],
        [ 53,  34,  20]],

       [[180, 139,  96],
        [173, 123,  42],
        [186, 144,  30],
        ...,
        [184, 148,  94],
        [ 97,  62,  34],
        [ 83,  53,  34]],

       [[177, 144, 116],
        [168, 129,  94],
        [179, 142,  87],
        ...,
        [216, 184, 140],
        [151, 118,  84],
        [123,  92,  72]]

In [9]:
import numpy as np
val_ratio = 0.1
N = len(CIFAR10_train)
np.random.seed(10)
idx = np.random.randint(0, N, size = N)
split = int(N * val_ratio)
train_idx, val_idx = idx[split:], idx[:split]
train_sampler = SubsetRandomSampler(train_idx)
val_sampler = SubsetRandomSampler(val_idx)


In [10]:
val_ratio = 0.1 #
train_loader = torch.utils.data.DataLoader(CIFAR10_train, batch_size = batch_size, sampler = train_sampler)
val_loader = torch.utils.data.DataLoader(CIFAR10_train, batch_size = batch_size, sampler = val_sampler)
test_loader = torch.utils.data.DataLoader(CIFAR10_test, batch_size = 100)

In [11]:
print(model)



AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

In [12]:
from tqdm import tqdm
torch.cuda.set_device(0)
model = model.cuda(0)
# model = torch.nn.DataParallel(model).cuda(0)
optimizer = torch.optim.SGD(model.parameters(), lr)
log_train_loss = []
log_val_loss = []
log_val_acc = []
for epoch in tqdm(range(num_epochs)):
	# training
    model.train()  # if using batchnorm or dropout, use train mode setting! don't want to adjust normalization on non-train data
    epoch_train_loss = 0
    for batch_idx, (input, target) in enumerate(train_loader):
        input = input.cuda(0, non_blocking=True) # move to device and zero optimizer
        target = target.cuda(0, non_blocking=True)
        optimizer.zero_grad()
        ### train step ###
        output = model(input)   # forward
        loss = criterion(output, target)
        print(loss)
        ### end train step ###
        ### backward pass and optim step ###
        loss.backward()
        optimizer.step()
        ### logging
        epoch_train_loss += loss
    log_train_loss.append(epoch_train_loss/ (batch_idx + 1))


    # evaluation
    model.eval() # set batchnorm + dropout in eval so it doesn't adjust on validation data
    with torch.no_grad(): # turn off gradients
        epoch_val_loss = 0
        num_correct = 0
        highest_val_acc = 0
        for batch_idx, (input, target) in enumerate(val_loader):
            # do the same steps for train step as for val step but skip updates and backward pass (no gradients)
            input = input.cuda(0, non_blocking= True)
            target = target.cuda(0, non_blocking= True)
            # log val loss every val step
            output = model(input)
            loss = criterion(output, target)
            epoch_val_loss += loss
            # validation accuracy
            num_correct_per_batch = torch.sum(target == torch.argmax(output, axis=1))
            num_correct += num_correct_per_batch
        val_accuracy = num_correct.item() / split
        log_val_acc.append(val_accuracy)
        print("validation accuracy: ", val_accuracy)
        if val_accuracy > highest_val_acc:
            highest_val_acc = val_accuracy
        log_val_loss.append(epoch_val_loss/ (batch_idx + 1)) # average the loss


  0%|          | 0/10 [00:00<?, ?it/s]

tensor(2.4567, device='cuda:0', grad_fn=<NllLossBackward>)
tensor(3.6966, device='cuda:0', grad_fn=<NllLossBackward>)
tensor(5.8216, device='cuda:0', grad_fn=<NllLossBackward>)
tensor(4.6389, device='cuda:0', grad_fn=<NllLossBackward>)
tensor(2.8245, device='cuda:0', grad_fn=<NllLossBackward>)
tensor(2.3420, device='cuda:0', grad_fn=<NllLossBackward>)
tensor(2.2586, device='cuda:0', grad_fn=<NllLossBackward>)
tensor(2.4460, device='cuda:0', grad_fn=<NllLossBackward>)
tensor(2.3152, device='cuda:0', grad_fn=<NllLossBackward>)
tensor(2.3163, device='cuda:0', grad_fn=<NllLossBackward>)
tensor(2.3233, device='cuda:0', grad_fn=<NllLossBackward>)
tensor(2.2504, device='cuda:0', grad_fn=<NllLossBackward>)
tensor(2.3575, device='cuda:0', grad_fn=<NllLossBackward>)
tensor(2.2773, device='cuda:0', grad_fn=<NllLossBackward>)
tensor(2.3025, device='cuda:0', grad_fn=<NllLossBackward>)
tensor(2.3086, device='cuda:0', grad_fn=<NllLossBackward>)
tensor(2.3308, device='cuda:0', grad_fn=<NllLossBackward

KeyboardInterrupt: 

In [None]:
M = len(CIFAR10_test)
def eval_test_acc(M, test_loader):
    num_correct = 0
    with torch.no_grad():
        for batch_idx, (input, target) in enumerate(test_loader):
            input = input.cuda(0, non_blocking= True)
            target = target.cuda(0, non_blocking= True)
            output = model(input)
            num_correct_per_batch = torch.sum(target == torch.argmax(output, axis=1))
            num_correct += num_correct_per_batch
    print("accuracy: ", num_correct.item()/M)

eval_test_acc(M, test_loader)

In [None]:
model2 = models.alexnet(pretrained=True)
model2 = model2.cuda(0)
for epoch in range(num_epochs):
    model2.train()
    epoch_train_loss
    for i, (input, target) in enumerate(train_loader):
        input = input.cuda(0, non_blocking=True)
        target = target.cuda(0, non_blocking= True)
        output = model(input)
        loss = criterion(output, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        epoch_train_loss += loss
    log_epoch_train






In [None]:
import matplotlib.pyplot as plt
def report(name, loss1, loss2, val_acc_l, test_acc):
    plt.plot(loss1, label = "Train Loss")
    plt.plot(loss2, label = "Validation loss")
    plt.xlabel("epoch")
    plt.ylabel("loss")
    plt.legend()
    plt.savefig(name)
    plt.show()
    print("Highest accuracy: ", max(val_acc_l))
    print("Final test accuracy: ", test_acc)
    plt.savefig(name)


report("A4a Transfer Learning", log_train_loss, log_val_loss, log_val_acc, 1)

In [None]:
print(torch.version)
