In [1]:
# Dependencies: model, data loading, and label encoding
import torch
from torch import nn
from torchvision.models import resnet50
from torch.utils.data import DataLoader, TensorDataset

# preprocess images via PyTorch docs
from torchvision import transforms
from sklearn.preprocessing import OneHotEncoder

# utils
import os
import numpy as np
from PIL import Image
from json import load

In [2]:
def get_raw_data(ann, root = 'decathlon-1.0'):
    """Load every annotated image into memory together with its label.

    Parameters
    ----------
    ann : dict
        Parsed annotation file. Its 'images' list (entries with 'id' and
        'file_name') and 'annotations' list (entries with 'id' and
        'category_id') are walked in lockstep.
    root : str, optional
        Directory each 'file_name' is joined onto. Defaults to the dataset
        folder this notebook uses.
        NOTE(review): assumes 'file_name' is a relative path -- confirm.

    Returns
    -------
    (list, list)
        The images as numpy arrays and the matching 'category_id' values,
        both in annotation order.

    Raises
    ------
    AssertionError
        If the two lists differ in length, or an image / annotation pair
        disagrees on 'id'.
    """
    # zip() would silently drop the tail of the longer list, so check first.
    assert len(ann['images']) == len(ann['annotations']), \
        'images and annotations differ in length'

    X_raw = []
    y = []
    for img_data, ann_data in zip(ann['images'], ann['annotations']):
        assert ann_data['id'] == img_data['id'], \
            'image / annotation id mismatch: %r != %r' % (img_data['id'], ann_data['id'])
        y.append(ann_data['category_id'])
        # The context manager closes the file even if decoding fails.
        with Image.open(os.path.join(root, img_data['file_name'])) as img:
            X_raw.append(np.array(img))
    return X_raw, y

def preprocess(X_raw, transformer_fn):
    """Run ``transformer_fn`` over every raw image and stack the results.

    Each element of ``X_raw`` is wrapped with ``Image.fromarray`` before being
    handed to ``transformer_fn``. The per-image outputs are then combined
    into a single tensor with ``torch.stack``.
    """
    transformed = [transformer_fn(Image.fromarray(raw)) for raw in X_raw]
    return torch.stack(transformed)

In [3]:
# Read the train / validation annotation files.
with open('decathlon-1.0/annotations/vgg-flowers_train.json') as f:
    ann_train = load(f)

with open('decathlon-1.0/annotations/vgg-flowers_val.json') as f:
    ann_val = load(f)

X_train_raw, y_train = get_raw_data(ann_train)
X_val_raw, y_val = get_raw_data(ann_val)


# One-hot encode the category ids with a dense-output encoder.
# Newer scikit-learn releases renamed `sparse` to `sparse_output`; try the new
# spelling first and fall back for installations that only know the old one.
try:
    dummifyer = OneHotEncoder(sparse_output = False)
except TypeError:
    dummifyer = OneHotEncoder(sparse = False)
dummifyer.fit(np.array(y_train).reshape(-1, 1))

# The encoder emits float64 by default; cast to float32 so the targets use the
# same dtype as the network output (presumably float32 -- confirm).
y_train_dummy = torch.from_numpy(dummifyer.transform(np.array(y_train).reshape(-1, 1))).float()
y_val_dummy = torch.from_numpy(dummifyer.transform(np.array(y_val).reshape(-1, 1))).float()

In [4]:
# hyperparams shared by every run

# Mini-batch size and SGD momentum are held fixed; the learning rate is not
# set here -- it is swept over `lrs` further down.
batch_size = 64
momentum = 0.9

# classification objective used for both the training and validation loss
loss_fn = nn.CrossEntropyLoss()

In [5]:
# preprocess images: resize, centre-crop to 224x224, convert to a tensor and
# normalise each channel with the given mean / std
transformer_fn = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

X_train = preprocess(X_train_raw, transformer_fn)
X_val = preprocess(X_val_raw, transformer_fn)

# make data loader
train_loader = DataLoader(TensorDataset(X_train, y_train_dummy), batch_size = batch_size, shuffle = True)
# Validation batches are kept in a fixed order: the epoch loss is a sum of
# per-batch losses, so shuffling would change which samples fall into the
# (possibly shorter) last batch and make the reported value vary between
# passes even for an unchanged model.
val_loader = DataLoader(TensorDataset(X_val, y_val_dummy), batch_size = batch_size, shuffle = False)

In [6]:
# choose the compute device: the first CUDA GPU when one is available,
# otherwise the CPU
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

# drop any GPU memory PyTorch is still caching
torch.cuda.empty_cache()

cuda:0


In [7]:
# helper function for generating a fresh ResNet
def get_resnet_feature_extractor(num_classes):
    """Build a pretrained ResNet-50 whose only trainable part is a new head.

    Every parameter of the pretrained network is frozen, then the final fully
    connected layer is replaced by a fresh ``nn.Linear`` with ``num_classes``
    outputs whose parameters are trainable.

    Parameters
    ----------
    num_classes : int
        Number of output units of the replacement head.

    Returns
    -------
    The modified ResNet-50 model.
    """
    resnet = resnet50(pretrained = True)

    # freeze layers
    for params in resnet.parameters():
        params.requires_grad = False

    # change the final fully connected layer output to the number of classes
    # in the target dataset; the input width is read from the layer being
    # replaced rather than hard-coded.
    resnet.fc = nn.Linear(resnet.fc.in_features, num_classes)

    # unfreeze top layer: the flag lives on the parameters, so set it there
    # (assigning `requires_grad` on the module itself only creates an unused
    # attribute).
    for params in resnet.fc.parameters():
        params.requires_grad = True

    return resnet

In [8]:
# learning rates to sweep
lrs = [1, 0.1, 0.01, 0.001]

# one independent network per learning rate, each with a 102-way head
nets = [get_resnet_feature_extractor(102) for _ in lrs]

In [9]:
# train loop
def train(model, tloader, vloader, lf, optim, epochs = 10):
    """Train ``model`` for ``epochs`` passes and record the loss of each pass.

    Parameters
    ----------
    model : torch.nn.Module
        Network to optimise in place. It must already sit on the device it
        should run on; every batch is moved to that device.
    tloader, vloader : iterable
        Yield ``(inputs, targets)`` tensor pairs for training / validation.
    lf : callable
        ``lf(predictions, targets)`` returning a scalar loss tensor.
    optim : torch.optim.Optimizer
        Optimiser stepped once per training batch.
    epochs : int, optional
        Number of passes over ``tloader``.

    Returns
    -------
    (list, list)
        Per-epoch training and validation losses. Each entry is the *sum* of
        the batch losses of that epoch, not their mean.
    """
    # Follow the model's own device instead of a notebook-level global, so the
    # batches can never end up on a different device than the weights.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    train_loss_history = []
    val_loss_history = []

    for t in range(epochs):

        epoch_train_loss = 0
        epoch_val_loss = 0

        model.train()
        for X_local, y_local in tloader:

            X_local = X_local.to(run_device)
            y_local = y_local.to(run_device)

            # Forward pass: compute predicted y by passing x to the model.
            y_pred_local = model(X_local)

            # batch loss
            loss = lf(y_pred_local, y_local)
            epoch_train_loss += loss.item()

            # compute gradient
            optim.zero_grad()
            loss.backward()

            # update params
            optim.step()

        # get validation loss (autograd disabled once for the whole pass)
        model.eval()
        with torch.no_grad():
            for X_local, y_local in vloader:

                X_local = X_local.to(run_device)
                y_local = y_local.to(run_device)

                y_pred_local = model(X_local)

                # batch loss
                loss = lf(y_pred_local, y_local)
                epoch_val_loss += loss.item()

        # show epoch validation loss
        train_loss_history.append(epoch_train_loss)
        val_loss_history.append(epoch_val_loss)

        print("\t\t\t\tEpoch: %d/%d\tTrain loss: %.2f\tValid loss: %.2f" 
              % (t + 1, epochs, epoch_train_loss, epoch_val_loss))

    return train_loss_history, val_loss_history

In [10]:
def accuracy(model, loader, encoder):
    """Return the fraction of samples in ``loader`` that ``model`` gets right.

    The model is moved to the notebook-level ``device`` for evaluation, put in
    eval mode, and moved back to the CPU before returning.

    Parameters
    ----------
    model : torch.nn.Module
        Network producing one score per class along dimension 1.
    loader : iterable
        Yields ``(inputs, targets)`` pairs; the true class of a sample is the
        argmax of its target row.
    encoder
        Not used by the computation; kept so existing calls keep working.

    Returns
    -------
    float
        ``correct / n``. Raises ``ZeroDivisionError`` when ``loader`` yields
        no samples.
    """
    correct = 0
    n = 0

    model.to(device)
    model.eval()
    with torch.no_grad():
        for X_local, y_local in loader:

            X_local = X_local.to(device)
            y_local = y_local.to(device)

            y_logits = model(X_local)

            y_pred = torch.argmax(y_logits, dim = 1)
            y_true = torch.argmax(y_local, dim = 1)

            # Reduce on the tensor itself; Python's builtin sum() would walk
            # the batch element by element.
            correct += (y_pred == y_true).sum().item()
            n += len(y_true)

    model.to('cpu')
    return correct / n

In [None]:
epochs = 200

# Loss curves of every run, keyed by learning rate, so they remain available
# for comparison after the sweep instead of being overwritten each iteration.
loss_histories = {}

for resnet, learning_rate in zip(nets, lrs):
    
    print('\n\n' + '=' * 40 + '\nlearning rate %.2e\n' % learning_rate + '=' * 40)
    
    # only the network being trained occupies the compute device
    resnet.to(device)
    optimizer = torch.optim.SGD(resnet.parameters(), lr = learning_rate, momentum = momentum)
    
    train_loss, val_loss = train(
        model = resnet, tloader =  train_loader, vloader = val_loader, 
        lf = loss_fn, epochs = epochs, optim = optimizer)
    loss_histories[learning_rate] = {'train': train_loss, 'val': val_loss}
    
    print('final train loss: %.3f\tfinal val loss: %.3f' % (train_loss[-1], val_loss[-1]))
    # move the finished network off the compute device before the next run
    resnet.to('cpu')



learning rate 1.00e+00


In [None]:
# Report train / validation accuracy of each trained network

for resnet, learning_rate in zip(nets, lrs):
    banner = '\n\n' + '=' * 40 + '\nlearning rate %.2e\n' % learning_rate + '=' * 40
    print(banner)

    train_acc = accuracy(resnet, train_loader, dummifyer)
    print("Training Accuracy:\t%.3f" % train_acc)

    val_acc = accuracy(resnet, val_loader, dummifyer)
    print("Validation Accuracy:\t%.3f" % val_acc)