In [1]:
import requests, re, time
import torch, torchvision
from torch import nn, optim
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt

In [2]:
#load dataset into directory from drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
xform = transforms.Compose([transforms.Resize((224,224)), transforms.ToTensor()])
dataset_full = datasets.ImageFolder('/content/drive/MyDrive/dataset', transform=xform)

In [4]:
dataset_full

Dataset ImageFolder
    Number of datapoints: 2259
    Root location: /content/drive/MyDrive/dataset
    StandardTransform
Transform: Compose(
               Resize(size=(224, 224), interpolation=bilinear)
               ToTensor()
           )

In [5]:
dataset_full.class_to_idx

{'angry': 0, 'happy': 1, 'neutral': 2, 'sad': 3}

In [6]:
dataset_full.classes

['angry', 'happy', 'neutral', 'sad']

According to the second link cited in the proposal, a grayscale transofrm on top of resizing has given the best results when comparing sentiments on faces


In [None]:
#any possible data augmentation here:






In [7]:
n_all = len(dataset_full)
n_train = int(0.8 * n_all)
n_test = n_all - n_train
rng = torch.Generator().manual_seed(1549)
dataset_train, dataset_test = torch.utils.data.random_split(dataset_full, [n_train, n_test], rng)
loader_train = torch.utils.data.DataLoader(dataset_train, batch_size = 4, shuffle=True)
loader_test = torch.utils.data.DataLoader(dataset_test, batch_size = 4, shuffle=True) #prepare dataset by splitting, same as usual

In [8]:
model = models.resnet18(pretrained=True)
model.fc = nn.Linear(model.fc.in_features, 4)
torch.nn.init.xavier_uniform_(model.fc.weight)

Downloading: "https://download.pytorch.org/models/resnet18-5c106cde.pth" to /root/.cache/torch/hub/checkpoints/resnet18-5c106cde.pth


HBox(children=(FloatProgress(value=0.0, max=46827520.0), HTML(value='')))




Parameter containing:
tensor([[-0.0402,  0.0154,  0.0253,  ...,  0.0059,  0.0340, -0.0994],
        [ 0.0799,  0.0956,  0.1006,  ...,  0.0703,  0.0936, -0.0378],
        [-0.0796,  0.0365, -0.0266,  ...,  0.0475, -0.0928,  0.0309],
        [ 0.0837,  0.0777, -0.0160,  ..., -0.0751,  0.1006, -0.0474]],
       requires_grad=True)

In [9]:
torch.cuda.device_count()

1

In [10]:
device = torch.device('cuda:0')
model = model.to(device)

as done in A2, need to modify layers to attain better accuracies later on. could experiment on raw resnet first and modify fc layers later on.

In [11]:
criterion = nn.CrossEntropyLoss()

def run_test(model):
    nsamples_test = len(dataset_test)
    loss, correct = 0, 0
    model.eval()
    with torch.no_grad():
        for samples, labels in loader_test:
            samples = samples.to(device)
            labels = labels.to(device)
            outs = model(samples)
            loss += criterion(outs, labels)
            _, preds = torch.max(outs.detach(), 1)
            correct_mask = preds == labels
            correct += correct_mask.sum(0).item()
    return loss / nsamples_test, correct / nsamples_test

In [12]:
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

In [13]:
def run_train(model, opt, sched):
    nsamples_train = len(dataset_train)
    loss_sofar, correct_sofar = 0, 0
    model.train()
    with torch.enable_grad():
        for samples, labels in loader_train:
            samples = samples.to(device)
            labels = labels.to(device)
            opt.zero_grad()
            outs = model(samples)
            _, preds = torch.max(outs.detach(), 1)
            loss = criterion(outs, labels)
            loss.backward()
            opt.step()
            loss_sofar += loss.item() * samples.size(0)
            correct_sofar += torch.sum(preds == labels.detach())
    sched.step()
    return loss_sofar / nsamples_train, correct_sofar / nsamples_train

In [14]:
def run_all(model, optimizer, scheduler, n_epochs):
    for epoch in range(n_epochs):
        loss_train, acc_train = run_train(model, optimizer, scheduler)
        loss_test, acc_test = run_test(model)
        print(f"epoch {epoch}: train loss {loss_train:.4f} acc {acc_train:.4f}, test loss {loss_test:.4f} acc {acc_test:.4f}")

In [15]:
run_test(model)

(tensor(0.4595, device='cuda:0'), 0.2610619469026549)

In [16]:
run_all(model, optimizer, scheduler, 10)

  "Palette images with Transparency expressed in bytes should be "


epoch 0: train loss 1.3539 acc 0.5163, test loss 0.1912 acc 0.7124
epoch 1: train loss 0.8266 acc 0.7023, test loss 0.1674 acc 0.7788
epoch 2: train loss 0.5861 acc 0.7925, test loss 0.1967 acc 0.7522
epoch 3: train loss 0.4622 acc 0.8428, test loss 0.1465 acc 0.8230
epoch 4: train loss 0.3595 acc 0.8827, test loss 0.1292 acc 0.8319
epoch 5: train loss 0.1758 acc 0.9530, test loss 0.1217 acc 0.8562
epoch 6: train loss 0.1289 acc 0.9646, test loss 0.1197 acc 0.8606
epoch 7: train loss 0.1288 acc 0.9662, test loss 0.1135 acc 0.8606
epoch 8: train loss 0.1001 acc 0.9745, test loss 0.1128 acc 0.8695
epoch 9: train loss 0.1014 acc 0.9712, test loss 0.1146 acc 0.8650


In [None]:
# https://ieeexplore.ieee.org/document/8990979, maximum expected acc is 70% using resnet18 and kaggle dataset