In [None]:
from os import listdir
import cv2
from typing import List
import random

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt

### CONFIG

In [None]:
# Upper bound on how much data is used; None means "no limit".
# (Original note: out of 21083.)
n_images = None
# Root folder of the WLASL frames; its entries are listed to obtain the labels.
data_dir = "../../data/WLASL/frames"
# Size handed to transforms.Resize when the images are loaded.
img_size = 224
# Mini-batch size used when the data loaders are created.
batch_size = 16

### Input labels with images

In [None]:
# os.listdir returns entries in arbitrary order, so sort them to make the
# truncation below reproducible between runs and machines.
input_labels = sorted(listdir(data_dir))

# Keep at most the first n_images labels (a None limit keeps all of them).
input_labels = input_labels[:n_images]

input_labels[:5]

In [None]:
# Pick one label folder at random and preview one of its frames.
label = random.choice(input_labels)
label_dir = data_dir + "/" + label
# Sorted so the previewed frame does not depend on os.listdir's arbitrary order.
label_images = sorted(listdir(label_dir))

print(label)

image_path = label_dir + "/" + label_images[0]
image = cv2.imread(image_path)
# cv2.imread reports an unreadable or missing file by returning None instead of
# raising, so fail here with a message that names the offending path.
if image is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")
# OpenCV loads pixels as BGR; matplotlib expects RGB.
plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
plt.show()


### Create data loaders for PyTorch

In [None]:
import numpy as np
import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torchvision import datasets, transforms, models, utils

from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data import DataLoader


In [None]:
def load_split_dataset(data_dir, test_size=0.2, batch_size=32, n_images=None):
    """Build train/test DataLoaders over a random subset of an image folder.

    Images are read with ``datasets.ImageFolder``, resized using the
    notebook-level ``img_size`` setting and converted to tensors. A random
    selection of at most ``n_images`` samples is split into a test part
    (``test_size`` fraction, rounded down) and a train part (the remainder).

    Args:
        data_dir: Root directory passed to ``datasets.ImageFolder``.
        test_size: Fraction of the selected samples assigned to the test loader.
        batch_size: Batch size used for both loaders.
        n_images: Maximum number of samples to use. ``None``, or a value
            larger than the dataset, uses every sample.

    Returns:
        A ``(train_loader, test_loader)`` tuple. Both loaders wrap the same
        dataset object and sample from disjoint index lists.
    """
    transform = transforms.Compose(
        [
            transforms.Resize(img_size),
            transforms.ToTensor(),
        ]
    )

    dataset = datasets.ImageFolder(data_dir, transform=transform)
    dataset_len = len(dataset)

    # Clamp the request: an n_images larger than the dataset would otherwise
    # inflate the test share, because the split used to be computed from the
    # requested count rather than from the samples that actually exist.
    if n_images is None or n_images > dataset_len:
        n_images = dataset_len

    indices = list(range(dataset_len))
    np.random.shuffle(indices)
    indices = indices[:n_images]

    # Derive the split point from the indices that were really selected.
    split = int(np.floor(test_size * len(indices)))
    train_idx, test_idx = indices[split:], indices[:split]

    train_sampler = SubsetRandomSampler(train_idx)
    test_sampler = SubsetRandomSampler(test_idx)
    train_loader = DataLoader(dataset, sampler=train_sampler, batch_size=batch_size)
    test_loader = DataLoader(dataset, sampler=test_sampler, batch_size=batch_size)

    return train_loader, test_loader


In [None]:
# Build both loaders from the notebook configuration.
train_loader, test_loader = load_split_dataset(
    data_dir,
    batch_size=batch_size,
    n_images=n_images,
)

# Report: number of classes, train batches, test batches, batch size in use.
for stat in (
    len(train_loader.dataset.classes),
    len(train_loader),
    len(test_loader),
    train_loader.batch_size,
):
    print(stat)


In [None]:
# Class names; indexed with the integer labels that the loaders yield.
classes = train_loader.dataset.classes


def imshow(img):
    """Display a channels-first image tensor with matplotlib.

    The loaders in this notebook apply only ``Resize`` and ``ToTensor`` (no
    ``Normalize``), so there is nothing to undo before plotting. The former
    ``img / 2 + 0.5`` "unnormalize" step washed the images out and is gone.

    Args:
        img: Image tensor laid out as (channels, height, width), for example
            the grid returned by ``utils.make_grid``.
    """
    # detach/cpu make the conversion work for GPU or grad-tracking tensors too.
    npimg = img.detach().cpu().numpy()
    # matplotlib expects channels last: (C, H, W) -> (H, W, C).
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()


# get some random training images
dataiter = iter(train_loader)
# Use the built-in next(): the iterator's own .next() method is not available
# in recent PyTorch releases.
images, labels = next(dataiter)

# show images
imshow(utils.make_grid(images))
# print labels -- iterate over the batch itself, since it can hold fewer than
# batch_size items (e.g. when the training subset is small)
print(' '.join(f'{classes[label]:5s}' for label in labels.tolist()))

### Use GPU and load pretrained ResNet model

> Also check whether the model fits in GPU memory.

In [None]:
# Prefer the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
# ResNet-18 from torchvision, requested with pretrained weights.
model = models.resnet18(pretrained=True)
print(model)


### Freeze model and train only last layer

In [None]:
# import os

# os.environ["CUDA_LAUNCH_BLOCKING"] = "1"


In [None]:
# Learning rate handed to the Adam optimizer.
lr = 3e-3

In [None]:
# Freeze every existing weight; only the replacement head below is trained.
for param in model.parameters():
    param.requires_grad = False

# Read the head's input width from the model instead of hard-coding 512, so the
# cell keeps working if the backbone is swapped for one with a different
# feature size. On a re-run model.fc is already the Sequential built below, in
# which case the width is taken from its first Linear layer.
current_head = model.fc
if isinstance(current_head, nn.Linear):
    n_features = current_head.in_features
else:
    n_features = current_head[0].in_features
n_classes = len(train_loader.dataset.classes)

# New classifier head. Its layers are created after the freeze loop, so their
# parameters still require gradients. The LogSoftmax output pairs with NLLLoss.
model.fc = nn.Sequential(
    nn.Linear(n_features, 512),
    nn.ReLU(),
    nn.Dropout(0.2),
    nn.Linear(512, n_classes),
    nn.LogSoftmax(dim=1),
)

criterion = nn.NLLLoss()
# Only the head's parameters are given to the optimizer.
optimizer = optim.Adam(model.fc.parameters(), lr=lr)
model.to(device)


### Train Model

In [None]:
epochs = 1
steps = 0
running_loss = 0
print_every = 10
# One entry is appended per evaluation round (every print_every training steps).
train_losses, test_losses = [], []

for epoch in range(epochs):
    for inputs, labels in train_loader:
        steps += 1
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        # Call the module itself rather than .forward() so registered hooks run too.
        logps = model(inputs)
        loss = criterion(logps, labels)
        loss.backward()

        optimizer.step()
        running_loss += loss.item()

        if steps % print_every == 0:
            # Periodic evaluation on the held-out loader.
            test_loss = 0
            accuracy = 0
            model.eval()

            with torch.no_grad():
                # Separate names keep the current training batch variables intact.
                for test_inputs, test_labels in test_loader:
                    test_inputs = test_inputs.to(device)
                    test_labels = test_labels.to(device)

                    test_logps = model(test_inputs)
                    test_loss += criterion(test_logps, test_labels).item()

                    # The model emits log-probabilities; exponentiate to get probabilities.
                    ps = torch.exp(test_logps)
                    top_p, top_class = ps.topk(1, dim=1)
                    equals = top_class == test_labels.view(*top_class.shape)
                    accuracy += equals.float().mean().item()

            # running_loss accumulates over the last print_every steps, so that
            # is the divisor for a per-batch average. Dividing by
            # len(train_loader), as before, put the recorded training curve on a
            # different scale from both the printed value and the test curve.
            avg_train_loss = running_loss / print_every
            avg_test_loss = test_loss / len(test_loader)
            train_losses.append(avg_train_loss)
            test_losses.append(avg_test_loss)

            print(
                f"Epoch {epoch+1}/{epochs}.. "
                f"Train loss: {avg_train_loss:.3f}.. "
                f"Test loss: {avg_test_loss:.3f}.. "
                f"Test accuracy: {accuracy/len(test_loader):.3f}"
            )

            running_loss = 0
            model.train()


### Save model

In [None]:
# NOTE(review): the file name says "resnet50", but the model created earlier in
# this notebook is models.resnet18 -- confirm which name is intended.
model_path = "../../models/resnet50_WLASL.pth"
# Saves the whole model object (not only a state_dict); it is read back with
# torch.load(model_path) further down in the notebook.
torch.save(model, model_path)


### Plot losses

In [None]:
# Draw both recorded loss histories on the same axes.
for history, caption in (
    (train_losses, 'Training loss'),
    (test_losses, 'Validation loss'),
):
    plt.plot(history, label=caption)
plt.legend(frameon=False)
plt.show()

### Test net

In [None]:
# Same two preprocessing steps that load_split_dataset applies:
# resize using img_size, then convert the image to a tensor.
resize_step = transforms.Resize(img_size)
to_tensor_step = transforms.ToTensor()
test_transforms = transforms.Compose([resize_step, to_tensor_step])


### Put in evaluation mode

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# map_location remaps the stored tensors onto the device chosen above, so a
# checkpoint written on a GPU machine can still be opened on a CPU-only one
# and the model ends up on the device the evaluation cells move their inputs to.
# NOTE(review): torch.load unpickles the file, so only open trusted checkpoints.
# Recent PyTorch releases may also need weights_only=False to load a whole
# pickled model -- confirm against the installed version.
model = torch.load(model_path, map_location=device)
model.eval()


### Predict image

In [None]:
correct, total = 0, 0

# Inference only: no gradient bookkeeping is needed for this pass.
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)

        # torch.max along dim 1 returns (values, indices); the indices are the
        # classes with the highest score, i.e. the predictions.
        predicted = torch.max(model(images), 1)[1]

        total += labels.size(0)
        correct += int((predicted == labels).sum())

print(f"Accuracy of the network on the test images: {100 * correct // total} %")


In [None]:
# prepare to count predictions for each class
correct_pred = {classname: 0 for classname in classes}
total_pred = {classname: 0 for classname in classes}

# again no gradients needed
with torch.no_grad():
    for data in test_loader:
        images, labels = data
        outputs = model(images)
        
        _, predictions = torch.max(outputs, 1)
        
        # collect the correct predictions for each class
        
        for label, prediction in zip(labels, predictions):
            if label == prediction:
                correct_pred[classes[label]] += 1
                
            total_pred[classes[label]] += 1


# print accuracy for each class
for classname, correct_count in correct_pred.items():
    if correct_count > 0:
        accuracy = 100 * float(correct_count) / total_pred[classname]
        print(f"Accuracy for class: '{classname:5s}' is {accuracy:.1f} %")

### Images

In [None]:
dataiter = iter(test_loader)
# Use the built-in next(): the iterator's own .next() method is not available
# in recent PyTorch releases.
images, labels = next(dataiter)

# print images
imshow(utils.make_grid(images))
# Slice rather than range(4) so a batch with fewer than four images does not raise.
print('GroundTruth: ', ' '.join(f'{classes[label]:5s}' for label in labels[:4].tolist()))

### What the net thinks

In [None]:
# Inference only. Move the batch to the model's device first, so a model on
# the GPU is not fed CPU tensors.
with torch.no_grad():
    outputs = model(images.to(device))
predicted = torch.max(outputs, 1)[1]
# Slice rather than range(4) so a batch with fewer than four images does not raise.
print('Predicted: ', ' '.join(f'{classes[pred]:5s}'
                              for pred in predicted[:4].tolist()))