In [None]:
import os
import torch
import torchvision
import torch.nn as nn
import numpy as np
import torch.nn.functional as F
import torchvision.transforms as tt
import torchvision.models as models
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
import torchvision.transforms as tt
from torch.utils.data import random_split
from torchvision.utils import make_grid
from copy import copy
%matplotlib inline

In [None]:
# Mount Google Drive (Colab only); the dataset archives are read from gdrive/MyDrive/data below.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
!unzip gdrive/MyDrive/data/archive.zip

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: asl_alphabet_train/asl_alphabet_train/nothing/nothing19.jpg  
  inflating: asl_alphabet_train/asl_alphabet_train/nothing/nothing190.jpg  
  inflating: asl_alphabet_train/asl_alphabet_train/nothing/nothing1900.jpg  
  inflating: asl_alphabet_train/asl_alphabet_train/nothing/nothing1901.jpg  
  inflating: asl_alphabet_train/asl_alphabet_train/nothing/nothing1902.jpg  
  inflating: asl_alphabet_train/asl_alphabet_train/nothing/nothing1903.jpg  
  inflating: asl_alphabet_train/asl_alphabet_train/nothing/nothing1904.jpg  
  inflating: asl_alphabet_train/asl_alphabet_train/nothing/nothing1905.jpg  
  inflating: asl_alphabet_train/asl_alphabet_train/nothing/nothing1906.jpg  
  inflating: asl_alphabet_train/asl_alphabet_train/nothing/nothing1907.jpg  
  inflating: asl_alphabet_train/asl_alphabet_train/nothing/nothing1908.jpg  
  inflating: asl_alphabet_train/asl_alphabet_train/nothing/nothing1909.jpg  
  inflating: a

In [None]:
# Every entry of the training folder is treated as one class name.
classes = os.listdir("./asl_alphabet_train/asl_alphabet_train")
print(classes)

# len() replaces the hand-written counting loop.
print(str(len(classes)) + " classes")

['D', 'O', 'G', 'R', 'space', 'H', 'M', 'Z', 'V', 'L', 'U', 'I', 'C', 'E', 'del', 'F', 'X', 'S', 'Y', 'W', 'J', 'Q', 'nothing', 'K', 'T', 'B', 'P', 'N', 'A']
29 classes


In [None]:
# Load the training images from the class sub-folders. No transform is attached here;
# the train/validation transforms are assigned after the split further down.
dataset = ImageFolder('./asl_alphabet_train/asl_alphabet_train')


In [None]:
# Preprocessing pipelines: random augmentation + normalization for training,
# deterministic resize + normalization for validation.
#stats = ((0.5190, 0.4992, 0.5140),(0.2038, 0.2283, 0.2356))
# NOTE(review): training crops are 200x200 while validation images are resized to
# 225x225 — confirm the size difference is intended.
train_tfms = tt.Compose(
    [
        tt.RandomCrop(200, padding=25, padding_mode='reflect'),
        tt.RandomHorizontalFlip(),
        tt.RandomRotation(10),
        tt.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
        tt.RandomPerspective(distortion_scale=0.2),
        tt.ToTensor(),
        tt.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

valid_tfms = tt.Compose(
    [
        tt.Resize((225, 225)),
        tt.ToTensor(),
        tt.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

In [None]:
# Hold out 15% of the images for validation.
val_size = int(0.15 * len(dataset))
train_size = len(dataset) - val_size

# Give the split its own seeded generator: the notebook only seeds torch in a later cell,
# so without this the train/validation membership would change on every run.
split_generator = torch.Generator().manual_seed(23)
train_ds, valid_ds = random_split(dataset, [train_size, val_size], generator=split_generator)
len(train_ds), len(valid_ds)

(73950, 13050)

In [None]:
# Both subsets from random_split refer to the same underlying ImageFolder, so the training
# subset gets its own shallow copy first; otherwise the second assignment below would
# replace the training transform as well.
train_ds.dataset = copy(dataset)
train_ds.dataset.transform = train_tfms  # augmentation + normalization
valid_ds.dataset.transform = valid_tfms  # resize + normalization only

In [None]:
# Pytorch Datasets
# train_ds = ImageFolder("./asl_alphabet_train/asl_alphabet_train", train_tfms)
# test_ds = ImageFolder("./asl_alphabet_test", valid_tfms)

In [None]:
# HyperParameters
# Training batch size; the validation and test loaders are built with twice this value.
batch_size = 50


In [None]:
# Seed PyTorch's global random number generator.
# NOTE(review): the train/validation split is created in an earlier cell, so this seed
# does not influence it.
random_seed = 23
torch.manual_seed(random_seed);

In [None]:
# Pytorch data loaders
train_dl = DataLoader(
    train_ds,
    batch_size=batch_size,
    shuffle=True,  # new sample order every epoch
    num_workers=4,
    pin_memory=True,
)
valid_dl = DataLoader(
    valid_ds,
    batch_size=batch_size * 2,
    num_workers=4,
    pin_memory=True,
)

In [None]:
def to_device(data, device):
    """Move `data` onto `device`, recursing into lists and tuples.

    Lists and tuples both come back as plain lists; any other object must offer
    a `.to(device, non_blocking=True)` method (tensors and modules do).
    """
    if not isinstance(data, (list, tuple)):
        return data.to(device, non_blocking=True)
    moved = []
    for item in data:
        moved.append(to_device(item, device))
    return moved

class DeviceDataLoader():
    """Wrap a data loader so that every batch it yields is first moved to `device`."""

    def __init__(self, dl, device):
        self.dl, self.device = dl, device

    def __iter__(self):
        # Lazy: the wrapped loader is only iterated once the first batch is requested.
        move = lambda batch: to_device(batch, self.device)
        yield from map(move, self.dl)

    def __len__(self):
        """Number of batches in the wrapped loader."""
        return len(self.dl)

In [None]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

# Wrap both loaders so their batches arrive on `device`.
train_dl, valid_dl = [DeviceDataLoader(loader, device) for loader in (train_dl, valid_dl)]

print(train_dl.device, valid_dl.device, sep="\n")

cuda
cuda
cuda


In [None]:
# Create Network class and make helper methods for training and validation
class Network(nn.Module):
    """Base module providing the loss/metric helpers used by the training loop.

    Subclasses supply `forward`; batches are `(images, labels)` pairs.
    """

    def training_step(self, batch):
        """Return the cross-entropy loss for one batch."""
        images, labels = batch
        out = self(images)                  # Generate predictions
        return F.cross_entropy(out, labels) # Calculate loss

    def validation_step(self, batch):
        """Return accuracy, detached loss and the batch size for one batch."""
        images, labels = batch
        out = self(images)                    # Generate predictions
        loss = F.cross_entropy(out, labels)   # Calculate loss
        acc = accuracy(out, labels)           # Calculate accuracy
        # The batch size lets validation_epoch_end weight a smaller final batch correctly.
        return {'val_acc': acc, 'val_loss': loss.detach(), 'batch_size': len(labels)}

    @staticmethod
    def _weighted_mean(values, weights):
        """Mean of the 1-D tensor `values`, weighted by the numbers in `weights`."""
        w = torch.as_tensor(weights, dtype=values.dtype, device=values.device)
        return (values * w).sum() / w.sum()

    def validation_epoch_end(self, outputs):
        """Combine per-batch results into epoch-level `val_acc` and `val_loss` floats.

        Each batch is weighted by its 'batch_size' entry, so the result is a
        per-sample average even when the last batch is smaller. Entries without
        'batch_size' get weight 1, which reproduces the plain mean over batches.
        """
        sizes = [x.get('batch_size', 1) for x in outputs]
        batch_losses = torch.stack([x['val_loss'] for x in outputs])
        batch_accs = torch.stack([x['val_acc'] for x in outputs])
        epoch_loss = self._weighted_mean(batch_losses, sizes)   # Combine losses
        epoch_acc = self._weighted_mean(batch_accs, sizes)      # Combine accuracies
        return {'val_acc': epoch_acc.item(), 'val_loss': epoch_loss.item()}

    def epoch_end(self, epoch, result):
        """Print the validation metrics recorded for `epoch`."""
        print("Epoch [{}], val_acc: {:.4f}, val_loss: {:.4f}".format(epoch, result['val_acc'], result['val_loss']))

def accuracy(outputs, labels):
    """Fraction of rows in `outputs` whose highest-scoring column equals `labels`.

    The result is wrapped in a new 0-dim tensor built from a Python float.
    """
    preds = torch.max(outputs, dim=1).indices
    hits = (preds == labels).sum().item()
    return torch.tensor(hits / len(preds))

In [None]:
# def conv_block(in_channels, out_channels, pool=False):
#     layers = [nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
#              nn.BatchNorm2d(out_channels),
#              nn.ReLU(inplace=True)]
#     if pool == True: 
#         layers.append(nn.MaxPool2d(2))
    
#     return nn.Sequential(*layers)
    
# Create Residual Network with ResNet152 architecture
class ResNet152(Network):
    """torchvision ResNet-152 with its final fully connected layer replaced.

    Args:
        num_classes: number of outputs of the new final layer. Defaults to 29,
            the class count of the ASL training folder used in this notebook.
        pretrained: forwarded to `models.resnet152`; True loads pretrained weights.
    """

    def __init__(self, num_classes=29, pretrained=True):
        super().__init__()
        # Use a pretrained model
        self.network = models.resnet152(pretrained=pretrained)
        # Replace last layer
        num_ftrs = self.network.fc.in_features
        self.network.fc = nn.Linear(num_ftrs, num_classes)

    def forward(self, xb):
        """Run the wrapped ResNet on a batch of images."""
        return self.network(xb)

In [None]:
# Build the network and move its parameters to the selected device.
model = ResNet152().to(device, non_blocking=True)
model

ResNet152(
  (network): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
        

In [None]:
@torch.no_grad()
def evaluate(model, val_loader):
    """Run `model` over `val_loader` in eval mode and return its aggregated metrics.

    Gradient tracking is disabled for the whole call. The per-batch results of
    `model.validation_step` are handed to `model.validation_epoch_end`.
    """
    model.eval()
    step_results = []
    for batch in val_loader:
        step_results.append(model.validation_step(batch))
    return model.validation_epoch_end(step_results)

def get_lr(optimizer):
    """Return the 'lr' of the optimizer's first parameter group (None if there is none)."""
    first_group = next(iter(optimizer.param_groups), None)
    if first_group is None:
        return None
    return first_group['lr']

def fit_one_cycle(epochs, max_lr, model, train_loader, val_loader, 
                  weight_decay=0, grad_clip=None, opt_func=torch.optim.SGD):
    """Train `model` for `epochs` epochs under a one-cycle learning-rate schedule.

    Args:
        epochs: number of passes over `train_loader`.
        max_lr: learning rate given to the optimizer and used as the OneCycleLR peak.
        model: module exposing `training_step`, `validation_step`,
            `validation_epoch_end` and `epoch_end`.
        train_loader: iterable of training batches; must support `len()`.
        val_loader: iterable of validation batches, evaluated after every epoch.
        weight_decay: forwarded to the optimizer constructor.
        grad_clip: if truthy, each gradient value is clamped to [-grad_clip, grad_clip].
        opt_func: optimizer class, called as `opt_func(params, max_lr, weight_decay=...)`.

    Returns:
        A list with one dict per epoch holding 'val_acc', 'val_loss',
        'train_loss' and 'lrs' (the learning rate recorded at every step).
    """
    torch.cuda.empty_cache()
    history = []
    
    # Set up custom optimizer with weight decay
    optimizer = opt_func(model.parameters(), max_lr, weight_decay=weight_decay)
    # Set up one-cycle learning rate scheduler
    sched = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr, epochs=epochs, 
                                                steps_per_epoch=len(train_loader))
    
    for epoch in range(epochs):
        # Training Phase 
        model.train()
        train_losses = []
        lrs = []
        for batch in train_loader:
            loss = model.training_step(batch)
            # Keep only a detached copy: storing the live loss would hold on to
            # autograd history for every batch of the epoch.
            train_losses.append(loss.detach())
            loss.backward()
            
            # Gradient clipping
            if grad_clip: 
                nn.utils.clip_grad_value_(model.parameters(), grad_clip)
            
            optimizer.step()
            optimizer.zero_grad()
            
            # Record & update learning rate
            lrs.append(get_lr(optimizer))
            sched.step()
        
        # Validation phase
        result = evaluate(model, val_loader)
        result['train_loss'] = torch.stack(train_losses).mean().item()
        result['lrs'] = lrs
        model.epoch_end(epoch, result)
        history.append(result)
    return history

In [None]:
# Baseline: validation metrics before any fine-tuning; this starts the `history` list.
history = [evaluate(model, valid_dl)]
history

[{'val_acc': 0.04519083723425865, 'val_loss': 3.4266059398651123}]

In [None]:
#model.freeze()

In [None]:
epochs = 2            # passes over the training set
max_lr = 1e-5         # peak learning rate of the one-cycle schedule
grad_clip = 0.001     # gradient values are clamped to +/- this amount
weight_decay = 1e-5
# The training cell runs AdamW, so record that here instead of the stale SGD value.
opt_func = torch.optim.AdamW

In [None]:
%%time
history += fit_one_cycle(2, max_lr, model, train_dl, valid_dl, 
                             grad_clip=grad_clip, 
                             weight_decay=weight_decay, 
                             opt_func=torch.optim.AdamW)

Epoch [0], val_acc: 0.9973, val_loss: 0.1350
Epoch [1], val_acc: 1.0000, val_loss: 0.0105


KeyboardInterrupt: ignored

In [None]:
# Re-evaluate after training under its own name, so the `history` collected by the
# earlier cells is not overwritten.
final_eval = [evaluate(model, valid_dl)]
final_eval

[{'val_acc': 0.999770998954773, 'val_loss': 0.00995089765638113}]

In [None]:
# %%time
# history += fit_one_cycle(5, 1e-5, model, train_dl, valid_dl, 
#                              grad_clip=grad_clip, 
#                              weight_decay=weight_decay, 
#                              opt_func=torch.optim.Adam)

In [None]:
# %%time
# history += fit_one_cycle(3, 1e-5, model, train_dl, valid_dl, 
#                              grad_clip=grad_clip, 
#                              weight_decay=weight_decay, 
#                              opt_func=torch.optim.SGD)

In [None]:
# Put the model in evaluation mode before saving/exporting; eval() returns the module,
# which is why the cell prints its structure.
model.eval()

ResNet152(
  (network): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
        

In [None]:
# Save Pytorch Model
# Only the state dict is written, not the ResNet152 class itself.
# NOTE(review): the file name says "ResNet9" although the model is a ResNet152 — confirm the name is intended.
FILE = "gdrive/MyDrive/data/modelResNet9.pth"
torch.save(model.state_dict(), FILE)

In [None]:
# Export the trained network to ONNX, next to the saved state dict.
onnx_model_path = "gdrive/MyDrive/data/modelResNet9.onnx"
# One random 3-channel 225x225 image on `device`; 225x225 is the size valid_tfms resizes to.
x = torch.randn(1, 3, 225, 225, device=device) # Sample input in the shape that the model expects
# verbose=True prints the exported graph into the cell output.
torch.onnx.export(model, x, onnx_model_path, export_params=True, verbose=True,)

graph(%input.1 : Float(1, 3, 225, 225, strides=[151875, 50625, 225, 1], requires_grad=0, device=cuda:0),
      %network.fc.weight : Float(29, 2048, strides=[2048, 1], requires_grad=1, device=cuda:0),
      %network.fc.bias : Float(29, strides=[1], requires_grad=1, device=cuda:0),
      %1449 : Float(64, 3, 7, 7, strides=[147, 49, 7, 1], requires_grad=0, device=cuda:0),
      %1450 : Float(64, strides=[1], requires_grad=0, device=cuda:0),
      %1452 : Float(64, 64, 1, 1, strides=[64, 1, 1, 1], requires_grad=0, device=cuda:0),
      %1453 : Float(64, strides=[1], requires_grad=0, device=cuda:0),
      %1455 : Float(64, 64, 3, 3, strides=[576, 9, 3, 1], requires_grad=0, device=cuda:0),
      %1456 : Float(64, strides=[1], requires_grad=0, device=cuda:0),
      %1458 : Float(256, 64, 1, 1, strides=[64, 1, 1, 1], requires_grad=0, device=cuda:0),
      %1459 : Float(256, strides=[1], requires_grad=0, device=cuda:0),
      %1461 : Float(256, 64, 1, 1, strides=[64, 1, 1, 1], requires_grad=0, 

In [None]:
!unzip gdrive/MyDrive/data/archiveTest.zip

In [None]:
# Evaluate on the separate test images with exactly the validation preprocessing.
# The earlier transform resized the images but skipped tt.Normalize, so the network
# received inputs on a different scale from the ones it was trained and validated on.
# NOTE(review): labels only line up with training if this folder has the same class
# sub-directories as the training set — confirm.
test_dataset = ImageFolder('./asl-alphabet-testR', transform=valid_tfms)
test_ds = test_dataset  # no shuffling needed for an aggregate metric
test_dl = DataLoader(test_ds, batch_size*2, num_workers=4, pin_memory=True)

In [None]:
# Wrap the test loader so its batches are moved to `device` like the train/validation loaders.
test_dl = DeviceDataLoader(test_dl, device)


In [None]:
# Accuracy and loss on the test loader (reported under the 'val_*' keys).
evaluate(model, test_dl)


{'val_acc': 0.8347618579864502, 'val_loss': 0.6744450926780701}

In [None]:
def predict_image(img, model):
    """Return the predicted class name for a single image tensor.

    Args:
        img: one image tensor without a batch dimension; it should be preprocessed
            the same way as the validation data.
        model: the network to query.

    Returns:
        The entry of the training `dataset.classes` at the highest-scoring index.

    The model is switched to eval mode for the forward pass, which runs without
    gradient tracking; its previous train/eval mode is restored afterwards.
    """
    # Convert to a batch of 1
    xb = to_device(img.unsqueeze(0), device)
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            # Get predictions from model
            yb = model(xb)
    finally:
        model.train(was_training)
    # Pick index with highest probability
    _, preds  = torch.max(yb, dim=1)
    # Retrieve the class label
    return dataset.classes[preds[0].item()]

In [None]:
test_dataset = ImageFolder('./asl-alphabet-testR')
# random_split with an empty second part just gives a shuffled view of the whole test
# set, so the spot checks below see a mix of classes.
test_ds, _ = random_split(test_dataset, [len(test_dataset), 0])
# Use the validation preprocessing (resize + normalize). A bare ToTensor fed the model
# un-resized, un-normalized images that do not match what it was trained on.
test_ds.dataset.transform = valid_tfms

In [None]:
# Spot-check one test sample: print its true label next to the model's prediction.
img, label = test_ds[16]
print('Label:', test_dataset.classes[label])
print('Predicted:', predict_image(img, model))

Label: H
Predicted: H


In [None]:
# Spot-check one test sample: print its true label next to the model's prediction.
img, label = test_ds[1]
print('Label:', test_dataset.classes[label])
print('Predicted:', predict_image(img, model))

Label: U
Predicted: U


In [None]:
# Spot-check one test sample: print its true label next to the model's prediction.
img, label = test_ds[0]
print('Label:', test_dataset.classes[label])
print('Predicted:', predict_image(img, model))

Label: U
Predicted: U


In [None]:
# Print label vs. prediction for test samples 0 through 20 (21 samples in total).
for sample_idx in range(21):
  img, label = test_ds[sample_idx]
  print('Label:', test_dataset.classes[label])
  print('Predicted:', predict_image(img, model))
  print()

Label: U
Predicted: U

Label: U
Predicted: U

Label: W
Predicted: W

Label: F
Predicted: F

Label: nothing
Predicted: nothing

Label: F
Predicted: F

Label: Q
Predicted: Q

Label: R
Predicted: R

Label: F
Predicted: F

Label: space
Predicted: space

Label: W
Predicted: W

Label: N
Predicted: N

Label: A
Predicted: W

Label: P
Predicted: P

Label: V
Predicted: V

Label: N
Predicted: M

Label: H
Predicted: H

Label: U
Predicted: U

Label: N
Predicted: M

Label: W
Predicted: W

Label: V
Predicted: V

