<a href="https://colab.research.google.com/github/anirbansaha96/AI-ML-Playground/blob/master/vgg16_asl.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import numpy as np
import torch

import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt


train_on_gpu = torch.cuda.is_available()

%matplotlib inline

In [None]:
data_dir = '../input/asl-alphabet/'
train_dir = os.path.join(data_dir, 'asl_alphabet_train/asl_alphabet_train/')

In [None]:
# VGG-16 Takes 224x224 images as input, so we resize all of them
data_transform = transforms.Compose([transforms.RandomResizedCrop(224), 
                                      transforms.ToTensor()])

train_data = datasets.ImageFolder(train_dir, transform=data_transform)

print('Num training images: ', len(train_data))

Num training images:  87000


In [None]:
classes = train_data.class_to_idx

In [None]:
batch_size = 64
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True)

In [None]:
vgg16 = models.vgg16(pretrained=True)

Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/hub/checkpoints/vgg16-397923af.pth


  0%|          | 0.00/528M [00:00<?, ?B/s]

In [None]:
# Freeze training for all "features" layers
for param in vgg16.features.parameters():
    param.requires_grad = False  

In [None]:
import torch.nn as nn

vgg16.classifier[6] = nn.Linear(in_features=vgg16.classifier[6].in_features, out_features=29, bias=True)


# after completing your model, if GPU is available, move the model to GPU
if train_on_gpu:
    vgg16.cuda()

In [None]:
import torch.optim as optim

# specify loss function (categorical cross-entropy)
criterion = nn.CrossEntropyLoss()

# specify optimizer (stochastic gradient descent) and learning rate = 0.001
optimizer = optim.SGD(vgg16.classifier.parameters(), lr=0.001)


In [None]:
n_epochs = 20

for epoch in range(1, n_epochs+1):
    train_loss = 0.0
    for batch_i, (data, target) in enumerate(train_loader):
        if train_on_gpu:
            data, target = data.cuda(), target.cuda()
        optimizer.zero_grad()
        output = vgg16(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
        
        if batch_i % 200 == 199:    # print training loss 
            print('Epoch %d, Batch %d loss: %.16f' %
                  (epoch, batch_i + 1, train_loss / 200))
            train_loss = 0.0

Epoch 1, Batch 200 loss: 3.3035264575481413
Epoch 1, Batch 400 loss: 3.1065577018260955
Epoch 1, Batch 600 loss: 2.9099041104316710
Epoch 1, Batch 800 loss: 2.7124437594413759
Epoch 1, Batch 1000 loss: 2.5316750633716585
Epoch 1, Batch 1200 loss: 2.3602901434898378
Epoch 2, Batch 200 loss: 2.1340176939964293
Epoch 2, Batch 400 loss: 2.0570934367179872
Epoch 2, Batch 600 loss: 1.9674622452259063
Epoch 2, Batch 800 loss: 1.8895116460323333
Epoch 2, Batch 1000 loss: 1.8530072426795960
Epoch 2, Batch 1200 loss: 1.7706041193008424
Epoch 3, Batch 200 loss: 1.6836629289388656
Epoch 3, Batch 400 loss: 1.6340192282199859
Epoch 3, Batch 600 loss: 1.6219015550613403
Epoch 3, Batch 800 loss: 1.5662782645225526
Epoch 3, Batch 1000 loss: 1.5261663013696671
Epoch 3, Batch 1200 loss: 1.5304266858100890
Epoch 4, Batch 200 loss: 1.4760438984632491
Epoch 4, Batch 400 loss: 1.4775691062211991
Epoch 4, Batch 600 loss: 1.4306568965315818
Epoch 4, Batch 800 loss: 1.4166228652000428
Epoch 4, Batch 1000 loss: 

In [None]:
torch.save(vgg16.state_dict(), './ASL20')

In [None]:
import json
with open('./classes.json', 'w') as fp:
    json.dump(classes, fp)

In [None]:
from collections import OrderedDict
idx_to_class = {value:key for key, value in classes.items()} 

In [None]:
import os
from PIL import Image
correct = 0
test_files = os.listdir('../input/asl-alphabet/asl_alphabet_test/asl_alphabet_test/')
vgg16.eval()
for file in test_files:
    label = file.strip('_')[0]
    path = os.path.join('../input/asl-alphabet/asl_alphabet_test/asl_alphabet_test/',file)
    img = Image.open(path)
    img_tensor = data_transform(img).to('cuda').unsqueeze(0)
    output = vgg16(img_tensor)
    _, index = torch.max(output, 1)
    index = index.cpu().tolist()
    if label == idx_to_class[index[0]]:
        correct += 1
print('Test Accuracy : ', round(correct/len(test_files)*100, 2), '%')

Test Accuracy :  82.14 %


# Predictions

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
from torchvision import models, transforms
import torch
import torch.nn as nn

In [3]:
train_on_gpu = torch.cuda.is_available()

In [4]:
data_transform = transforms.Compose([transforms.RandomResizedCrop(224), 
                                      transforms.ToTensor()])

In [5]:
vgg16 = models.vgg16(pretrained=False)
vgg16.classifier[6] = nn.Linear(in_features=4096, out_features=29, bias=True)

In [6]:
if train_on_gpu:
    vgg16.cuda()

In [7]:
vgg16.load_state_dict(torch.load('/content/drive/MyDrive/Colab Notebooks/ASL20'))

<All keys matched successfully>

In [22]:
import json
f = open('/content/drive/MyDrive/Colab Notebooks/classes.json')

class_to_idx = json.load(f)
idx_to_class = {value : key for key, value in class_to_idx.items()}

In [None]:
vgg16.eval()

In [9]:
from PIL import Image

def predict_image(path):
  img = Image.open(path)
  img_tensor = data_transform(img).to('cuda').unsqueeze(0)
  output = vgg16(img_tensor)
  _, index = torch.max(output, 1)
  index = index.cpu().tolist()
  return idx_to_class[index[0]]

In [24]:
predict_image('/content/A100.jpg')

'A'

In [25]:
predict_image('/content/E_test.jpg')

'E'

In [26]:
predict_image('/content/G10.jpg')

'G'

In [27]:
predict_image('/content/L1000.jpg')

'L'

### Some possible improvements:
1. Improve Training Data by Augmentation Transformations
2. Split Training Dataset into Train and Validation and use early stopping