# Imports

In [1]:
import numpy as np
import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torchvision import datasets, transforms, models
from torch.utils.data.sampler import SubsetRandomSampler
import os
from tqdm import tqdm
from time import sleep
import cv2

# DataLoader

In [2]:
data_dir = '../input/asl-alphabet/asl_alphabet_train/asl_alphabet_train'
def load_split_train_test(datadir, batch_size, valid_size = .2):
    train_transforms = transforms.Compose([transforms.Resize((224, 244)), transforms.ToTensor(),])
    test_transforms = transforms.Compose([transforms.Resize((224, 224)), transforms.ToTensor(),])

    train_data = datasets.ImageFolder(datadir, transform=train_transforms)
    test_data = datasets.ImageFolder(datadir, transform=test_transforms)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(valid_size * num_train))
    np.random.shuffle(indices)

    train_idx, test_idx = indices[split:], indices[:split]
    train_sampler = SubsetRandomSampler(train_idx)
    test_sampler = SubsetRandomSampler(test_idx)

    trainloader = torch.utils.data.DataLoader(train_data, sampler=train_sampler, batch_size=batch_size)
    testloader = torch.utils.data.DataLoader(test_data, sampler=test_sampler, batch_size=batch_size)

    return trainloader, testloader

batch_size = 32
trainloader, testloader = load_split_train_test(data_dir, batch_size, .18)
print("Train Size:", len(trainloader) * batch_size, ", No of bacthes:", len(trainloader))
print("Test Size:", len(testloader) * batch_size, ", No of bacthes:", len(testloader))
print("Classes:", trainloader.dataset.classes)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("device:",device)

Train Size: 71360 , No of bacthes: 2230
Test Size: 15680 , No of bacthes: 490
Classes: ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'del', 'nothing', 'space']
device: cuda


# Model Building

In [3]:
import torch
class CNNClassifier(torch.nn.Module):

    def __init__(self):
        super().__init__()
        self.master = torch.nn.Sequential(torch.nn.Conv2d(3, 16, kernel_size=7, padding=3, stride=2),
                                          torch.nn.ReLU(),
                                          torch.nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
        torch.nn.init.xavier_normal_(self.master[0].weight)

        self.block1 = torch.nn.Sequential(torch.nn.Conv2d(in_channels=16, out_channels=16, kernel_size=3, padding=1),
                                          torch.nn.BatchNorm2d(16),
                                          torch.nn.ReLU(),
                                          torch.nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1),
                                          torch.nn.BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True),
                                          torch.nn.ReLU())

        self.block2 = torch.nn.Sequential(
            torch.nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3, padding=1),
            torch.nn.BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True),
            torch.nn.ReLU(),
            torch.nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1),
            torch.nn.BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True),
            torch.nn.ReLU())

        self.block3 = torch.nn.Sequential(
            torch.nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, padding=1),
            torch.nn.BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True),
            torch.nn.ReLU(),
            torch.nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1),
            torch.nn.BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True),
            torch.nn.ReLU())

        self.relu = torch.nn.ReLU()
        self.maxpool = torch.nn.MaxPool2d(kernel_size=3, padding=1, stride=2)
        self.avgpool = torch.nn.AdaptiveAvgPool2d(output_size=(1, 1))

        self.downsample1 = torch.nn.Sequential(torch.nn.Conv2d(16, 32, kernel_size=1),
                                               torch.nn.BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True),
                                               torch.nn.ReLU())
        self.downsample2 = torch.nn.Sequential(torch.nn.Conv2d(32, 64, kernel_size=1),
                                               torch.nn.BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True),
                                               torch.nn.ReLU())
        self.downsample3 = torch.nn.Sequential(torch.nn.Conv2d(64, 128, kernel_size=1),
                                               torch.nn.BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True),
                                               torch.nn.ReLU())

        self.classifier = torch.nn.Sequential(
            torch.nn.Dropout(p=0.5),
            torch.nn.Linear(128, 256),
            torch.nn.BatchNorm1d(256),
            torch.nn.ReLU(inplace=True),
            torch.nn.Dropout(p=0.5),
            torch.nn.Linear(256, 256),
            torch.nn.BatchNorm1d(256),
            torch.nn.ReLU(inplace=True),
            torch.nn.Dropout(p=0.5),
            torch.nn.Linear(256, len(trainloader.dataset.classes)),
            nn.LogSoftmax(dim=1)
        )

    def forward(self, x):
        # print(x.shape)
        # normalize image

        mu = torch.mean(torch.mean(x, dim=2), dim=2).unsqueeze(-1).unsqueeze(-1)
        sigma = torch.sqrt(torch.mean((x - mu) ** 2)) + 1e-8
        x -= mu
        x /= 4 * sigma

        # print("image", identity.shape)
        res1 = self.master(x)

        res2 = self.block1(res1)
        res2 = res2 + self.downsample1(res1)
        res2 = self.maxpool(res2)

        res3 = self.block2(res2)
        res3 = res3 + self.downsample2(res2)
        res3 = self.maxpool(res3)

        res4 = self.block3(res3)
        # print("4 ", res4.shape ,self.downsample3(res3).shape )
        res4 = res4 + self.downsample3(res3)

        res = self.maxpool(res4)
        # print("final shape : ", res.shape)
        res = res.mean(dim=[2, 3])
        res = self.classifier(res)
        return res


In [4]:
backbone = 'mobilenet_v2'
addLayers = False
if backbone == 'resnet50':
    model = models.resnet50(pretrained=True)
    for param in model.parameters():
        param.requires_grad = False
    if addLayers:
        model.fc = nn.Sequential(nn.Linear(2048, 1024),
          nn.ReLU(),
          nn.Dropout(0.2),
          nn.Linear(1024, len(trainloader.dataset.classes)),
          nn.LogSoftmax(dim=1)
          )
    else:
        model.fc = nn.Linear(2048, len(trainloader.dataset.classes))
elif backbone == 'mobilenet_v2':
    model = models.mobilenet_v2(pretrained=True)
    for param in model.parameters():
        param.requires_grad = False
    if addLayers:
        model.classifier = nn.Sequential(nn.Linear(1280, 1024),
          nn.ReLU(),
          nn.Dropout(0.2),
          nn.Linear(1024, len(trainloader.dataset.classes)),
          nn.LogSoftmax(dim=1)
          )
    else:
        model.classifier = nn.Linear(1280, len(trainloader.dataset.classes))
# print(model)

Downloading: "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth" to /root/.cache/torch/hub/checkpoints/mobilenet_v2-b0353104.pth


  0%|          | 0.00/13.6M [00:00<?, ?B/s]

# Loss and Optimizer

In [5]:
#criterion = nn.NLLLoss()
model = CNNClassifier()
learning_rate = 0.001
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-2, momentum=0.9, weight_decay=1e-5)

# optimizer = optim.Adam(model.classifier.parameters(), lr=learning_rate)
model.to(device)

CNNClassifier(
  (master): Sequential(
    (0): Conv2d(3, 16, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  )
  (block1): Sequential(
    (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU()
  )
  (block2): Sequential(
    (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU()
  )
  (bloc

# Model Training

In [6]:
epochs = 10
running_loss = 0
train_losses, test_losses = [], []
min_val_loss = None
name = ''

In [7]:
for epoch in range(epochs):
    with tqdm(trainloader, unit="batch") as tepoch:
        accuracy = 0
        for i, data in enumerate(tepoch):
            inputs, labels = data
            tepoch.set_description(f"Training Epoch {epoch + 1}")
            inputs, labels = inputs.to(device), labels.to(device)
            size = labels.shape[0]
            optimizer.zero_grad()
            logps = model.forward(inputs)
            # print(torch.argmax(logps, dim=1).shape, labels.shape)
            loss = criterion(logps, labels)
            train_acc = torch.sum(torch.argmax(logps, dim=1) == labels).item() / size
            accuracy += train_acc
            loss.backward()

            optimizer.step()
            running_loss += loss.item()

            tepoch.set_postfix(loss=loss.item(), accuracy=100. * train_acc)
            sleep(0.005)
            if i == len(trainloader)-1:
                accuracy = accuracy / len(trainloader)
                tepoch.set_postfix(loss=running_loss/len(trainloader), accuracy=100. * accuracy)
    test_loss = 0
    accuracy = 0
    model.eval()
    with torch.no_grad():
        with tqdm(testloader, unit="batch") as tepoch:
            for i, data in enumerate(tepoch):
                (inputs, labels) = data
                tepoch.set_description(f"Testing Epoch {epoch + 1}")
                inputs, labels = inputs.to(device), labels.to(device)
                size = labels.shape[0]
                logps = model.forward(inputs)
                batch_loss = criterion(logps, labels)
                test_loss += batch_loss.item()

                test_acc = torch.sum(torch.argmax(logps, dim=1) == labels).item() / size
                accuracy += test_acc
                tepoch.set_postfix(loss=batch_loss.item(), accuracy=100. * test_acc)
                # tepoch.set_postfix(loss=batch_loss.item())
                sleep(0.005)
                if i == len(testloader)-1:
                        accuracy = accuracy / len(testloader)
                        tepoch.set_postfix(loss=test_loss/len(testloader), accuracy=100. * accuracy)

    val_loss = test_loss/len(testloader)
    if min_val_loss is None:
        min_val_loss = val_loss
        name = 'sl_recognition_{}_{}_{}.pth'.format(str(epoch + 1), str(round(val_loss, 3)), str(round(accuracy, 3)))
        torch.save(model, name)
    elif min_val_loss > val_loss:
        min_val_loss = val_loss
        name = 'sl_recognition_{}_{}_{}.pth'.format(str(epoch + 1), str(round(val_loss, 3)), str(round(accuracy, 3)))
        torch.save(model, name)

    running_loss = 0
    model.train()
torch.save(model, 'final_sl.pth')

Training Epoch 1: 100%|██████████| 2230/2230 [08:37<00:00,  4.31batch/s, accuracy=45.3, loss=1.63]
Testing Epoch 1: 100%|██████████| 490/490 [01:48<00:00,  4.50batch/s, accuracy=77.6, loss=0.602]
Training Epoch 2: 100%|██████████| 2230/2230 [05:46<00:00,  6.43batch/s, accuracy=81.5, loss=0.526]
Testing Epoch 2: 100%|██████████| 490/490 [01:05<00:00,  7.53batch/s, accuracy=95.3, loss=0.14]
Training Epoch 3: 100%|██████████| 2230/2230 [05:55<00:00,  6.28batch/s, accuracy=90.8, loss=0.275]
Testing Epoch 3: 100%|██████████| 490/490 [01:03<00:00,  7.66batch/s, accuracy=97.3, loss=0.0836]
Training Epoch 4: 100%|██████████| 2230/2230 [05:48<00:00,  6.40batch/s, accuracy=94.7, loss=0.165]
Testing Epoch 4: 100%|██████████| 490/490 [01:03<00:00,  7.68batch/s, accuracy=98.5, loss=0.0419]
Training Epoch 5: 100%|██████████| 2230/2230 [05:55<00:00,  6.26batch/s, accuracy=96.3, loss=0.116]
Testing Epoch 5: 100%|██████████| 490/490 [01:04<00:00,  7.55batch/s, accuracy=99.6, loss=0.0147]
Training Epoch

In [8]:
with open('classes.txt', 'w') as f:
    for clas in trainloader.dataset.classes:
        f.write(clas+'\n')

In [9]:
onnx_model_path = "sl.onnx"
model = torch.load(name)
model.to("cpu")
model.eval()
dummy_input = torch.randn(1, 3, 224, 224)
torch.onnx.export(model, dummy_input, onnx_model_path, verbose=True)

  "Passing an tensor of different rank in execution will be incorrect.")
  "Passing an tensor of different rank in execution will be incorrect.")


graph(%0 : Float(1:150528, 3:50176, 224:224, 224:1, requires_grad=0, device=cpu),
      %master.0.weight : Float(16:147, 3:49, 7:7, 7:1, requires_grad=1, device=cpu),
      %master.0.bias : Float(16:1, requires_grad=1, device=cpu),
      %classifier.1.weight : Float(256:128, 128:1, requires_grad=1, device=cpu),
      %classifier.1.bias : Float(256:1, requires_grad=1, device=cpu),
      %classifier.2.weight : Float(256:1, requires_grad=1, device=cpu),
      %classifier.2.bias : Float(256:1, requires_grad=1, device=cpu),
      %classifier.2.running_mean : Float(256:1, requires_grad=0, device=cpu),
      %classifier.2.running_var : Float(256:1, requires_grad=0, device=cpu),
      %classifier.5.weight : Float(256:256, 256:1, requires_grad=1, device=cpu),
      %classifier.5.bias : Float(256:1, requires_grad=1, device=cpu),
      %classifier.6.weight : Float(256:1, requires_grad=1, device=cpu),
      %classifier.6.bias : Float(256:1, requires_grad=1, device=cpu),
      %classifier.6.running

# Generate Classes.txt

In [10]:
with open('classes.txt', 'w') as f:
    for clas in trainloader.dataset.classes:
        f.write(clas+'\n')

# Convert to onnx

In [11]:
onnx_model_path = "sl.onnx"
model = torch.load(name)
model.to("cpu")
model.eval()
dummy_input = torch.randn(1, 3, 224, 224)
torch.onnx.export(model, dummy_input, onnx_model_path, verbose=True)

graph(%0 : Float(1:150528, 3:50176, 224:224, 224:1, requires_grad=0, device=cpu),
      %master.0.weight : Float(16:147, 3:49, 7:7, 7:1, requires_grad=1, device=cpu),
      %master.0.bias : Float(16:1, requires_grad=1, device=cpu),
      %classifier.1.weight : Float(256:128, 128:1, requires_grad=1, device=cpu),
      %classifier.1.bias : Float(256:1, requires_grad=1, device=cpu),
      %classifier.2.weight : Float(256:1, requires_grad=1, device=cpu),
      %classifier.2.bias : Float(256:1, requires_grad=1, device=cpu),
      %classifier.2.running_mean : Float(256:1, requires_grad=0, device=cpu),
      %classifier.2.running_var : Float(256:1, requires_grad=0, device=cpu),
      %classifier.5.weight : Float(256:256, 256:1, requires_grad=1, device=cpu),
      %classifier.5.bias : Float(256:1, requires_grad=1, device=cpu),
      %classifier.6.weight : Float(256:1, requires_grad=1, device=cpu),
      %classifier.6.bias : Float(256:1, requires_grad=1, device=cpu),
      %classifier.6.running

# Run Inference with opencv

In [12]:
import matplotlib.pyplot as plt

In [13]:
plt.figure(figsize=(20, 20))
imageDir = '../input/asl-alphabet/asl_alphabet_test/asl_alphabet_test'
net =  cv2.dnn.readNetFromONNX(onnx_model_path) 
with open('classes.txt', 'r') as f:
    classes = f.read().split('\n')
for i, image_name in enumerate(os.listdir(imageDir)):
    image = cv2.imread(os.path.join(imageDir, image_name))
    blob = cv2.dnn.blobFromImage(image, 1.0 / 255, (224, 224),(0, 0, 0), swapRB=True, crop=False)
    net.setInput(blob)
    preds = net.forward()
    biggest_pred_index = np.array(preds)[0].argmax()
    ax = plt.subplot(6, 5, i + 1)
    plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    plt.title("predicted: {}, True: {}".format(classes[biggest_pred_index], image_name.split('_test.jpg')[0]))
    plt.axis("off")

error: OpenCV(4.5.1) /tmp/pip-req-build-tk9iuyva/opencv/modules/dnn/src/onnx/onnx_importer.cpp:1887: error: (-2:Unspecified error) in function 'handleNode'
> Node [Sqrt]:(91) parse error: OpenCV(4.5.1) /tmp/pip-req-build-tk9iuyva/opencv/modules/dnn/src/dnn.cpp:614: error: (-2:Unspecified error) Can't create layer "91" of type "Sqrt" in function 'getLayerInstance'
> 

<Figure size 1440x1440 with 0 Axes>