In [3]:
#Downloading the dataset directly from kaggle

!pip install kaggle
! mkdir ~/.kaggle
! cp kaggle.json ~/.kaggle/
! chmod 600 ~/.kaggle/kaggle.json



In [4]:
#downloading the dataset

! kaggle datasets download ayuraj/asl-dataset

Downloading asl-dataset.zip to /content
 93% 53.0M/56.9M [00:01<00:00, 34.8MB/s]
100% 56.9M/56.9M [00:01<00:00, 45.3MB/s]


In [5]:
!unzip /content/asl-dataset.zip

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: asl_dataset/0/hand2_0_dif_seg_1_cropped.jpeg  
  inflating: asl_dataset/0/hand2_0_dif_seg_2_cropped.jpeg  
  inflating: asl_dataset/0/hand2_0_dif_seg_3_cropped.jpeg  
  inflating: asl_dataset/0/hand2_0_dif_seg_4_cropped.jpeg  
  inflating: asl_dataset/0/hand2_0_dif_seg_5_cropped.jpeg  
  inflating: asl_dataset/0/hand2_0_left_seg_1_cropped.jpeg  
  inflating: asl_dataset/0/hand2_0_left_seg_2_cropped.jpeg  
  inflating: asl_dataset/0/hand2_0_left_seg_3_cropped.jpeg  
  inflating: asl_dataset/0/hand2_0_left_seg_4_cropped.jpeg  
  inflating: asl_dataset/0/hand2_0_left_seg_5_cropped.jpeg  
  inflating: asl_dataset/0/hand2_0_right_seg_1_cropped.jpeg  
  inflating: asl_dataset/0/hand2_0_right_seg_2_cropped.jpeg  
  inflating: asl_dataset/0/hand2_0_right_seg_3_cropped.jpeg  
  inflating: asl_dataset/0/hand2_0_right_seg_4_cropped.jpeg  
  inflating: asl_dataset/0/hand2_0_right_seg_5_cropped.jpeg  
  inflating: asl_dat

In [6]:
!rm -r /content/asl_dataset/asl_dataset

In [7]:
import os
import numpy as np
import torch
import glob
import cv2
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.optim import Adam
from torch.autograd import Variable
from torchvision.transforms import transforms
import torchvision
import pathlib
import sys
from PIL import Image
import torch.nn.functional as F  # Parameterless functions, like (some) activation functions
from skimage import io

In [8]:


#checking for cuda

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [9]:
# #loading pretrained models


# class Identity(nn.Module):
#   def __init__(self):
#     super(Identity, self).__init__()

#   def forward(self, x):
#     return x
# model= torchvision.models.vgg16(pretrained=True)
# model.avgpool = Identity()
# model.classifier= nn.Linear(512, 35)
# model.to(device)
#CNN Network


class ConvNet(nn.Module):
    def __init__(self,num_classes=36):
        super(ConvNet,self).__init__()
        
        #Output size after convolution filter
        #((w-f+2P)/s) +1
        
        #Input shape= (256,3,150,150)
        
        self.conv1=nn.Conv2d(in_channels=3,out_channels=12,kernel_size=3,stride=1,padding=1)
        #Shape= (256,12,150,150)
        self.bn1=nn.BatchNorm2d(num_features=12)
        #Shape= (256,12,150,150)
        self.relu1=nn.ReLU()
        #Shape= (256,12,150,150)
        
        self.pool=nn.MaxPool2d(kernel_size=2)
        #Reduce the image size be factor 2
        #Shape= (256,12,75,75)
        
        
        self.conv2=nn.Conv2d(in_channels=12,out_channels=20,kernel_size=3,stride=1,padding=1)
        #Shape= (256,20,75,75)
        self.relu2=nn.ReLU()
        #Shape= (256,20,75,75)
        
        
        
        self.conv3=nn.Conv2d(in_channels=20,out_channels=32,kernel_size=3,stride=1,padding=1)
        #Shape= (256,32,75,75)
        self.bn3=nn.BatchNorm2d(num_features=32)
        #Shape= (256,32,75,75)
        self.relu3=nn.ReLU()
        #Shape= (256,32,75,75)
        
        
        self.fc=nn.Linear(in_features=75 * 75 * 32,out_features=num_classes)
        
        
        
        #Feed forwad function
        
    def forward(self,input):
        output=self.conv1(input)
        output=self.bn1(output)
        output=self.relu1(output)
            
        output=self.pool(output)
            
        output=self.conv2(output)
        output=self.relu2(output)
            
        output=self.conv3(output)
        output=self.bn3(output)
        output=self.relu3(output)
            
            
            #Above output will be in matrix form, with shape (256,32,75,75)
            
        output=output.view(-1,32*75*75)
            
            
        output=self.fc(output)
            
        return output

In [10]:
#importing the data
from PIL import Image
class CustomDataset(Dataset):
  def __init__(self):
    self.data = []
    self.imgs_path= "/content/asl_dataset"
    file_list = os.listdir(self.imgs_path)
    for i in file_list:
      for j in os.listdir(os.path.join(self.imgs_path, i)):
        j= os.path.join( self.imgs_path,i, j)
        self.data.append([ j, i])
        

    self.class_map = {"0" : 0, "1": 1, "2": 2, "3":3, "4": 4,
                      "5": 5, "6":6, "7":7, "8":8, "9":9, "a": 10, "b": 11
                      , "c":12, "d":13, "e":14, "f":15, "g":16, "h":17, "i":18
                      , "j":19, "k":20, "l": 21, "m":22, "n":23,
                      "o":24, "p":25, "q":26, "r":27, "s":28, "t":29,
                      "u":30, "v":31, "w":32, "x":33, "y":34, "z":35}
    self.img_dim= (32, 32) 
  def __len__(self):
    
    return(len(self.data))

  def __getitem__(self, idx):
    img_path, class_name= self.data[idx]
    
    
    img= cv2.imread(img_path)
    img= Image.open(img_path)
    img= img.resize((150, 150))

    #normalizaing the data
    transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
        ),
      ])
    normalized_img = transform(img)
    img= np.array(img)
    #class_id= class_name
    
    class_id= self.class_map[class_name]
    #print("type class_id", type(class_id) ,class_id, class_name)
    img_tensor = torch.from_numpy(img)
    img_tensor = img_tensor.permute(2, 0, 1)
    class_id = torch.tensor([class_id])
    #print(class_id)
    #print("class id shape", class_id.shape)
    return img_tensor.float(), class_id



In [11]:
#load data
import random
import matplotlib.pyplot as plt 
dataset= CustomDataset()


#visualizaing the tensors
# tensor = dataset.__getitem__(10)[1]
# print(tensor)
# image = np.squeeze(tensor.numpy())
# image = (image - np.min(image)) / (np.max(image) - np.min(image))
# image = image.transpose((1, 2, 0))
# plt.imshow(image)
#splitting data into training and testing
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])
# print(train_dataset)
train_loader= DataLoader(dataset= train_dataset, batch_size=10, shuffle=True)
test_loader= DataLoader(dataset= test_dataset, batch_size=10, shuffle=True)

# dataiter = iter(train_loader)
# images, labels = dataiter.next()
#print(type(images))
# labels= labels.flatten()
# print(labels.flatten())
#print("type of label", labels[1])
# for image, labels in train_loader:
#   print( labels.flatten())

In [24]:

model=ConvNet(num_classes=36).to(device)
#Optmizer and loss function
optimizer=Adam(model.parameters(),lr=0.001,weight_decay=0.0001)
loss_function=nn.CrossEntropyLoss()
num_epochs=9
#calculating the size of training and testing images
train_count=len(train_dataset)
test_count=len(test_dataset)




#print(train_count)
# print(test_count)

In [25]:
best_accuracy=0.0

for epoch in range(num_epochs):
    
    #Evaluation and training on training dataset
    model.train()
    train_accuracy=0.0
    train_loss=0.0
    
    for i, (images,labels) in enumerate(train_loader):
        if torch.cuda.is_available():
            images=Variable(images.cuda())
            labels=Variable(labels.cuda())
        #print(labels)    
        labels= labels.flatten()
        optimizer.zero_grad()
        
        outputs=model(images)
        loss=loss_function(outputs,labels)
        loss.backward()
        optimizer.step()
        
        
        train_loss+= loss.cpu().data*images.size(0)
        _,prediction=torch.max(outputs.data,1)
        
        train_accuracy+=int(torch.sum(prediction==labels.data))
        
    train_accuracy=train_accuracy/train_count
    train_loss=train_loss/train_count
    
    
    # Evaluation on testing dataset
    model.eval()
    
    test_accuracy=0.0
    for i, (images,labels) in enumerate(test_loader):
        if torch.cuda.is_available():
            images=Variable(images.cuda())
            labels=Variable(labels.cuda())
            
        outputs=model(images)
        _,prediction=torch.max(outputs.data,1)
        test_accuracy+=int(torch.sum(prediction==labels.data))
    
    test_accuracy=test_accuracy/test_count
    
    
    print('Epoch: '+str(epoch)+' Train Loss: '+str(train_loss)+' Train Accuracy: '+str(train_accuracy)+' Test Accuracy: '+str(test_accuracy))
    
    #Save the best model
    if test_accuracy>best_accuracy:
        torch.save(model.state_dict(),'best_checkpoint.model')
        best_accuracy=test_accuracy

Epoch: 0 Train Loss: tensor(11.1517) Train Accuracy: 0.658051689860835 Test Accuracy: 1.0337972166998013
Epoch: 1 Train Loss: tensor(0.8159) Train Accuracy: 0.9403578528827038 Test Accuracy: 1.1153081510934393
Epoch: 2 Train Loss: tensor(0.2871) Train Accuracy: 0.9721669980119284 Test Accuracy: 1.1809145129224652
Epoch: 3 Train Loss: tensor(0.2364) Train Accuracy: 0.9811133200795229 Test Accuracy: 1.0715705765407555
Epoch: 4 Train Loss: tensor(0.1531) Train Accuracy: 0.9845924453280318 Test Accuracy: 1.151093439363817
Epoch: 5 Train Loss: tensor(0.0852) Train Accuracy: 0.9900596421471173 Test Accuracy: 1.168986083499006
Epoch: 6 Train Loss: tensor(0.1277) Train Accuracy: 0.9910536779324056 Test Accuracy: 1.1789264413518887
Epoch: 7 Train Loss: tensor(0.0382) Train Accuracy: 0.9975149105367793 Test Accuracy: 1.1570576540755466
Epoch: 8 Train Loss: tensor(0.0083) Train Accuracy: 0.9985089463220675 Test Accuracy: 1.2087475149105367
