In [1]:
# Libraries for building network
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
from torch.utils import data

# Libraries for dataset
import torchvision
from torchvision import transforms, datasets

# Miscellaneous Libraries
import time

# For DataLoader
from torch.utils.data import Dataset

# For Dataset Files
import os

import PIL
from PIL import Image
import matplotlib.pyplot as plt

In [2]:
# Training hyperparameters, read by the data-loader, optimizer and training cells.
num_epochs = 3       # number of full passes over the training loader
batch = 8            # samples per mini-batch handed to the DataLoaders
learning_rate = 0.1  # step size passed to the SGD optimizer
momentum = 0.5       # momentum term passed to the SGD optimizer

In [4]:
# Build ASL_Sorted_List/<split>/<label>/<file> from the two raw dataset folders
# (ASL_Combined_Dataset and Combined_Dataset2) so the images are laid out in
# one-folder-per-class form.
cwd = os.getcwd()
sorted_root = os.path.join(cwd, "ASL_Sorted_List")


def _resave_image(src_path, dst_dir, filename):
    """Open the image at src_path and save it as dst_dir/filename.

    dst_dir (and any missing parents) is created first. The source image is
    opened in a context manager so its file handle is released promptly.
    """
    os.makedirs(dst_dir, exist_ok=True)
    with Image.open(src_path) as img:
        img.save(os.path.join(dst_dir, filename))


if os.path.exists(sorted_root):
    print("This is either sorted correctly or there is a mistake."
          + "\nIf there is a mistake, delete the /ASL_Sorted_List"
          + "\nThen run this cell again.")
else:
    combined_dir = os.path.join(cwd, "ASL_Combined_Dataset")
    dataset2_dir = os.path.join(cwd, "Combined_Dataset2")

    # Read both source listings BEFORE creating the output folder, so a missing
    # source directory does not leave an empty ASL_Sorted_List behind.
    files = next(os.walk(combined_dir))[2]
    train_or_test = next(os.walk(dataset2_dir))[1]

    for split in ("Training_Data", "Testing_Data"):
        os.makedirs(os.path.join(sorted_root, split), exist_ok=True)

    # Flat dataset: the label is the second '_'-separated field of the file
    # name, and files whose 5th character is '5' are held out for testing
    # (presumably names look like "hand5_<label>_..." -- TODO confirm).
    for file in files:
        parts = file.split('_')
        if len(file) < 5 or len(parts) < 2:
            # Not a dataset image name; indexing it would raise IndexError.
            print("Skipping unexpected file name: " + file)
            continue
        label = parts[1]
        split = "Testing_Data" if file[4] == '5' else "Training_Data"
        _resave_image(os.path.join(combined_dir, file),
                      os.path.join(sorted_root, split, label),
                      file)

    # Pre-split dataset: Combined_Dataset2/<setName>/<label>/<file> is mirrored
    # into ASL_Sorted_List/<setName>/<label>/<file>. makedirs (inside the
    # helper) also covers a setName other than the two folders created above.
    for setName in train_or_test:
        set_dir = os.path.join(dataset2_dir, str(setName))
        for labels2 in next(os.walk(set_dir))[1]:
            label_dir = os.path.join(set_dir, str(labels2))
            for file2 in next(os.walk(label_dir))[2]:
                _resave_image(os.path.join(label_dir, str(file2)),
                              os.path.join(sorted_root, str(setName), labels2),
                              file2)
        

This is either sorted correctly or there is a mistake.
If there is a mistake, delete the /ASL_Sorted_List
Then run this cell again.


In [5]:
# Architecture as Given by the Paper
class aslModel(nn.Module):
    """Six-convolution CNN classifier for 3-channel 200x200 images.

    Layout: three stages of (Conv3x3 -> ReLU -> Conv3x3 -> ReLU -> MaxPool2x2
    -> Dropout), followed by three fully connected layers.

    Spatial size per stage for a 200x200 input (no padding, stride 1):
        stage 1: 200 -> 198 -> 196 -> pool ->  98
        stage 2:  98 ->  96 ->  94 -> pool ->  47
        stage 3:  47 ->  45 ->  43 -> pool ->  21   (43 / 2 floors to 21)
    so the flattened feature vector has 128 * 21 * 21 = 56448 entries.

    Args:
        num_classes: number of output classes (default 36, as in the original).
    """

    def __init__(self, num_classes=36):
        """ Initialize all layers of model """
        super(aslModel, self).__init__()

        # Convolutional Network Layers 1 & 2
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3)   # 200 -> 198
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3)  # 198 -> 196

        # Max Pool Layer (shared by all three stages; it has no parameters)
        self.pool = nn.MaxPool2d(2, 2)  # halves height/width, flooring odd sizes

        # Dropout Layer For Convolutions
        self.conv_dropout1 = nn.Dropout(0.25)

        # Convolutional Network Layers 3 & 4
        self.conv3 = nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3)  # 98 -> 96
        self.conv4 = nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3)  # 96 -> 94

        # Dropout Layer For Convolutions
        self.conv_dropout2 = nn.Dropout(0.25)

        # Convolutional Network Layers 5 & 6
        self.conv5 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3)   # 47 -> 45
        self.conv6 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3)  # 45 -> 43

        # Dropout Layer For Convolutions
        self.conv_dropout3 = nn.Dropout(0.25)

        # Dropout Layers for Fully Connected Layers
        self.fc_dropout1 = nn.Dropout(0.30)
        self.fc_dropout2 = nn.Dropout(0.30)

        # Fully Connected Layer 1 (input: 128 channels x 21 x 21 after the last pool)
        self.fc1 = nn.Linear(in_features=128 * 21 * 21, out_features=128)

        # Fully Connected Layer 2
        self.fc2 = nn.Linear(in_features=128, out_features=128)

        # Fully Connected Layer 3 (class scores)
        self.fc3 = nn.Linear(in_features=128, out_features=num_classes)

    def forward(self, x):
        """Run a batch of images through the network.

        Args:
            x: float tensor of shape (N, 3, 200, 200).

        Returns:
            Tensor of shape (N, num_classes) holding log-probabilities.
            Note: nn.CrossEntropyLoss applies log-softmax itself; feeding it
            these log-probabilities is mathematically a no-op, but nn.NLLLoss
            is the conventional pairing for this output.
        """
        # Stage 1: Conv1 -> RELU -> Conv2 -> RELU -> Pool -> Dropout1
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = self.conv_dropout1(self.pool(x))

        # Stage 2: Conv3 -> RELU -> Conv4 -> RELU -> Pool -> Dropout2
        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        x = self.conv_dropout2(self.pool(x))

        # Stage 3: Conv5 -> RELU -> Conv6 -> RELU -> Pool -> Dropout3
        x = F.relu(self.conv5(x))
        x = F.relu(self.conv6(x))
        x = self.conv_dropout3(self.pool(x))

        # Flatten everything except the batch dimension. Unlike view(-1, K),
        # this can never regroup samples: a wrongly sized input fails loudly
        # with a shape error at fc1 instead.
        x = torch.flatten(x, start_dim=1)

        # Fully connected head with dropout after each hidden layer
        x = self.fc_dropout1(F.relu(self.fc1(x)))
        x = self.fc_dropout2(F.relu(self.fc2(x)))
        x = self.fc3(x)

        return F.log_softmax(x, dim=1)

In [6]:
# Training-time preprocessing: resize to the 200x200 input the network is sized
# for, apply a random horizontal flip as augmentation, convert to a tensor.
data_transform = transforms.Compose([
        transforms.Resize((200, 200)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
#         transforms.Normalize(mean=[0.485, 0.456, 0.406],
#                              std=[0.229, 0.224, 0.225])
    ])

# Evaluation-time preprocessing: same resize/tensor conversion but NO random
# augmentation, so the reported test accuracy is deterministic and measured on
# the unmodified test images.
eval_transform = transforms.Compose([
        transforms.Resize((200, 200)),
        transforms.ToTensor(),
    ])

training_dataset = datasets.ImageFolder(root='ASL_Sorted_List/Training_Data/',
                                           transform=data_transform)
training_loader = torch.utils.data.DataLoader(training_dataset,
                                             batch_size=batch, shuffle=True)

testing_dataset = datasets.ImageFolder(root='ASL_Sorted_List/Testing_Data/',
                                           transform=eval_transform)
testing_loader = torch.utils.data.DataLoader(testing_dataset,
                                            batch_size=batch, shuffle=False)

In [7]:
# to_pil = torchvision.transforms.ToPILImage()

# for sample in training_loader:
#     print('Image as a tensor:')
#     print(sample[0])
    
#     print('\nClassification:')
#     print(sample[1])
#     break

Image as a tensor:
tensor([[[[0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          ...,
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.]],

         [[0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          ...,
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.]],

         [[0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          ...,
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.]]],


        [[[0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          ...,
          [0., 0., 0.,  ..., 0.

In [8]:
# Single source of truth for the checkpoint location. The original checked
# "/Saved_Model/aslModel.pth" (absolute, filesystem root) but printed/loaded
# "Saved_Model/aslModel.pth" (relative), so an existing checkpoint was reported
# as present yet never loaded.
checkpoint_path = "Saved_Model/aslModel.pth"
print(os.path.exists(checkpoint_path))

# Initialize previously defined model
model = aslModel()

# Resume from the saved weights when a checkpoint exists
if os.path.exists(checkpoint_path):
    model.load_state_dict(torch.load(checkpoint_path))

# Cross Entropy Loss Function
criterion = nn.CrossEntropyLoss()

# Initialize Optimizer (SGD)
optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum)
# optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Set model to training (updating weights); as the last expression of the cell
# this also displays the module summary.
model.train()

True


aslModel(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv_dropout1): Dropout(p=0.25, inplace=False)
  (conv3): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv4): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv_dropout2): Dropout(p=0.25, inplace=False)
  (conv5): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (conv6): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1))
  (conv_dropout3): Dropout(p=0.25, inplace=False)
  (fc_dropout1): Dropout(p=0.3, inplace=False)
  (fc_dropout2): Dropout(p=0.3, inplace=False)
  (fc1): Linear(in_features=56448, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=128, bias=True)
  (fc3): Linear(in_features=128, out_features=36, bias=True)
)

In [9]:
# Store time to calculate train time
start_time = time.time()

# Store loss and accuracy data
loss = []
accuracy = []

# Train the model
# Loop for number of epochs
for epoch in range(num_epochs):
    # Loop through data in batch sized increments
    for batch_idx, (X_train_batch, Y_train_batch) in enumerate(training_loader):
        if(Y_train_batch.shape[0]<batch):
            continue
        # Forward pass through network
        output = model(X_train_batch)                           
        # Calculate loss of predictions
        curr_loss = criterion(output, Y_train_batch)            
        # Store loss
        loss.append(curr_loss.item())                           

        
        # Clear last calculation
        optimizer.zero_grad()                                   
        # Calculate gradient based on loss
        curr_loss.backward()                                    
        # Update model weights
        optimizer.step()                                        

        # Extract model predictions
        _, predicted = torch.max(output.data, 1) 
        # Calculate number of correct predictions
        correct = (predicted == Y_train_batch).sum().item()     
        # Calculate/store accuracy
        accuracy.append(correct/Y_train_batch.size(0))          
        
        # Intermitently print statistics
        if batch_idx % 100 == 0:
            print('Epoch: ' + str(epoch+1) + '/' + str(num_epochs) + ', Step: ' 
                  + str(batch_idx+1) + '/' + str(len(training_loader)) + ', Loss: ' 
                  + str(curr_loss.item()) + ', Accuracy: ' 
                  + str(correct/Y_train_batch.size(0)*100) + '%')

    
# Store time to calculate train time
end_time = time.time()

# Print train time
print('Run Time: ' + str(end_time - start_time))

#Save Model For Future Usage
torch.save(model.state_dict(), "Saved_Model/aslModel.pth")



Epoch: 1/3, Step: 1/270, Loss: 3.588132381439209, Accuracy: 0.0%


KeyboardInterrupt: 

In [41]:
# Test the model
# Set model to testing (constant weights)
# Switch to evaluation mode (dropout disabled, weights untouched)
model.eval()

# Running tallies over the whole test loader
correct = 0
total = 0

# No gradients are needed for evaluation
with torch.no_grad():
    for images, targets in testing_loader:
        # Class scores for this batch; the prediction is the top-scoring class
        scores = model(images)
        predicted = torch.max(scores, 1)[1]

        # Accumulate sample count and number of hits
        total += targets.size(0)
        correct += (predicted == targets).sum().item()

print(f"Test Accuracy: {(correct/total) * 100}%")

Test Accuracy: 2.7777777777777777%
