In [2]:
import torch
import os
import torch.nn as nn

import torch.nn.functional as F

import torch.optim as optim

from torch.utils.data import DataLoader

import torchvision
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torchvision.models import resnet34

from PIL import Image
from torch.utils.data import Dataset, DataLoader

In [3]:
class SIGNSDataset(Dataset):
    def __init__(self, data_dir, transform):      
        self.filenames, self.labels = self.get_filenames(data_dir)
        image = Image.open(self.filenames[0])
        
        self.transform = transform

    def __len__(self):
        return len(self.filenames)
    
    def get_filenames(self, data_dir):
        filenames = []
        labels = []
        for class_folder in os.listdir(data_dir):
            class_path = os.path.join(data_dir, class_folder)
            folders_in_class = os.listdir(class_path)
            
            for filename in folders_in_class:
                labels.append(int(class_folder))
                path = os.path.join(class_path, filename)
                filenames.append(path)
        labels = torch.tensor(labels)
        return filenames, labels
            

    def __getitem__(self, idx):
        image = Image.open(self.filenames[idx])
        
        image = self.transform(image)
    
        return image, self.labels[idx]

# Part a

In [18]:
class NN(nn.Module):
    def __init__(self, input_size, num_classes): # constructor of NN with its attributes
        super(NN, self).__init__() # calling constructor of base class
        
        self.conv1 = nn.Conv2d(3, 32, 3)
        self.conv2 = nn.Conv2d(32, 64, 3)

        self.fc1 = nn.Linear(64*14*14, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))

        x = F.max_pool2d(F.relu(self.conv2(x)), 2)

        x = x.view(-1, self.num_flat_features(x)) #8*8

       
        
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

    def num_flat_features(self, x):
        size = x.size()[1:]
        num_features = 1
        for s in size:
            num_features *= s
        return num_features

In [19]:
train_transformer = transforms.Compose([
    transforms.Resize(64),
    transforms.ToTensor(),
        transforms.Normalize((0.5,0.5,0.5), (0.5,0.5,0.5))])
              
batch_size = 32
train_data_path = "SignLanguage\\Dataset"
train_loader = DataLoader(dataset = SIGNSDataset(train_data_path, train_transformer), 
                   batch_size=batch_size, shuffle=True)


In [20]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
input_size = 3*64*64
num_classes = 10
learning_rate = 0.001
num_epochs = 10

In [21]:
neural_network = NN(input_size=input_size, num_classes=num_classes).to(device)

criterion = nn.CrossEntropyLoss()

optimizer = optim.Adam(neural_network.parameters(), lr=learning_rate)

In [22]:
for batch_idx, (data, targets) in enumerate(train_loader):
    data = data.to(device=device)
    targets = targets.to(device=device)
        
    scores = neural_network(data) #automatically call the forward method,
                                #as model is a callable object
    loss = criterion(scores, targets) # compute cost/loss on 64 example
    
    optimizer.zero_grad()

    loss.backward()

    optimizer.step()

In [23]:
neural_network.train()

NN(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=12544, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)

In [24]:
def check_accuracy(loader, model):
    num_correct = 0
    num_samples = 0
    model.eval() # 1. our model deactivates all the layers (eg.batch normalization/dropout)
    with torch.no_grad(): #2.  not make computational graph
        for x, y in loader:
            #print (x.shape)
            x = x.to(device=device)
            y = y.to(device=device)
            #print (y.shape)
            
            scores = model(x)
             
            _, predictions = scores.max(1) #. it return max value and its index, 1 mean see column-wise 
            
#             print(predictions)
#             print(y)
            num_correct += (predictions == y).sum() # compare prediction with y, if equal sum them to count the number of same values
            num_samples += predictions.size(0)  #64, get no of samples
    print(f"Got {num_correct} / {num_samples} with accuracy"
      f" {float(num_correct) / float(num_samples) * 100:.2f}"
            )

In [25]:
neural_network.train()
print ("Train accuracy: ")
nn_accuracy = check_accuracy(train_loader, neural_network)

Train accuracy: 
Got 1705 / 2062 with accuracy 82.69


# Part b

In [28]:
class ResNetModel(nn.Module):
    def __init__(self, input_size, num_classes=10):
        super(ResNetModel, self).__init__()
        self.resnet = resnet34(pretrained=True)
        self.resnet.fc = nn.Linear(self.resnet.fc.in_features, num_classes)

    def forward(self, x):
        return self.resnet(x)

In [29]:
train_transformer = transforms.Compose([
    transforms.Resize(64),
    transforms.ToTensor(),
        transforms.Normalize((0.5,0.5,0.5), (0.5,0.5,0.5))])
              
batch_size = 32
train_data_path = "SignLanguage\\Dataset"
train_loader = DataLoader(dataset = SIGNSDataset(train_data_path, train_transformer), 
                   batch_size=batch_size, shuffle=True)


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
input_size = 3*64*64
num_classes = 10
learning_rate = 0.001
num_epochs = 10

In [33]:
model_resnet = ResNetModel(input_size=input_size, num_classes=num_classes).to(device)

criterion = nn.CrossEntropyLoss()

optimizer = optim.Adam(model_resnet.parameters(), lr=learning_rate)


In [34]:
for batch_idx, (data, targets) in enumerate(train_loader):
    data = data.to(device=device)
    targets = targets.to(device=device)
        
    scores = model_resnet(data) #automatically call the forward method,
                                #as model is a callable object
    loss = criterion(scores, targets) # compute cost/loss on 64 example
    
    optimizer.zero_grad()

    loss.backward()

    optimizer.step()

In [35]:
model_resnet.train()

ResNetModel(
  (resnet): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_

In [37]:
model_resnet.train()
print ("Train accuracy: ")
resnet_accuracy = check_accuracy(train_loader, model_resnet)

Train accuracy: 
Got 1989 / 2062 with accuracy 96.46


# Performance Comparision

We can clearly see that the resnet model is more efficient then the regular neural network for the classification purpose. Resnet model proides 96.46% accuarcy where as Normal Neural Network has an accuracy of only 82.69%.