In [1]:
# All required python standard libraries
import os
import time

In [2]:
# All torch related imports 
import torch
from torch.utils.data import DataLoader, Dataset
from torchvision.transforms import transforms
from torch import nn, optim

In [3]:
# All sci-kit related imports 
import skimage.io as sk
from skimage.transform import resize
from skimage.color import rgb2gray 

In [4]:
# using cv2 to read an image
import cv2

In [8]:
# All sci-kit related imports 
import pandas as pd
import numpy as np

In [9]:
# FUNCTION TO TRANSFORM TENSOR TO NUMPY IMAGE
def torch_to_numpy(tensor_item):
    return tensor_item.permute([1,2,0]).numpy()

In [10]:
# convert input from BMP to grayscale and add channel information and add channel
def custom_transform(input_image):
    #convert to grascale  
    input_image = rgb2gray(input_image)
    return input_image; 

In [11]:
def accuracy_finder(predictions , labels):
    values, max_indices = torch.max(predictions, dim=1)
    accuracy = ( max_indices == labels ).sum()/max_indices.size()[0]
    return accuracy

In [12]:
os.getcwd()

'F:\\Research\\Week 4'

In [4]:
train_directory = os.path.join(os.getcwd(),'guides\\isolated-dataset-csv\\IsolatedTrain.csv')
test_directory = os.path.join(os.getcwd(),'guides\\isolated-dataset-csv\\IsolatedTest.csv')

In [5]:
BATCH_SIZE = 8 

In [13]:
train_csv = pd.read_csv(train_directory, usecols=["labels","directory"])
test_csv = pd.read_csv(test_directory, usecols=["labels","directory"])

In [7]:
## data loader prepraration

In [14]:
DATA_NORMALIZER = transforms.Compose([transforms.ToTensor(),transforms.Resize(224),transforms.CenterCrop(224),transforms.Normalize(mean=[0.5,0.5,0.5],
                         std=[0.5,0.5,0.5]),])

In [15]:
type(DATA_NORMALIZER)

torchvision.transforms.transforms.Compose

### Isolated Dataset DataLoader

- [ ] Dataset images should be resized to 224 * 224 * 3

- [ ] Dataset labels should be one hot encoded 

- [ ] Apply torch transforms to images 


### Differences with Paper 

- [ ] Apply Dataset images should be resized to 224 * 224 * 3

In [16]:
class IsolatedCharacterDataset(Dataset):
    def __init__(self, csv_dir_path,  transforms=None, custom_transform=None ):
        ### complete dataset path
        self.dataset_csv = pd.read_csv(csv_dir_path, usecols=["labels","directory"])  
        self.dataset_csv_numpy = self.dataset_csv.to_numpy()
        
        ### labels
        self.labels = self.dataset_csv_numpy[:,0]
        
        ### images directories
        self.image_directories = self.dataset_csv_numpy[:,1]
        
        ### transformations to apply on images
        self.transforms = transforms
        
    def __getitem__(self, index):
        
        # convert labels to tensor 
        label = torch.tensor(self.labels[index])

        # get single image directory
        image_dir = self.image_directories[index]
        
        # load single image 
        image = cv2.imread(os.path.join(os.getcwd(),image_dir), cv2.COLOR_RGB2GRAY)
        
        if self.transforms:
            ## apply transforms 
            image = self.transforms(image)
            image = image.float()
    
        label = label.long()
         
        return image, label 
    
    def __len__(self):
        r,_ = self.dataset_csv_numpy.shape
        return r

In [17]:
TRAIN_DATASET = IsolatedCharacterDataset(csv_dir_path= train_directory ,transforms=DATA_NORMALIZER)
TRAIN_LOADER = DataLoader(dataset=TRAIN_DATASET, batch_size=BATCH_SIZE ,shuffle= True)

In [18]:
cnn_layers =  nn.Sequential(
            # Defining a 2D convolution layer
            # torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)
            # in_channels (int) – Number of channels in the input image. For B&W it is 1.
            # out_channels (int) – Number of channels produced by the convolution. 4 filters
            # kernel_size (int or tuple) – Size of the convolving kernel (3x3)
            # stride (int or tuple, optional) – Stride of the convolution
            # padding (int or tuple, optional) – Padding of 1 added to both sides of the input
            # example x1 = (n, c=3 , h=100 , w=100 )
            nn.Conv2d(3, 4, kernel_size=3, stride=1, padding=1), #in_channels = 1 is a data dependent hyperparameter. It is 1 because the images are in grayscale
            # x2 = (n, c=12 , h=100 , w=100 )
            nn.BatchNorm2d(4), # Normalize output from the activation function. 
            nn.ReLU(inplace=True), # negative elements to zero
            # x2 = (n, c=12 , h=100 , w=100 )
            nn.MaxPool2d(kernel_size=2, stride=2), #Stride is the number of pixels shifts over the input matrix. When the stride is 1 then we move the filters to 1 pixel at a time. When the stride is 2 then we move the filters to 2 pixels at a time and so on
            # x3 = (n, c=12 , h=49 , w=49 )
            # Defining another 2D convolution layer
            nn.Conv2d(4, 4, kernel_size=3, stride=1, padding=1),
            # x3 = (n, c=4 , h=49 , w=49 )
            nn.BatchNorm2d(4), # 4 features
            nn.ReLU(inplace=True), # inplace = True will modify the input directly, without allocating any additional output.
            # x3 = (n, c=48 , h=49 , w=49 )
            nn.MaxPool2d(kernel_size=2, stride=2), # Downsamples the input representation by taking the maximum value x4 = (n, c=48 , h=25 , w=25 )
            )
# outputSize = floor[(inputSize - filterSize + 2 * padding) / stride] + 1
print(cnn_layers)


linear_layers = nn.Sequential(
            nn.Linear(4 * 7 * 7, 172)
# 159 classes are available for the compound classes dataset. It is a data dependent hyperparameter
)

print(linear_layers)

Sequential(
  (0): Conv2d(3, 4, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): BatchNorm2d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU(inplace=True)
  (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (4): Conv2d(4, 4, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (5): BatchNorm2d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (6): ReLU(inplace=True)
  (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)
Sequential(
  (0): Linear(in_features=196, out_features=172, bias=True)
)


In [19]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.cnn_layer = cnn_layers
        self.linear_layer = linear_layers
    
    def forward(self, x):
        x = self.cnn_layer(x)

        x = x.view(x.shape[0],-1)

        x = self.linear_layer(x)
        return x

In [20]:
net = Net()

In [21]:
optimizer =  optim.Adam(net.parameters(), lr=0.07) # learning rate 
# defining the loss function
criterion =  nn.CrossEntropyLoss(reduction='none')
if torch.cuda.is_available() and USE_CUDA:
    print('cuda is available')
    net = net.cuda()
    criterion = criterion.cuda()

cuda is available


In [22]:
from tqdm.notebook import tqdm

In [23]:
def training(epochs:int):
    all_training_losses = []
    all_training_accuracy = []

    for epoch in tqdm(range(epochs)):
        total_epoch_loss = 0
        total_accuracy_epoch = 0
        
        for i, data in enumerate(TRAIN_LOADER, 0): 
            image,label = data
            optimizer.zero_grad()
            if torch.cuda.is_available() and USE_CUDA:
                label = label.cuda()
                image = image.cuda()
            output = net(image)
            loss = criterion(output, label)
            loss = loss.sum()/loss.shape[0]
            loss.backward()
            
            optimizer.step()
            total_epoch_loss += loss
            batches_training_accuracy = accuracy_finder(predictions=output, labels=label)
            total_accuracy_epoch = total_accuracy_epoch  + batches_training_accuracy   
        # total epoch loss 
        total_epoch_loss = total_epoch_loss / len(TRAIN_LOADER)
        # total epoch accuracy 
        total_accuracy_epoch = total_accuracy_epoch /len(TRAIN_LOADER)
        
        # display the epoch training loss
        print("epoch : {}/{}, loss = {:.8f}, acc = {:.8f}".format(epoch + 1, epochs, total_epoch_loss, total_accuracy_epoch ))
        all_training_losses.append(total_epoch_loss)
        all_training_accuracy.append(total_accuracy_epoch)
        
    print("Training completed")
    return all_training_accuracy, all_training_losses 

In [24]:
t_acc, t_loss = training(10)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=10.0), HTML(value='')))

epoch : 1/10, loss = 6.95589685, acc = 0.00904082
epoch : 2/10, loss = 5.13757658, acc = 0.01224706
epoch : 3/10, loss = 5.14527845, acc = 0.00936204
epoch : 4/10, loss = 5.13959312, acc = 0.00928330
epoch : 5/10, loss = 5.13730860, acc = 0.00928330
epoch : 6/10, loss = 5.13678312, acc = 0.00908647
epoch : 7/10, loss = 5.13688517, acc = 0.00878278
epoch : 8/10, loss = 5.13649654, acc = 0.00932267
epoch : 9/10, loss = 5.13643551, acc = 0.00944077
epoch : 10/10, loss = 5.13660860, acc = 0.00940140

Training completed


![https://discuss.pytorch.org/t/indexerror-target-2-is-out-of-bounds/69614/7]
(Stack Overflow for Out of Bounds Problem)


