# QSTP Image Recognition with Deep Learning

Name: Shriraj Sawant

Contact: 9923690913

Email: sr.official@hotmail.com


In [3]:
import torch
import numpy as np
import torch.nn as nn
from torchvision import transforms as tforms
from torch.utils.data import Dataset, DataLoader
from mnist import MNIST
import matplotlib.pyplot as plt

%matplotlib inline

#Creating Dataset class for loading FashionMNIST dataset directly from given idx3-ubyte.gz format
class FashionMNIST(Dataset):
    """Fashion-MNIST dataset read from gzipped idx-ubyte files.

    Indexing yields an ``(image, label)`` pair. The image is reshaped to
    28x28x1, converted to a float32 tensor and standardised with the mean and
    standard deviation of the split that was loaded; the label is a 0-dim
    tensor built from the stored label value.
    """

    def __init__(self, root='.', train=True, transform=None):
        """
        Args:
            root (string): Directory with all the dataset in .gz form.
            train (boolean): Choice for training or test dataset. By default
                loads training dataset.
            transform (callable, optional): Optional transform applied to the
                already-normalised image tensor.
        """
        self.root = root
        self.transform = transform

        reader = MNIST(path=root, return_type='numpy', gz=True)
        if train:
            self.images, self.labels = reader.load_training()
        else:
            self.images, self.labels = reader.load_testing()

        # Statistics of the loaded split, used to standardise every image.
        self.mean = self.images.mean()
        self.std = self.images.std()

        # Cached normalisation pipeline and the (mean, std) it was built for.
        self._norm = None
        self._norm_stats = None

    def __len__(self):
        """Return the number of images in the loaded split."""
        return len(self.images)

    def _normalizer(self):
        """Return the ToTensor + Normalize pipeline for the current mean/std.

        The pipeline is built once and reused for every sample instead of
        being re-created per ``__getitem__`` call. It is rebuilt only when
        ``self.mean`` or ``self.std`` has been reassigned, so changing those
        attributes after construction still takes effect.
        """
        stats = (self.mean, self.std)
        if self._norm is None or self._norm_stats != stats:
            self._norm = tforms.Compose([
                tforms.ToTensor(),
                tforms.Normalize(mean=[self.mean], std=[self.std]),
            ])
            self._norm_stats = stats
        return self._norm

    def __getitem__(self, idx):
        """Return the normalised image tensor and label tensor at ``idx``."""
        image = self.images[idx].astype('float32').reshape(28, 28, 1)
        image = self._normalizer()(image)
        label = torch.tensor(self.labels[idx].item())
        if self.transform:
            image = self.transform(image)

        return image, label
    
# Both splits are read from the same directory of gzipped idx files.
data_root = 'fashion-mnist-master/data/fashion'
train_dataset = FashionMNIST(root=data_root, train=True)
test_dataset = FashionMNIST(root=data_root, train=False)

# Mini-batch size and the target number of optimiser steps.
batch_size = 100
n_iters = 24000
# Number of passes over the training set derived from the step budget.
epochs = int(n_iters * batch_size / len(train_dataset))

# Training batches are reshuffled every epoch; test batches keep file order.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

#Deep Neural Network Architecture for training the model
class CNNmodel(nn.Module):
    """Convolutional classifier mapping 1-channel images to 10 class scores.

    Four 5x5 convolutions (stride 1, padding 2, so spatial size is kept) are
    each followed by channel dropout and ReLU; max-pooling after the second
    and fourth convolution halves the spatial size each time. The flattened
    features feed a 100-unit hidden layer and a 10-unit output layer. The
    first linear layer expects 128*7*7 features, which corresponds to
    28x28 inputs.
    """

    def __init__(self):
        super().__init__()

        # Stage 1: 1 -> 16 channels.
        self.conv1 = nn.Conv2d(1, 16, kernel_size=5, stride=1, padding=2)
        self.drop1 = nn.Dropout2d(0.1)
        self.ac1 = nn.ReLU()

        # Stage 2: 16 -> 32 channels, then 2x2 max-pool.
        self.conv2 = nn.Conv2d(16, 32, kernel_size=5, stride=1, padding=2)
        self.drop2 = nn.Dropout2d(0.1)
        self.ac2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=2)

        # Stage 3: 32 -> 64 channels.
        self.conv3 = nn.Conv2d(32, 64, kernel_size=5, stride=1, padding=2)
        self.drop3 = nn.Dropout2d(0.1)
        self.ac3 = nn.ReLU()

        # Stage 4: 64 -> 128 channels with batch norm, then 2x2 max-pool.
        self.conv4 = nn.Conv2d(64, 128, kernel_size=5, stride=1, padding=2)
        self.bn4 = nn.BatchNorm2d(128)
        self.drop4 = nn.Dropout2d(0.1)
        self.ac4 = nn.ReLU()
        self.pool4 = nn.MaxPool2d(kernel_size=2)

        # Classifier head: flattened features -> 100 hidden units -> 10 scores.
        self.fc1 = nn.Linear(128 * 7 * 7, 100)
        self.bno = nn.BatchNorm1d(100)
        self.dropo = nn.Dropout(0.5)
        self.aco = nn.ReLU()
        self.opl = nn.Linear(100, 10)

        # Kaiming-normal initialisation for every conv / linear weight matrix.
        weighted_layers = (
            self.conv1, self.conv2, self.conv3, self.conv4, self.fc1, self.opl,
        )
        for layer in weighted_layers:
            nn.init.kaiming_normal_(layer.weight, nonlinearity='relu')

    def forward(self, x):
        """Return the raw (un-normalised) class scores for a batch ``x``."""
        feats = self.ac1(self.drop1(self.conv1(x)))
        feats = self.pool2(self.ac2(self.drop2(self.conv2(feats))))
        feats = self.ac3(self.drop3(self.conv3(feats)))
        feats = self.pool4(self.ac4(self.drop4(self.bn4(self.conv4(feats)))))

        # Flatten everything except the batch dimension for the linear head.
        flat = feats.view(feats.size(0), -1)
        hidden = self.aco(self.dropo(self.bno(self.fc1(flat))))
        return self.opl(hidden)
    
def _accuracy(model, loader, device):
    """Return the percentage of samples in ``loader`` classified correctly.

    Puts ``model`` into eval mode and runs the forward passes under
    ``torch.no_grad()`` so no autograd graph is built during evaluation.
    The model is left in eval mode; callers that continue training must
    switch back with ``model.train()``.
    """
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_images, batch_labels in loader:
            outputs = model(batch_images.to(device))
            _, predicted = torch.max(outputs, 1)
            total += batch_labels.size(0)
            correct += (predicted.cpu() == batch_labels.cpu()).sum().item()
    return 100 * float(correct) / total


# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

my_model = CNNmodel()
my_model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(my_model.parameters(), lr=0.05)

#Training the model
itr = 0
for epoch in range(epochs):
    for images, labels in train_loader:
        # Inputs and integer class labels need no gradients themselves;
        # requesting them on the integer label tensor is an error.
        images = images.to(device)
        labels = labels.to(device)

        # Evaluation below leaves the model in eval mode, so re-enable
        # training behaviour (dropout, batch-norm updates) on every step.
        my_model.train()
        optimizer.zero_grad()

        outputs = my_model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        itr += 1
        if itr % 500 == 0:
            # Periodic progress report: last batch loss and test accuracy.
            acc = _accuracy(my_model, test_loader, device)
            print("Iter: {}, Loss: {:.4}, Accuracy: {:.4}".format(itr, loss.item(), acc))

# Final accuracy on both splits with the model in eval mode.
test_acc = _accuracy(my_model, test_loader, device)
train_acc = _accuracy(my_model, train_loader, device)

print("Training Set Accuracy: {:.4}, Test Set Accuracy: {:.4}".format(train_acc, test_acc))
            

    

Iter: 500, Loss: 0.4102, Accuracy: 84.89
Iter: 1000, Loss: 0.43, Accuracy: 87.48
Iter: 1500, Loss: 0.3595, Accuracy: 88.81
Iter: 2000, Loss: 0.4474, Accuracy: 89.3
Iter: 2500, Loss: 0.4046, Accuracy: 90.13
Iter: 3000, Loss: 0.3612, Accuracy: 90.33
Iter: 3500, Loss: 0.2431, Accuracy: 90.71
Iter: 4000, Loss: 0.3065, Accuracy: 90.81
Iter: 4500, Loss: 0.3988, Accuracy: 91.29
Iter: 5000, Loss: 0.1271, Accuracy: 91.62
Iter: 5500, Loss: 0.183, Accuracy: 91.64
Iter: 6000, Loss: 0.147, Accuracy: 91.56
Iter: 6500, Loss: 0.341, Accuracy: 91.76
Iter: 7000, Loss: 0.264, Accuracy: 92.16
Iter: 7500, Loss: 0.1871, Accuracy: 91.7
Iter: 8000, Loss: 0.2085, Accuracy: 92.33
Iter: 8500, Loss: 0.1484, Accuracy: 92.05
Iter: 9000, Loss: 0.1545, Accuracy: 92.37
Iter: 9500, Loss: 0.2213, Accuracy: 92.34
Iter: 10000, Loss: 0.1257, Accuracy: 92.4
Iter: 10500, Loss: 0.1814, Accuracy: 92.41
Iter: 11000, Loss: 0.2549, Accuracy: 92.28
Iter: 11500, Loss: 0.1861, Accuracy: 92.39
Iter: 12000, Loss: 0.1386, Accuracy: 92.

In [4]:
# Serialise the entire trained module object (not just its parameters) to disk.
torch.save(obj=my_model, f='cnn_93.2.pt')

  "type " + obj.__name__ + ". It won't be checked "
