<a href="https://colab.research.google.com/github/ajuhz/Artificial-Intelligence/blob/master/Convolutional_Neural_Network_using_pytorch_Character_Recognition_Problem.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Importing all necessary libraries
import torch
import torch.nn as nn # because we will use module package to define the CNN
import torchvision.transforms as transforms # because we will use to_tensor to convert our image to torch tensor
import torchvision.datasets as datasets  # we will use MNIST dataset
from torch.autograd import Variable  # we will wrap dataset values / not required from Pytorch version 0.4
import torch.utils.data as data # to user DataLoader utility to make dataset iterable

In [3]:
torch.cuda.is_available()

True

In [4]:
#Donloading the test and train datasets from pytorch library
#converting it to tensor and normaizing it
mean_gray = 0.1307  # ??
stddev_gray = 0.3081 # ??

#Transform the images to tensors
#Normalize a tensor image with mean and standard deviation. Given mean: (M1,...,Mn) and std: (S1,..,Sn) 
#for n channels, this transform will normalize each channel of the input torch.Tensor
#i.e. input[channel] = (input[channel] - mean[channel]) / std[channel]

# Compose :Composes several transforms together.
# Normalize : we need to pass mean and std for each channel, 
#as we are using grey scale image so we are passing here only one value for mean and std
transforms = transforms.Compose([transforms.ToTensor (),transforms.Normalize((mean_gray,), (stddev_gray,))])

#load datasets
train_dataset = datasets.MNIST(root='./',
                               train=True,
                               download=True,
                               transform=transforms
                               )
test_dataset = datasets.MNIST(root='./',
                               train=False,
                               download=True,
                               transform=transforms
                               )

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./MNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./MNIST/raw/train-images-idx3-ubyte.gz to ./MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./MNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./MNIST/raw/train-labels-idx1-ubyte.gz to ./MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./MNIST/raw/t10k-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./MNIST/raw/t10k-images-idx3-ubyte.gz to ./MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./MNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./MNIST/raw/t10k-labels-idx1-ubyte.gz to ./MNIST/raw
Processing...
Done!







In [5]:
# Make the dataset iterable
batch_size = 100

train_load=data.DataLoader(dataset=train_dataset,
                           batch_size=batch_size,
                           shuffle=True)
test_load=data.DataLoader(dataset=test_dataset,
                           batch_size=batch_size,
                           shuffle=False)

In [6]:
print('There are {} images in the training set'.format(len(train_dataset)))
print('There are {} images in the test set'.format(len(test_dataset)))
print('There are {} batches in the train loader'.format(len(train_load)))
print('There are {} batches in the testloader'.format(len(test_load)))

There are 60000 images in the training set
There are 10000 images in the test set
There are 600 batches in the train loader
There are 100 batches in the testloader


In [7]:
# Defining the CNN architecture
class CNN(nn.Module):
    def __init__(self):
        super(CNN,self).__init__()
        #Same Padding = [(filter size - 1) / 2] (Same Padding--> input size = output size)
        # How we are deriving that 8 filters needed/ filter size should be 3x3 ??
        # for grey scale image channel is 1 and for RGB 3
        self.cnn1 = nn.Conv2d(in_channels=1, out_channels=8, kernel_size=3,stride=1, padding=1)
        #The output size of each of the 8 feature maps is 
        #[(input_size - filter_size + 2(padding) / stride) +1] --> [(28-3+2(1)/1)+1] = 28 (padding type is same)
        #Batch normalization
        self.batchnorm1 = nn.BatchNorm2d(8) # passing no of filters
        #RELU
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=2)
         #After max pooling, the output of each feature map is now 28/2 = 14
        self.cnn2 = nn.Conv2d(in_channels=8, out_channels=32, kernel_size=5,stride=1, padding=2)
        self.batchnorm2 = nn.BatchNorm2d(32)
        #After max pooling, the output of each feature map is 14/2 = 7
        #Flatten the feature maps. You have 32 feature maps, each of them is of size 7x7 --> 32*7*7 = 1568
        self.fc1 = nn.Linear(in_features=1568, out_features=600)
        self.droput = nn.Dropout(p=0.5) #50% neurons will be dropped during training phase
        self.fc2 = nn.Linear(in_features=600, out_features=10)
        
    def forward(self,x):
        out=self.cnn1(x)
        out=self.batchnorm1(out)
        out=self.relu(out)
        out=self.maxpool(out)
        out=self.cnn2(out)
        out=self.batchnorm2(out)
        out=self.relu(out)
        out=self.maxpool(out)
        #Now we have to flatten the output. This is where we apply the feed forward neural network as learned before! 
        #It will take the shape (batch_size, 1568) = (100, 1568)
        out=out.view(-1,1568)
        out=self.fc1(out)
        out=self.relu(out)
        out=self.droput(out)
        out=self.fc2(out)
        return out        

In [8]:
model=CNN()
CUDA = torch.cuda.is_available()
if CUDA:
    model = model.cuda()
loss_fn=nn.CrossEntropyLoss()
optimizer=torch.optim.SGD(model.parameters(),lr=0.01)

In [9]:
#Understand what's happening
iteration = 0
correct = 0

for i,(inputs,labels) in enumerate (train_load):

        
    print("For one iteration, this is what happens:")
    print("Input Shape:",inputs.shape)
    print("Labels Shape:",labels.shape)
    output = model(inputs)
    print("Outputs Shape",output.shape)
    _, predicted = torch.max(output, 1)
    print("Predicted Shape",predicted.shape)
    print("Predicted Tensor:")
    print(predicted)
    correct += (predicted == labels).sum()
    break

For one iteration, this is what happens:
Input Shape: torch.Size([100, 1, 28, 28])
Labels Shape: torch.Size([100])


RuntimeError: ignored

In [10]:
# Training the model 
num_epochs = 10

#list to hold loss and accuracy 

train_loss=[]
train_accuracy=[]
test_loss=[]
test_accuracy=[]

for epoch in range(num_epochs):
    #Reset these below variables to 0 at the begining of every epoch
    correct = 0
    iterations = 0
    iter_loss = 0.0
    model.train()
    for i,(inputs,labels) in enumerate(train_load):
        if CUDA:
            inputs = inputs.cuda()
            labels = labels.cuda()
        outputs=model(inputs)
        loss=loss_fn(outputs,labels)
        iter_loss+=loss.item()
        optimizer.zero_grad()            # Clear off the gradient in (w = w - gradient)
        loss.backward()                 # Backpropagation 
        optimizer.step()                # Update the weights
        
        #record the correct prediction 
        _,predicted=torch.max(outputs,1)
        correct += (predicted==labels).sum()
        iterations+=1
        # Record the training loss
    train_loss.append(iter_loss/iterations)
    # Record the training accuracy
    train_accuracy.append((100 * correct / len(train_dataset)))
    print ('Epoch {}/{}, Training Loss: {:.3f}, Training Accuracy: {:.3f}'
           .format(epoch+1, num_epochs, train_loss[-1], train_accuracy[-1]))



Epoch 1/10, Training Loss: 0.495, Training Accuracy: 86.000
Epoch 2/10, Training Loss: 0.161, Training Accuracy: 95.000
Epoch 3/10, Training Loss: 0.117, Training Accuracy: 96.000
Epoch 4/10, Training Loss: 0.095, Training Accuracy: 97.000
Epoch 5/10, Training Loss: 0.081, Training Accuracy: 97.000
Epoch 6/10, Training Loss: 0.070, Training Accuracy: 97.000
Epoch 7/10, Training Loss: 0.064, Training Accuracy: 98.000
Epoch 8/10, Training Loss: 0.058, Training Accuracy: 98.000
Epoch 9/10, Training Loss: 0.054, Training Accuracy: 98.000
Epoch 10/10, Training Loss: 0.051, Training Accuracy: 98.000


In [11]:
#Testing the Model
for epoch in range(5):
    correct = 0
    iterations = 0
    testing_loss = 0.0
    model.eval()
    for i, (inputs, labels) in enumerate(test_load):
        if CUDA:
            inputs = inputs.cuda()
            labels = labels.cuda()
        outputs=model(inputs)
        loss=loss_fn(outputs,labels)
        testing_loss += loss.item()
        _,predicted=torch.max(outputs,1)
        correct+=(predicted==labels).sum()
        iterations+=1
    test_loss.append(testing_loss/iterations)
    test_accuracy.append((100 * correct / len(test_dataset)))
    print('Epoch : {}/{}, Test Loss {:.3f}, Test Accuracy {:.3f}'.format(epoch+1,num_epochs,test_loss[-1],test_accuracy[-1]))
        

Epoch : 1/10, Test Loss 0.039, Test Accuracy 98.000
Epoch : 2/10, Test Loss 0.039, Test Accuracy 98.000
Epoch : 3/10, Test Loss 0.039, Test Accuracy 98.000
Epoch : 4/10, Test Loss 0.039, Test Accuracy 98.000
Epoch : 5/10, Test Loss 0.039, Test Accuracy 98.000


In [12]:
len(test_dataset)

10000

AttributeError: ignored