In [9]:
import torch
import torch.nn as nn
from torchvision import datasets, transforms

In [None]:
mean_gray = 0.1307
stddev_gray = 0.3081

# Data normalization
# input[channel] = (input[channel] - mean[channel]) / std[channel]

trs = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((mean_gray,), (stddev_gray,))
])

train_dataset = datasets.MNIST( root = './data', train = True, transform = trs, download=True)
test_dataset = datasets.MNIST( root = './data', train = False, transform = trs)

In [None]:
# Check a random image
import matplotlib.pyplot as plt
import numpy
random_img = train_dataset[20][0].numpy()*stddev_gray + mean_gray #Denormalize
plt.imshow(random_img.reshape(28,28), cmap = 'gray')
print(train_dataset[20][1])

In [16]:
# Dataloader
import torch.utils
import torch.utils.data


batch_size = 100
train_load = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_load = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

In [27]:
# Create CNN
# CMNN1: 8 feature maps, 3x3 filters --> Max Pooling (feature maps size /2) --> CNN2: 32 feature maps --> Max Pooling --> FC1 600 output neurons --> FC2 Classifcation layer (10 classes)

class CNN(nn.Module):
    def __init__(self):
        super( CNN, self ).__init__()
        #**** CNN1
        #grayscale images have 1 channel, kernel -> filter size, padding --> same padding of input_size == output_size (filter_size-1)/2
        #output_size of each of the eight feature maps= [(input_size - filter_size +2*padding)/stride+1 ]= (28-3+2)/1+1 =28
        self.cnn1 = nn.Conv2d(in_channels=1, out_channels=8, kernel_size=3, stride=1, padding =1) 
        #batch normalization
        self.batchnorm1 = nn.BatchNorm2d(8)
        self.relu = nn.ReLU()
        #max pooling
        self.maxpool = nn.MaxPool2d(kernel_size=2)
        #output size = 28/2 =14
        #**** CNN2
        #same padding = (filetr_size-1) / 2 = (5-1)/2 =2
        self.cnn2 = nn.Conv2d(in_channels=8, out_channels=32, kernel_size=5, stride=1, padding =2) 
        #output size of each of 32 feature maps [(14-5+2*2)/1+1] = 14
        self.batchnorm2 = nn.BatchNorm2d(32)
        #Linear layers
        #Flatten 32 feature maps, each feature map is of size 7*7 : 7*7*32 = 1568
        self.fc1 = nn.Linear(1568, 600)
        self.dropout = nn.Dropout(p = 0.5) # 0.5->> drop the neurons by 50%
        self.fc2 = nn.Linear(600,10) #classification layer

    
    def forward(self,x):
        out = self.cnn1(x)
        out = self.batchnorm1(out)
        out = self.relu(out)
        out = self.maxpool(out)
        out = self.cnn2(out)
        out = self.batchnorm2(out)
        out = self.relu(out)
        out = self.maxpool(out)

        #Flatten the 32 feature maps from MaxPooling to feed it to the FC1 layer
        #out = self.view(batch_size, 1568)
        out = out.view(-1, 1568)

        out = self.fc1(out)
        out = self.relu(out)
        out = self.dropout(out)
        out = self.fc2(out)

        return out




In [28]:
#Defining the model
model = CNN()
CUDA = torch.cuda.is_available() # check for GPU

if CUDA:
    model.cuda()

loss_func = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr= 0.01)


In [32]:
#Train the CNN

num_epochs =10
train_loss =[]
train_accuracy = []
test_loss =[]
test_accuracy = []

for epoch in range(num_epochs):
    correct = 0
    iterations = 0
    iter_loss = 0
    model.train()

    for i,(inputs,labels) in enumerate(train_load):

        if CUDA:
            inputs = inputs.cuda()
            labels = labels.cuda()

        outputs = model(inputs) #forward propagation
        loss = loss_func(outputs,labels)
        iter_loss += loss.item() #extract the value from the tensor

        #clear the gradience
        optimizer.zero_grad() # w <-- w -lr*gradient

        #back propagation
        loss.backward()
        #update the weights
        optimizer.step()

        #calculate accuracy
        _, prediccted = torch.max(outputs,1)
        correct += (prediccted == labels).sum().item()
        iterations +=1
    
    train_loss.append(iter_loss/iterations)
    train_accuracy.append(correct/ len(train_dataset))

    #Testing phase for every epoch
    testing_loss = 0.0
    correct = 0
    iterations = 0

    model.eval()

    for i,(inputs,labels) in enumerate(test_load):

        if CUDA:
            inputs = inputs.cuda()
            labels = labels.cuda()

        outputs = model(inputs) #forward propagation
        loss = loss_func(outputs,labels)
        testing_loss += loss.item() #extract the value from the tensor

        #calculate accuracy
        _, prediccted = torch.max(outputs,1)
        correct += (prediccted == labels).sum().item()
        iterations +=1    


    test_loss.append(iter_loss/iterations)
    test_accuracy.append(correct/ len(test_dataset))


    print("Epoch{}/{}, Training Loss: {:.3f}, Testing Loss: {:.3f}, Train Accuracy: {:.3f}, Test Accuracy: {:.3f}  ". format(
        epoch+1, num_epochs, train_loss[-1], test_loss[-1], train_accuracy[-1],test_accuracy[-1]))

Epoch1/10, Training Loss: 0.137, Testing Loss: 0.824, Train Accuracy: 0.961, Test Accuracy: 0.984  
Epoch2/10, Training Loss: 0.094, Testing Loss: 0.564, Train Accuracy: 0.972, Test Accuracy: 0.985  
Epoch3/10, Training Loss: 0.074, Testing Loss: 0.444, Train Accuracy: 0.980, Test Accuracy: 0.988  
Epoch4/10, Training Loss: 0.065, Testing Loss: 0.387, Train Accuracy: 0.982, Test Accuracy: 0.986  
Epoch5/10, Training Loss: 0.065, Testing Loss: 0.389, Train Accuracy: 0.982, Test Accuracy: 0.989  
Epoch6/10, Training Loss: 0.059, Testing Loss: 0.356, Train Accuracy: 0.983, Test Accuracy: 0.985  
Epoch7/10, Training Loss: 0.055, Testing Loss: 0.332, Train Accuracy: 0.984, Test Accuracy: 0.989  
Epoch8/10, Training Loss: 0.052, Testing Loss: 0.315, Train Accuracy: 0.985, Test Accuracy: 0.989  
Epoch9/10, Training Loss: 0.055, Testing Loss: 0.330, Train Accuracy: 0.985, Test Accuracy: 0.989  
Epoch10/10, Training Loss: 0.050, Testing Loss: 0.300, Train Accuracy: 0.985, Test Accuracy: 0.989  

In [None]:
# plot the loss and accuracy curve
f = plt.figure( figsize=(10,10))
plt.plot(train_loss, label = "Train loss")
plt.plot(test_loss, label = "Test loss")
plt.legend()
plt.show()

f = plt.figure( figsize=(10,10))
plt.plot(train_accuracy, label = "Train accuracy")
plt.plot(test_accuracy, label = "Test accuracy")
plt.legend()
plt.show()

In [35]:
# Prediction
img = test_dataset[30][0].resize_((1,1,28,28)) #(batch_size,num_channels,height,width)
label = test_dataset[30][1]
model.eval()

if CUDA:
    model.cuda()
    img = img.cuda()


outputs = model(img)
_, predicted = torch.max(outputs,1)
print("Prediction is : {}".format(predicted.item()))
print("Actual is : {}".format(label))


Prediction is : 3
Actual is : 3


In [36]:
# Prediction on my own image
import cv2
from torch.autograd import Variable


def predict(image_name, model):
    image = cv2.imread(image_name, 0) # 0 transform to gray scale
    ret, thresholded = cv2.threshold(image,127,255,cv2.THRESH_BINARY) #black background and white writting, whatever pixel are over 127 will be equal to 255 and pixels below 127 will be 0
    img = 255 - thresholded #background -> write , invert the image
    cv2.imshow("Original", img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    img = Image.formarray(img)
    img = trs(img)
    img = img.view(1,1,28,28) #resize the image to the expected size of cnn
    img = Variable(img) #wrap image to a variable

    model.eval()

    if CUDA:
        model = model.cuda()
        img = img.cuda()

    output = model(img)
    _, predicted = torch.max(output,1)
    return predicted.item()

image_name = test_dataset[30][0].resize_((1,1,28,28))
pred = predict(image_name, model)
print(pred)


ModuleNotFoundError: No module named 'cv2'