<a href="https://colab.research.google.com/github/We1rdguy/MNIST/blob/master/CNN_with_Pytorch_for_MNIST.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import numpy as np # to handle matrix and data operation
import pandas as pd # to read csv and handle dataframe
import os


import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data
from torch.autograd import Variable
from torchsummary import summary

from sklearn.model_selection import train_test_split

In [0]:
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import seaborn as sns
from sklearn.metrics import confusion_matrix
%matplotlib inline

In [2]:
from google.colab import drive
drive.mount('/content/gdrive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/gdrive


In [0]:

dataset_path="gdrive/My Drive/Dataset/MNIST"
os.chdir(dataset_path)

In [4]:
! ls

digit-recognizer.zip  model.pth		     test.csv
graph.png	      sample_submission.csv  train.csv


In [6]:
df = pd.read_csv("train.csv")
print(df.shape)

(42000, 785)


In [0]:
y = df['label'].values
X = df.drop(['label'],1).values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15)

In [8]:
print(y_test.shape)

(6300,)


In [0]:
BATCH_SIZE = 32

torch_X_train = torch.from_numpy(X_train).type(torch.LongTensor)
torch_y_train = torch.from_numpy(y_train).type(torch.LongTensor) # data type is long

# create feature and targets tensor for test set.
torch_X_test = torch.from_numpy(X_test).type(torch.LongTensor)
torch_y_test = torch.from_numpy(y_test).type(torch.LongTensor) # data type is long



torch_X_train = torch_X_train.view(-1, 1,28,28).float().cuda()
torch_X_test = torch_X_test.view(-1,1,28,28).float().cuda()

# Pytorch train and test sets
train = torch.utils.data.TensorDataset(torch_X_train,torch_y_train)
test = torch.utils.data.TensorDataset(torch_X_test,torch_y_test)

# data loader
train_loader = torch.utils.data.DataLoader(train, batch_size = BATCH_SIZE, shuffle = False)
test_loader = torch.utils.data.DataLoader(test, batch_size = BATCH_SIZE, shuffle = False)

In [10]:
print(torch_X_train.shape)
print(torch_X_test.shape)

torch.Size([35700, 1, 28, 28])
torch.Size([6300, 1, 28, 28])


In [11]:

class CNN(nn.Module):
  def __init__(self):
    super(CNN, self).__init__()
    self.conv1 = nn.Conv2d(1, 32, kernel_size=5)
    self.conv2 = nn.Conv2d(32, 32, kernel_size=5)
    self.conv3 = nn.Conv2d(32,64, kernel_size=5)
    self.fc1 = nn.Linear(3*3*64, 256)
    self.fc2 = nn.Linear(256, 10)

  def forward(self, x):
    x = F.relu(self.conv1(x))
    x = F.dropout(x, p=0.5, training=self.training)
    x = F.relu(F.max_pool2d(self.conv2(x), 2))
    x = F.dropout(x, p=0.5, training=self.training)
    x = F.relu(F.max_pool2d(self.conv3(x),2))
    x = F.dropout(x, p=0.5, training=self.training)
    x = x.view(-1,3*3*64 )
    x = F.relu(self.fc1(x))
    x = F.dropout(x, training=self.training)
    x = self.fc2(x)
    return F.log_softmax(x, dim=1)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = CNN().to(device)

summary(model, (1, 28, 28))
optimizer = torch.optim.Adam(model.parameters())#,lr=0.001, betas=(0.9,0.999))

# Print model's state_dict
print("Model's state_dict:")
for param_tensor in model.state_dict():
    print(param_tensor, "\t", model.state_dict()[param_tensor].size())

# Print optimizer's state_dict
print("Optimizer's state_dict:")
for var_name in optimizer.state_dict():
    print(var_name, "\t", optimizer.state_dict()[var_name])      

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 24, 24]             832
            Conv2d-2           [-1, 32, 20, 20]          25,632
            Conv2d-3             [-1, 64, 6, 6]          51,264
            Linear-4                  [-1, 256]         147,712
            Linear-5                   [-1, 10]           2,570
Total params: 228,010
Trainable params: 228,010
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.26
Params size (MB): 0.87
Estimated Total Size (MB): 1.13
----------------------------------------------------------------
Model's state_dict:
conv1.weight 	 torch.Size([32, 1, 5, 5])
conv1.bias 	 torch.Size([32])
conv2.weight 	 torch.Size([32, 32, 5, 5])
conv2.bias 	 torch.Size([32])
conv3.weight 	 torch.Size([64, 32, 5, 5])
conv3.bias 	 torch.Size([64])

In [12]:
it = iter(train_loader)
X_batch, y_batch = next(it)
print(model.forward(X_batch).shape)

torch.Size([32, 10])


In [0]:
def fit(model, train_loader):
    
    error = nn.CrossEntropyLoss()
    EPOCHS = 50
    model.train()
    for epoch in range(EPOCHS):
      correct = 0
      for batch_idx, (X_batch, y_batch) in enumerate(train_loader):
        var_X_batch = Variable(X_batch).float()
        var_y_batch = Variable(y_batch).cuda()
        optimizer.zero_grad()
        output = model(var_X_batch)
        loss = error(output, var_y_batch)
        loss.backward()
        optimizer.step()
        
        # Total correct predictions
        predicted = torch.max(output.data, 1)[1] 
        correct += (predicted == var_y_batch).sum().cuda()
        
        #print(correct)
        if batch_idx % 500 == 0:
          print('Epoch : {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\t Accuracy:{:.3f}%'.format(
              epoch, 
              batch_idx*len(X_batch), 
              len(train_loader.dataset), 
              100.*batch_idx / len(train_loader), 
              loss.data, 
              float(correct*100) / float(BATCH_SIZE*(batch_idx+1))))

In [0]:
fit(model,train_loader)



In [14]:
PATH=dataset_path+"/model.pth"
print(PATH)
# torch.save(model.state_dict(), "model.pth")

gdrive/My Drive/Dataset/MNIST/model.pth


In [15]:


model = CNN()
model.load_state_dict(torch.load( "model.pth"))
model.eval()


CNN(
  (conv1): Conv2d(1, 32, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(32, 32, kernel_size=(5, 5), stride=(1, 1))
  (conv3): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=576, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=10, bias=True)
)

In [0]:
def evaluate(model):
  correct = 0
  correct=correct
  for test_imgs, test_labels in test_loader:
    #print(test_imgs.shape)
    test_imgs = Variable(test_imgs).float().cpu()
    output = model(test_imgs)
    predicted = torch.max(output,1)[1]
    correct += (predicted == test_labels).sum()
    
  print("Test accuracy:{:.3f}% ".format( float(correct) / (len(test_loader)*BATCH_SIZE)))

  


In [23]:
evaluate(model)

Test accuracy:0.986% 
