In [18]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt

In [2]:
device = torch.device('cuda' if torch.cuda.is_available else 'cpu')

In [3]:
device

device(type='cuda')

In [29]:
input_size = 784 # 28x28
num_classes = 10
epochs = 10
batch_size = 100
lr = 0.001

In [7]:
train_dataset = torchvision.datasets.MNIST(root="./data", train= True, 
                                           transform=transforms.ToTensor(),
                                          download=True)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data\MNIST\raw\train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ./data\MNIST\raw\train-images-idx3-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data\MNIST\raw\train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ./data\MNIST\raw\train-labels-idx1-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data\MNIST\raw\t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ./data\MNIST\raw\t10k-images-idx3-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data\MNIST\raw\t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ./data\MNIST\raw\t10k-labels-idx1-ubyte.gz to ./data\MNIST\raw



In [9]:
test_dataset = torchvision.datasets.MNIST(root="./data", train= False, 
                                           transform=transforms.ToTensor())

In [11]:
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size = batch_size,
                                         shuffle = False)
test_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size = batch_size,
                                         shuffle = False)

In [12]:
examples = iter(train_loader)

In [13]:
samples, labels = examples.next() #dataloader 1 batch

In [15]:
samples, labels

(tensor([[[[0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           ...,
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.]]],
 
 
         [[[0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           ...,
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.]]],
 
 
         [[[0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           ...,
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.]]],
 
 
         ...,
 
 
         [[[0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           ..

In [17]:
samples.shape #very important to understand the NN parameters

torch.Size([100, 1, 28, 28])

In [43]:
import torch.nn.functional as F

In [44]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
      
        self.conv1 = nn.Conv2d(1, 32, 3, padding = 1) #  Output channels: this is the number of channels in the output feature map (i.e., how many filters are being
        self.conv2 = nn.Conv2d(32, 64, 3, padding = 1)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv3 = nn.Conv2d(64, 128, 5, padding = 2)
        self.hidden = nn.Linear(128,64)
        self.output = nn.Linear(64, 10)
        # self.dropout = nn.Dropout(0.3)

        
    def forward(self, x):

        x = F.relu(self.conv1(x))
        x = self.pool(F.relu(self.conv2(x)))       
        x = F.relu(self.conv3(x)) 
        x = F.avg_pool2d(x, [x.size(2), x.size(3)], stride=1)
        x = x.reshape(x.shape[0],x.shape[1])
        x = self.hidden(x)
        # x = self.dropout(x)
        x = self.output(x)

        return x
        

In [45]:
model = CNN()
model.to(device)
model

CNN(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv2d(64, 128, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (hidden): Linear(in_features=128, out_features=64, bias=True)
  (output): Linear(in_features=64, out_features=10, bias=True)
)

In [46]:
criterion = nn.CrossEntropyLoss()
# Optimizer
optimizer = torch.optim.Adam(model.parameters(),lr=0.001) #remember to put the model parameters

In [47]:
#TRAINING

for epoch in range(epochs):
        for batch_num, (feats, labels) in enumerate(train_loader):
            avg_loss = 0.0
            feats, labels = feats.to(device), labels.to(device)
            
            #pred
            outputs = model(feats)
            loss = criterion(outputs, labels)
            
            #compute gradients
            loss.backward()
            #update weights
            optimizer.step()
            
            avg_loss += loss.item()
            #empty grads
            optimizer.zero_grad()
            
            if batch_num % 50 == 49:
                print('Epoch: {}\tBatch: {}\tAvg-Loss: {:.4f}'.format(epoch+1, batch_num+1, avg_loss/50))

Epoch: 1	Batch: 50	Avg-Loss: 0.0364
Epoch: 1	Batch: 100	Avg-Loss: 0.0371
Epoch: 1	Batch: 150	Avg-Loss: 0.0267
Epoch: 1	Batch: 200	Avg-Loss: 0.0264
Epoch: 1	Batch: 250	Avg-Loss: 0.0165
Epoch: 1	Batch: 300	Avg-Loss: 0.0227
Epoch: 1	Batch: 350	Avg-Loss: 0.0151
Epoch: 1	Batch: 400	Avg-Loss: 0.0110
Epoch: 1	Batch: 450	Avg-Loss: 0.0098
Epoch: 1	Batch: 500	Avg-Loss: 0.0090
Epoch: 1	Batch: 550	Avg-Loss: 0.0115
Epoch: 1	Batch: 600	Avg-Loss: 0.0122
Epoch: 2	Batch: 50	Avg-Loss: 0.0069
Epoch: 2	Batch: 100	Avg-Loss: 0.0031
Epoch: 2	Batch: 150	Avg-Loss: 0.0046
Epoch: 2	Batch: 200	Avg-Loss: 0.0060
Epoch: 2	Batch: 250	Avg-Loss: 0.0039
Epoch: 2	Batch: 300	Avg-Loss: 0.0069
Epoch: 2	Batch: 350	Avg-Loss: 0.0053
Epoch: 2	Batch: 400	Avg-Loss: 0.0030
Epoch: 2	Batch: 450	Avg-Loss: 0.0032
Epoch: 2	Batch: 500	Avg-Loss: 0.0049
Epoch: 2	Batch: 550	Avg-Loss: 0.0059
Epoch: 2	Batch: 600	Avg-Loss: 0.0066
Epoch: 3	Batch: 50	Avg-Loss: 0.0041
Epoch: 3	Batch: 100	Avg-Loss: 0.0014
Epoch: 3	Batch: 150	Avg-Loss: 0.0035
Epoc

In [49]:
import numpy as np

In [50]:
#EVALUATION

def test_classify(model, test_loader):
model.eval()
test_loss = []
accuracy = 0
total = 0


for batch_num, (feats, labels) in enumerate(test_loader):
    feats, labels = feats.to(device), labels.to(device)
    outputs = model(feats)

    _, pred_labels = torch.max(F.softmax(outputs, dim=1), 1)
    pred_labels = pred_labels.view(-1)

    loss = criterion(outputs, labels.long())

    accuracy += torch.sum(torch.eq(pred_labels, labels)).item()
    total += len(labels)
    test_loss.extend([loss.item()]*feats.size()[0])


np.mean(test_loss), accuracy/total

(0.04601605488734398, 0.98575)