In [1]:
import torch
from torch.utils.data import DataLoader, Dataset
import torch.nn as nn
import torch.nn.functional as F
import math

In [2]:
import numpy as np

In [3]:
torch.manual_seed(1)

<torch._C.Generator at 0x7f9e6c66aab0>

In [4]:
device=torch.device(0)
print(device)

# device='cuda'

cuda:0


## Data loading

In [15]:
# https://medium.com/@nutanbhogendrasharma/pytorch-convolutional-neural-network-with-mnist-dataset-4e8a4265e118
from torchvision import datasets
from torchvision.transforms import ToTensor
train_data = datasets.MNIST(
    root = 'data',
    train = True,                         
    transform = ToTensor(), 
    download = True,            
)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting data/MNIST/raw/train-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting data/MNIST/raw/train-labels-idx1-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting data/MNIST/raw/t10k-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting data/MNIST/raw/t10k-labels-idx1-ubyte.gz to data/MNIST/raw



In [48]:
train_data.data.shape

torch.Size([18623, 28, 28])

In [None]:
train_data.data[0].dtype

torch.uint8

In [16]:
test_data = datasets.MNIST(
    root = 'data', 
    train = False, 
    transform = ToTensor()
)

In [17]:
train_idx = (train_data.targets==0) | (train_data.targets==1) | (train_data.targets==2)
test_idx = (test_data.targets==0) | (test_data.targets==1) | (test_data.targets==2)

train_data.data = train_data.data[train_idx]
train_data.targets = train_data.targets[train_idx]
test_data.data = test_data.data[test_idx]
test_data.targets = test_data.targets[test_idx]

In [18]:
from torch.utils.data import DataLoader
loaders = {
    'train' : torch.utils.data.DataLoader(train_data, 
                                          batch_size=100, 
                                          shuffle=True, 
                                          num_workers=1),

    'test'  : torch.utils.data.DataLoader(test_data, 
                                          batch_size=100, 
                                          shuffle=True, 
                                          num_workers=0),
}
loaders

{'train': <torch.utils.data.dataloader.DataLoader at 0x7f9e61ff37f0>,
 'test': <torch.utils.data.dataloader.DataLoader at 0x7f9e61ff3ca0>}

In [38]:
sample = next(iter(loaders['train']))
imgs, lbls = sample
imgs.shape

torch.Size([100, 1, 28, 28])

## GenerateCNN class and test cases

In [63]:
# TODO: Account for PADDING -> assuming 0 for now
# TODO: Make work for list params for image size, filter size etc (i.e. non square inputs) -> assuming square inputs

class GenerateCNN(nn.Module):
    def __init__(self, network_tuples, n_init_channels, n_classes=3):
      '''
      network_tuples: list of tuples defining network. Params:
        layer_type,        # String -- conv, pool, fc, softmax
        layer_depth,       # Current depth of network
        filter_depth,      # Used for conv, 0 when not conv
        filter_size,       # Used for conv and pool, 0 otherwise
        stride,            # Used for conv and pool, 0 otherwise
        image_size,        # Used for any layer that maintains square input (conv and pool), 0 otherwise
        fc_size,           # Used for fc and softmax -- number of neurons in layer
        terminate,
        state_list 
      n_init_channels: no of channels in dataset images
      '''
      super(GenerateCNN, self).__init__()
      self.layers=nn.ModuleList()

      in_filter=None
      running_image_size=None
      firstFC=True
      running_fc_size=None

      for i,state in enumerate(network_tuples):
        
        layer_type, layer_depth, filter_depth, filter_size, stride, image_size, fc_size, terminate, state_list = state
        
        # print(running_image_size, in_filter)
        if layer_type=='conv':
          assert filter_depth != 0, "filter_depth to conv layer must be non-zero"

          if i==0:
            self.layers.append(nn.Conv2d(n_init_channels,filter_depth, filter_size, stride))
            running_image_size=image_size
          else:
            self.layers.append(nn.Conv2d(in_filter,filter_depth, filter_size, stride))
          in_filter=filter_depth
          running_image_size=self._calc_new_image_size(running_image_size, filter_size, stride)


        elif layer_type=='pool':
          if i==0:
            # self.layers.append(nn.MaxPool2d(filter_size, stride))
            running_image_size=image_size
            in_filter=n_init_channels

          self.layers.append(nn.MaxPool2d(filter_size, stride))
          running_image_size=self._calc_new_image_size(running_image_size, filter_size, stride)


        elif layer_type=='fc':

          # assert image_size == 0, "Image size to fc layer must be 0, is {}".format(image_size)
          if firstFC:
            firstFC=False
            image_size_unroll=running_image_size**2 # NOTE: assumption of only square images
            image_size_unroll*=in_filter
            # print('Unrolled img size:',image_size_unroll)
            # self.layers.append(nn.Flatten())
            self.layers.append(nn.Linear(image_size_unroll,fc_size))
            
          else:
            self.layers.append(nn.Linear(running_fc_size,fc_size))
          running_fc_size=fc_size
      
      # get the last layer and check
      # if lastlayer == FC AND has 3 out: Do nothing
      # if lastlayer == FC But not 3 out: Add FC with 3 Out
      # if lastlayer != FC -> Flatten -> Add FC with 3 Out
      lastlayer=self.layers[-1]
      if (isinstance(lastlayer, nn.Linear) and lastlayer.out_features!=n_classes):
          print(lastlayer.out_features)
          # self.layers.append(nn.Flatten())
          self.layers.append(nn.Linear(lastlayer.out_features, n_classes))
          
      elif isinstance(lastlayer, nn.Conv2d) or isinstance(lastlayer, nn.MaxPool2d):
          self.layers.append(nn.Flatten())
          image_size_unroll=running_image_size**2 # NOTE: assumption of only square images
          image_size_unroll*=in_filter
          # print('Unrolled img size:',image_size_unroll)

          self.layers.append(nn.Linear(image_size_unroll,n_classes))         
        
        


      # https://github.com/bowenbaker/metaqnn/blob/a25847f635e9545455f83405453e740646038f7a/libs/grammar/state_enumerator.py#L207
    def _calc_new_image_size(self, image_size, filter_size, stride):
      '''Returns new image size given previous image size and filter parameters'''
      new_size = int(math.ceil(float(image_size - filter_size + 1) / float(stride)))
      return new_size

    def forward(self, x):
      for layer in self.layers:
        if isinstance(layer, nn.Conv2d):
          layer_type='conv'
        elif isinstance(layer, nn.MaxPool2d):
          layer_type='mp'
        elif isinstance(layer, nn.Linear):
          x=x.view(x.shape[0],-1) #flatten before a linear layer
        x=layer(x)
        # print(layer_type, x.shape)
      
      return x


### 1. (conv>pool>conv>pool>fc>fc)

In [None]:
#testcase 1: last layer correct
model = GenerateCNN([('conv',1,5,3,1,28,0,0,[]),('pool',1,0,2,2,28,0,0,[]),('conv',1,10,3,1,26,0,0,[]),('pool',1,0,2,2,28,0,0,[]),('fc',0,0,0,0,0,100,0,[]),('fc',0,0,0,0,0,10,0,[])], 1, n_classes=10)
print(model)
model.to(device)

### 2. Last layer FC out_features != n_classes

In [9]:
#test case 2: last layer fc, incorrect
model = GenerateCNN([('conv',1,5,3,1,28,0,0,[]),('pool',1,0,2,2,28,0,0,[]),('conv',1,10,3,1,26,0,0,[]),('pool',1,0,2,2,28,0,0,[]),('fc',0,0,0,0,0,100,0,[])], 1, n_classes=10)
print(model)
model.to(device)

100
GenerateCNN(
  (layers): ModuleList(
    (0): Conv2d(1, 5, kernel_size=(3, 3), stride=(1, 1))
    (1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (2): Conv2d(5, 10, kernel_size=(3, 3), stride=(1, 1))
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Linear(in_features=250, out_features=100, bias=True)
    (5): Linear(in_features=100, out_features=10, bias=True)
  )
)


GenerateCNN(
  (layers): ModuleList(
    (0): Conv2d(1, 5, kernel_size=(3, 3), stride=(1, 1))
    (1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (2): Conv2d(5, 10, kernel_size=(3, 3), stride=(1, 1))
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Linear(in_features=250, out_features=100, bias=True)
    (5): Linear(in_features=100, out_features=10, bias=True)
  )
)

### 3. Last layer Conv

In [10]:
#test case 3: last layer conv
model = GenerateCNN([('conv',1,5,3,1,28,0,0,[]),('pool',1,0,2,2,28,0,0,[]),('conv',1,10,3,1,26,0,0,[])], 1, n_classes=3)
print(model)
model.to(device)

GenerateCNN(
  (layers): ModuleList(
    (0): Conv2d(1, 5, kernel_size=(3, 3), stride=(1, 1))
    (1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (2): Conv2d(5, 10, kernel_size=(3, 3), stride=(1, 1))
    (3): Flatten(start_dim=1, end_dim=-1)
    (4): Linear(in_features=1210, out_features=3, bias=True)
  )
)


GenerateCNN(
  (layers): ModuleList(
    (0): Conv2d(1, 5, kernel_size=(3, 3), stride=(1, 1))
    (1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (2): Conv2d(5, 10, kernel_size=(3, 3), stride=(1, 1))
    (3): Flatten(start_dim=1, end_dim=-1)
    (4): Linear(in_features=1210, out_features=3, bias=True)
  )
)

### 4. Last layer pool

In [11]:
#test case 4: last layer pool
model = GenerateCNN([('conv',1,5,3,1,28,0,0,[]),('pool',1,0,2,2,28,0,0,[]),('conv',1,10,3,1,26,0,0,[]),('pool',1,0,2,2,28,0,0,[])], 1)
print(model)
model.to(device)

GenerateCNN(
  (layers): ModuleList(
    (0): Conv2d(1, 5, kernel_size=(3, 3), stride=(1, 1))
    (1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (2): Conv2d(5, 10, kernel_size=(3, 3), stride=(1, 1))
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Flatten(start_dim=1, end_dim=-1)
    (5): Linear(in_features=250, out_features=3, bias=True)
  )
)


GenerateCNN(
  (layers): ModuleList(
    (0): Conv2d(1, 5, kernel_size=(3, 3), stride=(1, 1))
    (1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (2): Conv2d(5, 10, kernel_size=(3, 3), stride=(1, 1))
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Flatten(start_dim=1, end_dim=-1)
    (5): Linear(in_features=250, out_features=3, bias=True)
  )
)

### 5. pool stride 3

In [29]:
#test case 5: diff stride
model = GenerateCNN([('conv',1,5,3,2,28,0,0,[]),('pool',1,0,2,3,28,0,0,[]),('conv',1,10,3,1,26,0,0,[]),('pool',1,0,2,2,28,0,0,[]),('fc',0,0,0,0,0,100,0,[]),('fc',0,0,0,0,0,3,0,[])], 1)
print(model)
model.to(device)

GenerateCNN(
  (layers): ModuleList(
    (0): Conv2d(1, 5, kernel_size=(3, 3), stride=(2, 2))
    (1): MaxPool2d(kernel_size=2, stride=3, padding=0, dilation=1, ceil_mode=False)
    (2): Conv2d(5, 10, kernel_size=(3, 3), stride=(1, 1))
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Linear(in_features=10, out_features=100, bias=True)
    (5): Linear(in_features=100, out_features=3, bias=True)
  )
)


GenerateCNN(
  (layers): ModuleList(
    (0): Conv2d(1, 5, kernel_size=(3, 3), stride=(2, 2))
    (1): MaxPool2d(kernel_size=2, stride=3, padding=0, dilation=1, ceil_mode=False)
    (2): Conv2d(5, 10, kernel_size=(3, 3), stride=(1, 1))
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Linear(in_features=10, out_features=100, bias=True)
    (5): Linear(in_features=100, out_features=3, bias=True)
  )
)

### 6. consecutive pools

In [37]:
#test case 6: pool after pool
model = GenerateCNN([('conv',1,5,3,2,28,0,0,[]),('pool',1,0,2,3,28,0,0,[]),('pool',1,0,2,2,28,0,0,[]),('fc',0,0,0,0,0,100,0,[]),('fc',0,0,0,0,0,3,0,[])], 1)
print(model)
model.to(device)

GenerateCNN(
  (layers): ModuleList(
    (0): Conv2d(1, 5, kernel_size=(3, 3), stride=(2, 2))
    (1): MaxPool2d(kernel_size=2, stride=3, padding=0, dilation=1, ceil_mode=False)
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Linear(in_features=20, out_features=100, bias=True)
    (4): Linear(in_features=100, out_features=3, bias=True)
  )
)


GenerateCNN(
  (layers): ModuleList(
    (0): Conv2d(1, 5, kernel_size=(3, 3), stride=(2, 2))
    (1): MaxPool2d(kernel_size=2, stride=3, padding=0, dilation=1, ceil_mode=False)
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Linear(in_features=20, out_features=100, bias=True)
    (4): Linear(in_features=100, out_features=3, bias=True)
  )
)

### 7. single conv

In [87]:
#test case 7
model = GenerateCNN([('conv',1,20,3,2,28,0,0,[])], 1)
print(model)
model.to(device)

GenerateCNN(
  (layers): ModuleList(
    (0): Conv2d(1, 20, kernel_size=(3, 3), stride=(2, 2))
    (1): Flatten(start_dim=1, end_dim=-1)
    (2): Linear(in_features=3380, out_features=3, bias=True)
  )
)


GenerateCNN(
  (layers): ModuleList(
    (0): Conv2d(1, 20, kernel_size=(3, 3), stride=(2, 2))
    (1): Flatten(start_dim=1, end_dim=-1)
    (2): Linear(in_features=3380, out_features=3, bias=True)
  )
)

### 8. Single pool

In [65]:
#test case 8
model = GenerateCNN([('pool',1,0,3,3,28,0,0,[])], 1)
print(model)
model.to(device)

GenerateCNN(
  (layers): ModuleList(
    (0): MaxPool2d(kernel_size=3, stride=3, padding=0, dilation=1, ceil_mode=False)
    (1): Flatten(start_dim=1, end_dim=-1)
    (2): Linear(in_features=81, out_features=3, bias=True)
  )
)


GenerateCNN(
  (layers): ModuleList(
    (0): MaxPool2d(kernel_size=3, stride=3, padding=0, dilation=1, ceil_mode=False)
    (1): Flatten(start_dim=1, end_dim=-1)
    (2): Linear(in_features=81, out_features=3, bias=True)
  )
)

### 9. ('conv',1,20,3,2,28,0,0,[]), ('fc',0,0,0,0,0,512,0,[])

In [98]:
#test case 7
model = GenerateCNN([('conv',1,20,3,2,28,0,0,[]), ('fc',0,0,0,0,0,512,0,[])], 1)
print(model)
model.to(device)

512
GenerateCNN(
  (layers): ModuleList(
    (0): Conv2d(1, 20, kernel_size=(3, 3), stride=(2, 2))
    (1): Linear(in_features=3380, out_features=512, bias=True)
    (2): Linear(in_features=512, out_features=3, bias=True)
  )
)


GenerateCNN(
  (layers): ModuleList(
    (0): Conv2d(1, 20, kernel_size=(3, 3), stride=(2, 2))
    (1): Linear(in_features=3380, out_features=512, bias=True)
    (2): Linear(in_features=512, out_features=3, bias=True)
  )
)

## Testing on 10 imgs

In [99]:
actual_number = lbls[:10].numpy()
actual_number

array([1, 0, 2, 0, 1, 0, 2, 0, 1, 2])

In [100]:
# imgs[:1]
test_output = model(imgs[:10].to(device))
pred_y = torch.max(test_output.cpu(), 1)[1].data.numpy().squeeze()
# print(f'Prediction number: {pred_y}')
# print(f'Actual number: {actual_number}')

In [101]:
test_output

tensor([[-0.0139, -0.0496, -0.1107],
        [-0.0263, -0.0539, -0.0559],
        [-0.0463,  0.0517, -0.0119],
        [-0.0457, -0.1892, -0.0524],
        [-0.0638,  0.0604, -0.0582],
        [-0.0226, -0.0927, -0.0198],
        [-0.0602,  0.0359, -0.0627],
        [-0.0259, -0.0839, -0.0497],
        [-0.0331,  0.0801, -0.0812],
        [-0.0796, -0.0460, -0.0515]], device='cuda:0',
       grad_fn=<AddmmBackward0>)

In [102]:
pred_y

array([0, 0, 1, 0, 1, 2, 1, 0, 1, 1])

In [103]:
print(f'Prediction number: {pred_y}')
print(f'Actual number: {actual_number}')

Prediction number: [0 0 1 0 1 2 1 0 1 1]
Actual number: [1 0 2 0 1 0 2 0 1 2]


## Testing on MNIST 3 classes

In [104]:
loss_func = nn.CrossEntropyLoss()   
loss_func

CrossEntropyLoss()

In [105]:
from torch import optim
optimizer = optim.Adam(model.parameters(), lr = 0.01)   
optimizer

Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: False
    lr: 0.01
    maximize: False
    weight_decay: 0
)

In [106]:
from torch.autograd import Variable
num_epochs = 1
def train(num_epochs, cnn, loaders):
    
    cnn.train()
        
    # Train the model
    total_step = len(loaders['train'])
        
    for epoch in range(num_epochs):
        for i, (images, labels) in enumerate(loaders['train']):
            
            # gives batch data, normalize x when iterate train_loader
            b_x = Variable(images).to(device)   # batch x
            b_y = Variable(labels).to(device)   # batch y
            # print('y batch shape,',b_y.shape)

            output = cnn(b_x)

            loss = loss_func(output, b_y)
            
            # clear gradients for this training step   
            optimizer.zero_grad()           
            
            # backpropagation, compute gradients 
            loss.backward()    
            # apply gradients             
            optimizer.step()                
            
            if (i+1) % 100 == 0:
                print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(epoch + 1, num_epochs, i + 1, total_step, loss.item()))
                pass
        
        pass
    
    
    pass
train(num_epochs, model, loaders)

Epoch [1/1], Step [100/187], Loss: 0.0171


In [107]:
def test():
    # Test the model
    model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        accuracy=0
        for images, labels in loaders['test']:
            images=images.to(device)
            labels=labels.to(device)
            test_output= model(images)
            pred_y = torch.max(test_output, 1)[1].data.squeeze()
            accuracy += (pred_y == labels).sum().item()
            total+=float(labels.size(0))
            
    print('Test Accuracy of the model on the 10000 test images: %.4f' % (accuracy/total))
    return accuracy/total
test()

Test Accuracy of the model on the 10000 test images: 0.9838


0.9837940896091516

In [108]:
print(model)

GenerateCNN(
  (layers): ModuleList(
    (0): Conv2d(1, 20, kernel_size=(3, 3), stride=(2, 2))
    (1): Linear(in_features=3380, out_features=512, bias=True)
    (2): Linear(in_features=512, out_features=3, bias=True)
  )
)
