In [1]:
import numpy as np
import torch.nn.functional as F
import torch
import torch.nn as nn
from torchsummary import summary

In [2]:
# for now just generate some random data, don't have access to kaggle at work
CLASSES = ['fine_concrete', 'concrete', 'soft_tiles', 'tiled', 'soft_pvc',
           'hard_tiles_large_space', 'carpet', 'hard_tiles', 'wood']
y_train = np.random.choice(CLASSES, size=(3810,), replace=True)
x_train = np.random.rand(3810,10,128)

In [79]:
class ShallowCNN(nn.Module):
    def __init__(self):
        super(ShallowCNN, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=40, kernel_size=(1,10))
        self.conv2 = nn.Conv2d(in_channels=40, out_channels=40, kernel_size=(10,1))
        self.conv2_bn = nn.BatchNorm2d(40)
        self.max_pool = nn.MaxPool2d(kernel_size=(1,25), stride=(1,5))
        self.linear = nn.Linear(in_features=40*19, out_features=10)
        
    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = F.elu(self.conv2_bn(x))
        x = x.view((-1,40,119))
        x = self.max_pool(x)
        x = x.view((x.size(0), -1))
        x = self.linear(x)
        return x

In [None]:
'''
class ShallowCNN(nn.Module):
    def __init__(self, n_conv_channels=40, 
                 n_signal_channels=10, 
                 pool_size=25, 
                 pool_stride=15,
                 n_out=10):
        
        super(ShallowCNN, self).__init__()
        
        self.conv1 = nn.Conv2d(in_channels=1, 
                               out_channels=n_conv_channels, 
                               kernel_size=(1,n_signal_channels))
        
        self.conv2 = nn.Conv2d(in_channels=n_conv_channels, 
                               out_channels=n_conv_channels, 
                               kernel_size=(n_signal_channels,1))
        
        self.conv2_bn = nn.BatchNorm2d(n_signal_channels)
        
        self.max_pool = nn.MaxPool2d(kernel_size=(1,pool_size), stride=(1,pool_stride))
        # Need to determine how to calculate 7
        self.linear = nn.Linear(in_features=40*7, out_features=n_out)
        
    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = F.elu(self.conv2_bn(x))
        # need to determine how to calculate this size
        x = x.view((-1,40,119))
        x = self.max_pool(x)
        x = x.view((x.size(0), -1))
        x = self.linear(x)
        return x
'''

In [80]:
foo = ShallowCNN()
foo.cuda()

ShallowCNN(
  (conv1): Conv2d(1, 40, kernel_size=(1, 10), stride=(1, 1))
  (conv2): Conv2d(40, 40, kernel_size=(10, 1), stride=(1, 1))
  (conv2_bn): BatchNorm2d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (max_pool): MaxPool2d(kernel_size=(1, 25), stride=(1, 5), padding=0, dilation=1, ceil_mode=False)
  (linear): Linear(in_features=760, out_features=10, bias=True)
)

In [81]:
summary(foo, input_size=x_train[0:1].shape)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1          [-1, 40, 10, 119]             440
            Conv2d-2           [-1, 40, 1, 119]          16,040
       BatchNorm2d-3           [-1, 40, 1, 119]              80
         MaxPool2d-4               [-1, 40, 19]               0
            Linear-5                   [-1, 10]           7,610
Total params: 24,170
Trainable params: 24,170
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.44
Params size (MB): 0.09
Estimated Total Size (MB): 0.54
----------------------------------------------------------------


In [15]:
x_train[0]

array([[0.33380402, 0.10470838, 0.00606746, ..., 0.64213272, 0.20018625,
        0.11932674],
       [0.18274947, 0.39512242, 0.20858455, ..., 0.81923354, 0.84594072,
        0.78474825],
       [0.95338324, 0.05145966, 0.72145848, ..., 0.14998406, 0.43317054,
        0.15265526],
       ...,
       [0.62304765, 0.9616338 , 0.13721331, ..., 0.23972829, 0.51161284,
        0.86802373],
       [0.31695187, 0.39817989, 0.35514663, ..., 0.70770494, 0.30177923,
        0.42327213],
       [0.57366071, 0.71538512, 0.35295542, ..., 0.1904571 , 0.99964906,
        0.60595204]])

In [46]:
foo.conv1(torch.tensor(x_train[0]))

RuntimeError: Expected 4-dimensional input for 4-dimensional weight [5, 1, 2, 2], but got 2-dimensional input of size [10, 128] instead

In [3]:
class SimpleCNN(torch.nn.Module):
    
    #Our batch shape for input x is (3, 32, 32)
    
    def __init__(self):
        super(SimpleCNN, self).__init__()
        
        #Input channels = 3, output channels = 18
        self.conv1 = torch.nn.Conv2d(3, 18, kernel_size=3, stride=1, padding=1)
        self.pool = torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        
        #4608 input features, 64 output features (see sizing flow below)
        self.fc1 = torch.nn.Linear(18 * 16 * 16, 64)
        
        #64 input features, 10 output features for our 10 defined classes
        self.fc2 = torch.nn.Linear(64, 10)
        
    def forward(self, x):
        
        #Computes the activation of the first convolution
        #Size changes from (3, 32, 32) to (18, 32, 32)
        x = F.relu(self.conv1(x))
        
        #Size changes from (18, 32, 32) to (18, 16, 16)
        x = self.pool(x)
        
        #Reshape data to input to the input layer of the neural net
        #Size changes from (18, 16, 16) to (1, 4608)
        #Recall that the -1 infers this dimension from the other given dimension
        x = x.view(-1, 18 * 16 *16)
        
        #Computes the activation of the first fully connected layer
        #Size changes from (1, 4608) to (1, 64)
        x = F.relu(self.fc1(x))
        
        #Computes the second fully connected layer (activation applied later)
        #Size changes from (1, 64) to (1, 10)
        x = self.fc2(x)
        return(x)

In [10]:
bar = SimpleCNN()
bar.cuda()

SimpleCNN(
  (conv1): Conv2d(3, 18, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=4608, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=10, bias=True)
)

In [11]:
summary(bar, input_size=(3,32,32))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 18, 32, 32]             504
         MaxPool2d-2           [-1, 18, 16, 16]               0
            Linear-3                   [-1, 64]         294,976
            Linear-4                   [-1, 10]             650
Total params: 296,130
Trainable params: 296,130
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.01
Forward/backward pass size (MB): 0.18
Params size (MB): 1.13
Estimated Total Size (MB): 1.32
----------------------------------------------------------------
