### Import libraries

In [1]:
import numpy as np
import torch
import torch.nn as nn                   # all neural network modules, nn.Linear, nn.Conv2d, BatchNorm, Loss functions
import torch.nn.functional as F         # parametersless functions, like (some) activation functions
import torch.optim as optim             # all optimization algorithms, SGD, Adam, etc
from torch.utils.data import DataLoader # gives easier dataset management and creates mini batches
import torchvision
from torchvision import datasets        # has standard datasets we can import in a nice and easy way
from torchvision import transforms      # transformations we can perform on our dataset (data processing)
import nbimporter
from torchsummary import summary

import matplotlib.pyplot as plt

import preproc

print("Pytorch version:", torch.__version__)

Pytorch version: 1.11.0


In [2]:
x = preproc.get_data("data/sign_mnist_train.csv")

In [51]:
print(type(x))
single_batch = iter(x)
Xs, ys = single_batch.next()
print(Xs.shape)
print(ys.shape)
print(type(ys[0]))

<class 'torch.utils.data.dataloader.DataLoader'>
torch.Size([64, 1, 28, 28])
torch.Size([64])
<class 'torch.Tensor'>


### Set device

In [4]:
# device config
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cpu


### Hyperparameters

In [5]:
#in_channels = 1    # 28x28 = 784, size of MNIST images (grayscale)
#hidden_size = 100
num_classes = 24
learning_rate = 0.001
batch_size = 64
num_epochs = 2

### Create Network

In [39]:
class ConvNN(nn.Module):
    def __init__(self):
        super(ConvNN, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=10, kernel_size=(3,3))
        self.pool1 = nn.MaxPool2d(kernel_size=(2,2))

        self.conv2 = nn.Conv2d(in_channels=10, out_channels=20, kernel_size=(3,3))
        self.pool2 = nn.MaxPool2d(kernel_size=(2,2))

        self.conv3 = nn.Conv2d(in_channels=20, out_channels=30, kernel_size=(3,3))
        self.dropout1 = nn.Dropout2d()

        self.fc1 = nn.Linear(30*3*3, 270)
        self.fc2 = nn.Linear(270, num_classes)

        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        output = self.conv1(x)
        output = F.relu(output)
        output = self.pool1(output)

        output = self.conv2(output)
        output = F.relu(output)
        output = self.pool2(output)

        output = self.conv3(output)
        output = F.relu(output)
        output = self.dropout1(output)

        output = output.view(-1, 30*3*3)
        output = F.relu(self.fc1(output))
        output = F.relu(self.fc2(output))
        return self.softmax(output)

### 2nd Architecture

### Initialize network

In [40]:
# Create the network and look at it's text representation
net = ConvNN().to(device)
summary(net, (1, 28, 28))
#print(net)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 10, 26, 26]             100
         MaxPool2d-2           [-1, 10, 13, 13]               0
            Conv2d-3           [-1, 20, 11, 11]           1,820
         MaxPool2d-4             [-1, 20, 5, 5]               0
            Conv2d-5             [-1, 30, 3, 3]           5,430
         Dropout2d-6             [-1, 30, 3, 3]               0
            Linear-7                  [-1, 270]          73,170
            Linear-8                   [-1, 24]           6,504
        LogSoftmax-9                   [-1, 24]               0
Total params: 87,024
Trainable params: 87,024
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.09
Params size (MB): 0.33
Estimated Total Size (MB): 0.43
---------------------------------------------

### Loss and optimizer

In [48]:
# loss_function
criterion = nn.CrossEntropyLoss()
#criterion = nn.NLLLoss()

# optimizers require the parameters to optimize and a learning rate
optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)
#optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

In [42]:
for index, (images, labels) in enumerate(x):
    print(images.shape)
    outputs = net(images)
    print(type(outputs))
    print(outputs)
    #one_batch = iter(x)
    #print(net(images))
    #print(torch.nn.functional.one_hot(torch.arange(0, 5), num_classes=5))
    break

torch.Size([64, 1, 28, 28])
<class 'torch.Tensor'>
tensor([[-3.1986, -3.1986, -3.1494,  ..., -3.1670, -3.1986, -3.1986],
        [-3.1929, -3.1929, -3.1602,  ..., -3.1768, -3.1929, -3.1929],
        [-3.1945, -3.1960, -3.1805,  ..., -3.1705, -3.1960, -3.1960],
        ...,
        [-3.1968, -3.1961, -3.1598,  ..., -3.1860, -3.1968, -3.1968],
        [-3.1936, -3.1936, -3.1850,  ..., -3.1669, -3.1936, -3.1936],
        [-3.1957, -3.1957, -3.1781,  ..., -3.1502, -3.1957, -3.1957]],
       grad_fn=<LogSoftmaxBackward0>)


In [43]:
for batch_idx, (images, labels) in enumerate(x):
    print(np.array(labels))
    labels = torch.nn.functional.one_hot(labels.to(torch.int64), num_classes=24)
    print(labels.shape)
    outputs = net(images)
    print(outputs)
    break

[ 4. 18. 17. 17. 10.  1. 16. 18.  4. 15. 14. 15.  6. 16.  1. 11. 18. 23.
 15. 21.  7. 19. 21.  8.  4. 13.  7.  7. 10. 13. 20.  5.  0.  4. 11.  3.
 23. 14. 13.  0. 16. 21.  5. 16. 10. 10. 11. 18.  0. 23. 15.  3. 13.  7.
  8. 12. 22. 19. 10. 20.  5. 19. 23.  3.]
torch.Size([64, 24])
tensor([[-3.1977, -3.1977, -3.1532,  ..., -3.1797, -3.1977, -3.1977],
        [-3.1993, -3.1993, -3.1596,  ..., -3.1659, -3.1993, -3.1993],
        [-3.1918, -3.1972, -3.1682,  ..., -3.1291, -3.1972, -3.1972],
        ...,
        [-3.1956, -3.1956, -3.1531,  ..., -3.1737, -3.1956, -3.1956],
        [-3.1947, -3.1947, -3.1717,  ..., -3.1731, -3.1947, -3.1947],
        [-3.1957, -3.1957, -3.1677,  ..., -3.1572, -3.1957, -3.1957]],
       grad_fn=<LogSoftmaxBackward0>)


### Train the Model

In [47]:
n_total_steps = len(x)
        

for epoch in range(num_epochs): # no. of full passes (loop) over the data
    #running_loss = 0
    #print(f'epoch: {epoch+1}')

    for batch_idx, (images, labels) in enumerate(x):
        # get data as a list of [images, labels]
        # train_loader is a batch of featuresets and labels
        # batch_idx : index of the batch
        # images    : one batch of features
        # labels    : one batch of targets
        
        # get data to cuda if possible
        images = images.to(device=device)
        labels = labels.to(device=device)
        
        # images are in correct shape
        # no need to flatten MNIST images like normal neural network
        # we did it inside CNN class
        #images = print("images.shape:", images.shape)
        
        # forward propagation
        outputs = net(images) # (batch_size x num_classes)
        
        #output = net(X.view(-1, 28*28)) # pass in reshaped batch

        loss = criterion(outputs, labels.long())
        #loss = F.nll_loss(output, y)    # calc and grab loss value

        # zero previous gradients. you will do this likely every step
        optimizer.zero_grad()
        # back-propagation
        loss.backward()
        # gradient descent or adam step (optimize weights)
        optimizer.step()

        #running_loss += loss.item()


        if (batch_idx+1) % 100 == 0:
            print(f'epoch [{epoch+1}/{num_epochs}], step [{batch_idx+1}/{n_total_steps}], loss = {loss.item():.4f}')

    print("loss =", loss.item()) # print loss. we hope loss (a measure of wrong-ness) declines!
    print("==============================================================")


#print(f'Training loss: {running_loss / len(train_loader)}')

RuntimeError: expected scalar type Long but found Float

The parameters `kernel_size`, `stride`, `padding`, `dilation` can either be:
* a single `int` – in which case the same value is used for the height and width dimension
* a `tuple` of two ints – in which case, the first int is used for the height dimension, and the second int for the width dimension

In [None]:
nn.Conv2d(
    in_channels, 
    out_channels, 
    kernel_size, 
    stride=1, 
    padding=0, 
    dilation=1, 
    groups=1, 
    bias=True, 
    padding_mode='zeros', 
    device=None, 
    dtype=None
    )