In the upper right corner, open the dropdown menu, select 'Change Runtime Type', and choose a GPU if one is available - this will allow you to run your neural network training on accelerated hardware, so everything runs faster.

In [1]:
import numpy as np
import torch
from torch import nn
import random
import matplotlib.pyplot as plt
import torch.optim as optim
from collections import deque
import torchvision
import torchvision.transforms as transforms
import torchvision.datasets as datasets

# Pick the compute device once: CUDA when PyTorch reports a usable GPU,
# otherwise the CPU. Everything later is moved to this device explicitly.
if torch.cuda.is_available():
  device = torch.device('cuda')
else:
  device = torch.device('cpu')
print('Using device:', device)

Using device: cuda


This indicates that a GPU has been detected and can be used - the device is saved to 'device' so that we can direct data and models to access memory on that device.

In [2]:
def _dataset_to_tensors( dataset, device ):
  """Turn a loaded CIFAR10 dataset into (images, labels) tensors stored on `device`.

  Args:
    dataset: Object exposing `.data` (array of images with the channel axis
      last) and `.targets` (sequence of integer class ids).
    device: torch.device that both returned tensors are moved to.

  Returns:
    Tuple `(images, labels)`: `images` is float32 with the channel axis moved
    to position 1, `labels` is int64.
  """
  # Rescale pixels from (presumably) 0..255 to roughly -0.5..0.5; the arithmetic
  # is done by NumPy first and the result is then stored as float32.
  images = torch.as_tensor( dataset.data/255.0 - 0.5, dtype = torch.float32 )
  # Move the last axis (the 3 colour channels) to position 1, the
  # channels-first layout that nn.Conv2d expects.
  images = images.permute( 0, 3, 1, 2 )

  # Build the labels directly as integers instead of round-tripping through float32.
  labels = torch.as_tensor( np.asarray( dataset.targets ), dtype = torch.long )

  # This is what differs from the CPU-only version: both tensors are transferred
  # to the selected device (GPU memory when available), as that is where the
  # model processing will occur.
  return images.to( device ), labels.to( device )


trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transforms.ToTensor())
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transforms.ToTensor())

classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

train_X, train_Y = _dataset_to_tensors( trainset, device )
test_X, test_Y = _dataset_to_tensors( testset, device )

# All the data needs to be loaded into the GPU, as that is where the model processing will occur.

def get_batch(x, y, batch_size):
  """Draw a random mini-batch of matching rows from `x` and `y`.

  Args:
    x: Inputs; must expose `.shape` and accept a list of row indices.
    y: Labels aligned with `x` along the first dimension.
    batch_size: Number of distinct rows to draw (sampling without replacement
      within a single batch).

  Returns:
    Tuple `(x_batch, y_batch)` holding the same randomly chosen rows of `x` and `y`.

  Raises:
    ValueError: If `batch_size` is negative or larger than the number of rows
      in `x` (raised by `random.sample`).
  """
  n = x.shape[0]

  # Sample straight from the range: this avoids building a throwaway list of
  # all n indices on every call, while drawing the same indices for a given seed.
  batch_indices = random.sample( range(n), k = batch_size )

  return x[ batch_indices ], y[ batch_indices ]

Files already downloaded and verified
Files already downloaded and verified


In [3]:
class CIFARModel(nn.Module):
  """Small convolutional classifier: three 3x3 convolutions followed by one linear layer.

  The linear layer is sized for 3-channel 32x32 inputs and produces 10 raw
  scores (logits) per image, one per class.
  """

  def __init__(self):
    super().__init__()

    self.conv_layer_1 = nn.Conv2d(in_channels = 3, out_channels = 5, kernel_size = 3, stride = 1, bias=True)
    self.conv_layer_2 = nn.Conv2d(in_channels = 5, out_channels = 10, kernel_size = 3, stride = 1, bias=True)
    self.conv_layer_3 = nn.Conv2d(in_channels = 10, out_channels = 15, kernel_size = 3, stride = 1, bias=True)

    # Each unpadded 3x3 convolution with stride 1 trims 2 pixels from the height
    # and the width, so a 32x32 image shrinks 32 -> 30 -> 28 -> 26 and the last
    # convolutional layer outputs a 15x26x26 block.
    # So we want to input 15*26*26 values into the last layer, and get 10 output
    # values out (one score per class).
    self.linear_layer = torch.nn.Linear( in_features = 15*26*26, out_features = 10, bias=True )

  def forward(self, input_tensor):
    """Return the class logits for a batch of images.

    Args:
      input_tensor: Batch of images with 3 channels on dimension 1; the spatial
        size must be 32x32 for the flattened features to fit the linear layer.

    Returns:
      Tensor with one row of 10 unnormalised scores per input image.
    """
    # The functional sigmoid avoids constructing a new nn.Sigmoid module on every call.
    output = torch.sigmoid( self.conv_layer_1( input_tensor ) )
    output = torch.sigmoid( self.conv_layer_2( output ) )
    output = torch.sigmoid( self.conv_layer_3( output ) )

    # At this point, the block of node values from the convolutional layer is
    # flattened (keeping the batch dimension) so that it can be passed into a
    # standard linear layer.
    output = torch.flatten( output, start_dim = 1 )
    return self.linear_layer( output )

In [4]:
def confusion_matrix( model, x, y, num_classes = 10 ):
  """Count how often each actual class is predicted as each class.

  Args:
    model: Callable mapping `x` to a tensor of per-class scores with the class
      axis on dimension 1.
    x: Batch of inputs accepted by `model`; its first dimension is the sample axis.
    y: Actual class ids, aligned with `x` along the first dimension.
    num_classes: Number of classes, i.e. the side length of the returned matrix.

  Returns:
    NumPy int32 array of shape `(num_classes, num_classes)` where entry
    `[actual, predicted]` is the number of samples of class `actual` that the
    model assigned to class `predicted`.
  """
  n = x.shape[0]

  # Evaluation only: no_grad stops autograd from recording a graph for the
  # whole evaluation set, which would otherwise cost memory for nothing.
  with torch.no_grad():
    logits = model( x )
    predicted_classes = torch.argmax( logits, dim = 1 )

  # Copy both index vectors to host memory once, rather than calling .item()
  # (one device synchronisation) for every single sample.
  predicted = predicted_classes.cpu().numpy()
  actual = torch.as_tensor( y )[:n].detach().cpu().numpy().astype( np.int64 )

  identification_counts = np.zeros( shape = (num_classes, num_classes), dtype = np.int32 )
  # np.add.at accumulates correctly when the same (actual, predicted) pair
  # occurs more than once, unlike plain fancy-index assignment.
  np.add.at( identification_counts, (actual, predicted), 1 )

  return identification_counts

In [5]:
# Build the network and, as the only change from the CPU version, send its
# parameters to the selected device (the GPU when available) in the same statement.
cifar_model = CIFARModel().to( device )

print( cifar_model )
# Confusion matrix of the untrained model on the test set; left as the final
# expression so the notebook displays it.
confusion_matrix( cifar_model, test_X, test_Y )

CIFARModel(
  (conv_layer_1): Conv2d(3, 5, kernel_size=(3, 3), stride=(1, 1))
  (conv_layer_2): Conv2d(5, 10, kernel_size=(3, 3), stride=(1, 1))
  (conv_layer_3): Conv2d(10, 15, kernel_size=(3, 3), stride=(1, 1))
  (linear_layer): Linear(in_features=10140, out_features=10, bias=True)
)


array([[   0,    0,    0,    0, 1000,    0,    0,    0,    0,    0],
       [   0,    0,    0,    0, 1000,    0,    0,    0,    0,    0],
       [   0,    0,    0,    0, 1000,    0,    0,    0,    0,    0],
       [   0,    0,    0,    0, 1000,    0,    0,    0,    0,    0],
       [   0,    0,    0,    0, 1000,    0,    0,    0,    0,    0],
       [   0,    0,    0,    0, 1000,    0,    0,    0,    0,    0],
       [   0,    0,    0,    0, 1000,    0,    0,    0,    0,    0],
       [   0,    0,    0,    0, 1000,    0,    0,    0,    0,    0],
       [   0,    0,    0,    0, 1000,    0,    0,    0,    0,    0],
       [   0,    0,    0,    0, 1000,    0,    0,    0,    0,    0]])

At this point, everything else runs as before - just faster. I also increased the batch size, which I could probably have done previously. Nevertheless - faster.

In [6]:
# Adam updates every parameter of the model; cross-entropy compares the model's
# raw class scores against the integer class labels.
cnn_optimizer = optim.Adam(cifar_model.parameters(), lr = 0.01 )
loss_function = torch.nn.CrossEntropyLoss()

# This baseline loss is only reported, never back-propagated, so it is computed
# without recording an autograd graph for the whole test set.
with torch.no_grad():
  initial_test_loss = loss_function( cifar_model( test_X ), test_Y ).item()
print("Initial Test Loss:", initial_test_loss )

Initial Test Loss: 2.353330135345459


In [7]:
import time

In [8]:
batch_size = 16
# One pass of the outer loop draws this many random mini-batches, i.e. roughly
# one training set's worth of samples.
batches_per_epoch = train_X.shape[0] // batch_size
startTime = time.time()

for epochs in range(100):
  total_loss = 0
  for batch in range( batches_per_epoch ):
    x_batch, y_batch = get_batch(train_X, train_Y, batch_size)

    # Standard optimisation step: clear old gradients, run the forward pass,
    # back-propagate the loss, then update the weights.
    cnn_optimizer.zero_grad()
    logits = cifar_model( x_batch )
    loss = loss_function( logits, y_batch )
    loss.backward()
    cnn_optimizer.step()

    # .item() extracts a plain Python float, so no graph is kept alive by the sum.
    total_loss += loss.item()

  # Report the mean training loss for this pass and the test-set confusion matrix.
  print( "Average Total Loss over Batches:", total_loss / batches_per_epoch )
  print( confusion_matrix( cifar_model, test_X, test_Y ) )

endTime = time.time()
elapsed = endTime - startTime
print(str(elapsed) + " sec")
print("or roughly " + str(elapsed//60) + " min")

Average Total Loss over Batches: 2.331151395301819
[[246  38  31  30  12  76  71  32 378  86]
 [ 25 267  10  22  44 147 173  46 175  91]
 [ 31  39  91  34  33  95 513  31  85  48]
 [ 23  23  62  98  57 164 437  53  32  51]
 [ 16   5  35  35  44  55 681  40  55  34]
 [ 20  39  47  61  66 264 383  52  41  27]
 [  8  14  31  52  33  27 772  34   7  22]
 [ 15  75  24  38  46  89 383 132  49 149]
 [ 87  42  17  29  11  96  48  29 483 158]
 [ 20  85  12  35  26  56 155  73 196 342]]
Average Total Loss over Batches: 1.9741747277832031
[[276  55  71  87  28  17  99  62  90 215]
 [  5 307  12  50  75  38 264  51  21 177]
 [ 25  25 114  60  50  27 602  46  11  40]
 [  5  21  22 107  41  57 650  54   8  35]
 [ 12   8  29  38  38  15 764  42   9  45]
 [  6  54  27 110  69  74 597  48   0  15]
 [  1   4   9  27  10  16 880  35   1  17]
 [  4  29  22  42  57  19 517 242   3  65]
 [120  71  35  74  27  37  70  55 176 335]
 [  4  93  13  36  50  14 262 143  20 365]]
Average Total Loss over Batches: 1.

Similar results - but much faster.