In [1]:
import numpy as np
import torch
from torch import nn
import random
import matplotlib.pyplot as plt
import torch.optim as optim
from collections import deque
import torchvision
import torchvision.transforms as transforms
import torchvision.datasets as datasets

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)

Using device: cuda


In [2]:
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transforms.ToTensor())
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transforms.ToTensor())

classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

train_X = torch.Tensor( trainset.data/255.0 - 0.5 )
train_X = train_X.permute( 0, 3, 1, 2 )

train_X = train_X.to( device )

test_X = torch.Tensor( testset.data/255.0 - 0.5 )
test_X = test_X.permute( 0, 3, 1, 2 )

test_X = test_X.to( device )

train_Y = torch.Tensor( np.asarray( trainset.targets ) ).long()
train_Y = train_Y.to( device )
test_Y = torch.Tensor( np.asarray( testset.targets ) ).long()
test_Y = test_Y.to( device )

def get_batch(x, y, batch_size):
  n = x.shape[0]

  batch_indices = random.sample( [ i for i in range(n) ], k = batch_size )

  x_batch = x[ batch_indices ]
  y_batch = y[ batch_indices ]

  return x_batch, y_batch

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:03<00:00, 47557363.53it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [3]:
class CIFARModel(nn.Module):
  def __init__(self):
    super(CIFARModel, self).__init__()

    self.conv_layer_1 = nn.Conv2d(in_channels = 3, out_channels = 20, kernel_size = 3, stride = 1, bias=True)
    self.conv_layer_2 = nn.Conv2d(in_channels = 20, out_channels = 20, kernel_size = 3, stride = 1, bias=True)
    self.conv_layer_3 = nn.Conv2d(in_channels = 20, out_channels = 20, kernel_size = 3, stride = 1, bias=True)
    self.conv_layer_4 = nn.Conv2d(in_channels = 20, out_channels = 20, kernel_size = 3, stride = 1, bias=True)

    self.linear_layer = torch.nn.Linear( in_features = 20*24*24, out_features = 10, bias=True )

  def forward(self, input_tensor):
    output = self.conv_layer_1( input_tensor )
    output = nn.Sigmoid()( output )
    output = self.conv_layer_2( output )
    output = nn.Sigmoid()( output )
    output = self.conv_layer_3( output )
    output = nn.Sigmoid()( output )
    output = self.conv_layer_4( output )

    output = nn.Sigmoid()( output )
    output = nn.Flatten()( output )
    output = self.linear_layer( output )
    return output

In [4]:
def confusion_matrix( model, x, y ):
  identification_counts = np.zeros( shape = (10,10), dtype = np.int32 )

  logits = model( x )
  predicted_classes = torch.argmax( logits, dim = 1 )

  n = x.shape[0]

  for i in range(n):
    actual_class = int( y[i].item() )
    predicted_class = predicted_classes[i].item()
    identification_counts[actual_class, predicted_class] += 1

  return identification_counts

In [5]:
cifar_model = CIFARModel()

cifar_model.to( device )

print( cifar_model )
confusion_matrix( cifar_model, test_X, test_Y )

CIFARModel(
  (conv_layer_1): Conv2d(3, 20, kernel_size=(3, 3), stride=(1, 1))
  (conv_layer_2): Conv2d(20, 20, kernel_size=(3, 3), stride=(1, 1))
  (conv_layer_3): Conv2d(20, 20, kernel_size=(3, 3), stride=(1, 1))
  (conv_layer_4): Conv2d(20, 20, kernel_size=(3, 3), stride=(1, 1))
  (linear_layer): Linear(in_features=11520, out_features=10, bias=True)
)


array([[   0,    0,    0,    0,    0,    0,    0,    0,    0, 1000],
       [   0,    0,    0,    0,    0,    0,    0,    0,    0, 1000],
       [   0,    0,    0,    0,    0,    0,    0,    0,    0, 1000],
       [   0,    0,    0,    0,    0,    0,    0,    0,    0, 1000],
       [   0,    0,    0,    0,    0,    0,    0,    0,    0, 1000],
       [   0,    0,    0,    0,    0,    0,    0,    0,    0, 1000],
       [   0,    0,    0,    0,    0,    0,    0,    0,    0, 1000],
       [   0,    0,    0,    0,    0,    0,    0,    0,    0, 1000],
       [   0,    0,    0,    0,    0,    0,    0,    0,    0, 1000],
       [   0,    0,    0,    0,    0,    0,    0,    0,    0, 1000]],
      dtype=int32)

In [6]:
cnn_optimizer = optim.Adam(cifar_model.parameters(), lr = 0.001 )
loss_function = torch.nn.CrossEntropyLoss()
print("Initial Test Loss:", loss_function( cifar_model( test_X ), test_Y ).item() )

Initial Test Loss: 2.331589937210083


In [7]:
batch_size = 16

for epochs in range(10):
  total_loss = 0
  for batch in range( train_X.shape[0] // batch_size ):
    x_batch, y_batch = get_batch(train_X, train_Y, batch_size)

    cnn_optimizer.zero_grad()
    logits = cifar_model( x_batch )
    loss = loss_function( logits, y_batch )

    loss.backward()
    cnn_optimizer.step()

    total_loss += loss.item()

  print( "Average Total Loss over Batches:", total_loss / ( train_X.shape[0] // batch_size ) )
  print( confusion_matrix( cifar_model, test_X, test_Y ) )

Average Total Loss over Batches: 2.22976244594574
[[421 114  25  78   7   1  53  16 167 118]
 [ 43 403  19  69  31  23 140  27 109 136]
 [ 93  72 106 109  50  19 422  30  41  58]
 [ 47  84  74 188  31  43 397  37  32  67]
 [ 41  34  42  77  46  13 615  44  38  50]
 [ 52 134  58 173  56  72 338  26  42  49]
 [ 17  38  24  75  16   5 749  25   9  42]
 [ 45 130  55  79  49  11 288 122  37 184]
 [180 128  10  93   2  15  37  17 356 162]
 [ 36 186  22  45  18   3 122  33 129 406]]
Average Total Loss over Batches: 1.9956081354522706
[[427  60  46  18  22  26  57  55 127 162]
 [ 28 340  21  10  74  77 148  76  93 133]
 [ 69  43 123  20  82  79 472  61  20  31]
 [ 29  28  65  51  86 136 469  86  15  35]
 [ 30  20  37  20  67  45 655  64  21  41]
 [ 27  49  50  23 100 194 441  75  17  24]
 [  6  15  18  22  40  25 795  58   6  15]
 [ 21  64  32  25  98  44 335 272   9 100]
 [219  78  24  13  29  65  35  53 271 213]
 [ 35 138  19  23  64  21 145 143  69 343]]
Average Total Loss over Batches: 1.8

In [8]:
class CIFARModelBatch(nn.Module):
  def __init__(self):
    super(CIFARModelBatch, self).__init__()

    self.conv_layer_1 = nn.Conv2d(in_channels = 3, out_channels = 20, kernel_size = 3, stride = 1, bias=True)
    self.batch_norm_1 = torch.nn.BatchNorm2d(20)
    self.conv_layer_2 = nn.Conv2d(in_channels = 20, out_channels = 20, kernel_size = 3, stride = 1, bias=True)
    self.batch_norm_2 = torch.nn.BatchNorm2d(20)
    self.conv_layer_3 = nn.Conv2d(in_channels = 20, out_channels = 20, kernel_size = 3, stride = 1, bias=True)
    self.batch_norm_3 = torch.nn.BatchNorm2d(20)
    self.conv_layer_4 = nn.Conv2d(in_channels = 20, out_channels = 20, kernel_size = 3, stride = 1, bias=True)
    self.batch_norm_4 = torch.nn.BatchNorm2d(20)

    self.linear_layer = torch.nn.Linear( in_features = 20*24*24, out_features = 10, bias=True )

  def forward(self, input_tensor):
    output = self.conv_layer_1( input_tensor )
    output = nn.Sigmoid()( output )
    output = self.batch_norm_1( output )
    output = self.conv_layer_2( output )
    output = nn.Sigmoid()( output )
    output = self.batch_norm_2( output )
    output = self.conv_layer_3( output )
    output = nn.Sigmoid()( output )
    output = self.batch_norm_3( output )
    output = self.conv_layer_4( output )
    output = nn.Sigmoid()( output )
    output = self.batch_norm_4( output )


    output = nn.Flatten()( output )
    output = self.linear_layer( output )
    return output

In [9]:
cifar_model = CIFARModelBatch()

cifar_model.to( device )

print( cifar_model )
confusion_matrix( cifar_model, test_X, test_Y )

CIFARModelBatch(
  (conv_layer_1): Conv2d(3, 20, kernel_size=(3, 3), stride=(1, 1))
  (batch_norm_1): BatchNorm2d(20, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv_layer_2): Conv2d(20, 20, kernel_size=(3, 3), stride=(1, 1))
  (batch_norm_2): BatchNorm2d(20, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv_layer_3): Conv2d(20, 20, kernel_size=(3, 3), stride=(1, 1))
  (batch_norm_3): BatchNorm2d(20, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv_layer_4): Conv2d(20, 20, kernel_size=(3, 3), stride=(1, 1))
  (batch_norm_4): BatchNorm2d(20, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (linear_layer): Linear(in_features=11520, out_features=10, bias=True)
)


array([[ 58,  92,  45,  53,  41,  86, 226,  33,  98, 268],
       [ 64,  80, 134,  91,  51,  57, 144, 148,  97, 134],
       [ 61,  83,  79, 138, 115,  94, 101, 133,  99,  97],
       [ 66,  76, 116, 114, 109,  97, 107, 150,  80,  85],
       [ 55,  69,  51, 180, 154,  89,  70, 203,  83,  46],
       [ 85,  69, 101, 122, 107,  93, 115, 125,  60, 123],
       [ 33,  62,  93, 174, 132,  83,  29, 254,  94,  46],
       [ 93,  97,  53, 141, 101, 126,  79, 154,  68,  88],
       [ 66,  67,  57,  53,  21,  96, 184,  51,  78, 327],
       [ 63, 109, 100, 119,  59,  71, 112, 109, 125, 133]], dtype=int32)

In [10]:
cnn_optimizer = optim.Adam(cifar_model.parameters(), lr = 0.001 )
loss_function = torch.nn.CrossEntropyLoss()
print("Initial Test Loss:", loss_function( cifar_model( test_X ), test_Y ).item() )

Initial Test Loss: 2.4296348094940186


In [11]:
batch_size = 16

for epochs in range(10):
  total_loss = 0
  cifar_model.train()
  for batch in range( train_X.shape[0] // batch_size ):
    x_batch, y_batch = get_batch(train_X, train_Y, batch_size)

    cnn_optimizer.zero_grad()
    logits = cifar_model( x_batch )
    loss = loss_function( logits, y_batch )

    loss.backward()
    cnn_optimizer.step()

    total_loss += loss.item()

  print( "Average Total Loss over Batches:", total_loss / ( train_X.shape[0] // batch_size ) )
  cifar_model.eval()
  print( confusion_matrix( cifar_model, test_X, test_Y ) )

Average Total Loss over Batches: 1.9165997318077088
[[536  36 104  27  28  25  11  20 118  95]
 [ 51 622   7  10   8   9  13   6  53 221]
 [ 70  17 371 127 150  76  76  63  24  26]
 [ 10  17  56 369 111 203 107  53  28  46]
 [ 30  14  97  91 469  62 111  98  16  12]
 [  9   5  77 187 102 443  35 107  11  24]
 [ 10  13  45  89  68  40 651  42  14  28]
 [ 14   7  30  57  98  85  14 633   7  55]
 [159  59  21  16   7  15  12  10 595 106]
 [ 28  99  16  18   8   5  28  16  46 736]]
Average Total Loss over Batches: 1.0995260130691529
[[748  33  60  11  16   8  17  16  48  43]
 [ 39 776   5   8   5   7  18   6  16 120]
 [102  12 475  55 117  75  88  54  14   8]
 [ 30  21  90 375  64 222 108  44  18  28]
 [ 41   7  81  50 511  55 134 107   9   5]
 [ 23  10 115 120  49 546  39  78  10  10]
 [ 10   6  33  61  35  23 809  12   6   5]
 [ 20   8  59  35  66  90  10 687   1  24]
 [149  85  25  12  10   7   7   7 622  76]
 [ 31 106  15  13   8   3   5  22  34 763]]
Average Total Loss over Batches: 0

In [23]:
class CIFARModelDropout(nn.Module):
  def __init__(self):
    super(CIFARModelDropout, self).__init__()

    self.conv_layer_1 = nn.Conv2d(in_channels = 3, out_channels = 20, kernel_size = 3, stride = 1, bias=True)
    self.conv_layer_2 = nn.Conv2d(in_channels = 20, out_channels = 20, kernel_size = 3, stride = 1, bias=True)
    self.conv_layer_3 = nn.Conv2d(in_channels = 20, out_channels = 20, kernel_size = 3, stride = 1, bias=True)
    self.conv_layer_4 = nn.Conv2d(in_channels = 20, out_channels = 20, kernel_size = 3, stride = 1, bias=True)
    self.dropout_4 = torch.nn.Dropout(0.5)

    self.linear_layer = torch.nn.Linear( in_features = 20*24*24, out_features = 10, bias=True )

  def forward(self, input_tensor):
    output = self.conv_layer_1( input_tensor )
    output = nn.Sigmoid()( output )
    output = self.conv_layer_2( output )
    output = nn.Sigmoid()( output )
    output = self.conv_layer_3( output )
    output = nn.Sigmoid()( output )
    output = self.conv_layer_4( output )
    output = nn.Sigmoid()( output )
    output = self.dropout_4( output )


    output = nn.Flatten()( output )
    output = self.linear_layer( output )
    return output

In [25]:
cifar_model = CIFARModelDropout()

cifar_model.to( device )

print( cifar_model )
confusion_matrix( cifar_model, test_X, test_Y )

CIFARModelDropout(
  (conv_layer_1): Conv2d(3, 20, kernel_size=(3, 3), stride=(1, 1))
  (conv_layer_2): Conv2d(20, 20, kernel_size=(3, 3), stride=(1, 1))
  (conv_layer_3): Conv2d(20, 20, kernel_size=(3, 3), stride=(1, 1))
  (conv_layer_4): Conv2d(20, 20, kernel_size=(3, 3), stride=(1, 1))
  (dropout_4): Dropout(p=0.5, inplace=False)
  (linear_layer): Linear(in_features=11520, out_features=10, bias=True)
)


array([[324, 215,  44,   4,  32,  15,   4,  36,  77, 249],
       [320, 222,  50,   3,  34,  11,   5,  24,  76, 255],
       [335, 223,  38,   6,  35,  12,   3,  34,  65, 249],
       [325, 216,  49,   4,  35,  12,   4,  34,  74, 247],
       [308, 225,  49,   7,  32,  10,   4,  32,  72, 261],
       [303, 232,  48,   2,  45,  10,   5,  25,  79, 251],
       [317, 221,  47,   2,  39,  12,   4,  28,  74, 256],
       [296, 226,  54,   1,  41,  12,   5,  26,  66, 273],
       [336, 209,  46,   5,  34,  10,   3,  36,  59, 262],
       [313, 229,  49,   4,  37,  12,   4,  39,  71, 242]], dtype=int32)

In [26]:
cnn_optimizer = optim.Adam(cifar_model.parameters(), lr = 0.001 )
loss_function = torch.nn.CrossEntropyLoss()
print("Initial Test Loss:", loss_function( cifar_model( test_X ), test_Y ).item() )

Initial Test Loss: 2.367394208908081


In [27]:
batch_size = 16

for epochs in range(10):
  total_loss = 0
  cifar_model.train()
  for batch in range( train_X.shape[0] // batch_size ):
    x_batch, y_batch = get_batch(train_X, train_Y, batch_size)

    cnn_optimizer.zero_grad()
    logits = cifar_model( x_batch )
    loss = loss_function( logits, y_batch )

    loss.backward()
    cnn_optimizer.step()

    total_loss += loss.item()

  print( "Average Total Loss over Batches:", total_loss / ( train_X.shape[0] // batch_size ) )
  cifar_model.eval()
  print( confusion_matrix( cifar_model, test_X, test_Y ) )

Average Total Loss over Batches: 2.298037365989685
[[337  70  94  20  18  99  52  19 235  56]
 [ 90 254  32  20  50 114 109  51 165 115]
 [ 81  65 159  47  89  74 287  68  75  55]
 [ 74  55 132 111  62 153 209  56  65  83]
 [ 47  51  69  53  71  63 458  74  55  59]
 [ 96  66 100  85  51 267 148  56  77  54]
 [ 23  55  74  43  58  69 512  74  21  71]
 [110 105  80  59  88  64 143  82  84 185]
 [104  71  23  18  19 132  27  23 470 113]
 [ 75 132  27  16  29  30  76  45 253 317]]
Average Total Loss over Batches: 2.0116825002670287
[[441 118  39  26   6   5  30  31 183 121]
 [ 26 533  41  22  34   8  90  18  92 136]
 [ 92 109 303  65  39  14 196  85  58  39]
 [ 43 100 166 160  35  56 241  77  57  65]
 [ 53  58 234  47 122  14 256 131  43  42]
 [ 48 105 170 121  42 132 180  80  89  33]
 [ 12  41 137  58  43   9 601  38  20  41]
 [ 49 105 125  52  56  24  78 347  45 119]
 [134 141  15  33   4  11  28  21 485 128]
 [ 40 267  28  26  15   1  84  23 108 408]]
Average Total Loss over Batches: 1.

In [28]:
class CIFARModelBatchNormAndDropout(nn.Module):
  def __init__(self):
    super(CIFARModelBatchNormAndDropout, self).__init__()

    self.conv_layer_1 = nn.Conv2d(in_channels = 3, out_channels = 20, kernel_size = 3, stride = 1, bias=True)
    self.batch_norm_1 = torch.nn.BatchNorm2d(20)
    self.conv_layer_2 = nn.Conv2d(in_channels = 20, out_channels = 20, kernel_size = 3, stride = 1, bias=True)
    self.batch_norm_2 = torch.nn.BatchNorm2d(20)
    self.conv_layer_3 = nn.Conv2d(in_channels = 20, out_channels = 20, kernel_size = 3, stride = 1, bias=True)
    self.batch_norm_3 = torch.nn.BatchNorm2d(20)
    self.conv_layer_4 = nn.Conv2d(in_channels = 20, out_channels = 20, kernel_size = 3, stride = 1, bias=True)
    self.batch_norm_4 = torch.nn.BatchNorm2d(20)
    self.dropout_4 = torch.nn.Dropout(0.5)


    self.linear_layer = torch.nn.Linear( in_features = 20*24*24, out_features = 10, bias=True )

  def forward(self, input_tensor):
    output = self.conv_layer_1( input_tensor )
    output = nn.Sigmoid()( output )
    output = self.batch_norm_1( output )
    output = self.conv_layer_2( output )
    output = nn.Sigmoid()( output )
    output = self.batch_norm_2( output )
    output = self.conv_layer_3( output )
    output = nn.Sigmoid()( output )
    output = self.batch_norm_3( output )
    output = self.conv_layer_4( output )
    output = nn.Sigmoid()( output )
    output = self.batch_norm_4( output )
    output = self.dropout_4( output )


    output = nn.Flatten()( output )
    output = self.linear_layer( output )
    return output

In [29]:
cifar_model = CIFARModelBatchNormAndDropout()

cifar_model.to( device )

print( cifar_model )
confusion_matrix( cifar_model, test_X, test_Y )

CIFARModelBatchNormAndDropout(
  (conv_layer_1): Conv2d(3, 20, kernel_size=(3, 3), stride=(1, 1))
  (batch_norm_1): BatchNorm2d(20, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv_layer_2): Conv2d(20, 20, kernel_size=(3, 3), stride=(1, 1))
  (batch_norm_2): BatchNorm2d(20, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv_layer_3): Conv2d(20, 20, kernel_size=(3, 3), stride=(1, 1))
  (batch_norm_3): BatchNorm2d(20, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv_layer_4): Conv2d(20, 20, kernel_size=(3, 3), stride=(1, 1))
  (batch_norm_4): BatchNorm2d(20, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (dropout_4): Dropout(p=0.5, inplace=False)
  (linear_layer): Linear(in_features=11520, out_features=10, bias=True)
)


array([[184,  95, 112, 199,  49,  39,  79,  65,  82,  96],
       [121, 134,  93, 157, 111,  85,  88,  78,  66,  67],
       [114,  93, 103,  90, 105, 113,  97,  95,  91,  99],
       [ 83, 102, 121, 103, 117, 107,  98, 109,  85,  75],
       [ 92,  82, 109, 100, 105, 150,  85, 110,  79,  88],
       [122, 103, 115,  81, 106, 100,  94, 100,  99,  80],
       [ 67,  86,  82,  72, 134, 159,  98, 132,  81,  89],
       [ 91,  95, 111, 131, 132,  88,  99,  87,  86,  80],
       [171, 101,  87, 225,  54,  51,  96,  63,  74,  78],
       [ 82, 121, 110, 202,  91,  97, 106,  72,  49,  70]], dtype=int32)

In [30]:
cnn_optimizer = optim.Adam(cifar_model.parameters(), lr = 0.001 )
loss_function = torch.nn.CrossEntropyLoss()
print("Initial Test Loss:", loss_function( cifar_model( test_X ), test_Y ).item() )

Initial Test Loss: 2.5393030643463135


In [31]:
batch_size = 16

for epochs in range(10):
  total_loss = 0
  cifar_model.train()
  for batch in range( train_X.shape[0] // batch_size ):
    x_batch, y_batch = get_batch(train_X, train_Y, batch_size)

    cnn_optimizer.zero_grad()
    logits = cifar_model( x_batch )
    loss = loss_function( logits, y_batch )

    loss.backward()
    cnn_optimizer.step()

    total_loss += loss.item()

  print( "Average Total Loss over Batches:", total_loss / ( train_X.shape[0] // batch_size ) )
  cifar_model.eval()
  print( confusion_matrix( cifar_model, test_X, test_Y ) )

Average Total Loss over Batches: 2.029434037246704
[[599  49  24  13  25  20  36  21 164  49]
 [ 48 688   6   9   4  21  14  20  51 139]
 [102  24 229 107 155 142 121  72  25  23]
 [ 21  21  28 309  81 275 157  63  16  29]
 [ 47  18  56  90 370 120 136 140  15   8]
 [ 18  11  35 184  70 492  71  96  14   9]
 [  9  25  30  92  43  50 703  23   7  18]
 [ 27  12  14  71  64 138  38 580  12  44]
 [178  82   7  16   6  16  22  12 588  73]
 [ 66 195   7  20   9  28  33  26  63 553]]
Average Total Loss over Batches: 1.2454391029930114
[[678  35  39  19   8  14  12  11 150  34]
 [ 52 772   5  10   3  10   9   2  60  77]
 [100  15 401  63  82 188  54  35  41  21]
 [ 21  20  50 356  52 337  47  34  49  34]
 [ 36   5 114  85 429 109  63 124  25  10]
 [ 12  10  68 142  25 623   9  59  28  24]
 [  8   6  65 103  53  92 618  16  21  18]
 [ 21  15  30  56  58 136   6 629   8  41]
 [107  80  10   8   3  11   4   4 742  31]
 [ 55 135  12  16   7  16   7  13  78 661]]
Average Total Loss over Batches: 1.