**Convolutional Neural Networks**

---


*Build a CNN model*

In [8]:
import torch
import torchvision
from torchvision import transforms

# Run on the first CUDA GPU when one is visible, otherwise fall back to the CPU
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')


# Preprocessing pipelines applied by the datasets when a sample is fetched.
# Both convert the image to a tensor and normalize each of the three channels
# with mean 0.5 and std 0.5; only the training pipeline adds a random
# horizontal flip as augmentation.
train_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])


# CIFAR-10 train / test splits (downloaded into ./cifar10_data/ if missing)
# https://pytorch.org/vision/stable/generated/torchvision.datasets.CIFAR10.html

train_dataset = torchvision.datasets.CIFAR10(
    './cifar10_data/', train=True, transform=train_transform, download=True)

test_dataset = torchvision.datasets.CIFAR10(
    './cifar10_data/', train=False, transform=test_transform, download=True)

# Inspect a few attributes of the training split: raw array shape, number of
# labels, class names, name -> index mapping and the attached transform
print(
    train_dataset.data.shape,
    len(train_dataset.targets),
    train_dataset.classes,
    train_dataset.class_to_idx,
    train_dataset.transforms,
    sep='\n',
)


Files already downloaded and verified
Files already downloaded and verified
(50000, 32, 32, 3)
50000
['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
{'airplane': 0, 'automobile': 1, 'bird': 2, 'cat': 3, 'deer': 4, 'dog': 5, 'frog': 6, 'horse': 7, 'ship': 8, 'truck': 9}
StandardTransform
Transform: Compose(
               RandomHorizontalFlip(p=0.5)
               ToTensor()
               Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
           )


In [14]:

# Training configuration
num_epochs = 100
num_classes = 10
batch_size = 64
learning_rate = 0.001


# Batched iterators over the two splits.
# Each dataset item is a tuple (image, target_index); the loader stacks them
# into batches. Only the training data is reshuffled.

train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True)

test_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size=batch_size, shuffle=False)

# Pull one training batch and show the shapes of the images and the labels
X, y = next(iter(train_loader))
print(X.shape, y.shape, sep='\n')

torch.Size([64, 3, 32, 32])
torch.Size([64])


In [15]:
# The model

class ConvNet(torch.nn.Module):
  """Small two-stage CNN classifier for 32x32 images.

  Each stage is Conv2d(5x5, stride 1, padding 2) -> BatchNorm2d -> ReLU ->
  MaxPool2d(2, 2). The convolutions keep the spatial size and each pooling
  halves it (32 -> 16 -> 8), while the channels go in_channels -> 16 -> 32.
  A single linear layer maps the flattened 8*8*32 features to the logits.

  Args:
    in_channels: number of channels of the input images.
    num_classes: number of logits produced per image.
  """

  def __init__(self, in_channels=3, num_classes=10):
    super().__init__()

    # Define the layers
    self.conv2d_1 = torch.nn.Conv2d(in_channels=in_channels,
                                    out_channels=16,
                                    kernel_size=5,
                                    stride=1,
                                    padding=2)

    self.conv2d_2 = torch.nn.Conv2d(in_channels=16,
                                    out_channels=32,
                                    kernel_size=5,
                                    stride=1,
                                    padding=2)

    self.batchnorm_1 = torch.nn.BatchNorm2d(16)
    self.batchnorm_2 = torch.nn.BatchNorm2d(32)

    # Parameter-free layers, shared by both stages
    self.maxpool = torch.nn.MaxPool2d(kernel_size=2,stride=2)

    self.relu = torch.nn.ReLU()

    # Here, the size is worked out by hand: 32 channels of 8x8 after two poolings
    self.fc = torch.nn.Linear(8*8*32,num_classes)

  def forward(self,input):
    """Return the logits of shape (batch, num_classes) for a batch of images.

    Args:
      input: tensor of shape (batch, in_channels, 32, 32).

    Raises:
      RuntimeError: if the spatial size is not 32x32, because the flattened
        features then no longer match the input size of the linear layer.
    """

    # first convolutional layer
    x = self.conv2d_1(input)
    x = self.batchnorm_1(x)
    x = self.relu(x)
    x = self.maxpool(x)     # the size (h,w) is reduced to 16x16

    # second convolutional layer
    x = self.conv2d_2(x)
    x = self.batchnorm_2(x)
    x = self.relu(x)
    x = self.maxpool(x)     # the size (h,w) is reduced to 8x8

    # Flatten everything except the batch dimension.
    # A view(-1, 8*8*32) would let the batch dimension absorb any size
    # mismatch and silently return the wrong number of rows; keeping the batch
    # dimension fixed makes a wrong input size fail loudly in self.fc instead.
    x = torch.flatten(x, start_dim=1)

    # fully connected
    x = self.fc(x)

    return x


# Build the network with its default arguments and move it to the chosen device
model = ConvNet()
model = model.to(device)

# Printing a module lists the sub-modules registered in its __init__
print(model)


ConvNet(
  (conv2d_1): Conv2d(3, 16, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (conv2d_2): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (batchnorm_1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (batchnorm_2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (maxpool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (relu): ReLU()
  (fc): Linear(in_features=2048, out_features=10, bias=True)
)


*Summarize the model*

In [16]:
# This prints the forward process
!pip install torchinfo
from torchinfo import summary
# Include the size of the input tensor. Include what columns to be displayed
summary(model, (1, 3, 32,32), col_names=["kernel_size", "output_size", "num_params"])

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting torchinfo
  Downloading torchinfo-1.7.1-py3-none-any.whl (22 kB)
Installing collected packages: torchinfo
Successfully installed torchinfo-1.7.1


Layer (type:depth-idx)                   Kernel Shape              Output Shape              Param #
ConvNet                                  --                        [1, 10]                   --
├─Conv2d: 1-1                            [5, 5]                    [1, 16, 32, 32]           1,216
├─BatchNorm2d: 1-2                       --                        [1, 16, 32, 32]           32
├─ReLU: 1-3                              --                        [1, 16, 32, 32]           --
├─MaxPool2d: 1-4                         2                         [1, 16, 16, 16]           --
├─Conv2d: 1-5                            [5, 5]                    [1, 32, 16, 16]           12,832
├─BatchNorm2d: 1-6                       --                        [1, 32, 16, 16]           64
├─ReLU: 1-7                              --                        [1, 32, 16, 16]           --
├─MaxPool2d: 1-8                         2                         [1, 32, 8, 8]             --
├─Linear: 1-9               

In [None]:
# Check out the torchinfo for some large models
#modelX = torchvision.models.resnet18()
#summary(modelX, (1, 3, 224, 224), col_names=["kernel_size", "output_size", "num_params"], depth=3)

In [18]:
# Smoke-test the untrained model on a single training batch

X, y = next(iter(train_loader))
X = X.to(device)
y_pred = model(X)

# Shapes of: the image batch, the raw logits (one score per class for every
# image) and the labels, which are integer class indices, not one-hot vectors
print(X.shape, y_pred.shape, y.shape, sep='\n')

torch.Size([64, 3, 32, 32])
torch.Size([64, 10])
torch.Size([64])


*Train and test the model, and track the run with TensorBoard*

In [None]:
from torch.utils.tensorboard import SummaryWriter
# TensorBoard event files are written into the 'logs' directory
writer = SummaryWriter(log_dir='logs')

# To keep several runs apart, give each one its own subfolder, e.g.
# writer = SummaryWriter('logs/run1')


# Training criterion (cross-entropy on the logits) and the optimizer that
# updates all model parameters
loss_function = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

# Define a function to train the model for one epoch
def train_model(epoch, train_loader):
  """Train the module-level ``model`` for one epoch and log the results.

  Performs one optimizer step per batch using the module-level
  ``loss_function`` and ``optimizer``. Afterwards the last batch is written to
  the module-level TensorBoard ``writer`` as an image grid, and the mean
  per-sample loss and the accuracy of the epoch are logged and printed.

  Args:
    epoch: index of the current epoch; used as the TensorBoard step.
    train_loader: DataLoader yielding (images, integer class labels) batches.
  """

  # Put model in train mode
  model.train()

  # Running sums over the epoch
  total_loss = 0
  total_correct = 0
  dataset_size = len(train_loader.dataset)

  # Go over each batch
  for X, y in train_loader:
    X = X.to(device)
    y = y.to(device)

    y_pred = model(X)

    # Input y_pred holds logits, target y holds integer class indices.
    # (The target could also be a vector of class probabilities.)
    loss = loss_function(y_pred, y)

    # Zero the gradients, backpropagate the gradients, update the parameters
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # loss is the mean over the batch (CrossEntropyLoss default reduction), so
    # weight it by the batch size; dividing the sum by dataset_size below then
    # gives the true per-sample mean. Summing the raw batch means would
    # under-report the loss by roughly a factor of the batch size.
    # The already computed loss is reused instead of evaluating it again.
    total_loss += loss.item() * X.size(0)
    total_correct += (y_pred.argmax(1) == y).sum().item()

  # Add the last batch to tensorboard. The images were normalized for the
  # network, so rescale them to [0, 1] for display, and tag them with the
  # epoch so later epochs do not overwrite step 0.
  img_grid = torchvision.utils.make_grid(X, normalize=True)
  writer.add_image('batch images', img_grid, epoch)

  # Add loss and accuracy to tensorboard
  writer.add_scalar("Train Loss", total_loss/dataset_size, epoch )
  writer.add_scalar("Train Accuracy", total_correct/dataset_size, epoch )
  print(f"Epoch: {epoch}, \
          Training Loss:{total_loss/dataset_size:0.4f}, \
          Training Accuracy:{total_correct/dataset_size:0.4f}")


# Define a function to test the model
def test_model(epoch, test_loader):
  """Evaluate the module-level ``model`` on ``test_loader`` and log the results.

  Runs the model in evaluation mode without gradient tracking, then writes the
  model graph, the mean per-sample loss and the accuracy to the module-level
  TensorBoard ``writer`` and prints the two metrics.

  Args:
    epoch: index of the current epoch; used as the TensorBoard step.
    test_loader: DataLoader yielding (images, integer class labels) batches.
  """

  # Running sums over the whole test set
  total_loss = 0
  total_correct = 0
  dataset_size = len(test_loader.dataset)

  # Put the model into evaluation mode
  model.eval()

  # Do not calculate gradients
  with torch.no_grad():
    for X,y in test_loader:
      X = X.to(device)
      y = y.to(device)

      y_pred = model(X)

      # loss_function returns the mean over the batch (CrossEntropyLoss default
      # reduction); weight it by the batch size so that dividing by
      # dataset_size below yields the true per-sample mean instead of a value
      # that is too small by roughly a factor of the batch size.
      total_loss += loss_function(y_pred, y).item() * X.size(0)
      total_correct += (y_pred.argmax(1) == y).sum().item()


  # Log the model graph, traced with the last test batch as example input
  writer.add_graph(model,X)

  writer.add_scalar("Test Loss", total_loss/dataset_size, epoch )
  writer.add_scalar("Test Accuracy", total_correct/dataset_size, epoch )
  print(f"Test Loss: {total_loss/dataset_size:0.4f}, \
          Test Accuracy: {total_correct/dataset_size:0.4f}")



In [None]:
for epoch in range(num_epochs):
  # One pass over the training data; metrics are logged under this epoch
  train_model(epoch,train_loader)

  # Evaluate on the test data with the weights reached after this epoch
  test_model(epoch, test_loader)

# Write any buffered events to disk so TensorBoard sees the complete run
writer.flush()

In [None]:
# Load the TensorBoard notebook extension and open TensorBoard inline on the
# 'logs' directory.
%load_ext tensorboard
# If the extension has already been loaded, use the reload variant instead:
#%reload_ext tensorboard
%tensorboard --logdir=logs 
# When running the code locally, `tensorboard --logdir=logs` returns a localhost address to open in a web browser.


In [None]:
# Reload the TensorBoard extension and display TensorBoard for the 'logs'
# directory again.
%reload_ext tensorboard
%tensorboard --logdir=logs 

In [None]:
# Final Note: Fully convolutional CNN
import torch.nn.functional as F

# global avg. pooling used in ResNet -- essentially averaging outputs if size >1
# This makes the network fully convolutional, meaning any input size would work

# Suppose the convolutional layers resulted in a feature map of this shape
x = torch.randn((1,512,8,8))

# Then, we can do avg pooling with a kernel as large as the feature map.
# torch.nn is spelled out because a bare `nn` is not imported in the visible
# cells, so `nn.AvgPool2d` would raise a NameError.
avgpool = torch.nn.AvgPool2d(x.size()[2:4]) # no trainable parameters
y = avgpool(x)
print(y.shape)

# OR let PyTorch derive the kernel from the requested 1x1 output size
y = F.adaptive_avg_pool2d(x,(1,1))  # no trainable parameters
print(y.shape)

# Instead of avgpooling maxpooling is also possible
# Other implementation possibilities are available as well ...
