In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time
import random
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision
import torchvision.transforms as transforms

# Data Preparation

In [2]:
# defining hyperparameters
batch_size = 32

def apply_image_transformation(transformation_type="standard", *args, **kwargs):
    '''
    Apply various image transformations based on the provided transformation_type.

    Args:
    transformation_type (str): The type of transformation to apply. Supported types are 'standard', 'resize', and 'channel'.
    *args: Additional arguments based on the transformation type.
    **kwargs: Additional keyword arguments for normalization parameters.

    Returns:
    transform: A composition of transformations to be applied to the input images.

    Raises:
    ValueError: If an unsupported transformation type is provided.

    '''
    if transformation_type == "standard":
        transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(0.5, 0.5)
        ])
        return transform

    elif transformation_type == "resize":
        transform = transforms.Compose([
            transforms.Resize(args[0]),
            transforms.CenterCrop(args[1]),
            transforms.ToTensor(),
            transforms.Normalize(kwargs['mean'], kwargs['std'])
        ])
        return transform

    elif transformation_type == "channel":
        transform = transforms.Compose([
            transforms.Resize(args[0]),
            transforms.CenterCrop(args[1]),
            transforms.Grayscale(3),
            transforms.ToTensor(),
            transforms.Normalize(mean=kwargs['mean'], std=kwargs['std'])
        ])
        return transform

    else:
        raise ValueError("Invalid transformation type. Supported types are 'normal', 'resize', and 'grayscale'.")

def create_loader(transform, batch_size=32):
    '''
    Create data loaders for training and testing using the provided transformation.

    Args:
    transform: The transformation to be applied to the dataset.
    batch_size: The batch size of 

    Returns:
    train_loader: DataLoader for the training dataset.
    test_loader: DataLoader for the testing dataset.

    This function imports the FashionMNIST dataset from the torchvision library and applies the provided transformation to the dataset. It then creates data loaders for both the training and testing datasets, considering the specified transformation and other default parameters such as the number of workers and batch size.
    '''
    # importing training and test sets from torchvision
    train_dataset = torchvision.datasets.FashionMNIST("./data", download=True, train=True, transform=transform)
    test_dataset = torchvision.datasets.FashionMNIST("./data", download=True, train=False, transform=transform)
    
    # creating dataloaders
    train_loader = DataLoader(train_dataset, shuffle=True, num_workers=2, batch_size=batch_size)
    test_loader = DataLoader(test_dataset, shuffle=False, num_workers=2, batch_size=batch_size)

    return train_loader, test_loader

In [3]:
transform = apply_image_transformation('standard')
basic_train_loader, basic_test_loader = create_loader(transform)

transform = apply_image_transformation('standard')
vgg_train_loader, vgg_test_loader = create_loader(transform)

transform = apply_image_transformation('resize', 256, 224, mean=(0.1307,), std=(0.3081,))
vgg_resize_train_loader, vgg_resize_test_loader = create_loader(transform)

transform = apply_image_transformation('channel', 256, 224, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
vgg_channel_train_loader, vgg_channel_test_loader = create_loader(transform)

transform = apply_image_transformation('standard')
resnet_train_loader, resnet_test_loader = create_loader(transform)

transform = apply_image_transformation('channel', 256, 224, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
resnet_channel_train_loader, resnet_channel_test_loader = create_loader(transform)

transform = apply_image_transformation('standard')
inception_train_loader, inception_test_loader = create_loader(transform)

transform = apply_image_transformation('channel', 299, 299, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
inception_channel_train_loader, inception_channel_test_loader = create_loader(transform)

# Training

In [4]:
device = "cuda:0" if torch.cuda.is_available() else "cpu"

def train(model, criterion, optimizer, num_epochs, train_loader, test_loader):
  total_time_taken = 0.0
  # Train the model
  for epoch in range(num_epochs):
    start_time = time.time()
    model.train()
    train_loss = 0.0
    correct = 0
    for i, data in enumerate(tqdm(train_loader)):
      # Get the inputs and labels
      inputs, labels = data
      inputs, labels = inputs.to(device), labels.to(device)

      # Zero the parameter gradients
      optimizer.zero_grad()

      # Forward pass
      outputs = model(inputs)
      if not torch.is_tensor(outputs):
        outputs = outputs.logits

      # Compute the loss
      loss = criterion(outputs, labels)
      train_loss += loss.item()

      # Backward pass and optimize
      loss.backward()
      optimizer.step()

      # Compute correct predictions
      pred = outputs.argmax(dim=1, keepdim=True)
      correct += pred.eq(labels.view_as(pred)).sum().item()

    total_time_taken += time.time() - start_time

    # Compute train accuracy
    train_accuracy = 100. * correct / len(train_loader.dataset)

    model.eval()
    test_loss = 0.0
    correct = 0
    test_acc = []
    with torch.no_grad():
      for i, data in enumerate(test_loader):
        # Get the inputs and labels
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)

        # Forward pass
        outputs = model(inputs)

        # Compute the loss
        loss = criterion(outputs, labels)
        test_loss += loss.item()

        # Compute correct predictions
        pred = outputs.argmax(dim=1, keepdim=True)
        correct += pred.eq(labels.view_as(pred)).sum().item()

    # Compute test accuracy
    test_accuracy = 100. * correct / len(test_loader.dataset)
    test_acc.append(test_accuracy)

    # Print statistics
    print(f'Epoch {epoch+1}, Train Loss: {train_loss/len(train_loader):.4f}, Train Accuracy: {train_accuracy:.2f}%, Test Loss: {test_loss/len(test_loader):.4f}, Test Accuracy: {test_accuracy:.2f}%')

  print(f'\n\nTotal Time Elapsed: {total_time_taken} s')

## Batch Normalization

Rather than using a simple convolution layer, each convolution layer is transformed into a 3 layer architecture
- Convolution Layer
- Batch Normalization Layer
- Activation Layer (LeakyReLU)

In [5]:
class BasicConv2d(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, padding=0, stride=1):
        super(BasicConv2d, self).__init__()
        self.basicconv2d = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.LeakyReLU(0.2, inplace=True)
        )

    def forward(self, x):
        return self.basicconv2d(x)

## Basic CNN

In [7]:
class BasicCNN(nn.Module):
  def __init__(self):
    super(BasicCNN, self).__init__()
    self.conv1 = BasicConv2d(1, 32, 3) # 32 1x3x3 filters with stride 1, pad 0
    '''
    Output size = (28 - 3 + 2*0)/1 + 1 = 26
    Output volume = 32x26x26
    '''
    self.pool = nn.MaxPool2d(2, 2) # 2x2 filter with stride 2
    '''
    Output size = (26 - 2)/2 + 1 = 13
    Output volume = 32x13x13
    '''
    self.fc1 = nn.Linear(32 * 13 * 13, 100)
    self.fc2 = nn.Linear(100, 10)

  def forward(self, x):
    x = torch.relu(self.conv1(x))
    x = self.pool(x)
    # Flatten the output of the last convolutional layer
    x = x.view(-1, 32 * 13 * 13)
    # Apply the fully connected layers with ReLU activation
    x = torch.relu(self.fc1(x))
    # Apply the last fully connected layer with softmax activation
    x = self.fc2(x)

    return x

In [8]:
# defining hyperparameters
learning_rate = 0.001
num_epochs = 10

BasicCNNModel = BasicCNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(BasicCNNModel.parameters(), lr=learning_rate)

train(BasicCNNModel, criterion, optimizer, num_epochs, basic_train_loader, basic_test_loader)

100%|██████████| 1875/1875 [00:10<00:00, 183.54it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 1, Train Loss: 0.3895, Train Accuracy: 86.09%, Test Loss: 0.3606, Test Accuracy: 87.45%


100%|██████████| 1875/1875 [00:10<00:00, 187.35it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 2, Train Loss: 0.2682, Train Accuracy: 90.21%, Test Loss: 0.2744, Test Accuracy: 90.16%


100%|██████████| 1875/1875 [00:09<00:00, 195.74it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 3, Train Loss: 0.2255, Train Accuracy: 91.72%, Test Loss: 0.2648, Test Accuracy: 90.49%


100%|██████████| 1875/1875 [00:09<00:00, 193.37it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 4, Train Loss: 0.1976, Train Accuracy: 92.61%, Test Loss: 0.2784, Test Accuracy: 90.61%


100%|██████████| 1875/1875 [00:09<00:00, 193.49it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 5, Train Loss: 0.1723, Train Accuracy: 93.62%, Test Loss: 0.2654, Test Accuracy: 91.34%


100%|██████████| 1875/1875 [00:09<00:00, 194.67it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 6, Train Loss: 0.1518, Train Accuracy: 94.36%, Test Loss: 0.2802, Test Accuracy: 91.05%


100%|██████████| 1875/1875 [00:09<00:00, 192.06it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 7, Train Loss: 0.1353, Train Accuracy: 95.06%, Test Loss: 0.2662, Test Accuracy: 91.65%


100%|██████████| 1875/1875 [00:10<00:00, 172.88it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 8, Train Loss: 0.1218, Train Accuracy: 95.45%, Test Loss: 0.3012, Test Accuracy: 91.36%


100%|██████████| 1875/1875 [00:09<00:00, 190.41it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 9, Train Loss: 0.1094, Train Accuracy: 95.90%, Test Loss: 0.3200, Test Accuracy: 90.51%


100%|██████████| 1875/1875 [00:09<00:00, 195.63it/s]


Epoch 10, Train Loss: 0.0997, Train Accuracy: 96.26%, Test Loss: 0.3374, Test Accuracy: 91.41%


Total Time Elapsed: 98.87939119338989 s


## VGG

In [11]:
class ModifiedVGG_A(nn.Module):
  def __init__(self):
    super(ModifiedVGG_A, self).__init__()
    self.conv1_1 = BasicConv2d(in_channels=1, out_channels=64, kernel_size=3, padding=1)
    self.conv1_2 = BasicConv2d(in_channels=64, out_channels=64, kernel_size=3, padding=1)

    self.conv2_1 = BasicConv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
    self.conv2_2 = BasicConv2d(in_channels=128, out_channels=128, kernel_size=3, padding=1)

    self.conv3_1 = BasicConv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1)
    self.conv3_2 = BasicConv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1)
    self.conv3_3 = BasicConv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1)

    self.conv4_1 = BasicConv2d(in_channels=256, out_channels=512, kernel_size=3, padding=1)
    self.conv4_2 = BasicConv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1)
    self.conv4_3 = BasicConv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1)

    self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)

    self.fc1 = nn.Linear(1 * 1 * 512, 64)
    self.fc2 = nn.Linear(64, 64)
    self.fc3 = nn.Linear(64, 10)

  def forward(self, x):
    x = torch.relu(self.conv1_1(x))
    x = torch.relu(self.conv1_2(x))
    x = self.maxpool(x)
    x = torch.relu(self.conv2_1(x))
    x = torch.relu(self.conv2_2(x))
    x = self.maxpool(x)
    x = torch.relu(self.conv3_1(x))
    x = torch.relu(self.conv3_2(x))
    x = torch.relu(self.conv3_3(x))
    x = self.maxpool(x)
    x = torch.relu(self.conv4_1(x))
    x = torch.relu(self.conv4_2(x))
    x = torch.relu(self.conv4_3(x))
    x = self.maxpool(x)
    x = x.view(-1, 1 * 1 * 512)
    x = torch.relu(self.fc1(x))
    x = torch.relu(self.fc2(x))
    x = self.fc3(x)

    return x

In [12]:
# defining hyperparameters
learning_rate = 0.001
num_epochs = 10

ModifiedVGGModel_A = ModifiedVGG_A().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(ModifiedVGGModel_1.parameters(), lr=learning_rate)

train(ModifiedVGGModel_A, criterion, optimizer, num_epochs, vgg_train_loader, vgg_test_loader)

100%|██████████| 1875/1875 [00:19<00:00, 96.55it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 1, Train Loss: 0.4477, Train Accuracy: 83.59%, Test Loss: 0.3525, Test Accuracy: 87.05%


100%|██████████| 1875/1875 [00:20<00:00, 93.46it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 2, Train Loss: 0.2765, Train Accuracy: 90.12%, Test Loss: 0.2682, Test Accuracy: 90.51%


100%|██████████| 1875/1875 [00:19<00:00, 94.13it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 3, Train Loss: 0.2361, Train Accuracy: 91.68%, Test Loss: 0.2619, Test Accuracy: 90.91%


100%|██████████| 1875/1875 [00:19<00:00, 94.62it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 4, Train Loss: 0.2035, Train Accuracy: 92.85%, Test Loss: 0.2623, Test Accuracy: 90.76%


100%|██████████| 1875/1875 [00:19<00:00, 95.56it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 5, Train Loss: 0.1779, Train Accuracy: 93.68%, Test Loss: 0.2159, Test Accuracy: 92.59%


100%|██████████| 1875/1875 [00:19<00:00, 96.48it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 6, Train Loss: 0.1596, Train Accuracy: 94.27%, Test Loss: 0.2335, Test Accuracy: 92.15%


100%|██████████| 1875/1875 [00:19<00:00, 95.32it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 7, Train Loss: 0.1382, Train Accuracy: 95.04%, Test Loss: 0.2079, Test Accuracy: 92.77%


100%|██████████| 1875/1875 [00:19<00:00, 96.14it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 8, Train Loss: 0.1200, Train Accuracy: 95.67%, Test Loss: 0.2067, Test Accuracy: 93.19%


100%|██████████| 1875/1875 [00:19<00:00, 96.46it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 9, Train Loss: 0.1037, Train Accuracy: 96.30%, Test Loss: 0.2278, Test Accuracy: 92.98%


100%|██████████| 1875/1875 [00:20<00:00, 91.60it/s]


Epoch 10, Train Loss: 0.0897, Train Accuracy: 96.81%, Test Loss: 0.2268, Test Accuracy: 92.97%


Total Time Elapsed: 197.37416219711304 s


In [13]:
class ModifiedVGG_B(nn.Module):
  def __init__(self):
    super(ModifiedVGG_B, self).__init__()
    self.conv1_1 = BasicConv2d(in_channels=1, out_channels=64, kernel_size=3, padding=1)
    self.conv1_2 = BasicConv2d(in_channels=64, out_channels=64, kernel_size=3, padding=1)

    self.conv2_1 = BasicConv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
    self.conv2_2 = BasicConv2d(in_channels=128, out_channels=128, kernel_size=3, padding=1)

    self.conv3_1 = BasicConv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1)
    self.conv3_2 = BasicConv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1)
    self.conv3_3 = BasicConv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1)

    self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)

    self.fc1 = nn.Linear(3 * 3 * 256, 256)
    self.fc2 = nn.Linear(256, 256)
    self.fc3 = nn.Linear(256, 10)

  def forward(self, x):
    x = torch.relu(self.conv1_1(x))
    x = torch.relu(self.conv1_2(x))
    x = self.maxpool(x)
    x = torch.relu(self.conv2_1(x))
    x = torch.relu(self.conv2_2(x))
    x = self.maxpool(x)
    x = torch.relu(self.conv3_1(x))
    x = torch.relu(self.conv3_2(x))
    x = torch.relu(self.conv3_3(x))
    x = self.maxpool(x)
    x = x.view(-1, 3 * 3 * 256)
    x = torch.relu(self.fc1(x))
    x = torch.relu(self.fc2(x))
    x = self.fc3(x)

    return x

In [14]:
# defining hyperparameters
learning_rate = 0.001
num_epochs = 10

ModifiedVGGModel_B = ModifiedVGG_B().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(ModifiedVGGModel_B.parameters(), lr=learning_rate)

train(ModifiedVGGModel_B, criterion, optimizer, num_epochs, vgg_train_loader, vgg_test_loader)

100%|██████████| 1875/1875 [00:16<00:00, 112.59it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 1, Train Loss: 0.3913, Train Accuracy: 85.60%, Test Loss: 0.2799, Test Accuracy: 89.66%


100%|██████████| 1875/1875 [00:15<00:00, 120.58it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 2, Train Loss: 0.2518, Train Accuracy: 90.83%, Test Loss: 0.2683, Test Accuracy: 90.48%


100%|██████████| 1875/1875 [00:15<00:00, 120.51it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 3, Train Loss: 0.2117, Train Accuracy: 92.32%, Test Loss: 0.2409, Test Accuracy: 91.45%


100%|██████████| 1875/1875 [00:15<00:00, 120.06it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 4, Train Loss: 0.1849, Train Accuracy: 93.36%, Test Loss: 0.2247, Test Accuracy: 92.11%


100%|██████████| 1875/1875 [00:15<00:00, 119.93it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 5, Train Loss: 0.1590, Train Accuracy: 94.17%, Test Loss: 0.2315, Test Accuracy: 92.06%


100%|██████████| 1875/1875 [00:15<00:00, 118.06it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 6, Train Loss: 0.1395, Train Accuracy: 94.84%, Test Loss: 0.2025, Test Accuracy: 92.92%


100%|██████████| 1875/1875 [00:16<00:00, 116.78it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 7, Train Loss: 0.1180, Train Accuracy: 95.60%, Test Loss: 0.2249, Test Accuracy: 92.75%


100%|██████████| 1875/1875 [00:15<00:00, 118.65it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 8, Train Loss: 0.1023, Train Accuracy: 96.29%, Test Loss: 0.2393, Test Accuracy: 93.19%


100%|██████████| 1875/1875 [00:15<00:00, 119.50it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 9, Train Loss: 0.0845, Train Accuracy: 96.85%, Test Loss: 0.2261, Test Accuracy: 92.95%


100%|██████████| 1875/1875 [00:16<00:00, 116.90it/s]


Epoch 10, Train Loss: 0.0754, Train Accuracy: 97.25%, Test Loss: 0.2533, Test Accuracy: 93.08%


Total Time Elapsed: 158.5027265548706 s


In [15]:
class ModifiedVGG_C(nn.Module):
  def __init__(self):
    super(ModifiedVGG_C, self).__init__()
    self.conv1_1 = BasicConv2d(in_channels=1, out_channels=64, kernel_size=3, padding=1)
    self.conv1_2 = BasicConv2d(in_channels=64, out_channels=64, kernel_size=3, padding=1)

    self.conv2_1 = BasicConv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
    self.conv2_2 = BasicConv2d(in_channels=128, out_channels=128, kernel_size=3, padding=1)

    self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)

    self.fc1 = nn.Linear(7 * 7 * 128, 1024)
    self.fc2 = nn.Linear(1024, 1024)
    self.fc3 = nn.Linear(1024, 10)

  def forward(self, x):
    x = torch.relu(self.conv1_1(x))
    x = torch.relu(self.conv1_2(x))
    x = self.maxpool(x)
    x = torch.relu(self.conv2_1(x))
    x = torch.relu(self.conv2_2(x))
    x = self.maxpool(x)
    x = x.view(-1, 7 * 7 * 128)
    x = torch.relu(self.fc1(x))
    x = torch.relu(self.fc2(x))
    x = self.fc3(x)

    return x

In [16]:
# defining hyperparameters
learning_rate = 0.001
num_epochs = 10

ModifiedVGGModel_C = ModifiedVGG_C().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(ModifiedVGGModel_C.parameters(), lr=learning_rate)

train(ModifiedVGGModel_C, criterion, optimizer, num_epochs, vgg_train_loader, vgg_test_loader)

100%|██████████| 1875/1875 [00:12<00:00, 145.42it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 1, Train Loss: 0.3786, Train Accuracy: 86.53%, Test Loss: 0.2887, Test Accuracy: 89.96%


100%|██████████| 1875/1875 [00:14<00:00, 131.53it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 2, Train Loss: 0.2416, Train Accuracy: 91.29%, Test Loss: 0.2589, Test Accuracy: 90.69%


100%|██████████| 1875/1875 [00:12<00:00, 146.38it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 3, Train Loss: 0.1986, Train Accuracy: 92.74%, Test Loss: 0.2351, Test Accuracy: 91.82%


100%|██████████| 1875/1875 [00:14<00:00, 133.18it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 4, Train Loss: 0.1700, Train Accuracy: 93.68%, Test Loss: 0.2353, Test Accuracy: 92.03%


100%|██████████| 1875/1875 [00:14<00:00, 128.67it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 5, Train Loss: 0.1413, Train Accuracy: 94.82%, Test Loss: 0.2194, Test Accuracy: 92.83%


100%|██████████| 1875/1875 [00:12<00:00, 146.42it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 6, Train Loss: 0.1201, Train Accuracy: 95.52%, Test Loss: 0.2398, Test Accuracy: 92.66%


100%|██████████| 1875/1875 [00:12<00:00, 146.65it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 7, Train Loss: 0.0992, Train Accuracy: 96.36%, Test Loss: 0.2502, Test Accuracy: 92.96%


100%|██████████| 1875/1875 [00:13<00:00, 141.83it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 8, Train Loss: 0.0792, Train Accuracy: 97.06%, Test Loss: 0.2398, Test Accuracy: 92.87%


100%|██████████| 1875/1875 [00:12<00:00, 146.10it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 9, Train Loss: 0.0665, Train Accuracy: 97.65%, Test Loss: 0.2698, Test Accuracy: 93.24%


100%|██████████| 1875/1875 [00:12<00:00, 146.44it/s]


Epoch 10, Train Loss: 0.0550, Train Accuracy: 98.06%, Test Loss: 0.2692, Test Accuracy: 93.00%


Total Time Elapsed: 133.07530236244202 s


# ResNet

In [6]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(
            in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, self.expansion *
                               planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(self.expansion*planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class ResNet(nn.Module):
    def __init__(self, block,  num_classes=10):
        super(ResNet, self).__init__()
        self.in_planes = 64

        self.conv1 = nn.Conv2d(1, 64, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, 2, stride=1)
        self.layer2 = self._make_layer(block, 128, 2, stride=2)
        self.layer3 = self._make_layer(block, 256, 2, stride=2)
        self.linear = nn.Linear(256*block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out

In [7]:
import torch.nn.functional as F

class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(
            in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, self.expansion *
                               planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(self.expansion*planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class ResNet2(nn.Module):
    def __init__(self, block,  num_classes=10):
        super(ResNet2, self).__init__()
        self.in_planes = 64

        self.conv1 = nn.Conv2d(1, 64, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, 2, stride=1)
        self.layer2 = self._make_layer(block, 128, 2, stride=2)
        self.layer3 = self._make_layer(block, 256, 2, stride=2)
        self.linear = nn.Linear(256*block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.maxpool(out)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = F.avg_pool2d(out,4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out

In [8]:
# defining hyperparameters
learning_rate = 0.001
num_epochs = 10

ResNetModel_2 = ResNet2(BasicBlock, num_classes=10).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(ResNetModel_2.parameters(), lr=learning_rate)

train(ResNetModel_2, criterion, optimizer, num_epochs, resnet_train_loader, resnet_test_loader)

100%|██████████████████████████████████████████████████████████████████████████████| 1875/1875 [00:42<00:00, 44.29it/s]


Epoch 1, Train Loss: 0.3766, Train Accuracy: 86.30%, Test Loss: 0.3068, Test Accuracy: 89.11%


100%|██████████████████████████████████████████████████████████████████████████████| 1875/1875 [00:24<00:00, 77.06it/s]


Epoch 2, Train Loss: 0.2539, Train Accuracy: 90.79%, Test Loss: 0.2429, Test Accuracy: 91.18%


100%|██████████████████████████████████████████████████████████████████████████████| 1875/1875 [00:24<00:00, 78.09it/s]


Epoch 3, Train Loss: 0.2178, Train Accuracy: 92.04%, Test Loss: 0.2171, Test Accuracy: 92.25%


100%|██████████████████████████████████████████████████████████████████████████████| 1875/1875 [00:24<00:00, 77.10it/s]


Epoch 4, Train Loss: 0.1896, Train Accuracy: 93.02%, Test Loss: 0.2216, Test Accuracy: 91.91%


100%|██████████████████████████████████████████████████████████████████████████████| 1875/1875 [00:24<00:00, 76.85it/s]


Epoch 5, Train Loss: 0.1626, Train Accuracy: 94.02%, Test Loss: 0.2196, Test Accuracy: 92.45%


100%|██████████████████████████████████████████████████████████████████████████████| 1875/1875 [00:23<00:00, 79.47it/s]


Epoch 6, Train Loss: 0.1398, Train Accuracy: 94.79%, Test Loss: 0.2147, Test Accuracy: 92.38%


100%|██████████████████████████████████████████████████████████████████████████████| 1875/1875 [00:23<00:00, 79.24it/s]


Epoch 7, Train Loss: 0.1184, Train Accuracy: 95.60%, Test Loss: 0.2132, Test Accuracy: 92.96%


100%|██████████████████████████████████████████████████████████████████████████████| 1875/1875 [00:23<00:00, 78.99it/s]


Epoch 8, Train Loss: 0.0977, Train Accuracy: 96.32%, Test Loss: 0.2087, Test Accuracy: 93.19%


100%|██████████████████████████████████████████████████████████████████████████████| 1875/1875 [00:24<00:00, 77.64it/s]


Epoch 9, Train Loss: 0.0782, Train Accuracy: 97.08%, Test Loss: 0.2463, Test Accuracy: 92.42%


100%|██████████████████████████████████████████████████████████████████████████████| 1875/1875 [00:24<00:00, 78.06it/s]


Epoch 10, Train Loss: 0.0642, Train Accuracy: 97.63%, Test Loss: 0.2328, Test Accuracy: 93.09%


Total Time Elapsed: 258.5876362323761 s


In [9]:
import torch.nn.functional as F
import torchvision as tv

class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()

        self.offset_conv = nn.Conv2d(in_planes, 18, kernel_size=3, stride=stride, padding=1)
        self.conv1 = tv.ops.DeformConv2d(
            in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes)
            )

    def forward(self, x):
        offset = self.offset_conv(x)
        out = self.conv1(x, offset)
        out = F.relu(self.bn1(out))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super(Bottleneck, self).__init__()
        self.conv1 = tv.ops.DeformConv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, self.expansion *
                               planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(self.expansion*planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class ResNet3(nn.Module):
    def __init__(self, block,  num_classes=10):
        super(ResNet3, self).__init__()
        self.in_planes = 64

        self.conv1 = nn.Conv2d(1, 64, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, 2, stride=1)
        self.layer2 = self._make_layer(block, 128, 2, stride=2)
        self.layer3 = self._make_layer(block, 256, 2, stride=2)
        self.linear = nn.Linear(256*block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.maxpool(out)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out

In [10]:
# defining hyperparameters
learning_rate = 0.001
num_epochs = 10

ResNetModel_3 = ResNet3(BasicBlock, num_classes=10).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(ResNetModel_3.parameters(), lr=learning_rate)

train(ResNetModel_3, criterion, optimizer, num_epochs, resnet_train_loader, resnet_test_loader)

100%|██████████████████████████████████████████████████████████████████████████████| 1875/1875 [00:35<00:00, 52.78it/s]


Epoch 1, Train Loss: 0.9444, Train Accuracy: 65.76%, Test Loss: 0.9425, Test Accuracy: 66.63%


100%|██████████████████████████████████████████████████████████████████████████████| 1875/1875 [00:34<00:00, 53.66it/s]


Epoch 2, Train Loss: 0.7566, Train Accuracy: 72.55%, Test Loss: 0.6018, Test Accuracy: 78.19%


100%|██████████████████████████████████████████████████████████████████████████████| 1875/1875 [00:34<00:00, 53.74it/s]


Epoch 3, Train Loss: 0.6518, Train Accuracy: 76.43%, Test Loss: 0.6568, Test Accuracy: 76.68%


100%|██████████████████████████████████████████████████████████████████████████████| 1875/1875 [00:35<00:00, 53.47it/s]


Epoch 4, Train Loss: 0.5722, Train Accuracy: 79.27%, Test Loss: 0.6241, Test Accuracy: 76.99%


100%|██████████████████████████████████████████████████████████████████████████████| 1875/1875 [00:34<00:00, 53.93it/s]


Epoch 5, Train Loss: 0.5121, Train Accuracy: 81.07%, Test Loss: 0.4641, Test Accuracy: 82.96%


100%|██████████████████████████████████████████████████████████████████████████████| 1875/1875 [00:36<00:00, 51.02it/s]


Epoch 6, Train Loss: 0.4764, Train Accuracy: 82.48%, Test Loss: 0.4900, Test Accuracy: 82.29%


100%|██████████████████████████████████████████████████████████████████████████████| 1875/1875 [00:34<00:00, 53.87it/s]


Epoch 7, Train Loss: 0.4190, Train Accuracy: 84.69%, Test Loss: 0.4136, Test Accuracy: 84.85%


100%|██████████████████████████████████████████████████████████████████████████████| 1875/1875 [00:34<00:00, 53.70it/s]


Epoch 8, Train Loss: 0.3774, Train Accuracy: 86.11%, Test Loss: 0.3698, Test Accuracy: 86.41%


100%|██████████████████████████████████████████████████████████████████████████████| 1875/1875 [00:34<00:00, 53.98it/s]


Epoch 9, Train Loss: 0.3491, Train Accuracy: 86.99%, Test Loss: 0.3843, Test Accuracy: 85.85%


100%|██████████████████████████████████████████████████████████████████████████████| 1875/1875 [00:34<00:00, 53.98it/s]


Epoch 10, Train Loss: 0.3341, Train Accuracy: 87.70%, Test Loss: 0.3471, Test Accuracy: 87.30%


Total Time Elapsed: 351.1620569229126 s


# Inceptionv3

In [7]:
# Inception Building Blocks
class InceptionA(nn.Module):
  def __init__(self, channels_in, pool_channels):
    super(InceptionA, self).__init__()
    self.branch1x1 = BasicConv2d(channels_in, 64, 1, stride=1, padding=0)
    self.branch5x5 = nn.Sequential(
        BasicConv2d(channels_in, 48, 1, stride=1, padding=0),
        BasicConv2d(48, 64, 5, stride=1, padding=2)
    )
    self.branch3x3dbl = nn.Sequential(
        BasicConv2d(channels_in, 64, 1, stride=1, padding=0),
        BasicConv2d(64, 96, 3, stride=1, padding=1),
        BasicConv2d(96, 96, 3, stride=1, padding=1)
    )
    self.branch_pool = nn.Sequential(
        nn.AvgPool2d(3, stride=1, padding=1),
        BasicConv2d(channels_in, pool_channels, 1, stride=1, padding=0)
    )

  def forward(self, x):
    outputs = [self.branch1x1(x), self.branch5x5(x), self.branch3x3dbl(x), self.branch_pool(x)]
    return torch.cat(outputs, 1)  # 64 + 64 + 96 + pool_channels

class InceptionA_Reduction(nn.Module):
  def __init__(self, channels_in):
    super(InceptionA_Reduction, self).__init__()
    self.branch3x3 = BasicConv2d(channels_in, 384, 3, stride=2, padding=1)
    self.branch3x3dbl = nn.Sequential(
        BasicConv2d(channels_in, 64, 1, padding=0),
        BasicConv2d(64, 96, 3, padding=1),
        BasicConv2d(96, 96, 3, stride=2, padding=1)
    )
    self.branch_pool = nn.MaxPool2d(3, stride=2, padding=1)

  def forward(self, x):
    outputs = [self.branch3x3(x), self.branch3x3dbl(x), self.branch_pool(x)]
    return torch.cat(outputs, 1)  # 384 + 96 + channels_in

class InceptionB(nn.Module):
  def __init__(self, channels_in, channels_7x7):
    super(InceptionB, self).__init__()
    self.branch1x1 = BasicConv2d(channels_in, 192, 1, stride=1, padding=0)
    self.branch7x7 = nn.Sequential(
        BasicConv2d(channels_in, channels_7x7, 1, stride=1, padding=0),
        BasicConv2d(channels_7x7, channels_7x7, (7, 1), stride=1, padding=(3, 0)),
        BasicConv2d(channels_7x7, 192, (1, 7), stride=1, padding=(0, 3))
    )
    self.branch7x7dbl = nn.Sequential(
        BasicConv2d(channels_in, channels_7x7, 1, stride=1, padding=0),
        BasicConv2d(channels_7x7, channels_7x7, (7, 1), stride=1, padding=(3, 0)),
        BasicConv2d(channels_7x7, channels_7x7, (1, 7), stride=1, padding=(0, 3)),
        BasicConv2d(channels_7x7, channels_7x7, (7, 1), stride=1, padding=(3, 0)),
        BasicConv2d(channels_7x7, 192, (1, 7), stride=1, padding=(0, 3))
    )
    self.branch_pool = nn.Sequential(
        nn.AvgPool2d(3, stride=1, padding=1),
        BasicConv2d(channels_in, 192, 1, stride=1, padding=0)
    )

  def forward(self, x):
    outputs = [self.branch1x1(x), self.branch7x7(x), self.branch7x7dbl(x), self.branch_pool(x)]
    return torch.cat(outputs, 1)  # 192 + 192 + 192 + 192 = 768 channels

class InceptionB_Reduction(nn.Module):
  def __init__(self, channels_in):
    super(InceptionB_Reduction, self).__init__()
    self.branch3x3 = nn.Sequential(
      BasicConv2d(channels_in, 192, 1, stride=1, padding=0),
      BasicConv2d(192, 320, 3, stride=2, padding=1)
    )
    self.branch7x7x3 = nn.Sequential(
      BasicConv2d(channels_in, 192, 1, stride=1, padding=0),
      BasicConv2d(192, 192, (1, 7), stride=1, padding=(0, 3)),
      BasicConv2d(192, 192, (7, 1), stride=1, padding=(3, 0)),
      BasicConv2d(192, 192, 3, stride=2, padding=1)
    )
    self.branch_pool = nn.MaxPool2d(3, stride=2, padding=1)

  def forward(self, x):
    outputs = [self.branch3x3(x), self.branch7x7x3(x), self.branch_pool(x)]
    return torch.cat(outputs, 1)  # 320+ 192 + channels_in

class InceptionC(nn.Module):
  def __init__(self, channels_in):
    super(InceptionC, self).__init__()
    self.branch1x1 = BasicConv2d(channels_in, 320, 1, stride=1, padding=0)

    self.branch3x3_1 = BasicConv2d(channels_in, 384, 1, stride=1, padding=0)
    self.branch3x3_2a = BasicConv2d(384, 384, (1, 3), stride=1, padding=(0, 1))
    self.branch3x3_2b = BasicConv2d(384, 384, (3, 1), stride=1, padding=(1, 0))

    self.branch3x3dbl_1 = nn.Sequential(
        BasicConv2d(channels_in, 448, 1, stride=1, padding=0),
        BasicConv2d(448, 384, 3, stride=1, padding=1)
    )
    self.branch3x3dbl_2a = BasicConv2d(384, 384, (1, 3), stride=1, padding=(0, 1))
    self.branch3x3dbl_2b = BasicConv2d(384, 384, (3, 1), stride=1, padding=(1, 0))

    self.branch_pool = nn.Sequential(
        nn.AvgPool2d(3, stride=1, padding=1),
        BasicConv2d(channels_in, 192, 1, stride=1, padding=0)
    )

  def forward(self, x):
    branch1x1 = self.branch1x1(x)

    branch3x3 = self.branch3x3_1(x)
    branch3x3 = torch.cat([self.branch3x3_2a(branch3x3), self.branch3x3_2b(branch3x3)], 1)

    branch3x3dbl = self.branch3x3dbl_1(x)
    branch3x3dbl = torch.cat([self.branch3x3dbl_2a(branch3x3dbl), self.branch3x3dbl_2b(branch3x3dbl)], 1)

    branch_pool = self.branch_pool(x)

    outputs = [branch1x1, branch3x3, branch3x3dbl, branch_pool]
    return torch.cat(outputs, 1)  # 320 + 768 + 768 + 192 = 2048 channels

In [20]:
class ModifiedInception_A(nn.Module):
  def __init__(self):
    super(ModifiedInception_A, self).__init__()
    self.in_block = nn.Sequential(
      BasicConv2d(1, 32, 3, stride=2, padding=0), # 15x15x32
      BasicConv2d(32, 64, 3, stride=1, padding=0), # 13x13x32
      BasicConv2d(64, 64, 3, stride=1, padding=1), # 13x13x64
      nn.MaxPool2d(kernel_size=3, stride=2), # 6x6x64
    )
    self.mix_block = nn.Sequential(
      InceptionA(64, 32),
      InceptionA(256, 64),
      InceptionA_Reduction(288),
      InceptionB(768, 128),
      InceptionB(768, 160),
      InceptionB(768, 192),
      InceptionB_Reduction(768),
      InceptionC(1280),
      InceptionC(2048),
    )
    self.out_block = nn.Sequential(
      nn.AdaptiveAvgPool2d(1),
      nn.Dropout(0.2)
    )
    self.fc = nn.Linear(2048, 10)

  def forward(self, x):
    x = self.in_block(x)
    x = self.mix_block(x)
    x = self.out_block(x)
    x = torch.flatten(x, 1)
    x = self.fc(x)

    return x

In [21]:
# defining hyperparameters
learning_rate = 0.001
num_epochs = 10

ModifiedInceptionModel_A = ModifiedInception_A().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(ModifiedInceptionModel_A.parameters(), lr=learning_rate)

train(ModifiedInceptionModel_A, criterion, optimizer, num_epochs, inception_train_loader, inception_test_loader)

100%|██████████| 1875/1875 [01:27<00:00, 21.36it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 1, Train Loss: 0.6057, Train Accuracy: 77.96%, Test Loss: 0.4125, Test Accuracy: 85.86%


100%|██████████| 1875/1875 [01:28<00:00, 21.26it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 2, Train Loss: 0.3783, Train Accuracy: 86.69%, Test Loss: 0.3521, Test Accuracy: 87.66%


100%|██████████| 1875/1875 [01:29<00:00, 21.01it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 3, Train Loss: 0.3215, Train Accuracy: 88.64%, Test Loss: 0.3133, Test Accuracy: 89.01%


100%|██████████| 1875/1875 [01:31<00:00, 20.45it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 4, Train Loss: 0.2875, Train Accuracy: 89.81%, Test Loss: 0.2870, Test Accuracy: 90.23%


100%|██████████| 1875/1875 [01:31<00:00, 20.55it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 5, Train Loss: 0.2528, Train Accuracy: 90.98%, Test Loss: 0.2805, Test Accuracy: 90.31%


100%|██████████| 1875/1875 [01:30<00:00, 20.79it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 6, Train Loss: 0.2307, Train Accuracy: 91.74%, Test Loss: 0.2514, Test Accuracy: 91.32%


100%|██████████| 1875/1875 [01:29<00:00, 20.94it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 7, Train Loss: 0.2086, Train Accuracy: 92.45%, Test Loss: 0.2813, Test Accuracy: 90.77%


100%|██████████| 1875/1875 [01:28<00:00, 21.12it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 8, Train Loss: 0.1885, Train Accuracy: 93.26%, Test Loss: 0.2871, Test Accuracy: 90.50%


100%|██████████| 1875/1875 [01:28<00:00, 21.27it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 9, Train Loss: 0.1709, Train Accuracy: 93.77%, Test Loss: 0.2524, Test Accuracy: 91.37%


100%|██████████| 1875/1875 [01:29<00:00, 21.01it/s]


Epoch 10, Train Loss: 0.1527, Train Accuracy: 94.46%, Test Loss: 0.2537, Test Accuracy: 91.74%


Total Time Elapsed: 894.1422131061554 s


In [22]:
class ModifiedInception_B(nn.Module):
  def __init__(self):
    super(ModifiedInception_B, self).__init__()
    self.in_block = nn.Sequential(
      BasicConv2d(1, 32, 3, stride=2, padding=0), # 15x15x32
      BasicConv2d(32, 64, 3, stride=1, padding=0), # 13x13x32
      BasicConv2d(64, 64, 3, stride=1, padding=1), # 13x13x64
      nn.MaxPool2d(kernel_size=3, stride=2), # 6x6x64
    )
    self.mix_block = nn.Sequential(
      InceptionA(64, 64),
      InceptionA_Reduction(288),
      InceptionB(768, 128),
      InceptionB(768, 192),
      InceptionB_Reduction(768),
      InceptionC(1280),
    )
    self.out_block = nn.Sequential(
      nn.AdaptiveAvgPool2d(1),
      nn.Dropout(0.2)
    )
    self.fc = nn.Linear(2048, 10)

  def forward(self, x):
    x = self.in_block(x)
    x = self.mix_block(x)
    x = self.out_block(x)
    x = torch.flatten(x, 1)
    x = self.fc(x)

    return x

In [23]:
# defining hyperparameters
learning_rate = 0.001
num_epochs = 10

ModifiedInceptionModel_B = ModifiedInception_B().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(ModifiedInceptionModel_B.parameters(), lr=learning_rate)

train(ModifiedInceptionModel_B, criterion, optimizer, num_epochs, inception_train_loader, inception_test_loader)

100%|██████████| 1875/1875 [01:13<00:00, 25.52it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 1, Train Loss: 0.5388, Train Accuracy: 80.66%, Test Loss: 0.3790, Test Accuracy: 86.33%


100%|██████████| 1875/1875 [01:13<00:00, 25.59it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 2, Train Loss: 0.3469, Train Accuracy: 87.56%, Test Loss: 0.3489, Test Accuracy: 87.48%


100%|██████████| 1875/1875 [01:11<00:00, 26.15it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 3, Train Loss: 0.2948, Train Accuracy: 89.21%, Test Loss: 0.2778, Test Accuracy: 89.96%


100%|██████████| 1875/1875 [01:12<00:00, 25.88it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 4, Train Loss: 0.2590, Train Accuracy: 90.58%, Test Loss: 0.2864, Test Accuracy: 89.59%


100%|██████████| 1875/1875 [01:10<00:00, 26.49it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 5, Train Loss: 0.2379, Train Accuracy: 91.40%, Test Loss: 0.2878, Test Accuracy: 89.29%


100%|██████████| 1875/1875 [01:11<00:00, 26.11it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 6, Train Loss: 0.2108, Train Accuracy: 92.27%, Test Loss: 0.2703, Test Accuracy: 90.70%


100%|██████████| 1875/1875 [01:11<00:00, 26.39it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 7, Train Loss: 0.1911, Train Accuracy: 92.93%, Test Loss: 0.2505, Test Accuracy: 91.42%


100%|██████████| 1875/1875 [01:09<00:00, 26.95it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 8, Train Loss: 0.1717, Train Accuracy: 93.72%, Test Loss: 0.2552, Test Accuracy: 91.14%


100%|██████████| 1875/1875 [01:10<00:00, 26.74it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 9, Train Loss: 0.1604, Train Accuracy: 94.24%, Test Loss: 0.2531, Test Accuracy: 91.40%


100%|██████████| 1875/1875 [01:11<00:00, 26.40it/s]


Epoch 10, Train Loss: 0.1367, Train Accuracy: 94.97%, Test Loss: 0.2523, Test Accuracy: 91.42%


Total Time Elapsed: 715.276772737503 s


In [6]:
# Inception Building Blocks
class InceptionA(nn.Module):
  def __init__(self, channels_in, pool_channels):
    super(InceptionA, self).__init__()
    self.branch1x1 = BasicConv2d(channels_in, 32, 1, stride=1, padding=0)
    self.branch5x5 = nn.Sequential(
        BasicConv2d(channels_in, 24, 1, stride=1, padding=0),
        BasicConv2d(24, 32, 5, stride=1, padding=2)
    )
    self.branch3x3dbl = nn.Sequential(
        BasicConv2d(channels_in, 32, 1, stride=1, padding=0),
        BasicConv2d(32, 48, 3, stride=1, padding=1),
        BasicConv2d(48, 48, 3, stride=1, padding=1)
    )
    self.branch_pool = nn.Sequential(
        nn.AvgPool2d(3, stride=1, padding=1),
        BasicConv2d(channels_in, pool_channels, 1, stride=1, padding=0)
    )

  def forward(self, x):
    outputs = [self.branch1x1(x), self.branch5x5(x), self.branch3x3dbl(x), self.branch_pool(x)]
    return torch.cat(outputs, 1)  # 32 + 32 + 48 + pool_channels

class InceptionA_Reduction(nn.Module):
  def __init__(self, channels_in):
    super(InceptionA_Reduction, self).__init__()
    self.branch3x3 = BasicConv2d(channels_in, 192, 3, stride=2, padding=1)
    self.branch3x3dbl = nn.Sequential(
        BasicConv2d(channels_in, 32, 1, padding=0),
        BasicConv2d(32, 48, 3, padding=1),
        BasicConv2d(48, 48, 3, stride=2, padding=1)
    )
    self.branch_pool = nn.MaxPool2d(3, stride=2, padding=1)

  def forward(self, x):
    outputs = [self.branch3x3(x), self.branch3x3dbl(x), self.branch_pool(x)]
    return torch.cat(outputs, 1)  # 192 + 48 + channels_in

class InceptionB(nn.Module):
  def __init__(self, channels_in, channels_7x7):
    super(InceptionB, self).__init__()
    self.branch1x1 = BasicConv2d(channels_in, 96, 1, stride=1, padding=0)
    self.branch7x7 = nn.Sequential(
        BasicConv2d(channels_in, channels_7x7, 1, stride=1, padding=0),
        BasicConv2d(channels_7x7, channels_7x7, (7, 1), stride=1, padding=(3, 0)),
        BasicConv2d(channels_7x7, 96, (1, 7), stride=1, padding=(0, 3))
    )
    self.branch7x7dbl = nn.Sequential(
        BasicConv2d(channels_in, channels_7x7, 1, stride=1, padding=0),
        BasicConv2d(channels_7x7, channels_7x7, (7, 1), stride=1, padding=(3, 0)),
        BasicConv2d(channels_7x7, channels_7x7, (1, 7), stride=1, padding=(0, 3)),
        BasicConv2d(channels_7x7, channels_7x7, (7, 1), stride=1, padding=(3, 0)),
        BasicConv2d(channels_7x7, 96, (1, 7), stride=1, padding=(0, 3))
    )
    self.branch_pool = nn.Sequential(
        nn.AvgPool2d(3, stride=1, padding=1),
        BasicConv2d(channels_in, 96, 1, stride=1, padding=0)
    )

  def forward(self, x):
    outputs = [self.branch1x1(x), self.branch7x7(x), self.branch7x7dbl(x), self.branch_pool(x)]
    return torch.cat(outputs, 1)  # 96 + 96 + 96 + 96 = 384 channels

class InceptionB_Reduction(nn.Module):
  def __init__(self, channels_in):
    super(InceptionB_Reduction, self).__init__()
    self.branch3x3 = nn.Sequential(
      BasicConv2d(channels_in, 96, 1, stride=1, padding=0),
      BasicConv2d(96, 160, 3, stride=2, padding=1)
    )
    self.branch7x7x3 = nn.Sequential(
      BasicConv2d(channels_in, 96, 1, stride=1, padding=0),
      BasicConv2d(96, 96, (1, 7), stride=1, padding=(0, 3)),
      BasicConv2d(96, 96, (7, 1), stride=1, padding=(3, 0)),
      BasicConv2d(96, 96, 3, stride=2, padding=1)
    )
    self.branch_pool = nn.MaxPool2d(3, stride=2, padding=1)

  def forward(self, x):
    outputs = [self.branch3x3(x), self.branch7x7x3(x), self.branch_pool(x)]
    return torch.cat(outputs, 1)  # 160 + 96 + channels_in

class InceptionC(nn.Module):
  def __init__(self, channels_in):
    super(InceptionC, self).__init__()
    self.branch1x1 = BasicConv2d(channels_in, 160, 1, stride=1, padding=0)

    self.branch3x3_1 = BasicConv2d(channels_in, 192, 1, stride=1, padding=0)
    self.branch3x3_2a = BasicConv2d(192, 192, (1, 3), stride=1, padding=(0, 1))
    self.branch3x3_2b = BasicConv2d(192, 192, (3, 1), stride=1, padding=(1, 0))

    self.branch3x3dbl_1 = nn.Sequential(
        BasicConv2d(channels_in, 224, 1, stride=1, padding=0),
        BasicConv2d(224, 192, 3, stride=1, padding=1)
    )
    self.branch3x3dbl_2a = BasicConv2d(192, 192, (1, 3), stride=1, padding=(0, 1))
    self.branch3x3dbl_2b = BasicConv2d(192, 192, (3, 1), stride=1, padding=(1, 0))

    self.branch_pool = nn.Sequential(
        nn.AvgPool2d(3, stride=1, padding=1),
        BasicConv2d(channels_in, 96, 1, stride=1, padding=0)
    )

  def forward(self, x):
    branch1x1 = self.branch1x1(x)

    branch3x3 = self.branch3x3_1(x)
    branch3x3 = torch.cat([self.branch3x3_2a(branch3x3), self.branch3x3_2b(branch3x3)], 1)

    branch3x3dbl = self.branch3x3dbl_1(x)
    branch3x3dbl = torch.cat([self.branch3x3dbl_2a(branch3x3dbl), self.branch3x3dbl_2b(branch3x3dbl)], 1)

    branch_pool = self.branch_pool(x)

    outputs = [branch1x1, branch3x3, branch3x3dbl, branch_pool]
    return torch.cat(outputs, 1)  # 160 + 384 + 384 + 96 = 1024 channels
      
class ModifiedInception_C(nn.Module):
  def __init__(self):
    super(ModifiedInception_C, self).__init__()
    self.in_block = nn.Sequential(
      BasicConv2d(1, 16, 3, stride=2, padding=0),
      BasicConv2d(16, 32, 3, stride=1, padding=0),
      BasicConv2d(32, 32, 3, stride=1, padding=1),
      nn.MaxPool2d(kernel_size=3, stride=2),
      BasicConv2d(32, 40, 3, stride=1, padding=0),
      BasicConv2d(40, 96, 3, stride=1, padding=1),
      nn.MaxPool2d(kernel_size=3, stride=2),
    )
    self.mix_block = nn.Sequential(
      InceptionA(96, 16),
      InceptionA(128, 32),
      InceptionA(144, 32),
      InceptionA_Reduction(144),
      InceptionB(384, 64),
      InceptionB(384, 80),
      InceptionB(384, 80),
      InceptionB(384, 96),
      InceptionB_Reduction(384),
      InceptionC(640),
      InceptionC(1024),
    )
    self.out_block = nn.Sequential(
      nn.AdaptiveAvgPool2d(1),
      nn.Dropout(0.2)
    )
    self.fc = nn.Linear(1024, 10)

  def forward(self, x):
    x = self.in_block(x)
    x = self.mix_block(x)
    x = self.out_block(x)
    x = torch.flatten(x, 1)
    x = self.fc(x)

    return x

In [7]:
# defining hyperparameters
learning_rate = 0.005
num_epochs = 10

ModifiedInceptionModel_C = ModifiedInception_C().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(ModifiedInceptionModel_C.parameters(), lr=learning_rate)

train(ModifiedInceptionModel_C, criterion, optimizer, num_epochs, inception_train_loader, inception_test_loader)

100%|██████████████████████████████████████████████████████████████████████████████| 1875/1875 [01:47<00:00, 17.38it/s]


Epoch 1, Train Loss: 1.0498, Train Accuracy: 62.83%, Test Loss: 0.7420, Test Accuracy: 71.32%


100%|██████████████████████████████████████████████████████████████████████████████| 1875/1875 [01:45<00:00, 17.76it/s]


Epoch 2, Train Loss: 0.6453, Train Accuracy: 77.01%, Test Loss: 0.5501, Test Accuracy: 81.45%


100%|██████████████████████████████████████████████████████████████████████████████| 1875/1875 [01:46<00:00, 17.68it/s]


Epoch 3, Train Loss: 0.5140, Train Accuracy: 82.48%, Test Loss: 0.4307, Test Accuracy: 84.98%


100%|██████████████████████████████████████████████████████████████████████████████| 1875/1875 [01:47<00:00, 17.51it/s]


Epoch 4, Train Loss: 0.4317, Train Accuracy: 85.19%, Test Loss: 0.3943, Test Accuracy: 86.22%


100%|██████████████████████████████████████████████████████████████████████████████| 1875/1875 [01:46<00:00, 17.60it/s]


Epoch 5, Train Loss: 0.3896, Train Accuracy: 86.64%, Test Loss: 0.3991, Test Accuracy: 84.81%


100%|██████████████████████████████████████████████████████████████████████████████| 1875/1875 [01:46<00:00, 17.59it/s]


Epoch 6, Train Loss: 0.3581, Train Accuracy: 87.64%, Test Loss: 0.3288, Test Accuracy: 88.19%


100%|██████████████████████████████████████████████████████████████████████████████| 1875/1875 [01:46<00:00, 17.55it/s]


Epoch 7, Train Loss: 0.3326, Train Accuracy: 88.52%, Test Loss: 0.3916, Test Accuracy: 86.85%


100%|██████████████████████████████████████████████████████████████████████████████| 1875/1875 [01:46<00:00, 17.59it/s]


Epoch 8, Train Loss: 0.3085, Train Accuracy: 89.46%, Test Loss: 0.3093, Test Accuracy: 89.34%


100%|██████████████████████████████████████████████████████████████████████████████| 1875/1875 [01:46<00:00, 17.61it/s]


Epoch 9, Train Loss: 0.2924, Train Accuracy: 89.75%, Test Loss: 0.2984, Test Accuracy: 89.30%


100%|██████████████████████████████████████████████████████████████████████████████| 1875/1875 [01:46<00:00, 17.63it/s]


Epoch 10, Train Loss: 0.2787, Train Accuracy: 90.38%, Test Loss: 0.3005, Test Accuracy: 89.03%


Total Time Elapsed: 1066.0327453613281 s


---
# Transfer Learning

## VGG16

In [24]:
from torchvision.models import vgg16, VGG16_Weights
PretrainedVGGModel = vgg16(weights=VGG16_Weights.DEFAULT).to(device)

# defining hyperparameters
learning_rate = 0.001
num_epochs = 10

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(PretrainedVGGModel.parameters(), lr=learning_rate)

train(PretrainedVGGModel, criterion, optimizer, num_epochs, vgg_channel_train_loader, vgg_channel_test_loader)

Using cache found in /home/UG/chua0994/.cache/torch/hub/pytorch_vision_v0.7.0
100%|██████████| 1875/1875 [05:55<00:00,  5.28it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 1, Train Loss: 0.6592, Train Accuracy: 77.74%, Test Loss: 0.3884, Test Accuracy: 86.32%


100%|██████████| 1875/1875 [05:55<00:00,  5.28it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 2, Train Loss: 0.4084, Train Accuracy: 85.13%, Test Loss: 0.3411, Test Accuracy: 87.43%


100%|██████████| 1875/1875 [05:55<00:00,  5.27it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 3, Train Loss: 0.5393, Train Accuracy: 83.97%, Test Loss: 0.3472, Test Accuracy: 87.17%


100%|██████████| 1875/1875 [05:55<00:00,  5.27it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 4, Train Loss: 0.3519, Train Accuracy: 87.47%, Test Loss: 0.3073, Test Accuracy: 88.89%


100%|██████████| 1875/1875 [05:54<00:00,  5.29it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 5, Train Loss: 0.3033, Train Accuracy: 89.01%, Test Loss: 0.2885, Test Accuracy: 89.73%


100%|██████████| 1875/1875 [05:53<00:00,  5.30it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 6, Train Loss: 0.3618, Train Accuracy: 88.62%, Test Loss: 0.3243, Test Accuracy: 88.31%


100%|██████████| 1875/1875 [05:53<00:00,  5.31it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 7, Train Loss: 0.3883, Train Accuracy: 88.02%, Test Loss: 0.2806, Test Accuracy: 89.69%


100%|██████████| 1875/1875 [05:53<00:00,  5.31it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 8, Train Loss: 0.2824, Train Accuracy: 89.71%, Test Loss: 0.2957, Test Accuracy: 89.42%


100%|██████████| 1875/1875 [05:53<00:00,  5.30it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 9, Train Loss: 0.2676, Train Accuracy: 90.32%, Test Loss: 0.2930, Test Accuracy: 89.06%


100%|██████████| 1875/1875 [05:53<00:00,  5.30it/s]


Epoch 10, Train Loss: 0.2654, Train Accuracy: 90.32%, Test Loss: 0.2881, Test Accuracy: 89.52%


Total Time Elapsed: 3543.9587218761444 s


## ResNet

In [11]:
from torchvision.models import resnet18, ResNet18_Weights
PretrainedResNetModel = resnet18(weights=ResNet18_Weights.DEFAULT).to(device)

# defining hyperparameters
learning_rate = 0.001
num_epochs = 10

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(PretrainedResNetModel.parameters(), lr=learning_rate)

train(PretrainedResNetModel, criterion, optimizer, num_epochs, inception_channel_train_loader, inception_channel_test_loader)

100%|██████████████████████████████████████████████████████████████████████████████| 1875/1875 [03:47<00:00,  8.24it/s]


Epoch 1, Train Loss: 0.3462, Train Accuracy: 87.88%, Test Loss: 0.2795, Test Accuracy: 89.69%


100%|██████████████████████████████████████████████████████████████████████████████| 1875/1875 [03:46<00:00,  8.26it/s]


Epoch 2, Train Loss: 0.2351, Train Accuracy: 91.66%, Test Loss: 0.2259, Test Accuracy: 92.34%


100%|██████████████████████████████████████████████████████████████████████████████| 1875/1875 [03:46<00:00,  8.28it/s]


Epoch 3, Train Loss: 0.1964, Train Accuracy: 92.94%, Test Loss: 0.2056, Test Accuracy: 92.75%


100%|██████████████████████████████████████████████████████████████████████████████| 1875/1875 [03:47<00:00,  8.23it/s]


Epoch 4, Train Loss: 0.1717, Train Accuracy: 93.86%, Test Loss: 0.1899, Test Accuracy: 93.57%


100%|██████████████████████████████████████████████████████████████████████████████| 1875/1875 [03:46<00:00,  8.29it/s]


Epoch 5, Train Loss: 0.1434, Train Accuracy: 94.82%, Test Loss: 0.1904, Test Accuracy: 93.62%


100%|██████████████████████████████████████████████████████████████████████████████| 1875/1875 [03:45<00:00,  8.31it/s]


Epoch 6, Train Loss: 0.1146, Train Accuracy: 95.87%, Test Loss: 0.1987, Test Accuracy: 93.57%


100%|██████████████████████████████████████████████████████████████████████████████| 1875/1875 [03:45<00:00,  8.32it/s]


Epoch 7, Train Loss: 0.0888, Train Accuracy: 96.76%, Test Loss: 0.1955, Test Accuracy: 94.08%


100%|██████████████████████████████████████████████████████████████████████████████| 1875/1875 [03:45<00:00,  8.31it/s]


Epoch 8, Train Loss: 0.0635, Train Accuracy: 97.75%, Test Loss: 0.2241, Test Accuracy: 93.50%


100%|██████████████████████████████████████████████████████████████████████████████| 1875/1875 [03:45<00:00,  8.32it/s]


Epoch 9, Train Loss: 0.0495, Train Accuracy: 98.19%, Test Loss: 0.2703, Test Accuracy: 92.92%


100%|██████████████████████████████████████████████████████████████████████████████| 1875/1875 [04:25<00:00,  7.07it/s]


Epoch 10, Train Loss: 0.0395, Train Accuracy: 98.56%, Test Loss: 0.2895, Test Accuracy: 93.22%


Total Time Elapsed: 2302.2743515968323 s


## Inception V3

In [25]:
from torchvision.models import inception_v3, Inception_V3_Weights
PretrainedInceptionModel = inception_v3(weights=Inception_V3_Weights.DEFAULT).to(device)

# defining hyperparameters
learning_rate = 0.001
num_epochs = 10

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(PretrainedInceptionModel.parameters(), lr=learning_rate)

train(PretrainedInceptionModel, criterion, optimizer, num_epochs, inception_channel_train_loader, inception_channel_test_loader)

Using cache found in /home/UG/chua0994/.cache/torch/hub/pytorch_vision_v0.7.0
100%|██████████| 1875/1875 [06:02<00:00,  5.17it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 1, Train Loss: 0.3688, Train Accuracy: 87.24%, Test Loss: 0.2656, Test Accuracy: 90.61%


100%|██████████| 1875/1875 [06:02<00:00,  5.17it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 2, Train Loss: 0.2514, Train Accuracy: 91.20%, Test Loss: 0.2228, Test Accuracy: 92.36%


100%|██████████| 1875/1875 [06:02<00:00,  5.17it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 3, Train Loss: 0.2141, Train Accuracy: 92.45%, Test Loss: 0.1988, Test Accuracy: 93.06%


100%|██████████| 1875/1875 [06:03<00:00,  5.16it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 4, Train Loss: 0.1881, Train Accuracy: 93.30%, Test Loss: 0.2186, Test Accuracy: 92.00%


100%|██████████| 1875/1875 [06:02<00:00,  5.17it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 5, Train Loss: 0.1670, Train Accuracy: 94.10%, Test Loss: 0.2732, Test Accuracy: 90.44%


100%|██████████| 1875/1875 [06:02<00:00,  5.18it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 6, Train Loss: 0.1453, Train Accuracy: 94.88%, Test Loss: 0.1810, Test Accuracy: 93.82%


100%|██████████| 1875/1875 [06:02<00:00,  5.17it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 7, Train Loss: 0.1264, Train Accuracy: 95.48%, Test Loss: 0.1786, Test Accuracy: 93.89%


100%|██████████| 1875/1875 [05:56<00:00,  5.26it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 8, Train Loss: 0.1086, Train Accuracy: 96.04%, Test Loss: 0.1774, Test Accuracy: 94.19%


100%|██████████| 1875/1875 [05:58<00:00,  5.24it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 9, Train Loss: 0.0920, Train Accuracy: 96.62%, Test Loss: 0.2234, Test Accuracy: 93.35%


100%|██████████| 1875/1875 [05:57<00:00,  5.25it/s]


Epoch 10, Train Loss: 0.0753, Train Accuracy: 97.28%, Test Loss: 0.1996, Test Accuracy: 94.35%


Total Time Elapsed: 3610.4280977249146 s
