In [1]:
import os
import math
from collections import OrderedDict
import numpy as np
import matplotlib.pyplot as plt
import time

import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torch.utils.data import random_split

from torchvision.datasets import CIFAR10

from model_architecture import *

# Dataset preparation

In [2]:
def get_dataset(train_val_split = 0.9):

    transform  = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])
    
    train_data = CIFAR10(root='./data', train = True, download = first_run, transform = transform)
    test_data  = CIFAR10(root='./data', train = False, download = first_run, transform = transform)

    train_size = int(train_val_split * len(train_data))
    val_size = len(train_data) - train_size


    train_data, val_data = random_split(train_data, [train_size, val_size])
    
    return train_data, val_data, test_data


def get_dataloader(batch_size, train_val_split = 0.9, num_workers = 1):
    
    train_set, val_set, test_set = get_dataset(train_val_split)
    trainloader = DataLoader(train_set, batch_size = batch_size, shuffle = True, num_workers = num_workers, pin_memory = True)
    valloader = DataLoader(val_set, batch_size = batch_size, shuffle = False, num_workers = num_workers, pin_memory = True)
    testloader = DataLoader(test_set, batch_size = batch_size, shuffle = False, num_workers = num_workers, pin_memory = True)

    return trainloader, valloader, testloader

# k = 12, depth = 40

In [3]:
growth_rate = 12
depth = 40 
reduction = 0.5
num_classes = 10
bottle_neck_flag = True
layers_per_stage = None
growth_rate_per_stage = None
drop_prob = 0.0
fetch_type = "dense"

torch.manual_seed(0)
np.random.seed(0)

net = Model(growth_rate = growth_rate, 
            depth = depth, 
            reduction = reduction,
            num_classes = num_classes,
            bottle_neck_flag = bottle_neck_flag,
            layers_per_stage = layers_per_stage,
            growth_rate_per_stage = growth_rate_per_stage, 
            drop_prob = drop_prob,
            fetch_type = fetch_type)

print(net)
total_params = sum([p.data.nelement() for p in net.parameters()])

print('')
print('Total trainable parameters in the network: %.4f' % (total_params / 1e6) + 'M')

Model(
  (features): Sequential(
    (conv0): Conv2d(3, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (dense-stage-0): dense_stage(
      (block-1): bottle_neck(
        (batch_norm1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(24, 48, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (batch_norm2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU(inplace=True)
        (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (block-2): bottle_neck(
        (batch_norm1): BatchNorm2d(36, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(36, 48, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (batch_norm2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  

In [5]:
batch_size = 32
epochs = 70
learning_rate = 0.1
momentum = 0.9

first_run = True 
use_cuda = torch.cuda.is_available()

train_val_split = 0.9

trainloader, valloader, testloader = get_dataloader(batch_size = batch_size, train_val_split = train_val_split)
dataloaders = {'train': trainloader, 'val' : valloader, 'test': testloader}

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr = learning_rate, momentum = momentum)

Files already downloaded and verified
Files already downloaded and verified


In [7]:
net = net.to('cuda' if use_cuda else 'cpu')

best_val_accuracy = -math.inf

for epoch in range(epochs):

    net.train()

    batches_in_pass = len(dataloaders['train'])
    
    #Training
    training_epoch_start_time = time.time()

    loss_total = 0.0
    epoch_loss = 0.0
    
    for idx, data in enumerate(dataloaders['train']):
        
        inputs, labels = data
        inputs = inputs.to('cuda' if use_cuda else 'cpu')
        labels = labels.to('cuda' if use_cuda else 'cpu')
        
        optimizer.zero_grad()
        
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        epoch_loss += loss.item()
        loss_total += loss.item()
        
    #Validation   
    epoch_loss /= batches_in_pass

    net.eval()

    correct = 0.0
    total = 0.0
    for idx, data in enumerate(dataloaders['val']):

        inputs, labels = data
        inputs = inputs.to('cuda')
        labels = labels.to('cuda')

        outputs = net(inputs)
        _, predicted = torch.max(outputs, 1)

        total += labels.shape[0]
        correct += (predicted == labels).sum().item()

    epoch_accuracy = 100 * correct / total
    
    print(f'Epoch {epoch + 1} | Training Loss (Avg): {epoch_loss:.6f} | Validation Accuracy: {epoch_accuracy}% | Time elapsed: {time.time() - training_epoch_start_time:.2f}s')
    
    #Saving the model
    save_path = os.path.join(os.getcwd(), 'models', 'dense_cifar10_k12_depth40')
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    #torch.save(net.state_dict(), os.path.join(save_path, f'epoch_{epoch}.pt'))
    
    if epoch_accuracy > best_val_accuracy:

        torch.save(net.state_dict(), os.path.join(save_path, 'best'+str(epochs)+'.pt'))
        best_val_accuracy = epoch_accuracy

print('Training Complete.')  

Epoch 1 | Training Loss (Avg): 1.465715 | Validation Accuracy: 60.4% | Time elapsed: 37.65s
Epoch 2 | Training Loss (Avg): 0.963758 | Validation Accuracy: 69.84% | Time elapsed: 38.57s
Epoch 3 | Training Loss (Avg): 0.762401 | Validation Accuracy: 73.12% | Time elapsed: 40.29s
Epoch 4 | Training Loss (Avg): 0.649623 | Validation Accuracy: 77.26% | Time elapsed: 39.07s
Epoch 5 | Training Loss (Avg): 0.570356 | Validation Accuracy: 78.94% | Time elapsed: 40.10s
Epoch 6 | Training Loss (Avg): 0.510915 | Validation Accuracy: 82.04% | Time elapsed: 39.90s
Epoch 7 | Training Loss (Avg): 0.464523 | Validation Accuracy: 83.32% | Time elapsed: 39.43s
Epoch 8 | Training Loss (Avg): 0.427445 | Validation Accuracy: 83.34% | Time elapsed: 39.09s
Epoch 9 | Training Loss (Avg): 0.396111 | Validation Accuracy: 82.9% | Time elapsed: 39.90s
Epoch 10 | Training Loss (Avg): 0.369874 | Validation Accuracy: 83.62% | Time elapsed: 40.35s
Epoch 11 | Training Loss (Avg): 0.337547 | Validation Accuracy: 82.3% |

In [8]:
#Testing
try:
    assert os.path.exists(os.path.join(save_path, 'best'+str(epochs)+'.pt'))

    net.load_state_dict(torch.load(os.path.join(save_path, 'best'+str(epochs)+'.pt')))
    net.eval()

    correct = 0.0
    total = 0.0

    with torch.no_grad():
        for idx, data in enumerate(dataloaders['test']):

            inputs, labels = data
            inputs = inputs.to('cuda' if use_cuda else 'cpu')
            labels = labels.to('cuda' if use_cuda else 'cpu')

            outputs = net(inputs)
            _, predicted = torch.max(outputs, 1)

            total += labels.shape[0]
            correct += (predicted == labels).sum().item()

        print(f'Accuracy of the model on test images: {100 * correct // total}%')
    
except:
    print('Please train the model before testing.')

Accuracy of the model on test images: 84.0%


# k = 24, depth = 100

In [9]:
growth_rate = 24
depth = 100
reduction = 0.5
num_classes = 10
bottle_neck_flag = True
layers_per_stage = None
growth_rate_per_stage = None
drop_prob = 0.0
fetch_type = "dense"

torch.manual_seed(0)
np.random.seed(0)

net = Model(growth_rate = growth_rate, 
            depth = depth, 
            reduction = reduction,
            num_classes = num_classes,
            bottle_neck_flag = bottle_neck_flag,
            layers_per_stage = layers_per_stage,
            growth_rate_per_stage = growth_rate_per_stage, 
            drop_prob = drop_prob,
            fetch_type = fetch_type)

print(net)
total_params = sum([p.data.nelement() for p in net.parameters()])

print('')
print('Total trainable parameters in the network: %.4f' % (total_params / 1e6) + 'M')

Model(
  (features): Sequential(
    (conv0): Conv2d(3, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (dense-stage-0): dense_stage(
      (block-1): bottle_neck(
        (batch_norm1): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(48, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (batch_norm2): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU(inplace=True)
        (conv2): Conv2d(96, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (block-2): bottle_neck(
        (batch_norm1): BatchNorm2d(72, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(72, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (batch_norm2): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  

In [10]:
batch_size = 32
epochs = 50
learning_rate = 0.1
momentum = 0.9

first_run = True 
use_cuda = torch.cuda.is_available()

train_val_split = 0.9

trainloader, valloader, testloader = get_dataloader(batch_size = batch_size, train_val_split = train_val_split)
dataloaders = {'train': trainloader, 'val' : valloader, 'test': testloader}

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr = learning_rate, momentum = momentum)

Files already downloaded and verified
Files already downloaded and verified


In [11]:
net = net.to('cuda' if use_cuda else 'cpu')

best_val_accuracy = -math.inf

for epoch in range(epochs):

    net.train()

    batches_in_pass = len(dataloaders['train'])
    
    #Training
    training_epoch_start_time = time.time()

    loss_total = 0.0
    epoch_loss = 0.0
    
    for idx, data in enumerate(dataloaders['train']):
        
        inputs, labels = data
        inputs = inputs.to('cuda' if use_cuda else 'cpu')
        labels = labels.to('cuda' if use_cuda else 'cpu')
        
        optimizer.zero_grad()
        
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        epoch_loss += loss.item()
        loss_total += loss.item()
        
    #Validation   
    epoch_loss /= batches_in_pass

    net.eval()

    correct = 0.0
    total = 0.0
    for idx, data in enumerate(dataloaders['val']):

        inputs, labels = data
        inputs = inputs.to('cuda')
        labels = labels.to('cuda')

        outputs = net(inputs)
        _, predicted = torch.max(outputs, 1)

        total += labels.shape[0]
        correct += (predicted == labels).sum().item()

    epoch_accuracy = 100 * correct / total
    
    print(f'Epoch {epoch + 1} | Training Loss (Avg): {epoch_loss:.6f} | Validation Accuracy: {epoch_accuracy}% | Time elapsed: {time.time() - training_epoch_start_time:.2f}s')
    
    #Saving the model
    save_path = os.path.join(os.getcwd(), 'models', 'densenet_cifar10_k24_depth100')
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    #torch.save(net.state_dict(), os.path.join(save_path, f'epoch_{epoch}.pt'))
    
    if epoch_accuracy > best_val_accuracy:

        torch.save(net.state_dict(), os.path.join(save_path, 'best'+str(epochs)+'.pt'))
        best_val_accuracy = epoch_accuracy

print('Training Complete.')  

Epoch 1 | Training Loss (Avg): 1.618872 | Validation Accuracy: 56.6% | Time elapsed: 256.06s
Epoch 2 | Training Loss (Avg): 0.952254 | Validation Accuracy: 70.56% | Time elapsed: 256.43s
Epoch 3 | Training Loss (Avg): 0.662901 | Validation Accuracy: 80.0% | Time elapsed: 256.60s
Epoch 4 | Training Loss (Avg): 0.507362 | Validation Accuracy: 78.08% | Time elapsed: 256.35s
Epoch 5 | Training Loss (Avg): 0.409770 | Validation Accuracy: 83.72% | Time elapsed: 256.50s
Epoch 6 | Training Loss (Avg): 0.338115 | Validation Accuracy: 81.96% | Time elapsed: 256.32s
Epoch 7 | Training Loss (Avg): 0.279910 | Validation Accuracy: 84.24% | Time elapsed: 256.53s
Epoch 8 | Training Loss (Avg): 0.228562 | Validation Accuracy: 82.06% | Time elapsed: 256.57s
Epoch 9 | Training Loss (Avg): 0.187946 | Validation Accuracy: 85.74% | Time elapsed: 256.66s
Epoch 10 | Training Loss (Avg): 0.151118 | Validation Accuracy: 85.62% | Time elapsed: 256.35s
Epoch 11 | Training Loss (Avg): 0.129067 | Validation Accurac

In [12]:
#Testing
try:
    assert os.path.exists(os.path.join(save_path, 'best'+str(epochs)+'.pt'))

    net.load_state_dict(torch.load(os.path.join(save_path, 'best'+str(epochs)+'.pt')))
    net.eval()

    correct = 0.0
    total = 0.0

    with torch.no_grad():
        for idx, data in enumerate(dataloaders['test']):

            inputs, labels = data
            inputs = inputs.to('cuda' if use_cuda else 'cpu')
            labels = labels.to('cuda' if use_cuda else 'cpu')

            outputs = net(inputs)
            _, predicted = torch.max(outputs, 1)

            total += labels.shape[0]
            correct += (predicted == labels).sum().item()

        print(f'Accuracy of the model on test images: {100 * correct // total}%')
    
except:
    print('Please train the model before testing.')

Accuracy of the model on test images: 88.0%


# k = 36, depth = 100

In [13]:
growth_rate = 36
depth = 100
reduction = 0.5
num_classes = 10
bottle_neck_flag = True
layers_per_stage = None
growth_rate_per_stage = None
drop_prob = 0.0
fetch_type = "dense"

torch.manual_seed(0)
np.random.seed(0)

net = Model(growth_rate = growth_rate, 
            depth = depth, 
            reduction = reduction,
            num_classes = num_classes,
            bottle_neck_flag = bottle_neck_flag,
            layers_per_stage = layers_per_stage,
            growth_rate_per_stage = growth_rate_per_stage, 
            drop_prob = drop_prob,
            fetch_type = fetch_type)

print(net)
total_params = sum([p.data.nelement() for p in net.parameters()])

print('')
print('Total trainable parameters in the network: %.4f' % (total_params / 1e6) + 'M')

Model(
  (features): Sequential(
    (conv0): Conv2d(3, 72, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (dense-stage-0): dense_stage(
      (block-1): bottle_neck(
        (batch_norm1): BatchNorm2d(72, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(72, 144, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (batch_norm2): BatchNorm2d(144, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU(inplace=True)
        (conv2): Conv2d(144, 36, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (block-2): bottle_neck(
        (batch_norm1): BatchNorm2d(108, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(108, 144, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (batch_norm2): BatchNorm2d(144, eps=1e-05, momentum=0.1, affine=True, track_running_stats=T

In [14]:
batch_size = 32
epochs = 50
learning_rate = 0.1
momentum = 0.9

first_run = True 
use_cuda = torch.cuda.is_available()

train_val_split = 0.9

trainloader, valloader, testloader = get_dataloader(batch_size = batch_size, train_val_split = train_val_split)
dataloaders = {'train': trainloader, 'val' : valloader, 'test': testloader}

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr = learning_rate, momentum = momentum)

Files already downloaded and verified
Files already downloaded and verified


In [3]:
net = net.to('cuda' if use_cuda else 'cpu')

best_val_accuracy = -math.inf

for epoch in range(epochs):

    net.train()

    batches_in_pass = len(dataloaders['train'])
    
    #Training
    training_epoch_start_time = time.time()

    loss_total = 0.0
    epoch_loss = 0.0
    
    for idx, data in enumerate(dataloaders['train']):
        
        inputs, labels = data
        inputs = inputs.to('cuda' if use_cuda else 'cpu')
        labels = labels.to('cuda' if use_cuda else 'cpu')
        
        optimizer.zero_grad()
        
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        epoch_loss += loss.item()
        loss_total += loss.item()
        
    #Validation   
    epoch_loss /= batches_in_pass

    net.eval()

    correct = 0.0
    total = 0.0
    for idx, data in enumerate(dataloaders['val']):

        inputs, labels = data
        inputs = inputs.to('cuda')
        labels = labels.to('cuda')

        outputs = net(inputs)
        _, predicted = torch.max(outputs, 1)

        total += labels.shape[0]
        correct += (predicted == labels).sum().item()

    epoch_accuracy = 100 * correct / total
    
    print(f'Epoch {epoch + 1} | Training Loss (Avg): {epoch_loss:.6f} | Validation Accuracy: {epoch_accuracy}% | Time elapsed: {time.time() - training_epoch_start_time:.2f}s')
    
    #Saving the model
    save_path = os.path.join(os.getcwd(), 'models', 'densenet_cifar10_k36_depth100')
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    #torch.save(net.state_dict(), os.path.join(save_path, f'epoch_{epoch}.pt'))
    
    if epoch_accuracy > best_val_accuracy:

        torch.save(net.state_dict(), os.path.join(save_path, 'best'+str(epochs)+'.pt'))
        best_val_accuracy = epoch_accuracy

print('Training Complete.')  

Epoch 1 | Training Loss (Avg): 1.620048 | Validation Accuracy: 52.68% | Time elapsed: 527.64s
Epoch 2 | Training Loss (Avg): 0.918516 | Validation Accuracy: 67.9% | Time elapsed: 528.33s
Epoch 3 | Training Loss (Avg): 0.640413 | Validation Accuracy: 79.42% | Time elapsed: 528.66s
Epoch 4 | Training Loss (Avg): 0.486956 | Validation Accuracy: 82.56% | Time elapsed: 528.55s
Epoch 5 | Training Loss (Avg): 0.391300 | Validation Accuracy: 83.32% | Time elapsed: 528.92s
Epoch 6 | Training Loss (Avg): 0.309893 | Validation Accuracy: 84.32% | Time elapsed: 528.40s
Epoch 7 | Training Loss (Avg): 0.255954 | Validation Accuracy: 84.24% | Time elapsed: 529.46s
Epoch 8 | Training Loss (Avg): 0.211529 | Validation Accuracy: 83.94% | Time elapsed: 529.20s
Epoch 9 | Training Loss (Avg): 0.164971 | Validation Accuracy: 85.64% | Time elapsed: 529.46s
Epoch 10 | Training Loss (Avg): 0.132925 | Validation Accuracy: 86.96% | Time elapsed: 529.31s
Epoch 11 | Training Loss (Avg): 0.100713 | Validation Accura

In [16]:
#Testing
try:
    assert os.path.exists(os.path.join(save_path, 'best'+str(epochs)+'.pt'))

    net.load_state_dict(torch.load(os.path.join(save_path, 'best'+str(epochs)+'.pt')))
    net.eval()

    correct = 0.0
    total = 0.0

    with torch.no_grad():
        for idx, data in enumerate(dataloaders['test']):

            inputs, labels = data
            inputs = inputs.to('cuda' if use_cuda else 'cpu')
            labels = labels.to('cuda' if use_cuda else 'cpu')

            outputs = net(inputs)
            _, predicted = torch.max(outputs, 1)

            total += labels.shape[0]
            correct += (predicted == labels).sum().item()

        print(f'Accuracy of the model on test images: {100 * correct // total}%')
    
except:
    print('Please train the model before testing.')

Accuracy of the model on test images: 88.0%


# k = 64, depth = 100

In [4]:
growth_rate = 64
depth = 100
reduction = 0.5
num_classes = 10
bottle_neck_flag = True
layers_per_stage = None
growth_rate_per_stage = None
drop_prob = 0.0
fetch_type = "dense"

torch.manual_seed(0)
np.random.seed(0)

net = Model(growth_rate = growth_rate, 
            depth = depth, 
            reduction = reduction,
            num_classes = num_classes,
            bottle_neck_flag = bottle_neck_flag,
            layers_per_stage = layers_per_stage,
            growth_rate_per_stage = growth_rate_per_stage, 
            drop_prob = drop_prob,
            fetch_type = fetch_type)

print(net)
total_params = sum([p.data.nelement() for p in net.parameters()])

print('')
print('Total trainable parameters in the network: %.4f' % (total_params / 1e6) + 'M')

Model(
  (features): Sequential(
    (conv0): Conv2d(3, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (dense-stage-0): dense_stage(
      (block-1): bottle_neck(
        (batch_norm1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (batch_norm2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU(inplace=True)
        (conv2): Conv2d(256, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (block-2): bottle_neck(
        (batch_norm1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(192, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (batch_norm2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stat

In [5]:
batch_size = 32
epochs = 27
learning_rate = 0.1
momentum = 0.9

first_run = True 
use_cuda = torch.cuda.is_available()

train_val_split = 0.9

trainloader, valloader, testloader = get_dataloader(batch_size = batch_size, train_val_split = train_val_split)
dataloaders = {'train': trainloader, 'val' : valloader, 'test': testloader}

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr = learning_rate, momentum = momentum)

Files already downloaded and verified
Files already downloaded and verified


In [9]:
net = net.to('cuda' if use_cuda else 'cpu')

best_val_accuracy = -math.inf

for epoch in range(epochs):

    net.train()

    batches_in_pass = len(dataloaders['train'])
    
    #Training
    training_epoch_start_time = time.time()

    loss_total = 0.0
    epoch_loss = 0.0
    
    for idx, data in enumerate(dataloaders['train']):
        
        inputs, labels = data
        inputs = inputs.to('cuda' if use_cuda else 'cpu')
        labels = labels.to('cuda' if use_cuda else 'cpu')
        
        optimizer.zero_grad()
        
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        epoch_loss += loss.item()
        loss_total += loss.item()
        
    #Validation   
    epoch_loss /= batches_in_pass

    net.eval()

    correct = 0.0
    total = 0.0
    for idx, data in enumerate(dataloaders['val']):

        inputs, labels = data
        inputs = inputs.to('cuda')
        labels = labels.to('cuda')

        outputs = net(inputs)
        _, predicted = torch.max(outputs, 1)

        total += labels.shape[0]
        correct += (predicted == labels).sum().item()

    epoch_accuracy = 100 * correct / total
    
    print(f'Epoch {epoch + 1} | Training Loss (Avg): {epoch_loss:.6f} | Validation Accuracy: {epoch_accuracy}% | Time elapsed: {time.time() - training_epoch_start_time:.2f}s')
    
    #Saving the model
    save_path = os.path.join(os.getcwd(), 'models', 'densenet_cifar10_k64_depth100')
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    #torch.save(net.state_dict(), os.path.join(save_path, f'epoch_{epoch}.pt'))
    
    if epoch_accuracy > best_val_accuracy:

        torch.save(net.state_dict(), os.path.join(save_path, 'best'+str(epochs)+'.pt'))
        best_val_accuracy = epoch_accuracy

print('Training Complete.')  

Epoch 1 | Training Loss (Avg): 1.810048 | Validation Accuracy: 49.72% | Time elapsed: 887.00s
Epoch 2 | Training Loss (Avg): 1.158396 | Validation Accuracy: 63.9% | Time elapsed: 889.36s
Epoch 3 | Training Loss (Avg): 0.843338 | Validation Accuracy: 73.22% | Time elapsed: 889.45s
Epoch 4 | Training Loss (Avg): 0.629116 | Validation Accuracy: 75.08% | Time elapsed: 889.75s
Epoch 5 | Training Loss (Avg): 0.497948 | Validation Accuracy: 80.82% | Time elapsed: 890.42s
Epoch 6 | Training Loss (Avg): 0.392938 | Validation Accuracy: 81.86% | Time elapsed: 886.44s
Epoch 7 | Training Loss (Avg): 0.306916 | Validation Accuracy: 82.76% | Time elapsed: 892.48s
Epoch 8 | Training Loss (Avg): 0.239053 | Validation Accuracy: 83.1% | Time elapsed: 891.82s
Epoch 9 | Training Loss (Avg): 0.183678 | Validation Accuracy: 83.66% | Time elapsed: 891.68s
Epoch 10 | Training Loss (Avg): 0.146645 | Validation Accuracy: 85.08% | Time elapsed: 892.00s
Epoch 11 | Training Loss (Avg): 0.127527 | Validation Accurac

In [8]:
#Testing
try:
    assert os.path.exists(os.path.join(save_path, 'best'+str(epochs)+'.pt'))

    net.load_state_dict(torch.load(os.path.join(save_path, 'best'+str(epochs)+'.pt')))
    net.eval()

    correct = 0.0
    total = 0.0

    with torch.no_grad():
        for idx, data in enumerate(dataloaders['test']):

            inputs, labels = data
            inputs = inputs.to('cuda' if use_cuda else 'cpu')
            labels = labels.to('cuda' if use_cuda else 'cpu')

            outputs = net(inputs)
            _, predicted = torch.max(outputs, 1)

            total += labels.shape[0]
            correct += (predicted == labels).sum().item()

        print(f'Accuracy of the model on test images: {100 * correct // total}%')
    
except:
    print('Please train the model before testing.')

Accuracy of the model on test images: 84.0%


# k = [16, 32, 64], depth = 100

In [10]:
growth_rate = 24
depth = 100
reduction = 0.5
num_classes = 10
bottle_neck_flag = True
layers_per_stage = None
growth_rate_per_stage = [16, 32, 64]
drop_prob = 0.0
fetch_type = "dense"

torch.manual_seed(0)
np.random.seed(0)

net = Model(growth_rate = growth_rate, 
            depth = depth, 
            reduction = reduction,
            num_classes = num_classes,
            bottle_neck_flag = bottle_neck_flag,
            layers_per_stage = layers_per_stage,
            growth_rate_per_stage = growth_rate_per_stage, 
            drop_prob = drop_prob,
            fetch_type = fetch_type)

print(net)
total_params = sum([p.data.nelement() for p in net.parameters()])

print('')
print('Total trainable parameters in the network: %.4f' % (total_params / 1e6) + 'M')

Model(
  (features): Sequential(
    (conv0): Conv2d(3, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (dense-stage-0): dense_stage(
      (block-1): bottle_neck(
        (batch_norm1): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(48, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (batch_norm2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU(inplace=True)
        (conv2): Conv2d(64, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (block-2): bottle_neck(
        (batch_norm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (batch_norm2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  

In [11]:
batch_size = 32
epochs = 27
learning_rate = 0.1
momentum = 0.9

first_run = True 
use_cuda = torch.cuda.is_available()

train_val_split = 0.9

trainloader, valloader, testloader = get_dataloader(batch_size = batch_size, train_val_split = train_val_split)
dataloaders = {'train': trainloader, 'val' : valloader, 'test': testloader}

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr = learning_rate, momentum = momentum)

Files already downloaded and verified
Files already downloaded and verified


In [13]:
net = net.to('cuda' if use_cuda else 'cpu')

best_val_accuracy = -math.inf

for epoch in range(epochs):

    net.train()

    batches_in_pass = len(dataloaders['train'])
    
    #Training
    training_epoch_start_time = time.time()

    loss_total = 0.0
    epoch_loss = 0.0
    
    for idx, data in enumerate(dataloaders['train']):
        
        inputs, labels = data
        inputs = inputs.to('cuda' if use_cuda else 'cpu')
        labels = labels.to('cuda' if use_cuda else 'cpu')
        
        optimizer.zero_grad()
        
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        epoch_loss += loss.item()
        loss_total += loss.item()
        
    #Validation   
    epoch_loss /= batches_in_pass

    net.eval()

    correct = 0.0
    total = 0.0
    for idx, data in enumerate(dataloaders['val']):

        inputs, labels = data
        inputs = inputs.to('cuda')
        labels = labels.to('cuda')

        outputs = net(inputs)
        _, predicted = torch.max(outputs, 1)

        total += labels.shape[0]
        correct += (predicted == labels).sum().item()

    epoch_accuracy = 100 * correct / total
    
    print(f'Epoch {epoch + 1} | Training Loss (Avg): {epoch_loss:.6f} | Validation Accuracy: {epoch_accuracy}% | Time elapsed: {time.time() - training_epoch_start_time:.2f}s')
    
    #Saving the model
    save_path = os.path.join(os.getcwd(), 'models', 'densenet_cifar10_k163224_depth100')
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    #torch.save(net.state_dict(), os.path.join(save_path, f'epoch_{epoch}.pt'))
    
    if epoch_accuracy > best_val_accuracy:

        torch.save(net.state_dict(), os.path.join(save_path, 'best'+str(epochs)+'.pt'))
        best_val_accuracy = epoch_accuracy

print('Training Complete.')  

Epoch 1 | Training Loss (Avg): 1.753006 | Validation Accuracy: 49.08% | Time elapsed: 251.40s
Epoch 2 | Training Loss (Avg): 1.117407 | Validation Accuracy: 65.04% | Time elapsed: 251.13s
Epoch 3 | Training Loss (Avg): 0.832991 | Validation Accuracy: 73.82% | Time elapsed: 251.20s
Epoch 4 | Training Loss (Avg): 0.663525 | Validation Accuracy: 76.86% | Time elapsed: 250.79s
Epoch 5 | Training Loss (Avg): 0.525604 | Validation Accuracy: 79.1% | Time elapsed: 251.13s
Epoch 6 | Training Loss (Avg): 0.425683 | Validation Accuracy: 80.48% | Time elapsed: 251.23s
Epoch 7 | Training Loss (Avg): 0.345004 | Validation Accuracy: 81.08% | Time elapsed: 251.15s
Epoch 8 | Training Loss (Avg): 0.277007 | Validation Accuracy: 82.8% | Time elapsed: 251.40s
Epoch 9 | Training Loss (Avg): 0.217158 | Validation Accuracy: 83.04% | Time elapsed: 250.97s
Epoch 10 | Training Loss (Avg): 0.174636 | Validation Accuracy: 84.78% | Time elapsed: 251.37s
Epoch 11 | Training Loss (Avg): 0.142559 | Validation Accurac

In [14]:
#Testing
try:
    assert os.path.exists(os.path.join(save_path, 'best'+str(epochs)+'.pt'))

    net.load_state_dict(torch.load(os.path.join(save_path, 'best'+str(epochs)+'.pt')))
    net.eval()

    correct = 0.0
    total = 0.0

    with torch.no_grad():
        for idx, data in enumerate(dataloaders['test']):

            inputs, labels = data
            inputs = inputs.to('cuda' if use_cuda else 'cpu')
            labels = labels.to('cuda' if use_cuda else 'cpu')

            outputs = net(inputs)
            _, predicted = torch.max(outputs, 1)

            total += labels.shape[0]
            correct += (predicted == labels).sum().item()

        print(f'Accuracy of the model on test images: {100 * correct // total}%')
    
except:
    print('Please train the model before testing.')

Accuracy of the model on test images: 84.0%


# k = 24, depth = 100, without using bottleneck compression

In [3]:
growth_rate = 24
depth = 100
reduction = 0.5
num_classes = 10
bottle_neck_flag = False
layers_per_stage = None
growth_rate_per_stage = None
drop_prob = 0.0
fetch_type = "dense"

torch.manual_seed(0)
np.random.seed(0)

net = Model(growth_rate = growth_rate, 
            depth = depth, 
            reduction = reduction,
            num_classes = num_classes,
            bottle_neck_flag = bottle_neck_flag,
            layers_per_stage = layers_per_stage,
            growth_rate_per_stage = growth_rate_per_stage, 
            drop_prob = drop_prob,
            fetch_type = fetch_type)

print(net)
total_params = sum([p.data.nelement() for p in net.parameters()])

print('')
print('Total trainable parameters in the network: %.4f' % (total_params / 1e6) + 'M')

Model(
  (features): Sequential(
    (conv0): Conv2d(3, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (dense-stage-0): dense_stage(
      (block-1): single_layer(
        (batch_norm1): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(48, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (block-2): single_layer(
        (batch_norm1): BatchNorm2d(72, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(72, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (block-3): single_layer(
        (batch_norm1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(96, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (block-4): single_

In [5]:
batch_size = 16
epochs = 20
learning_rate = 0.1
momentum = 0.9

first_run = True 
use_cuda = torch.cuda.is_available()

train_val_split = 0.9

trainloader, valloader, testloader = get_dataloader(batch_size = batch_size, train_val_split = train_val_split)
dataloaders = {'train': trainloader, 'val' : valloader, 'test': testloader}

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr = learning_rate, momentum = momentum)

Files already downloaded and verified
Files already downloaded and verified


In [8]:
net = net.to('cuda' if use_cuda else 'cpu')

best_val_accuracy = -math.inf

for epoch in range(epochs):

    net.train()

    batches_in_pass = len(dataloaders['train'])
    
    #Training
    training_epoch_start_time = time.time()

    loss_total = 0.0
    epoch_loss = 0.0
    
    for idx, data in enumerate(dataloaders['train']):
        
        inputs, labels = data
        inputs = inputs.to('cuda' if use_cuda else 'cpu')
        labels = labels.to('cuda' if use_cuda else 'cpu')
        
        optimizer.zero_grad()
        
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        epoch_loss += loss.item()
        loss_total += loss.item()
        
    #Validation   
    epoch_loss /= batches_in_pass

    net.eval()

    correct = 0.0
    total = 0.0
    for idx, data in enumerate(dataloaders['val']):

        inputs, labels = data
        inputs = inputs.to('cuda')
        labels = labels.to('cuda')

        outputs = net(inputs)
        _, predicted = torch.max(outputs, 1)

        total += labels.shape[0]
        correct += (predicted == labels).sum().item()

    epoch_accuracy = 100 * correct / total
    
    print(f'Epoch {epoch + 1} | Training Loss (Avg): {epoch_loss:.6f} | Validation Accuracy: {epoch_accuracy}% | Time elapsed: {time.time() - training_epoch_start_time:.2f}s')
    
    #Saving the model
    save_path = os.path.join(os.getcwd(), 'models', 'densenet_cifar10_k24_depth100_nobc')
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    #torch.save(net.state_dict(), os.path.join(save_path, f'epoch_{epoch}.pt'))
    
    if epoch_accuracy > best_val_accuracy:

        torch.save(net.state_dict(), os.path.join(save_path, 'best'+str(epochs)+'.pt'))
        best_val_accuracy = epoch_accuracy

print('Training Complete.')  

Epoch 1 | Training Loss (Avg): 1.846598 | Validation Accuracy: 53.86% | Time elapsed: 1694.08s
Epoch 2 | Training Loss (Avg): 1.055905 | Validation Accuracy: 68.46% | Time elapsed: 1704.35s
Epoch 3 | Training Loss (Avg): 0.747201 | Validation Accuracy: 78.52% | Time elapsed: 1706.53s
Epoch 4 | Training Loss (Avg): 0.562587 | Validation Accuracy: 82.28% | Time elapsed: 1708.41s
Epoch 5 | Training Loss (Avg): 0.459111 | Validation Accuracy: 83.52% | Time elapsed: 1709.24s
Epoch 6 | Training Loss (Avg): 0.369140 | Validation Accuracy: 82.96% | Time elapsed: 1709.36s
Epoch 7 | Training Loss (Avg): 0.305803 | Validation Accuracy: 83.68% | Time elapsed: 1709.29s
Epoch 8 | Training Loss (Avg): 0.239381 | Validation Accuracy: 84.44% | Time elapsed: 1709.40s
Epoch 9 | Training Loss (Avg): 0.198957 | Validation Accuracy: 86.4% | Time elapsed: 1708.86s
Epoch 10 | Training Loss (Avg): 0.155588 | Validation Accuracy: 87.3% | Time elapsed: 1708.91s
Epoch 11 | Training Loss (Avg): 0.120280 | Validati

In [7]:
#Testing
try:
    assert os.path.exists(os.path.join(save_path, 'best'+str(epochs)+'.pt'))

    net.load_state_dict(torch.load(os.path.join(save_path, 'best'+str(epochs)+'.pt')))
    net.eval()

    correct = 0.0
    total = 0.0

    with torch.no_grad():
        for idx, data in enumerate(dataloaders['test']):

            inputs, labels = data
            inputs = inputs.to('cuda' if use_cuda else 'cpu')
            labels = labels.to('cuda' if use_cuda else 'cpu')

            outputs = net(inputs)
            _, predicted = torch.max(outputs, 1)

            total += labels.shape[0]
            correct += (predicted == labels).sum().item()

        print(f'Accuracy of the model on test images: {100 * correct // total}%')
    
except:
    print('Please train the model before testing.')

Accuracy of the model on test images: 87.0%


# k = 36, depth = 100, without using bottleneck compression 

In [3]:
growth_rate = 36
depth = 100
reduction = 0.5
num_classes = 10
bottle_neck_flag = False
layers_per_stage = None
growth_rate_per_stage = None
drop_prob = 0.0
fetch_type = "dense"

torch.manual_seed(0)
np.random.seed(0)

net = Model(growth_rate = growth_rate, 
            depth = depth, 
            reduction = reduction,
            num_classes = num_classes,
            bottle_neck_flag = bottle_neck_flag,
            layers_per_stage = layers_per_stage,
            growth_rate_per_stage = growth_rate_per_stage, 
            drop_prob = drop_prob,
            fetch_type = fetch_type)

print(net)
total_params = sum([p.data.nelement() for p in net.parameters()])

print('')
print('Total trainable parameters in the network: %.4f' % (total_params / 1e6) + 'M')

Model(
  (features): Sequential(
    (conv0): Conv2d(3, 72, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (dense-stage-0): dense_stage(
      (block-1): single_layer(
        (batch_norm1): BatchNorm2d(72, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(72, 36, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (block-2): single_layer(
        (batch_norm1): BatchNorm2d(108, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(108, 36, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (block-3): single_layer(
        (batch_norm1): BatchNorm2d(144, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(144, 36, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (block-4): sin

In [5]:
batch_size = 16
epochs = 10
learning_rate = 0.1
momentum = 0.9

first_run = True 
use_cuda = torch.cuda.is_available()

train_val_split = 0.9

trainloader, valloader, testloader = get_dataloader(batch_size = batch_size, train_val_split = train_val_split)
dataloaders = {'train': trainloader, 'val' : valloader, 'test': testloader}

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr = learning_rate, momentum = momentum)

Files already downloaded and verified
Files already downloaded and verified


In [9]:
net = net.to('cuda' if use_cuda else 'cpu')

best_val_accuracy = -math.inf

for epoch in range(epochs):

    net.train()

    batches_in_pass = len(dataloaders['train'])
    
    #Training
    training_epoch_start_time = time.time()

    loss_total = 0.0
    epoch_loss = 0.0
    
    for idx, data in enumerate(dataloaders['train']):
        
        inputs, labels = data
        inputs = inputs.to('cuda' if use_cuda else 'cpu')
        labels = labels.to('cuda' if use_cuda else 'cpu')
        
        optimizer.zero_grad()
        
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        epoch_loss += loss.item()
        loss_total += loss.item()
        
    #Validation   
    epoch_loss /= batches_in_pass

    net.eval()

    correct = 0.0
    total = 0.0
    for idx, data in enumerate(dataloaders['val']):

        inputs, labels = data
        inputs = inputs.to('cuda')
        labels = labels.to('cuda')

        outputs = net(inputs)
        _, predicted = torch.max(outputs, 1)

        total += labels.shape[0]
        correct += (predicted == labels).sum().item()

    epoch_accuracy = 100 * correct / total
    
    print(f'Epoch {epoch + 1} | Training Loss (Avg): {epoch_loss:.6f} | Validation Accuracy: {epoch_accuracy}% | Time elapsed: {time.time() - training_epoch_start_time:.2f}s')
    
    #Saving the model
    save_path = os.path.join(os.getcwd(), 'models', 'densenet_cifar10_k36_depth100_nobc')
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    #torch.save(net.state_dict(), os.path.join(save_path, f'epoch_{epoch}.pt'))
    
    if epoch_accuracy > best_val_accuracy:

        torch.save(net.state_dict(), os.path.join(save_path, 'best'+str(epochs)+'.pt'))
        best_val_accuracy = epoch_accuracy

print('Training Complete.')  

Epoch 1 | Training Loss (Avg): 1.044905 | Validation Accuracy: 68.46% | Time elapsed: 3432.79s
Epoch 2 | Training Loss (Avg): 0.747201 | Validation Accuracy: 78.52% | Time elapsed: 3429.46s
Epoch 3 | Training Loss (Avg): 0.662687 | Validation Accuracy: 82.28% | Time elapsed: 3415.83s
Epoch 4 | Training Loss (Avg): 0.552311 | Validation Accuracy: 83.52% | Time elapsed: 3426.06s
Epoch 5 | Training Loss (Avg): 0.461940 | Validation Accuracy: 82.96% | Time elapsed: 3422.27s


In [10]:
#Testing
try:
    assert os.path.exists(os.path.join(save_path, 'best'+str(epochs)+'.pt'))

    net.load_state_dict(torch.load(os.path.join(save_path, 'best'+str(epochs)+'.pt')))
    net.eval()

    correct = 0.0
    total = 0.0

    with torch.no_grad():
        for idx, data in enumerate(dataloaders['test']):

            inputs, labels = data
            inputs = inputs.to('cuda' if use_cuda else 'cpu')
            labels = labels.to('cuda' if use_cuda else 'cpu')

            outputs = net(inputs)
            _, predicted = torch.max(outputs, 1)

            total += labels.shape[0]
            correct += (predicted == labels).sum().item()

        print(f'Accuracy of the model on test images: {100 * correct // total}%')
    
except:
    print('Please train the model before testing.')

Accuracy of the model on test images: 83.0%
