In [1]:
import os
import math
from collections import OrderedDict
import numpy as np
import matplotlib.pyplot as plt
import time

import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torch.utils.data import random_split

from torchvision.datasets import CIFAR100

from model_architecture import *

# Dataset preparation

In [2]:
def get_dataset(train_val_split = 0.9):

    transform  = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])
    
    train_data = CIFAR100(root='./data', train = True, download = first_run, transform = transform)
    test_data  = CIFAR100(root='./data', train = False, download = first_run, transform = transform)

    train_size = int(train_val_split * len(train_data))
    val_size = len(train_data) - train_size


    train_data, val_data = random_split(train_data, [train_size, val_size])
    
    return train_data, val_data, test_data


def get_dataloader(batch_size, train_val_split = 0.9, num_workers = 1):
    
    train_set, val_set, test_set = get_dataset(train_val_split)
    trainloader = DataLoader(train_set, batch_size = batch_size, shuffle = True, num_workers = num_workers, pin_memory = True)
    valloader = DataLoader(val_set, batch_size = batch_size, shuffle = False, num_workers = num_workers, pin_memory = True)
    testloader = DataLoader(test_set, batch_size = batch_size, shuffle = False, num_workers = num_workers, pin_memory = True)

    return trainloader, valloader, testloader

# k = 12, depth = 40

In [4]:
growth_rate = 12
depth = 40 
reduction = 0.5
num_classes = 100
bottle_neck_flag = True
layers_per_stage = None
growth_rate_per_stage = None
drop_prob = 0.0
fetch_type = "dense"

torch.manual_seed(0)
np.random.seed(0)

net = Model(growth_rate = growth_rate, 
            depth = depth, 
            reduction = reduction,
            num_classes = num_classes,
            bottle_neck_flag = bottle_neck_flag,
            layers_per_stage = layers_per_stage,
            growth_rate_per_stage = growth_rate_per_stage, 
            drop_prob = drop_prob,
            fetch_type = fetch_type)

print(net)
total_params = sum([p.data.nelement() for p in net.parameters()])

print('')
print('Total trainable parameters in the network: %.4f' % (total_params / 1e6) + 'M')

Model(
  (features): Sequential(
    (conv0): Conv2d(3, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (dense-stage-0): dense_stage(
      (block-1): bottle_neck(
        (batch_norm1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(24, 48, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (batch_norm2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU(inplace=True)
        (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (block-2): bottle_neck(
        (batch_norm1): BatchNorm2d(36, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(36, 48, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (batch_norm2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  

In [4]:
batch_size = 32
epochs = 30
learning_rate = 0.1
momentum = 0.9

first_run = True 
use_cuda = torch.cuda.is_available()

train_val_split = 0.9

trainloader, valloader, testloader = get_dataloader(batch_size = batch_size, train_val_split = train_val_split)
dataloaders = {'train': trainloader, 'val' : valloader, 'test': testloader}

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr = learning_rate, momentum = momentum)

Files already downloaded and verified
Files already downloaded and verified


In [6]:
net = net.to('cuda' if use_cuda else 'cpu')

best_val_accuracy = -math.inf

for epoch in range(epochs):

    net.train()

    batches_in_pass = len(dataloaders['train'])
    
    #Training
    training_epoch_start_time = time.time()

    loss_total = 0.0
    epoch_loss = 0.0
    
    for idx, data in enumerate(dataloaders['train']):
        
        inputs, labels = data
        inputs = inputs.to('cuda' if use_cuda else 'cpu')
        labels = labels.to('cuda' if use_cuda else 'cpu')
        
        optimizer.zero_grad()
        
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        epoch_loss += loss.item()
        loss_total += loss.item()
        
    #Validation   
    epoch_loss /= batches_in_pass

    net.eval()

    correct = 0.0
    total = 0.0
    for idx, data in enumerate(dataloaders['val']):

        inputs, labels = data
        inputs = inputs.to('cuda')
        labels = labels.to('cuda')

        outputs = net(inputs)
        _, predicted = torch.max(outputs, 1)

        total += labels.shape[0]
        correct += (predicted == labels).sum().item()

    epoch_accuracy = 100 * correct / total
    
    print(f'Epoch {epoch + 1} | Training Loss (Avg): {epoch_loss:.6f} | Validation Accuracy: {epoch_accuracy}% | Time elapsed: {time.time() - training_epoch_start_time:.2f}s')
    
    #Saving the model
    save_path = os.path.join(os.getcwd(), 'models', 'dense_cifar100_k12_depth40')
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    #torch.save(net.state_dict(), os.path.join(save_path, f'epoch_{epoch}.pt'))
    
    if epoch_accuracy > best_val_accuracy:

        torch.save(net.state_dict(), os.path.join(save_path, 'best'+str(epochs)+'.pt'))
        best_val_accuracy = epoch_accuracy

print('Training Complete.') 

Epoch 1 | Training Loss (Avg): 3.919220 | Validation Accuracy: 14.34% | Time elapsed: 26.84s
Epoch 2 | Training Loss (Avg): 3.341984 | Validation Accuracy: 24.98% | Time elapsed: 25.95s
Epoch 3 | Training Loss (Avg): 2.849911 | Validation Accuracy: 32.86% | Time elapsed: 27.16s
Epoch 4 | Training Loss (Avg): 2.487821 | Validation Accuracy: 34.92% | Time elapsed: 25.76s
Epoch 5 | Training Loss (Avg): 2.265102 | Validation Accuracy: 39.68% | Time elapsed: 26.20s
Epoch 6 | Training Loss (Avg): 2.123538 | Validation Accuracy: 42.2% | Time elapsed: 25.58s
Epoch 7 | Training Loss (Avg): 2.008504 | Validation Accuracy: 45.08% | Time elapsed: 26.60s
Epoch 8 | Training Loss (Avg): 1.910219 | Validation Accuracy: 47.04% | Time elapsed: 26.53s
Epoch 9 | Training Loss (Avg): 1.831103 | Validation Accuracy: 46.88% | Time elapsed: 24.95s
Epoch 10 | Training Loss (Avg): 1.765955 | Validation Accuracy: 47.5% | Time elapsed: 26.31s
Epoch 11 | Training Loss (Avg): 1.711262 | Validation Accuracy: 48.9% |

In [7]:
#Testing
try:
    assert os.path.exists(os.path.join(save_path, 'best'+str(epochs)+'.pt'))

    net.load_state_dict(torch.load(os.path.join(save_path, 'best'+str(epochs)+'.pt')))
    net.eval()

    correct = 0.0
    total = 0.0

    with torch.no_grad():
        for idx, data in enumerate(dataloaders['test']):

            inputs, labels = data
            inputs = inputs.to('cuda' if use_cuda else 'cpu')
            labels = labels.to('cuda' if use_cuda else 'cpu')

            outputs = net(inputs)
            _, predicted = torch.max(outputs, 1)

            total += labels.shape[0]
            correct += (predicted == labels).sum().item()

        print(f'Accuracy of the model on test images: {100 * correct // total}%')
    
except:
    print('Please train the model before testing.')

Accuracy of the model on test images: 54.0%


# k = 24, depth = 100

In [8]:
growth_rate = 24
depth = 100
reduction = 0.5
num_classes = 100
bottle_neck_flag = True
layers_per_stage = None
growth_rate_per_stage = None
drop_prob = 0.0
fetch_type = "dense"

torch.manual_seed(0)
np.random.seed(0)

net = Model(growth_rate = growth_rate, 
            depth = depth, 
            reduction = reduction,
            num_classes = num_classes,
            bottle_neck_flag = bottle_neck_flag,
            layers_per_stage = layers_per_stage,
            growth_rate_per_stage = growth_rate_per_stage, 
            drop_prob = drop_prob,
            fetch_type = fetch_type)

print(net)
total_params = sum([p.data.nelement() for p in net.parameters()])

print('')
print('Total trainable parameters in the network: %.4f' % (total_params / 1e6) + 'M')

Model(
  (features): Sequential(
    (conv0): Conv2d(3, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (dense-stage-0): dense_stage(
      (block-1): bottle_neck(
        (batch_norm1): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(48, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (batch_norm2): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU(inplace=True)
        (conv2): Conv2d(96, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (block-2): bottle_neck(
        (batch_norm1): BatchNorm2d(72, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(72, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (batch_norm2): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  

In [9]:
batch_size = 32
epochs = 27
learning_rate = 0.1
momentum = 0.9

first_run = True 
use_cuda = torch.cuda.is_available()

train_val_split = 0.9

trainloader, valloader, testloader = get_dataloader(batch_size = batch_size, train_val_split = train_val_split)
dataloaders = {'train': trainloader, 'val' : valloader, 'test': testloader}

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr = learning_rate, momentum = momentum)

Files already downloaded and verified
Files already downloaded and verified


In [10]:
net = net.to('cuda' if use_cuda else 'cpu')

best_val_accuracy = -math.inf

for epoch in range(epochs):

    net.train()

    batches_in_pass = len(dataloaders['train'])
    
    #Training
    training_epoch_start_time = time.time()

    loss_total = 0.0
    epoch_loss = 0.0
    
    for idx, data in enumerate(dataloaders['train']):
        
        inputs, labels = data
        inputs = inputs.to('cuda' if use_cuda else 'cpu')
        labels = labels.to('cuda' if use_cuda else 'cpu')
        
        optimizer.zero_grad()
        
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        epoch_loss += loss.item()
        loss_total += loss.item()
        
    #Validation   
    epoch_loss /= batches_in_pass

    net.eval()

    correct = 0.0
    total = 0.0
    for idx, data in enumerate(dataloaders['val']):

        inputs, labels = data
        inputs = inputs.to('cuda')
        labels = labels.to('cuda')

        outputs = net(inputs)
        _, predicted = torch.max(outputs, 1)

        total += labels.shape[0]
        correct += (predicted == labels).sum().item()

    epoch_accuracy = 100 * correct / total
    
    print(f'Epoch {epoch + 1} | Training Loss (Avg): {epoch_loss:.6f} | Validation Accuracy: {epoch_accuracy}% | Time elapsed: {time.time() - training_epoch_start_time:.2f}s')
    
    #Saving the model
    save_path = os.path.join(os.getcwd(), 'models', 'densenet_cifar100_k24_depth100')
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    #torch.save(net.state_dict(), os.path.join(save_path, f'epoch_{epoch}.pt'))
    
    if epoch_accuracy > best_val_accuracy:

        torch.save(net.state_dict(), os.path.join(save_path, 'best'+str(epochs)+'.pt'))
        best_val_accuracy = epoch_accuracy

print('Training Complete.')  

Epoch 1 | Training Loss (Avg): 3.896795 | Validation Accuracy: 16.98% | Time elapsed: 256.02s
Epoch 2 | Training Loss (Avg): 3.021787 | Validation Accuracy: 30.94% | Time elapsed: 256.33s
Epoch 3 | Training Loss (Avg): 2.339274 | Validation Accuracy: 41.18% | Time elapsed: 256.32s
Epoch 4 | Training Loss (Avg): 1.903393 | Validation Accuracy: 48.52% | Time elapsed: 256.79s
Epoch 5 | Training Loss (Avg): 1.599359 | Validation Accuracy: 51.82% | Time elapsed: 257.21s
Epoch 6 | Training Loss (Avg): 1.376624 | Validation Accuracy: 54.2% | Time elapsed: 256.62s
Epoch 7 | Training Loss (Avg): 1.197619 | Validation Accuracy: 57.52% | Time elapsed: 256.37s
Epoch 8 | Training Loss (Avg): 1.034492 | Validation Accuracy: 57.7% | Time elapsed: 256.73s
Epoch 9 | Training Loss (Avg): 0.897448 | Validation Accuracy: 58.4% | Time elapsed: 256.77s
Epoch 10 | Training Loss (Avg): 0.776310 | Validation Accuracy: 59.84% | Time elapsed: 256.67s
Epoch 11 | Training Loss (Avg): 0.667126 | Validation Accuracy

In [11]:
#Testing
try:
    assert os.path.exists(os.path.join(save_path, 'best'+str(epochs)+'.pt'))

    net.load_state_dict(torch.load(os.path.join(save_path, 'best'+str(epochs)+'.pt')))
    net.eval()

    correct = 0.0
    total = 0.0

    with torch.no_grad():
        for idx, data in enumerate(dataloaders['test']):

            inputs, labels = data
            inputs = inputs.to('cuda' if use_cuda else 'cpu')
            labels = labels.to('cuda' if use_cuda else 'cpu')

            outputs = net(inputs)
            _, predicted = torch.max(outputs, 1)

            total += labels.shape[0]
            correct += (predicted == labels).sum().item()

        print(f'Accuracy of the model on test images: {100 * correct // total}%')
    
except:
    print('Please train the model before testing.')

Accuracy of the model on test images: 65.0%


# k = 36, depth = 100

In [12]:
growth_rate = 36
depth = 100
reduction = 0.5
num_classes = 100
bottle_neck_flag = True
layers_per_stage = None
growth_rate_per_stage = None
drop_prob = 0.0
fetch_type = "dense"

torch.manual_seed(0)
np.random.seed(0)

net = Model(growth_rate = growth_rate, 
            depth = depth, 
            reduction = reduction,
            num_classes = num_classes,
            bottle_neck_flag = bottle_neck_flag,
            layers_per_stage = layers_per_stage,
            growth_rate_per_stage = growth_rate_per_stage, 
            drop_prob = drop_prob,
            fetch_type = fetch_type)

print(net)
total_params = sum([p.data.nelement() for p in net.parameters()])

print('')
print('Total trainable parameters in the network: %.4f' % (total_params / 1e6) + 'M')

Model(
  (features): Sequential(
    (conv0): Conv2d(3, 72, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (dense-stage-0): dense_stage(
      (block-1): bottle_neck(
        (batch_norm1): BatchNorm2d(72, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(72, 144, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (batch_norm2): BatchNorm2d(144, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU(inplace=True)
        (conv2): Conv2d(144, 36, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (block-2): bottle_neck(
        (batch_norm1): BatchNorm2d(108, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(108, 144, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (batch_norm2): BatchNorm2d(144, eps=1e-05, momentum=0.1, affine=True, track_running_stats=T

In [13]:
batch_size = 32
epochs = 20
learning_rate = 0.1
momentum = 0.9

first_run = True 
use_cuda = torch.cuda.is_available()

train_val_split = 0.9

trainloader, valloader, testloader = get_dataloader(batch_size = batch_size, train_val_split = train_val_split)
dataloaders = {'train': trainloader, 'val' : valloader, 'test': testloader}

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr = learning_rate, momentum = momentum)

Files already downloaded and verified
Files already downloaded and verified


In [14]:
net = net.to('cuda' if use_cuda else 'cpu')

best_val_accuracy = -math.inf

for epoch in range(epochs):

    net.train()

    batches_in_pass = len(dataloaders['train'])
    
    #Training
    training_epoch_start_time = time.time()

    loss_total = 0.0
    epoch_loss = 0.0
    
    for idx, data in enumerate(dataloaders['train']):
        
        inputs, labels = data
        inputs = inputs.to('cuda' if use_cuda else 'cpu')
        labels = labels.to('cuda' if use_cuda else 'cpu')
        
        optimizer.zero_grad()
        
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        epoch_loss += loss.item()
        loss_total += loss.item()
        
    #Validation   
    epoch_loss /= batches_in_pass

    net.eval()

    correct = 0.0
    total = 0.0
    for idx, data in enumerate(dataloaders['val']):

        inputs, labels = data
        inputs = inputs.to('cuda')
        labels = labels.to('cuda')

        outputs = net(inputs)
        _, predicted = torch.max(outputs, 1)

        total += labels.shape[0]
        correct += (predicted == labels).sum().item()

    epoch_accuracy = 100 * correct / total
    
    print(f'Epoch {epoch + 1} | Training Loss (Avg): {epoch_loss:.6f} | Validation Accuracy: {epoch_accuracy}% | Time elapsed: {time.time() - training_epoch_start_time:.2f}s')
    
    #Saving the model
    save_path = os.path.join(os.getcwd(), 'models', 'densenet_cifar100_k36_depth100')
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    #torch.save(net.state_dict(), os.path.join(save_path, f'epoch_{epoch}.pt'))
    
    if epoch_accuracy > best_val_accuracy:

        torch.save(net.state_dict(), os.path.join(save_path, 'best'+str(epochs)+'.pt'))
        best_val_accuracy = epoch_accuracy

print('Training Complete.')  

Epoch 1 | Training Loss (Avg): 3.969072 | Validation Accuracy: 15.7% | Time elapsed: 523.87s
Epoch 2 | Training Loss (Avg): 3.106440 | Validation Accuracy: 30.66% | Time elapsed: 528.92s
Epoch 3 | Training Loss (Avg): 2.385965 | Validation Accuracy: 42.78% | Time elapsed: 528.70s
Epoch 4 | Training Loss (Avg): 1.901433 | Validation Accuracy: 48.38% | Time elapsed: 529.02s
Epoch 5 | Training Loss (Avg): 1.590301 | Validation Accuracy: 54.3% | Time elapsed: 528.54s
Epoch 6 | Training Loss (Avg): 1.346518 | Validation Accuracy: 55.32% | Time elapsed: 528.87s
Epoch 7 | Training Loss (Avg): 1.153374 | Validation Accuracy: 57.4% | Time elapsed: 529.69s
Epoch 8 | Training Loss (Avg): 0.985065 | Validation Accuracy: 60.38% | Time elapsed: 530.04s
Epoch 9 | Training Loss (Avg): 0.825549 | Validation Accuracy: 58.44% | Time elapsed: 529.42s
Epoch 10 | Training Loss (Avg): 0.690949 | Validation Accuracy: 60.58% | Time elapsed: 529.63s
Epoch 11 | Training Loss (Avg): 0.576682 | Validation Accuracy

In [15]:
#Testing
try:
    assert os.path.exists(os.path.join(save_path, 'best'+str(epochs)+'.pt'))

    net.load_state_dict(torch.load(os.path.join(save_path, 'best'+str(epochs)+'.pt')))
    net.eval()

    correct = 0.0
    total = 0.0

    with torch.no_grad():
        for idx, data in enumerate(dataloaders['test']):

            inputs, labels = data
            inputs = inputs.to('cuda' if use_cuda else 'cpu')
            labels = labels.to('cuda' if use_cuda else 'cpu')

            outputs = net(inputs)
            _, predicted = torch.max(outputs, 1)

            total += labels.shape[0]
            correct += (predicted == labels).sum().item()

        print(f'Accuracy of the model on test images: {100 * correct // total}%')
    
except:
    print('Please train the model before testing.')

Accuracy of the model on test images: 64.0%


# k = 64, depth = 100

In [16]:
growth_rate = 64
depth = 100
reduction = 0.5
num_classes = 100
bottle_neck_flag = True
layers_per_stage = None
growth_rate_per_stage = None
drop_prob = 0.0
fetch_type = "dense"

torch.manual_seed(0)
np.random.seed(0)

net = Model(growth_rate = growth_rate, 
            depth = depth, 
            reduction = reduction,
            num_classes = num_classes,
            bottle_neck_flag = bottle_neck_flag,
            layers_per_stage = layers_per_stage,
            growth_rate_per_stage = growth_rate_per_stage, 
            drop_prob = drop_prob,
            fetch_type = fetch_type)

print(net)
total_params = sum([p.data.nelement() for p in net.parameters()])

print('')
print('Total trainable parameters in the network: %.4f' % (total_params / 1e6) + 'M')

Model(
  (features): Sequential(
    (conv0): Conv2d(3, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (dense-stage-0): dense_stage(
      (block-1): bottle_neck(
        (batch_norm1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (batch_norm2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU(inplace=True)
        (conv2): Conv2d(256, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (block-2): bottle_neck(
        (batch_norm1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(192, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (batch_norm2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stat

In [17]:
batch_size = 32
epochs = 20
learning_rate = 0.1
momentum = 0.9

first_run = True 
use_cuda = torch.cuda.is_available()

train_val_split = 0.9

trainloader, valloader, testloader = get_dataloader(batch_size = batch_size, train_val_split = train_val_split)
dataloaders = {'train': trainloader, 'val' : valloader, 'test': testloader}

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr = learning_rate, momentum = momentum)

Files already downloaded and verified
Files already downloaded and verified


In [19]:
net = net.to('cuda' if use_cuda else 'cpu')

best_val_accuracy = -math.inf

for epoch in range(epochs):

    net.train()

    batches_in_pass = len(dataloaders['train'])
    
    #Training
    training_epoch_start_time = time.time()

    loss_total = 0.0
    epoch_loss = 0.0
    
    for idx, data in enumerate(dataloaders['train']):
        
        inputs, labels = data
        inputs = inputs.to('cuda' if use_cuda else 'cpu')
        labels = labels.to('cuda' if use_cuda else 'cpu')
        
        optimizer.zero_grad()
        
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        epoch_loss += loss.item()
        loss_total += loss.item()
        
    #Validation   
    epoch_loss /= batches_in_pass

    net.eval()

    correct = 0.0
    total = 0.0
    for idx, data in enumerate(dataloaders['val']):

        inputs, labels = data
        inputs = inputs.to('cuda')
        labels = labels.to('cuda')

        outputs = net(inputs)
        _, predicted = torch.max(outputs, 1)

        total += labels.shape[0]
        correct += (predicted == labels).sum().item()

    epoch_accuracy = 100 * correct / total
    
    print(f'Epoch {epoch + 1} | Training Loss (Avg): {epoch_loss:.6f} | Validation Accuracy: {epoch_accuracy}% | Time elapsed: {time.time() - training_epoch_start_time:.2f}s')
    
    #Saving the model
    save_path = os.path.join(os.getcwd(), 'models', 'densenet_cifar100_k64_depth100')
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    #torch.save(net.state_dict(), os.path.join(save_path, f'epoch_{epoch}.pt'))
    
    if epoch_accuracy > best_val_accuracy:

        torch.save(net.state_dict(), os.path.join(save_path, 'best'+str(epochs)+'.pt'))
        best_val_accuracy = epoch_accuracy

print('Training Complete.')  

Epoch 1 | Training Loss (Avg): 3.991123 | Validation Accuracy: 14.56% | Time elapsed: 888.88s
Epoch 2 | Training Loss (Avg): 3.149380 | Validation Accuracy: 25.36% | Time elapsed: 890.74s
Epoch 3 | Training Loss (Avg): 2.491373 | Validation Accuracy: 39.56% | Time elapsed: 892.15s
Epoch 4 | Training Loss (Avg): 1.960120 | Validation Accuracy: 48.98% | Time elapsed: 892.43s
Epoch 5 | Training Loss (Avg): 1.618977 | Validation Accuracy: 53.62% | Time elapsed: 892.49s
Epoch 6 | Training Loss (Avg): 1.357318 | Validation Accuracy: 55.12% | Time elapsed: 893.28s
Epoch 7 | Training Loss (Avg): 1.141584 | Validation Accuracy: 58.74% | Time elapsed: 893.85s
Epoch 8 | Training Loss (Avg): 0.943368 | Validation Accuracy: 58.48% | Time elapsed: 894.44s
Epoch 9 | Training Loss (Avg): 0.778901 | Validation Accuracy: 61.22% | Time elapsed: 894.56s
Epoch 10 | Training Loss (Avg): 0.632362 | Validation Accuracy: 60.52% | Time elapsed: 893.84s
Epoch 11 | Training Loss (Avg): 0.480844 | Validation Accur

In [20]:
#Testing
try:
    assert os.path.exists(os.path.join(save_path, 'best'+str(epochs)+'.pt'))

    net.load_state_dict(torch.load(os.path.join(save_path, 'best'+str(epochs)+'.pt')))
    net.eval()

    correct = 0.0
    total = 0.0

    with torch.no_grad():
        for idx, data in enumerate(dataloaders['test']):

            inputs, labels = data
            inputs = inputs.to('cuda' if use_cuda else 'cpu')
            labels = labels.to('cuda' if use_cuda else 'cpu')

            outputs = net(inputs)
            _, predicted = torch.max(outputs, 1)

            total += labels.shape[0]
            correct += (predicted == labels).sum().item()

        print(f'Accuracy of the model on test images: {100 * correct // total}%')
    
except:
    print('Please train the model before testing.')

Accuracy of the model on test images: 63.0%


# k = [16, 32, 64], depth = 100

In [21]:
growth_rate = 24
depth = 100
reduction = 0.5
num_classes = 100
bottle_neck_flag = True
layers_per_stage = None
growth_rate_per_stage = [16, 32, 64]
drop_prob = 0.0
fetch_type = "dense"

torch.manual_seed(0)
np.random.seed(0)

net = Model(growth_rate = growth_rate, 
            depth = depth, 
            reduction = reduction,
            num_classes = num_classes,
            bottle_neck_flag = bottle_neck_flag,
            layers_per_stage = layers_per_stage,
            growth_rate_per_stage = growth_rate_per_stage, 
            drop_prob = drop_prob,
            fetch_type = fetch_type)

print(net)
total_params = sum([p.data.nelement() for p in net.parameters()])

print('')
print('Total trainable parameters in the network: %.4f' % (total_params / 1e6) + 'M')

Model(
  (features): Sequential(
    (conv0): Conv2d(3, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (dense-stage-0): dense_stage(
      (block-1): bottle_neck(
        (batch_norm1): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(48, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (batch_norm2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU(inplace=True)
        (conv2): Conv2d(64, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (block-2): bottle_neck(
        (batch_norm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (batch_norm2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  

In [22]:
batch_size = 32
epochs = 20
learning_rate = 0.1
momentum = 0.9

first_run = True 
use_cuda = torch.cuda.is_available()

train_val_split = 0.9

trainloader, valloader, testloader = get_dataloader(batch_size = batch_size, train_val_split = train_val_split)
dataloaders = {'train': trainloader, 'val' : valloader, 'test': testloader}

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr = learning_rate, momentum = momentum)

Files already downloaded and verified
Files already downloaded and verified


In [23]:
net = net.to('cuda' if use_cuda else 'cpu')

best_val_accuracy = -math.inf

for epoch in range(epochs):

    net.train()

    batches_in_pass = len(dataloaders['train'])
    
    #Training
    training_epoch_start_time = time.time()

    loss_total = 0.0
    epoch_loss = 0.0
    
    for idx, data in enumerate(dataloaders['train']):
        
        inputs, labels = data
        inputs = inputs.to('cuda' if use_cuda else 'cpu')
        labels = labels.to('cuda' if use_cuda else 'cpu')
        
        optimizer.zero_grad()
        
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        epoch_loss += loss.item()
        loss_total += loss.item()
        
    #Validation   
    epoch_loss /= batches_in_pass

    net.eval()

    correct = 0.0
    total = 0.0
    for idx, data in enumerate(dataloaders['val']):

        inputs, labels = data
        inputs = inputs.to('cuda')
        labels = labels.to('cuda')

        outputs = net(inputs)
        _, predicted = torch.max(outputs, 1)

        total += labels.shape[0]
        correct += (predicted == labels).sum().item()

    epoch_accuracy = 100 * correct / total
    
    print(f'Epoch {epoch + 1} | Training Loss (Avg): {epoch_loss:.6f} | Validation Accuracy: {epoch_accuracy}% | Time elapsed: {time.time() - training_epoch_start_time:.2f}s')
    
    #Saving the model
    save_path = os.path.join(os.getcwd(), 'models', 'densenet_cifar100_k163224_depth100')
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    #torch.save(net.state_dict(), os.path.join(save_path, f'epoch_{epoch}.pt'))
    
    if epoch_accuracy > best_val_accuracy:

        torch.save(net.state_dict(), os.path.join(save_path, 'best'+str(epochs)+'.pt'))
        best_val_accuracy = epoch_accuracy

print('Training Complete.')  

Epoch 1 | Training Loss (Avg): 3.961706 | Validation Accuracy: 15.0% | Time elapsed: 250.78s
Epoch 2 | Training Loss (Avg): 3.162431 | Validation Accuracy: 28.3% | Time elapsed: 250.65s
Epoch 3 | Training Loss (Avg): 2.552028 | Validation Accuracy: 38.98% | Time elapsed: 251.04s
Epoch 4 | Training Loss (Avg): 2.050449 | Validation Accuracy: 42.72% | Time elapsed: 250.98s
Epoch 5 | Training Loss (Avg): 1.708086 | Validation Accuracy: 51.04% | Time elapsed: 251.45s
Epoch 6 | Training Loss (Avg): 1.437086 | Validation Accuracy: 53.62% | Time elapsed: 250.88s
Epoch 7 | Training Loss (Avg): 1.214127 | Validation Accuracy: 55.14% | Time elapsed: 251.01s
Epoch 8 | Training Loss (Avg): 1.021155 | Validation Accuracy: 57.08% | Time elapsed: 251.14s
Epoch 9 | Training Loss (Avg): 0.853671 | Validation Accuracy: 57.38% | Time elapsed: 251.18s
Epoch 10 | Training Loss (Avg): 0.689906 | Validation Accuracy: 59.04% | Time elapsed: 251.13s
Epoch 11 | Training Loss (Avg): 0.550240 | Validation Accurac

In [24]:
#Testing
try:
    assert os.path.exists(os.path.join(save_path, 'best'+str(epochs)+'.pt'))

    net.load_state_dict(torch.load(os.path.join(save_path, 'best'+str(epochs)+'.pt')))
    net.eval()

    correct = 0.0
    total = 0.0

    with torch.no_grad():
        for idx, data in enumerate(dataloaders['test']):

            inputs, labels = data
            inputs = inputs.to('cuda' if use_cuda else 'cpu')
            labels = labels.to('cuda' if use_cuda else 'cpu')

            outputs = net(inputs)
            _, predicted = torch.max(outputs, 1)

            total += labels.shape[0]
            correct += (predicted == labels).sum().item()

        print(f'Accuracy of the model on test images: {100 * correct // total}%')
    
except:
    print('Please train the model before testing.')

Accuracy of the model on test images: 62.0%


# k = 24, depth = 100, without using bottleneck compression

In [3]:
growth_rate = 24
depth = 100
reduction = 0.5
num_classes = 100
bottle_neck_flag = False
layers_per_stage = None
growth_rate_per_stage = None
drop_prob = 0.0
fetch_type = "dense"

torch.manual_seed(0)
np.random.seed(0)

net = Model(growth_rate = growth_rate, 
            depth = depth, 
            reduction = reduction,
            num_classes = num_classes,
            bottle_neck_flag = bottle_neck_flag,
            layers_per_stage = layers_per_stage,
            growth_rate_per_stage = growth_rate_per_stage, 
            drop_prob = drop_prob,
            fetch_type = fetch_type)

print(net)
total_params = sum([p.data.nelement() for p in net.parameters()])

print('')
print('Total trainable parameters in the network: %.4f' % (total_params / 1e6) + 'M')

Model(
  (features): Sequential(
    (conv0): Conv2d(3, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (dense-stage-0): dense_stage(
      (block-1): single_layer(
        (batch_norm1): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(48, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (block-2): single_layer(
        (batch_norm1): BatchNorm2d(72, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(72, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (block-3): single_layer(
        (batch_norm1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(96, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (block-4): single_

In [4]:
batch_size = 16
epochs = 20
learning_rate = 0.1
momentum = 0.9

first_run = True 
use_cuda = torch.cuda.is_available()

train_val_split = 0.9

trainloader, valloader, testloader = get_dataloader(batch_size = batch_size, train_val_split = train_val_split)
dataloaders = {'train': trainloader, 'val' : valloader, 'test': testloader}

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr = learning_rate, momentum = momentum)

Files already downloaded and verified
Files already downloaded and verified


In [5]:
net = net.to('cuda' if use_cuda else 'cpu')

best_val_accuracy = -math.inf

for epoch in range(epochs):

    net.train()

    batches_in_pass = len(dataloaders['train'])
    
    #Training
    training_epoch_start_time = time.time()

    loss_total = 0.0
    epoch_loss = 0.0
    
    for idx, data in enumerate(dataloaders['train']):
        
        inputs, labels = data
        inputs = inputs.to('cuda' if use_cuda else 'cpu')
        labels = labels.to('cuda' if use_cuda else 'cpu')
        
        optimizer.zero_grad()
        
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        epoch_loss += loss.item()
        loss_total += loss.item()
        
    #Validation   
    epoch_loss /= batches_in_pass

    net.eval()

    correct = 0.0
    total = 0.0
    for idx, data in enumerate(dataloaders['val']):

        inputs, labels = data
        inputs = inputs.to('cuda')
        labels = labels.to('cuda')

        outputs = net(inputs)
        _, predicted = torch.max(outputs, 1)

        total += labels.shape[0]
        correct += (predicted == labels).sum().item()

    epoch_accuracy = 100 * correct / total
    
    print(f'Epoch {epoch + 1} | Training Loss (Avg): {epoch_loss:.6f} | Validation Accuracy: {epoch_accuracy}% | Time elapsed: {time.time() - training_epoch_start_time:.2f}s')
    
    #Saving the model
    save_path = os.path.join(os.getcwd(), 'models', 'densenet_cifar100_k24_depth100_nobc')
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    #torch.save(net.state_dict(), os.path.join(save_path, f'epoch_{epoch}.pt'))
    
    if epoch_accuracy > best_val_accuracy:

        torch.save(net.state_dict(), os.path.join(save_path, 'best'+str(epochs)+'.pt'))
        best_val_accuracy = epoch_accuracy

print('Training Complete.')  

Epoch 1 | Training Loss (Avg): 4.054365 | Validation Accuracy: 17.02% | Time elapsed: 1689.45s
Epoch 2 | Training Loss (Avg): 3.214349 | Validation Accuracy: 28.76% | Time elapsed: 1701.92s
Epoch 3 | Training Loss (Avg): 2.584100 | Validation Accuracy: 38.74% | Time elapsed: 1704.49s
Epoch 4 | Training Loss (Avg): 2.086738 | Validation Accuracy: 45.08% | Time elapsed: 1706.26s
Epoch 5 | Training Loss (Avg): 1.748886 | Validation Accuracy: 50.7% | Time elapsed: 1706.81s
Epoch 6 | Training Loss (Avg): 1.488756 | Validation Accuracy: 54.02% | Time elapsed: 1706.71s
Epoch 7 | Training Loss (Avg): 1.278005 | Validation Accuracy: 57.6% | Time elapsed: 1706.13s
Epoch 8 | Training Loss (Avg): 1.099473 | Validation Accuracy: 55.4% | Time elapsed: 1707.23s
Epoch 9 | Training Loss (Avg): 0.942715 | Validation Accuracy: 58.46% | Time elapsed: 1705.91s
Epoch 10 | Training Loss (Avg): 0.774427 | Validation Accuracy: 57.3% | Time elapsed: 1705.61s
Epoch 11 | Training Loss (Avg): 0.626260 | Validation

In [6]:
#Testing
try:
    assert os.path.exists(os.path.join(save_path, 'best'+str(epochs)+'.pt'))

    net.load_state_dict(torch.load(os.path.join(save_path, 'best'+str(epochs)+'.pt')))
    net.eval()

    correct = 0.0
    total = 0.0

    with torch.no_grad():
        for idx, data in enumerate(dataloaders['test']):

            inputs, labels = data
            inputs = inputs.to('cuda' if use_cuda else 'cpu')
            labels = labels.to('cuda' if use_cuda else 'cpu')

            outputs = net(inputs)
            _, predicted = torch.max(outputs, 1)

            total += labels.shape[0]
            correct += (predicted == labels).sum().item()

        print(f'Accuracy of the model on test images: {100 * correct // total}%')
    
except:
    print('Please train the model before testing.')

Accuracy of the model on test images: 62.0%


# k = 36, depth = 100, without using bottleneck compression 

In [5]:
growth_rate = 36
depth = 100
reduction = 0.5
num_classes = 100
bottle_neck_flag = False
layers_per_stage = None
growth_rate_per_stage = None
drop_prob = 0.0
fetch_type = "dense"

torch.manual_seed(0)
np.random.seed(0)

net = Model(growth_rate = growth_rate, 
            depth = depth, 
            reduction = reduction,
            num_classes = num_classes,
            bottle_neck_flag = bottle_neck_flag,
            layers_per_stage = layers_per_stage,
            growth_rate_per_stage = growth_rate_per_stage, 
            drop_prob = drop_prob,
            fetch_type = fetch_type)

print(net)
total_params = sum([p.data.nelement() for p in net.parameters()])

print('')
print('Total trainable parameters in the network: %.4f' % (total_params / 1e6) + 'M')

Model(
  (features): Sequential(
    (conv0): Conv2d(3, 72, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (dense-stage-0): dense_stage(
      (block-1): single_layer(
        (batch_norm1): BatchNorm2d(72, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(72, 36, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (block-2): single_layer(
        (batch_norm1): BatchNorm2d(108, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(108, 36, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (block-3): single_layer(
        (batch_norm1): BatchNorm2d(144, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(144, 36, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (block-4): sin

In [6]:
batch_size = 16
epochs = 20
learning_rate = 0.1
momentum = 0.9

first_run = True 
use_cuda = torch.cuda.is_available()

train_val_split = 0.9

trainloader, valloader, testloader = get_dataloader(batch_size = batch_size, train_val_split = train_val_split)
dataloaders = {'train': trainloader, 'val' : valloader, 'test': testloader}

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr = learning_rate, momentum = momentum)

Files already downloaded and verified
Files already downloaded and verified


In [9]:
net = net.to('cuda' if use_cuda else 'cpu')

best_val_accuracy = -math.inf

for epoch in range(epochs):

    net.train()

    batches_in_pass = len(dataloaders['train'])
    
    #Training
    training_epoch_start_time = time.time()

    loss_total = 0.0
    epoch_loss = 0.0
    
    for idx, data in enumerate(dataloaders['train']):
        
        inputs, labels = data
        inputs = inputs.to('cuda' if use_cuda else 'cpu')
        labels = labels.to('cuda' if use_cuda else 'cpu')
        
        optimizer.zero_grad()
        
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        epoch_loss += loss.item()
        loss_total += loss.item()
        
    #Validation   
    epoch_loss /= batches_in_pass

    net.eval()

    correct = 0.0
    total = 0.0
    for idx, data in enumerate(dataloaders['val']):

        inputs, labels = data
        inputs = inputs.to('cuda')
        labels = labels.to('cuda')

        outputs = net(inputs)
        _, predicted = torch.max(outputs, 1)

        total += labels.shape[0]
        correct += (predicted == labels).sum().item()

    epoch_accuracy = 100 * correct / total
    
    print(f'Epoch {epoch + 1} | Training Loss (Avg): {epoch_loss:.6f} | Validation Accuracy: {epoch_accuracy}% | Time elapsed: {time.time() - training_epoch_start_time:.2f}s')
    
    #Saving the model
    save_path = os.path.join(os.getcwd(), 'models', 'densenet_cifar100_k36_depth100_nobc')
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    #torch.save(net.state_dict(), os.path.join(save_path, f'epoch_{epoch}.pt'))
    
    if epoch_accuracy > best_val_accuracy:

        torch.save(net.state_dict(), os.path.join(save_path, 'best'+str(epochs)+'.pt'))
        best_val_accuracy = epoch_accuracy

print('Training Complete.')  

Epoch 1 | Training Loss (Avg): 4.232461 | Validation Accuracy: 16.26% | Time elapsed: 3422.27s
Epoch 2 | Training Loss (Avg): 3.371283 | Validation Accuracy: 25.2% | Time elapsed: 3426.06s
Epoch 3 | Training Loss (Avg): 2.899351 | Validation Accuracy: 32.86% | Time elapsed: 3415.83s
Epoch 4 | Training Loss (Avg): 2.449262 | Validation Accuracy: 40.34% | Time elapsed: 3429.46s
Epoch 5 | Training Loss (Avg): 2.076624 | Validation Accuracy: 45.1% | Time elapsed: 3432.79s
Epoch 6 | Training Loss (Avg): 1.773084 | Validation Accuracy: 49.66% | Time elapsed: 3432.98s
Epoch 7 | Training Loss (Avg): 1.523944 | Validation Accuracy: 52.12% | Time elapsed: 3433.56s


In [10]:
#Testing
try:
    assert os.path.exists(os.path.join(save_path, 'best'+str(epochs)+'.pt'))

    net.load_state_dict(torch.load(os.path.join(save_path, 'best'+str(epochs)+'.pt')))
    net.eval()

    correct = 0.0
    total = 0.0

    with torch.no_grad():
        for idx, data in enumerate(dataloaders['test']):

            inputs, labels = data
            inputs = inputs.to('cuda' if use_cuda else 'cpu')
            labels = labels.to('cuda' if use_cuda else 'cpu')

            outputs = net(inputs)
            _, predicted = torch.max(outputs, 1)

            total += labels.shape[0]
            correct += (predicted == labels).sum().item()

        print(f'Accuracy of the model on test images: {100 * correct // total}%')
    
except:
    print('Please train the model before testing.')

Accuracy of the model on test images: 52.0%
