In [5]:
import os
import math
from collections import OrderedDict
import numpy as np
import matplotlib.pyplot as plt
import time

import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torch.utils.data import random_split

from torchvision.datasets import CIFAR100

from model_architecture import *

# Dataset preparation

In [7]:
def get_dataset(train_val_split = 0.9):

    transform  = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])
    
    train_data = CIFAR100(root='./data', train = True, download = first_run, transform = transform)
    test_data  = CIFAR100(root='./data', train = False, download = first_run, transform = transform)

    train_size = int(train_val_split * len(train_data))
    val_size = len(train_data) - train_size


    train_data, val_data = random_split(train_data, [train_size, val_size])
    
    return train_data, val_data, test_data


def get_dataloader(batch_size, train_val_split = 0.9, num_workers = 1):
    
    train_set, val_set, test_set = get_dataset(train_val_split)
    trainloader = DataLoader(train_set, batch_size = batch_size, shuffle = True, num_workers = num_workers, pin_memory = True)
    valloader = DataLoader(val_set, batch_size = batch_size, shuffle = False, num_workers = num_workers, pin_memory = True)
    testloader = DataLoader(test_set, batch_size = batch_size, shuffle = False, num_workers = num_workers, pin_memory = True)

    return trainloader, valloader, testloader

# k = 12, depth = 40

In [4]:
growth_rate = 12
depth = 40 
reduction = 0.5
num_classes = 100
bottle_neck_flag = True
layers_per_stage = None
growth_rate_per_stage = None
drop_prob = 0.0
fetch_type = "sparse"

torch.manual_seed(0)
np.random.seed(0)

net = Model(growth_rate = growth_rate, 
            depth = depth, 
            reduction = reduction,
            num_classes = num_classes,
            bottle_neck_flag = bottle_neck_flag,
            layers_per_stage = layers_per_stage,
            growth_rate_per_stage = growth_rate_per_stage, 
            drop_prob = drop_prob,
            fetch_type = fetch_type)

print(net)
total_params = sum([p.data.nelement() for p in net.parameters()])

print('')
print('Total trainable parameters in the network: %.4f' % (total_params / 1e6) + 'M')

Model(
  (features): Sequential(
    (conv0): Conv2d(3, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (dense-stage-0): dense_stage(
      (block-1): bottle_neck(
        (batch_norm1): BatchNorm2d(12, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(12, 48, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (batch_norm2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU(inplace=True)
        (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (block-2): bottle_neck(
        (batch_norm1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(24, 48, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (batch_norm2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  

In [5]:
batch_size = 32
epochs = 70
learning_rate = 0.1
momentum = 0.9

first_run = True 
use_cuda = torch.cuda.is_available()

train_val_split = 0.9

trainloader, valloader, testloader = get_dataloader(batch_size = batch_size, train_val_split = train_val_split)
dataloaders = {'train': trainloader, 'val' : valloader, 'test': testloader}

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr = learning_rate, momentum = momentum)

Files already downloaded and verified
Files already downloaded and verified


In [6]:
net = net.to('cuda' if use_cuda else 'cpu')

best_val_accuracy = -math.inf

for epoch in range(epochs):

    net.train()

    batches_in_pass = len(dataloaders['train'])
    
    #Training
    training_epoch_start_time = time.time()

    loss_total = 0.0
    epoch_loss = 0.0
    
    for idx, data in enumerate(dataloaders['train']):
        
        inputs, labels = data
        inputs = inputs.to('cuda' if use_cuda else 'cpu')
        labels = labels.to('cuda' if use_cuda else 'cpu')
        
        optimizer.zero_grad()
        
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        epoch_loss += loss.item()
        loss_total += loss.item()
        
    #Validation   
    epoch_loss /= batches_in_pass

    net.eval()

    correct = 0.0
    total = 0.0
    for idx, data in enumerate(dataloaders['val']):

        inputs, labels = data
        inputs = inputs.to('cuda')
        labels = labels.to('cuda')

        outputs = net(inputs)
        _, predicted = torch.max(outputs, 1)

        total += labels.shape[0]
        correct += (predicted == labels).sum().item()

    epoch_accuracy = 100 * correct / total
    
    print(f'Epoch {epoch + 1} | Training Loss (Avg): {epoch_loss:.6f} | Validation Accuracy: {epoch_accuracy}% | Time elapsed: {time.time() - training_epoch_start_time:.2f}s')
    
    #Saving the model
    save_path = os.path.join(os.getcwd(), 'models', 'sparsenet_cifar100')
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    #torch.save(net.state_dict(), os.path.join(save_path, f'epoch_{epoch}.pt'))
    
    if epoch_accuracy > best_val_accuracy:

        torch.save(net.state_dict(), os.path.join(save_path, 'best'+str(epochs)+'.pt'))
        best_val_accuracy = epoch_accuracy

print('Training Complete.')  

Epoch 1 | Training Loss (Avg): 4.100469 | Validation Accuracy: 10.2% | Time elapsed: 31.44s
Epoch 2 | Training Loss (Avg): 3.612524 | Validation Accuracy: 16.62% | Time elapsed: 32.95s
Epoch 3 | Training Loss (Avg): 3.224169 | Validation Accuracy: 22.64% | Time elapsed: 31.62s
Epoch 4 | Training Loss (Avg): 2.915133 | Validation Accuracy: 26.84% | Time elapsed: 32.00s
Epoch 5 | Training Loss (Avg): 2.707579 | Validation Accuracy: 30.86% | Time elapsed: 31.70s
Epoch 6 | Training Loss (Avg): 2.561381 | Validation Accuracy: 30.94% | Time elapsed: 32.12s
Epoch 7 | Training Loss (Avg): 2.436757 | Validation Accuracy: 33.44% | Time elapsed: 32.96s
Epoch 8 | Training Loss (Avg): 2.338046 | Validation Accuracy: 35.28% | Time elapsed: 31.97s
Epoch 9 | Training Loss (Avg): 2.251122 | Validation Accuracy: 35.16% | Time elapsed: 32.29s
Epoch 10 | Training Loss (Avg): 2.180600 | Validation Accuracy: 36.2% | Time elapsed: 31.96s
Epoch 11 | Training Loss (Avg): 2.112917 | Validation Accuracy: 38.84% 

In [7]:
#Testing
try:
    assert os.path.exists(os.path.join(save_path, 'best'+str(epochs)+'.pt'))

    net.load_state_dict(torch.load(os.path.join(save_path, 'best'+str(epochs)+'.pt')))
    net.eval()

    correct = 0.0
    total = 0.0

    with torch.no_grad():
        for idx, data in enumerate(dataloaders['test']):

            inputs, labels = data
            inputs = inputs.to('cuda' if use_cuda else 'cpu')
            labels = labels.to('cuda' if use_cuda else 'cpu')

            outputs = net(inputs)
            _, predicted = torch.max(outputs, 1)

            total += labels.shape[0]
            correct += (predicted == labels).sum().item()

        print(f'Accuracy of the model on test images: {100 * correct // total}%')
    
except:
    print('Please train the model before testing.')

Accuracy of the model on test images: 47.0%


# k = 24, depth = 100

In [8]:
growth_rate = 24
depth = 100 
reduction = 0.5
num_classes = 100
bottle_neck_flag = True
layers_per_stage = None
growth_rate_per_stage = None
drop_prob = 0.0
fetch_type = "sparse"

torch.manual_seed(0)
np.random.seed(0)

net = Model(growth_rate = growth_rate, 
            depth = depth, 
            reduction = reduction,
            num_classes = num_classes,
            bottle_neck_flag = bottle_neck_flag,
            layers_per_stage = layers_per_stage,
            growth_rate_per_stage = growth_rate_per_stage, 
            drop_prob = drop_prob,
            fetch_type = fetch_type)

print(net)
total_params = sum([p.data.nelement() for p in net.parameters()])

print('')
print('Total trainable parameters in the network: %.4f' % (total_params / 1e6) + 'M')

Model(
  (features): Sequential(
    (conv0): Conv2d(3, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (dense-stage-0): dense_stage(
      (block-1): bottle_neck(
        (batch_norm1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(24, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (batch_norm2): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU(inplace=True)
        (conv2): Conv2d(96, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (block-2): bottle_neck(
        (batch_norm1): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(48, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (batch_norm2): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  

In [9]:
batch_size = 32
epochs = 70
learning_rate = 0.1
momentum = 0.9

first_run = True 
use_cuda = torch.cuda.is_available()

train_val_split = 0.9

trainloader, valloader, testloader = get_dataloader(batch_size = batch_size, train_val_split = train_val_split)
dataloaders = {'train': trainloader, 'val' : valloader, 'test': testloader}

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr = learning_rate, momentum = momentum)

Files already downloaded and verified
Files already downloaded and verified


In [10]:
net = net.to('cuda' if use_cuda else 'cpu')

best_val_accuracy = -math.inf

for epoch in range(epochs):

    net.train()

    batches_in_pass = len(dataloaders['train'])
    
    #Training
    training_epoch_start_time = time.time()

    loss_total = 0.0
    epoch_loss = 0.0
    
    for idx, data in enumerate(dataloaders['train']):
        
        inputs, labels = data
        inputs = inputs.to('cuda' if use_cuda else 'cpu')
        labels = labels.to('cuda' if use_cuda else 'cpu')
        
        optimizer.zero_grad()
        
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        epoch_loss += loss.item()
        loss_total += loss.item()
        
    #Validation   
    epoch_loss /= batches_in_pass

    net.eval()

    correct = 0.0
    total = 0.0
    for idx, data in enumerate(dataloaders['val']):

        inputs, labels = data
        inputs = inputs.to('cuda')
        labels = labels.to('cuda')

        outputs = net(inputs)
        _, predicted = torch.max(outputs, 1)

        total += labels.shape[0]
        correct += (predicted == labels).sum().item()

    epoch_accuracy = 100 * correct / total
    
    print(f'Epoch {epoch + 1} | Training Loss (Avg): {epoch_loss:.6f} | Validation Accuracy: {epoch_accuracy}% | Time elapsed: {time.time() - training_epoch_start_time:.2f}s')
    
    #Saving the model
    save_path = os.path.join(os.getcwd(), 'models', 'sparsenet_cifar100_k24_depth100')
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    #torch.save(net.state_dict(), os.path.join(save_path, f'epoch_{epoch}.pt'))
    
    if epoch_accuracy > best_val_accuracy:

        torch.save(net.state_dict(), os.path.join(save_path, 'best'+str(epochs)+'.pt'))
        best_val_accuracy = epoch_accuracy

print('Training Complete.')  

Epoch 1 | Training Loss (Avg): 3.994691 | Validation Accuracy: 13.14% | Time elapsed: 134.52s
Epoch 2 | Training Loss (Avg): 3.341164 | Validation Accuracy: 23.92% | Time elapsed: 134.47s
Epoch 3 | Training Loss (Avg): 2.873394 | Validation Accuracy: 31.14% | Time elapsed: 134.25s
Epoch 4 | Training Loss (Avg): 2.481328 | Validation Accuracy: 38.14% | Time elapsed: 134.65s
Epoch 5 | Training Loss (Avg): 2.181234 | Validation Accuracy: 41.04% | Time elapsed: 134.25s
Epoch 6 | Training Loss (Avg): 1.950975 | Validation Accuracy: 43.7% | Time elapsed: 134.86s
Epoch 7 | Training Loss (Avg): 1.775425 | Validation Accuracy: 47.2% | Time elapsed: 134.91s
Epoch 8 | Training Loss (Avg): 1.626713 | Validation Accuracy: 50.06% | Time elapsed: 134.79s
Epoch 9 | Training Loss (Avg): 1.496485 | Validation Accuracy: 50.58% | Time elapsed: 135.04s
Epoch 10 | Training Loss (Avg): 1.379419 | Validation Accuracy: 52.54% | Time elapsed: 134.66s
Epoch 11 | Training Loss (Avg): 1.272651 | Validation Accurac

In [11]:
#Testing
try:
    assert os.path.exists(os.path.join(save_path, 'best'+str(epochs)+'.pt'))

    net.load_state_dict(torch.load(os.path.join(save_path, 'best'+str(epochs)+'.pt')))
    net.eval()

    correct = 0.0
    total = 0.0

    with torch.no_grad():
        for idx, data in enumerate(dataloaders['test']):

            inputs, labels = data
            inputs = inputs.to('cuda' if use_cuda else 'cpu')
            labels = labels.to('cuda' if use_cuda else 'cpu')

            outputs = net(inputs)
            _, predicted = torch.max(outputs, 1)

            total += labels.shape[0]
            correct += (predicted == labels).sum().item()

        print(f'Accuracy of the model on test images: {100 * correct // total}%')
    
except:
    print('Please train the model before testing.')

Accuracy of the model on test images: 57.0%


# k = 36, depth = 100

In [12]:
growth_rate = 36
depth = 100 
reduction = 0.5
num_classes = 100
bottle_neck_flag = True
layers_per_stage = None
growth_rate_per_stage = None
drop_prob = 0.0
fetch_type = "sparse"

torch.manual_seed(0)
np.random.seed(0)

net = Model(growth_rate = growth_rate, 
            depth = depth, 
            reduction = reduction,
            num_classes = num_classes,
            bottle_neck_flag = bottle_neck_flag,
            layers_per_stage = layers_per_stage,
            growth_rate_per_stage = growth_rate_per_stage, 
            drop_prob = drop_prob,
            fetch_type = fetch_type)

print(net)
total_params = sum([p.data.nelement() for p in net.parameters()])

print('')
print('Total trainable parameters in the network: %.4f' % (total_params / 1e6) + 'M')

Model(
  (features): Sequential(
    (conv0): Conv2d(3, 36, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (dense-stage-0): dense_stage(
      (block-1): bottle_neck(
        (batch_norm1): BatchNorm2d(36, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(36, 144, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (batch_norm2): BatchNorm2d(144, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU(inplace=True)
        (conv2): Conv2d(144, 36, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (block-2): bottle_neck(
        (batch_norm1): BatchNorm2d(72, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(72, 144, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (batch_norm2): BatchNorm2d(144, eps=1e-05, momentum=0.1, affine=True, track_running_stats=Tru

In [13]:
batch_size = 32
epochs = 70
learning_rate = 0.1
momentum = 0.9

first_run = True 
use_cuda = torch.cuda.is_available()

train_val_split = 0.9

trainloader, valloader, testloader = get_dataloader(batch_size = batch_size, train_val_split = train_val_split)
dataloaders = {'train': trainloader, 'val' : valloader, 'test': testloader}

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr = learning_rate, momentum = momentum)

Files already downloaded and verified
Files already downloaded and verified


In [15]:
net = net.to('cuda' if use_cuda else 'cpu')

best_val_accuracy = -math.inf

for epoch in range(epochs):

    net.train()

    batches_in_pass = len(dataloaders['train'])
    
    #Training
    training_epoch_start_time = time.time()

    loss_total = 0.0
    epoch_loss = 0.0
    
    for idx, data in enumerate(dataloaders['train']):
        
        inputs, labels = data
        inputs = inputs.to('cuda' if use_cuda else 'cpu')
        labels = labels.to('cuda' if use_cuda else 'cpu')
        
        optimizer.zero_grad()
        
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        epoch_loss += loss.item()
        loss_total += loss.item()
        
    #Validation   
    epoch_loss /= batches_in_pass

    net.eval()

    correct = 0.0
    total = 0.0
    for idx, data in enumerate(dataloaders['val']):

        inputs, labels = data
        inputs = inputs.to('cuda')
        labels = labels.to('cuda')

        outputs = net(inputs)
        _, predicted = torch.max(outputs, 1)

        total += labels.shape[0]
        correct += (predicted == labels).sum().item()

    epoch_accuracy = 100 * correct / total
    
    print(f'Epoch {epoch + 1} | Training Loss (Avg): {epoch_loss:.6f} | Validation Accuracy: {epoch_accuracy}% | Time elapsed: {time.time() - training_epoch_start_time:.2f}s')
    
    #Saving the model
    save_path = os.path.join(os.getcwd(), 'models', 'sparsenet_cifar100_k36_depth100')
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    #torch.save(net.state_dict(), os.path.join(save_path, f'epoch_{epoch}.pt'))
    
    if epoch_accuracy > best_val_accuracy:

        torch.save(net.state_dict(), os.path.join(save_path, 'best'+str(epochs)+'.pt'))
        best_val_accuracy = epoch_accuracy

print('Training Complete.')  

Epoch 1 | Training Loss (Avg): 4.003908 | Validation Accuracy: 14.44% | Time elapsed: 305.62s
Epoch 2 | Training Loss (Avg): 3.269527 | Validation Accuracy: 25.4% | Time elapsed: 307.17s
Epoch 3 | Training Loss (Avg): 2.661414 | Validation Accuracy: 36.22% | Time elapsed: 307.41s
Epoch 4 | Training Loss (Avg): 2.257063 | Validation Accuracy: 42.04% | Time elapsed: 307.16s
Epoch 5 | Training Loss (Avg): 1.972644 | Validation Accuracy: 46.42% | Time elapsed: 308.06s
Epoch 6 | Training Loss (Avg): 1.758060 | Validation Accuracy: 48.36% | Time elapsed: 306.49s
Epoch 7 | Training Loss (Avg): 1.578770 | Validation Accuracy: 49.8% | Time elapsed: 307.07s
Epoch 8 | Training Loss (Avg): 1.434549 | Validation Accuracy: 52.94% | Time elapsed: 307.60s
Epoch 9 | Training Loss (Avg): 1.289359 | Validation Accuracy: 54.12% | Time elapsed: 307.63s
Epoch 10 | Training Loss (Avg): 1.167271 | Validation Accuracy: 55.56% | Time elapsed: 307.38s
Epoch 11 | Training Loss (Avg): 1.057477 | Validation Accurac

In [16]:
#Testing
try:
    assert os.path.exists(os.path.join(save_path, 'best'+str(epochs)+'.pt'))

    net.load_state_dict(torch.load(os.path.join(save_path, 'best'+str(epochs)+'.pt')))
    net.eval()

    correct = 0.0
    total = 0.0

    with torch.no_grad():
        for idx, data in enumerate(dataloaders['test']):

            inputs, labels = data
            inputs = inputs.to('cuda' if use_cuda else 'cpu')
            labels = labels.to('cuda' if use_cuda else 'cpu')

            outputs = net(inputs)
            _, predicted = torch.max(outputs, 1)

            total += labels.shape[0]
            correct += (predicted == labels).sum().item()

        print(f'Accuracy of the model on test images: {100 * correct // total}%')
    
except:
    print('Please train the model before testing.')

Accuracy of the model on test images: 59.0%


# k = 64, depth = 100

In [47]:
growth_rate = 64
depth = 100 
reduction = 0.5
num_classes = 100
bottle_neck_flag = True
layers_per_stage = None
growth_rate_per_stage = None
drop_prob = 0.0
fetch_type = "sparse"

torch.manual_seed(0)
np.random.seed(0)

net = Model(growth_rate = growth_rate, 
            depth = depth, 
            reduction = reduction,
            num_classes = num_classes,
            bottle_neck_flag = bottle_neck_flag,
            layers_per_stage = layers_per_stage,
            growth_rate_per_stage = growth_rate_per_stage, 
            drop_prob = drop_prob,
            fetch_type = fetch_type)

print(net)
total_params = sum([p.data.nelement() for p in net.parameters()])

print('')
print('Total trainable parameters in the network: %.4f' % (total_params / 1e6) + 'M')

Model(
  (features): Sequential(
    (conv0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (dense-stage-0): dense_stage(
      (block-1): bottle_neck(
        (batch_norm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (batch_norm2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU(inplace=True)
        (conv2): Conv2d(256, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (block-2): bottle_neck(
        (batch_norm1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (batch_norm2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=T

In [48]:
batch_size = 32
epochs = 150
learning_rate = 0.1
momentum = 0.9

first_run = True 
use_cuda = torch.cuda.is_available()

train_val_split = 0.9

trainloader, valloader, testloader = get_dataloader(batch_size = batch_size, train_val_split = train_val_split)
dataloaders = {'train': trainloader, 'val' : valloader, 'test': testloader}

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr = learning_rate, momentum = momentum)

Files already downloaded and verified
Files already downloaded and verified


In [50]:
net = net.to('cuda' if use_cuda else 'cpu')

best_val_accuracy = -math.inf

for epoch in range(epochs):

    net.train()

    batches_in_pass = len(dataloaders['train'])
    
    #Training
    training_epoch_start_time = time.time()

    loss_total = 0.0
    epoch_loss = 0.0
    
    for idx, data in enumerate(dataloaders['train']):
        
        inputs, labels = data
        inputs = inputs.to('cuda' if use_cuda else 'cpu')
        labels = labels.to('cuda' if use_cuda else 'cpu')
        
        optimizer.zero_grad()
        
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        epoch_loss += loss.item()
        loss_total += loss.item()
        
    #Validation   
    epoch_loss /= batches_in_pass

    net.eval()

    correct = 0.0
    total = 0.0
    for idx, data in enumerate(dataloaders['val']):

        inputs, labels = data
        inputs = inputs.to('cuda')
        labels = labels.to('cuda')

        outputs = net(inputs)
        _, predicted = torch.max(outputs, 1)

        total += labels.shape[0]
        correct += (predicted == labels).sum().item()

    epoch_accuracy = 100 * correct / total
    
    print(f'Epoch {epoch + 1} | Training Loss (Avg): {epoch_loss:.6f} | Validation Accuracy: {epoch_accuracy}% | Time elapsed: {time.time() - training_epoch_start_time:.2f}s')
    
    #Saving the model
    save_path = os.path.join(os.getcwd(), 'models', 'sparsenet_cifar100_k64_depth100')
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    #torch.save(net.state_dict(), os.path.join(save_path, f'epoch_{epoch}.pt'))
    
    if epoch_accuracy > best_val_accuracy:

        torch.save(net.state_dict(), os.path.join(save_path, 'best'+str(epochs)+'.pt'))
        best_val_accuracy = epoch_accuracy

print('Training Complete.')  

Epoch 1 | Training Loss (Avg): 3.982876 | Validation Accuracy: 13.74% | Time elapsed: 413.35s
Epoch 2 | Training Loss (Avg): 3.284358 | Validation Accuracy: 25.0% | Time elapsed: 412.56s
Epoch 3 | Training Loss (Avg): 2.697620 | Validation Accuracy: 35.04% | Time elapsed: 412.65s
Epoch 4 | Training Loss (Avg): 2.229133 | Validation Accuracy: 44.32% | Time elapsed: 412.50s
Epoch 5 | Training Loss (Avg): 1.879175 | Validation Accuracy: 48.06% | Time elapsed: 412.70s
Epoch 6 | Training Loss (Avg): 1.620057 | Validation Accuracy: 51.6% | Time elapsed: 412.44s
Epoch 7 | Training Loss (Avg): 1.399805 | Validation Accuracy: 52.34% | Time elapsed: 412.37s
Epoch 8 | Training Loss (Avg): 1.220312 | Validation Accuracy: 54.76% | Time elapsed: 412.68s
Epoch 9 | Training Loss (Avg): 1.056393 | Validation Accuracy: 56.02% | Time elapsed: 413.10s
Epoch 10 | Training Loss (Avg): 0.916200 | Validation Accuracy: 55.94% | Time elapsed: 412.46s
Epoch 11 | Training Loss (Avg): 0.773830 | Validation Accurac

In [51]:
#Testing
try:
    assert os.path.exists(os.path.join(save_path, 'best'+str(epochs)+'.pt'))

    net.load_state_dict(torch.load(os.path.join(save_path, 'best'+str(epochs)+'.pt')))
    net.eval()

    correct = 0.0
    total = 0.0

    with torch.no_grad():
        for idx, data in enumerate(dataloaders['test']):

            inputs, labels = data
            inputs = inputs.to('cuda' if use_cuda else 'cpu')
            labels = labels.to('cuda' if use_cuda else 'cpu')

            outputs = net(inputs)
            _, predicted = torch.max(outputs, 1)

            total += labels.shape[0]
            correct += (predicted == labels).sum().item()

        print(f'Accuracy of the model on test images: {100 * correct // total}%')
    
except:
    print('Please train the model before testing.')

Accuracy of the model on test images: 63.0%


# k = [16, 32, 64], depth = 100

In [54]:
growth_rate = 24
depth = 100 
reduction = 0.5
num_classes = 100
bottle_neck_flag = True
layers_per_stage = None
growth_rate_per_stage = [16, 32, 64]
drop_prob = 0.0
fetch_type = "sparse"

torch.manual_seed(0)
np.random.seed(0)

net = Model(growth_rate = growth_rate, 
            depth = depth, 
            reduction = reduction,
            num_classes = num_classes,
            bottle_neck_flag = bottle_neck_flag,
            layers_per_stage = layers_per_stage,
            growth_rate_per_stage = growth_rate_per_stage, 
            drop_prob = drop_prob,
            fetch_type = fetch_type)

print(net)
total_params = sum([p.data.nelement() for p in net.parameters()])

print('')
print('Total trainable parameters in the network: %.4f' % (total_params / 1e6) + 'M')

Model(
  (features): Sequential(
    (conv0): Conv2d(3, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (dense-stage-0): dense_stage(
      (block-1): bottle_neck(
        (batch_norm1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(24, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (batch_norm2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU(inplace=True)
        (conv2): Conv2d(64, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (block-2): bottle_neck(
        (batch_norm1): BatchNorm2d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(40, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (batch_norm2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  

In [55]:
batch_size = 32
epochs = 150
learning_rate = 0.1
momentum = 0.9

first_run = True 
use_cuda = torch.cuda.is_available()

train_val_split = 0.9

trainloader, valloader, testloader = get_dataloader(batch_size = batch_size, train_val_split = train_val_split)
dataloaders = {'train': trainloader, 'val' : valloader, 'test': testloader}

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr = learning_rate, momentum = momentum)

Files already downloaded and verified
Files already downloaded and verified


In [57]:
net = net.to('cuda' if use_cuda else 'cpu')

best_val_accuracy = -math.inf

for epoch in range(epochs):

    net.train()

    batches_in_pass = len(dataloaders['train'])
    
    #Training
    training_epoch_start_time = time.time()

    loss_total = 0.0
    epoch_loss = 0.0
    
    for idx, data in enumerate(dataloaders['train']):
        
        inputs, labels = data
        inputs = inputs.to('cuda' if use_cuda else 'cpu')
        labels = labels.to('cuda' if use_cuda else 'cpu')
        
        optimizer.zero_grad()
        
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        epoch_loss += loss.item()
        loss_total += loss.item()
        
    #Validation   
    epoch_loss /= batches_in_pass

    net.eval()

    correct = 0.0
    total = 0.0
    for idx, data in enumerate(dataloaders['val']):

        inputs, labels = data
        inputs = inputs.to('cuda')
        labels = labels.to('cuda')

        outputs = net(inputs)
        _, predicted = torch.max(outputs, 1)

        total += labels.shape[0]
        correct += (predicted == labels).sum().item()

    epoch_accuracy = 100 * correct / total
    
    print(f'Epoch {epoch + 1} | Training Loss (Avg): {epoch_loss:.6f} | Validation Accuracy: {epoch_accuracy}% | Time elapsed: {time.time() - training_epoch_start_time:.2f}s')
    
    #Saving the model
    save_path = os.path.join(os.getcwd(), 'models', 'sparsenet_cifar100_k163264_depth100')
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    #torch.save(net.state_dict(), os.path.join(save_path, f'epoch_{epoch}.pt'))
    
    if epoch_accuracy > best_val_accuracy:

        torch.save(net.state_dict(), os.path.join(save_path, 'best'+str(epochs)+'.pt'))
        best_val_accuracy = epoch_accuracy

print('Training Complete.')  

Epoch 1 | Training Loss (Avg): 4.045616 | Validation Accuracy: 12.78% | Time elapsed: 136.72s
Epoch 2 | Training Loss (Avg): 3.326883 | Validation Accuracy: 23.78% | Time elapsed: 137.63s
Epoch 3 | Training Loss (Avg): 2.686655 | Validation Accuracy: 37.98% | Time elapsed: 137.63s
Epoch 4 | Training Loss (Avg): 2.254428 | Validation Accuracy: 43.12% | Time elapsed: 137.67s
Epoch 5 | Training Loss (Avg): 1.960911 | Validation Accuracy: 45.56% | Time elapsed: 137.46s
Epoch 6 | Training Loss (Avg): 1.735680 | Validation Accuracy: 49.88% | Time elapsed: 137.52s
Epoch 7 | Training Loss (Avg): 1.542805 | Validation Accuracy: 51.58% | Time elapsed: 137.65s
Epoch 8 | Training Loss (Avg): 1.378015 | Validation Accuracy: 53.74% | Time elapsed: 137.62s
Epoch 9 | Training Loss (Avg): 1.217390 | Validation Accuracy: 53.06% | Time elapsed: 137.40s
Epoch 10 | Training Loss (Avg): 1.080144 | Validation Accuracy: 54.66% | Time elapsed: 137.46s
Epoch 11 | Training Loss (Avg): 0.952259 | Validation Accur

In [58]:
#Testing
try:
    assert os.path.exists(os.path.join(save_path, 'best'+str(epochs)+'.pt'))

    net.load_state_dict(torch.load(os.path.join(save_path, 'best'+str(epochs)+'.pt')))
    net.eval()

    correct = 0.0
    total = 0.0

    with torch.no_grad():
        for idx, data in enumerate(dataloaders['test']):

            inputs, labels = data
            inputs = inputs.to('cuda' if use_cuda else 'cpu')
            labels = labels.to('cuda' if use_cuda else 'cpu')

            outputs = net(inputs)
            _, predicted = torch.max(outputs, 1)

            total += labels.shape[0]
            correct += (predicted == labels).sum().item()

        print(f'Accuracy of the model on test images: {100 * correct // total}%')
    
except:
    print('Please train the model before testing.')

Accuracy of the model on test images: 59.0%


# k = 24, depth = 100, without using bottleneck compression

In [59]:
growth_rate = 24
depth = 100 
reduction = 0.5
num_classes = 100
bottle_neck_flag = False
layers_per_stage = None
growth_rate_per_stage = None
drop_prob = 0.0
fetch_type = "sparse"

torch.manual_seed(0)
np.random.seed(0)

net = Model(growth_rate = growth_rate, 
            depth = depth, 
            reduction = reduction,
            num_classes = num_classes,
            bottle_neck_flag = bottle_neck_flag,
            layers_per_stage = layers_per_stage,
            growth_rate_per_stage = growth_rate_per_stage, 
            drop_prob = drop_prob,
            fetch_type = fetch_type)

print(net)
total_params = sum([p.data.nelement() for p in net.parameters()])

print('')
print('Total trainable parameters in the network: %.4f' % (total_params / 1e6) + 'M')

Model(
  (features): Sequential(
    (conv0): Conv2d(3, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (dense-stage-0): dense_stage(
      (block-1): single_layer(
        (batch_norm1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(24, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (block-2): single_layer(
        (batch_norm1): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(48, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (block-3): single_layer(
        (batch_norm1): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(48, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (block-4): single_

In [60]:
batch_size = 32
epochs = 150
learning_rate = 0.1
momentum = 0.9

first_run = True 
use_cuda = torch.cuda.is_available()

train_val_split = 0.9

trainloader, valloader, testloader = get_dataloader(batch_size = batch_size, train_val_split = train_val_split)
dataloaders = {'train': trainloader, 'val' : valloader, 'test': testloader}

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr = learning_rate, momentum = momentum)

Files already downloaded and verified
Files already downloaded and verified


In [3]:
net = net.to('cuda' if use_cuda else 'cpu')

best_val_accuracy = -math.inf

for epoch in range(epochs):

    net.train()

    batches_in_pass = len(dataloaders['train'])
    
    #Training
    training_epoch_start_time = time.time()

    loss_total = 0.0
    epoch_loss = 0.0
    
    for idx, data in enumerate(dataloaders['train']):
        
        inputs, labels = data
        inputs = inputs.to('cuda' if use_cuda else 'cpu')
        labels = labels.to('cuda' if use_cuda else 'cpu')
        
        optimizer.zero_grad()
        
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        epoch_loss += loss.item()
        loss_total += loss.item()
        
    #Validation   
    epoch_loss /= batches_in_pass

    net.eval()

    correct = 0.0
    total = 0.0
    for idx, data in enumerate(dataloaders['val']):

        inputs, labels = data
        inputs = inputs.to('cuda')
        labels = labels.to('cuda')

        outputs = net(inputs)
        _, predicted = torch.max(outputs, 1)

        total += labels.shape[0]
        correct += (predicted == labels).sum().item()

    epoch_accuracy = 100 * correct / total
    
    print(f'Epoch {epoch + 1} | Training Loss (Avg): {epoch_loss:.6f} | Validation Accuracy: {epoch_accuracy}% | Time elapsed: {time.time() - training_epoch_start_time:.2f}s')
    
    #Saving the model
    save_path = os.path.join(os.getcwd(), 'models', 'sparsenet_cifar100_k24_depth100_nobc')
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    #torch.save(net.state_dict(), os.path.join(save_path, f'epoch_{epoch}.pt'))
    
    if epoch_accuracy > best_val_accuracy:

        torch.save(net.state_dict(), os.path.join(save_path, 'best'+str(epochs)+'.pt'))
        best_val_accuracy = epoch_accuracy

print('Training Complete.')  

Epoch 1 | Training Loss (Avg): 3.916723 | Validation Accuracy: 17.64% | Time elapsed: 241.21s
Epoch 2 | Training Loss (Avg): 3.097579 | Validation Accuracy: 30.1% | Time elapsed: 242.23s
Epoch 3 | Training Loss (Avg): 2.480573 | Validation Accuracy: 39.02% | Time elapsed: 242.34s
Epoch 4 | Training Loss (Avg): 2.068043 | Validation Accuracy: 44.52% | Time elapsed: 242.08s
Epoch 5 | Training Loss (Avg): 1.780366 | Validation Accuracy: 50.14% | Time elapsed: 242.50s
Epoch 6 | Training Loss (Avg): 1.569195 | Validation Accuracy: 52.16% | Time elapsed: 242.39s
Epoch 7 | Training Loss (Avg): 1.394709 | Validation Accuracy: 54.28% | Time elapsed: 242.58s
Epoch 8 | Training Loss (Avg): 1.249550 | Validation Accuracy: 56.04% | Time elapsed: 242.40s
Epoch 9 | Training Loss (Avg): 1.123991 | Validation Accuracy: 56.74% | Time elapsed: 242.33s
Epoch 10 | Training Loss (Avg): 1.004181 | Validation Accuracy: 58.22% | Time elapsed: 242.57s
Epoch 11 | Training Loss (Avg): 0.893518 | Validation Accura

In [62]:
#Testing
try:
    assert os.path.exists(os.path.join(save_path, 'best'+str(epochs)+'.pt'))

    net.load_state_dict(torch.load(os.path.join(save_path, 'best'+str(epochs)+'.pt')))
    net.eval()

    correct = 0.0
    total = 0.0

    with torch.no_grad():
        for idx, data in enumerate(dataloaders['test']):

            inputs, labels = data
            inputs = inputs.to('cuda' if use_cuda else 'cpu')
            labels = labels.to('cuda' if use_cuda else 'cpu')

            outputs = net(inputs)
            _, predicted = torch.max(outputs, 1)

            total += labels.shape[0]
            correct += (predicted == labels).sum().item()

        print(f'Accuracy of the model on test images: {100 * correct // total}%')
    
except:
    print('Please train the model before testing.')

Accuracy of the model on test images: 60.0%


# k = 36, depth = 100, without using bottleneck compression

In [8]:
growth_rate = 36
depth = 100 
reduction = 0.5
num_classes = 100
bottle_neck_flag = False
layers_per_stage = None
growth_rate_per_stage = None
drop_prob = 0.0
fetch_type = "sparse"

torch.manual_seed(0)
np.random.seed(0)

net = Model(growth_rate = growth_rate, 
            depth = depth, 
            reduction = reduction,
            num_classes = num_classes,
            bottle_neck_flag = bottle_neck_flag,
            layers_per_stage = layers_per_stage,
            growth_rate_per_stage = growth_rate_per_stage, 
            drop_prob = drop_prob,
            fetch_type = fetch_type)

print(net)
total_params = sum([p.data.nelement() for p in net.parameters()])

print('')
print('Total trainable parameters in the network: %.4f' % (total_params / 1e6) + 'M')

Model(
  (features): Sequential(
    (conv0): Conv2d(3, 36, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (dense-stage-0): dense_stage(
      (block-1): single_layer(
        (batch_norm1): BatchNorm2d(36, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(36, 36, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (block-2): single_layer(
        (batch_norm1): BatchNorm2d(72, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(72, 36, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (block-3): single_layer(
        (batch_norm1): BatchNorm2d(72, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(72, 36, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (block-4): single_

In [10]:
batch_size = 32
epochs = 100
learning_rate = 0.1
momentum = 0.9

first_run = True 
use_cuda = torch.cuda.is_available()

train_val_split = 0.9

trainloader, valloader, testloader = get_dataloader(batch_size = batch_size, train_val_split = train_val_split)
dataloaders = {'train': trainloader, 'val' : valloader, 'test': testloader}

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr = learning_rate, momentum = momentum)

Files already downloaded and verified
Files already downloaded and verified


In [2]:
net = net.to('cuda' if use_cuda else 'cpu')

best_val_accuracy = -math.inf

for epoch in range(epochs):

    net.train()

    batches_in_pass = len(dataloaders['train'])
    
    #Training
    training_epoch_start_time = time.time()

    loss_total = 0.0
    epoch_loss = 0.0
    
    for idx, data in enumerate(dataloaders['train']):
        
        inputs, labels = data
        inputs = inputs.to('cuda' if use_cuda else 'cpu')
        labels = labels.to('cuda' if use_cuda else 'cpu')
        
        optimizer.zero_grad()
        
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        epoch_loss += loss.item()
        loss_total += loss.item()
        
    #Validation   
    epoch_loss /= batches_in_pass

    net.eval()

    correct = 0.0
    total = 0.0
    for idx, data in enumerate(dataloaders['val']):

        inputs, labels = data
        inputs = inputs.to('cuda')
        labels = labels.to('cuda')

        outputs = net(inputs)
        _, predicted = torch.max(outputs, 1)

        total += labels.shape[0]
        correct += (predicted == labels).sum().item()

    epoch_accuracy = 100 * correct / total
    
    print(f'Epoch {epoch + 1} | Training Loss (Avg): {epoch_loss:.6f} | Validation Accuracy: {epoch_accuracy}% | Time elapsed: {time.time() - training_epoch_start_time:.2f}s')
    
    #Saving the model
    save_path = os.path.join(os.getcwd(), 'models', 'sparsenet_cifar100_k36_depth100_nobc')
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    #torch.save(net.state_dict(), os.path.join(save_path, f'epoch_{epoch}.pt'))
    
    if epoch_accuracy > best_val_accuracy:

        torch.save(net.state_dict(), os.path.join(save_path, 'best'+str(epochs)+'.pt'))
        best_val_accuracy = epoch_accuracy

print('Training Complete.')  

Epoch 1 | Training Loss (Avg): 3.871221 | Validation Accuracy: 18.78% | Time elapsed: 476.56s
Epoch 2 | Training Loss (Avg): 3.034380 | Validation Accuracy: 30.94% | Time elapsed: 486.61s
Epoch 3 | Training Loss (Avg): 2.357912 | Validation Accuracy: 42.78% | Time elapsed: 485.66s
Epoch 4 | Training Loss (Avg): 1.927071 | Validation Accuracy: 48.74% | Time elapsed: 485.82s
Epoch 5 | Training Loss (Avg): 1.632395 | Validation Accuracy: 54.88% | Time elapsed: 485.19s
Epoch 6 | Training Loss (Avg): 1.417483 | Validation Accuracy: 55.42% | Time elapsed: 485.96s
Epoch 7 | Training Loss (Avg): 1.238649 | Validation Accuracy: 60.12% | Time elapsed: 486.23s


In [1]:
#Testing
try:
    assert os.path.exists(os.path.join(save_path, 'best'+str(epochs)+'.pt'))

    net.load_state_dict(torch.load(os.path.join(save_path, 'best'+str(epochs)+'.pt')))
    net.eval()

    correct = 0.0
    total = 0.0

    with torch.no_grad():
        for idx, data in enumerate(dataloaders['test']):

            inputs, labels = data
            inputs = inputs.to('cuda' if use_cuda else 'cpu')
            labels = labels.to('cuda' if use_cuda else 'cpu')

            outputs = net(inputs)
            _, predicted = torch.max(outputs, 1)

            total += labels.shape[0]
            correct += (predicted == labels).sum().item()

        print(f'Accuracy of the model on test images: {100 * correct // total}%')
    
except:
    print('Please train the model before testing.')

Accuracy of the model on test images: 61.0%
