In [1]:
import os
import math
from collections import OrderedDict
import numpy as np
import matplotlib.pyplot as plt
import time

import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torch.utils.data import random_split

from torchvision.datasets import CIFAR10

from model_architecture import *

# Dataset preparation

In [2]:
def get_dataset(train_val_split = 0.9):

    transform  = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])
    
    train_data = CIFAR10(root='./data', train = True, download = first_run, transform = transform)
    test_data  = CIFAR10(root='./data', train = False, download = first_run, transform = transform)

    train_size = int(train_val_split * len(train_data))
    val_size = len(train_data) - train_size


    train_data, val_data = random_split(train_data, [train_size, val_size])
    
    return train_data, val_data, test_data


def get_dataloader(batch_size, train_val_split = 0.9, num_workers = 1):
    
    train_set, val_set, test_set = get_dataset(train_val_split)
    trainloader = DataLoader(train_set, batch_size = batch_size, shuffle = True, num_workers = num_workers, pin_memory = True)
    valloader = DataLoader(val_set, batch_size = batch_size, shuffle = False, num_workers = num_workers, pin_memory = True)
    testloader = DataLoader(test_set, batch_size = batch_size, shuffle = False, num_workers = num_workers, pin_memory = True)

    return trainloader, valloader, testloader

# k = 12, depth = 40

In [3]:
growth_rate = 12
depth = 40 
reduction = 0.5
num_classes = 10
bottle_neck_flag = True
layers_per_stage = None
growth_rate_per_stage = None
drop_prob = 0.0
fetch_type = "sparse"

torch.manual_seed(0)
np.random.seed(0)

net = Model(growth_rate = growth_rate, 
            depth = depth, 
            reduction = reduction,
            num_classes = num_classes,
            bottle_neck_flag = bottle_neck_flag,
            layers_per_stage = layers_per_stage,
            growth_rate_per_stage = growth_rate_per_stage, 
            drop_prob = drop_prob,
            fetch_type = fetch_type)

print(net)
total_params = sum([p.data.nelement() for p in net.parameters()])

print('')
print('Total trainable parameters in the network: %.4f' % (total_params / 1e6) + 'M')

Model(
  (features): Sequential(
    (conv0): Conv2d(3, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (dense-stage-0): dense_stage(
      (block-1): bottle_neck(
        (batch_norm1): BatchNorm2d(12, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(12, 48, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (batch_norm2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU(inplace=True)
        (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (block-2): bottle_neck(
        (batch_norm1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(24, 48, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (batch_norm2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  

In [4]:
batch_size = 32
epochs = 70
learning_rate = 0.1
momentum = 0.9

first_run = True 
use_cuda = torch.cuda.is_available()

train_val_split = 0.9

trainloader, valloader, testloader = get_dataloader(batch_size = batch_size, train_val_split = train_val_split)
dataloaders = {'train': trainloader, 'val' : valloader, 'test': testloader}

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr = learning_rate, momentum = momentum)

Files already downloaded and verified
Files already downloaded and verified


In [5]:
net = net.to('cuda' if use_cuda else 'cpu')

best_val_accuracy = -math.inf

for epoch in range(epochs):

    net.train()

    batches_in_pass = len(dataloaders['train'])
    
    #Training
    training_epoch_start_time = time.time()

    loss_total = 0.0
    epoch_loss = 0.0
    
    for idx, data in enumerate(dataloaders['train']):
        
        inputs, labels = data
        inputs = inputs.to('cuda' if use_cuda else 'cpu')
        labels = labels.to('cuda' if use_cuda else 'cpu')
        
        optimizer.zero_grad()
        
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        epoch_loss += loss.item()
        loss_total += loss.item()
        
    #Validation   
    epoch_loss /= batches_in_pass

    net.eval()

    correct = 0.0
    total = 0.0
    for idx, data in enumerate(dataloaders['val']):

        inputs, labels = data
        inputs = inputs.to('cuda')
        labels = labels.to('cuda')

        outputs = net(inputs)
        _, predicted = torch.max(outputs, 1)

        total += labels.shape[0]
        correct += (predicted == labels).sum().item()

    epoch_accuracy = 100 * correct / total
    
    print(f'Epoch {epoch + 1} | Training Loss (Avg): {epoch_loss:.6f} | Validation Accuracy: {epoch_accuracy}% | Time elapsed: {time.time() - training_epoch_start_time:.2f}s')
    
    #Saving the model
    save_path = os.path.join(os.getcwd(), 'models', 'sparsenet_cifar10_k12_depth40')
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    #torch.save(net.state_dict(), os.path.join(save_path, f'epoch_{epoch}.pt'))
    
    if epoch_accuracy > best_val_accuracy:

        torch.save(net.state_dict(), os.path.join(save_path, 'best'+str(epochs)+'.pt'))
        best_val_accuracy = epoch_accuracy

print('Training Complete.')  

Epoch 1 | Training Loss (Avg): 1.624018 | Validation Accuracy: 51.58% | Time elapsed: 34.66s
Epoch 2 | Training Loss (Avg): 1.134922 | Validation Accuracy: 65.3% | Time elapsed: 35.17s
Epoch 3 | Training Loss (Avg): 0.911614 | Validation Accuracy: 68.3% | Time elapsed: 35.93s
Epoch 4 | Training Loss (Avg): 0.784496 | Validation Accuracy: 72.04% | Time elapsed: 37.33s
Epoch 5 | Training Loss (Avg): 0.699037 | Validation Accuracy: 74.5% | Time elapsed: 35.78s
Epoch 6 | Training Loss (Avg): 0.646905 | Validation Accuracy: 75.08% | Time elapsed: 35.89s
Epoch 7 | Training Loss (Avg): 0.601672 | Validation Accuracy: 75.98% | Time elapsed: 34.31s
Epoch 8 | Training Loss (Avg): 0.558173 | Validation Accuracy: 78.18% | Time elapsed: 34.97s
Epoch 9 | Training Loss (Avg): 0.522528 | Validation Accuracy: 78.14% | Time elapsed: 33.93s
Epoch 10 | Training Loss (Avg): 0.500297 | Validation Accuracy: 79.7% | Time elapsed: 34.94s
Epoch 11 | Training Loss (Avg): 0.472686 | Validation Accuracy: 80.22% | 

In [6]:
#Testing
try:
    assert os.path.exists(os.path.join(save_path, 'best'+str(epochs)+'.pt'))

    net.load_state_dict(torch.load(os.path.join(save_path, 'best'+str(epochs)+'.pt')))
    net.eval()

    correct = 0.0
    total = 0.0

    with torch.no_grad():
        for idx, data in enumerate(dataloaders['test']):

            inputs, labels = data
            inputs = inputs.to('cuda' if use_cuda else 'cpu')
            labels = labels.to('cuda' if use_cuda else 'cpu')

            outputs = net(inputs)
            _, predicted = torch.max(outputs, 1)

            total += labels.shape[0]
            correct += (predicted == labels).sum().item()

        print(f'Accuracy of the model on test images: {100 * correct // total}%')
    
except:
    print('Please train the model before testing.')

Accuracy of the model on test images: 83.0%


# k = 24, depth = 100

In [7]:
growth_rate = 24
depth = 100
reduction = 0.5
num_classes = 10
bottle_neck_flag = True
layers_per_stage = None
growth_rate_per_stage = None
drop_prob = 0.0
fetch_type = "sparse"

torch.manual_seed(0)
np.random.seed(0)

net = Model(growth_rate = growth_rate, 
            depth = depth, 
            reduction = reduction,
            num_classes = num_classes,
            bottle_neck_flag = bottle_neck_flag,
            layers_per_stage = layers_per_stage,
            growth_rate_per_stage = growth_rate_per_stage, 
            drop_prob = drop_prob,
            fetch_type = fetch_type)

print(net)
total_params = sum([p.data.nelement() for p in net.parameters()])

print('')
print('Total trainable parameters in the network: %.4f' % (total_params / 1e6) + 'M')

Model(
  (features): Sequential(
    (conv0): Conv2d(3, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (dense-stage-0): dense_stage(
      (block-1): bottle_neck(
        (batch_norm1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(24, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (batch_norm2): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU(inplace=True)
        (conv2): Conv2d(96, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (block-2): bottle_neck(
        (batch_norm1): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(48, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (batch_norm2): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  

In [8]:
batch_size = 32
epochs = 70
learning_rate = 0.1
momentum = 0.9

first_run = True 
use_cuda = torch.cuda.is_available()

train_val_split = 0.9

trainloader, valloader, testloader = get_dataloader(batch_size = batch_size, train_val_split = train_val_split)
dataloaders = {'train': trainloader, 'val' : valloader, 'test': testloader}

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr = learning_rate, momentum = momentum)

Files already downloaded and verified
Files already downloaded and verified


In [9]:
net = net.to('cuda' if use_cuda else 'cpu')

best_val_accuracy = -math.inf

for epoch in range(epochs):

    net.train()

    batches_in_pass = len(dataloaders['train'])
    
    #Training
    training_epoch_start_time = time.time()

    loss_total = 0.0
    epoch_loss = 0.0
    
    for idx, data in enumerate(dataloaders['train']):
        
        inputs, labels = data
        inputs = inputs.to('cuda' if use_cuda else 'cpu')
        labels = labels.to('cuda' if use_cuda else 'cpu')
        
        optimizer.zero_grad()
        
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        epoch_loss += loss.item()
        loss_total += loss.item()
        
    #Validation   
    epoch_loss /= batches_in_pass

    net.eval()

    correct = 0.0
    total = 0.0
    for idx, data in enumerate(dataloaders['val']):

        inputs, labels = data
        inputs = inputs.to('cuda')
        labels = labels.to('cuda')

        outputs = net(inputs)
        _, predicted = torch.max(outputs, 1)

        total += labels.shape[0]
        correct += (predicted == labels).sum().item()

    epoch_accuracy = 100 * correct / total
    
    print(f'Epoch {epoch + 1} | Training Loss (Avg): {epoch_loss:.6f} | Validation Accuracy: {epoch_accuracy}% | Time elapsed: {time.time() - training_epoch_start_time:.2f}s')
    
    #Saving the model
    save_path = os.path.join(os.getcwd(), 'models', 'sparsenet_cifar10_k24_depth100')
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    #torch.save(net.state_dict(), os.path.join(save_path, f'epoch_{epoch}.pt'))
    
    if epoch_accuracy > best_val_accuracy:

        torch.save(net.state_dict(), os.path.join(save_path, 'best'+str(epochs)+'.pt'))
        best_val_accuracy = epoch_accuracy

print('Training Complete.')  

Epoch 1 | Training Loss (Avg): 1.596949 | Validation Accuracy: 53.1% | Time elapsed: 135.34s
Epoch 2 | Training Loss (Avg): 1.054401 | Validation Accuracy: 66.44% | Time elapsed: 135.32s
Epoch 3 | Training Loss (Avg): 0.772946 | Validation Accuracy: 75.08% | Time elapsed: 135.19s
Epoch 4 | Training Loss (Avg): 0.625315 | Validation Accuracy: 78.38% | Time elapsed: 135.30s
Epoch 5 | Training Loss (Avg): 0.518316 | Validation Accuracy: 79.48% | Time elapsed: 135.13s
Epoch 6 | Training Loss (Avg): 0.451553 | Validation Accuracy: 80.62% | Time elapsed: 134.95s
Epoch 7 | Training Loss (Avg): 0.393580 | Validation Accuracy: 81.78% | Time elapsed: 135.21s
Epoch 8 | Training Loss (Avg): 0.343305 | Validation Accuracy: 83.02% | Time elapsed: 135.02s
Epoch 9 | Training Loss (Avg): 0.308969 | Validation Accuracy: 82.8% | Time elapsed: 135.58s
Epoch 10 | Training Loss (Avg): 0.266628 | Validation Accuracy: 83.4% | Time elapsed: 135.31s
Epoch 11 | Training Loss (Avg): 0.238872 | Validation Accuracy

In [10]:
#Testing
try:
    assert os.path.exists(os.path.join(save_path, 'best'+str(epochs)+'.pt'))

    net.load_state_dict(torch.load(os.path.join(save_path, 'best'+str(epochs)+'.pt')))
    net.eval()

    correct = 0.0
    total = 0.0

    with torch.no_grad():
        for idx, data in enumerate(dataloaders['test']):

            inputs, labels = data
            inputs = inputs.to('cuda' if use_cuda else 'cpu')
            labels = labels.to('cuda' if use_cuda else 'cpu')

            outputs = net(inputs)
            _, predicted = torch.max(outputs, 1)

            total += labels.shape[0]
            correct += (predicted == labels).sum().item()

        print(f'Accuracy of the model on test images: {100 * correct // total}%')
    
except:
    print('Please train the model before testing.')

Accuracy of the model on test images: 87.0%


# k = 36, depth = 100

In [11]:
growth_rate = 36
depth = 100
reduction = 0.5
num_classes = 10
bottle_neck_flag = True
layers_per_stage = None
growth_rate_per_stage = None
drop_prob = 0.0
fetch_type = "sparse"

torch.manual_seed(0)
np.random.seed(0)

net = Model(growth_rate = growth_rate, 
            depth = depth, 
            reduction = reduction,
            num_classes = num_classes,
            bottle_neck_flag = bottle_neck_flag,
            layers_per_stage = layers_per_stage,
            growth_rate_per_stage = growth_rate_per_stage, 
            drop_prob = drop_prob,
            fetch_type = fetch_type)

print(net)
total_params = sum([p.data.nelement() for p in net.parameters()])

print('')
print('Total trainable parameters in the network: %.4f' % (total_params / 1e6) + 'M')

Model(
  (features): Sequential(
    (conv0): Conv2d(3, 36, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (dense-stage-0): dense_stage(
      (block-1): bottle_neck(
        (batch_norm1): BatchNorm2d(36, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(36, 144, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (batch_norm2): BatchNorm2d(144, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU(inplace=True)
        (conv2): Conv2d(144, 36, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (block-2): bottle_neck(
        (batch_norm1): BatchNorm2d(72, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(72, 144, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (batch_norm2): BatchNorm2d(144, eps=1e-05, momentum=0.1, affine=True, track_running_stats=Tru

In [12]:
batch_size = 32
epochs = 70
learning_rate = 0.1
momentum = 0.9

first_run = True 
use_cuda = torch.cuda.is_available()

train_val_split = 0.9

trainloader, valloader, testloader = get_dataloader(batch_size = batch_size, train_val_split = train_val_split)
dataloaders = {'train': trainloader, 'val' : valloader, 'test': testloader}

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr = learning_rate, momentum = momentum)

Files already downloaded and verified
Files already downloaded and verified


In [16]:
net = net.to('cuda' if use_cuda else 'cpu')

best_val_accuracy = -math.inf

for epoch in range(epochs):

    net.train()

    batches_in_pass = len(dataloaders['train'])
    
    #Training
    training_epoch_start_time = time.time()

    loss_total = 0.0
    epoch_loss = 0.0
    
    for idx, data in enumerate(dataloaders['train']):
        
        inputs, labels = data
        inputs = inputs.to('cuda' if use_cuda else 'cpu')
        labels = labels.to('cuda' if use_cuda else 'cpu')
        
        optimizer.zero_grad()
        
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        epoch_loss += loss.item()
        loss_total += loss.item()
        
    #Validation   
    epoch_loss /= batches_in_pass

    net.eval()

    correct = 0.0
    total = 0.0
    for idx, data in enumerate(dataloaders['val']):

        inputs, labels = data
        inputs = inputs.to('cuda')
        labels = labels.to('cuda')

        outputs = net(inputs)
        _, predicted = torch.max(outputs, 1)

        total += labels.shape[0]
        correct += (predicted == labels).sum().item()

    epoch_accuracy = 100 * correct / total
    
    print(f'Epoch {epoch + 1} | Training Loss (Avg): {epoch_loss:.6f} | Validation Accuracy: {epoch_accuracy}% | Time elapsed: {time.time() - training_epoch_start_time:.2f}s')
    
    #Saving the model
    save_path = os.path.join(os.getcwd(), 'models', 'sparsenet_cifar10_k36_depth100')
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    #torch.save(net.state_dict(), os.path.join(save_path, f'epoch_{epoch}.pt'))
    
    if epoch_accuracy > best_val_accuracy:

        torch.save(net.state_dict(), os.path.join(save_path, 'best'+str(epochs)+'.pt'))
        best_val_accuracy = epoch_accuracy

print('Training Complete.')  

Epoch 1 | Training Loss (Avg): 1.583771 | Validation Accuracy: 54.54% | Time elapsed: 302.14s
Epoch 2 | Training Loss (Avg): 1.062657 | Validation Accuracy: 68.98% | Time elapsed: 307.43s
Epoch 3 | Training Loss (Avg): 0.784089 | Validation Accuracy: 77.04% | Time elapsed: 307.62s
Epoch 4 | Training Loss (Avg): 0.613397 | Validation Accuracy: 76.7% | Time elapsed: 308.09s
Epoch 5 | Training Loss (Avg): 0.505194 | Validation Accuracy: 82.82% | Time elapsed: 307.95s
Epoch 6 | Training Loss (Avg): 0.429017 | Validation Accuracy: 82.36% | Time elapsed: 308.10s
Epoch 7 | Training Loss (Avg): 0.365030 | Validation Accuracy: 83.28% | Time elapsed: 308.19s
Epoch 8 | Training Loss (Avg): 0.311701 | Validation Accuracy: 84.3% | Time elapsed: 308.05s
Epoch 9 | Training Loss (Avg): 0.267434 | Validation Accuracy: 84.8% | Time elapsed: 308.02s
Epoch 10 | Training Loss (Avg): 0.231147 | Validation Accuracy: 85.78% | Time elapsed: 306.89s
Epoch 11 | Training Loss (Avg): 0.197503 | Validation Accuracy

In [14]:
#Testing
try:
    assert os.path.exists(os.path.join(save_path, 'best'+str(epochs)+'.pt'))

    net.load_state_dict(torch.load(os.path.join(save_path, 'best'+str(epochs)+'.pt')))
    net.eval()

    correct = 0.0
    total = 0.0

    with torch.no_grad():
        for idx, data in enumerate(dataloaders['test']):

            inputs, labels = data
            inputs = inputs.to('cuda' if use_cuda else 'cpu')
            labels = labels.to('cuda' if use_cuda else 'cpu')

            outputs = net(inputs)
            _, predicted = torch.max(outputs, 1)

            total += labels.shape[0]
            correct += (predicted == labels).sum().item()

        print(f'Accuracy of the model on test images: {100 * correct // total}%')
    
except:
    print('Please train the model before testing.')

Accuracy of the model on test images: 86.0%


# k = 64, depth = 100

In [17]:
growth_rate = 64
depth = 100
reduction = 0.5
num_classes = 10
bottle_neck_flag = True
layers_per_stage = None
growth_rate_per_stage = None
drop_prob = 0.0
fetch_type = "sparse"

torch.manual_seed(0)
np.random.seed(0)

net = Model(growth_rate = growth_rate, 
            depth = depth, 
            reduction = reduction,
            num_classes = num_classes,
            bottle_neck_flag = bottle_neck_flag,
            layers_per_stage = layers_per_stage,
            growth_rate_per_stage = growth_rate_per_stage, 
            drop_prob = drop_prob,
            fetch_type = fetch_type)

print(net)
total_params = sum([p.data.nelement() for p in net.parameters()])

print('')
print('Total trainable parameters in the network: %.4f' % (total_params / 1e6) + 'M')

Model(
  (features): Sequential(
    (conv0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (dense-stage-0): dense_stage(
      (block-1): bottle_neck(
        (batch_norm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (batch_norm2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU(inplace=True)
        (conv2): Conv2d(256, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (block-2): bottle_neck(
        (batch_norm1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (batch_norm2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=T

In [18]:
batch_size = 32
epochs = 27
learning_rate = 0.1
momentum = 0.9

first_run = True 
use_cuda = torch.cuda.is_available()

train_val_split = 0.9

trainloader, valloader, testloader = get_dataloader(batch_size = batch_size, train_val_split = train_val_split)
dataloaders = {'train': trainloader, 'val' : valloader, 'test': testloader}

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr = learning_rate, momentum = momentum)

Files already downloaded and verified
Files already downloaded and verified


In [30]:
net = net.to('cuda' if use_cuda else 'cpu')

best_val_accuracy = -math.inf

for epoch in range(epochs):

    net.train()

    batches_in_pass = len(dataloaders['train'])
    
    #Training
    training_epoch_start_time = time.time()

    loss_total = 0.0
    epoch_loss = 0.0
    
    for idx, data in enumerate(dataloaders['train']):
        
        inputs, labels = data
        inputs = inputs.to('cuda' if use_cuda else 'cpu')
        labels = labels.to('cuda' if use_cuda else 'cpu')
        
        optimizer.zero_grad()
        
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        epoch_loss += loss.item()
        loss_total += loss.item()
        
    #Validation   
    epoch_loss /= batches_in_pass

    net.eval()

    correct = 0.0
    total = 0.0
    for idx, data in enumerate(dataloaders['val']):

        inputs, labels = data
        inputs = inputs.to('cuda')
        labels = labels.to('cuda')

        outputs = net(inputs)
        _, predicted = torch.max(outputs, 1)

        total += labels.shape[0]
        correct += (predicted == labels).sum().item()

    epoch_accuracy = 100 * correct / total
    
    print(f'Epoch {epoch + 1} | Training Loss (Avg): {epoch_loss:.6f} | Validation Accuracy: {epoch_accuracy}% | Time elapsed: {time.time() - training_epoch_start_time:.2f}s')
    
    #Saving the model
    save_path = os.path.join(os.getcwd(), 'models', 'sparsenet_cifar10_k64_depth100')
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    #torch.save(net.state_dict(), os.path.join(save_path, f'epoch_{epoch}.pt'))
    
    if epoch_accuracy > best_val_accuracy:

        torch.save(net.state_dict(), os.path.join(save_path, 'best'+str(epochs)+'.pt'))
        best_val_accuracy = epoch_accuracy

print('Training Complete.')  

Epoch 1 | Training Loss (Avg): 1.585720 | Validation Accuracy: 57.88% | Time elapsed: 411.98s
Epoch 2 | Training Loss (Avg): 0.992235 | Validation Accuracy: 68.76% | Time elapsed: 413.73s
Epoch 3 | Training Loss (Avg): 0.697471 | Validation Accuracy: 77.04% | Time elapsed: 413.81s
Epoch 4 | Training Loss (Avg): 0.538413 | Validation Accuracy: 83.5% | Time elapsed: 413.58s
Epoch 5 | Training Loss (Avg): 0.423749 | Validation Accuracy: 84.0% | Time elapsed: 412.89s
Epoch 6 | Training Loss (Avg): 0.344029 | Validation Accuracy: 84.86% | Time elapsed: 414.37s
Epoch 7 | Training Loss (Avg): 0.286461 | Validation Accuracy: 84.66% | Time elapsed: 413.56s
Epoch 8 | Training Loss (Avg): 0.231312 | Validation Accuracy: 85.36% | Time elapsed: 413.72s
Epoch 9 | Training Loss (Avg): 0.191685 | Validation Accuracy: 86.64% | Time elapsed: 413.72s
Epoch 10 | Training Loss (Avg): 0.155441 | Validation Accuracy: 85.82% | Time elapsed: 414.41s
Epoch 11 | Training Loss (Avg): 0.136688 | Validation Accurac

In [20]:
#Testing
try:
    assert os.path.exists(os.path.join(save_path, 'best'+str(epochs)+'.pt'))

    net.load_state_dict(torch.load(os.path.join(save_path, 'best'+str(epochs)+'.pt')))
    net.eval()

    correct = 0.0
    total = 0.0

    with torch.no_grad():
        for idx, data in enumerate(dataloaders['test']):

            inputs, labels = data
            inputs = inputs.to('cuda' if use_cuda else 'cpu')
            labels = labels.to('cuda' if use_cuda else 'cpu')

            outputs = net(inputs)
            _, predicted = torch.max(outputs, 1)

            total += labels.shape[0]
            correct += (predicted == labels).sum().item()

        print(f'Accuracy of the model on test images: {100 * correct // total}%')
    
except:
    print('Please train the model before testing.')

Accuracy of the model on test images: 87.0%


# k = [16, 32, 64], depth = 100

In [21]:
growth_rate = 24
depth = 100
reduction = 0.5
num_classes = 10
bottle_neck_flag = True
layers_per_stage = None
growth_rate_per_stage = [16, 32, 64]
drop_prob = 0.0
fetch_type = "sparse"

torch.manual_seed(0)
np.random.seed(0)

net = Model(growth_rate = growth_rate, 
            depth = depth, 
            reduction = reduction,
            num_classes = num_classes,
            bottle_neck_flag = bottle_neck_flag,
            layers_per_stage = layers_per_stage,
            growth_rate_per_stage = growth_rate_per_stage, 
            drop_prob = drop_prob,
            fetch_type = fetch_type)

print(net)
total_params = sum([p.data.nelement() for p in net.parameters()])

print('')
print('Total trainable parameters in the network: %.4f' % (total_params / 1e6) + 'M')

Model(
  (features): Sequential(
    (conv0): Conv2d(3, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (dense-stage-0): dense_stage(
      (block-1): bottle_neck(
        (batch_norm1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(24, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (batch_norm2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU(inplace=True)
        (conv2): Conv2d(64, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (block-2): bottle_neck(
        (batch_norm1): BatchNorm2d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(40, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (batch_norm2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  

In [22]:
batch_size = 32
epochs = 27
learning_rate = 0.1
momentum = 0.9

first_run = True 
use_cuda = torch.cuda.is_available()

train_val_split = 0.9

trainloader, valloader, testloader = get_dataloader(batch_size = batch_size, train_val_split = train_val_split)
dataloaders = {'train': trainloader, 'val' : valloader, 'test': testloader}

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr = learning_rate, momentum = momentum)

Files already downloaded and verified
Files already downloaded and verified


In [23]:
net = net.to('cuda' if use_cuda else 'cpu')

best_val_accuracy = -math.inf

for epoch in range(epochs):

    net.train()

    batches_in_pass = len(dataloaders['train'])
    
    #Training
    training_epoch_start_time = time.time()

    loss_total = 0.0
    epoch_loss = 0.0
    
    for idx, data in enumerate(dataloaders['train']):
        
        inputs, labels = data
        inputs = inputs.to('cuda' if use_cuda else 'cpu')
        labels = labels.to('cuda' if use_cuda else 'cpu')
        
        optimizer.zero_grad()
        
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        epoch_loss += loss.item()
        loss_total += loss.item()
        
    #Validation   
    epoch_loss /= batches_in_pass

    net.eval()

    correct = 0.0
    total = 0.0
    for idx, data in enumerate(dataloaders['val']):

        inputs, labels = data
        inputs = inputs.to('cuda')
        labels = labels.to('cuda')

        outputs = net(inputs)
        _, predicted = torch.max(outputs, 1)

        total += labels.shape[0]
        correct += (predicted == labels).sum().item()

    epoch_accuracy = 100 * correct / total
    
    print(f'Epoch {epoch + 1} | Training Loss (Avg): {epoch_loss:.6f} | Validation Accuracy: {epoch_accuracy}% | Time elapsed: {time.time() - training_epoch_start_time:.2f}s')
    
    #Saving the model
    save_path = os.path.join(os.getcwd(), 'models', 'sparsenet_cifar10_k163224_depth100')
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    #torch.save(net.state_dict(), os.path.join(save_path, f'epoch_{epoch}.pt'))
    
    if epoch_accuracy > best_val_accuracy:

        torch.save(net.state_dict(), os.path.join(save_path, 'best'+str(epochs)+'.pt'))
        best_val_accuracy = epoch_accuracy

print('Training Complete.')  

Epoch 1 | Training Loss (Avg): 1.622380 | Validation Accuracy: 57.54% | Time elapsed: 137.88s
Epoch 2 | Training Loss (Avg): 1.039837 | Validation Accuracy: 69.8% | Time elapsed: 137.76s
Epoch 3 | Training Loss (Avg): 0.783207 | Validation Accuracy: 75.34% | Time elapsed: 137.80s
Epoch 4 | Training Loss (Avg): 0.622063 | Validation Accuracy: 77.54% | Time elapsed: 137.75s
Epoch 5 | Training Loss (Avg): 0.519649 | Validation Accuracy: 81.28% | Time elapsed: 137.73s
Epoch 6 | Training Loss (Avg): 0.437925 | Validation Accuracy: 81.96% | Time elapsed: 138.05s
Epoch 7 | Training Loss (Avg): 0.377451 | Validation Accuracy: 83.34% | Time elapsed: 137.76s
Epoch 8 | Training Loss (Avg): 0.319018 | Validation Accuracy: 83.24% | Time elapsed: 137.78s
Epoch 9 | Training Loss (Avg): 0.274065 | Validation Accuracy: 84.68% | Time elapsed: 137.99s
Epoch 10 | Training Loss (Avg): 0.231926 | Validation Accuracy: 84.36% | Time elapsed: 138.00s
Epoch 11 | Training Loss (Avg): 0.198381 | Validation Accura

In [24]:
#Testing
try:
    assert os.path.exists(os.path.join(save_path, 'best'+str(epochs)+'.pt'))

    net.load_state_dict(torch.load(os.path.join(save_path, 'best'+str(epochs)+'.pt')))
    net.eval()

    correct = 0.0
    total = 0.0

    with torch.no_grad():
        for idx, data in enumerate(dataloaders['test']):

            inputs, labels = data
            inputs = inputs.to('cuda' if use_cuda else 'cpu')
            labels = labels.to('cuda' if use_cuda else 'cpu')

            outputs = net(inputs)
            _, predicted = torch.max(outputs, 1)

            total += labels.shape[0]
            correct += (predicted == labels).sum().item()

        print(f'Accuracy of the model on test images: {100 * correct // total}%')
    
except:
    print('Please train the model before testing.')

Accuracy of the model on test images: 87.0%


# k = 24, depth = 100, without using bottleneck compression

In [25]:
growth_rate = 24
depth = 100
reduction = 0.5
num_classes = 10
bottle_neck_flag = False
layers_per_stage = None
growth_rate_per_stage = None
drop_prob = 0.0
fetch_type = "sparse"

torch.manual_seed(0)
np.random.seed(0)

net = Model(growth_rate = growth_rate, 
            depth = depth, 
            reduction = reduction,
            num_classes = num_classes,
            bottle_neck_flag = bottle_neck_flag,
            layers_per_stage = layers_per_stage,
            growth_rate_per_stage = growth_rate_per_stage, 
            drop_prob = drop_prob,
            fetch_type = fetch_type)

print(net)
total_params = sum([p.data.nelement() for p in net.parameters()])

print('')
print('Total trainable parameters in the network: %.4f' % (total_params / 1e6) + 'M')

Model(
  (features): Sequential(
    (conv0): Conv2d(3, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (dense-stage-0): dense_stage(
      (block-1): single_layer(
        (batch_norm1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(24, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (block-2): single_layer(
        (batch_norm1): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(48, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (block-3): single_layer(
        (batch_norm1): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(48, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (block-4): single_

In [26]:
batch_size = 32
epochs = 27
learning_rate = 0.1
momentum = 0.9

first_run = True 
use_cuda = torch.cuda.is_available()

train_val_split = 0.9

trainloader, valloader, testloader = get_dataloader(batch_size = batch_size, train_val_split = train_val_split)
dataloaders = {'train': trainloader, 'val' : valloader, 'test': testloader}

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr = learning_rate, momentum = momentum)

Files already downloaded and verified
Files already downloaded and verified


In [28]:
net = net.to('cuda' if use_cuda else 'cpu')

best_val_accuracy = -math.inf

for epoch in range(epochs):

    net.train()

    batches_in_pass = len(dataloaders['train'])
    
    #Training
    training_epoch_start_time = time.time()

    loss_total = 0.0
    epoch_loss = 0.0
    
    for idx, data in enumerate(dataloaders['train']):
        
        inputs, labels = data
        inputs = inputs.to('cuda' if use_cuda else 'cpu')
        labels = labels.to('cuda' if use_cuda else 'cpu')
        
        optimizer.zero_grad()
        
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        epoch_loss += loss.item()
        loss_total += loss.item()
        
    #Validation   
    epoch_loss /= batches_in_pass

    net.eval()

    correct = 0.0
    total = 0.0
    for idx, data in enumerate(dataloaders['val']):

        inputs, labels = data
        inputs = inputs.to('cuda')
        labels = labels.to('cuda')

        outputs = net(inputs)
        _, predicted = torch.max(outputs, 1)

        total += labels.shape[0]
        correct += (predicted == labels).sum().item()

    epoch_accuracy = 100 * correct / total
    
    print(f'Epoch {epoch + 1} | Training Loss (Avg): {epoch_loss:.6f} | Validation Accuracy: {epoch_accuracy}% | Time elapsed: {time.time() - training_epoch_start_time:.2f}s')
    
    #Saving the model
    save_path = os.path.join(os.getcwd(), 'models', 'sparsenet_cifar10_k24_depth100_nobc')
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    #torch.save(net.state_dict(), os.path.join(save_path, f'epoch_{epoch}.pt'))
    
    if epoch_accuracy > best_val_accuracy:

        torch.save(net.state_dict(), os.path.join(save_path, 'best'+str(epochs)+'.pt'))
        best_val_accuracy = epoch_accuracy

print('Training Complete.')  

Epoch 1 | Training Loss (Avg): 1.474023 | Validation Accuracy: 57.3% | Time elapsed: 239.03s
Epoch 2 | Training Loss (Avg): 0.854941 | Validation Accuracy: 74.7% | Time elapsed: 241.88s
Epoch 3 | Training Loss (Avg): 0.630884 | Validation Accuracy: 79.64% | Time elapsed: 241.93s
Epoch 4 | Training Loss (Avg): 0.510883 | Validation Accuracy: 81.88% | Time elapsed: 241.81s
Epoch 5 | Training Loss (Avg): 0.423833 | Validation Accuracy: 79.98% | Time elapsed: 242.44s
Epoch 6 | Training Loss (Avg): 0.354983 | Validation Accuracy: 83.94% | Time elapsed: 241.70s
Epoch 7 | Training Loss (Avg): 0.300270 | Validation Accuracy: 85.18% | Time elapsed: 241.91s
Epoch 8 | Training Loss (Avg): 0.255616 | Validation Accuracy: 85.26% | Time elapsed: 241.70s
Epoch 9 | Training Loss (Avg): 0.219493 | Validation Accuracy: 85.84% | Time elapsed: 241.82s
Epoch 10 | Training Loss (Avg): 0.184466 | Validation Accuracy: 85.8% | Time elapsed: 241.94s
Epoch 11 | Training Loss (Avg): 0.154972 | Validation Accuracy

In [29]:
#Testing
try:
    assert os.path.exists(os.path.join(save_path, 'best'+str(epochs)+'.pt'))

    net.load_state_dict(torch.load(os.path.join(save_path, 'best'+str(epochs)+'.pt')))
    net.eval()

    correct = 0.0
    total = 0.0

    with torch.no_grad():
        for idx, data in enumerate(dataloaders['test']):

            inputs, labels = data
            inputs = inputs.to('cuda' if use_cuda else 'cpu')
            labels = labels.to('cuda' if use_cuda else 'cpu')

            outputs = net(inputs)
            _, predicted = torch.max(outputs, 1)

            total += labels.shape[0]
            correct += (predicted == labels).sum().item()

        print(f'Accuracy of the model on test images: {100 * correct // total}%')
    
except:
    print('Please train the model before testing.')

Accuracy of the model on test images: 87.0%


# k = 36, depth = 100, without using bottleneck compression 

In [3]:
growth_rate = 36
depth = 100
reduction = 0.5
num_classes = 10
bottle_neck_flag = False
layers_per_stage = None
growth_rate_per_stage = None
drop_prob = 0.0
fetch_type = "sparse"

torch.manual_seed(0)
np.random.seed(0)

net = Model(growth_rate = growth_rate, 
            depth = depth, 
            reduction = reduction,
            num_classes = num_classes,
            bottle_neck_flag = bottle_neck_flag,
            layers_per_stage = layers_per_stage,
            growth_rate_per_stage = growth_rate_per_stage, 
            drop_prob = drop_prob,
            fetch_type = fetch_type)

print(net)
total_params = sum([p.data.nelement() for p in net.parameters()])

print('')
print('Total trainable parameters in the network: %.4f' % (total_params / 1e6) + 'M')

Model(
  (features): Sequential(
    (conv0): Conv2d(3, 36, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (dense-stage-0): dense_stage(
      (block-1): single_layer(
        (batch_norm1): BatchNorm2d(36, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(36, 36, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (block-2): single_layer(
        (batch_norm1): BatchNorm2d(72, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(72, 36, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (block-3): single_layer(
        (batch_norm1): BatchNorm2d(72, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(72, 36, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (block-4): single_

In [4]:
batch_size = 32
epochs = 27
learning_rate = 0.1
momentum = 0.9

first_run = True 
use_cuda = torch.cuda.is_available()

train_val_split = 0.9

trainloader, valloader, testloader = get_dataloader(batch_size = batch_size, train_val_split = train_val_split)
dataloaders = {'train': trainloader, 'val' : valloader, 'test': testloader}

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr = learning_rate, momentum = momentum)

Files already downloaded and verified
Files already downloaded and verified


In [7]:
net = net.to('cuda' if use_cuda else 'cpu')

best_val_accuracy = -math.inf

for epoch in range(epochs):

    net.train()

    batches_in_pass = len(dataloaders['train'])
    
    #Training
    training_epoch_start_time = time.time()

    loss_total = 0.0
    epoch_loss = 0.0
    
    for idx, data in enumerate(dataloaders['train']):
        
        inputs, labels = data
        inputs = inputs.to('cuda' if use_cuda else 'cpu')
        labels = labels.to('cuda' if use_cuda else 'cpu')
        
        optimizer.zero_grad()
        
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        epoch_loss += loss.item()
        loss_total += loss.item()
        
    #Validation   
    epoch_loss /= batches_in_pass

    net.eval()

    correct = 0.0
    total = 0.0
    for idx, data in enumerate(dataloaders['val']):

        inputs, labels = data
        inputs = inputs.to('cuda')
        labels = labels.to('cuda')

        outputs = net(inputs)
        _, predicted = torch.max(outputs, 1)

        total += labels.shape[0]
        correct += (predicted == labels).sum().item()

    epoch_accuracy = 100 * correct / total
    
    print(f'Epoch {epoch + 1} | Training Loss (Avg): {epoch_loss:.6f} | Validation Accuracy: {epoch_accuracy}% | Time elapsed: {time.time() - training_epoch_start_time:.2f}s')
    
    #Saving the model
    save_path = os.path.join(os.getcwd(), 'models', 'sparsenet_cifar10_k36_depth100_nobc')
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    #torch.save(net.state_dict(), os.path.join(save_path, f'epoch_{epoch}.pt'))
    
    if epoch_accuracy > best_val_accuracy:

        torch.save(net.state_dict(), os.path.join(save_path, 'best'+str(epochs)+'.pt'))
        best_val_accuracy = epoch_accuracy

print('Training Complete.')  

Epoch 1 | Training Loss (Avg): 1.482211 | Validation Accuracy: 63.5% | Time elapsed: 463.38s
Epoch 2 | Training Loss (Avg): 0.846644 | Validation Accuracy: 76.14% | Time elapsed: 471.92s
Epoch 3 | Training Loss (Avg): 0.618749 | Validation Accuracy: 80.78% | Time elapsed: 471.02s
Epoch 4 | Training Loss (Avg): 0.483096 | Validation Accuracy: 84.66% | Time elapsed: 471.35s
Epoch 5 | Training Loss (Avg): 0.395122 | Validation Accuracy: 85.68% | Time elapsed: 470.82s
Epoch 6 | Training Loss (Avg): 0.330204 | Validation Accuracy: 86.48% | Time elapsed: 470.84s
Epoch 7 | Training Loss (Avg): 0.272281 | Validation Accuracy: 88.04% | Time elapsed: 471.55s
Epoch 8 | Training Loss (Avg): 0.227267 | Validation Accuracy: 87.18% | Time elapsed: 471.61s
Epoch 9 | Training Loss (Avg): 0.192610 | Validation Accuracy: 87.98% | Time elapsed: 471.30s
Epoch 10 | Training Loss (Avg): 0.157642 | Validation Accuracy: 87.82% | Time elapsed: 470.99s
Epoch 11 | Training Loss (Avg): 0.136675 | Validation Accura

In [6]:
#Testing
try:
    assert os.path.exists(os.path.join(save_path, 'best'+str(epochs)+'.pt'))

    net.load_state_dict(torch.load(os.path.join(save_path, 'best'+str(epochs)+'.pt')))
    net.eval()

    correct = 0.0
    total = 0.0

    with torch.no_grad():
        for idx, data in enumerate(dataloaders['test']):

            inputs, labels = data
            inputs = inputs.to('cuda' if use_cuda else 'cpu')
            labels = labels.to('cuda' if use_cuda else 'cpu')

            outputs = net(inputs)
            _, predicted = torch.max(outputs, 1)

            total += labels.shape[0]
            correct += (predicted == labels).sum().item()

        print(f'Accuracy of the model on test images: {100 * correct // total}%')
    
except:
    print('Please train the model before testing.')

Accuracy of the model on test images: 86.0%
