In [3]:
# Import packages
import torch
import torch.nn as nn
from torchvision.datasets import CIFAR10
import torchvision.transforms as transforms

In [4]:
# Create an OTO instance
import sys
sys.path.append('..')
from sanity_check.backends.qnn import QMLP
from only_train_once import OTO
    
model = QMLP(3*32*32, 64, 32, 10) # Instantiate the model
dummy_input = torch.rand(1, 3, 32, 32)
oto = OTO(model=model.cuda(), dummy_input=dummy_input.cuda())

OTO graph constructor
graph build


  weight_clip_val = torch.tensor([-200.0, 200.0])
  act_clip_val = torch.tensor([-200.0, 200.0])


In [5]:
# Prepare CIFAR-10 dataset
trainset = CIFAR10(root='cifar10', train="True", download=True, transform=transforms.ToTensor())
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True, num_workers=1)
testset = CIFAR10(root='cifar10', train="False", download=True, transform=transforms.ToTensor())
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=False, num_workers=1)

Files already downloaded and verified
Files already downloaded and verified


In [5]:
# Plot the independency graph (optional)
oto.visualize(view=False, out_dir='./cache')

In [6]:
# Set up the Hesso optimizer
optimizer = oto.hesso(
    variant='sgd', 
    lr=0.1, 
    weight_decay=1e-4,
    target_group_sparsity=0.5,
    start_pruning_step=10 * len(trainloader), 
    pruning_periods=10,
    pruning_steps=10 * len(trainloader)
)

Setup HESSO
Target redundant groups per period:  [4, 4, 4, 4, 4, 4, 4, 4, 4, 12]


In [7]:
from utils.utils import check_accuracy

max_epoch = 100
model.cuda()
criterion = torch.nn.CrossEntropyLoss()

# Every 50 epochs, decay lr by 10.0
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.1) 

for epoch in range(max_epoch):
    f_avg_val = 0.0
    lr_scheduler.step()
    for X, y in trainloader:
        X = X.cuda()
        y = y.cuda()
        y_pred = model.forward(X)
        f = criterion(y_pred, y)
        optimizer.zero_grad()
        f.backward()
        f_avg_val += f
        optimizer.step()
    opt_metrics = optimizer.compute_metrics()
    
    accuracy1, accuracy5 = check_accuracy(model, testloader)
    # accuracy1, accuracy5 = check_accuracy(model, trainloader)
    f_avg_val = f_avg_val.cpu().item() / len(trainloader)
    
    print("Ep: {ep}, loss: {f:.2f}, norm_all:{param_norm:.2f}, grp_sparsity: {gs:.2f}, acc1: {acc1:.4f}, norm_import: {norm_import:.2f}, norm_redund: {norm_redund:.2f}, num_grp_import: {num_grps_import}, num_grp_redund: {num_grps_redund}"\
         .format(ep=epoch, f=f_avg_val, param_norm=opt_metrics.norm_params, gs=opt_metrics.group_sparsity, acc1=accuracy1,\
         norm_import=opt_metrics.norm_important_groups, norm_redund=opt_metrics.norm_redundant_groups, \
         num_grps_import=opt_metrics.num_important_groups, num_grps_redund=opt_metrics.num_redundant_groups
        ))



Ep: 0, loss: 1.96, norm_all:70.26, grp_sparsity: 0.00, acc1: 0.3179, norm_import: 70.26, norm_redund: 0.00, num_grp_import: 96, num_grp_redund: 0
Ep: 1, loss: 1.77, norm_all:81.14, grp_sparsity: 0.00, acc1: 0.3523, norm_import: 81.14, norm_redund: 0.00, num_grp_import: 96, num_grp_redund: 0
Ep: 2, loss: 1.70, norm_all:90.92, grp_sparsity: 0.00, acc1: 0.3343, norm_import: 90.92, norm_redund: 0.00, num_grp_import: 96, num_grp_redund: 0
Ep: 3, loss: 1.65, norm_all:99.66, grp_sparsity: 0.00, acc1: 0.3892, norm_import: 99.66, norm_redund: 0.00, num_grp_import: 96, num_grp_redund: 0
Ep: 4, loss: 1.61, norm_all:107.20, grp_sparsity: 0.00, acc1: 0.3937, norm_import: 107.20, norm_redund: 0.00, num_grp_import: 96, num_grp_redund: 0
Ep: 5, loss: 1.58, norm_all:114.24, grp_sparsity: 0.00, acc1: 0.3852, norm_import: 114.24, norm_redund: 0.00, num_grp_import: 96, num_grp_redund: 0
Ep: 6, loss: 1.55, norm_all:120.62, grp_sparsity: 0.00, acc1: 0.3990, norm_import: 120.62, norm_redund: 0.00, num_grp_im

In [8]:
# Get compressed model in torch format
oto.construct_subnet(out_dir='./cache')

In [11]:
import os

# Compare the full model size and compressed model size
full_model_size = os.stat(oto.full_group_sparse_model_path)
compressed_model_size = os.stat(oto.compressed_model_path)
print("Size of full model     : ", full_model_size.st_size / (1024 ** 3), "GBs")
print("Size of compress model : ", compressed_model_size.st_size / (1024 ** 3), "GBs")

# Both full and compressed model should return the exact same accuracy.
full_model = torch.load(oto.full_group_sparse_model_path)
compressed_model = torch.load(oto.compressed_model_path)

acc1_full, acc5_full = check_accuracy(full_model, testloader)
print("Full model: Acc 1: {acc1}, Acc 5: {acc5}".format(acc1=acc1_full, acc5=acc5_full))

acc1_compressed, acc5_compressed = check_accuracy(compressed_model, testloader)
print("Compressed model: Acc 1: {acc1}, Acc 5: {acc5}".format(acc1=acc1_compressed, acc5=acc5_compressed))

Size of full model     :  0.0007456224411725998 GBs
Size of compress model :  0.00023599714040756226 GBs


Full model: Acc 1: 0.55392, Acc 5: 0.94594
Compressed model: Acc 1: 0.55392, Acc 5: 0.94594


In [10]:
# Add model compression block
# how to store the weight?
# how to store the activation value?
# A useful github link: https://github.com/eladhoffer/quantized.pytorch/blob/master/models/resnet_quantized.py
for n,p in full_model.state_dict().items():
    print(n)
    print("\n")
    print(len(p))
    print("\n")
# def quantize_model(model):
#     qparams = {}

#     for n, p in model.state_dict().items():
#         qp = quantize_tensor(p)
#         qparams[n + '.quantization.scale'] = torch.FloatTensor([qp.scale])
#         qparams[
#             n + '.quantization.zero_point'] = torch.ByteTensor([qp.zero_point])
#         p.copy_(qp.tensor)
#     model.type('torch.ByteTensor')
#     for n, p in qparams.items():
#         model.register_buffer(n, p)
#     model.quantized = True


# def dequantize_model(model):
#     model.float()
#     params = model.state_dict()
#     for n, p in params.items():
#         if 'quantization' not in n:
#             qp = QTensor(tensor=p,
#                          scale=params[n + '.quantization.scale'][0],
#                          zero_point=params[n + '.quantization.zero_point'][0])
#             p.copy_(dequantize_tensor(qp))
#             model.register_buffer(n + '.quantization.scale', None)
#             model.register_buffer(n + '.quantization.zero_point', None)
#     model.quantized = None

# comments
# (1) We have both activation and weights.
# (2) Do we need to and how can we quantize weight. I checked one of the Github file. 
#     Not able to see the part that incorporates mdoel quantization.

linear1.weight


64


linear1.bias


64


linear2.weight


32


linear2.bias


32


linear3.weight


10


linear3.bias


10


