In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import torch.nn.functional as F
from utils import get_network, get_training_dataloader, get_test_dataloader, WarmUpLR, \
    most_recent_folder, most_recent_weights, last_epoch, best_acc_weights
from conf import settings
from torchinfo import summary
from models.vgg import vgg16_bn

In [37]:
teacher_net = vgg16_bn()
summary(teacher_net, input_size=(1,3,32,32))

Layer (type:depth-idx)                   Output Shape              Param #
VGG                                      [1, 100]                  --
├─Sequential: 1-1                        [1, 512, 1, 1]            --
│    └─Conv2d: 2-1                       [1, 64, 32, 32]           1,792
│    └─BatchNorm2d: 2-2                  [1, 64, 32, 32]           128
│    └─ReLU: 2-3                         [1, 64, 32, 32]           --
│    └─Conv2d: 2-4                       [1, 64, 32, 32]           36,928
│    └─BatchNorm2d: 2-5                  [1, 64, 32, 32]           128
│    └─ReLU: 2-6                         [1, 64, 32, 32]           --
│    └─MaxPool2d: 2-7                    [1, 64, 16, 16]           --
│    └─Conv2d: 2-8                       [1, 128, 16, 16]          73,856
│    └─BatchNorm2d: 2-9                  [1, 128, 16, 16]          256
│    └─ReLU: 2-10                        [1, 128, 16, 16]          --
│    └─Conv2d: 2-11                      [1, 128, 16, 16]          147,

In [3]:
state_dict = torch.load('checkpoint/vgg16/Thursday_27_July_2023_16h_07m_07s/vgg16-200-regular.pth', map_location="cpu")
teacher_net.load_state_dict(state_dict)

<All keys matched successfully>

In [30]:
import copy
model = copy.deepcopy(teacher_net)

In [44]:
model_int8 = torch.quantization.quantize_dynamic(
    model,  # the original model
    {torch.nn.Linear, torch.nn.Sequential},  # a set of layers to dynamically quantize
    dtype=torch.qint8)  # the target dtype for quantized weights
model_int8.to('cpu')

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace=True)
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (9): ReLU(inplace=True)
    (10): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (12): ReLU(inplace=True)
    (13): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (14): Conv2d(128, 256

In [45]:
summary(model_int8, input_size=(1,3,32,32))

Layer (type:depth-idx)                   Output Shape              Param #
VGG                                      [1, 100]                  --
├─Sequential: 1-1                        [1, 512, 1, 1]            --
│    └─Conv2d: 2-1                       [1, 64, 32, 32]           1,792
│    └─BatchNorm2d: 2-2                  [1, 64, 32, 32]           128
│    └─ReLU: 2-3                         [1, 64, 32, 32]           --
│    └─Conv2d: 2-4                       [1, 64, 32, 32]           36,928
│    └─BatchNorm2d: 2-5                  [1, 64, 32, 32]           128
│    └─ReLU: 2-6                         [1, 64, 32, 32]           --
│    └─MaxPool2d: 2-7                    [1, 64, 16, 16]           --
│    └─Conv2d: 2-8                       [1, 128, 16, 16]          73,856
│    └─BatchNorm2d: 2-9                  [1, 128, 16, 16]          256
│    └─ReLU: 2-10                        [1, 128, 16, 16]          --
│    └─Conv2d: 2-11                      [1, 128, 16, 16]          147,

In [20]:
cifar100_test_loader = get_test_dataloader(
        settings.CIFAR100_TRAIN_MEAN,
        settings.CIFAR100_TRAIN_STD,
        #settings.CIFAR100_PATH,
        num_workers=4,
        batch_size=512,
    )

Files already downloaded and verified


In [46]:
def test_model(model):
    model.eval()

    correct_1 = 0.0
    correct_5 = 0.0
    total = 0
    acc = 0.0
    with torch.no_grad():
        for n_iter, (image, label) in enumerate(cifar100_test_loader):
            print("iteration: {}\ttotal {} iterations".format(n_iter + 1, len(cifar100_test_loader)))

    #         if args.gpu:
    #             image = image.cuda()
    #             label = label.cuda()
    #             print('GPU INFO.....')
    #             print(torch.cuda.memory_summary(), end='')


            output = model(image)
            _, pred = output.topk(5, 1, largest=True, sorted=True)

            label = label.view(label.size(0), -1).expand_as(pred)
            correct = pred.eq(label).float()
            acc += correct.sum()

            #compute top 5
            correct_5 += correct[:, :5].sum()

            #compute top1
            correct_1 += correct[:, :1].sum()

    # if args.gpu:
    #     print('GPU INFO.....')
    #     print(torch.cuda.memory_summary(), end='')

    print()
    print("Top 1 err: ", 1 - correct_1 / len(cifar100_test_loader.dataset))
    print("Top 5 err: ", 1 - correct_5 / len(cifar100_test_loader.dataset))
    print(f"acc: {acc.float() / len(cifar100_test_loader.dataset)}")
    print("Parameter numbers: {}".format(sum(p.numel() for p in model.parameters())))
test_model(model_int8)

iteration: 1	total 20 iterations
iteration: 2	total 20 iterations
iteration: 3	total 20 iterations
iteration: 4	total 20 iterations
iteration: 5	total 20 iterations
iteration: 6	total 20 iterations
iteration: 7	total 20 iterations
iteration: 8	total 20 iterations
iteration: 9	total 20 iterations
iteration: 10	total 20 iterations
iteration: 11	total 20 iterations
iteration: 12	total 20 iterations
iteration: 13	total 20 iterations
iteration: 14	total 20 iterations
iteration: 15	total 20 iterations
iteration: 16	total 20 iterations
iteration: 17	total 20 iterations
iteration: 18	total 20 iterations
iteration: 19	total 20 iterations
iteration: 20	total 20 iterations

Top 1 err:  tensor(0.2889)
Top 5 err:  tensor(0.1055)
acc: 0.8945000171661377
Parameter numbers: 14723136


In [47]:
torch.save(model_int8.state_dict(), 'model_quantization_int8.pth')

In [108]:
model_prun = copy.deepcopy(teacher_net)

In [74]:

print(list(model_prun.named_parameters()))


[('features.0.weight', Parameter containing:
tensor([[[[-2.6433e-02, -1.2256e-01, -1.2273e-01],
          [ 1.8811e-02, -2.7819e-02, -4.3118e-02],
          [ 7.0668e-02,  3.6981e-02,  8.7311e-03]],

         [[ 5.0954e-02, -2.9497e-02, -6.5876e-02],
          [ 7.4564e-02,  4.7684e-02,  2.5526e-03],
          [ 8.1572e-02,  6.4871e-02,  2.2913e-02]],

         [[-4.0150e-03, -7.0945e-02, -8.1919e-02],
          [ 1.6442e-02, -1.3125e-02, -2.9398e-02],
          [ 5.0249e-02,  2.2059e-02, -5.9333e-03]]],


        [[[-2.4959e-01,  5.8195e-02, -6.2090e-02],
          [-3.2309e-01,  2.1535e-01,  1.1972e-01],
          [-3.0146e-02,  1.5813e-01,  8.7733e-02]],

         [[-8.7224e-02,  2.2175e-01,  3.9580e-03],
          [-2.4814e-01,  2.1691e-01,  2.6573e-02],
          [-1.0339e-01,  2.1785e-03, -4.9226e-02]],

         [[ 2.6373e-02,  1.4328e-01, -3.4329e-02],
          [-3.6949e-02,  1.1901e-01, -5.4572e-02],
          [ 3.0393e-02, -4.3588e-02, -1.3299e-01]]],


        [[[-9.4520e-0

In [70]:
state_dict = torch.load('model_quantization_int8.pth')
print(state_dict.keys())

odict_keys(['features.0.weight', 'features.0.bias', 'features.1.weight', 'features.1.bias', 'features.1.running_mean', 'features.1.running_var', 'features.1.num_batches_tracked', 'features.3.weight', 'features.3.bias', 'features.4.weight', 'features.4.bias', 'features.4.running_mean', 'features.4.running_var', 'features.4.num_batches_tracked', 'features.7.weight', 'features.7.bias', 'features.8.weight', 'features.8.bias', 'features.8.running_mean', 'features.8.running_var', 'features.8.num_batches_tracked', 'features.10.weight', 'features.10.bias', 'features.11.weight', 'features.11.bias', 'features.11.running_mean', 'features.11.running_var', 'features.11.num_batches_tracked', 'features.14.weight', 'features.14.bias', 'features.15.weight', 'features.15.bias', 'features.15.running_mean', 'features.15.running_var', 'features.15.num_batches_tracked', 'features.17.weight', 'features.17.bias', 'features.18.weight', 'features.18.bias', 'features.18.running_mean', 'features.18.running_var', 

  device=storage.device,


In [91]:
print(list(state_dict.keys())[0])

features.0.weight


In [101]:
import torch.nn.utils.prune as prune


In [109]:
parameters_to_prune=[]
list_layer = [20,21,24,25,28,31]

In [110]:
for i in list_layer:
    parameters_to_prune.append((model_prun.features[i],"weight"))
    parameters_to_prune.append((model_prun.features[i],"bias"))

In [111]:
parameters_to_prune = tuple(parameters_to_prune)

In [112]:
prune.global_unstructured(
        parameters_to_prune,
        pruning_method=prune.RandomUnstructured,
        amount=0.5,
    )

In [None]:
prune.global_unstructured(
        parameters_to_prune,
        pruning_method=prune.L1Unstructured,
        amount=0.5,
    )

In [106]:
summary(model_prun, input_size=(1,3,32,32))

Layer (type:depth-idx)                   Output Shape              Param #
VGG                                      [1, 100]                  --
├─Sequential: 1-1                        [1, 512, 1, 1]            --
│    └─Conv2d: 2-1                       [1, 64, 32, 32]           1,608
│    └─BatchNorm2d: 2-2                  [1, 64, 32, 32]           119
│    └─ReLU: 2-3                         [1, 64, 32, 32]           --
│    └─Conv2d: 2-4                       [1, 64, 32, 32]           33,319
│    └─BatchNorm2d: 2-5                  [1, 64, 32, 32]           113
│    └─ReLU: 2-6                         [1, 64, 32, 32]           --
│    └─MaxPool2d: 2-7                    [1, 64, 16, 16]           --
│    └─Conv2d: 2-8                       [1, 128, 16, 16]          66,465
│    └─BatchNorm2d: 2-9                  [1, 128, 16, 16]          232
│    └─ReLU: 2-10                        [1, 128, 16, 16]          --
│    └─Conv2d: 2-11                      [1, 128, 16, 16]          132,

In [114]:
def test_model(model):
    model.eval()

    correct_1 = 0.0
    correct_5 = 0.0
    total = 0
    acc = 0.0
    with torch.no_grad():
        for n_iter, (image, label) in enumerate(cifar100_test_loader):
            print("iteration: {}\ttotal {} iterations".format(n_iter + 1, len(cifar100_test_loader)))

    #         if args.gpu:
    #             image = image.cuda()
    #             label = label.cuda()
    #             print('GPU INFO.....')
    #             print(torch.cuda.memory_summary(), end='')


            output = model(image)
            _, pred = output.topk(5, 1, largest=True, sorted=True)

            label = label.view(label.size(0), -1).expand_as(pred)
            correct = pred.eq(label).float()
            acc += correct.sum()

            #compute top 5
            correct_5 += correct[:, :5].sum()

            #compute top1
            correct_1 += correct[:, :1].sum()

    # if args.gpu:
    #     print('GPU INFO.....')
    #     print(torch.cuda.memory_summary(), end='')

    print()
    print("Top 1 err: ", 1 - correct_1 / len(cifar100_test_loader.dataset))
    print("Top 5 err: ", 1 - correct_5 / len(cifar100_test_loader.dataset))
    print(f"acc: {acc.float() / len(cifar100_test_loader.dataset)}")
    print("Parameter numbers: {}".format(sum(p.numel() for p in model.parameters())))
test_model(model_int8)

iteration: 1	total 20 iterations
iteration: 2	total 20 iterations
iteration: 3	total 20 iterations
iteration: 4	total 20 iterations
iteration: 5	total 20 iterations
iteration: 6	total 20 iterations
iteration: 7	total 20 iterations
iteration: 8	total 20 iterations
iteration: 9	total 20 iterations
iteration: 10	total 20 iterations
iteration: 11	total 20 iterations
iteration: 12	total 20 iterations
iteration: 13	total 20 iterations
iteration: 14	total 20 iterations
iteration: 15	total 20 iterations
iteration: 16	total 20 iterations
iteration: 17	total 20 iterations
iteration: 18	total 20 iterations
iteration: 19	total 20 iterations
iteration: 20	total 20 iterations

Top 1 err:  tensor(0.2886)
Top 5 err:  tensor(0.1055)
acc: 0.8945000171661377
Parameter numbers: 14723136


In [113]:
test_model(model_prun)

iteration: 1	total 20 iterations
iteration: 2	total 20 iterations
iteration: 3	total 20 iterations
iteration: 4	total 20 iterations
iteration: 5	total 20 iterations
iteration: 6	total 20 iterations
iteration: 7	total 20 iterations
iteration: 8	total 20 iterations
iteration: 9	total 20 iterations
iteration: 10	total 20 iterations
iteration: 11	total 20 iterations
iteration: 12	total 20 iterations
iteration: 13	total 20 iterations
iteration: 14	total 20 iterations
iteration: 15	total 20 iterations
iteration: 16	total 20 iterations
iteration: 17	total 20 iterations
iteration: 18	total 20 iterations
iteration: 19	total 20 iterations
iteration: 20	total 20 iterations

Top 1 err:  tensor(0.9900)
Top 5 err:  tensor(0.9500)
acc: 0.05000000074505806
Parameter numbers: 34015396
