In [1]:
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F

import sys
import os
sys.path.append('../')

import distiller
from distiller.data_loggers import collector_context

# Set CUDA_VISIBLE_DEVICES to "0" for this process
# (presumably to limit the notebook to the first GPU — confirm on multi-GPU hosts).
os.environ["CUDA_VISIBLE_DEVICES"]="0"

In [2]:
# Run configuration for this notebook.
num_epoch = 1     # number of passes over the training set
batch_size = 4    # mini-batch size handed to the CIFAR10 data loaders
model_type = 3    # key into model_table selecting which model gets built

# Model variants keyed 1..4. Entries 1, 2 and 4 carry the original author's
# "not working" remark.
model_table = dict(enumerate(
    (
        'torch_scratch',            # 1: not working
        'torchvision_pretrained',   # 2: not working
        'distiller_not_parallel',   # 3
        'distiller_parallel',       # 4: not working
    ),
    start=1,
))



In [3]:
# Use the first CUDA device when CUDA is available; otherwise fall back to CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

cuda:0


In [4]:
#####################################
# Helper functions
#####################################
def save_model_structure(path, filename, model):
    """Write ``str(model)`` to the text file ``filename`` inside ``path``.

    Args:
        path: Directory that receives the file. It is created (including
            parents) when it does not exist yet. An empty string means the
            current working directory.
        filename: Name of the file to write inside ``path``.
        model: Any object; its ``str()`` representation is what gets written.

    Returns:
        None.
    """
    # Create the target directory first so that a fresh checkout (where the
    # output directory has never been created) does not raise FileNotFoundError.
    if path:
        os.makedirs(path, exist_ok=True)
    # os.path.join yields the same result as plain concatenation when `path`
    # ends with a separator, and still builds a valid path when it does not.
    with open(os.path.join(path, filename), 'w') as text_file:
        text_file.write(str(model))

def generate_model_info(model, model_name):
    """Print the model's name and type, then save its structure to disk.

    The structure is written through ``save_model_structure`` into the
    ``./model_structure/`` directory as ``<model_name>.txt``.
    """
    summary = '- %s, type:%s' % (model_name, type(model))
    print(summary)

    save_model_structure('./model_structure/', model_name + '.txt', model)

In [5]:
########################################################################
# 1. Load and normalize the CIFAR10 training and test datasets using
# torchvision
########################################################################

import torch
import torchvision
import torchvision.transforms as transforms

# Convert each image to a tensor, then apply Normalize with 0.5 for both
# arguments on each of the three channels.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# Root directory of the CIFAR10 data; download=True is requested for both splits.
path_c10 = '../../data.cifar10/'

# Training split: shuffled mini-batches.
trainset = torchvision.datasets.CIFAR10(
    root=path_c10, train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=batch_size, shuffle=True, num_workers=2)

# Test split: fixed order.
testset = torchvision.datasets.CIFAR10(
    root=path_c10, train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=batch_size, shuffle=False, num_workers=2)

# Class names, looked up by integer label when printing.
classes = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)

import matplotlib.pyplot as plt
import numpy as np

# functions to show an image

def imshow(img):
    """Plot an image tensor with matplotlib.

    The tensor is rescaled with ``img / 2 + 0.5`` (undoing the notebook's
    normalization), converted to a NumPy array, and its axes are reordered
    from (0, 1, 2) to (1, 2, 0) before being handed to ``plt.imshow``.
    """
    unnormalized = img / 2 + 0.5
    pixels = np.transpose(unnormalized.numpy(), (1, 2, 0))
    plt.imshow(pixels)


# Get one random training batch. The builtin next() is used instead of the
# iterator's .next() method, which is a Python-2 style alias that current
# DataLoader iterators no longer provide.
dataiter = iter(trainloader)
images, labels = next(dataiter)

# Show the whole batch as a single image grid.
imshow(torchvision.utils.make_grid(images))
# Print one class name per image actually present in the batch, rather than
# a hard-coded 4, so the cell keeps working when batch_size changes.
print(' '.join('%5s' % classes[labels[j]] for j in range(len(labels))))

Files already downloaded and verified
Files already downloaded and verified
 ship   car   dog  bird


In [6]:
########################################################################
# 2. Define a network
########################################################################

class Net(nn.Module):
    """Small CNN: two conv + ReLU + max-pool stages, then three linear layers.

    ``forward`` reshapes the second pooled feature map to rows of
    ``16 * 5 * 5`` values and returns 10 values per row.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5)
        # One pooling module, applied after each convolution.
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        # Feature extractor: conv -> ReLU -> pool, twice.
        for conv in (self.conv1, self.conv2):
            x = self.pool(F.relu(conv(x)))
        # Flatten for the fully connected head.
        x = x.view(-1, 16 * 5 * 5)
        # Two hidden linear layers with ReLU; the last layer has no activation.
        for hidden in (self.fc1, self.fc2):
            x = F.relu(hidden(x))
        return self.fc3(x)

from distiller.models import create_model
import distiller.quantization as quant
from copy import deepcopy

# Build the model selected by model_type.
model_kind = model_table[model_type]
if model_kind == 'torch_scratch':
    model = Net()
elif model_kind == 'distiller_not_parallel':
    model = create_model(pretrained=True, dataset='imagenet', arch='resnet18', parallel=False)
elif model_kind == 'distiller_parallel':
    # This branch must test 'distiller_parallel'; testing
    # 'distiller_not_parallel' a second time would make it unreachable.
    model = create_model(pretrained=True, dataset='imagenet', arch='resnet18', parallel=True)
else:
    # Fail loudly instead of leaving `model` undefined (or silently reusing a
    # stale `model` from an earlier run of this cell).
    raise ValueError('Unsupported model type: %r' % model_kind)

generate_model_info(model, model_kind)

# Move the model to the selected device; as the cell's last expression this
# also displays the model structure.
model.to(device)

- distiller_not_parallel, type:<class 'torchvision.models.resnet.ResNet'>


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): DistillerBasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu1): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (add): EltwiseAdd()
      (relu2): ReLU(inplace=True)
    )
    (1): DistillerBasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=

In [7]:
########################################################################
# 3. Define a loss function and optimizer
########################################################################

import torch.optim as optim

# Cross-entropy loss applied to the model outputs and integer labels.
criterion = nn.CrossEntropyLoss()

# SGD with momentum over all of the model's parameters.
optimizer = optim.SGD(
    model.parameters(),
    lr=0.001,
    momentum=0.9,
)


In [8]:
########################################################################
# 4. Train the network
########################################################################

import time
start = time.time()

for epoch in range(num_epoch):  # loop over the dataset multiple times

    # ---- training pass ----
    running_loss = 0.0
    model.train()
    for i, (inputs, labels) in enumerate(trainloader):
        inputs, labels = inputs.to(device), labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            end = time.time()
            print('[%d, %5d] loss: %.3f, time:%.1f' %
                  (epoch + 1, i + 1, running_loss / 2000, end - start))
            running_loss = 0.0
            # restart the timer so each report covers only its own 2000 batches
            start = time.time()

    # ---- validation pass over the test loader ----
    val_loss = 0.0
    correct = 0
    total = 0
    model.eval()
    with torch.no_grad():
        for inputs, labels in testloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)

            loss = criterion(outputs, labels)
            # .item() keeps the accumulator a plain Python float instead of
            # turning val_loss into a tensor living on `device`.
            val_loss += loss.item()

            # index of the largest output per sample is the predicted class
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    # mean of the per-batch losses
    val_loss /= len(testloader)

    print('\t val_loss: %.3f, acc:%.2f%%' %
          (val_loss, 100. * correct / total))


print('Finished Training')



[1,  2000] loss: 2.902, time:29.8
[1,  4000] loss: 2.656, time:29.7
[1,  6000] loss: 2.592, time:30.5
[1,  8000] loss: 2.506, time:29.7
[1, 10000] loss: 2.520, time:30.8
[1, 12000] loss: 2.450, time:30.2
	 val_loss: 2.388, acc:22.35%
Finished Training


In [None]:
########################################################################
# 5. generate stats
########################################################################
# File name of the activation statistics (joined with path_yaml further down
# to locate the stats file) and the directory passed to collect_quant_stats
# as save_dir.
stat_filename = 'acts_quantization_stats.yaml'
path_yaml = './stat_yaml/'

# CHECK: /examples/word_language_model/quantize_lstm.ipynb
def evaluate(model):
    """Evaluate ``model`` on the notebook-level ``testloader``.

    Relies on the notebook-level ``device`` and ``criterion``. The model is
    switched to eval mode and the whole pass runs with gradients disabled.

    Args:
        model: Module to evaluate; called as ``model(inputs)``.

    Returns:
        tuple: ``(val_loss, eval_acc)`` where ``val_loss`` is the mean of the
        per-batch losses as a Python float and ``eval_acc`` is the top-1
        accuracy in percent.
    """
    loss_sum = 0.0
    correct = 0
    total = 0
    model.eval()
    with torch.no_grad():
        for inputs, labels in testloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)

            # .item() converts the 0-dim loss tensor to a float, so the
            # accumulator (and the returned loss) is a plain number rather
            # than a tensor living on `device`.
            loss_sum += criterion(outputs, labels).item()

            # index of the largest output per sample is the predicted class
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    val_loss = loss_sum / len(testloader)
    eval_acc = 100 * correct / total

    return val_loss, eval_acc

def test_fn(model):
    """Return only the loss component of ``evaluate(model)``."""
    val_loss, _ = evaluate(model)
    return val_loss


# Evaluation of the current model before any statistics are collected.
eval_loss, eval_acc = evaluate(model)
print('[eval] loss:%.3f, acc:%.2f%%' % (eval_loss, eval_acc))

from distiller.data_loggers import collect_quant_stats
# Make sure the output directory exists before statistics are saved into it.
# NOTE(review): assumes collect_quant_stats does not create save_dir itself;
# the call is a no-op when the directory already exists.
os.makedirs(path_yaml, exist_ok=True)
collect_quant_stats(model, test_fn, save_dir=path_yaml)



[eval] loss:2.388, acc:22.35%


In [None]:
#####################################
# Quantization
#####################################

# Quantization modes handed to the quantizer, keyed by tensor kind.
quant_mode = {'activations': 'ASYMMETRIC_UNSIGNED', 'weights': 'SYMMETRIC'}
# Full path of the activation-statistics YAML (directory + file name).
stats_file = path_yaml + stat_filename
#stats_file = "../quantization/post_train_quant/stats/resnet18_quant_stats.yaml"
#dummy_input = distiller.get_dummy_input(dataset='cifar10')
# Dummy input built from the model's own `input_shape` attribute.
# NOTE(review): assumes `model` exposes `input_shape` — confirm for every entry of model_table.
dummy_input = distiller.get_dummy_input(input_shape=model.input_shape)

# Quantize a deep copy so the `model` object itself is not the one handed to
# the quantizer; 8 bits are requested for both activations and parameters.
quantizer = quant.PostTrainLinearQuantizer(
    deepcopy(model), bits_activations=8, bits_parameters=8, mode=quant_mode,
    model_activation_stats=stats_file, overrides=None
)
quantizer.prepare_model(dummy_input)

# Convert the prepared Distiller model and dump the converted model's structure
# under the name 'after_quant'.
pyt_model = quantizer.convert_to_pytorch(dummy_input)
generate_model_info(pyt_model, 'after_quant')

# Report which device each of the two models lives on.
print('Distiller model device:', distiller.model_device(quantizer.model))
print('PyTorch model device:', distiller.model_device(pyt_model))

# Inspect the [0, 0] slice of the first block's conv1 weight, via int_repr()
# and via dequantize().
print(pyt_model.layer1[0].conv1.weight().int_repr().data[0, 0, :, :])
print(pyt_model.layer1[0].conv1.weight().dequantize().data[0, 0, :, :])


# Side-by-side view of the top-level conv1 module in both models.
print('DISTILLER1:\n{}\n'.format(quantizer.model.conv1))
#print('DISTILLER2:\n{}\n'.format(quantizer.model.module.conv1))
print('PyTorch:\n{}\n'.format(pyt_model.conv1))

# Show two individual modules of the converted model.
print('layer1.0.conv1')
print(pyt_model.layer1[0].conv1)
print('\nlayer1.0.add')
print(pyt_model.layer1[0].add)


In [None]:
# We use Distiller's built-in data loading functionality; the dataset
# requested here is CIFAR10 (see `dataset` below).

# The seed is reset to 0 before each load_data call.
distiller.set_seed(0)

subset_size = 1.0 # To save time, can set to value < 1.0
dataset = 'cifar10'
dataset_path = os.path.expanduser('../../data.cifar10')

# Test loader intended for GPU evaluation. Only the third element of
# load_data's return value is kept.
batch_size_gpu = 256
num_workers_gpu = 10
_, _, test_loader_gpu, _ = distiller.apputils.load_data(
    dataset, dataset_path, batch_size_gpu, num_workers_gpu,
    effective_test_size=subset_size, fixed_subset=True, test_only=True)

# Test loader intended for CPU evaluation: same arguments apart from the
# smaller batch size.
distiller.set_seed(0)
batch_size_cpu = 44
num_workers_cpu = 10
_, _, test_loader_cpu, _ = distiller.apputils.load_data(
    dataset, dataset_path, batch_size_cpu, num_workers_cpu,
    effective_test_size=subset_size, fixed_subset=True, test_only=True)

In [None]:
import torchnet as tnt

def eval_model(data_loader, model, device, print_freq=10):
    """Evaluate ``model`` on ``data_loader`` and print top-1/top-5 accuracy and loss.

    Args:
        data_loader: Iterable of ``(inputs, target)`` batches that also
            exposes ``sampler`` (with a length) and ``batch_size``.
        model: Module to evaluate; it is switched to eval mode. The model is
            not moved to ``device`` here.
        device: Device the loss module and every batch are moved to.
        print_freq: Running metrics are printed every ``print_freq`` steps.

    Returns:
        None. All results are printed.
    """
    # Imported here because this function needs math.ceil and no file-level
    # `import math` is present.
    import math

    print('Evaluating model')
    criterion = torch.nn.CrossEntropyLoss().to(device)

    loss = tnt.meter.AverageValueMeter()
    classerr = tnt.meter.ClassErrorMeter(accuracy=True, topk=(1, 5))

    total_samples = len(data_loader.sampler)
    batch_size = data_loader.batch_size
    # Number of mini-batches, counting a final partial one.
    total_steps = math.ceil(total_samples / batch_size)
    print('{0} samples ({1} per mini-batch)'.format(total_samples, batch_size))

    # Switch to evaluation mode
    model.eval()

    # Gradients are not needed anywhere in the evaluation loop.
    with torch.no_grad():
        for step, (inputs, target) in enumerate(data_loader):
            inputs, target = inputs.to(device), target.to(device)
            # compute output from model
            output = model(inputs)

            # compute loss and measure accuracy
            loss.add(criterion(output, target).item())
            classerr.add(output.detach(), target)

            if (step + 1) % print_freq == 0:
                print('[{:3d}/{:3d}] Top1: {:.3f}  Top5: {:.3f}  Loss: {:.3f}'.format(
                      step + 1, total_steps, classerr.value(1), classerr.value(5), loss.mean), flush=True)
    print('----------')
    print('Overall ==> Top1: {:.3f}  Top5: {:.3f}  Loss: {:.3f}'.format(
        classerr.value(1), classerr.value(5), loss.mean), flush=True)

    return

#if torch.cuda.is_available():
#    eval_model(test_loader_gpu, quantizer.model, 'cuda')

# Evaluate the Distiller-quantized model on the CPU. When CUDA is available
# the model is first copied with make_non_parallel_copy and moved to the CPU;
# otherwise quantizer.model is used directly.
if torch.cuda.is_available():
    print('Creating CPU copy of Distiller model')
    cpu_model = distiller.make_non_parallel_copy(quantizer.model).cpu()
else:
    cpu_model = quantizer.model
eval_model(test_loader_cpu, cpu_model, 'cpu', print_freq=60)


# Same CPU evaluation for the model returned by convert_to_pytorch.
eval_model(test_loader_cpu, pyt_model, 'cpu', print_freq=60)

# Finally, evaluate the Distiller-quantized model on the GPU when one is present.
if torch.cuda.is_available():
    eval_model(test_loader_gpu, quantizer.model, 'cuda')


