In [None]:
import lc
from lc.torch import ParameterTorch as Param, AsVector, AsIs
from lc.compression_types import ConstraintL0Pruning, LowRank, RankSelection, AdaptiveQuantization
from lc.models.torch import lenet300_classic, lenet300_modern_drop, lenet300_modern

import numpy as np

import torch
from torch import nn, optim
from torch.utils.data import TensorDataset, DataLoader
from torchvision import datasets
# Limit PyTorch to 4 threads for intra-op parallelism on the CPU.
torch.set_num_threads(4)

In [None]:
def compute_acc_loss(forward_func, data_loader, device=None):
    """Evaluate a model over a data loader and return its accuracy and mean loss.

    Args:
        forward_func: callable ``(x, target) -> (score, loss)``. ``score`` holds
            one row of class scores per sample (the prediction is the argmax
            along dimension 1) and ``loss`` is a scalar tensor with the batch
            loss (assumed to be a per-sample mean, since it is re-weighted by
            the batch size below).
        data_loader: iterable of ``(x, target)`` batches that exposes a
            ``dataset`` attribute whose ``len`` is the total number of samples.
        device: device the batches are moved to before ``forward_func`` is
            called. Defaults to CUDA when it is available, otherwise the CPU.

    Returns:
        ``(accuracy, ave_loss)``: the fraction of correctly classified samples
        and the loss averaged over all samples.
    """
    if device is None:
        # The original code always called .cuda(); fall back to the CPU when no GPU is present.
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    correct_cnt, ave_loss = 0, 0
    with torch.no_grad():
        for x, target in data_loader:
            x, target = x.to(device), target.to(device)
            score, loss = forward_func(x, target)
            _, pred_label = torch.max(score, 1)
            correct_cnt += (pred_label == target).sum().item()
            # Weight each batch loss by its size so a smaller final batch does not skew the average.
            ave_loss += loss.item() * len(x)

    n_samples = len(data_loader.dataset)
    accuracy = correct_cnt * 1.0 / n_samples
    ave_loss /= n_samples
    return accuracy, ave_loss

## Data
We use the MNIST dataset for this demo. The dataset contains 28x28 grayscale images of the digits 0 to 9. The images are normalized to have grayscale values between 0 and 1, and then the mean is subtracted.

In [None]:
from matplotlib import pyplot as plt
import warnings
# NOTE(review): this filter hides every warning raised from here on, not only plotting-related ones.
warnings.filterwarnings('ignore')
# Default figure size (width, height) in inches for the plots below.
plt.rcParams['figure.figsize'] = [10, 5]
def show_MNIST_images():
    """Plot five randomly picked MNIST training digits in a single row.

    The indices come from ``np.random.randint``, so the same image may be
    picked more than once. Each digit is drawn in grayscale with its label as
    the x-axis caption and with the axis ticks removed.
    """
    mnist_train = datasets.MNIST(root='./datasets', download=True, train=True)
    pixels = np.array(mnist_train.data[:])
    labels = np.array(mnist_train.targets)
    images_to_show = 5
    picked = np.random.randint(pixels.shape[0], size=images_to_show)
    # Subplot positions are 1-based, hence start=1.
    for position, sample_idx in enumerate(picked, start=1):
        plt.subplot(1, images_to_show, position)
        plt.imshow(pixels[sample_idx], cmap='gray')
        plt.xlabel(labels[sample_idx])
        plt.xticks([])
        plt.yticks([])
# Show a handful of random training digits as a visual sanity check of the data.
show_MNIST_images()

In [None]:
def data_loader(batch_size=2048, n_workers=4):
    """Create the MNIST training and test loaders.

    Images are flattened to 28 * 28 float32 vectors and divided by 255. The
    per-pixel mean of the training set is then subtracted from both splits.

    Args:
        batch_size: number of samples per batch for both loaders.
        n_workers: number of worker processes for both loaders.

    Returns:
        ``(train_loader, test_loader)``; only the training loader shuffles.
    """
    def _flat_pixels(mnist_split):
        # One float32 row of 28 * 28 raw pixel values per image.
        return np.array(mnist_split.data[:]).reshape([-1, 28 * 28]).astype(np.float32)

    train_data_th = datasets.MNIST(root='./datasets', download=True, train=True)
    test_data_th = datasets.MNIST(root='./datasets', download=True, train=False)

    data_train = _flat_pixels(train_data_th) / 255
    data_test = (_flat_pixels(test_data_th) / 255).astype(np.float32)

    # Center both splits with statistics computed on the training data only.
    dtrain_mean = data_train.mean(axis=0)
    data_train -= dtrain_mean
    data_test -= dtrain_mean

    train_data = TensorDataset(torch.from_numpy(data_train), train_data_th.targets)
    test_data = TensorDataset(torch.from_numpy(data_test), test_data_th.targets)

    shared_options = dict(num_workers=n_workers, batch_size=batch_size)
    return (
        DataLoader(train_data, shuffle=True, **shared_options),
        DataLoader(test_data, shuffle=False, **shared_options),
    )

## Reference Network
We use a CUDA-capable GPU for our experiments. The network has 3 fully-connected layers with dimensions 784x300, 300x100, and 100x10, for a total of 266200 weights (784·300 + 300·100 + 100·10; this count does not include the biases). The network was trained to have a test error of 1.79%, which is a pretty decent result but not as low as you can get with convolutional neural networks.

In [None]:
# Run on the GPU when one is available; otherwise fall back to the CPU so the notebook still executes.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


In [None]:
def train_test_acc_eval_f(net):
    """Print the error rate and loss of ``net`` on the MNIST training and test sets.

    Args:
        net: model that is callable on a batch of inputs and provides a
            ``loss(output, target)`` method.
    """
    def _scores_and_loss(inputs, labels):
        # Adapter with the (x, target) -> (score, loss) shape compute_acc_loss expects.
        outputs = net(inputs)
        return outputs, net.loss(outputs, labels)

    loader_train, loader_test = data_loader()
    acc_train, loss_train = compute_acc_loss(_scores_and_loss, loader_train)
    acc_test, loss_test = compute_acc_loss(_scores_and_loss, loader_test)

    # Accuracies are fractions in [0, 1]; report them as error percentages.
    err_train = 100-acc_train*100
    err_test = 100-acc_test*100
    print(f"Train err: {err_train:.2f}%, train loss: {loss_train}")
    print(f"TEST ERR: {err_test:.2f}%, test loss: {loss_test}")
    
def load_reference_lenet300():
    """Build a LeNet300 network and load the pretrained reference weights into it.

    The checkpoint is fetched from the URL below and deserialized with
    ``map_location=device``, so loading does not depend on the device the
    checkpoint was saved from.

    Returns:
        The network with the reference weights loaded, placed on ``device``.
    """
    # Import the submodule explicitly instead of relying on it having been loaded as a side effect.
    from torch.utils import model_zoo

    net = lenet300_modern()
    state_dict = model_zoo.load_url(
        'https://ucmerced.box.com/shared/static/766axnc8qq429hiqqyqqo07ek46oqoxq.th',
        map_location=device,
    )
    net.load_state_dict(state_dict)
    return net.to(device)

Let's verify the model's train and test errors:

In [None]:
# Evaluate a freshly loaded reference network in eval mode.
# load_reference_lenet300 already places the network on `device`, so the trailing .to(device) is a no-op.
train_test_acc_eval_f(load_reference_lenet300().eval().to(device))

## Compression using the LC toolkit
### Step 1: L step
We will use the same L step with the same hyperparameters for all our compression examples.

In [None]:
def my_l_step(model, lc_penalty, step, base_lr=0.7, lr_decay=0.98, epochs_per_step=7, momentum=0.9):
    """Run one L step: train ``model`` on MNIST with the LC penalty added to its loss.

    Args:
        model: network to train; must be callable on a batch and provide a
            ``loss(output, target)`` method. Only parameters with
            ``requires_grad`` set are optimized.
        lc_penalty: zero-argument callable whose result is added to the model
            loss for every batch.
        step: index of the current L step. It sets the learning rate, and
            step 0 trains for twice as many epochs.
        base_lr: learning rate used at step 0.
        lr_decay: multiplicative learning-rate decay applied per step, i.e.
            ``lr = base_lr * lr_decay ** step``.
        epochs_per_step: number of epochs per L step (doubled for step 0).
        momentum: Nesterov momentum coefficient of the SGD optimizer.
    """
    # Only the training split is needed here.
    train_loader, _ = data_loader()
    params = [p for p in model.parameters() if p.requires_grad]
    lr = base_lr * (lr_decay ** step)
    optimizer = optim.SGD(params, lr=lr, momentum=momentum, nesterov=True)
    print(f'L-step #{step} with lr: {lr:.5f}')

    # The very first L step gets a double epoch budget.
    n_epochs = epochs_per_step * 2 if step == 0 else epochs_per_step
    for epoch in range(n_epochs):
        batch_losses = []
        for x, target in train_loader:
            optimizer.zero_grad()
            x = x.to(device)
            target = target.to(dtype=torch.long, device=device)
            out = model(x)
            loss = model.loss(out, target) + lc_penalty()
            batch_losses.append(loss.item())
            loss.backward()
            optimizer.step()

        print(f"\tepoch #{epoch} is finished.")
        # Unweighted mean over batches of (model loss + LC penalty).
        print(f"\t  avg. train loss: {np.mean(batch_losses):.6f}")


### Step 2: Schedule of mu values

In [None]:
# Geometric schedule of mu values: starts at 9e-5 and grows by a factor of 1.1 per entry.
mu_s = [9e-5 * (1.1 ** n) for n in range(20)]
# 20 L-C steps in total
# Each L step trains for 7 epochs, except that my_l_step doubles this to 14 when step == 0.
# Assuming lc.Algorithm numbers the first L step 0 (TODO confirm), the total is 7 x 20 + 7 = 147 epochs.

### Compression time! Pruning
Let us prune all but 5% of the weights in the network (5% = 13310 weights)

In [None]:
# Start from the pretrained reference network.
net = load_reference_lenet300()

# One zero-argument getter per nn.Linear module, each returning that module's weight.
# `x=x` binds the module at definition time, so every lambda keeps its own layer.
layers = [lambda x=x: getattr(x, 'weight') for x in net.modules() if isinstance(x, nn.Linear)]
# A single task over the weights of all Linear layers together, with kappa=13310
# (the 5% of weights to keep, per the text above).
# NOTE(review): AsVector presumably treats the selected weights as one flat vector — confirm in the lc docs.
compression_tasks = {
    Param(layers, device): (AsVector, ConstraintL0Pruning(kappa=13310), 'pruning')
}

lc_alg = lc.Algorithm(
    model=net,                            # model to compress
    compression_tasks=compression_tasks,  # specifications of compression
    l_step_optimization=my_l_step,        # implementation of L-step
    mu_schedule=mu_s,                     # schedule of mu values
    evaluation_func=train_test_acc_eval_f # evaluation function
)
lc_alg.run()                              # entry point to the LC algorithm

### Quantization
Now let us quantize each layer with its own codebook

In [None]:
# Start again from the pretrained reference network (the previous cell's `net` is replaced).
net = load_reference_lenet300()
# One zero-argument getter per nn.Linear module, each returning that module's weight.
layers = [lambda x=x: getattr(x, 'weight') for x in net.modules() if isinstance(x, nn.Linear)]

# One separate quantization task per layer, so each layer is quantized independently.
# NOTE(review): k=2 is presumably the codebook size (2 values per layer) — confirm in the lc docs.
compression_tasks = {
    Param(layers[0], device): (AsVector, AdaptiveQuantization(k=2), 'layer0_quant'),
    Param(layers[1], device): (AsVector, AdaptiveQuantization(k=2), 'layer1_quant'),
    Param(layers[2], device): (AsVector, AdaptiveQuantization(k=2), 'layer2_quant')
}

lc_alg = lc.Algorithm(
    model=net,                            # model to compress
    compression_tasks=compression_tasks,  # specifications of compression
    l_step_optimization=my_l_step,        # implementation of L-step
    mu_schedule=mu_s,                     # schedule of mu values
    evaluation_func=train_test_acc_eval_f # evaluation function
)
lc_alg.run()  

### Mixing pruning, low rank, and quantization

In [None]:
# Start again from the pretrained reference network.
net = load_reference_lenet300()
# One zero-argument getter per nn.Linear module, each returning that module's weight.
layers = [lambda x=x: getattr(x, 'weight') for x in net.modules() if isinstance(x, nn.Linear)]

# A different compression type per layer:
#   layer 0 -> L0-constrained pruning with kappa=5000
#   layer 1 -> low-rank with target_rank=9 (passed AsIs rather than AsVector)
#   layer 2 -> adaptive quantization with k=2
compression_tasks = {
    Param(layers[0], device): (AsVector, ConstraintL0Pruning(kappa=5000), 'pruning'),
    Param(layers[1], device): (AsIs, LowRank(target_rank=9, conv_scheme=None), 'low-rank'),
    Param(layers[2], device): (AsVector, AdaptiveQuantization(k=2), 'quant')
}

lc_alg = lc.Algorithm(
    model=net,                            # model to compress
    compression_tasks=compression_tasks,  # specifications of compression
    l_step_optimization=my_l_step,        # implementation of L-step
    mu_schedule=mu_s,                     # schedule of mu values
    evaluation_func=train_test_acc_eval_f # evaluation function
)
lc_alg.run()  

### Additive combination of Quantization and Pruning

In [None]:
# Start again from the pretrained reference network.
net = load_reference_lenet300()
# One zero-argument getter per nn.Linear module, each returning that module's weight.
layers = [lambda x=x: getattr(x, 'weight') for x in net.modules() if isinstance(x, nn.Linear)]

# A single parameter group (all Linear weights) mapped to a LIST of two compressions.
# NOTE(review): per the section title these are presumably combined additively
# (pruned part + quantized part) — confirm in the lc docs.
# kappa=2662 is 1% of the 266200 weights.
compression_tasks = {
    Param(layers, device): [
        (AsVector, ConstraintL0Pruning(kappa=2662), 'pruning'),
        (AsVector, AdaptiveQuantization(k=2), 'quant')
    ]
}

lc_alg = lc.Algorithm(
    model=net,                            # model to compress
    compression_tasks=compression_tasks,  # specifications of compression
    l_step_optimization=my_l_step,        # implementation of L-step
    mu_schedule=mu_s,                     # schedule of mu values
    evaluation_func=train_test_acc_eval_f # evaluation function
)
lc_alg.run()  

### Low-rank compression with automatic rank selection

In [None]:
# Start again from the pretrained reference network.
net = load_reference_lenet300()
# One zero-argument getter per nn.Linear module, each returning that module's weight.
layers = [lambda x=x: getattr(x, 'weight') for x in net.modules() if isinstance(x, nn.Linear)]
# The same alpha is shared by the three RankSelection tasks below.
alpha=1e-9
# One rank-selection task per layer, all using the 'storage' criterion.
# NOTE(review): `module=` receives the weight getter (a lambda), not the nn.Linear module itself —
# confirm this is what RankSelection expects.
# The task names here are 1-based ("layer1_lr"...), unlike the 0-based names in the quantization cells.
compression_tasks = {
    Param(layers[0], device): (AsIs, RankSelection(conv_scheme='scheme_1', alpha=alpha, criterion='storage', module=layers[0], normalize=True), "layer1_lr"),
    Param(layers[1], device): (AsIs, RankSelection(conv_scheme='scheme_1', alpha=alpha, criterion='storage', module=layers[1], normalize=True), "layer2_lr"),
    Param(layers[2], device): (AsIs, RankSelection(conv_scheme='scheme_1', alpha=alpha, criterion='storage', module=layers[2], normalize=True), "layer3_lr")
}

lc_alg = lc.Algorithm(
    model=net,                            # model to compress
    compression_tasks=compression_tasks,  # specifications of compression
    l_step_optimization=my_l_step,        # implementation of L-step
    mu_schedule=mu_s,                     # schedule of mu values
    evaluation_func=train_test_acc_eval_f # evaluation function
)
lc_alg.run()  

### ScaledTernaryQuantization

In [None]:
# NOTE(review): this import sits mid-notebook; the other lc.compression_types imports are in the first cell.
from lc.compression_types import ScaledTernaryQuantization
# Start again from the pretrained reference network.
net = load_reference_lenet300()
# One zero-argument getter per nn.Linear module, each returning that module's weight.
layers = [lambda x=x: getattr(x, 'weight') for x in net.modules() if isinstance(x, nn.Linear)]

# One separate ScaledTernaryQuantization task per layer.
compression_tasks = {
    Param(layers[0], device): (AsVector, ScaledTernaryQuantization(), 'layer0_quant'),
    Param(layers[1], device): (AsVector, ScaledTernaryQuantization(), 'layer1_quant'),
    Param(layers[2], device): (AsVector, ScaledTernaryQuantization(), 'layer2_quant')
}

lc_alg = lc.Algorithm(
    model=net,                            # model to compress
    compression_tasks=compression_tasks,  # specifications of compression
    l_step_optimization=my_l_step,        # implementation of L-step
    mu_schedule=mu_s,                     # schedule of mu values
    evaluation_func=train_test_acc_eval_f # evaluation function
)
lc_alg.run()  

### ScaledBinaryQuantization

In [None]:
# NOTE(review): this import sits mid-notebook; the other lc.compression_types imports are in the first cell.
from lc.compression_types import ScaledBinaryQuantization
# Start again from the pretrained reference network.
net = load_reference_lenet300()
# One zero-argument getter per nn.Linear module, each returning that module's weight.
layers = [lambda x=x: getattr(x, 'weight') for x in net.modules() if isinstance(x, nn.Linear)]

# One separate ScaledBinaryQuantization task per layer.
compression_tasks = {
    Param(layers[0], device): (AsVector, ScaledBinaryQuantization(), 'layer0_quant'),
    Param(layers[1], device): (AsVector, ScaledBinaryQuantization(), 'layer1_quant'),
    Param(layers[2], device): (AsVector, ScaledBinaryQuantization(), 'layer2_quant')
}

lc_alg = lc.Algorithm(
    model=net,                            # model to compress
    compression_tasks=compression_tasks,  # specifications of compression
    l_step_optimization=my_l_step,        # implementation of L-step
    mu_schedule=mu_s,                     # schedule of mu values
    evaluation_func=train_test_acc_eval_f # evaluation function
)
lc_alg.run() 