Took the first 3 blocks of code from Yerlan's LC Compression Collab


In [1]:
#! git clone https://github.com/UCMerced-ML/LC-model-compression

In [2]:
#! pip3 install -e ./LC-model-compression

## IMPORTANT!
At this point you need to restart the runtime by doing "Runtime => Restart Runtime"

After doing the restart, I recommend just commenting out the 2 lines of code above so that we can press run all

## Training the Neural Network

### Import packages

In [3]:
#%matplotlib inline

import lc
from lc.torch import ParameterTorch as Param, AsVector, AsIs
from lc.compression_types import ConstraintL0Pruning, LowRank, RankSelection, AdaptiveQuantization

import torch
from torch import optim
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import TensorDataset, DataLoader
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import seaborn as sn
import pandas as pd
from timeit import default_timer as timer
from datetime import timedelta

torch.manual_seed(0)
np.random.seed(0)
torch.set_num_threads(4)
batchsize = 2048
num_workers = 2
nnloss = torch.nn.CrossEntropyLoss()


### Making the Neural Network

In [4]:
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')  

In [5]:
class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()
        # an affine operation: y = Wx + b
        self.fc1 = nn.Linear(28*28, 500)  # 28*28 from image dimension 
        self.fc2 = nn.Linear(500, 300)
        self.fc3 = nn.Linear(300, 5)

    def forward(self, x):
        x = torch.flatten(x, 1) # flatten all dimensions except the batch dimension
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

net = Net().to(device)

### Getting the subset MNIST dataset

In [6]:
def data_loader(batch_size = batchsize, n_workers = num_workers):
    train_data_th = datasets.MNIST(root='./datasets', download=True, train=True)
    test_data_th = datasets.MNIST(root='./datasets', download=True, train=False)

    label = [1, 2, 3 ,4, 6]
    data_train_fea = np.array(train_data_th.data[:]).reshape([-1, 28 * 28]).astype(np.float32)
    data_train_fea = (data_train_fea / 255)
    data_train_gnd = np.array(train_data_th.targets)
    ctr1_idx = np.where(data_train_gnd[:] == label[0])
    ctr2_idx = np.where(data_train_gnd[:] == label[1])
    ctr3_idx = np.where(data_train_gnd[:] == label[2])
    ctr4_idx = np.where(data_train_gnd[:] == label[3])
    ctr6_idx = np.where(data_train_gnd[:] == label[4])
    ctr1_idx = np.array(ctr1_idx)
    ctr2_idx = np.array(ctr2_idx)
    ctr3_idx = np.array(ctr3_idx)
    ctr4_idx = np.array(ctr4_idx)
    ctr6_idx = np.array(ctr6_idx)
    total_train_valid_idx = np.concatenate((ctr1_idx, ctr2_idx, ctr3_idx, ctr4_idx, ctr6_idx),axis = None)
    np.random.shuffle(total_train_valid_idx)#forgot to add this line of code
    train_idx = total_train_valid_idx[:27000]
    valid_idx = total_train_valid_idx[27000:]

    data_total_train = data_train_fea[total_train_valid_idx]
    target_total_train = data_train_gnd[total_train_valid_idx]

    data_train = data_train_fea[train_idx]
    target_train = data_train_gnd[train_idx]

    data_validation = data_train_fea[valid_idx]
    target_validation = data_train_gnd[valid_idx]

    data_test_fea = np.array(test_data_th.data[:]).reshape([-1, 28 * 28]).astype(np.float32)
    data_test_fea = (data_test_fea / 255)
    data_test_gnd = np.array(test_data_th.targets)
    cte1_idx = np.where(data_test_gnd[:] == label[0])
    cte2_idx = np.where(data_test_gnd[:] == label[1])
    cte3_idx = np.where(data_test_gnd[:] == label[2])
    cte4_idx = np.where(data_test_gnd[:] == label[3])
    cte6_idx = np.where(data_test_gnd[:] == label[4])
    cte1_idx = np.array(cte1_idx)
    cte2_idx = np.array(cte2_idx)
    cte3_idx = np.array(cte3_idx)
    cte4_idx = np.array(cte4_idx)
    cte6_idx = np.array(cte6_idx)
    test_idx = np.concatenate((cte1_idx, cte2_idx, cte3_idx, cte4_idx, cte6_idx),axis = None)

    data_test = data_test_fea[test_idx]
    target_test = data_test_gnd[test_idx]

    ##not sure what this is doing but it was here in both the neural network tutorial and in yerlan's collab
    dtrain_mean = data_train.mean(axis=0)
    data_train -= dtrain_mean
    data_validation -=dtrain_mean
    data_total_train -= dtrain_mean
    data_test -= dtrain_mean
    ##

    #######
    #https://discuss.pytorch.org/t/indexerror-target-2-is-out-of-bounds/69614/24
    tensor_target_train = torch.from_numpy(target_train)

    #print(tensor_target_train.size())
    # print(min(tensor_target_train))
    # print(max(tensor_target_train))
    unique_targets_train = torch.unique(tensor_target_train) #1,2,3,4,6
    # print('unique_targets_train: {}'.format(unique_targets_train))

    new_tensor_target_train = torch.empty_like(tensor_target_train) #size of tensor_target_train
    for idx, t in enumerate(unique_targets_train):
        # print('replacing {} with {}'.format(t, idx))
        new_tensor_target_train[tensor_target_train == t] = idx # [1,1,3,3]
    # print(new_tensor_target_train.size())
    # print(min(new_tensor_target_train))
    # print(max(new_tensor_target_train))

    tensor_target_validation = torch.from_numpy(target_validation)
    #print(tensor_target_validation.size())
    unique_targets_validation = torch.unique(tensor_target_validation)
    new_tensor_target_validation = torch.empty_like(tensor_target_validation)
    for idx, t in enumerate(unique_targets_validation):
      new_tensor_target_validation[tensor_target_validation == t] = idx

    tensor_target_test = torch.from_numpy(target_test)
    unique_targets_test = torch.unique(tensor_target_test)
    new_tensor_target_test = torch.empty_like(tensor_target_test)
    for idx, t in enumerate(unique_targets_test):
      new_tensor_target_test[tensor_target_test == t] = idx
    

    tensor_target_total_train = torch.from_numpy(target_total_train)
    unique_targets_total_train = torch.unique(tensor_target_total_train)
    new_tensor_target_total_train = torch.empty_like(tensor_target_total_train)
    for idx, t in enumerate(unique_targets_total_train):
      new_tensor_target_total_train[tensor_target_total_train == t] = idx


    train_data = TensorDataset(torch.from_numpy(data_train), new_tensor_target_train)
    validation_data = TensorDataset(torch.from_numpy(data_validation), new_tensor_target_validation)
    test_data = TensorDataset(torch.from_numpy(data_test), new_tensor_target_test)
    total_train_data = TensorDataset(torch.from_numpy(data_total_train), new_tensor_target_total_train)

    train_loader = DataLoader(train_data, num_workers=n_workers, batch_size=batch_size, shuffle=True)
    validation_loader = DataLoader(validation_data, num_workers = n_workers, batch_size = batch_size, shuffle = True)
    test_loader = DataLoader(test_data, num_workers=n_workers, batch_size=batch_size, shuffle=False)
    total_train_loader = DataLoader(total_train_data, num_workers = n_workers, batch_size = batch_size, shuffle = True)

    #return train_loader, validation_loader, test_loader, total_train_loader -> use this one if still training model
    return total_train_loader, test_loader # use this one if already found optimized model

### Defining all the lists used

In [7]:
plotepoch = []
plottrainloss = []
plotvalidloss = []
avg_train_loss = []
avg_valid_loss = []
plottrainacc = []
plotvalidacc = []
timetaken = []
loss_list = []

In [8]:
def clearList():
    plotepoch.clear()
    plottrainloss.clear()
    plotvalidloss.clear()
    avg_train_loss.clear()
    avg_valid_loss.clear()
    plottrainacc.clear()
    plotvalidacc.clear()

### Calculating accuracy

In [9]:
def calc_acc(loader, net):
    correct_cnt = 0
    total_cnt = 0
    net.eval()
    with torch.no_grad():
        for batch_inputs, batch_labels in loader:
            batch_inputs = batch_inputs.to(device)
            batch_labels = batch_labels.to(dtype=torch.long, device=device)
            out = net(batch_inputs)
            _, pred_labels = torch.max(out.data, 1)
            total_cnt += batch_labels.size(0)
            correct_cnt += (pred_labels == batch_labels).sum().item()
    

    accuracy = correct_cnt / total_cnt
    return accuracy

def calc_acc_loss(loader, net):
    correct_cnt = 0
    total_cnt = 0
    loss_list.clear()
    net.eval()
    with torch.no_grad():
        for batch_inputs, batch_labels in loader:
            batch_inputs = batch_inputs.to(device)
            batch_labels = batch_labels.to(dtype=torch.long, device=device)
            out = net(batch_inputs)
            loss = nnloss(out,batch_labels)
            loss_list.append(loss.item())
            _, pred_labels = torch.max(out.data, 1)
            total_cnt += batch_labels.size(0)
            correct_cnt += (pred_labels == batch_labels).sum().item()
    
    calc_loss = np.mean(loss_list)
    accuracy = correct_cnt / total_cnt
    return accuracy, calc_loss      

In [10]:
def findAccNoTest(train_loader, validation_loader, net):
    print(f'Train Accuracy: {100 * calc_acc(train_loader,net):.2f}%')
    print(f'Validation Accuracy: {100 * calc_acc(validation_loader,net):.2f}%')

def findAccWithTest(train_loader, validation_loader, test_loader, net):
    print(f'Train Accuracy: {100 * calc_acc(train_loader,net):.2f}%')
    print(f'Validation Accuracy: {100 * calc_acc(validation_loader,net):.2f}%')
    print(f'Test Accuracy: {100 *calc_acc(test_loader,net):.2f}%')

def findAccTrainTest(total_train_loader,  test_loader, net):
    print(f'Train Accuracy: {100 * calc_acc(total_train_loader,net):.2f}%')
    print(f'Test Accuracy: {100 *calc_acc(test_loader,net):.2f}%')
    

In [11]:
def train_test_acc_eval_f(net):
    train_loader, test_loader = data_loader()
    acc_train, loss_train = calc_acc_loss(train_loader,net)
    acc_test, loss_test = calc_acc_loss(test_loader,net)

    print(f"Train err: {100-acc_train*100:.2f}%, train loss: {loss_train}")
    print(f"TEST ERR: {100-acc_test*100:.2f}%, test loss: {loss_test}")

### Confusion Matrix

In [12]:
#https://christianbernecker.medium.com/how-to-create-a-confusion-matrix-in-pytorch-38d06a7f04b7

def confusion_matrix1(test_loader):
    y_pred = []
    y_true = []

    for x, target in test_loader:
        if torch.cuda.is_available():
            x = x.cuda()[:]
            target = target.cuda().to(dtype=torch.long)
        out = net(x)
        out = (torch.max(torch.exp(out),1)[1]).data.cpu().numpy()
        y_pred.extend(out)
        target = target.data.cpu().numpy()
        y_true.extend(target)

    labels = [1,2,3,4,6]
    cf_matrix = confusion_matrix(y_true,y_pred)
    df_cm = pd.DataFrame(cf_matrix/np.sum(cf_matrix) * 10, index = [i for i in labels], columns = [i for i in labels])
    plt.figure(figsize = (12,7))
    sn.heatmap(df_cm, annot=True)

### Yerlan's Suggestions for Project 2 Part 1 Part 1

In [13]:
#select reasonable settings
#every 10 epoch 0.9 schedular
#have 60k training
#10k test
#choose lr: 0.01 - 0.1
# lr decay: 0.95 every 10 epoch
# epoch 100-150
#sgd: use momentum
# for momentum: default 0.9 okay
# 5 - 10 experiments
#once found best parameters
#use them on entire dataset 60k
#split training 55k,5k train/valid
#in our case split training 27k,rest train/valid
#once found best parameters
#train model on all training
#report true test error on 10k only once
#use large as possible batch size that fits in gpu allocates like 2000 or 2048

### Training the Netural Network

In [14]:
# uncomment to test parameters again

# nnloss = torch.nn.CrossEntropyLoss()
# batchsize = 2048
# #if i set num_workers = 4 i get this warning
# #/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py:481: UserWarning: This DataLoader will create 4 worker processes in total. Our suggested max number of worker in current system is 2, which is smaller than what this DataLoader is going to create. Please be aware that excessive worker creation might get DataLoader running slow or even freeze, lower the worker number to avoid potential slowness/freeze if necessary. cpuset_checked))
# #So I will do what they suggest and switch to 2
# num_workers = 2
# timein = "tmp"
# train_loader, validation_loader, test_loader, total_train_loader = data_loader(batchsize,num_workers)

def train_net(net):
    params = list(filter(lambda p: p.requires_grad, net.parameters()))
    optimizer = optim.SGD(params, lr=0.1, momentum=0.9, weight_decay = 0, nesterov=True)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.9)
    
    epochs = 100
    clearList()
    start = timer()
    for epoch in range(epochs):
        net.train()
        for x, target in train_loader:
            if torch.cuda.is_available():
                x = x.cuda()[:]
                target = target.cuda().to(dtype=torch.long)
            optimizer.zero_grad()
            out = net(x)
            loss = nnloss(out, target)
            loss.backward()
            avg_train_loss.append(loss.item())
            optimizer.step()
        scheduler.step()
        train_loss = np.mean(avg_train_loss)
        plotepoch.append(epoch)
        plottrainloss.append(train_loss)
        
        #https://www.geeksforgeeks.org/training-neural-networks-with-validation-using-pytorch/
        net.eval()
        with torch.no_grad():
            for x, target in validation_loader:
                if torch.cuda.is_available():
                    x = x.cuda()[:]
                    target = target.cuda().to(dtype=torch.long)
                out = net(x)
                loss = nnloss(out, target)
                avg_valid_loss.append(loss.item())
        valid_loss = np.mean(avg_valid_loss)
        plotvalidloss.append(valid_loss)

        if epoch % 10 == 0:
            print(f"\tepoch #{epoch} is finished.")
            print(f"\t  Avg. Train loss: {train_loss}")
            print(f"\t  Avg. Validation loss: {valid_loss}")
        
        train_acc = calc_acc(train_loader, net)
        plottrainacc.append(train_acc)
        valid_acc = calc_acc(validation_loader, net)
        plotvalidacc.append(valid_acc)        

    fig = plt.figure(1)
    plt.plot(plotepoch,plottrainloss,color = "blue", label = "Average Train Loss")
    plt.plot(plotepoch,plotvalidloss,color = "red", label = "Average Validation Loss")
    plt.title('Epoch vs Train and Validation Average Loss')
    plt.legend(loc="upper right")
    plt.xlabel("Increasing Epoch Value by 1")
    plt.ylabel("Avg Loss")
    plt.show()
    fig2 = plt.figure(2)
    plt.plot(plottrainacc, color="blue", label="Train Accuracy")
    plt.plot(plotvalidacc, color="red", label="Validation Accuracy")
    plt.title('Epoch vs Train and Validation Accuracy')
    plt.legend(loc="lower right")
    plt.xlabel("Increasing Epoch Value by 1")
    plt.ylabel("Accuracy Score")
    plt.show()
    print("After optimizing the model")
    findAccWithTest(train_loader, validation_loader, test_loader, net)
    
    end = timer()

    #https://stackoverflow.com/questions/7370801/how-to-measure-elapsed-time-in-python
    taken=(end-start)
    timetaken.append(taken)
    if(taken > 60):
        timein = "minutes"
    else:
        timein = "seconds"
    print(f"It took us {timedelta(seconds=taken)} {timein} to run this loop")
    

In [15]:
#uncomment this code if we need to train the model
# print("Before optimizing the model")
# findAccNoTest(train_loader, validation_loader, net)
# print("Training the Model")
# train_net(net)

### Code to Run the Optimized Neural Network

In [16]:
# The code above was for testing and find the best parameters

# For training the model you only need the code down below
def best_train_net(net):
    params = list(filter(lambda p: p.requires_grad, net.parameters()))
    optimizer = optim.SGD(params, lr=0.1, momentum=0.9, weight_decay = 0, nesterov=True)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.9)
    train_loader, test_loader = data_loader()
    print("Before optimizing the model")
    print(f'Train Accuracy: {100 * calc_acc(train_loader,net):.2f}%')
        
    epochs = 100
    for epoch in range(epochs):
        net.train()
        for x, target in train_loader:
            x = x.to(device)
            target = target.to(dtype=torch.long, device=device)
            optimizer.zero_grad()
            out = net(x)
            loss = nnloss(out, target)
            loss.backward()
            avg_train_loss.append(loss.item())
            optimizer.step()
        scheduler.step()
        train_loss = np.mean(avg_train_loss)
        if epoch % 10 == 0:
            print(f"\tepoch #{epoch} is finished.")
            print(f"\t  Avg. Train loss: {train_loss}")

    print("After optimizing the model")
    findAccTrainTest(train_loader, test_loader, net)
    confusion_matrix1(test_loader)
    torch.save(net.state_dict(), "best_parameter_model.pth")

In [None]:
best_train_net(net)

Before optimizing the model
Train Accuracy: 18.64%
	epoch #0 is finished.
	  Avg. Train loss: 1.3208674669265748
	epoch #10 is finished.
	  Avg. Train loss: 0.19842685000462965
	epoch #20 is finished.
	  Avg. Train loss: 0.11505755827658706
	epoch #30 is finished.
	  Avg. Train loss: 0.08148298279973128
	epoch #40 is finished.
	  Avg. Train loss: 0.06311564424219049
	epoch #50 is finished.
	  Avg. Train loss: 0.05152207128855771


In [None]:
train_test_acc_eval_f(net)

In [None]:
def load_reference_net():
    state_dict = torch.load("best_parameter_model.pth")
    net.load_state_dict(state_dict)
    net.eval()
    return net

Below is for Project 2 Part 1 Part 2 LC Compression

All the code was taken from Yerlan's LC Compression Collab and modified to fit the parameters of the project and our model

## Compression using the LC toolkit

### Step 1: L step
We will use same L step with same hyperparamters for all our compression examples

In [None]:
def my_l_step(model, lc_penalty, step):
    train_loader, test_loader = data_loader()
    params = list(filter(lambda p: p.requires_grad, model.parameters()))
    lr = 0.1*(0.98**step)
    optimizer = optim.SGD(params, lr=lr, momentum=0.9, nesterov=True)
    print(f'L-step #{step} with lr: {lr:.5f}')
    epochs_per_step_ = 7
    if step == 0:
        epochs_per_step_ = epochs_per_step_ * 2
    for epoch in range(epochs_per_step_):
        avg_loss = []
        for x, target in train_loader:
            optimizer.zero_grad()
            x = x.to(device)
            target = target.to(dtype=torch.long, device=device)
            out = model(x)
            #loss = model.loss(out, target) + lc_penalty()
            loss = nnloss(out,target) + lc_penalty()
            avg_loss.append(loss.item())
            loss.backward()
            optimizer.step()

        print(f"\tepoch #{epoch} is finished.")
        print(f"\t  avg. train loss: {np.mean(avg_loss):.6f}")

### Step 2: Schedule of mu values

In [None]:
mu_s = [9e-5 * (1.1 ** n) for n in range(20)]
# 20 L-C steps in total
# total training epochs is 7 x 20 = 140

### Compression time! Pruning
Let us prune all but 5% of the weights in the network (5% = 27175 weights)

In [None]:
net = load_reference_net()
layers = [lambda x=x: getattr(x, 'weight') for x in net.modules() if isinstance(x, nn.Linear)]
compression_tasks = {
    Param(layers, device): (AsVector, ConstraintL0Pruning(kappa=27175), 'pruning')
}

lc_alg = lc.Algorithm(
    model=net,                            # model to compress
    compression_tasks=compression_tasks,  # specifications of compression
    l_step_optimization=my_l_step,        # implementation of L-step
    mu_schedule=mu_s,                     # schedule of mu values
    evaluation_func=train_test_acc_eval_f # evaluation function
)
lc_alg.run()                              # entry point to the LC algorithm

In [None]:
lc_alg.count_params()

In [None]:
compressed_model_bits = lc_alg.count_param_bits() + (300+100+10)*32
uncompressed_model_bits = (784*300+300*100+100*10 + 300 + 100 + 10)*32
compression_ratio = uncompressed_model_bits/compressed_model_bits
print(compression_ratio)

Note that we were pruning 95% of the weights. Naively, you would assume 20x compression ratio (100%/5%), however, this is not the case. Firstly, there are some uncompressed parts (in this case biases), and, secondly, storing a compressed model requires additional metadata (in this case positions of non-zero elements). Therefore we get only 16x compression ratio (vs naively expected 20x). 

To prevent manual computation of compression ratio, let us create a function below. Note, this function is model specific.

In [None]:
def compute_compression_ratio(lc_alg):
    compressed_model_bits = lc_alg.count_param_bits() + (300+100+10)*32
    uncompressed_model_bits = (784*300+300*100+100*10 + 300 + 100 + 10)*32
    compression_ratio = uncompressed_model_bits/compressed_model_bits
    return compression_ratio

### Quantization
Now let us quantize each layer with its own codebook

In [None]:
net = load_reference_net()
layers = [lambda x=x: getattr(x, 'weight') for x in net.modules() if isinstance(x, nn.Linear)]

compression_tasks = {
    Param(layers[0], device): (AsVector, AdaptiveQuantization(k=2), 'layer0_quant'),
    Param(layers[1], device): (AsVector, AdaptiveQuantization(k=2), 'layer1_quant'),
    Param(layers[2], device): (AsVector, AdaptiveQuantization(k=2), 'layer2_quant')
}

lc_alg = lc.Algorithm(
    model=net,                            # model to compress
    compression_tasks=compression_tasks,  # specifications of compression
    l_step_optimization=my_l_step,        # implementation of L-step
    mu_schedule=mu_s,                     # schedule of mu values
    evaluation_func=train_test_acc_eval_f # evaluation function
)
lc_alg.run()  
print('Compressed_params:', lc_alg.count_params())
print('Compression_ratio:', compute_compression_ratio(lc_alg))

### Mixing pruning, low rank, and quantization

In [None]:
net = load_reference_net()
layers = [lambda x=x: getattr(x, 'weight') for x in net.modules() if isinstance(x, nn.Linear)]

compression_tasks = {
    Param(layers[0], device): (AsVector, ConstraintL0Pruning(kappa=5000), 'pruning'),
    Param(layers[1], device): (AsIs, LowRank(target_rank=9, conv_scheme=None), 'low-rank'),
    Param(layers[2], device): (AsVector, AdaptiveQuantization(k=2), 'quant')
}

lc_alg = lc.Algorithm(
    model=net,                            # model to compress
    compression_tasks=compression_tasks,  # specifications of compression
    l_step_optimization=my_l_step,        # implementation of L-step
    mu_schedule=mu_s,                     # schedule of mu values
    evaluation_func=train_test_acc_eval_f # evaluation function
)
lc_alg.run()
print('Compressed_params:', lc_alg.count_params())
print('Compression_ratio:', compute_compression_ratio(lc_alg))

### Additive combination of Quantization and Pruning

In [None]:
net = load_reference_net()
layers = [lambda x=x: getattr(x, 'weight') for x in net.modules() if isinstance(x, nn.Linear)]

compression_tasks = {
    Param(layers, device): [
        (AsVector, ConstraintL0Pruning(kappa=2662), 'pruning'),
        (AsVector, AdaptiveQuantization(k=2), 'quant')
    ]
}

lc_alg = lc.Algorithm(
    model=net,                            # model to compress
    compression_tasks=compression_tasks,  # specifications of compression
    l_step_optimization=my_l_step,        # implementation of L-step
    mu_schedule=mu_s,                     # schedule of mu values
    evaluation_func=train_test_acc_eval_f # evaluation function
)
lc_alg.run()
print('Compressed_params:', lc_alg.count_params())
print('Compression_ratio:', compute_compression_ratio(lc_alg))

### Low-rank compression with automatic rank selection

In [None]:
net = load_reference_net()
layers = [lambda x=x: getattr(x, 'weight') for x in net.modules() if isinstance(x, nn.Linear)]
alpha=1e-9
compression_tasks = {
    Param(layers[0], device): (AsIs, RankSelection(conv_scheme='scheme_1', alpha=alpha, criterion='storage', module=layers[0], normalize=True), "layer1_lr"),
    Param(layers[1], device): (AsIs, RankSelection(conv_scheme='scheme_1', alpha=alpha, criterion='storage', module=layers[1], normalize=True), "layer2_lr"),
    Param(layers[2], device): (AsIs, RankSelection(conv_scheme='scheme_1', alpha=alpha, criterion='storage', module=layers[2], normalize=True), "layer3_lr")
}

lc_alg = lc.Algorithm(
    model=net,                            # model to compress
    compression_tasks=compression_tasks,  # specifications of compression
    l_step_optimization=my_l_step,        # implementation of L-step
    mu_schedule=mu_s,                     # schedule of mu values
    evaluation_func=train_test_acc_eval_f # evaluation function
)
lc_alg.run()
print('Compressed_params:', lc_alg.count_params())
print('Compression_ratio:', compute_compression_ratio(lc_alg))

### ScaledBinaryQuantization

In [None]:
from lc.compression_types import ScaledBinaryQuantization
net = load_reference_net()
layers = [lambda x=x: getattr(x, 'weight') for x in net.modules() if isinstance(x, nn.Linear)]

compression_tasks = {
    Param(layers[0], device): (AsVector, ScaledBinaryQuantization(), 'layer0_quant'),
    Param(layers[1], device): (AsVector, ScaledBinaryQuantization(), 'layer1_quant'),
    Param(layers[2], device): (AsVector, ScaledBinaryQuantization(), 'layer2_quant')
}

lc_alg = lc.Algorithm(
    model=net,                            # model to compress
    compression_tasks=compression_tasks,  # specifications of compression
    l_step_optimization=my_l_step,        # implementation of L-step
    mu_schedule=mu_s,                     # schedule of mu values
    evaluation_func=train_test_acc_eval_f # evaluation function
)
lc_alg.run()
print('Compressed_params:', lc_alg.count_params())
print('Compression_ratio:', compute_compression_ratio(lc_alg))

### ScaledTernaryQuantization

In [None]:
from lc.compression_types import ScaledTernaryQuantization
net = load_reference_net()
layers = [lambda x=x: getattr(x, 'weight') for x in net.modules() if isinstance(x, nn.Linear)]

compression_tasks = {
    Param(layers[0], device): (AsVector, ScaledTernaryQuantization(), 'layer0_quant'),
    Param(layers[1], device): (AsVector, ScaledTernaryQuantization(), 'layer1_quant'),
    Param(layers[2], device): (AsVector, ScaledTernaryQuantization(), 'layer2_quant')
}

lc_alg = lc.Algorithm(
    model=net,                            # model to compress
    compression_tasks=compression_tasks,  # specifications of compression
    l_step_optimization=my_l_step,        # implementation of L-step
    mu_schedule=mu_s,                     # schedule of mu values
    evaluation_func=train_test_acc_eval_f # evaluation function
)
lc_alg.run() 
print('Compressed_params:', lc_alg.count_params())
print('Compression_ratio:', compute_compression_ratio(lc_alg))