<a href="https://colab.research.google.com/github/YannDubs/lossyless/blob/main/notebooks/minimal_code.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Minimal Lossyless Code

This notebook contains a minimal pipeline for training and evaluating the proposed CLIP compressor in our paper [**Lossy Compression for Lossless Prediction**](https://arxiv.org/pdf/2106.10800.pdf). The focus is on simplicity and understandibility.

**Make sure that you use a GPU** (on COLAB: runtime -> change runtime type -> Hardware accelerator: GPU)

## Environment

In [1]:
!pip install git+https://github.com/openai/CLIP.git --quiet # pretrained CLIP

  Building wheel for clip (setup.py) ... [?25l[?25hdone


In [2]:
!pip uninstall -y torchtext --quiet # in case it got installed (wouldn't work due to torch version issue)
!pip install scikit-learn==0.24.2 lightning-bolts==0.3.4 compressai==1.1.5 pytorch-lightning==1.3.8 --quiet # for evaluation, training, and compression



Basic variables depending on whether using GPU or not.

In [3]:
import torch

data_dir = "data/"
if torch.cuda.is_available():
    device, precision, gpus = "cuda", 16, 1
else:
    device, precision, gpus = "cpu", 32, 0

## Downloads
Downloads the necessary data and the pretrained CLIP.

In [4]:
import clip
from torchvision.datasets import CIFAR10, STL10

# pretrained CLIP
pretrained, preprocess = clip.load("ViT-B/32", device)

# train data
cifar = CIFAR10(data_dir, download=True, train=True, transform=preprocess)

# eval data
stl10_train = STL10(data_dir, download=True, split="train", transform=preprocess)
stl10_test = STL10(data_dir, download=True, split="test", transform=preprocess)

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


## Training the compressor

We will now train the compressor on CIFAR10, by:

1. Featurize the training dataset by the pretrained CLIP => instead of working with raw images we directly work with features to be mcuh quicker.
2. Create our entropy bottleneck model.
3. Train the entropy bottleneck on CIFAR10.


In [5]:
import tqdm
from torch.utils.data import DataLoader
import numpy as np

def clip_featurize_data(dataset, device, pretrained):
    """Featurize a dataset using the pretrained CLIP model."""
    with torch.no_grad():
        Z, Y = [], []
        for x, y in tqdm.tqdm(DataLoader(dataset, batch_size=128, num_workers=16)):
            Z += [pretrained.encode_image(x.to(device).half()).cpu().numpy()]
            Y += [y.cpu().numpy()]
    return np.concatenate(Z), np.concatenate(Y)

In [6]:
from torch.optim import Adam
from torch.optim.lr_scheduler import StepLR
import pytorch_lightning as pl
from compressai.entropy_models import EntropyBottleneck
import math

class ArrayCompressor(pl.LightningModule):
    """Compressor for any vectors, by using an entropy bottleneck and MSE distortion."""

    def __init__(self, *args, **kwargs):
        super().__init__()
        self.save_hyperparameters()
        self.bottleneck = EntropyBottleneck(self.hparams.z_dim)
        self.scaling = torch.nn.Parameter(torch.ones(self.hparams.z_dim))
        self.biasing = torch.nn.Parameter(torch.zeros(self.hparams.z_dim))
        self.is_updated = False

    def forward(self, batch):
        z, y = batch
        z = (z + self.biasing) * self.scaling.exp()
        z_hat, q_z = self.bottleneck(z.unsqueeze(-1).unsqueeze(-1))
        z_hat = z_hat.squeeze() / self.scaling.exp() - self.biasing
        return z_hat, q_z.squeeze(), y.squeeze()

    def step(self, batch, *args, **kwargs):
        z_hat, q_z, _ = self(batch)
        rate = -torch.log(q_z).sum(-1).mean()
        distortion = torch.norm(batch[0] - z_hat, p=1, dim=-1).mean()
        self.log_dict(
            {"rate": rate / math.log(2), "distortion": distortion}, prog_bar=True
        )
        return distortion + self.hparams.lmbda * rate

    def training_step(self, batch, _, optimizer_idx=0):
        return self.step(batch) if optimizer_idx == 0 else self.bottleneck.loss()

    def predict_step(self, batch, _, __):
        return self.compress(batch[0]), batch[1].cpu().numpy()

    def compress(self, z):
        if not self.is_updated:
            self.bottleneck.update(force=True)
            self.is_updated = True
        z = (z + self.biasing) * self.scaling.exp()
        return self.bottleneck.compress(z.unsqueeze(-1).unsqueeze(-1))

    def decompress(self, z_bytes):
        z_hat = self.bottleneck.decompress(z_bytes, [1, 1]).squeeze()
        return (z_hat / self.scaling.exp()) - self.biasing

    def configure_optimizers(self):
        param = [p for n, p in self.named_parameters() if not n.endswith(".quantiles")]
        quantile_param = [
            p for n, p in self.named_parameters() if n.endswith(".quantiles")
        ]
        optimizer = Adam(param, lr=self.hparams.lr)
        optimizer_coder = Adam(quantile_param, lr=self.hparams.lr)
        scheduler = StepLR(optimizer, self.hparams.lr_step)
        scheduler_coder = StepLR(optimizer_coder, self.hparams.lr_step)
        return [optimizer, optimizer_coder], [scheduler, scheduler_coder]

In [7]:
import time
from pl_bolts.datamodules import SklearnDataModule


start = time.time()
Z_cifar, Y_cifar = clip_featurize_data(cifar, device, pretrained)
data_kwargs = dict(
    num_workers=16, batch_size=128, pin_memory=True, val_split=0.0, test_split=0
)
dm_cifar = SklearnDataModule(Z_cifar, Y_cifar, **data_kwargs)
compressor = ArrayCompressor(z_dim=512, lmbda=4e-2, lr=1e-1, lr_step=2)
trainer = pl.Trainer(gpus=gpus, precision=precision, max_epochs=10, logger=False)
trainer.fit(compressor, datamodule=dm_cifar)
print(f"Compressor trained in {(time.time() - start)/60:.0f} minutes.")

100%|██████████| 391/391 [01:47<00:00,  3.65it/s]
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
Using native 16bit precision.
  rank_zero_warn(f'you passed in a {loader_name} but have no {step_name}. Skipping {stage} loop')
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name       | Type              | Params
-------------------------------------------------
0 | bottleneck | EntropyBottleneck | 31.2 K
-------------------------------------------------
32.3 K    Trainable params
0         Non-trainable params
32.3 K    Total params
0.129     Total estimated model params size (MB)


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Training', layout=Layout(flex='2'), max…


Compressor trained in 3 minutes.


## Evaluating the compressor

Now we will evaluate the compressor by quantifying how well it can compress the STL10 dataset and how well a linear classifier can predict from the compressed dataset, by:

1. Entropy coding (i.e., compressing) the evaluating data.
2. Decompressing the evaluation data.
3. Fitting a linear SVM on the training set of the compressed STL10.
4. Evaluating the classifier on the test set of the compressed STL10.


In [8]:
def compress_data(trainer, dataset, device, pretrained, **kwargs):
    """Compresses the data using an entropy coder."""
    start = time.time()
    Z, Y = clip_featurize_data(dataset, device, pretrained)
    dm = SklearnDataModule(Z, Y, **kwargs)
    out = trainer.predict(dataloaders=dm.train_dataloader())
    Z_bytes = [o[0] for o in out]
    flat_z = [i for batch in Z_bytes for i in batch]
    Y = np.concatenate([o[1] for o in out], axis=0)
    coding_rate = sum([len(s) for s in flat_z]) * 8 / len(flat_z)
    sec_per_img = (time.time() - start) / len(flat_z)
    return Z_bytes, Y, coding_rate, sec_per_img


def decompress_data(compressor, Z_bytes):
    """Compresses the data that was entropy coded."""
    start = time.time()
    with torch.no_grad():
        Z_hat = [compressor.decompress(b).cpu().numpy() for b in Z_bytes]
    sec_per_img = (time.time() - start) / len(Z_hat)
    return np.concatenate(Z_hat), sec_per_img

In [9]:
# entropy code evaluation data. Rate: 1703.6 bits, Compression: 230.2 img/sec
Z_b_train, Y_train, *_ = compress_data(trainer, stl10_train, device, pretrained, **data_kwargs)
Z_b_test, Y_test, rate, enc_time = compress_data(
    trainer, stl10_test, device, pretrained, **data_kwargs
)
print(f"Bit-rate: {rate:.1f}. \t Compression: {1/enc_time:.1f} img/sec.")

100%|██████████| 40/40 [00:14<00:00,  2.69it/s]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  f'Your {mode}_dataloader has `shuffle=True`, it is best practice to turn'


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Predicting', layout=Layout(flex='2'), m…

  0%|          | 0/63 [00:00<?, ?it/s]




100%|██████████| 63/63 [00:21<00:00,  2.93it/s]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Predicting', layout=Layout(flex='2'), m…


Bit-rate: 1705.8. 	 Compression: 222.1 img/sec.


In [10]:
# Decompress data. Decoding: 2.8 img/sec (no batch processing)

Z_train, _ = decompress_data(compressor, Z_b_train)
Z_test, dec_time = decompress_data(compressor, Z_b_test)
print(f"Decompression: {1/dec_time:.1f} img/sec.")

Decompression: 5.3 img/sec.


In [11]:
# Downstream evaluation. Accuracy: 98.65%  	 Training time: 0.5

from sklearn.svm import LinearSVC

clf = LinearSVC(C=4e-3)
start = time.time()
clf.fit(Z_train, Y_train)
delta_time = time.time() - start
acc = clf.score(Z_test, Y_test)
print(
    f"Downstream STL10 accuracy: {acc*100:.2f}%.  \t Training time: {delta_time:.1f} "
)

Downstream STL10 accuracy: 98.62%.  	 Training time: 0.5 
