In [1]:
cd ..

/projects/lossyless


In [2]:
import os
from io import BytesIO, StringIO

import numpy as np
import torch
from featurize import (
    CIFAR10,
    CIFAR100,
    STL10Dataset,
    ImagenetDataset,
    ImagenetteDataset,
    get_featurized_data,
)

  stdout_func(


In [4]:
model = "CLIP_ViT"
device = "cuda" if torch.cuda.is_available() else "cpu"
Datasets = dict(
    ImagenetDataset=ImagenetDataset, Cifar10, CIFAR100=CIFAR100,STL10=STL10Dataset)
features = get_featurized_data(Datasets, model, device=device, is_half=True)

Done featurizing ImagenetDataset
Done featurizing CIFAR100
Done featurizing STL10


In [5]:
from functools import partial

from sklearn.linear_model import SGDClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline

CLF = partial(
    SGDClassifier,
    random_state=123,
    n_jobs=-1,
    loss="log",
    penalty="l2",
    alpha=0.0001,
    max_iter=100,
    learning_rate="adaptive",
    eta0=0.1,
    early_stopping=False,
    validation_fraction=0.1,
    n_iter_no_change=5,
)



KNN1 = partial(
    KNeighborsClassifier,
    n_neighbors=1,
    algorithm="kd_tree",
    n_jobs=-1
)

In [6]:
def get_train_test(data):
    Z_train = features[data]["train"]["Z"][:]
    Y_train = features[data]["train"]["Y"][:]
    Z_test = features[data]["test"]["Z"][:]
    Y_test = features[data]["test"]["Y"][:]
    return Z_train, Y_train, Z_test, Y_test

In [7]:
def get_lossless_size(Z_test):
    Z_test = Z_test.astype(np.float16)
    tmp = BytesIO()
    np.savez_compressed(tmp, Z=Z_test)
    out = tmp.getbuffer().nbytes * 8 / 1e6
    tmp.close()
    print(f"{out:.1f} Mega Bits")

In [8]:
def H_YlM(Z_test, Y_test):
    knn1 = KNN1()
    knn1.fit(Z_test, Y_test)
    accuracy = knn1.score(Z_test, Y_test)
    print(f"Test information {accuracy*100}%")

In [9]:
def get_train_test_hat(compressor, datamodule):
    out = trainer.predict(
        compressor,
        dataloaders=[datamodule.train_dataloader(), datamodule.test_dataloader()],
    )
    Z_train_hat = np.concatenate([o[0] for o in out[0]], axis=0)
    Y_train_hat = np.concatenate([o[1] for o in out[0]], axis=0)
    Z_test_hat = np.concatenate([o[0] for o in out[1]], axis=0)
    Y_test_hat = np.concatenate([o[1] for o in out[1]], axis=0)
    return Z_train_hat, Y_train_hat, Z_test_hat, Y_test_hat

In [9]:
from pl_bolts.datamodules import SklearnDataModule

def get_datamodule(Z_train, Y_train, Z_test, Y_test):
    datamodule = SklearnDataModule(
        Z_train,
        Y_train,
        x_test=Z_test,
        y_test=Y_test,
        num_workers=16,
        batch_size=64,
        val_split=0,
    )

    datamodule.val_dataloader = datamodule.test_dataloader
    return datamodule

In [33]:
%%time
data = "ImagenetDataset"

Z_train, Y_train, Z_test, Y_test = get_train_test(data)

CPU times: user 5.86 s, sys: 384 ms, total: 6.25 s
Wall time: 6.18 s


# Lossless Image

In [68]:
!du -sh data/imagenet256/val

2.9G	data/imagenet256/val


In [43]:
# imagenet 256 
#print(f"{2.9 * 1e9 * 8 / 1e6:.1f} Mega Bits")

In [80]:
# real imagenet (not on this computer)
print(f"{8.7 * 1e9 * 8 / 1e6:.1f} Mega Bits")

69600.0 Mega Bits


In [None]:
# information can safely be assumed to be 100% because you never have 2 same images

# Lossless Features

In [14]:
%%time
clf_lossless = CLF()
clf_lossless.fit(Z_train, Y_train)
accuracy = clf_lossless.score(Z_test, Y_test)
print(accuracy)

0.72658


In [72]:
# hdf5
out = features[data]["test"]["Z"].id.get_storage_size() * 8 / 1e6
print(f"{out:.1f} Mega Bits")

379.6 Mega Bits


In [73]:
# savez
get_lossless_size(Z_test)

379.6 Mega Bits


In [56]:
H_YlM(Z_test, Y_test)

Test information 100.0%


# PCA

In [17]:
%%time
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

clf = make_pipeline(PCA(n_components=220), CLF())
clf.fit(Z_train, Y_train)
accuracy = clf.score(Z_test, Y_test)
print(accuracy)

0.7142


In [74]:
pca = clf.named_steps["pca"]
Z_test_pca = pca.transform(Z_test)
get_lossless_size(Z_test_pca)

163.8 Mega Bits


# Learned Lossy

In [14]:
import math
import pytorch_lightning as pl
from lossyless import get_rate_estimator

import torch
from compressai.entropy_models import EntropyBottleneck
from lossyless.helpers import OrderedSet, append_optimizer_scheduler_
from utils.helpers import dict2namespace


class ArrayCompressor(pl.LightningModule):
    def __init__(
        self,
        **hparams,
    ):
        super().__init__()
        hparams = dict2namespace(hparams)
        self.save_hyperparameters(hparams)
        self.rate_estimator = get_rate_estimator(
            self.hparams.mode, z_dim=self.hparams.input_dim, **self.hparams.kwargs
        )
        self.alpha = torch.nn.Parameter(torch.ones(self.hparams.input_dim))

    def forward(self, batch):
        z, y = batch
        z = (z * self.alpha.exp()).unsqueeze(0)
        z_hat, *_ = self.rate_estimator(z, None, self)
        z_hat = z_hat.squeeze(0) / self.alpha.exp()
        return z_hat, y

    def get_specific_parameters(self, mode):
        """Returns an iterator over the desired model parameters."""
        all_param = OrderedSet(self.parameters())
        coder_param = OrderedSet(self.rate_estimator.aux_parameters())
        if mode == "main":
            return all_param - coder_param
        elif mode == "coder":
            return coder_param
        else:
            raise ValueError(f"Unkown parameter mode={mode}.")

    def configure_optimizers(self):
        optimizers, schedulers = [], []

        append_optimizer_scheduler_(
            self.hparams.optimizer_feat,
            self.hparams.scheduler_feat,
            self.get_specific_parameters("main"),
            optimizers,
            schedulers,
        )

        append_optimizer_scheduler_(
            self.hparams.optimizer_coder,
            self.hparams.scheduler_coder,
            self.get_specific_parameters("coder"),
            optimizers,
            schedulers,
        )


        return optimizers, schedulers

    def step(self, batch):
        z, _ = batch

        z_in = (z * self.alpha.exp()).unsqueeze(0)
        z_hat, rates, logs, r_other = self.rate_estimator(z_in, None, self)
        z_hat = z_hat.squeeze(0) / self.alpha.exp()

        rate = rates.mean()
        distortion = torch.norm(z - z_hat, p=self.hparams.p_norm, dim=-1).mean()
        loss = distortion + self.hparams.beta * rate

        logs["rate"] = rate / math.log(2)
        logs["distortion"] = distortion / math.log(2)
        logs["loss"] = loss

        self.log("rate", rate / math.log(2), prog_bar=True)
        self.log("distortion", distortion / math.log(2), prog_bar=True)

        return loss, logs

    def training_step(self, batch, batch_idx, optimizer_idx=0):
        if optimizer_idx == 0:
            loss, logs = self.step(batch)
            self.log_dict({f"train/pred/{k}": v for k, v in logs.items()})
        else:
            loss = self.rate_estimator.aux_loss()
            self.log(f"train/pred/coder_loss", loss)
        return loss

    def validation_step(self, batch, batch_idx):
        loss, logs = self.step(batch)
        self.log_dict({f"val/pred/{k}": v for k, v in logs.items()})
        return loss

    def test_step(self, batch, batch_idx):
        loss, logs = self.step(batch)
        self.log_dict({f"test/pred/{k}": v for k, v in logs.items()})
        return loss

In [15]:
max_epochs = 10

compressor = ArrayCompressor(
    input_dim=512,
    beta=1e-2,
    p_norm=1,
    mode="H_hyper",
    kwargs={},
    optimizer_coder=dict(mode="Adam", kwargs=dict(lr=1e-3)),
    optimizer_feat=dict(mode="Adam", kwargs=dict(lr=1e-2)),
    scheduler_feat=dict(modes=["expdecay"], kwargs=dict(expdecay=dict(epochs=max_epochs, decay_factor=100))),
    scheduler_coder=dict(modes=["expdecay"], kwargs=dict(expdecay=dict(epochs=max_epochs, decay_factor=100))),
)

In [11]:
trainer = pl.Trainer(gpus=1, precision=32, max_epochs=max_epochs, limit_val_batches=0.1)

GPU available: True, used: True
TPU available: None, using: 0 TPU cores


## Same Data
The first question we will ask ourselves is whether these features can be used for good compression when the entropy model is trained on the same data distribution.

In [35]:
data = "ImagenetDataset"

In [11]:
datamodule = get_datamodule(Z_train, Y_train, Z_test, Y_test)

In [16]:
is_train = False
if is_train:
    trainer.fit(compressor, datamodule=datamodule)
    trainer.save_checkpoint("compressor.ckpt")
else:
    compressor = ArrayCompressor.load_from_checkpoint(checkpoint_path="compressor.ckpt")

In [19]:
out = trainer.test(model=compressor, test_dataloaders=[datamodule.test_dataloader()])
n_test = len(Z_test)
n_per_test = out[0]["test/pred/rate"]
print(f"{ n_test* n_per_test / 1e6 :.1f} Mega Bits")

Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'distortion': 7.373038291931152,
 'rate': 2529.230712890625,
 'test/pred/H_ZlX': 0.0,
 'test/pred/H_q_S': 0.0,
 'test/pred/H_q_Z': 2529.230712890625,
 'test/pred/H_q_ZlS': 2529.230712890625,
 'test/pred/distortion': 7.373038291931152,
 'test/pred/loss': 22.641889572143555,
 'test/pred/rate': 2529.230712890625}
--------------------------------------------------------------------------------
126.5 Mega Bits


In [20]:
# jackpot is 1380.0 bit rate

In [38]:
Z_train_hat, Y_train_hat, Z_test_hat, Y_test_hat = get_train_test_hat(compressor, datamodule)



Predicting: 0it [00:00, ?it/s]

In [28]:
clf_lossless = CLF()
clf_lossless.fit(Z_train, Y_train)
accuracy = clf_lossless.score(Z_test, Y_test)
print(accuracy)

0.72622


In [75]:
H_YlM(Z_test_hat, Y_test_hat)

Test information 100.0%


## Different Data
At the end of the day what we want is to have a pretrained compressor that can be used for any downstream data. So no only do you want generalization of the entropy model and representation learning to test data but also to different domains. It's not obvious whether this is possible.We know empricially that CLIP will be able to generalizat well, but that might not be the case for the entropy model.

In [28]:
Datasets = dict(
    CIFAR100=CIFAR100, CIFAR10=CIFAR10, ImagenetDataset=ImagenetDataset, STL10=STL10Dataset)
features = get_featurized_data(Datasets, model, device=device, is_half=True)

Done featurizing CIFAR100
Done featurizing CIFAR10
Done featurizing ImagenetDataset
Done featurizing STL10


### CIFAR10

In [37]:
data = "CIFAR10"
Z_train, Y_train, Z_test, Y_test = get_train_test(data)

In [64]:
!du -sh data/cifar-10-batches-py/test_batch

30M	data/cifar-10-batches-py/test_batch


In [65]:
print(f"{30 * 1e6 * 8 / 1e6:.1f} Mega Bits")

240.0 Mega Bits


In [66]:
get_lossless_size(Z_test)

75.8 Mega Bits


In [67]:
clf_lossless = CLF()
clf_lossless.fit(Z_train, Y_train)
accuracy = clf_lossless.score(Z_test, Y_test)
print(accuracy)

0.9498


In [61]:
H_YlM(Z_test, Y_test)

Test information 100.0%


In [63]:
datamodule = get_datamodule(Z_train, Y_train, Z_test, Y_test)

In [64]:
out = trainer.test(model=compressor, test_dataloaders=[datamodule.test_dataloader()])
n_test = len(Z_test)
n_per_test = out[0]["test/pred/rate"]
print(f"{ n_test* n_per_test / 1e6 :.1f} Mega Bits")

Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'distortion': 7.375333786010742,
 'rate': 2558.691162109375,
 'test/pred/H_ZlX': 0.0,
 'test/pred/H_q_S': 0.0,
 'test/pred/H_q_Z': 2558.691162109375,
 'test/pred/H_q_ZlS': 2558.691162109375,
 'test/pred/distortion': 7.375333786010742,
 'test/pred/loss': 22.847688674926758,
 'test/pred/rate': 2558.691162109375}
--------------------------------------------------------------------------------
25.6 Mega Bits


You see that the performance is not too bad (considering that for a compressor trained from that datadistribution you were getting 15.3 Mega Bits)

In [67]:
Z_train_hat, Y_train_hat, Z_test_hat, Y_test_hat = get_train_test_hat(compressor, datamodule)



Predicting: 0it [00:00, ?it/s]

In [68]:
clf_lossless = CLF()
clf_lossless.fit(Z_train_hat, Y_train_hat)
accuracy = clf_lossless.score(Z_test_hat, Y_test_hat)
print(accuracy)

0.9489


In [69]:
H_YlM(Z_test_hat, Y_test_hat)

Test information 100.0%


### CIFAR100

In [58]:
data = "CIFAR100"
Z_train, Y_train, Z_test, Y_test = get_train_test(data)

In [59]:
!du -sh data/cifar-100-python/test

30M	data/cifar-100-python/test


In [60]:
print(f"{30 * 1e6 * 8 / 1e6:.1f} Mega Bits")

240.0 Mega Bits


In [61]:
get_lossless_size(Z_test)

75.9 Mega Bits


In [62]:
clf_lossless = CLF()
clf_lossless.fit(Z_train, Y_train)
accuracy = clf_lossless.score(Z_test, Y_test)
print(accuracy)

0.7969


In [80]:
H_YlM(Z_test, Y_test)

Test information 100.0%


In [81]:
datamodule = get_datamodule(Z_train, Y_train, Z_test, Y_test)

In [82]:
out = trainer.test(model=compressor, test_dataloaders=[datamodule.test_dataloader()])
n_test = len(Z_test)
n_per_test = out[0]["test/pred/rate"]
print(f"{ n_test* n_per_test / 1e6 :.1f} Mega Bits")

Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'distortion': 7.372977256774902,
 'rate': 2551.51513671875,
 'test/pred/H_ZlX': 0.0,
 'test/pred/H_q_S': 0.0,
 'test/pred/H_q_Z': 2551.51513671875,
 'test/pred/H_q_ZlS': 2551.51513671875,
 'test/pred/distortion': 7.372977256774902,
 'test/pred/loss': 22.796316146850586,
 'test/pred/rate': 2551.51513671875}
--------------------------------------------------------------------------------
25.5 Mega Bits


In [83]:
Z_train_hat, Y_train_hat, Z_test_hat, Y_test_hat = get_train_test_hat(compressor, datamodule)



Predicting: 0it [00:00, ?it/s]

In [85]:
clf_lossless = CLF()
clf_lossless.fit(Z_train_hat, Y_train_hat)
accuracy = clf_lossless.score(Z_test_hat, Y_test_hat)
print(accuracy)

0.7966


In [86]:
H_YlM(Z_test_hat, Y_test_hat)

Test information 100.0%


### STL10

In [43]:
data = "STL10"
Z_train, Y_train, Z_test, Y_test = get_train_test(data)

In [69]:
!du -sh data/stl10_binary/test_X.bin

211M	data/stl10_binary/test_X.bin


In [44]:
print(f"{211 * 1e6 * 8 / 1e6:.1f} Mega Bits")

1688.0 Mega Bits


In [71]:
get_lossless_size(Z_test)

60.7 Mega Bits


In [72]:
clf_lossless = CLF()
clf_lossless.fit(Z_train, Y_train)
accuracy = clf_lossless.score(Z_test, Y_test)
print(accuracy)

0.985875


In [73]:
H_YlM(Z_test, Y_test)

Test information 100.0%


In [74]:
datamodule = get_datamodule(Z_train, Y_train, Z_test, Y_test)

In [75]:
out = trainer.test(model=compressor, test_dataloaders=[datamodule.test_dataloader()])
n_test = len(Z_test)
n_per_test = out[0]["test/pred/rate"]
print(f"{ n_test* n_per_test / 1e6 :.1f} Mega Bits")

Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'distortion': 7.375964164733887,
 'rate': 2542.785400390625,
 'test/pred/H_ZlX': 0.0,
 'test/pred/H_q_S': 0.0,
 'test/pred/H_q_Z': 2542.785400390625,
 'test/pred/H_q_ZlS': 2542.785400390625,
 'test/pred/distortion': 7.375964164733887,
 'test/pred/loss': 22.73787498474121,
 'test/pred/rate': 2542.785400390625}
--------------------------------------------------------------------------------
20.3 Mega Bits


In [76]:
Z_train_hat, Y_train_hat, Z_test_hat, Y_test_hat = get_train_test_hat(compressor, datamodule)



Predicting: 0it [00:00, ?it/s]

In [77]:
clf_lossless = CLF()
clf_lossless.fit(Z_train_hat, Y_train_hat)
accuracy = clf_lossless.score(Z_test_hat, Y_test_hat)
print(accuracy)

0.986


In [78]:
H_YlM(Z_test_hat, Y_test_hat)

Test information 100.0%
