In [1]:
import torch
import torch.nn as nn
from torchvision import transforms
import zlib
import numpy as np
from compressai.entropy_models import EntropyBottleneck
from compressai.layers import GDN
from compressai.models import CompressionModel
from compressai.models.utils import conv, deconv
from datasets import load_dataset, Dataset
import PIL.Image as Image

In [2]:
def pil_to_pt(img):
    """Turn a PIL image into a zero-centred float tensor with a batch axis.

    The image is converted with ``pil_to_tensor``, cast to float, divided by
    255 and shifted by -0.5 (so 8-bit pixel values land in [-0.5, 0.5]).
    A leading axis of size 1 is then added in front.
    """
    pixels = transforms.functional.pil_to_tensor(img).to(torch.float)
    centred = pixels / 255 - 0.5
    return centred.unsqueeze(0)
def pt_to_pil(t):
    """Map a zero-centred float tensor back to 8-bit pixel values.

    Reverses the scaling used by ``pil_to_pt``: adds 0.5, multiplies by 255,
    clamps, rounds and casts to ``torch.uint8``.

    NOTE: despite the name, the value returned is a ``torch.uint8`` tensor,
    not a PIL image, and the shape of ``t`` is left unchanged.
    """
    scaled = (t + 0.5) * 255
    # Clamp just inside the rounding boundaries so that every value rounds
    # to something in [0, 255] before the uint8 cast.
    bounded = torch.clamp(scaled, min=-0.49, max=255.49)
    return bounded.round().to(torch.uint8)

In [3]:
class Network(CompressionModel):
    """Convolutional autoencoder with an entropy bottleneck on its latent.

    ``encode`` stacks three ``conv`` layers with a ``GDN`` between each pair,
    taking 3 input channels to ``N`` latent channels. ``decode`` mirrors it
    with ``deconv`` layers and inverse ``GDN``, going from ``N`` channels back
    to 3.

    Args:
        N: number of channels used by the hidden layers, the latent and the
            entropy bottleneck.
    """

    def __init__(self, N=128):
        super().__init__()
        self.entropy_bottleneck = EntropyBottleneck(N)

        # Analysis transform: image -> latent.
        analysis_layers = [
            conv(3, N),
            GDN(N),
            conv(N, N),
            GDN(N),
            conv(N, N),
        ]
        self.encode = nn.Sequential(*analysis_layers)

        # Synthesis transform: latent -> reconstructed image.
        synthesis_layers = [
            deconv(N, N),
            GDN(N, inverse=True),
            deconv(N, N),
            GDN(N, inverse=True),
            deconv(N, 3),
        ]
        self.decode = nn.Sequential(*synthesis_layers)

    def forward(self, x):
        """Encode ``x``, pass the latent through the entropy bottleneck, decode.

        Returns:
            A ``(x_hat, y_likelihoods)`` tuple: the decoded output and the
            second value returned by the entropy bottleneck.
        """
        latent = self.encode(x)
        latent_hat, likelihoods = self.entropy_bottleneck(latent)
        return self.decode(latent_hat), likelihoods


In [4]:
def lossy_analysis_transform(img):
    """Encode a batch of images into an integer latent array.

    The batch is moved to whatever device the global ``net`` lives on, passed
    through ``net.encode``, rounded to the nearest integer and stored as int8.

    Args:
        img: float tensor accepted by ``net.encode``
            (assumed to be a batch of images -- confirm against the caller).

    Returns:
        ``numpy.ndarray`` of dtype ``int8`` on the CPU holding the quantised
        latent.
    """
    # Follow the model's device instead of hard-coding "cuda".
    device = next(net.parameters()).device
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        latent = net.encode(img.to(device)).round()
        # Saturate to the int8 range; casting an out-of-range float to int8
        # would otherwise overflow and silently corrupt the stored latent.
        latent = latent.clamp(-128, 127).to(torch.int8)
    return latent.cpu().numpy()
    
def lossless_entropy_encode(z):
    """Losslessly compress an array's raw bytes with zlib (level 9).

    Returns:
        ``(compressed_bytes, shape)`` where ``shape`` is ``z.shape``. The
        element dtype is not included in the return value.
    """
    raw_bytes = z.tobytes()
    return zlib.compress(raw_bytes, level=9), z.shape

def prep_dataset(sample):
    """Add size metadata and a normalised RGB tensor to one dataset sample.

    Intended for use with ``dataset.map``. The incoming ``sample`` dict is
    updated in place and returned.

    Args:
        sample: dict with a PIL image under the ``'image'`` key.

    Returns:
        The same dict with three extra keys: ``'width'`` and ``'height'``
        (taken from the image) and ``'img_tensor'``, a float tensor equal to
        ``pixels / 255 - 0.5``.
    """
    img = sample['image']
    sample['width'] = img.width
    sample['height'] = img.height

    # The encoder's first layer takes exactly 3 input channels, so every
    # non-RGB mode (L, CMYK, RGBA, but also P, LA, 1, I, ...) must be
    # converted, not just a hand-picked few.
    if img.mode != 'RGB':
        img = img.convert('RGB')

    t = transforms.functional.pil_to_tensor(img).to(torch.float)
    sample['img_tensor'] = t / 255 - 0.5
    return sample

In [5]:
# Build the model on the GPU and put it in inference mode: this notebook
# only runs the trained encoder, it never trains.
net = Network().to("cuda").eval()
# map_location keeps the load working even if the checkpoint was saved from
# a different device than the one available here.
# NOTE: torch.load unpickles the file -- only load checkpoints you trust.
checkpoint = torch.load("checkpoint.pth", map_location="cuda")
net.load_state_dict(checkpoint['model_state_dict'])

<All keys matched successfully>

In [6]:
%%time
# Load the first 12000 training examples, attach size metadata and the
# normalised tensor to each one, drop the original PIL column, and have
# the remaining columns returned as torch tensors.
dataset = (
    load_dataset("imagenet-1k", split='train[0:12000]')
    .map(prep_dataset)
    .remove_columns('image')
    .with_format("torch")
)

CPU times: user 391 ms, sys: 0 ns, total: 391 ms
Wall time: 1.23 s


In [7]:
# Per-image dimensions, one entry per example.
width = dataset['width']
height = dataset['height']

# Count how many images share each exact (width, height) pair in a single
# pass, instead of rescanning the full tensors once per unique pair.
unique_pairs, counts = torch.unique(
    torch.stack([width, height], dim=1), dim=0, return_counts=True
)
pair_counts = {
    (w, h): c for (w, h), c in zip(unique_pairs.tolist(), counts.tolist())
}
# Most common sizes first.
sizes = sorted(pair_counts.items(), key=lambda x: x[1], reverse=True)

# Only keep image sizes (exact width/height pairs) that have at least
# MIN_EXAMPLES_PER_SIZE examples. Filtering the list cannot index past its
# end when every size qualifies or when the list is empty.
MIN_EXAMPLES_PER_SIZE = 1000
sizes = [(size, count) for size, count in sizes if count >= MIN_EXAMPLES_PER_SIZE]
N = len(sizes)

In [8]:
%%time
# Group images by exact size, then encode and compress them in fixed-size
# batches. Each entry appended below describes one batch.
batch_size = 64
compressed_batch = []  # per batch: list of zlib-compressed latents
label = []             # per batch: labels of the images in that batch
latent_size = []       # per batch: shape of the batched latent array
for size, count in sizes:
    w, h = size
    # Only the two dimension columns are needed to decide membership, so
    # avoid materialising the image tensor of every row just to compare sizes.
    filtered = dataset.filter(
        lambda width_i, height_i: width_i == w and height_i == h,
        input_columns=['width', 'height'],
    )
    # Integer division: trailing images that do not fill a whole batch are
    # skipped, so every batch has exactly batch_size images.
    for i_batch in range(len(filtered) // batch_size):
        ind = range(i_batch * batch_size, (i_batch + 1) * batch_size)
        # Fetch the batch once and reuse it for both the images and labels.
        batch = filtered[ind]
        batch_img = batch['img_tensor']
        batch_label = batch['label']
        z = lossy_analysis_transform(batch_img)
        compressed = [lossless_entropy_encode(latent)[0] for latent in z]
        compressed_batch.append(compressed)
        label.append(batch_label)
        latent_size.append(z.shape)

Filter:   0%|          | 0/12000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/12000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/12000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/12000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/12000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/12000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/12000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/12000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/12000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/12000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/12000 [00:00<?, ? examples/s]

CPU times: user 4h 27min 32s, sys: 32.9 s, total: 4h 28min 5s
Wall time: 14min 11s


In [9]:
%%time
# One row per batch: the compressed latents, the matching labels, and the
# shape of the batched latent array.
batched_columns = {
    "compressed_batch": compressed_batch,
    "label": label,
    "latent_size": latent_size,
}
new_dataset = Dataset.from_dict(batched_columns)

CPU times: user 112 ms, sys: 28 ms, total: 140 ms
Wall time: 34.4 ms


In [11]:
# Upload the batched, compressed dataset to the dataset hub as the train split.
new_dataset.push_to_hub(
    "danjacobellis/imagenet_RDAE_batched_dry",
    split='train',
)

Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

In [None]:
# dataset = load_dataset("imagenet-1k",split='test')
# dataset = dataset.map(compress_dataset)
# dataset = dataset.remove_columns('image');
# dataset.push_to_hub("danjacobellis/imagenet_RDAE_dry",split='test')

In [None]:
# dataset = load_dataset("imagenet-1k",split='validation')
# dataset = dataset.map(compress_dataset)
# dataset = dataset.remove_columns('image');
# dataset.push_to_hub("danjacobellis/imagenet_RDAE_dry",split='validation')