In [None]:
!wget https://huggingface.co/danjacobellis/walloc/resolve/main/RGB_243c_J4_nf8_v1.0.2.pth

In [None]:
!wget https://huggingface.co/danjacobellis/walloc/resolve/main/RGB_243c_J4_nf8_v1.0.2.json

In [1]:
import io
import torch
import PIL
from walloc import walloc
from datasets import load_dataset, Image
from torchvision.transforms.v2 import ToPILImage, PILToTensor, CenterCrop, Resize, Compose

In [2]:
# Device the codec and every input tensor are moved to (hard-coded GPU index).
device = "cuda:3"

# Read the codec hyperparameters and the trained checkpoint from local files.
codec_config = walloc.load_config("RGB_243c_J4_nf8_v1.0.2.json")
# NOTE(review): weights_only=False lets torch.load unpickle arbitrary objects;
# only acceptable when the checkpoint comes from a trusted source.
checkpoint = torch.load(
    "RGB_243c_J4_nf8_v1.0.2.pth",
    map_location="cpu",
    weights_only=False,
)

# Forward the architecture fields from the config to the constructor by name.
codec = walloc.Codec2D(**{
    field: getattr(codec_config, field)
    for field in (
        "channels",
        "J",
        "Ne",
        "Nd",
        "latent_dim",
        "latent_bits",
        "lightweight_encode",
    )
})

# Restore the trained parameters, move the model to the target device, and
# switch it to evaluation mode (the trailing ';' hides the cell output).
codec.load_state_dict(checkpoint['model_state_dict'])
codec = codec.to(device)
codec.eval();

In [3]:
# Load the ImageNet-1k dataset repo; later cells index the result by split
# name ('validation', 'train').
inet1k = load_dataset(path='timm/imagenet-1k-wds')

Resolving data files:   0%|          | 0/1024 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/64 [00:00<?, ?it/s]

Loading dataset shards:   0%|          | 0/292 [00:00<?, ?it/s]

In [4]:
# Shared resize transform applied to every image before encoding:
# target size 288 with Lanczos resampling.
resize = Resize(size=288, interpolation=PIL.Image.Resampling.LANCZOS)

In [5]:
def compress(sample):
    """Encode one dataset example into a lossless WebP of its codec latent.

    The image is converted to RGB, resized with the notebook-level ``resize``
    transform, scaled by ``x/255 - 0.5``, padded, passed through the codec's
    wavelet analysis and encoder, and the resulting latent is packed into a
    PIL image that is saved as lossless WebP.

    Args:
        sample: Mapping with a ``'jpg'`` entry that supports ``.convert("RGB")``
            (presumably a PIL image decoded by ``datasets`` — confirm).

    Returns:
        dict: ``{'image': <bytes>}`` holding the encoded WebP file contents.
    """
    img = resize(sample['jpg'].convert("RGB"))
    x = PILToTensor()(img).to(torch.float)
    # Scale pixel values and add a leading batch dimension.
    x = (x / 255 - 0.5).unsqueeze(0).to(device)

    # Inference only: skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        x_padded = walloc.pad(x, p=16)
        X = codec.wavelet_analysis(x_padded, codec.J)
        Y = codec.encoder(X)

    # NOTE(review): the literal 3 is passed as the third argument of
    # latent_to_pil; presumably the channel count of the packed image — confirm.
    webp = walloc.latent_to_pil(Y.to("cpu"), codec.latent_bits, 3)[0]

    # getvalue() returns an independent bytes copy, so the buffer can be closed
    # and the result does not depend on a memoryview into a live BytesIO.
    with io.BytesIO() as buff:
        webp.save(buff, format='WEBP', lossless=True)
        webp_bytes = buff.getvalue()

    return {
        'image': webp_bytes,
    }

In [None]:
# Compress each split and publish it to the Hub under the same split name.
for split in ['validation', 'train']:
    # Drop the source columns inside map() itself: compress() still receives the
    # full example, but the original JPEGs are not written into the mapped
    # dataset only to be removed afterwards.
    compressed = inet1k[split].map(
        compress,
        remove_columns=['__key__', '__url__', 'jpg'],
    )
    # Mark the raw WebP bytes as an image feature before uploading.
    compressed = compressed.cast_column('image', Image())
    compressed.push_to_hub("danjacobellis/inet1k_288_walloc", split=split)

Map:   0%|          | 0/50000 [00:00<?, ? examples/s]

Uploading the dataset shards:   0%|          | 0/7 [00:00<?, ?it/s]

Map:   0%|          | 0/7143 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/72 [00:00<?, ?ba/s]

Map:   0%|          | 0/7143 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/72 [00:00<?, ?ba/s]

Map:   0%|          | 0/7143 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/72 [00:00<?, ?ba/s]

Map:   0%|          | 0/7143 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/72 [00:00<?, ?ba/s]

Map:   0%|          | 0/7143 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/72 [00:00<?, ?ba/s]

Map:   0%|          | 0/7143 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/72 [00:00<?, ?ba/s]

Map:   0%|          | 0/7142 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/72 [00:00<?, ?ba/s]

Map:   0%|          | 0/1281167 [00:00<?, ? examples/s]

