<a href="https://colab.research.google.com/github/abel-bernabeu/autoencoder/blob/master/compressor_train.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Description

This notebook is an implementation of the encoder/decoder deep learning architecture from "Lossy Image Compression with Compressive Autoencoders", by
Lucas Theis, Wenzhe Shi, Andrew Cunningham & Ferenc Huszár, published in 2017
(https://arxiv.org/pdf/1703.00395v1.pdf).

# Setup

In [None]:
# Get the dataset if needed
import os.path
if not os.path.isdir('./data'):
    !rm  -rf image_dataset_part-a.zip
    !mkdir data -p
    !cd data ; wget https://www.dropbox.com/s/91rpg5dqkhhhkzu/image_dataset_part-a.zip
    !cd data ; unzip -q image_dataset_part-a.zip

# Get the latest source code if needed
if not os.path.isdir('./autoencoder'):
    !wget https://www.dropbox.com/s/jfu0ksttohnklkq/autoencoder-master.zip && \
    unzip -q autoencoder-master.zip && \
    mv autoencoder-master/autoencoder/ . && \
    rm autoencoder-master -rf

if not os.path.isdir('./share'):
    try:
        # Try to mount share from Google Drive when on Collab
        from google.colab import driveX
        drive.mount('/content/drive/')
        !ln -s  /content/drive/My\ Drive/archive/2020/aidl/ share
    except:
        # The fallback for when not in Collab is to download share from Dropbox
        !wget https://www.dropbox.com/s/76w9gsga8mz5ve4/share.tgz && tar xzf share.tgz

In [None]:
import torch
import torch.nn as nn
import autoencoder.models
import autoencoder.utils

# Sparsity at 1/2

Our baseline model focuses on training the proposed neural network to the maximum possible accuracy, without investing any effort in quantization or entropy coding of the features.

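This model only achieves 50% compression when the input is in Float32 (the hidden state holds 96 channels at 1/8 of the input resolution in each dimension, i.e. 96/64 = 1.5 values per pixel against 3 input values), but it serves three purposes: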

- prove that the input can be reconstructed accurately with the kind of neural network that is proposed in the paper.

- set an upper bound on accuracy (**43 dB**)

- give an estimation of how long it takes to train a state of the art compression model (**4 days on a Tesla P100**)


## Hyperparameters

In [None]:
# Settings of the baseline experiment. The dict is handed as a whole to
# autoencoder.utils.create_dataloaders and autoencoder.utils.train below;
# 'name' selects the share/<name> directory and 'port' the TensorBoard port.
hparams = dict(
    batch_size=32,
    lr=1e-3,
    device='cuda',
    block_width=128,
    block_height=128,
    hidden_state_num_channels=96,  # channels of the encoder output
    train_dataset_size=5000,
    test_dataset_size=500,
    num_epochs=12577,
    num_workers=4,
    name="twitter-compressor",
    port=6100,
    checkpointing_freq=10,
    inference_freq=20,
)

# Make sure the per-experiment directory share/<name> exists; IPython expands
# {hparams['name']} before handing the command to the shell.
!mkdir -p share/{hparams['name']}

## Model

In [None]:
class TwitterEncoder(torch.nn.Module):
    """Convolutional encoder mapping a 3-channel image to a hidden state.

    Layout: two stride-2 5x5 convolution blocks (3 -> 64 -> 128 channels),
    four residual 3x3 blocks at 128 channels, and a final stride-2 5x5
    convolution producing ``hidden_state_num_channels`` channels with no
    normalisation or activation. The three stride-2 layers shrink each
    spatial dimension by a factor of 8 for sizes divisible by 8.

    Args:
        hidden_state_num_channels: number of channels of the returned tensor.
    """

    def __init__(self, hidden_state_num_channels = 96):
        super().__init__()

        self.hidden_state_num_channels = hidden_state_num_channels

        def conv(in_channels, out_channels, kernel_size, stride):
            # Replicate padding of half the kernel size: 2 for 5x5, 1 for 3x3.
            return nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride,
                             padding=kernel_size // 2, padding_mode='replicate')

        def conv_bn_act(in_channels, out_channels, kernel_size, stride):
            # Convolution -> batch norm -> LeakyReLU, in that order.
            return nn.Sequential(
                conv(in_channels, out_channels, kernel_size, stride),
                nn.BatchNorm2d(out_channels),
                nn.LeakyReLU())

        # Downsampling stem.
        self.block1 = conv_bn_act(3, 64, 5, 2)
        self.block2 = conv_bn_act(64, 128, 5, 2)

        # Bodies of the residual blocks (the skip connection is added in forward).
        self.block3 = conv_bn_act(128, 128, 3, 1)
        self.block4 = conv_bn_act(128, 128, 3, 1)
        self.block5 = conv_bn_act(128, 128, 3, 1)
        self.block6 = conv_bn_act(128, 128, 3, 1)

        # Projection to the hidden state.
        self.block7 = nn.Sequential(conv(128, self.hidden_state_num_channels, 5, 2))

    def forward(self, x):
        """Encode ``x`` (N, 3, H, W) into the hidden state tensor."""
        x = self.block2(self.block1(x))
        for residual_body in (self.block3, self.block4, self.block5, self.block6):
            x = residual_body(x) + x
        return self.block7(x)


class TwitterDecoder(torch.nn.Module):
    """Convolutional decoder mapping a hidden state back to a 3-channel image.

    Layout: a sub-pixel upsampling block (hidden -> 512 channels), a 3x3 block
    reducing to 128 channels, three residual 3x3 blocks, a second sub-pixel
    upsampling block (128 -> 256 channels) and a final sub-pixel convolution
    producing 3 channels with no normalisation or activation. The three
    PixelShuffle(2) layers enlarge each spatial dimension by a factor of 8.

    Args:
        hidden_state_num_channels: number of channels of the input tensor.
    """

    def __init__(self, hidden_state_num_channels = 96):
        super().__init__()

        self.hidden_state_num_channels = hidden_state_num_channels

        def conv3x3(in_channels, out_channels):
            return nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1, padding_mode='replicate')

        def upsample(in_channels, out_channels):
            # The convolution emits 4x the channels; PixelShuffle(2) turns
            # them into a 2x larger feature map with out_channels channels.
            return [conv3x3(in_channels, out_channels * 4), nn.PixelShuffle(2)]

        def norm_act(channels):
            return [nn.BatchNorm2d(channels), nn.LeakyReLU()]

        self.block1 = nn.Sequential(*upsample(self.hidden_state_num_channels, 512), *norm_act(512))

        self.block2 = nn.Sequential(conv3x3(512, 128), *norm_act(128))

        # Bodies of the residual blocks (the skip connection is added in forward).
        self.block3 = nn.Sequential(conv3x3(128, 128), *norm_act(128))
        self.block4 = nn.Sequential(conv3x3(128, 128), *norm_act(128))
        self.block5 = nn.Sequential(conv3x3(128, 128), *norm_act(128))

        self.block6 = nn.Sequential(*upsample(128, 256), *norm_act(256))

        self.block7 = nn.Sequential(*upsample(256, 3))

    def forward(self, x):
        """Decode the hidden state ``x`` into a (N, 3, H, W) tensor."""
        x = self.block2(self.block1(x))
        for residual_body in (self.block3, self.block4, self.block5):
            x = residual_body(x) + x
        return self.block7(self.block6(x))


class TwitterCompressor(autoencoder.models.CompressionAutoencoder):
    """Baseline compression autoencoder pairing TwitterEncoder with TwitterDecoder.

    Args:
        hidden_state_num_channels: number of channels of the encoder output
            and decoder input. When None (the default) the value is read from
            the global ``hparams`` at construction time.
    """

    def __init__(self, hidden_state_num_channels=None):
        # Start the super() lookup at this class so that
        # CompressionAutoencoder.__init__ actually runs (as it does for
        # SparseTwitterCompressor). Naming the parent class here instead
        # would skip the parent's own initialiser.
        super(TwitterCompressor, self).__init__()
        if hidden_state_num_channels is None:
            hidden_state_num_channels = hparams['hidden_state_num_channels']
        self.encoder = TwitterEncoder(hidden_state_num_channels = hidden_state_num_channels)
        self.decoder = TwitterDecoder(hidden_state_num_channels = hidden_state_num_channels)


model = TwitterCompressor()

## Dataset

In [None]:
# create_dataloaders returns six values: the two loaders plus the few_* x/y
# pairs that are forwarded to autoencoder.utils.train in the training cell.
(
    train_loader, test_loader,
    few_train_x, few_train_y,
    few_test_x, few_test_y,
) = autoencoder.utils.create_dataloaders(hparams)

## TensorBoard

In [None]:
try:
    # When on Google Colab try to launch an embedded TensorBoard
    from google.colab import drive
    %load_ext tensorboard
    from tensorboard import notebook
    notebook.start('--logdir share/' + hparams['name'] + '/runs/ --port ' + str(hparams['port']))
except:
    pass

## Training

In [None]:
# Train the baseline model. KeyboardInterrupt is caught so that interrupting
# the kernel ends the cell with a message instead of a traceback.
try:
    autoencoder.utils.train(
        hparams=hparams,
        model=model,
        train_loader=train_loader,
        test_loader=test_loader,
        few_train_x=few_train_x,
        few_train_y=few_train_y,
        few_test_x=few_test_x,
        few_test_y=few_test_y,
    )
except KeyboardInterrupt:
    print('Exiting from training early')

# Sparsity at 1/4

## Hyperparameters

In [None]:
# Settings of the "sparsity at 1/4" experiment. The dict is handed as a whole
# to autoencoder.utils.create_dataloaders and autoencoder.utils.train below;
# 'name' selects the share/<name> directory and 'port' the TensorBoard port.
hparams = dict(
    batch_size=40,
    lr=1e-6,
    device='cuda',
    block_width=224,
    block_height=224,
    hidden_state_num_channels=48,  # channels of the encoder output
    train_dataset_size=1000,
    test_dataset_size=500,
    num_epochs=16000,
    num_workers=4,
    name="sparse-twitter-compressor",
    port=6200,
    checkpointing_freq=10,
    inference_freq=20,
)

# Make sure the per-experiment directory share/<name> exists; IPython expands
# {hparams['name']} before handing the command to the shell.
!mkdir -p share/{hparams['name']}

## Model

In [None]:
class SparseTwitterCompressor(autoencoder.models.CompressionAutoencoder):
    """Compression autoencoder with a configurable hidden state width.

    Args:
        hidden_state_num_channels: channel count shared by the TwitterEncoder
            output and the TwitterDecoder input.
    """

    # NOTE: the default value below is evaluated once, when this class
    # statement runs, so it captures the hparams in effect at that moment;
    # later changes to hparams do not alter it.
    def __init__(self, hidden_state_num_channels= hparams['hidden_state_num_channels']):
        super().__init__()
        num_channels = hidden_state_num_channels
        self.encoder = TwitterEncoder(hidden_state_num_channels=num_channels)
        self.decoder = TwitterDecoder(hidden_state_num_channels=num_channels)


model = SparseTwitterCompressor()

## Dataset

In [None]:
# create_dataloaders returns six values: the two loaders plus the few_* x/y
# pairs that are forwarded to autoencoder.utils.train in the training cell.
(
    train_loader, test_loader,
    few_train_x, few_train_y,
    few_test_x, few_test_y,
) = autoencoder.utils.create_dataloaders(hparams)

## TensorBoard

In [None]:
try:
    # When on Google Colab try to launch an embedded TensorBoard
    from google.colab import drive
    %load_ext tensorboard
    from tensorboard import notebook
    notebook.start('--logdir share/' + hparams['name'] + '/runs/ --port ' + str(hparams['port']))
except:
    pass

## Training

In [None]:
# Train the 1/4-sparsity model. KeyboardInterrupt is caught so that
# interrupting the kernel ends the cell with a message instead of a traceback.
try:
    autoencoder.utils.train(
        hparams=hparams,
        model=model,
        train_loader=train_loader,
        test_loader=test_loader,
        few_train_x=few_train_x,
        few_train_y=few_train_y,
        few_test_x=few_test_x,
        few_test_y=few_test_y,
    )
except KeyboardInterrupt:
    print('Exiting from training early')

# Uniform quantization to 3 bits

## Hyperparameters

In [None]:
# Settings of the 3-bit uniform quantization experiment. The dict is handed as
# a whole to autoencoder.utils.create_dataloaders and autoencoder.utils.train
# below; 'name' selects the share/<name> directory and 'port' the TensorBoard
# port.
hparams = dict(
    batch_size=40,
    lr=1e-6,
    device='cuda',
    block_width=224,
    block_height=224,
    hidden_state_num_channels=48,  # channels of the encoder output
    train_dataset_size=1000,
    test_dataset_size=500,
    num_epochs=2500,
    num_workers=4,
    name="uniform-quant-twitter-compressor",
    port=6300,
    checkpointing_freq=10,
    inference_freq=20,
)

# Make sure the per-experiment directory share/<name> exists; IPython expands
# {hparams['name']} before handing the command to the shell.
!mkdir -p  share/{hparams['name']}

## Model

In [None]:
class QuantizingCompressionAutoencoder(torch.nn.Module):
    """Autoencoder that quantizes and dequantizes the hidden state.

    Pipeline: encoder -> Quantize -> Dequantize -> decoder -> hardtanh, with
    the result rescaled from hardtanh's [-1, 1] range to [0, 1].

    Args:
        num_bits: bit depth requested for every hidden state channel.
        encoder: optional encoder module. It must expose a
            ``hidden_state_num_channels`` attribute. May also be assigned to
            ``self.encoder`` after construction.
        decoder: optional decoder module. May also be assigned to
            ``self.decoder`` after construction.

    Both ``encoder`` and ``decoder`` must be set before calling ``forward``.
    """

    def __init__(self, num_bits, encoder=None, decoder=None):
        super(QuantizingCompressionAutoencoder, self).__init__()
        self.encoder = encoder
        self.num_bits = num_bits
        self.quantize = autoencoder.models.Quantize()
        self.dequantize = autoencoder.models.Dequantize()
        self.decoder = decoder

    def forward(self, x):
        """Return the reconstruction of ``x`` with values in [0, 1]."""
        h = self.encoder(x)

        # One bit-depth entry per (sample, hidden channel), all set to
        # num_bits and created directly on the input's device.
        batch = x.size(0)
        per_channel_num_bits = self.num_bits * torch.ones(
            batch, self.encoder.hidden_state_num_channels, device=x.device)
        hq, per_channel_min, per_channel_max, per_channel_num_bits = self.quantize(h, quantization_select = None, per_channel_num_bits = per_channel_num_bits)
        hd = self.dequantize(hq, per_channel_min, per_channel_max, per_channel_num_bits)

        y = self.decoder(hd)

        # hardtanh clamps to [-1, 1]; shift and scale that range to [0, 1].
        yp = torch.nn.functional.hardtanh(y)

        return (yp + 1) * 0.5

## Dataset

In [None]:
# create_dataloaders returns six values: the two loaders plus the few_* x/y
# pairs that are forwarded to autoencoder.utils.train in the training cell.
(
    train_loader, test_loader,
    few_train_x, few_train_y,
    few_test_x, few_test_y,
) = autoencoder.utils.create_dataloaders(hparams)

In [None]:
# 3-bit quantizing model.
qmodel = QuantizingCompressionAutoencoder(num_bits=3)

# Transfer learning from the non-quantized model: the encoder and decoder
# modules of `model` are reused as-is (shared, not copied).
qmodel.encoder, qmodel.decoder = model.encoder, model.decoder

# Freeze the encoder: none of its parameters will receive gradients. Because
# the module is shared, this also applies to model.encoder.
for encoder_param in qmodel.encoder.parameters():
    encoder_param.requires_grad_(False)

## TensorBoard

In [None]:
try:
    # When on Google Colab try to launch an embedded TensorBoard
    from google.colab import drive
    %load_ext tensorboard
    from tensorboard import notebook
    notebook.start('--logdir share/' + hparams['name'] + '/runs/ --port ' + str(hparams['port']))
except:
    pass

## Training

In [None]:
# Train the 3-bit quantizing model. KeyboardInterrupt is caught so that
# interrupting the kernel ends the cell with a message instead of a traceback.
try:
    autoencoder.utils.train(
        hparams=hparams,
        model=qmodel,
        train_loader=train_loader,
        test_loader=test_loader,
        few_train_x=few_train_x,
        few_train_y=few_train_y,
        few_test_x=few_test_x,
        few_test_y=few_test_y,
    )
except KeyboardInterrupt:
    print('Exiting from training early')

# Sparsity at 1/8

## Hyperparameters

In [None]:
# Settings of the "sparsity at 1/8" experiment. The dict is handed as a whole
# to autoencoder.utils.create_dataloaders and autoencoder.utils.train below;
# 'name' selects the share/<name> directory and 'port' the TensorBoard port.
hparams = dict(
    batch_size=40,
    lr=1e-6,
    device='cuda',
    block_width=224,
    block_height=224,
    hidden_state_num_channels=24,  # channels of the encoder output
    train_dataset_size=1000,
    test_dataset_size=500,
    num_epochs=16000,
    num_workers=4,
    name="sparse-2-twitter-compressor",
    port=6400,
    checkpointing_freq=10,
    inference_freq=20,
)

# Make sure the per-experiment directory share/<name> exists; IPython expands
# {hparams['name']} before handing the command to the shell.
!mkdir -p share/{hparams['name']}

## Model

In [None]:
# Rebuild `model` with the channel count of the current hparams, passed
# explicitly rather than relying on the constructor's default value.
model = SparseTwitterCompressor(
    hidden_state_num_channels=hparams['hidden_state_num_channels'],
)

## Dataset

In [None]:
# create_dataloaders returns six values: the two loaders plus the few_* x/y
# pairs that are forwarded to autoencoder.utils.train in the training cell.
(
    train_loader, test_loader,
    few_train_x, few_train_y,
    few_test_x, few_test_y,
) = autoencoder.utils.create_dataloaders(hparams)

## TensorBoard

In [None]:
try:
    # When on Google Colab try to launch an embedded TensorBoard
    from google.colab import drive
    %load_ext tensorboard
    from tensorboard import notebook
    notebook.start('--logdir share/' + hparams['name'] + '/runs/ --port ' + str(hparams['port']))
except:
    pass

## Training

In [None]:
# Train the 1/8-sparsity model. KeyboardInterrupt is caught so that
# interrupting the kernel ends the cell with a message instead of a traceback.
try:
    autoencoder.utils.train(
        hparams=hparams,
        model=model,
        train_loader=train_loader,
        test_loader=test_loader,
        few_train_x=few_train_x,
        few_train_y=few_train_y,
        few_test_x=few_test_x,
        few_test_y=few_test_y,
    )
except KeyboardInterrupt:
    print('Exiting from training early')

# Uniform quantization to 6 bits

## Hyperparameters

In [None]:
# Settings of the 6-bit uniform quantization experiment. The dict is handed as
# a whole to autoencoder.utils.create_dataloaders and autoencoder.utils.train
# below; 'name' selects the share/<name> directory and 'port' the TensorBoard
# port.
hparams = dict(
    batch_size=40,
    lr=1e-6,
    device='cuda',
    block_width=224,
    block_height=224,
    hidden_state_num_channels=24,  # channels of the encoder output
    train_dataset_size=1000,
    test_dataset_size=500,
    num_epochs=20000,
    num_workers=4,
    name="uniform-quant-2-twitter-compressor",
    port=6500,
    checkpointing_freq=10,
    inference_freq=20,
)

# Make sure the per-experiment directory share/<name> exists; IPython expands
# {hparams['name']} before handing the command to the shell.
!mkdir -p share/{hparams['name']}

## Model

In [None]:
# 6-bit quantizing model.
qmodel = QuantizingCompressionAutoencoder(num_bits=6)

# Transfer learning from the non-quantized model: the encoder and decoder
# modules of `model` are reused as-is (shared, not copied).
qmodel.encoder, qmodel.decoder = model.encoder, model.decoder

# Freeze the encoder: none of its parameters will receive gradients. Because
# the module is shared, this also applies to model.encoder.
for encoder_param in qmodel.encoder.parameters():
    encoder_param.requires_grad_(False)

## Dataset

In [None]:
# create_dataloaders returns six values: the two loaders plus the few_* x/y
# pairs that are forwarded to autoencoder.utils.train in the training cell.
(
    train_loader, test_loader,
    few_train_x, few_train_y,
    few_test_x, few_test_y,
) = autoencoder.utils.create_dataloaders(hparams)

## TensorBoard

In [None]:
try:
    # When on Google Colab try to launch an embedded TensorBoard
    from google.colab import drive
    %load_ext tensorboard
    from tensorboard import notebook
    notebook.start('--logdir share/' + hparams['name'] + '/runs/ --port ' + str(hparams['port']))
except:
    pass

## Training

In [None]:
# Train the 6-bit quantizing model. KeyboardInterrupt is caught so that
# interrupting the kernel ends the cell with a message instead of a traceback.
try:
    autoencoder.utils.train(
        hparams=hparams,
        model=qmodel,
        train_loader=train_loader,
        test_loader=test_loader,
        few_train_x=few_train_x,
        few_train_y=few_train_y,
        few_test_x=few_test_x,
        few_test_y=few_test_y,
    )
except KeyboardInterrupt:
    print('Exiting from training early')