# Convolutional autoencoder in PyTorch

**[Faisal Z. Qureshi](http://vclab.science.uoit.ca)**  

Check out excellent PyTorch tutorials by "SherlockLiao" at [https://github.com/L1aoXingyu/pytorch-beginner](https://github.com/L1aoXingyu/pytorch-beginner)

In [1]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torch.autograd import Variable

import torchvision
from torchvision import transforms
from torchvision.datasets import MNIST
from torchvision.utils import save_image

import numpy as np

import matplotlib.pyplot as plt
%matplotlib inline

Check whether CUDA support is available

In [2]:
# List every CUDA device PyTorch can see, then report which one is current.
num_devices = torch.cuda.device_count()
for device_index in range(num_devices):
    print("Found device:", torch.cuda.get_device_name(device_index))

if num_devices > 0:
    active_device = torch.cuda.current_device()
    print("Current cuda device is", torch.cuda.get_device_name(active_device))
else:
    print("No GPU device found")

No GPU device found


In [3]:
class cudafy:
    """Helper that moves objects onto a CUDA device when CUDA is available.

    When CUDA is unavailable every method degrades to a no-op, so the same
    notebook code runs on CPU-only machines.
    """

    def __init__(self, device=None):
        """Remember the target device.

        A truthy `device` is kept only when CUDA is available; in every
        other case the device index 0 is used.
        """
        keep_requested = torch.cuda.is_available() and device
        self.device = device if keep_requested else 0

    def name(self):
        """Return the name of the target CUDA device, or a notice if CUDA is absent."""
        if not torch.cuda.is_available():
            return 'Cuda is not available.'
        return torch.cuda.get_device_name(self.device)

    def put(self, x):
        """Return x moved to the target device; x itself when CUDA is absent."""
        return x.to(device=self.device) if torch.cuda.is_available() else x

    def __call__(self, x):
        """Shorthand for `put(x)`."""
        return self.put(x)

    def get(self, x):
        """Return x on the CPU; x itself when it is not a CUDA tensor."""
        return x.to(device='cpu') if x.is_cuda else x
    
def cpu(x):
    """Return x on the CPU; x is returned unchanged if it is not on CUDA."""
    return x.to(device='cpu') if x.is_cuda else x

Set up dataset

In [4]:
# Number of images per mini-batch served by the DataLoader.
batch_size = 16

# Convert each image to a tensor, then normalise with mean 0.5 and std 0.5
# (to_img later undoes this with 0.5 * (x + 1)).
my_transforms = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))]
)

# MNIST is downloaded into ../datasets if needed; batches are shuffled.
dataset = MNIST('../datasets', transform=my_transforms, download=True)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

In [5]:
def to_img(x):
    """Map network output back to image range and shape.

    Rescales x with 0.5 * (x + 1), clamps the result to [0, 1] and views it
    as a [batch, 1, 28, 28] tensor, where batch is x.size(0).
    """
    batch = x.size(0)
    rescaled = (0.5 * (x + 1)).clamp(0, 1)
    return rescaled.view(batch, 1, 28, 28)

Let's make an autoencoder

In [6]:
class autoencoder(nn.Module):
    """Convolutional autoencoder for single-channel 28x28 images.

    The encoder compresses a [b, 1, 28, 28] batch to [b, 8, 2, 2]; the
    decoder expands it back to [b, 1, 28, 28] and ends in Tanh, so outputs
    lie in [-1, 1].
    """

    def __init__(self):
        super().__init__()

        # Layer order is significant: it fixes the state_dict key names.
        encoder_layers = [
            nn.Conv2d(1, 16, kernel_size=3, stride=3, padding=1),   # b, 16, 10, 10
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),                  # b, 16, 5, 5
            nn.Conv2d(16, 8, kernel_size=3, stride=2, padding=1),   # b, 8, 3, 3
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=1),                  # b, 8, 2, 2
        ]
        self.encoder = nn.Sequential(*encoder_layers)

        decoder_layers = [
            nn.ConvTranspose2d(8, 16, kernel_size=3, stride=2),             # b, 16, 5, 5
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(16, 8, kernel_size=5, stride=3, padding=1),  # b, 8, 15, 15
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(8, 1, kernel_size=2, stride=2, padding=1),   # b, 1, 28, 28
            nn.Tanh(),
        ]
        self.decoder = nn.Sequential(*decoder_layers)

    def forward(self, x):
        """Encode x and immediately decode it, returning the reconstruction."""
        return self.decoder(self.encoder(x))

Now we train

In [7]:
# Optimiser hyper-parameters.
learning_rate = 1e-2
weight_decay = 1e-5

# Device helper and the model placed on it (stays on CPU without CUDA).
gpu = cudafy()
model = gpu(autoencoder())

# Reconstruction loss: mean squared error between output and input image.
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=learning_rate,
    weight_decay=weight_decay,
)

In [8]:
num_epochs = 1

for epoch in range(num_epochs):

    for img, _ in dataloader:
        # img is a [batch_size, num_channels, 28, 28] tensor (num_channels
        # is 1 here); the label is not needed to train an autoencoder.
        img = gpu(img)

        output = model(img)  # Forward
        loss = criterion(output, img)  # The input is its own target

        optimizer.zero_grad()  # Backward & update weights
        loss.backward()
        optimizer.step()

    # Reports the loss of the last mini-batch of the epoch.
    print('epoch [{}/{}], loss:{:.4f}'
          .format(epoch + 1, num_epochs, loss.item()))
    if epoch % 10 == 0:
        # Save the reconstructions of the last mini-batch as an image grid.
        pic = to_img(output.detach().cpu())
        filename = 'image-conv_{}.png'.format(epoch)
        # The file name is formatted before printing so the message shows
        # the real name rather than a literal '{}' placeholder.
        print('Saving {}'.format(filename))
        save_image(pic, filename)

epoch [1/1], loss:0.1291
Saved file


## Saving the trained model

Now that training is done, it is a good idea to save the trained model.

We are interested in the `state_dict` dictionary, which contains the parameters associated with each layer in the model.  The optimizer has a `state_dict` too.

In [None]:
# Show the name and tensor size of every entry in the model's state_dict.
print("Model's state_dict:")
for entry_name, entry_tensor in model.state_dict().items():
    print(entry_name, " -- ", entry_tensor.size())

In [None]:
# Call the method so the cell displays the parameter dictionary itself
# rather than the repr of the bound method.
model.state_dict()

In [None]:
print("Optimizer's state_dict:")
for var_name in optimizer.state_dict():
    print(var_name)
    #print(var_name, "\t", optimizer.state_dict()[var_name])

In [None]:
# NOTE(review): the save call below is left commented out, but a later cell
# loads 'conv-ae-weights.pt' with torch.load, which only works if that file
# already exists. Un-comment it on a fresh run; be aware that doing so
# overwrites any existing weights file with the current model's parameters.
#torch.save(model.state_dict(), 'conv-ae-weights.pt')

## Inference

Now let's pass an image through the learned model and see what we get

In [None]:
image, label = dataset[1]
print(image.shape, label)

import matplotlib.pyplot as plt
%matplotlib inline
plt.imshow(image[0])

In [None]:
# `image` comes from `dataset`, whose transform already applies
# Normalize((0.5,), (0.5,)), so it is in the value range the model was
# trained on. Normalising again here would feed the model out-of-range input.
print(image.shape)
# Add the leading batch dimension the model expects, without mutating `image`.
ni = image.unsqueeze(0)
print(ni.shape)

In [None]:
# Inference only: skip building the autograd graph for the forward pass.
with torch.no_grad():
    oi = model(gpu(ni))
# Bring the reconstruction back to the CPU and to image range before plotting.
oi_ = to_img(oi.cpu())
plt.imshow(oi_[0, 0, :, :])

## Loading the saved model

We can easily load the model as follows

In [None]:
# A second autoencoder with freshly initialised (untrained) parameters,
# placed on the same device as the first model.
untrained_net = autoencoder()
model2 = gpu(untrained_net)

The following should produce garbage, since we haven't trained the model yet

In [None]:
# Inference only: skip building the autograd graph for the forward pass.
with torch.no_grad():
    oi = model2(gpu(ni))
# Move the output to the CPU first: matplotlib cannot plot a CUDA tensor.
oi_ = to_img(oi.cpu())
plt.imshow(oi_[0, 0, :, :])

In [None]:
# Print the index, images and labels of the first batch only, then stop.
for i, batch in enumerate(dataloader):
    x, l = batch
    print(i, x, l, sep="\n")
    break

Now let's load the model and see what happens

In [None]:
# Load the saved parameters. map_location='cpu' lets a checkpoint written on
# a GPU machine be read on a CPU-only one; load_state_dict then copies the
# values into model2's parameters on whatever device model2 lives on.
model2.load_state_dict(torch.load('conv-ae-weights.pt', map_location='cpu'))

image, label = dataset[1]
# `image` is already normalised by the dataset transform
# (Normalize((0.5,), (0.5,))), so only the batch dimension is added here.
ni = image.unsqueeze(0)

# Inference only: skip building the autograd graph for the forward pass.
with torch.no_grad():
    oi = model2(gpu(ni))
# Move the output to the CPU first: matplotlib cannot plot a CUDA tensor.
oi_ = to_img(oi.cpu())
plt.imshow(oi_[0, 0, :, :])

Notice that when you load the model all is well in the world of autoencoders

## Using part of the trained network
​
Now let's assume we are interested in the encoder only, i.e., we want to pass in an MNIST image and get its encoding (a 3-dimensional tensor of shape 8×2×2 per image).  We can do it as follows.

In [None]:
class my_encoder(nn.Module):
    """Encoder-only network built from the layers of the global `model2`.

    The layer objects of `model2.encoder` are reused, not copied, and are
    chained in the same order inside a new nn.Sequential.
    """

    def __init__(self):
        super().__init__()

        encoder_layers = model2.encoder.children()
        self.features = nn.Sequential(*encoder_layers)

    def forward(self, x):
        """Return the encoding of x produced by the reused encoder layers."""
        return self.features(x)

In [None]:
encoder = my_encoder()
# The encoder reuses model2's layers, which live on the device chosen by
# `gpu`, so the input must be moved there too to avoid a device mismatch.
encoding = encoder(gpu(ni))
print(encoding.shape)