# **Homework 9: Variational Autoencoders**

## **About**

### **Due**

Monday 4/29/19, 11:59 PM CST

### **Goal**

This homework focuses on creating variational autoencoders applied to the MNIST dataset.

## Dev Environment

In [None]:
!pip install kaggle-cli
!pip install wget

In [None]:
import torch
import torchvision
from torch.utils.data import DataLoader
from matplotlib import pyplot as plt
%matplotlib inline
# import wget
import zipfile
import os

In [None]:
from google.colab import auth
auth.authenticate_user()

In [None]:
os.environ['KAGGLE_USERNAME']="skooch"
os.environ['KAGGLE_KEY']="c5356997e70d50a333ef244bd276e51d"

# upload checkpoint to GCS
project_id = 'mammography-198911'
bucket_name = 'pneumonia'

!gcloud config set project {project_id}

In [None]:
!gsutil cp gs://{bucket_name}/model_20.pt ./model_2.pt 

In [None]:
if not os.path.exists("celeba-dataset.zip"):
#   !kaggle datasets download -d jessicali9530/celeba-dataset
  !gsutil cp gs://{bucket_name}/celeba-dataset.zip ./celeba-dataset.zip
  zip_ref = zipfile.ZipFile('celeba-dataset.zip', 'r')
  zip_ref.extractall('data')
  zip_ref.close()

  zip_ref = zipfile.ZipFile('data/img_align_celeba.zip', 'r')
  zip_ref.extractall('data/images')
  zip_ref.close()

In [None]:
if not os.path.exists("Training_Pictures.zip"):
  !wget https://s3.eu-west-3.amazonaws.com/deep.skoo.ch/Training_Pictures.zip
  zip_ref = zipfile.ZipFile('Training_Pictures.zip', 'r')
  zip_ref.extractall('data/images')
  zip_ref.close()
  
# if not os.path.exists("GWB_200x200_JPEG.zip"):
#   !wget https://s3.eu-west-3.amazonaws.com/deep.skoo.ch/GWB_200x200_JPEG.zip
#   zip_ref = zipfile.ZipFile('GWB_200x200_JPEG.zip', 'r')
#   zip_ref.extractall('data/images')
#   zip_ref.close()  

In [None]:
if not os.path.exists("wiki_images2.zip"):
  !gsutil cp gs://{bucket_name}/wiki_images2.zip ./wiki_images.zip
  zip_ref = zipfile.ZipFile('wiki_images.zip', 'r')
  zip_ref.extractall('data/images')
  zip_ref.close()
  
if not os.path.exists("imdb_images3.zip"):
  !gsutil cp gs://{bucket_name}/imdb_images3.zip ./imdb_images3.zip
  zip_ref = zipfile.ZipFile('imdb_images3.zip', 'r')
  zip_ref.extractall('data/images')
  zip_ref.close()    

In [None]:
BATCH_SIZE = 64
data_path = "data/images"

transform = torchvision.transforms.Compose(
    [torchvision.transforms.RandomHorizontalFlip(p=0.5), 
      torchvision.transforms.RandomApply([
        torchvision.transforms.RandomAffine(degrees=5, translate=(0.05,0.05), scale=(0.9,1.1), shear=2, resample=False, fillcolor=0),        
      ], 0.4),
      torchvision.transforms.RandomResizedCrop((192,160), scale=(0.90, 1.1)),
      torchvision.transforms.ToTensor(),
    ])

train_dataset = torchvision.datasets.ImageFolder(
        root=data_path,
        transform=transform
    )

train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    num_workers=2,
    shuffle=True
)

In [None]:
for (images, _) in train_loader:
  plt.imshow(images[0].permute(1,2,0))
  plt.show()
  break

In [None]:
train_dataset

In [None]:
def output_size(i, k=3, p=2, s=1, d=1):
    """Return the output length of a convolution along one spatial dimension.

    Args:
        i: input length.
        k: kernel size.
        p: padding applied to each side.
        s: stride.
        d: dilation.

    Returns:
        The output length as an integer (for integer arguments).
    """
    # k + (k-1)*(d-1) == d*(k-1) + 1 is the dilated kernel extent. Floor
    # division is required: with s > 1 a partially covered final window
    # produces no output, so true division would report a fractional size.
    return (i + 2 * p - d * (k - 1) - 1) // s + 1
  
output_size(i=64, k=3, p=1)

### Bigger Model

In [None]:
epoch_list = []

In [None]:
def vae_loss(x, x_hat, mu=None, logvar=None):
    """Reconstruction loss with an optional KL-divergence term.

    Args:
        x: original input batch.
        x_hat: reconstruction of ``x`` (same shape).
        mu: latent means; the KL term is added only when both ``mu`` and
            ``logvar`` are given.
        logvar: latent log-variances, or None for a plain autoencoder.

    Returns:
        Scalar tensor: mean squared error, plus the KL divergence to a
        standard normal divided by the number of elements in ``x``.
    """
    # mse_loss takes (input, target); the value is symmetric in the two.
    loss = torch.nn.functional.mse_loss(x_hat, x)

    if mu is not None and logvar is not None:
        kld = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
        # Normalise by the actual element count of the batch so the KL term
        # is on the same per-element scale as the mean-reduced MSE. The old
        # divisor (global BATCH_SIZE * 784) hard-coded 28x28 images and was
        # wrong for any other image size or for a short final batch.
        loss = loss + kld / x.numel()

    return loss
  
def train(model, train_loader, optimizer, print_metrics=100, num_epochs=10, display_images=5, scheduler=None, save_path="model_2.pt"):
    model.train()
    try:
      for epoch in range(num_epochs):
          upload_path = save_path.split(".")[0] + str(len(epoch_list) % 2) + ".pt" 
          
          tr_loss = 0.0
          batch_losses = []
          for i, (inputs, _) in enumerate(train_loader):
              if use_cuda and torch.cuda.is_available():
                  inputs = inputs.cuda()

              optimizer.zero_grad()

              recon, code, logvar = model(inputs)
              if model.variational:
                  loss = vae_loss(inputs, recon, code, logvar=logvar)
              else:  
                  loss = vae_loss(inputs, recon, code, logvar=None)

              loss.backward()
              optimizer.step()

              tr_loss += loss.item()
              batch_losses.append(loss.item())
              if print_metrics and i % print_metrics == 0:
                  print("Batch:", i, "loss:", np.mean(batch_losses))
                  batch_losses = []

              if i % 1000 == 0 and i > 0:
                print("\tEpoch", len(epoch_list), "batch", i, "loss:", loss.item())

              if i % 4000 == 0 and i > 0:
                fig, ax = plt.subplots(1, 2, figsize=(6,6))
                ax[0].imshow(inputs[0].cpu().permute(1, 2, 0) )
                ax[1].imshow(recon[0].cpu().detach().permute(1, 2, 0) )
                plt.show()

          print("Epoch:", len(epoch_list), "Loss:", tr_loss)
          
          epoch_list.append(epoch)
          
          if scheduler is not None and epoch % 5 == 0 and epoch > 1:
              scheduler.step()

          if epoch % display_images == 0:
              # plot a few random images
              try:
                  fig, ax = plt.subplots(1, 2, figsize=(6,6))
                  ax[0].imshow(inputs[0].cpu().permute(1, 2, 0) )
                  ax[1].imshow(recon[0].cpu().detach().permute(1, 2, 0) )
                  plt.show()
              except Exception as e:
                  print(e)

              torch.save(model.state_dict(), save_path)
#               !gsutil cp ./model.pt gs://{bucket_name}/
                
    except KeyboardInterrupt:
      print("Interrupting... Saving model...")
      torch.save(model.state_dict(), save_path)
      !gsutil cp {save_path} gs://{bucket_name}/{upload_path}
      return 
    
    !gsutil cp {save_path} gs://{bucket_name}/{upload_path}
    
def count_params(model):
  """Return the total number of elements across all trainable parameters of ``model``."""
  sizes = []
  for param in model.parameters():
    # frozen parameters (requires_grad=False) are not counted
    if param.requires_grad:
      sizes.append(np.prod(param.size()))
  return sum(sizes)

In [None]:
from torch import nn
class Encoder(nn.Module):
    """Five-block convolutional encoder producing a latent code.

    Each block runs a few 3x3 convolutions (padding 1, so spatial size is
    kept), concatenates their outputs along the channel axis, mixes them with
    a 1x1 convolution and halves the resolution with 2x2 max pooling. The
    final (6, 5) convolution collapses a 6x5 feature map to 1x1, so the
    network is sized for 3x192x160 inputs (192/2**5 = 6, 160/2**5 = 5).

    Args:
        latent_dim: number of channels of the latent code.
        variational: when True an additional log-variance head is created and
            ``encode`` returns it alongside the mean. When False (default) no
            extra parameters exist and ``logvar`` is None, so checkpoints of
            the non-variational model keep exactly the same state_dict keys.
            Previously this flag was stored but had no effect.
    """

    def __init__(self, latent_dim, variational=False):
        super().__init__()
        # block 1
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.conv2 = nn.Conv2d(32, 32, 3, padding=1)
        self.conv3 = nn.Conv2d(32, 32, 3, padding=1)
        self.downsize1 = nn.Conv2d(96, 64, 1)

        # block 2
        self.conv4 = nn.Conv2d(64, 64, 3, padding=1)
        self.conv5 = nn.Conv2d(64, 64, 3, padding=1)
        self.conv6 = nn.Conv2d(64, 64, 3, padding=1)
        self.downsize2 = nn.Conv2d(192, 128, 1)

        # block 3
        self.conv7 = nn.Conv2d(128, 128, 3, padding=1)
        self.conv8 = nn.Conv2d(128, 128, 3, padding=1)
        self.conv9 = nn.Conv2d(128, 128, 3, padding=1)
        self.downsize3 = nn.Conv2d(384, 256, 1)

        # block 4
        self.conv10 = nn.Conv2d(256, 256, 3, padding=1)
        self.conv11 = nn.Conv2d(256, 256, 3, padding=1)
        self.downsize4 = nn.Conv2d(512, 384, 1)

        # block 5
        self.conv12 = nn.Conv2d(384, 384, 3, padding=1)
        self.conv13 = nn.Conv2d(384, 384, 3, padding=1)
        self.downsize5 = nn.Conv2d(768, 512, 1)

        self.fc1 = nn.Conv2d(512, latent_dim, (6, 5))  # code layer (mean)

        self.relu = nn.ReLU()
        self.max_pool = nn.MaxPool2d(2, 2)
        self.variational = variational

        # Created last so the initialisation of every other layer is
        # unaffected; None (not a registered module) when not variational.
        self.fc_logvar = nn.Conv2d(512, latent_dim, (6, 5)) if variational else None

    def encode(self, x):
        """Return ``(code, logvar)`` for a batch ``x``; ``logvar`` is None unless variational."""
        # block 1: 3 -> 64 channels, resolution halved
        h1 = self.relu(self.conv1(x))
        h2 = self.relu(self.conv2(h1))
        h3 = self.relu(self.conv3(h2))
        ds1 = self.relu(self.downsize1(torch.cat((h1, h2, h3), 1)))
        mp1 = self.max_pool(ds1)

        # block 2: 64 -> 128 channels, resolution halved
        # (h4 enters the concatenation before its ReLU)
        h4 = self.conv4(mp1)
        h5 = self.relu(self.conv5(self.relu(h4)))
        h6 = self.relu(self.conv6(h5))
        ds2 = self.relu(self.downsize2(torch.cat((h4, h5, h6), 1)))
        mp2 = self.max_pool(ds2)

        # block 3: 128 -> 256 channels, resolution halved
        # (h7 enters the concatenation before its ReLU)
        h7 = self.conv7(mp2)
        h8 = self.relu(self.conv8(self.relu(h7)))
        h9 = self.relu(self.conv9(h8))
        ds3 = self.relu(self.downsize3(torch.cat((h7, h8, h9), 1)))
        mp3 = self.max_pool(ds3)

        # block 4: 256 -> 384 channels, resolution halved
        h10 = self.relu(self.conv10(mp3))
        h11 = self.relu(self.conv11(h10))
        ds4 = self.relu(self.downsize4(torch.cat((h10, h11), 1)))
        mp4 = self.max_pool(ds4)

        # block 5: 384 -> 512 channels, resolution halved
        h12 = self.relu(self.conv12(mp4))
        h13 = self.relu(self.conv13(h12))
        ds5 = self.relu(self.downsize5(torch.cat((h12, h13), 1)))
        mp5 = self.max_pool(ds5)

        # 512x6x5 -> latent_dim x1x1
        code = self.fc1(mp5)
        if self.variational and self.fc_logvar is not None:
            logvar = self.fc_logvar(mp5)
        else:
            logvar = None

        return code, logvar

    def forward(self, x):
        """Alias for ``encode``."""
        return self.encode(x)

class Decoder(nn.Module):
    """Transposed-convolution decoder mapping a latent code to a 3-channel image.

    ``fc1`` expands the latent map with a (6, 5) kernel; five stride-2
    transposed convolutions then double the resolution each, and a 1x1
    convolution followed by a sigmoid produces the output. For a latent of
    spatial size 1x1 the result is 3x192x160.
    """

    def __init__(self, latent_dim):
        super().__init__()
        self.fc1 = nn.ConvTranspose2d(latent_dim, 384, (6, 5), stride=1)
        self.deconv1 = nn.ConvTranspose2d(384, 256, 2, stride=2, output_padding=0)
        self.deconv3 = nn.ConvTranspose2d(256, 224, 2, stride=2, output_padding=0)
        self.deconv4 = nn.ConvTranspose2d(224, 192, 2, stride=2, output_padding=0)
        self.deconv5 = nn.ConvTranspose2d(192, 156, 2, stride=2, output_padding=0)
        self.deconv6 = nn.ConvTranspose2d(156, 128, 2, stride=2, output_padding=0)
        self.deconv8 = nn.Conv2d(128, 3, 1, stride=1)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def decode(self, x):
        """Run the upsampling stack on ``x`` and squash the result into [0, 1]."""
        upsampling_stack = (
            self.fc1,
            self.deconv1,
            self.deconv3,
            self.deconv4,
            self.deconv5,
            self.deconv6,
        )
        hidden = x
        for layer in upsampling_stack:
            hidden = self.relu(layer(hidden))

        # final 1x1 projection to RGB has no ReLU before the sigmoid
        logits = self.deconv8(hidden)
        return self.sigmoid(logits)

    def forward(self, x):
        """Alias for ``decode``."""
        return self.decode(x)
    
# a model that uses the Encoder and Decoder
class SingleModel(nn.Module):
    """Autoencoder wrapper that chains an encoder and a decoder.

    Args:
        encoder: module returning ``(mu, logvar)``; ``logvar`` may be None.
        decoder: module mapping a latent tensor to a reconstruction.
        variational: flag exposed as ``self.variational`` for callers (the
            training loop reads it to decide whether to add the KL term).
    """

    def __init__(self, encoder, decoder, variational=False):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.variational = variational

    def sample(self, mu, logvar):
        """Draw ``z = mu + eps * std`` with ``eps ~ N(0, 1)`` (reparameterisation)."""
        std = torch.exp(0.5 * logvar)
        # randn_like replaces the deprecated Variable / .data.new() idiom and
        # keeps eps on the same device and dtype as std.
        eps = torch.randn_like(std)
        return mu + eps * std

    def forward(self, X):
        """Return ``(recon, mu, logvar)`` for the batch ``X``."""
        # Use this model's own encoder; the previous code called a global
        # named `encoder`, which ignored the module passed to __init__.
        mu, logvar = self.encoder(X)

        # sample only while training and only if the encoder gave a variance
        if self.training and logvar is not None:
            z = self.sample(mu, logvar)
        else:
            z = mu

        recon = self.decoder(z)

        return recon, mu, logvar

In [None]:
from torch import optim
import numpy as np

  ## YOUR CODE HERE ##
# Build the autoencoder: 4096-channel latent code, non-variational.
encoder = Encoder(4096, variational=False)
decoder = Decoder(4096)
model = SingleModel(encoder, decoder)
params = model.parameters()

print("Total Params:", count_params(model))

# set to True to run on GPU (when one is available)
use_cuda = False

if use_cuda and torch.cuda.is_available():
    encoder.cuda()
    decoder.cuda()
    model.cuda()

optimizer = optim.Adam(params, lr=0.0004)
scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.98)

# Resuming from a checkpoint is best-effort: training starts from scratch when
# it cannot be loaded, but the reason is now reported instead of being hidden
# by a bare `except:` (which also swallowed KeyboardInterrupt).
try:
  # map_location lets a checkpoint saved on GPU load onto a CPU-resident model
  model.load_state_dict(torch.load("model_2.pt", map_location=next(model.parameters()).device))
  print("Model loaded")
except Exception as err:
  print("Error loading model")
  print(err)

In [None]:
train(model, train_loader, optimizer, print_metrics=0, num_epochs=3, display_images=1, scheduler=scheduler, save_path="model2.pt")

In [None]:
train(model, train_loader, optimizer, print_metrics=0, num_epochs=5, display_images=1, scheduler=scheduler)

In [None]:
train(model, train_loader, optimizer, print_metrics=0, num_epochs=5, display_images=1, scheduler=scheduler)

In [None]:
train(model, train_loader, optimizer, print_metrics=0, num_epochs=5, display_images=1, scheduler=scheduler)

In [None]:
train(model, train_loader, optimizer, print_metrics=0, num_epochs=5, display_images=1, scheduler=scheduler)

In [None]:
train(model, train_loader, optimizer, print_metrics=0, num_epochs=5, display_images=1, scheduler=scheduler)

In [None]:
train(model, train_loader, optimizer, print_metrics=0, num_epochs=5, display_images=1, scheduler=scheduler)

In [None]:
  train(model, train_loader, optimizer, print_metrics=0, num_epochs=5, display_images=1, scheduler=scheduler)

In [None]:
  train(model, train_loader, optimizer, print_metrics=0, num_epochs=5, display_images=1, scheduler=scheduler)

In [None]:
train(model, train_loader, optimizer, print_metrics=0, num_epochs=2, display_images=1, scheduler=scheduler)

In [None]:
  train(model, train_loader, optimizer, print_metrics=0, num_epochs=8, display_images=1, scheduler=scheduler)

In [None]:
  train(model, train_loader, optimizer, print_metrics=0, num_epochs=5, display_images=1, scheduler=scheduler)

In [None]:
# Decode a random latent vector to see what the decoder generates.
noise = np.random.normal(loc=0.3, scale=7.5, size=4096).reshape((1,4096,1,1))
# Send the latent to whichever device the decoder lives on; an unconditional
# .cuda() fails when the model was left on the CPU (use_cuda = False).
decoder_device = next(model.decoder.parameters()).device
with torch.no_grad():  # inference only, no autograd graph needed
    image = model.decoder(torch.from_numpy(noise).float().to(decoder_device))
plt.imshow(image[0].cpu().detach().permute(1,2,0))
plt.show()

In [None]:
# # download and unzip the data
# url = "https://s3.eu-west-3.amazonaws.com/deep.skoo.ch/GWB_64x64.zip"
# if not os.path.exists("gwb_images.zip"):
#   wget.download(url, 'gwb_images.zip')

#   zip_ref = zipfile.ZipFile('gwb_images.zip', 'r')
#   zip_ref.extractall('data/gwb')
#   zip_ref.close()

In [None]:
# use the existing encoder to create a GWBush decoder
from torch import optim
import numpy as np

  ## YOUR CODE HERE ##
encoder = model.encoder
# The new decoder must accept exactly as many latent channels as the shared
# encoder emits (the out_channels of its code layer). The previous hard-coded
# 2048 did not match the 4096-channel encoder built earlier in this notebook.
gwb_decoder = Decoder(encoder.fc1.out_channels)
gwb_model = SingleModel(encoder, gwb_decoder)
# only the new decoder's weights are handed to the optimizer
gwb_params = gwb_model.decoder.parameters()

# run on GPU
use_cuda = True

if use_cuda and torch.cuda.is_available():
    encoder.cuda()
    gwb_decoder.cuda()
    gwb_model.cuda()

optimizer = optim.Adam(gwb_params, lr=0.002)
scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.95)

In [None]:
# gwb dataset
BATCH_SIZE = 64
data_path = "data/gwb"

transform = torchvision.transforms.Compose(
    [torchvision.transforms.RandomHorizontalFlip(p=0.5), 
      torchvision.transforms.RandomChoice([
        torchvision.transforms.RandomAffine(degrees=7, translate=(0.05,0.05), scale=(0.9,1.1), shear=3, resample=False, fillcolor=0),
        torchvision.transforms.RandomResizedCrop((192,160), scale=(0.90, 1.1)),
      ]),
    torchvision.transforms.ToTensor(),
    ])

gwb_train_dataset = torchvision.datasets.ImageFolder(
        root=data_path,
        transform=transform
    )

gwb_train_loader2 = torch.utils.data.DataLoader(
    gwb_train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True
)

In [None]:
for (images, _) in gwb_train_loader2:
  plt.imshow(images[0].permute(1,2,0))
  break

In [None]:
train(gwb_model, gwb_train_loader2, optimizer, print_metrics=0, num_epochs=200, display_images=20, scheduler=scheduler, save_path="./gwb_model.pt")