# CNN VAE for SRL for DIY Self driving car

In this notebook you will train the CNN VAE (beta) model. The resulting model is used for state representation learning (SRL) in reinforcement learning.

First, collect the training data. You can use `notebooks\utility\data_collection.ipynb`.

Collect images of the course while driving the car on it. Collect 1k to 10k images, adjusting the amount to the size of the course. Vary the way you drive: along the center of the course, close to the side lines, zigzagging, etc. During reinforcement-learning trials you cannot know in advance how the car will run on the course, so collect data such that the course can still be represented when the car makes an error.


## Installing TensorBoardX

In [None]:
!pip install tensorboardX

## Mount google drive

Upload the zip file that contains the training data to Google Drive; this notebook copies it from there.
Set the zip file name in `DATASET_FILE` (the file is expected in the `dataset` folder of your Drive).

In [None]:
# Mount Google Drive at /content/drive so later cells can read the dataset
# archive from it and write the trained model back to it.
from google.colab import drive 
drive.mount('/content/drive')

In [None]:
import os

# Folder on Google Drive that holds the archive; the copy cell also moves a
# local directory of this name into dataset_root after unzipping.
DATASET_DIR = 'dataset'
# File name of the training-data zip archive -- fill this in before running.
DATASET_FILE = ''
# Archive location relative to 'My Drive'.
DATASET_ZIP = os.path.join(DATASET_DIR, DATASET_FILE)

## Copy from google drive

Copy training data and unzip.

In [None]:
# Remove any previous extraction so the cell can be re-run from scratch.
!rm -rf dataset_root
# Copy the archive from Google Drive into the working directory and extract it.
!cp '/content/drive/My Drive/$DATASET_ZIP' ./
!unzip -q $DATASET_FILE

# Nest the extracted folder one level down (dataset_root/<DATASET_DIR>/...),
# which is the root later handed to ImageFolder.
# NOTE(review): assumes the archive unpacks into a top-level folder named DATASET_DIR.
!mkdir dataset_root
!mv $DATASET_DIR './dataset_root'

## Import module

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torchvision import datasets
from torchvision import transforms
from torchvision.utils import save_image
from IPython.display import Image
from IPython.core.display import Image, display

# autoreload mode 2: re-import modified modules automatically before each cell runs.
%load_ext autoreload
%autoreload 2


## Load GPU device

In [None]:

# Run on the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

## Load dataset



In [None]:
bs = 64  # mini-batch size

# Every image is resized to 120x160 (HxW); the PIL crop box
# (left=0, upper=40, right=160, lower=120) then drops the top 40 rows,
# leaving an 80x160 image that is converted to a tensor.
dataset = datasets.ImageFolder(
    root='./dataset_root',
    transform=transforms.Compose([
        transforms.Resize((120, 160)),
        transforms.Lambda(lambda img: img.crop((0, 40, 160, 120))),
        transforms.ToTensor(),
    ]),
)
dataloader = torch.utils.data.DataLoader(
    dataset,
    batch_size=bs,
    shuffle=True,
    num_workers=2,
    pin_memory=True,
)
# Number of images and number of mini-batches per epoch.
len(dataset.imgs), len(dataloader)

In [None]:
# Take one shuffled mini-batch as a reference sample; the model-setup cell
# reads its channel count from it.
fixed_x, _ = next(iter(dataloader))
# Save the batch to an image file and display it inline to sanity-check the
# resize/crop preprocessing.
save_image(fixed_x, 'real_image.png')
Image('real_image.png')

## Define VAE Network 

In [None]:
class Flatten(nn.Module):
    """Collapse every dimension after the batch axis into a single one."""

    def forward(self, input):
        """Return ``input`` viewed as ``(batch, -1)``."""
        batch_size = input.size(0)
        return input.view(batch_size, -1)

class UnFlatten(nn.Module):
    """Reshape flat feature vectors into ``(batch, size, 3, 8)`` feature maps."""

    def forward(self, input, size=256):
        """Return ``input`` viewed as ``size`` channels of spatial shape 3x8."""
        target_shape = (input.size(0), size, 3, 8)
        return input.view(*target_shape)


class VAE(nn.Module):
    """Convolutional VAE whose decoder predicts a per-pixel mean and variance.

    The layer sizes are tied to 80x160 inputs: the four stride-2 convolutions
    reduce such an image to a 256x3x8 feature map (``h_dim=6144`` features),
    and ``UnFlatten`` restores exactly that shape for the decoder, whose
    transposed convolutions grow it back to 80x160.

    Submodule attribute names (``encoder``, ``fc1`` ... ``out2``) are part of
    the saved ``state_dict`` layout and must not be renamed.

    Args:
        image_channels: Channels of the input and of the reconstructed images.
        h_dim: Size of the flattened encoder output (256 * 3 * 8 by default).
        z_dim: Dimensionality of the latent vector.
    """

    def __init__(self, image_channels=3, h_dim=6144, z_dim=32):
        super().__init__()
        self.z_dim = z_dim

        # 80x160 -> 39x79 -> 18x38 -> 8x18 -> 3x8, then flattened to h_dim.
        self.encoder = nn.Sequential(
            nn.Conv2d(image_channels, 32, kernel_size=4, stride=2),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2),
            nn.ReLU(),
            nn.Conv2d(64, 128, kernel_size=4, stride=2),
            nn.ReLU(),
            nn.Conv2d(128, 256, kernel_size=4, stride=2),
            nn.ReLU(),
            Flatten()
        )

        self.fc1 = nn.Linear(h_dim, z_dim)  # latent mean
        self.fc2 = nn.Linear(h_dim, z_dim)  # latent log-variance
        self.fc3 = nn.Linear(z_dim, h_dim)  # latent -> decoder features

        # 3x8 -> 8x18 -> 18x38 -> 39x79; the kernel of 5 in the last layer is
        # what yields the odd 39x79 size so the output heads reach 80x160.
        self.decoder = nn.Sequential(
            UnFlatten(),
            nn.ConvTranspose2d(256, 128, kernel_size=4, stride=2),
            nn.ReLU(),
            nn.ConvTranspose2d(128, 64, kernel_size=4, stride=2),
            nn.ReLU(),
            nn.ConvTranspose2d(64, 32, kernel_size=5, stride=2),
            nn.ReLU(),
        )

        # Two output heads sharing the decoder trunk, both squashed to (0, 1):
        # out1 is the reconstructed image, out2 the per-pixel variance.
        self.out1 = nn.Sequential(nn.ConvTranspose2d(32, image_channels, kernel_size=4, stride=2),
                                  nn.Sigmoid(),
                                  )
        self.out2 = nn.Sequential(nn.ConvTranspose2d(32, image_channels, kernel_size=4, stride=2),
                                  nn.Sigmoid(),
                                  )

    def reparameterize(self, mu, logvar):
        """Draw ``z = mu + std * eps`` with ``eps ~ N(0, I)``.

        Args:
            mu: Latent means.
            logvar: Latent log-variances, same shape as ``mu``.

        Returns:
            A sample with the shape, dtype and device of ``mu``.
        """
        std = torch.exp(0.5 * logvar)
        # randn_like creates the noise directly on the inputs' device/dtype
        # instead of depending on a notebook-global `device`.
        eps = torch.randn_like(std)
        return mu + std * eps

    def bottleneck(self, h):
        """Map encoder features to ``(z, mu, logvar)``, where ``z`` is sampled."""
        mu, logvar = self.fc1(h), self.fc2(h)
        z = self.reparameterize(mu, logvar)
        return z, mu, logvar

    def encode(self, x):
        """Encode images into ``(z, mu, logvar)``.

        ``z`` is a random sample (also in eval mode); use ``mu`` when a
        deterministic code is needed.
        """
        h = self.encoder(x)
        return self.bottleneck(h)

    def decode(self, z):
        """Decode latent vectors into ``(mu_y, sigma_y)``.

        Returns:
            ``mu_y``: reconstructed image in (0, 1).
            ``sigma_y``: per-pixel value in (0, 1) that ``loss_fn`` uses as
            the variance of the reconstruction.
        """
        features = self.decoder(self.fc3(z))
        return self.out1(features), self.out2(features)

    def forward(self, x):
        """Return ``(mu_y, sigma_y, mu, logvar)`` for a batch of images."""
        z, mu, logvar = self.encode(x)
        mu_y, sigma_y = self.decode(z)
        return mu_y, sigma_y, mu, logvar

    def loss_fn(self, image, mu_y, sigma_y, mean, logvar,
                recon_weight=10.0, kl_weight=5.0, eps=1e-6):
        """Weighted Gaussian negative log-likelihood plus KL divergence.

        Args:
            image: Target images.
            mu_y: Predicted per-pixel mean.
            sigma_y: Predicted per-pixel variance (despite the name it is
                used as a variance: it divides the squared error directly).
            mean: Latent means, shape ``(batch, z_dim)``.
            logvar: Latent log-variances, shape ``(batch, z_dim)``.
            recon_weight: Weight of the squared-error term.
            kl_weight: Weight of the KL term.
            eps: Lower bound applied to ``sigma_y`` for numerical stability.

        Returns:
            A scalar tensor. The likelihood terms are summed over the whole
            batch; the KL term is summed over the batch and averaged over the
            latent dimensions.
        """
        # The Sigmoid output can underflow to exactly 0, which would make the
        # division and the log below produce inf/NaN.
        variance = sigma_y.clamp(min=eps)
        m_vae_loss = 0.5 * torch.sum((mu_y - image) ** 2 / variance)
        a_vae_loss = 0.5 * torch.sum(torch.log(2.0 * torch.pi * variance))
        KL = -0.5 * torch.sum(1 + logvar - mean.pow(2) - logvar.exp(), dim=0)
        KL = torch.mean(KL)
        return kl_weight * KL + recon_weight * m_vae_loss + a_vae_loss


## Prepare Training

Create VAE model and initialize optimizer.

In [None]:
from torchsummary import summary

VARIANTS_SIZE = 32  # dimensionality of the latent vector (z_dim)
# Take the channel count from the sample batch rather than hard-coding it.
image_channels = fixed_x.size(1)
vae = VAE(image_channels=image_channels, z_dim=VARIANTS_SIZE).to(device)
optimizer = torch.optim.Adam(vae.parameters(), lr=1e-3)
# Summarise with the real per-sample shape (C, H, W) of the data so the
# summary stays consistent with image_channels and the preprocessing.
summary(vae, tuple(fixed_x.shape[1:]))

## Tensorboard

In [None]:
# Load the TensorBoard notebook extension and open TensorBoard on ./runs.
%load_ext tensorboard
%tensorboard --logdir ./runs

## Start training

In [0]:
from tensorboardX import SummaryWriter
import numpy as np

epochs = 100
# The writer is reused (and closed) by the latent-space visualization cell.
writer = SummaryWriter()

vae.train()
for epoch in range(epochs):
    losses = []
    for images, _ in dataloader:
        images = images.to(device, non_blocking=True)
        optimizer.zero_grad()
        mu_y, sigma_y, mu, logvar = vae(images)
        loss = vae.loss_fn(images, mu_y, sigma_y, mu, logvar)
        loss.backward()
        optimizer.step()
        # .item() yields a plain Python float and keeps no reference to the graph.
        losses.append(loss.item())

    mean_loss = np.average(losses)
    # Only the last batch of the epoch is logged, so build the image grids once
    # per epoch (detached from autograd) instead of once per batch.
    grid = torchvision.utils.make_grid(mu_y.detach())
    grid_sigma = torchvision.utils.make_grid(sigma_y.detach())
    writer.add_image('Image/reconst', grid, epoch)
    writer.add_image('Image/sigma', grid_sigma, epoch)
    writer.add_scalar('Loss/train', mean_loss, epoch)
    print("EPOCH: {} loss: {}".format(epoch+1, mean_loss))

# NOTE(review): the legacy (non-zip) serialization format is presumably kept so
# that older PyTorch versions can load the file -- confirm.
torch.save(vae.state_dict(), 'vae.torch', _use_new_zipfile_serialization=False)

## Visualize latent space
Visualize the latent space with TensorBoard.
You can inspect it in TensorBoard's Projector view.
The latent vectors are labeled automatically with K-means. If similar images cluster together, we consider the quality of the latent space to be good.


In [None]:
from sklearn.cluster import KMeans

vae.eval()

# Inference only: disabling autograd avoids retaining the graph for the
# thousands of samples that are encoded and decoded here.
with torch.no_grad():
    latent_batches = []
    n_collected = 0
    for images, _ in dataloader:
        z, _, _ = vae.encode(images.to(device))
        latent_batches.append(z.cpu().numpy())
        n_collected += len(z)
        # Stop once more than 5000 latent vectors have been gathered.
        if n_collected > 5000:
            break
    # Concatenate once instead of growing an array batch by batch.
    latent_spaces = np.concatenate(latent_batches, axis=0)

    # Decode every latent vector and shrink the reconstructions to 40x40
    # thumbnails, which become the per-point images in the Projector.
    thumbnails, sigma_y = vae.decode(torch.from_numpy(latent_spaces).to(device))
    thumbnails = F.interpolate(thumbnails, size=(40, 40), mode='bilinear', align_corners=False)

# Cluster the latent vectors; the cluster index is the label of each point.
kmeans_model = KMeans(n_clusters=5, verbose=0, n_init=10)
labels = kmeans_model.fit_predict(latent_spaces)

writer.add_embedding(mat=latent_spaces, metadata=labels, label_img=thumbnails)
writer.close()

## Re-launch TensorBoard
This cell kills the running TensorBoard process and re-launches TensorBoard. If the Projector tab is not shown, click the reload button.

In [None]:
# Kill the running tensorboard process: take its line from the `ps` listing and
# cut everything before the first '?' (the PID, when the TTY column is '?').
# Then start TensorBoard again on the same log directory.
!kill $(ps | grep tensorboard | cut -f 1 -d '?')
%tensorboard --logdir ./runs

## Cleanup

Copy the trained model file to Google Drive.

In [None]:
# Persist the trained weights to Google Drive (requires the Drive mount done earlier).
!cp vae.torch '/content/drive/My Drive/vae.torch'