<a href="https://colab.research.google.com/github/XRater/DUL_2019/blob/hw4/DUL_HW4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import numpy as np
import pickle
import torch
import torch.distributions as D
import torch.nn as nn
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
import plotly.graph_objs as go

In [0]:
def sample_data_1(count=100000, seed=0):
  """Draw an axis-aligned 2-D Gaussian point cloud.

  Standard-normal draws are scaled by (5, 1) along the two axes and shifted
  so the cloud is centred on (1, 2). A private ``RandomState`` is used, so
  the result depends only on ``seed`` and the global NumPy RNG is untouched.

  Args:
    count: Number of points to draw. Defaults to 100000.
    seed: Seed for the private ``RandomState``. Defaults to 0.

  Returns:
    ``numpy.ndarray`` of shape ``(count, 2)``.
  """
  rand = np.random.RandomState(seed)
  return [[1.0, 2.0]] + rand.randn(count, 2) * [[5.0, 1.0]]
def sample_data_2(count=100000, seed=0):
  """Draw a 2-D Gaussian point cloud whose principal axes are rotated.

  Standard-normal draws are scaled by (5, 1), multiplied on the right by a
  45-degree rotation matrix, and then shifted by (1, 2). A private
  ``RandomState`` is used, so the result depends only on ``seed``.

  Args:
    count: Number of points to draw. Defaults to 100000.
    seed: Seed for the private ``RandomState``. Defaults to 0.

  Returns:
    ``numpy.ndarray`` of shape ``(count, 2)``.
  """
  rand = np.random.RandomState(seed)
  # cos(45 deg) == sin(45 deg) == sqrt(2) / 2
  c = np.sqrt(2) / 2
  rotation = [[c, c], [-c, c]]
  return [[1.0, 2.0]] + (rand.randn(count, 2) * [[5.0, 1.0]]).dot(rotation)

In [0]:
def show(data, size):
  """Scatter-plot the first ``size`` rows of ``data`` with plotly.

  Column 0 is used for x and column 1 for y. The figure is 700x700 pixels
  and both axes are pinned to [-20, 20].

  Args:
    data: Two-dimensional indexable (indexed as ``data[:size, k]``).
    size: Number of leading rows to draw.
  """
  axis_limits = (-20, 20)
  trace = go.Scatter(x=data[:size, 0], y=data[:size, 1], mode='markers')
  layout = go.Layout(width=700, height=700)
  fig = go.Figure(data=trace, layout=layout)
  fig.update_xaxes(range=list(axis_limits))
  fig.update_yaxes(range=list(axis_limits))
  fig.show()

In [4]:
# Build the axis-aligned dataset, hold out a test split, and preview it.
data_1 = sample_data_1()
# Pin random_state so the train/test partition is the same on every run.
data_1_train, data_1_test = train_test_split(data_1, random_state=0)
show(data_1, 5000)

In [5]:
# Build the rotated dataset, hold out a test split, and preview it.
data_2 = sample_data_2()
# Pin random_state so the train/test partition is the same on every run.
data_2_train, data_2_test = train_test_split(data_2, random_state=0)
show(data_2, 5000)

In [0]:
class MultiLinear(nn.Module):
  """Two-layer perceptron: Linear -> ReLU -> Linear.

  ``input_size`` and ``output_size`` are kept as attributes so wrappers can
  read the network's interface widths.
  """

  def __init__(self, input_size, mid_size, output_size):
    super().__init__()
    self.input_size, self.output_size = input_size, output_size
    # Build the layers in input-to-output order so parameter initialisation
    # consumes the RNG in that order.
    first = nn.Linear(input_size, mid_size)
    second = nn.Linear(mid_size, output_size)
    self.model = nn.Sequential(first, nn.ReLU(), second)

  def forward(self, X):
    """Apply the network to ``X`` and return its output."""
    return self.model(X)

In [0]:
class VaeEncoder(nn.Module):
  """Thin wrapper that exposes a network as a VAE encoder.

  ``hidden_size`` is the wrapped network's full output width. It must be
  even because ``VaeEncoderDecoder`` splits the output into two equal halves.
  """

  def __init__(self, model):
    """Wrap ``model``, which must expose ``input_size`` and ``output_size``."""
    super().__init__()
    self.input_size, self.hidden_size = model.input_size, model.output_size
    assert self.hidden_size % 2 == 0
    self.model = model

  def forward(self, X):
    """Return the wrapped network's output for ``X``."""
    return self.model(X)

In [0]:
class VaeDecoder(nn.Module):
  """Thin wrapper that exposes a network as a VAE decoder.

  ``hidden_size`` is the wrapped network's input width and ``output_size``
  is its output width.
  """

  def __init__(self, model):
    """Wrap ``model``, which must expose ``input_size`` and ``output_size``."""
    super().__init__()
    self.hidden_size, self.output_size = model.input_size, model.output_size
    self.model = model

  def forward(self, X):
    """Return the wrapped network's output for ``X``."""
    return self.model(X)

In [0]:
class VaeEncoderDecoder(nn.Module):
  """Variational auto-encoder assembled from an encoder and a decoder.

  The encoder output is split in half along dim 1 into latent means and
  log-variances. A latent is drawn with the reparameterisation trick and fed
  to the decoder. Every forward pass stores the scalar training objective on
  ``self.loss`` and returns the reconstruction.
  """

  def __init__(self, encoder, decoder):
    """
    Args:
      encoder: Module whose output is chunked into (means, log-variances)
        along dim 1.
      decoder: Module mapping latents to reconstructions. Must expose
        ``hidden_size`` (the latent width), which ``sample`` reads.
    """
    super(VaeEncoderDecoder, self).__init__()
    self.encoder = encoder
    self.decoder = decoder
    self.mse_loss = nn.MSELoss()

  def forward(self, X):
    """Encode, sample a latent, decode, and record the loss.

    Args:
      X: Batch tensor; it is cast to float32 before encoding.

    Returns:
      The decoder output for the sampled latents. The objective
      (KL-style term + MSE) is stored on ``self.loss`` as a side effect.
    """
    X = X.float()
    mus, log_vars = torch.chunk(self.encoder(X), 2, dim=1)
    # exp(0.5 * log_var) is the standard deviation. Unlike sqrt(exp(.)), its
    # gradient stays finite when exp(log_var) underflows to zero.
    stds = torch.exp(0.5 * log_vars)
    # randn_like creates the noise with the encoder output's device and
    # dtype, so the model also works when it is not on the CPU.
    encoded = mus + stds * torch.randn_like(log_vars)
    decoded = self.decoder(encoded)

    # This is 2 * KL(N(mu, sigma^2) || N(0, 1)) + 1, averaged over all
    # elements: the usual 1/2 factor and the -1 constant are omitted. The
    # value is intentionally kept as-is so the loss scale is unchanged.
    kl_loss = (log_vars.exp() + mus * mus - log_vars).mean()
    # nn.MSELoss already reduces to a scalar mean.
    mse_loss = self.mse_loss(decoded, X)
    self.loss = kl_loss + mse_loss

    return decoded

  def sample(self, N):
    """Decode ``N`` latents drawn from a standard normal prior.

    The latents are created with the device and dtype of the decoder's first
    parameter when it has one, otherwise with torch's defaults.

    Args:
      N: Number of samples to generate.

    Returns:
      The decoder output for the ``N`` latents.
    """
    latent_shape = (N, self.decoder.hidden_size)
    params = self.decoder.parameters() if isinstance(self.decoder, nn.Module) else iter(())
    ref = next(params, None)
    if ref is None:
      encoded = torch.randn(*latent_shape)
    else:
      encoded = torch.randn(*latent_shape, device=ref.device, dtype=ref.dtype)
    return self.decoder(encoded)


In [0]:
def _mean_or_nan(total, count):
  """Return ``total / count``, or NaN when there were no batches."""
  return total / count if count else float('nan')


def train(model, optimizer, train_loader, test_loader, num_epochs):
  """Run a train/validate loop and print the mean losses after each epoch.

  The model is called on every batch and is expected to leave the scalar
  loss for that batch on ``model.loss``.

  Args:
    model: Callable that sets ``model.loss`` when invoked on a batch.
    optimizer: Optimizer exposing ``zero_grad()`` and ``step()``.
    train_loader: Sized iterable of training batches.
    test_loader: Sized iterable of validation batches.
    num_epochs: Number of passes over ``train_loader``.

  An empty loader yields a reported mean of NaN instead of raising
  ``ZeroDivisionError``.
  """
  for epoch in range(num_epochs):
    loss, val_loss = 0.0, 0.0
    for batch in train_loader:
      optimizer.zero_grad()
      model(batch)
      curr_loss = model.loss
      loss += curr_loss.item()
      curr_loss.backward()
      optimizer.step()

    with torch.no_grad():
      for batch in test_loader:
        model(batch)
        # Accumulate a Python float, matching the training-loss bookkeeping.
        val_loss += model.loss.item()

    mean_loss = _mean_or_nan(loss, len(train_loader))
    mean_val_loss = _mean_or_nan(val_loss, len(test_loader))
    print(f"After epoch {epoch} loss is {mean_loss} and validation loss is {mean_val_loss}")

In [0]:
def run_realVAE_model_train(train_data, test_data, batch_size=512, num_epochs=30,
                            latent_size=10, mid_size=32):
  """Build a VAE for 2-D data, train it with Adam, and return the model.

  Args:
    train_data: Dataset passed to the (shuffled) training ``DataLoader``.
    test_data: Dataset passed to the validation ``DataLoader``.
    batch_size: Batch size for both loaders. Defaults to 512.
    num_epochs: Number of training epochs. Defaults to 30.
    latent_size: Width of the latent space. Defaults to 10.
    mid_size: Hidden-layer width of the encoder and decoder. Defaults to 32.

  Returns:
    The trained ``VaeEncoderDecoder``.
  """
  data_size = 2  # the networks are built for two-dimensional points

  # The encoder output is split into a mean half and a log-variance half, so
  # its width must be exactly twice the decoder's input width.
  encoder = VaeEncoder(MultiLinear(data_size, mid_size, 2 * latent_size))
  decoder = VaeDecoder(MultiLinear(latent_size, mid_size, data_size))
  model = VaeEncoderDecoder(encoder, decoder)
  optimizer = torch.optim.Adam(model.parameters())
  train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True)
  test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size)
  train(model, optimizer, train_loader, test_loader, num_epochs)

  return model

In [12]:
# Seed PyTorch's global RNG so weight initialisation, batch shuffling and the
# latent noise repeat from run to run.
torch.manual_seed(0)
VAEmodel_data_1 = run_realVAE_model_train(data_1_train, data_1_test)
VAEmodel_data_2 = run_realVAE_model_train(data_2_train, data_2_test)

After epoch 0 loss is 8.0448905221459 and validation loss is 3.7010765075683594
After epoch 1 loss is 3.2504509046775145 and validation loss is 3.001568555831909
After epoch 2 loss is 2.9102962487408903 and validation loss is 2.824968099594116
After epoch 3 loss is 2.771878772852372 and validation loss is 2.72102952003479
After epoch 4 loss is 2.689499585806918 and validation loss is 2.657827854156494
After epoch 5 loss is 2.609154853691049 and validation loss is 2.5716958045959473
After epoch 6 loss is 2.5133448798640243 and validation loss is 2.4398698806762695
After epoch 7 loss is 2.3747486159915017 and validation loss is 2.296755790710449
After epoch 8 loss is 2.2434545049861985 and validation loss is 2.1896345615386963
After epoch 9 loss is 2.1460952904759623 and validation loss is 2.1050331592559814
After epoch 10 loss is 2.0850519874468953 and validation loss is 2.058985471725464
After epoch 11 loss is 2.050270726891602 and validation loss is 2.038001298904419
After epoch 12 lo

In [0]:
def showSampleData(model, size):
  """Draw ``size`` samples from ``model`` and scatter-plot them.

  Args:
    model: Object whose ``sample(size)`` returns a torch tensor of points.
    size: Number of samples to generate and plot.
  """
  samples = model.sample(size)
  # .cpu() is a no-op for CPU tensors and is required before .numpy() when
  # the samples live on another device.
  samples = samples.detach().cpu().numpy()
  show(samples, size)

In [14]:
# Draw 1000 samples from the VAE trained on data_1 and scatter-plot them.
showSampleData(VAEmodel_data_1, 1000)

In [15]:
# Draw 1000 samples from the VAE trained on data_2 and scatter-plot them.
showSampleData(VAEmodel_data_2, 1000)