In [None]:
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
import numpy as np
import pylab

# Sample the 2-D "two moons" point cloud used as the real data below;
# the class labels returned alongside it are discarded.
X, _ = make_moons(
    n_samples=5000,
    noise=0.1,
    random_state=42,
)

In [None]:
# Scatter plot of the real samples (first column vs. second column).
real_x, real_y = X[:, 0], X[:, 1]
pylab.scatter(real_x, real_y)
pylab.show()

In [None]:
# Import the torch functions
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable


In [None]:
# Define the z (the random input to the GAN)
def z(n, dim=2):
    """Draw `n` latent vectors from a standard normal distribution.

    Args:
        n: number of noise vectors to draw.
        dim: length of each noise vector. Defaults to 2, the value that
            was previously hard-coded.

    Returns:
        A float32 tensor of shape (n, dim) sampled with NumPy's global RNG,
        so `np.random.seed` controls reproducibility.
    """
    samples = np.random.normal(0, 1, (n, dim))  # Gaussian, float64
    # Convert explicitly to float32, the dtype nn.Linear parameters use by default.
    return torch.as_tensor(samples, dtype=torch.float32)

In [None]:
# Generator layer sizes

# length of the random noise vector fed to the generator
g_input_size = 2
# width of each hidden layer in the generator
g_hidden_size = 50
# length of every generated sample
g_output_size = 2

class Generator(nn.Module):
    """Three-layer fully connected network mapping noise vectors to samples.

    The forward pass applies ELU after the first layer, a sigmoid after the
    second, and leaves the final linear layer without an activation.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Create the three linear layers.

        Args:
            input_size: number of features in each input vector.
            hidden_size: width of both hidden layers.
            output_size: number of features in each output vector.
        """
        super().__init__()
        self.layer1 = nn.Linear(input_size, hidden_size)
        self.layer2 = nn.Linear(hidden_size, hidden_size)
        self.layer3 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Return the generated batch for the input batch `x`."""
        # ELU behaves like ReLU for positive inputs but keeps a non-zero
        # gradient for negative ones.
        first_hidden = F.elu(self.layer1(x))
        second_hidden = torch.sigmoid(self.layer2(first_hidden))
        return self.layer3(second_hidden)

In [None]:
# Generator creation: build the network, then the Adam optimizer that
# updates only the generator's parameters.
G = Generator(g_input_size, g_hidden_size, g_output_size)

g_learning_rate = 2e-4
optim_betas = (0.9, 0.999)
g_optimizer = optim.Adam(
    G.parameters(),
    lr=g_learning_rate,
    betas=optim_betas,
)


In [None]:
# Discriminator (classifier) settings

# number of samples per mini-batch
minibatch_size = 100

# length of each sample the discriminator judges as 'real' or 'fake'
d_input_size = 2
# width of each hidden layer in the discriminator
d_hidden_size = 50
# one output per sample: the 'real' vs. 'fake' score
d_output_size = 1


class Discriminator(nn.Module):
    """Three-layer fully connected classifier scoring samples as real or fake.

    Both hidden layers use ELU; the output layer is squashed with a sigmoid,
    so every score lies between 0 and 1.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Create the three linear layers.

        Args:
            input_size: number of features in each input sample.
            hidden_size: width of both hidden layers.
            output_size: number of scores produced per sample.
        """
        super().__init__()
        self.layer1 = nn.Linear(input_size, hidden_size)
        self.layer2 = nn.Linear(hidden_size, hidden_size)
        self.layer3 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Return the sigmoid score for every sample in the batch `x`."""
        first_hidden = F.elu(self.layer1(x))
        second_hidden = F.elu(self.layer2(first_hidden))
        logits = self.layer3(second_hidden)
        return torch.sigmoid(logits)

In [None]:
# Build the discriminator network.
D = Discriminator(d_input_size, d_hidden_size, d_output_size)

# Binary cross-entropy (negative log-likelihood for the two classes fake / real).
loss = nn.BCELoss()

# Adam settings for the discriminator; the betas are parameters specific
# to the Adam optimization algorithm (not part of this lecture).
d_learning_rate = 2e-4
optim_betas = (0.9, 0.999)
d_optimizer = optim.Adam(
    D.parameters(),
    lr=d_learning_rate,
    betas=optim_betas,
)

In [None]:
# Reset the discriminator's gradients before accumulating new ones.
D.zero_grad()

# Forward the first mini-batch of real samples through D.
d_real_data = Variable(torch.FloatTensor(X[:minibatch_size]))
d_real_decision = D(d_real_data)

# Show the discriminator's output for the first three real samples.
print(d_real_decision[:3])

In [None]:
# Real samples get the target label 1. The targets are shaped after the
# prediction tensor so they follow the batch size instead of a hard-coded 100 rows.
real_targets = torch.ones_like(d_real_decision)
d_real_error = loss(d_real_decision, real_targets)
# Store the gradients but do not update the parameters yet: the fake
# examples still have to contribute their gradients.
d_real_error.backward()

In [None]:
# Noise batch that is fed to the generator.
d_gen_input = Variable(z(minibatch_size))

# Generate fake samples and detach them from the generator's graph: this
# step only needs the examples and must not train the generator.
d_fake_data = G(d_gen_input).detach()

# Forward pass of the discriminator on the fake data.
d_fake_decision = D(d_fake_data)

# Print the decisions for the first three fake samples.
print(d_fake_decision[:3])

In [None]:
# Fake samples get the target label 0. The targets are shaped after the
# prediction tensor so they follow the batch size instead of a hard-coded 100 rows.
fake_targets = torch.zeros_like(d_fake_decision)
d_fake_error = loss(d_fake_decision, fake_targets)
# backward() adds these gradients to the ones already stored on D
# (computed over the real data).
d_fake_error.backward()

In [None]:
# Apply one update to the discriminator's parameters only, using the
# gradients stored by the preceding backward() calls.
d_optimizer.step()

In [None]:
G.zero_grad()  # clear the generator's gradients before computing its update

d_gen_input = Variable(z(minibatch_size))
# Do NOT detach here: the generator loss has to back-propagate through D
# into G. A detached batch would leave G's parameters without gradients,
# so the following generator optimizer step would not change G at all.
d_fake_data = G(d_gen_input)

In [None]:
dg_fake_decision = D(d_fake_data)

# We want to fool the classifier, so the fake data is labelled as real (1)
# and G is trained to minimize this loss. The targets are shaped after the
# prediction tensor so they follow the batch size instead of a hard-coded 100 rows.
fool_targets = torch.ones_like(dg_fake_decision)
g_error = loss(dg_fake_decision, fool_targets)

# Gradients reach the generator only if d_fake_data is still attached to
# G's computation graph (i.e. it was produced without .detach()).
g_error.backward()

g_optimizer.step()  # Only optimizes G's parameters

In [None]:
# Alternating GAN training: every outer iteration performs `d_steps`
# discriminator updates followed by `g_steps` generator updates.
num_epochs = 200

d_steps = 20  # number of gradient steps of discriminator per outer iteration
g_steps = 1   # number of gradient steps of generator per outer iteration

print_interval = 10
batch_no = 0

# Number of full mini-batches, derived from the data instead of a hard-coded
# 5000; a trailing partial batch is skipped so batch and label shapes always match.
num_batches = len(X) // minibatch_size
if num_batches == 0:
    raise ValueError("minibatch_size must not exceed the number of samples in X")

# Loop-invariant tensors, built once instead of on every step.
real_samples = torch.FloatTensor(X)
real_labels = Variable(torch.ones(minibatch_size, 1))   # ones = true data
fake_labels = Variable(torch.zeros(minibatch_size, 1))  # zeros = fake data

for epoch in range(num_epochs):
    for d_index in range(d_steps):
        # 1. Train Discriminator on real+fake

        # Also discards the gradients D received during the generator steps.
        D.zero_grad()

        #  Train D on real data
        batch_start = batch_no * minibatch_size
        d_real_data = Variable(real_samples[batch_start:batch_start + minibatch_size])
        d_real_decision = D(d_real_data)
        d_real_error = loss(d_real_decision, real_labels)
        d_real_error.backward()  # store gradients, but don't change params yet

        # Advance to the next real mini-batch, wrapping around at the end.
        batch_no = (batch_no + 1) % num_batches

        #  Train D on fake data
        d_gen_input = Variable(z(minibatch_size))
        # Detach so that this step does not train the Generator.
        d_fake_data = G(d_gen_input).detach()
        d_fake_decision = D(d_fake_data)
        d_fake_error = loss(d_fake_decision, fake_labels)
        d_fake_error.backward()
        d_optimizer.step()  # Only optimizes D based on stored gradients from both backwards

    for g_index in range(g_steps):
        # 2. Train Generator on Discriminator's response (but WE WILL NOT train Discriminator on these labels)
        G.zero_grad()

        d_gen_input = Variable(z(minibatch_size))
        d_fake_data = G(d_gen_input)  # not detached: gradients must flow back into G
        dg_fake_decision = D(d_fake_data)
        # We want to fool D, so pretend the generated samples are all genuine.
        g_error = loss(dg_fake_decision, real_labels)
        g_error.backward()
        g_optimizer.step()  # Only optimizes G's parameters

    if epoch % print_interval == 0:
        print("%s" % (epoch))

In [None]:
# Draw 5000 noise vectors and plot them: this is what the generator receives as input.
noise = z(5000).numpy()
noise_x, noise_y = noise[:, 0], noise[:, 1]

pylab.scatter(noise_x, noise_y)
pylab.show()

In [None]:
# Push 5000 noise vectors through the generator and convert the result to NumPy.
d_gen_input = Variable(z(5000))
generated = G(d_gen_input).detach()
d_fake_data = generated.data.numpy()

# Overlay the real samples (blue) and the generated samples (red).
for points, colour in ((X, "blue"), (d_fake_data, "red")):
    pylab.scatter(points[:, 0], points[:, 1], c=colour)
pylab.show()