In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
import time
import os
import torchvision.utils as vutils

In [2]:
matplotlib_is_available = True
try:
  from matplotlib import pyplot as plt
except ImportError:
  print("Will skip plotting; matplotlib is not available.")
  matplotlib_is_available = False

In [3]:
# Data params
data_mean = 0
data_stddev = 5

In [4]:
# ### Uncomment only one of these to define what data is actually sent to the Discriminator
#(name, preprocess, d_input_func) = ("Raw data", lambda data: data, lambda x: x)
#(name, preprocess, d_input_func) = ("Data and variances", lambda data: decorate_with_diffs(data, 2.0), lambda x: x * 2)
#(name, preprocess, d_input_func) = ("Data and diffs", lambda data: decorate_with_diffs(data, 1.0), lambda x: x * 2)
(name, preprocess, d_input_func) = ("Only 4 moments", lambda data: get_moments(data), lambda x: 4)
print("Using data [%s]" % (name))

Using data [Only 4 moments]


In [5]:
def get_distribution_sampler(mu, sigma):
    return lambda n: torch.Tensor(np.random.normal(mu, sigma, (1, n)))  # Gaussian

In [6]:
def get_generator_input_sampler():
    return lambda m, n: torch.rand(m, n)  # Uniform-dist data into generator, _NOT_ Gaussian



In [7]:
class Generator(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, f):
        super(Generator, self).__init__()
        self.map1 = nn.Linear(input_size, hidden_size)
        self.map2 = nn.Linear(hidden_size, hidden_size)
        self.map3 = nn.Linear(hidden_size, output_size)
        self.f = f

    def forward(self, x):
        x = self.map1(x)
        x = self.f(x)
        x = self.map2(x)
        x = self.f(x)
        x = self.map3(x)
        return x

In [8]:
class Discriminator(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, f):
        super(Discriminator, self).__init__()
        self.map1 = nn.Linear(input_size, hidden_size)
        self.map2 = nn.Linear(hidden_size, hidden_size)
        self.map3 = nn.Linear(hidden_size, output_size)
        self.f = f

    def forward(self, x):
        x = self.f(self.map1(x))
        x = self.f(self.map2(x))
        #return self.f(self.map3(x))
        return (self.map3(x))

In [9]:
def extract(v):
    return v.data.storage().tolist()


In [10]:
def stats(d):
    return [np.mean(d), np.std(d)]

In [11]:
def get_moments(d):
    # Return the first 4 moments of the data provided
    mean = torch.mean(d)
    diffs = d - mean
    var = torch.mean(torch.pow(diffs, 2.0))
    std = torch.pow(var, 0.5)
    zscores = diffs / std
    skews = torch.mean(torch.pow(zscores, 3.0))
    kurtoses = torch.mean(torch.pow(zscores, 4.0)) - 3.0  # excess kurtosis, should be 0 for Gaussian
    final = torch.cat((mean.reshape(1,), std.reshape(1,), skews.reshape(1,), kurtoses.reshape(1,)))
    return final


In [12]:
def decorate_with_diffs(data, exponent, remove_raw_data=False):
    mean = torch.mean(data.data, 1, keepdim=True)
    mean_broadcast = torch.mul(torch.ones(data.size()), mean.tolist()[0][0])
    diffs = torch.pow(data - Variable(mean_broadcast), exponent)
    if remove_raw_data:
        return torch.cat([diffs], 1)
    else:
        return torch.cat([data, diffs], 1)
    

In [13]:

# Model parameters

num_runs = 1

#job_name = "./saves/baseline/starter_gan_default"


# Model parameters
g_input_size = 1      # Random noise dimension coming into generator, per output vector
g_hidden_size = 5     # Generator complexity
g_output_size = 1     # Size of generated output vector
d_input_size = 500    # Minibatch size - cardinality of distributions
d_hidden_size = 10    # Discriminator complexity
d_output_size = 1     # Single dimension for 'real' vs. 'fake' classification
minibatch_size = d_input_size

d_learning_rate = 1e-3
g_learning_rate = 1e-3
sgd_momentum = 0.9
beta1 = 0.5

num_epochs = 5000
print_interval = 100
save_interval = 10
d_steps = 20
g_steps = 20

dfe, dre, ge = 0, 0, 0
d_real_data, d_fake_data, g_fake_data = None, None, None

discriminator_activation_function = torch.sigmoid
generator_activation_function = torch.tanh



In [14]:
def plot_distribution(job_name, data_name, data):
    #print("Plotting the ", data_name, "...")
    #print(" Values: %s" % (str(gen_data)))
    fig = plt.hist(data, bins=50)
    plt.xlabel('Value')
    plt.ylabel('Count')
    plt.title('Histogram of ' + data_name + ' Distribution')
    plt.grid(True)
    plt.savefig(job_name + data_name + '.png')
    #plt.show()
    
   
    plt.clf()

In [15]:
def train(job_name):
    #print(job_name)
    d_sampler = get_distribution_sampler(data_mean, data_stddev)
    gi_sampler = get_generator_input_sampler()
    G = Generator(input_size=g_input_size,
                  hidden_size=g_hidden_size,
                  output_size=g_output_size,
                  f=generator_activation_function)
    D = Discriminator(input_size=d_input_func(d_input_size),
                      hidden_size=d_hidden_size,
                      output_size=d_output_size,
                      f=discriminator_activation_function)
    criterion = nn.BCELoss()  # Binary cross entropy: http://pytorch.org/docs/nn.html#bceloss
    #d_optimizer = optim.SGD(D.parameters(), lr=d_learning_rate, momentum=sgd_momentum)
    #g_optimizer = optim.SGD(G.parameters(), lr=g_learning_rate, momentum=sgd_momentum)
    d_optimizer = optim.Adam(D.parameters(), lr=d_learning_rate, betas=(beta1, 0.999))
    g_optimizer = optim.Adam(G.parameters(), lr=g_learning_rate, betas=(beta1, 0.999))

    use_gpu = torch.cuda.is_available()
    #print("use_gpu: ", use_gpu)
    if use_gpu:
        G = G.cuda()
        D = D.cuda()
    start_time = time.time()
    if not os.path.exists(job_name + '_info_over_epoch.txt'):
        with open(job_name + '_info_over_epoch.txt', 'a'): pass
    open_file = open(job_name + '_info_over_epoch.txt', 'r+')
    open_file.seek(0)
    open_file.truncate()
    open_file.close()
    
    if not os.path.exists(job_name + '_results.txt'):
        with open(job_name + '_results.txt', 'a'): pass
    open_file = open(job_name + '_results.txt', 'r+')
    open_file.seek(0)
    open_file.truncate()
    open_file.close()
    
    avg_dre = 0
    avg_dfe = 0
    avg_ge = 0
    for epoch in range(num_epochs):
        
        for d_index in range(d_steps):
            # 1. Train D on real+fake
            D.zero_grad()

            #  1A: Train D on real
            d_real_data = Variable(d_sampler(d_input_size))
            
            if use_gpu:
                d_real_data = d_real_data.cuda()
            
            d_real_decision = D(preprocess(d_real_data))

            labels = Variable(torch.ones([1]))
            
            if use_gpu:
                labels = labels.cuda()
            d_real_error = criterion(d_real_decision, labels)   # ones = true
            #Wassterstein loss
            #d_real_error = -torch.mean(d_real_decision)
            d_real_error.backward() # compute/store gradients, but don't change params

            #  1B: Train D on fake
            d_gen_input = Variable(gi_sampler(minibatch_size, g_input_size))
            if use_gpu:
                d_gen_input = d_gen_input.cuda()
            d_fake_data = G(d_gen_input).detach()  # detach to avoid training G on these labels
            d_fake_decision = D(preprocess(d_fake_data.t()))
            
            labels = Variable(torch.zeros([1]))

                              
            if use_gpu:
                labels = labels.cuda()
            d_fake_error = criterion(d_fake_decision, labels)  # zeros = fake
            #Wassterstein loss
            #d_fake_error = torch.mean(d_fake_decision)
            d_fake_error.backward()
            d_optimizer.step()     # Only optimizes D's parameters; changes based on stored gradients from backward()

            dre, dfe = extract(d_real_error)[0], extract(d_fake_error)[0]
            avg_dre += dre
            avg_dfe += dfe
            
            #Wasserstein weight clipping 
            #for p in D.parameters():
                #p.data.clamp_(-0.01, 0.01)
            
            

        for g_index in range(g_steps):
            # 2. Train G on D's response (but DO NOT train D on these labels)
            G.zero_grad()

            gen_input = Variable(gi_sampler(minibatch_size, g_input_size))
            if use_gpu:
                gen_input = gen_input.cuda()
                              
            g_fake_data = G(gen_input)
            dg_fake_decision = D(preprocess(g_fake_data.t()))
                              
            labels = Variable(torch.ones([1]))
            if use_gpu:
                labels = labels.cuda()
            g_error = criterion(dg_fake_decision, labels)  # Train G to pretend it's genuine
            #Wassterstein loss
            #g_error = -torch.mean(dg_fake_decision)
            g_error.backward()
            g_optimizer.step()  # Only optimizes G's parameters
            ge = extract(g_error)[0]
            avg_ge += ge
        
       

        '''if epoch % print_interval == 0:
            print("Epoch %s: D (%s real_err, %s fake_err) G (%s err); Real Dist (%s),  Fake Dist (%s) " %
                  (epoch, dre, dfe, ge, stats(extract(d_real_data)), stats(extract(d_fake_data))))'''
        if epoch % save_interval == 0:
            with open(job_name + '_info_over_epoch.txt', 'a') as f:
                f.write("{} {} {} {}\n".format(epoch, avg_dre/(d_steps*save_interval), avg_dfe/(d_steps*save_interval), avg_ge/(g_steps*save_interval)))
            avg_dre = 0
            avg_dfe = 0
            avg_ge = 0

    #Plot real and generated figures
    if matplotlib_is_available:
        plot_distribution(job_name, 'Real', d_real_data.cpu())
        plot_distribution(job_name, 'Generated', extract(g_fake_data))
        
    #Calculate and save KL_DIV
    input_data = g_fake_data.detach().cpu()
    target_data = d_real_data.view(500, 1).cpu()
    min_elem = abs(min(torch.min(input_data), torch.min(target_data))) + 1
    print(min_elem)
    #min_tensor = Variable(torch.Tensor([min_elem])).expand(input_data.size())
    #print(min_tensor)
    #print(min_tensor.size())

    kl_loss = torch.nn.functional.kl_div(torch.log(input_data + min_elem), target_data + min_elem)
    gen_mean = extract(torch.mean(input_data, dim = 0))
    gen_std = extract(torch.std(input_data, dim = 0))
    real_mean = extract(torch.mean(target_data, dim = 0))
    real_std = extract(torch.std(target_data, dim = 0))
    
    
    with open(job_name + '_results.txt', 'a') as f:
                f.write("""Generated Mean: {} \nGenerated StdDev: {} \nReal Mean: {} \nReal StdDev: {}\nKL Loss: {} \n""".format(gen_mean, gen_std, real_mean, real_std, kl_loss))
                f.close()
        
    torch.save({
                'model_state_dict': G.state_dict(),
                'optimizer_state_dict': g_optimizer.state_dict()
                }, job_name + '_generator_model')
    
    print("Total run time: ", time.time() - start_time)
    
    return gen_mean[0], gen_std[0], real_mean[0], real_std[0], kl_loss
    
    
    
        
        

In [16]:
def test_or_gen(start_train, model_name=None):
    if start_train:
        
        train()
    else:
        checkpoint=None
        if os.path.isfile(model_name + '_generator_model'):
            print("fetching model")
            checkpoint = torch.load(model_name + '_generator_model')
            G.load_state_dict(checkpoint['model_state_dict'])
            g_optimizer = optim.Adam(G.parameters(), lr=g_learning_rate, betas=(beta1, 0.999))
            g_optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

        gen_input = Variable(gi_sampler(minibatch_size, g_input_size)).cuda()
        g_fake_data = G(gen_input)
        print("Plotting the generated distribution...")
        gen_data = extract(g_fake_data)
        #print(" Values: %s" % (str(gen_data)))
        plt.hist(gen_data, bins=50)
        plt.xlabel('Value')
        plt.ylabel('Count')
        plt.title('Histogram of Generated Distribution')
        plt.grid(True)
        plt.show()


In [17]:
avg_gen_mean, avg_gen_std, avg_real_mean, avg_real_std, avg_kl_loss = 0, 0 ,0 ,0 ,0
path_name = "./saves/baseline_"
job_name = "/05_27_GAN"
start_time = time.time()
for i in range(num_runs):
    new_path = path_name + str(i)
    os.mkdir(new_path)
    curr_job = new_path+job_name
    gen_mean, gen_std, real_mean, real_std, kl_loss = train(curr_job)
    print(f'--------- Run #{i} --------------')
    print(f"Generated Mean: {gen_mean}\nGenerated StdDev: {gen_std}\nReal_Mean: {real_mean}\nReal StdDev: {real_std}\nKL_Loss: {kl_loss}")
    
    avg_gen_mean += gen_mean
    avg_gen_std += gen_std
    avg_real_mean += real_mean
    avg_real_std += real_std
    avg_kl_loss += kl_loss
avg_gen_mean /= num_runs
avg_gen_std /= num_runs
avg_real_mean /= num_runs
avg_real_std /= num_runs
avg_kl_loss /= num_runs

final_data = "./saves/baseline_final_results.txt"
if not os.path.exists(final_data):
    with open(final_data, 'a'): pass
open_file = open(final_data, 'r+')
open_file.seek(0)
open_file.truncate()
open_file.close()

with open(final_data, 'a') as f:
    f.write("""Generated Mean: {} \nGenerated StdDev: {} \nReal Mean: {} \nReal StdDev: {}\nKL Loss: {} \n""".format(avg_gen_mean, avg_gen_std, avg_real_mean, avg_real_std, avg_kl_loss))
    f.close()
print("Total run time: ", time.time() - start_time)


tensor(16.0278)
Total run time:  782.8399302959442
--------- Run #0 --------------
Generated Mean: -0.4199334681034088
Generated StdDev: 5.115114688873291
Real_Mean: -0.38280779123306274
Real StdDev: 4.983419418334961
KL_Loss: 1.9956449270248413




tensor(16.6932)
Total run time:  777.6854360103607
--------- Run #1 --------------
Generated Mean: -0.00025499059120193124
Generated StdDev: 4.975346565246582
Real_Mean: -0.25343257188796997
Real StdDev: 4.89206075668335
KL_Loss: 1.4943549633026123
tensor(14.1212)
Total run time:  789.465904712677
--------- Run #2 --------------
Generated Mean: -0.21424873173236847
Generated StdDev: 4.771478652954102
Real_Mean: -0.10172083973884583
Real StdDev: 4.889560699462891
KL_Loss: 1.8769780397415161
tensor(16.4311)
Total run time:  785.2699921131134
--------- Run #3 --------------
Generated Mean: 0.1752815842628479
Generated StdDev: 4.817841529846191
Real_Mean: -0.2107079029083252
Real StdDev: 4.924190998077393
KL_Loss: 1.1400326490402222
tensor(15.5653)
Total run time:  776.2132842540741
--------- Run #4 --------------
Generated Mean: -0.09924923628568649
Generated StdDev: 5.191056251525879
Real_Mean: 0.047237578779459
Real StdDev: 5.145570278167725
KL_Loss: 2.095823049545288
tensor(22.4660)
To

<Figure size 432x288 with 0 Axes>

In [18]:
'''checkpoint=None
if os.path.isfile(job_name + '_generator_model'):
    print("fetching model")
    checkpoint = torch.load(job_name + '_generator_model')
    G.load_state_dict(checkpoint['model_state_dict'])
    g_optimizer = optim.Adam(G.parameters(), lr=g_learning_rate, betas=(beta1, 0.999))
    g_optimizer.load_state_dict(checkpoint['optimizer_state_dict'])'''

'''G.eval().cpu()
with torch.no_grad():
    gen_input = Variable(gi_sampler(minibatch_size, g_input_size))

    g_fake_data = G(gen_input)'''


#plot_distribution('Generated', extract(g_fake_data))

d_sampler = get_distribution_sampler(data_mean, data_stddev)

d_real_data = Variable(d_sampler(d_input_size))
x = torch.min(d_real_data)
print(x)



tensor(-14.8918)


In [19]:
test_sampler = get_distribution_sampler(100, 1)
input_1 = Variable(test_sampler(d_input_size))
input_2 = Variable(test_sampler(d_input_size))

kl_loss = torch.nn.functional.kl_div(torch.log(input_1), input_2)

print(kl_loss)


tensor(-0.0695)
