In [3]:
from __future__ import absolute_import, division, print_function, unicode_literals

#!pip install tensorflow-gpu==2.0.0-beta1

"""
Source: https://github.com/matiRLC/Keras-GAN/blob/master/gan/gan.py
Adapted by: matias@u.nus.edu
Updates:
    Try CGAN
    supervised discriminator
    Wasserstein GAN
"""

# Tensorflow
import tensorflow as tf
from tensorflow.keras import layers
from IPython import display 
print("Tensorflow version {}".format(tf.__version__))

# Progress bar
from tqdm import tqdm

# Numpy, pandas, matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import time
import PIL

# Sklearn
from sklearn import preprocessing
from sklearn.manifold import TSNE

class DCGAN():
    """Deep convolutional GAN that models tabular rows as square images.

    Each row of the training DataFrame is min-max scaled to [-1, 1], tiled and
    zero-padded to ``data_dim * data_dim`` values and reshaped into a
    ``(data_dim, data_dim, 1)`` image (see ``prepare_dataset``). Generated
    images are flattened, cropped to the first ``n_features`` values and
    mapped back to the original feature ranges (see ``generate_data``).
    """

    # Columns rounded to the nearest integer after generation, when present
    # in the reference dataset.
    DISCRETE_COLUMNS = ('Gender', 'Discrete Thermal Comfort_TA')

    # Directory that receives the histogram images written when ``display``
    # is enabled.
    OUTPUT_DIR = "../output/GANtest"

    def __init__(self, n_features, data_dim=28, n_hidden=150, n_layers=2, lr= 0.0001, display=False):
        """Build the generator, the discriminator and their optimizers.

        Args:
            n_features: number of leading values of each flattened generated
                image that are kept as synthetic features.
            data_dim: side length of the square image. Must be a positive
                multiple of 4 because the generator upsamples twice by 2.
            n_hidden: stored on the instance; not used by this class.
            n_layers: stored on the instance; not used by this class.
            lr: learning rate passed to both Adam optimizers.
            display: when True, ``generate_data`` plots and saves a histogram
                of the generator output.

        Raises:
            ValueError: if ``data_dim`` is not a positive multiple of 4.
        """
        if data_dim <= 0 or data_dim % 4 != 0:
            raise ValueError(
                "data_dim must be a positive multiple of 4, got {}".format(data_dim))

        self.display = display
        self.seed = 13  # NOTE: stored only; nothing in this class seeds TF with it

        self.lr = lr
        self.n_features = n_features  # number of features in the dataset
        self.data_dim = data_dim  # dimension of the square image
        self.latent_dim = 100  # length of the generator's noise vector
        self.n_hidden = n_hidden
        self.n_layers = n_layers

        # optimizers
        self.generator_optimizer = tf.keras.optimizers.Adam(lr)
        self.discriminator_optimizer = tf.keras.optimizers.Adam(lr)

        # Build the generator
        self.generator = self.build_generator()

        # Build the discriminator
        self.discriminator = self.build_discriminator()

        # helper to compute the cross entropy loss; the discriminator outputs
        # raw logits, hence from_logits=True
        self.cross_entropy = tf.keras.losses.BinaryCrossentropy(from_logits=True)

        # per-epoch loss history (see ``train``)
        self.loss_g = []
        self.loss_d = []

    def build_generator(self):
        """Return the generator: noise vector -> (data_dim, data_dim, 1) image.

        The network starts from a ``data_dim // 4`` square feature map and
        doubles its side twice with stride-2 transposed convolutions, so the
        output side equals ``data_dim``. The final ``tanh`` keeps outputs in
        [-1, 1], the range the training data is scaled to.
        """
        base = self.data_dim // 4  # side of the initial feature map
        half = self.data_dim // 2  # side after the first upsampling

        model = tf.keras.Sequential()
        model.add(layers.Dense(base * base * 256, use_bias=False, input_shape=(self.latent_dim,)))
        model.add(layers.BatchNormalization())
        model.add(layers.LeakyReLU())

        model.add(layers.Reshape((base, base, 256)))
        assert model.output_shape == (None, base, base, 256)  # Note: None is the batch size

        model.add(layers.Conv2DTranspose(128, (5, 5), strides=(1, 1), padding='same', use_bias=False))
        assert model.output_shape == (None, base, base, 128)
        model.add(layers.BatchNormalization())
        model.add(layers.LeakyReLU())

        model.add(layers.Conv2DTranspose(64, (5, 5), strides=(2, 2), padding='same', use_bias=False))
        assert model.output_shape == (None, half, half, 64)
        model.add(layers.BatchNormalization())
        model.add(layers.LeakyReLU())

        model.add(layers.Conv2DTranspose(1, (5, 5), strides=(2, 2), padding='same', use_bias=False, activation='tanh'))
        assert model.output_shape == (None, self.data_dim, self.data_dim, 1)

        print("Generator Summary:")
        model.summary()

        return model

    def build_discriminator(self):
        """Return the discriminator: (data_dim, data_dim, 1) image -> one logit."""
        model = tf.keras.Sequential()
        model.add(layers.Conv2D(64, (5, 5), strides=(2, 2), padding='same',
                                input_shape=[self.data_dim, self.data_dim, 1]))
        model.add(layers.LeakyReLU())
        model.add(layers.Dropout(0.3))

        model.add(layers.Conv2D(128, (5, 5), strides=(2, 2), padding='same'))
        model.add(layers.LeakyReLU())
        model.add(layers.Dropout(0.3))

        model.add(layers.Flatten())
        # No activation: the loss is computed from logits.
        model.add(layers.Dense(1))

        print("Discriminator Summary:")
        model.summary()

        return model

    def discriminator_loss(self, real_output, fake_output):
        """Return the cross entropy of real samples vs 1 plus fake samples vs 0."""
        real_loss = self.cross_entropy(tf.ones_like(real_output), real_output)
        fake_loss = self.cross_entropy(tf.zeros_like(fake_output), fake_output)
        return real_loss + fake_loss

    # TODO: add correlation loss, of mean and std
    def generator_loss(self, fake_output):
        """Return the cross entropy of the fake samples' logits against 1."""
        return self.cross_entropy(tf.ones_like(fake_output), fake_output)

    # This annotation causes the function to be "compiled" and therefore run as graph
    @tf.function
    def train_step(self, data, BATCH_SIZE):
        """Run one simultaneous generator/discriminator update on one batch.

        Args:
            data: batch of real images fed to the discriminator.
            BATCH_SIZE: number of noise vectors (and thus fake images) drawn;
                it is used as-is even if ``data`` holds fewer samples.

        Returns:
            Tuple ``(gen_loss, disc_loss)`` of scalar loss tensors.
        """
        noise = tf.random.uniform([BATCH_SIZE, self.latent_dim], -1, 1)

        with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
            # TODO: pre-train the generator before adversarial training
            generated_data = self.generator(noise, training=True)

            real_output = self.discriminator(data, training=True)
            fake_output = self.discriminator(generated_data, training=True)

            gen_loss = self.generator_loss(fake_output)
            disc_loss = self.discriminator_loss(real_output, fake_output)

        gradients_of_generator = gen_tape.gradient(
            gen_loss, self.generator.trainable_variables)
        gradients_of_discriminator = disc_tape.gradient(
            disc_loss, self.discriminator.trainable_variables)

        self.generator_optimizer.apply_gradients(
            zip(gradients_of_generator, self.generator.trainable_variables))
        self.discriminator_optimizer.apply_gradients(
            zip(gradients_of_discriminator, self.discriminator.trainable_variables))
        return gen_loss, disc_loss

    def train(self, dataset, EPOCHS, BATCH_SIZE=128, SAMPLE_INTERVAL=15):
        """Train both networks on ``dataset`` and record per-epoch losses.

        Args:
            dataset: pandas DataFrame of training rows.
            EPOCHS: number of passes over the data.
            BATCH_SIZE: batch size for training and for sample generation.
            SAMPLE_INTERVAL: synthetic data is generated every this many
                epochs; 0 disables the periodic generation.

        After each epoch the losses of the last processed batch are appended
        to ``self.loss_g`` and ``self.loss_d``. Synthetic data is generated
        once more after the final epoch.
        """
        origin_dataset = dataset.copy()
        # transform dataset into images of data
        dataset = self.prepare_dataset(dataset, BATCH_SIZE)  # TODO: UPDATE THIS AS DATASET

        # progress bar; closed in ``finally`` so a failing step does not leak it
        pbar = tqdm(total=EPOCHS)
        try:
            for epoch in range(EPOCHS):
                gen_loss = disc_loss = None
                for data_batch in dataset:
                    gen_loss, disc_loss = self.train_step(data_batch, BATCH_SIZE)

                # Generate samples every SAMPLE_INTERVAL epochs; the guard
                # avoids a modulo-by-zero when the interval is 0.
                if SAMPLE_INTERVAL and epoch % SAMPLE_INTERVAL == 0:
                    self.generate_data(origin_dataset, epoch, BATCH_SIZE)
                    display.clear_output(wait=True)

                # update losses (skipped if the epoch produced no batch)
                if gen_loss is not None:
                    self.loss_g.append(gen_loss.numpy())
                    self.loss_d.append(disc_loss.numpy())

                pbar.update(1)

            # Generate after the final epoch. The index is computed rather
            # than read from the loop variable, which is unbound when
            # EPOCHS is 0.
            display.clear_output(wait=True)
            self.generate_data(origin_dataset, max(EPOCHS - 1, 0), BATCH_SIZE)
        finally:
            pbar.close()

    def generate_data(self, origin_dataset, epoch=1, BATCH_SIZE=128):
        """Generate ``BATCH_SIZE * 5`` synthetic rows in original feature units.

        Args:
            origin_dataset: pandas DataFrame the model was trained on; it
                supplies the column names and the min/max used to undo the
                [-1, 1] scaling.
            epoch: label used in the plot title and the saved file name.
            BATCH_SIZE: one fifth of the number of rows to generate.

        Returns:
            pandas DataFrame with the columns of ``origin_dataset``. Columns
            listed in ``DISCRETE_COLUMNS`` are rounded to integers.
        """
        noise = tf.random.uniform([BATCH_SIZE * 5, self.latent_dim], -1, 1)
        generated_x = self.generator(noise, training=False).numpy()

        if self.display:
            import os

            fig = plt.figure()
            # generated_x is already a NumPy array; flatten it so hist gets a
            # single 1-D sample of all generated values.
            plt.hist(generated_x.ravel(), bins=40, density=True, histtype='bar')
            plt.title("testing:" + str(epoch))
            if not os.path.isdir(self.OUTPUT_DIR):
                os.makedirs(self.OUTPUT_DIR)
            # Save before showing so the file is written regardless of how
            # the backend treats the figure after show().
            fig.savefig("{}/{}.png".format(self.OUTPUT_DIR, epoch))
            plt.show()
            plt.close(fig)

        # one flat row per generated image
        generated_x = generated_x.reshape(generated_x.shape[0], -1)

        # Selecting the correct number of attributes (used in training)
        generated_x = generated_x[:, : self.n_features]

        print("Synth Data shape= " + str(generated_x.shape))

        # Undo the [-1, 1] scaling using the ranges of the reference dataset.
        min_max_scaler = preprocessing.MinMaxScaler(feature_range=(-1, 1))
        min_max_scaler.fit(origin_dataset)
        generated_x = min_max_scaler.inverse_transform(generated_x)

        generated_x = pd.DataFrame(generated_x, columns=origin_dataset.columns.values)
        for column in self.DISCRETE_COLUMNS:
            if column in generated_x.columns:
                generated_x[column] = generated_x[column].round()

        return generated_x

    def get_losses(self):
        """
        Return the generator and discriminator loss histories.
        """
        return self.loss_g , self.loss_d

    def padding_duplicating(self, data, row_size):
        """Tile each row of ``data`` up to ``row_size`` values, zero-padding the rest.

        Args:
            data: pandas DataFrame with one sample per row.
            row_size: target number of values per row.

        Returns:
            2-D NumPy array with ``row_size`` columns: as many whole copies of
            the original columns as fit, followed by zeros.

        Raises:
            ValueError: if ``data`` has no columns or more than ``row_size``.
        """
        arr_data = np.array(data.values.tolist())

        col_num = arr_data.shape[1]
        if col_num == 0 or col_num > row_size:
            raise ValueError(
                "data must have between 1 and {} columns, got {}".format(row_size, col_num))

        # duplicating values: whole copies that fit in row_size
        arr_data = np.tile(arr_data, (1, row_size // col_num))

        # padding the remainder with zero
        npad = ((0, 0), (0, row_size - arr_data.shape[1]))
        return np.pad(arr_data, pad_width=npad, mode='constant', constant_values=0.)

    def reshape(self, data, dim):
        """Reshape ``data`` to ``(rows, dim, -1)``, keeping the first axis."""
        return data.reshape(data.shape[0], dim, -1)

    def prepare_dataset(self, dataframe, BATCH_SIZE=128):
        """
        Transform a dataframe into a shuffled, batched dataset of images.

        Every column is min-max scaled to [-1, 1]; each row is then tiled and
        zero-padded to ``data_dim * data_dim`` values and reshaped to
        ``(data_dim, data_dim, 1)`` float32.

        Returns:
            ``tf.data.Dataset`` yielding batches of at most ``BATCH_SIZE`` images.
        """
        BUFFER_SIZE = dataframe.shape[0] * 2
        # rescale everything between [-1,1]
        min_max_scaler = preprocessing.MinMaxScaler(feature_range=(-1, 1))
        # normalizing data
        X = pd.DataFrame(min_max_scaler.fit_transform(dataframe))
        # padding to match square image size
        padded_ar = self.padding_duplicating(X, self.data_dim * self.data_dim)
        # reshape dataset
        X = self.reshape(padded_ar, self.data_dim)
        print("Final Real Data shape = " + str(X.shape))
        # final shape for image
        X = X.reshape(X.shape[0], self.data_dim, self.data_dim, 1).astype('float32')
        print("Final Real Data shape = " + str(X.shape))

        # shuffle and create batches
        X = tf.data.Dataset.from_tensor_slices(X).shuffle(BUFFER_SIZE).batch(BATCH_SIZE)

        return X

Tensorflow version 2.0.0-beta1
