In [1]:
import tensorflow as tf

In [2]:
tf.__version__

'2.3.0'

In [3]:
import pandas as pd 
import numpy as np

from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras import layers
from IPython import display

import pickle
import time
import os


In [4]:
#Reading the data - Features 
X = pd.read_csv("data/Adult/Adult.csv", delimiter=';')

In [5]:
# Reading the data - target
# y = pd.read_csv("data/Adult/Adult_labels.csv")

In [6]:
X = X[0:48832]

In [7]:
X.head()

Unnamed: 0,age,workclass,fnlwgt,education,educational-num,marital-status,occupation,relationship,race,gender,capital-gain,capital-loss,hours-per-week,native-country
0,25,0,226802,0,7,0,0,0,0,0,0,0,40,0
1,38,0,89814,1,9,1,1,1,1,0,0,0,50,0
2,28,1,336951,2,12,1,2,1,1,0,0,0,40,0
3,44,0,160323,3,10,1,0,1,0,0,7688,0,40,0
4,18,2,103497,3,10,0,3,0,1,1,0,0,30,0


In [8]:
# Normalizing Initial Data
min_max_scaler = MinMaxScaler(feature_range=(-1, 1))
X = pd.DataFrame(min_max_scaler.fit_transform(X))
s = pickle.dumps(min_max_scaler)


In [9]:
# Converting dataframe to numpy array
X_train = X.to_numpy()

In [10]:
X_train[0:6]

array([[-0.78082192, -1.        , -0.70974248, -1.        , -0.2       ,
        -1.        , -1.        , -1.        , -1.        , -1.        ,
        -1.        , -1.        , -0.20408163, -1.        ],
       [-0.42465753, -1.        , -0.89509747, -0.86666667,  0.06666667,
        -0.66666667, -0.85714286, -0.6       , -0.5       , -1.        ,
        -1.        , -1.        ,  0.        , -1.        ],
       [-0.69863014, -0.75      , -0.56070265, -0.73333333,  0.46666667,
        -0.66666667, -0.71428571, -0.6       , -0.5       , -1.        ,
        -1.        , -1.        , -0.20408163, -1.        ],
       [-0.26027397, -1.        , -0.79969353, -0.6       ,  0.2       ,
        -0.66666667, -1.        , -0.6       , -1.        , -1.        ,
        -0.84623846, -1.        , -0.20408163, -1.        ],
       [-0.97260274, -0.5       , -0.87658335, -0.6       ,  0.2       ,
        -1.        , -0.57142857, -1.        , -0.5       ,  1.        ,
        -1.        , -1.  

In [11]:
# Padding the data to maxlen of 28
X_train_padded = tf.keras.preprocessing.sequence.pad_sequences(X_train, padding='post', maxlen=28, dtype='float32')

In [12]:
X_train_padded.shape

(48832, 28)

Paper reference : https://arxiv.org/pdf/1806.03384.pdf

In [13]:
# Reshaping the data 
train_tabdata = X_train_padded.reshape(1744, 28, 28, 1)


In [14]:
BUFFER_SIZE = 6000
BATCH_SIZE = 256

In [15]:
# Batch and shuffle the data
train_dataset = tf.data.Dataset.from_tensor_slices(train_tabdata).shuffle(BUFFER_SIZE).batch(BATCH_SIZE)

In [16]:
def make_generator_model():
    model = tf.keras.Sequential()
    model.add(layers.Dense(7*7*256, use_bias=False, input_shape=(100,)))
    model.add(layers.BatchNormalization())
    model.add(layers.LeakyReLU())

    model.add(layers.Reshape((7, 7, 256)))
    assert model.output_shape == (None, 7, 7, 256) # Note: None is the batch size

    model.add(layers.Conv2DTranspose(128, (5, 5), strides=(1, 1), padding='same', use_bias=False))
    assert model.output_shape == (None, 7, 7, 128)
    model.add(layers.BatchNormalization())
    model.add(layers.LeakyReLU())

    model.add(layers.Conv2DTranspose(64, (5, 5), strides=(2, 2), padding='same', use_bias=False))
    assert model.output_shape == (None, 14, 14, 64)
    model.add(layers.BatchNormalization())
    model.add(layers.LeakyReLU())

    model.add(layers.Conv2DTranspose(1, (5, 5), strides=(2, 2), padding='same', use_bias=False, activation='tanh'))
    assert model.output_shape == (None, 28, 28, 1)

    return model

In [17]:
generator = make_generator_model()


In [18]:
noise = tf.random.normal([1, 100])
noise

<tf.Tensor: shape=(1, 100), dtype=float32, numpy=
array([[ 0.483309  ,  1.0523266 , -0.21477291, -0.4421579 , -0.02740003,
        -0.35777128,  2.2388873 , -0.28438753,  0.3284141 , -0.43830106,
         2.0968466 , -2.0726464 ,  0.01861378,  0.9133805 ,  0.68421185,
         0.4343149 ,  0.27180195,  0.06572846, -0.9583688 ,  0.9007706 ,
         0.21765038, -0.9605544 ,  1.7204759 ,  0.0468441 , -0.43217617,
        -0.36636716, -2.69674   ,  0.7167376 ,  0.17443171, -2.3725348 ,
        -0.39343128, -0.5782881 , -0.99862534,  1.2277476 ,  0.72473   ,
        -0.85291713,  0.02526405,  0.41578192, -1.3255163 ,  0.2251227 ,
         0.06569139,  1.2659862 ,  1.3816924 ,  0.13119936, -0.01291602,
         1.0664173 ,  0.29920405, -0.9245237 , -0.41817844, -0.6066443 ,
         1.9573762 , -0.7339047 ,  0.2119151 , -0.7973536 , -0.04582154,
        -0.1419401 , -1.0621858 ,  0.363056  , -0.13621473, -0.70057786,
        -0.35042718,  0.05003823,  0.40624166, -0.65013444, -0.83534527,
 

In [19]:
generated_data = generator(noise, training=False)
generated_data

<tf.Tensor: shape=(1, 28, 28, 1), dtype=float32, numpy=
array([[[[-1.39352027e-03],
         [-2.86678318e-03],
         [ 1.20250287e-03],
         [ 3.51207610e-03],
         [-2.80848611e-03],
         [ 1.14674363e-02],
         [-8.66856892e-03],
         [-8.83816928e-03],
         [-7.77733698e-03],
         [ 8.35524593e-03],
         [-3.82725499e-03],
         [-8.66420660e-03],
         [ 2.60457234e-03],
         [ 6.60848385e-03],
         [ 7.03829341e-03],
         [-8.13531596e-03],
         [ 2.90919328e-03],
         [ 9.19398852e-03],
         [-8.62875581e-03],
         [ 5.03344927e-06],
         [-2.12632306e-03],
         [ 1.58144403e-02],
         [ 6.16464857e-03],
         [-6.23348809e-04],
         [-6.91030547e-03],
         [ 1.62198544e-02],
         [-4.44268202e-03],
         [-2.70254561e-03]],

        [[-3.98945669e-03],
         [-6.61533140e-03],
         [-8.74585239e-04],
         [ 7.24255713e-03],
         [-1.37472863e-03],
         [-2.17846

In [20]:
def make_discriminator_model():
    model = tf.keras.Sequential()
    model.add(layers.Conv2D(64, (5, 5), strides=(2, 2), padding='same',
                                     input_shape=[28, 28, 1]))
    model.add(layers.LeakyReLU())
    model.add(layers.Dropout(0.3))

    model.add(layers.Conv2D(128, (5, 5), strides=(2, 2), padding='same'))
    model.add(layers.LeakyReLU())
    model.add(layers.Dropout(0.3))

    model.add(layers.Flatten())
    model.add(layers.Dense(1))

    return model

In [21]:
discriminator = make_discriminator_model()


In [22]:
decision = discriminator(generated_data)
print (decision)

tf.Tensor([[0.00067225]], shape=(1, 1), dtype=float32)


In [23]:
# This method returns a helper function to compute cross entropy loss
cross_entropy = tf.keras.losses.BinaryCrossentropy(from_logits=True)

In [24]:
def discriminator_loss(real_output, fake_output):
    real_loss = cross_entropy(tf.ones_like(real_output), real_output)
    fake_loss = cross_entropy(tf.zeros_like(fake_output), fake_output)
    total_loss = real_loss + fake_loss
    return total_loss

In [25]:
def generator_loss(fake_output):
    return cross_entropy(tf.ones_like(fake_output), fake_output)

In [26]:
generator_optimizer = tf.keras.optimizers.Adam(1e-4)
discriminator_optimizer = tf.keras.optimizers.Adam(1e-4)

In [27]:
checkpoint_dir = './training_checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
checkpoint = tf.train.Checkpoint(generator_optimizer=generator_optimizer,
                                 discriminator_optimizer=discriminator_optimizer,
                                 generator=generator,
                                 discriminator=discriminator)

In [28]:
EPOCHS = 1000
noise_dim = 100
num_examples_to_generate = 16

seed = tf.random.normal([num_examples_to_generate, noise_dim])

In [29]:
# Notice the use of `tf.function`
# This annotation causes the function to be "compiled".
@tf.function
def train_step(data):
    noise = tf.random.normal([BATCH_SIZE, noise_dim])

    with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
        generated_data = generator(noise, training=True)

        real_output = discriminator(data, training=True)
        fake_output = discriminator(generated_data, training=True)

        gen_loss = generator_loss(fake_output)
        disc_loss = discriminator_loss(real_output, fake_output)

    gradients_of_generator = gen_tape.gradient(gen_loss, generator.trainable_variables)
    gradients_of_discriminator = disc_tape.gradient(disc_loss, discriminator.trainable_variables)

    generator_optimizer.apply_gradients(zip(gradients_of_generator, generator.trainable_variables))
    discriminator_optimizer.apply_gradients(zip(gradients_of_discriminator, discriminator.trainable_variables))

In [30]:
def train(dataset, epochs):
    for epoch in range(epochs):
        start = time.time()

        for data in dataset:
            train_step(data)


        # Save the model every 15 epochs
        if (epoch + 1) % 100 == 0:
            checkpoint.save(file_prefix = checkpoint_prefix)

            print ('Time for epoch {} is {} sec'.format(epoch + 1, time.time()-start))

   # Generate after the final epoch
    display.clear_output(wait=True)
    generate_and_save_data(generator,
                           epochs,
                           seed)

In [31]:

def generate_and_save_data(model, epoch, test_input):
  # Notice `training` is set to False.
  # This is so all layers run in inference mode (batchnorm).
    predictions = model(test_input, training=False)
    predictions = predictions.numpy()
    predictions = predictions.reshape(448,28)
    pred = predictions[:,14]
    pred = pred.reshape(-1,14)
    print(pred.shape)
    
    transformed = min_max_scaler.inverse_transform(pred)
    print(transformed)
    np.savetxt("gen_data.csv", transformed, delimiter=",")



In [32]:
train(train_dataset, EPOCHS)


(32, 14)
[[5.2987663e+01 3.9990366e+00 7.5054438e+05 7.5383859e+00 8.4382334e+00
  3.0424480e+00 7.0140567e+00 2.4977582e+00 2.0090799e+00 5.0328708e-01
  5.0151020e+04 2.1718867e+03 5.0665394e+01 2.0673445e+01]
 [5.3844727e+01 3.9955456e+00 7.5787125e+05 7.5010767e+00 8.5163174e+00
  3.0010529e+00 7.0959654e+00 2.4738069e+00 2.0127463e+00 5.0495881e-01
  5.0788766e+04 2.1296150e+03 4.9887077e+01 2.0891415e+01]
 [5.3502132e+01 4.0190229e+00 7.5787556e+05 7.5541787e+00 8.5024118e+00
  3.0107346e+00 7.0514855e+00 2.4972525e+00 2.0038872e+00 5.0079221e-01
  5.0108879e+04 2.1709478e+03 5.0062004e+01 2.0526594e+01]
 [5.3622288e+01 4.0041690e+00 7.5531856e+05 7.5077209e+00 8.5062294e+00
  2.9930456e+00 6.9986100e+00 2.4989901e+00 1.9943553e+00 5.0064611e-01
  5.0533512e+04 2.1733347e+03 4.9935139e+01 2.0558220e+01]
 [5.3610615e+01 4.0239162e+00 7.5508588e+05 7.5345078e+00 8.5228071e+00
  3.0005100e+00 6.9823437e+00 2.4842584e+00 2.0119398e+00 5.0358450e-01
  5.0432176e+04 2.1901841e+03 5.076

In [33]:
checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7fb55dd4db00>