In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras import layers
from tensorflow.keras.layers import Input,Dense,Lambda,Reshape,Conv1DTranspose, Conv1D,Flatten
from tensorflow.keras.models import Model,Sequential
from tensorflow import keras
import tensorflow.keras.backend as K
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,MinMaxScaler
import time
from numpy import zeros
from numpy import ones
from numpy.random import rand
from numpy.random import randn

# Load and Define Data

In [2]:
df = pd.read_csv('../Data/PUMA-1204-2012to2016-5%_Treated.csv')
df

Unnamed: 0,HINCP,NP,AGEP,RAC1P,ESR,SEX,WIF,HUPAC,HHT,PUMA,ST
0,4,3,31,1,4,1,3,1,1,1203,24
1,4,3,30,1,6,2,3,1,1,1203,24
2,4,3,2,1,0,2,3,1,1,1203,24
3,5,5,48,1,1,2,3,2,1,1203,24
4,5,5,48,1,1,1,3,2,1,1203,24
...,...,...,...,...,...,...,...,...,...,...,...
5031,5,5,7,6,0,2,4,2,1,1203,24
5032,6,4,41,1,1,1,2,1,2,1203,24
5033,6,4,4,1,0,2,2,1,2,1203,24
5034,6,4,1,1,0,2,2,1,2,1203,24


In [3]:
df=df.fillna(df.mean())
df

Unnamed: 0,HINCP,NP,AGEP,RAC1P,ESR,SEX,WIF,HUPAC,HHT,PUMA,ST
0,4,3,31,1,4,1,3,1,1,1203,24
1,4,3,30,1,6,2,3,1,1,1203,24
2,4,3,2,1,0,2,3,1,1,1203,24
3,5,5,48,1,1,2,3,2,1,1203,24
4,5,5,48,1,1,1,3,2,1,1203,24
...,...,...,...,...,...,...,...,...,...,...,...
5031,5,5,7,6,0,2,4,2,1,1203,24
5032,6,4,41,1,1,1,2,1,2,1203,24
5033,6,4,4,1,0,2,2,1,2,1203,24
5034,6,4,1,1,0,2,2,1,2,1203,24


In [4]:
df.shape

(5036, 11)

# Data Preprocessing

In [5]:
X_train,X_test=train_test_split(df,test_size=200)

In [6]:
minmax=MinMaxScaler()
X_train = minmax.fit_transform(X_train)
X_test = minmax.fit_transform(X_test)

In [7]:
def fit_batchsize(X,batch_size):
    n_size = (len(X)//batch_size)*batch_size
    X = X[0:n_size]

    return X
batch_size = 10
X_train = fit_batchsize(X_train, batch_size)
X_test = fit_batchsize(X_test, batch_size)

In [8]:
X_train=X_train.astype(np.float32)

# Define GAN Model

In [9]:
def discriminator():
    
    d = Sequential()
    d.add(Reshape(target_shape=(11,1),input_shape=(11,)))
    d.add(Conv1D(12,3, activation="relu", strides=1, padding="same"))
    d.add(Conv1D(24,3, activation="relu", strides=1, padding="same"))
    d.add(Flatten())
    d.add(Dense(11))
    return d

In [10]:
def generator(latent_dim=2):

    g = Sequential()
    g.add(Dense(11*24,activation="relu", input_shape=(latent_dim,)))
    g.add(Reshape(target_shape=(11,24)))
    g.add(Conv1DTranspose(24,3,activation="relu", strides=1, padding="same"))
    g.add(Conv1DTranspose(12,3,activation="relu", strides=1, padding="same"))
    g.add(Conv1DTranspose(1,3,activation="relu", strides=1, padding="same"))
    g.add(Reshape(target_shape=(11,)))

    return g

In [11]:
discriminator = discriminator()
generator = generator()
print(discriminator.summary(), generator.summary())

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
reshape (Reshape)            (None, 11, 1)             0         
_________________________________________________________________
conv1d (Conv1D)              (None, 11, 12)            48        
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 11, 24)            888       
_________________________________________________________________
flatten (Flatten)            (None, 264)               0         
_________________________________________________________________
dense (Dense)                (None, 11)                2915      
Total params: 3,851
Trainable params: 3,851
Non-trainable params: 0
_________________________________________________________________
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Out

In [12]:
class GAN(keras.Model):
    
  # initialize models with latent dimensions
  def __init__(self, disc, gen, latent_dim=2):
    super(GAN, self).__init__()
    self.discriminator = disc
    self.generator = gen
    self.latent_dim = latent_dim
  
  # compile with optimizers and loss function
  def compile(self, optD, optG, loss_fn):
    super(GAN, self).compile()
    self.optD = optD
    self.optG = optG
    self.loss_fn = loss_fn
    
  # custom training function
  def train_step(self, real_data):
    if isinstance(real_data, tuple):
      real_data = real_data[0]
    
    # get current batch size
    bs = tf.shape(real_data)[0]
    z = tf.random.normal(shape=(bs, self.latent_dim))
    fake_data = self.generator(z)
    
    # combine real and fake images in a single vector along with their labels
    combined_data = tf.concat([real_data, fake_data], axis=0)
    labels = tf.concat([tf.ones((bs, 11)), tf.zeros((bs, 11))], axis=0)
    
    # train your discriminator
    with tf.GradientTape() as tape:
      preds = self.discriminator(combined_data)
      d_loss = self.loss_fn(labels, preds)
      grads = tape.gradient(d_loss, self.discriminator.trainable_weights)
      self.optD.apply_gradients(zip(grads, self.discriminator.trainable_weights))
    
    # misleading labels for generator
    misleading_labels = tf.ones((bs, 11))
    z = tf.random.normal(shape=(bs, self.latent_dim))
    
    # train your generator
    with tf.GradientTape() as tape:
      fake_preds = self.discriminator(self.generator(z))
      g_loss = self.loss_fn(misleading_labels, fake_preds)
      grads = tape.gradient(g_loss, self.generator.trainable_weights)
      self.optG.apply_gradients(zip(grads, self.generator.trainable_weights))
    return {"d_loss": d_loss, "g_loss": g_loss}
# create GAN model using already built D and G
gan = GAN(discriminator, generator)
# compile your model with loss and optimizers
gan.compile(
    keras.optimizers.Adam(),
    keras.optimizers.Adam(),
    keras.losses.BinaryCrossentropy(from_logits=True))

In [13]:
hist=gan.fit(X_train,epochs=20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [14]:
latent_dim=2
z=tf.random.normal(shape=(10000,latent_dim))
generated_data=generator(z)
generated_data

<tf.Tensor: shape=(10000, 11), dtype=float32, numpy=
array([[0.7711071 , 0.        , 0.4339139 , ..., 0.21167523, 0.        ,
        0.        ],
       [1.0081478 , 0.        , 0.94023436, ..., 1.3203225 , 0.07708627,
        0.01019686],
       [0.8961567 , 0.        , 0.45171696, ..., 0.03224733, 0.        ,
        0.        ],
       ...,
       [1.5696151 , 0.        , 0.84901655, ..., 0.8477404 , 0.        ,
        0.02604651],
       [0.9913803 , 0.        , 0.9610695 , ..., 1.3880033 , 0.09693852,
        0.        ],
       [0.78214186, 0.        , 0.52953106, ..., 0.32368693, 0.        ,
        0.        ]], dtype=float32)>

In [15]:
generated_data=minmax.inverse_transform(generated_data)
generated_data

array([[5.08442831e+00, 2.00000000e+00, 3.77505081e+01, ...,
        1.42335045e+00, 1.20300000e+03, 2.40000000e+01],
       [6.03259134e+00, 2.00000000e+00, 8.18003896e+01, ...,
        3.64064503e+00, 1.20307709e+03, 2.40101969e+01],
       [5.58462691e+00, 2.00000000e+00, 3.92993755e+01, ...,
        1.06449467e+00, 1.20300000e+03, 2.40000000e+01],
       ...,
       [8.27846050e+00, 2.00000000e+00, 7.38644396e+01, ...,
        2.69548082e+00, 1.20300000e+03, 2.40260465e+01],
       [5.96552110e+00, 2.00000000e+00, 8.36130486e+01, ...,
        3.77600670e+00, 1.20309694e+03, 2.40000000e+01],
       [5.12856746e+00, 2.00000000e+00, 4.60692024e+01, ...,
        1.64737386e+00, 1.20300000e+03, 2.40000000e+01]])

In [16]:
generated_data=np.round(generated_data)
generated_data

array([[5.000e+00, 2.000e+00, 3.800e+01, ..., 1.000e+00, 1.203e+03,
        2.400e+01],
       [6.000e+00, 2.000e+00, 8.200e+01, ..., 4.000e+00, 1.203e+03,
        2.400e+01],
       [6.000e+00, 2.000e+00, 3.900e+01, ..., 1.000e+00, 1.203e+03,
        2.400e+01],
       ...,
       [8.000e+00, 2.000e+00, 7.400e+01, ..., 3.000e+00, 1.203e+03,
        2.400e+01],
       [6.000e+00, 2.000e+00, 8.400e+01, ..., 4.000e+00, 1.203e+03,
        2.400e+01],
       [5.000e+00, 2.000e+00, 4.600e+01, ..., 2.000e+00, 1.203e+03,
        2.400e+01]])

In [17]:
generated_data=pd.DataFrame(generated_data)

In [18]:
generated_data.to_csv('GAN reconstruction PUMA==1204.csv')