In [None]:
#pip install tensorflow

In [None]:
import sys
import numpy as np
import tensorflow.keras as keras
import tensorflow.keras.backend as K
from tensorflow.keras.layers import Input, Dense, Activation, LeakyReLU, BatchNormalization
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
#from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import pandas as pd
from tensorflow.keras.models import load_model
from tensorflow.keras.models import save_model
from tensorflow.keras.models import model_from_json
#plt.switch_backend('agg')
from tensorflow.keras import regularizers
#from tensorflow.keras.utils import multi_gpu_model
from sklearn.decomposition import PCA

import seaborn as sns
import matplotlib.pyplot as plt

# Show every row when a DataFrame is displayed (no truncation).
# The module is imported as `pd`; the bare name `pandas` is never bound,
# so the option must be set through the alias.
pd.set_option('display.max_rows', None)

In [None]:
# --- data ---------------------------------------------------------------
inpt = "data/eICU_age.npy"  # path of the .npy file loaded as the real age data

# --- network shape ------------------------------------------------------
latent_size = 600  # width of the noise vector fed to the generator
alph = 0.01        # negative slope used by every LeakyReLU layer

# --- optimisation -------------------------------------------------------
g_learn = 1e-3     # Adam learning rate for the generator (via the stacked GAN)
d_learn = 1e-5     # Adam learning rate for the discriminator
batch_size = 280   # chosen to divide 2520 evenly
epochs = 50        # number of passes of the training loop

# --- reporting ----------------------------------------------------------
save_that = 25     # report/plot every `save_that` epochs (and on the last one)
ag_size = 2520     # how many ages to generate when plotting a distribution

# --- latent noise distribution -------------------------------------------
# NOTE(review): labelled "US age" mean / standard deviation in the original
# notebook; the source of these figures is not shown here -- confirm.
mean_age = 38.5
stdv_age = 3.52

In [None]:
# Echo the run configuration, one "name:  value" line per setting, in the
# same order and with the same spacing as individual print(label, value) calls.
print('\n'.join(
    f'{label}:  {value}'
    for label, value in (
        ('inpt', inpt),
        ('latent_size', latent_size),
        ('alph', alph),
        ('g_learn', g_learn),
        ('d_learn', d_learn),
        ('epochs', epochs),
        ('batch_size', batch_size),
        ('ag_size', ag_size),
        ('save_that', save_that),
        ('mean_age', mean_age),
        ('stdv_age', stdv_age),
    )
))

In [None]:
# Load the raw age array from disk and show its size and first entries.
ages_np = np.load(inpt)
print('length: ', len(ages_np))
print(ages_np[:5])

# Collapse to one dimension. The round-trip through a Python list is kept
# on purpose: it re-infers the dtype from plain Python scalars.
ages_np_ary = np.array(ages_np.ravel().tolist()).ravel()
print('length: ', len(ages_np_ary))
print(ages_np_ary[:5])

# Single-column frame used by the descriptive cells that follow.
df_ages = pd.DataFrame({'age': ages_np_ary})
print(df_ages.shape)
df_ages.head()

In [None]:
# Frequency table: attach a constant helper column to a fresh copy of the
# data, then count rows per distinct age.
df_ages_describe = df_ages.assign(Y='Y')
df_ages_describe.groupby('age').count()

In [None]:
# Overlay two histograms (with KDE curves) on the same axes:
# the observed ages, and a normal reference sample drawn below.
sns.set_theme()
sns.histplot(data=ages_np_ary, bins=90, kde=True)

# Reference sample: `latent_size` draws from N(mean_age, stdv_age).
US_population_ages = np.random.normal(mean_age, stdv_age, latent_size)
sns.histplot(data=US_population_ages, bins=90, kde=True, color='orange')

In [None]:
# Reset Keras global state before any model is built
K.clear_session()

# Generator: latent noise vector -> one non-negative value.
# Two L2-regularised hidden layers with LeakyReLU, then a single ReLU unit.
generator = Sequential([
    Dense(1024, input_shape=(latent_size,), kernel_regularizer=regularizers.l2(0.0001)),
    LeakyReLU(alpha=alph),
    Dense(512, kernel_regularizer=regularizers.l2(0.0001)),
    LeakyReLU(alpha=alph),
    Dense(1, activation='relu'),
])

print('=' * 60)
print('Generator:')
generator.summary()

In [None]:
# Discriminator: one scalar in -> sigmoid probability out.
discriminator = Sequential([
    Dense(256, input_shape=(1,), kernel_regularizer=regularizers.l2(0.0001)),
    LeakyReLU(alpha=alph),
    Dense(128, kernel_regularizer=regularizers.l2(0.0001)),
    LeakyReLU(alpha=alph),
    Dense(1, activation='sigmoid'),
])

# Compile first (with its own Adam optimiser), and only afterwards flag the
# model as non-trainable -- the order of these two statements is deliberate.
discriminator.compile(loss='binary_crossentropy', optimizer=Adam(learning_rate=d_learn))
discriminator.trainable = False

print('=' * 60)
print('Discriminator:')
discriminator.summary()

In [None]:
# Stack generator and discriminator into one model: noise -> generated
# value -> discriminator score. Compiled with the generator learning rate.
gan = Sequential([generator, discriminator])
gan.compile(loss='binary_crossentropy', optimizer=Adam(learning_rate=g_learn))

print('=' * 60)
print('GAN:')
gan.summary()

In [None]:
# Discriminator targets, one row per sample in a batch: real == 1, fake == 0
y_real = np.ones((batch_size, 1))
y_fake = np.zeros((batch_size, 1))

# The real samples are the flattened age array loaded earlier
X_real = ages_np_ary



### Train for number of epochs

In [None]:
# Loss history: one (d_loss, d_loss_real, d_loss_fake, g_loss) tuple per
# epoch, taken from the LAST batch of that epoch.
losses = []

# number of batches per epoch
batches = len(X_real) // batch_size
if batches == 0:
    # Without this check the inner loop never runs and the epoch summary
    # below fails with an unrelated NameError on d_loss.
    raise ValueError(
        "Need at least batch_size=%d real samples to train, got %d"
        % (batch_size, len(X_real))
    )

# One figure per column of `losses`: (legend label, extra plot kwargs)
loss_curves = [
    ('Discriminator', {'color': 'crimson'}),
    ('Discriminator: Real', {'color': 'crimson', 'alpha': 0.3}),
    ('Discriminator: Fake', {'color': 'crimson', 'alpha': 0.7}),
    ('Generator', {'color': 'xkcd:grass green'}),
]

for e in range(epochs):
    print('Started Epoch: ', e)

    for b in range(batches):
        # Draw a random batch of real samples (indices sampled with replacement)
        indices = np.random.randint(0, len(X_real), batch_size)
        X_batch_real = X_real[indices]

        # Noise fed to the generator: one row of `latent_size` values per sample
        latent_samples = np.random.normal(loc=mean_age, scale=stdv_age, size=(batch_size, latent_size))

        # Generated (fake) samples for this batch, flattened to 1-D
        X_batch_fake = generator.predict_on_batch(latent_samples).flatten()

        # Train the discriminator on the real (y == 1) and the fake (y == 0) data.
        # TODO: verify that flipping `trainable` here actually takes effect.
        # NOTE(review): whether changing `trainable` after compile() affects an
        # already-compiled model depends on the Keras version -- confirm.
        discriminator.trainable = True
        d_loss_real = discriminator.train_on_batch(X_batch_real, y_real)
        d_loss_fake = discriminator.train_on_batch(X_batch_fake, y_fake)
        # total discriminator loss
        d_loss = d_loss_real + d_loss_fake

        # Freeze the discriminator and train the stacked GAN: the generator is
        # pushed to make the discriminator label its output as real.
        # TODO: same `trainable` question as above.
        discriminator.trainable = False
        g_loss = gan.train_on_batch(latent_samples, y_real)

    losses.append((d_loss, d_loss_real, d_loss_fake, g_loss))
    print("Epoch:\t%d/%d Discriminator loss: %6.4f Generator loss: %6.4f"%(e+1, epochs, d_loss, g_loss))
    if e%save_that == 0 or e == (epochs-1):

        # TO DO: Save the Model

        print("=========" * 30)
        print("Epoch:\t%d/%d Real, Latent, and Generated Ages:"%(e+1, epochs))
        print("First 5 Real Ages: ", X_batch_real[0:5])
        print("Noisy Data Sent to Generator: ", latent_samples[1,1:5])
        print("First 5 Generated Ages: ", X_batch_fake[0:5])

        # Generate `ag_size` ages from fresh noise for the distribution plot
        latent_samples = np.random.normal(loc=mean_age, scale=stdv_age, size=(ag_size, latent_size))
        generated_ages = generator.predict(latent_samples)
        # round to the nearest integer, since ages are wanted as whole numbers
        generated_ages = np.rint(generated_ages)

        # Real vs generated age distribution on shared axes
        fig, ax = plt.subplots()
        sns.histplot(ages_np_ary, bins=90, kde=True, ax=ax)
        sns.histplot(generated_ages.flatten(), bins=90, kde=True, color='orange', alpha=0.5, ax=ax)
        plt.show()

        # Loss curves so far: rows are epochs, columns follow `loss_curves`.
        # Each figure is shown immediately so it appears right after this
        # epoch's printout instead of being deferred to a later show().
        loss_history = np.asarray(losses)
        for column, (label, plot_kwargs) in enumerate(loss_curves):
            fig, ax = plt.subplots()
            ax.plot(loss_history[:, column], label=label, **plot_kwargs)
            ax.legend()
            plt.show()
