In [1]:
import math
import random
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from keras import backend as K
import matplotlib.pyplot as plt
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split

Using TensorFlow backend.


# GAN

In [72]:
# Scratch check of a slicing pitfall: `-0` is just `0`, so `[:, -0:]` selects
# every column rather than "the last zero columns".
n = np.zeros((30, 12))
n[:, -0:]

array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.,

In [109]:
class CGAN(keras.Model):
    """(Conditional) GAN built from two small dense networks.

    The generator maps ``latent_dim`` noise values (plus ``conditional_dim``
    condition values, when conditioning is enabled) to
    ``input_dim - conditional_dim`` generated features. The discriminator
    scores rows of width ``input_dim``; when conditioning is enabled the
    condition is expected in the LAST ``conditional_dim`` columns of each row.

    Parameters
    ----------
    input_dim : int
        Width of the rows given to the discriminator (features + condition).
    latent_dim : int
        Number of noise values fed to the generator.
    optimizer : keras optimizer instance, optional
        Optimizer used for the generator. The discriminator gets its own
        optimizer with the same configuration. Defaults to Adam.
    conditional_dim : int, optional
        Number of trailing condition columns. ``None`` or ``0`` gives an
        unconditional GAN.
    """
    
    def __init__(self, input_dim, latent_dim, optimizer = None, conditional_dim=None):
        super(CGAN, self).__init__()
        
        self.input_dim = input_dim
        self.latent_dim = latent_dim
        # None means "no condition"; normalise it to 0 so the dimension
        # arithmetic and the `> 0` check in train_step work.
        self.conditional_dim = conditional_dim or 0

        if optimizer is None:
            optimizer = keras.optimizers.Adam()
        self.optimizer = optimizer
        
        self.generator = self.create_generator()
        self.discriminator = self.create_discrimenator()
        
        self.bc = tf.keras.losses.BinaryCrossentropy()
        # Each network needs its own optimizer state (moment estimates, step
        # counter), so the discriminator gets an independent copy configured
        # identically to the optimizer that was passed in.
        self.generator_optimizer = optimizer
        self.discriminator_optimizer = type(optimizer).from_config(optimizer.get_config())
        
    def create_generator(self):
        """Build the generator: (noise [+ condition]) -> generated features."""
        generator_input = keras.Input(shape=(self.latent_dim + self.conditional_dim,), name="generator_input")
        x = layers.Dense(10, activation="relu", name="generator_l1")(generator_input)
        x = layers.Dense(8, activation="relu",name="generator_l2")(x)
        x = layers.Dense(6, activation="relu",name="generator_l3")(x)
        # NOTE(review): a ReLU output can only produce values >= 0 and an
        # output unit can get stuck at exactly 0 - confirm this matches the
        # range of the scaled training data.
        generator_output = layers.Dense(self.input_dim - self.conditional_dim, activation="relu", name = "generator_output")(x)
        return keras.Model(generator_input, generator_output, name="generator")
    
    def create_discrimenator(self):
        """Build the discriminator: full row -> probability that it is real."""
        discriminator_input = keras.Input(shape=(self.input_dim,), name="discriminator_input")
        x = layers.Dense(10, activation="relu", name="discriminator_l1")(discriminator_input)
        x = layers.Dense(8, activation="relu",name="discriminator_l2")(x)
        x = layers.Dense(6, activation="relu",name="discriminator_l3")(x)
        discriminator_output = layers.Dense(1, activation="sigmoid", name = "discriminator_output")(x)
        return keras.Model(discriminator_input, discriminator_output, name="discriminator")
    
    
    def generate_latent(self, shape):
        """Return standard-normal noise of the given shape as a NumPy array."""
        # Currently plain Gaussian noise; another distribution could be used.
        return np.random.normal(0,1, shape)
    
    def call(self, inputs, training=False):
        """Score ``inputs`` with the discriminator."""
        return self.discriminator(inputs, training=training)
    
    def generator_loss(self,fake_output):
        """Generator loss: fakes should be classified as real (label 1)."""
        return self.bc(tf.ones_like(fake_output), fake_output)
    
    def discriminator_loss(self,real_output, fake_output):
        """Discriminator loss: real rows -> label 1, generated rows -> label 0."""
        real_loss = self.bc(tf.ones_like(real_output), real_output)
        fake_loss = self.bc(tf.zeros_like(fake_output), fake_output)
        return real_loss + fake_loss
    
    def train_step(self, data):
        """Run one simultaneous generator/discriminator update on a batch.

        Returns a dict with the ``generator_loss`` and ``discriminator_loss``
        of this batch.
        """
        if isinstance(data, tuple):
            data = data[0] # because data can be (x_batch, y_batch)
        
        # Use the dynamic batch size: the static shape can be None when the
        # step is traced, e.g. because the last batch is smaller.
        batch_size = tf.shape(data)[0]
        noise = tf.random.normal([batch_size, self.latent_dim])
        
        with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
            
            if self.conditional_dim > 0:
                # The guard matters: with conditional_dim == 0 the slice
                # `-0:` would select ALL columns instead of none.
                condition = data[:, -self.conditional_dim:]
            
                conditional_noise = keras.layers.concatenate([noise, condition], axis=1)
            
                generated_features = self.generator(conditional_noise, training=True)
            
                # Re-attach the real condition so fake rows have the same
                # layout as real rows.
                generated_data = keras.layers.concatenate([generated_features, condition], axis=1)
            
            else:
                generated_data = self.generator(noise, training=True)
                
            real_output = self.discriminator(data, training=True)
            fake_output = self.discriminator(generated_data, training=True)
            
            gen_loss = self.generator_loss(fake_output)
            disc_loss = self.discriminator_loss(real_output, fake_output)

        # Gradients are taken after leaving the tape context so the gradient
        # computation itself is not recorded.
        gradients_of_generator = gen_tape.gradient(gen_loss, self.generator.trainable_variables)
        gradients_of_discriminator = disc_tape.gradient(disc_loss, self.discriminator.trainable_variables)

        self.generator_optimizer.apply_gradients(zip(gradients_of_generator, self.generator.trainable_variables))
        self.discriminator_optimizer.apply_gradients(zip(gradients_of_discriminator, self.discriminator.trainable_variables))
    
        return {
            "generator_loss": gen_loss,
            "discriminator_loss": disc_loss
        }

# Data

In [2]:
# Load the scaled dataset; later cells expect an "id" and a "group" column
# next to the marker columns.
cells = pd.read_csv(filepath_or_buffer="ModifiedDATA/scaled_ra.csv")

In [80]:
def add_id(df):
    """One-hot encode ``df["group"]`` and append it to the marker columns.

    Returns ``(cond, data)``: ``cond`` is the float32 one-hot DataFrame of the
    group column, and ``data`` is a NumPy array holding every column except
    "id" and "group" (in sorted column order) followed by the one-hot columns.
    """
    marker_columns = df.columns.difference(["id", "group"])
    cond = pd.get_dummies(df["group"]).astype("float32")

    markers = df[marker_columns]
    data = np.concatenate([markers.to_numpy(), cond.to_numpy()], axis=1)

    return cond, data

# Encode the loaded cells and record the widths of the condition block and of
# the full (markers + condition) rows.
cond, data = add_id(cells)
cond_shape, data_shape = cond.shape[1], data.shape[1]

In [74]:
# Selected hyperparameters
LATENT_DIM = 2
EPOCHS = 1
BATCH_SIZE = 128
LEARNING_RATE = 0.01
OPTIMIZER = keras.optimizers.Adam(LEARNING_RATE)

# Marker columns only: everything except the patient id and the group label.
d = cells.loc[:, cells.columns.difference(["id", "group"])]

# Unconditional GAN (conditional_dim=0) trained directly on the marker values.
model = CGAN(input_dim=d.shape[1], latent_dim=LATENT_DIM, optimizer=OPTIMIZER, conditional_dim=0)

# No loss/optimizer is passed here: CGAN.train_step uses its own.
model.compile()

model.fit(x=d, epochs=EPOCHS, batch_size=BATCH_SIZE, validation_split=0.2)



<tensorflow.python.keras.callbacks.History at 0x151b19fd0>

In [143]:
class GAN_MODEL():
    """Wrapper around ``CGAN``: fit on a cell table, save/load, and sample patients.

    ``self.model`` is ``None`` until ``fit`` or ``load`` has been called.
    """
    
    def __init__(self):
        # Set explicitly so "not fitted yet" can be detected and reported.
        self.model = None

    def _require_model(self):
        """Raise a clear error when no model has been fitted or loaded yet."""
        if self.model is None:
            raise RuntimeError("No model available: call fit() or load() first.")
    
    def convert_group(self, df):
        """One-hot encode ``df["group"]`` and append it to the marker columns.

        Returns ``(cond, data)``: ``cond`` is the float32 one-hot DataFrame and
        ``data`` is a NumPy array of all columns except "id" and "group" (in
        sorted column order) followed by the one-hot columns.
        """
        cond = pd.get_dummies(df["group"]).astype("float32")

        data = df[df.columns.difference(["id","group"])]
        data = np.concatenate((data, cond), axis=1)

        return cond, data
    
    def fit(self, data, latent_dim, epochs, batch_size, optimizer):
        """Build a conditional ``CGAN`` for ``data`` and train it.

        ``data`` must be a DataFrame with a "group" column; the number of
        distinct groups becomes the condition width of the network.
        """
        # Initialise the CGAN model
        cond, data = self.convert_group(data)
        cond_shape = cond.shape[1]
        data_shape = data.shape[1]

        self.model = CGAN(latent_dim = latent_dim, input_dim = data_shape, optimizer=optimizer, conditional_dim = cond_shape)

        # compile the model
        self.model.compile(optimizer=optimizer)
        
        # fit the model
        self.model.fit(data, epochs=epochs, batch_size=batch_size, validation_split=0.2)

    def save(self, filepath):
        """Save the current model to ``filepath``."""
        self._require_model()
        self.model.save(filepath)
    
    def load(self, filepath):
        """Load a previously saved model from ``filepath`` into ``self.model``."""
        # TODO: check that filepath exists.
        self.model = keras.models.load_model(filepath)
        
    def generate_patients(self, nr_markers, latent_dim, conditional_dim, nr_cells = 20000, nr_patients = 20, column_names=None, group=None): 
        """Sample synthetic patients from the generator.

        PARAMETERS
        ----------
        nr_markers : int
            Number of generated columns per cell.
        latent_dim : int
            Number of noise values per cell; must match the fitted model.
        conditional_dim : int
            Currently unused (kept for interface compatibility); the
            condition is always a two-column one-hot vector.
        nr_cells : int
            Cells generated per patient.
        nr_patients : int
            Number of patients; ids run from 1 to nr_patients.
        column_names : Dataframe.columns
            Names for the generated marker columns.
        group : str
            "control" selects the first condition column; any other value
            (including None) selects the second one ("diseased").

        RETURNS
        -------
        patients : dataframe
            nr_patients * nr_cells rows with the marker columns plus "id"
            and "group".

        RAISES
        ------
        RuntimeError
            If neither fit() nor load() has been called.
        """
        self._require_model()

        total_cells = nr_patients * nr_cells
        patients = np.empty(shape=(total_cells, nr_markers))

        # Patient ids 1..nr_patients, each repeated for that patient's cells.
        p_id = np.repeat(np.arange(1, nr_patients + 1, dtype="int32"), nr_cells)

        # One-hot condition shared by every generated cell. The column order
        # follows pd.get_dummies in convert_group (sorted labels), so
        # "control" comes before "diseased".
        one_hot = [1.0, 0.0] if group == "control" else [0.0, 1.0]
        gr = np.tile(one_hot, (nr_cells, 1))

        for i in range(nr_patients):
            sample = np.random.normal(0,1, size = (nr_cells, latent_dim))
            latent_vals = np.concatenate((sample, gr),axis=1)

            # sampled patient cells
            patients[nr_cells*i : nr_cells*(i+1)] = self.model.generator.predict(latent_vals)

        patients_df = pd.DataFrame(patients, columns=column_names)

        patients_df["id"] = p_id
        patients_df["group"] = group

        return patients_df
    

    

In [144]:
# Wrapper object; it has no trained network until fit() or load() is called.
gmod = GAN_MODEL()

In [145]:
# Train with a fresh optimizer: OPTIMIZER was already used to fit `model`, and
# optimizer state (moment estimates, step count) must not carry over to a
# different network.
gmod.fit(
    data=cells,
    latent_dim=LATENT_DIM,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    optimizer=keras.optimizers.Adam(LEARNING_RATE),
)



In [123]:
# Location of the saved CGAN; passed to gmod.load() further down.
filepath = "GenerativeModels/CGAN_1.tf"

In [124]:
# Reuse `filepath` so the save and load cells always point at the same model.
gmod.save(filepath=filepath)

INFO:tensorflow:Assets written to: GenerativeModels/CGAN_1.tf/assets


In [125]:
# Restore the saved model into the wrapper (replaces gmod.model).
gmod.load(filepath=filepath)

In [138]:
# Inspect the column names: the marker channels plus "id" and "group".
cells.columns

Index(['145Nd_CD4', '146Nd_CD8a', '147Sm_CD20', '148Nd_CD16', '151Eu_CD123',
       '159Tb_CD11c', '160Gd_CD14', '169Tm_CD45RA', '170Er_CD3',
       '174Yb_HLA-DR', '176Yb_CD56', '209Bi_CD61', 'id', 'group'],
      dtype='object')

In [146]:
# Derive the sizes from the data instead of hard-coding them, so this cell
# stays correct if the marker panel or the set of groups changes.
col_names = cells.columns.difference(["id", "group"])
gmod.generate_patients(
    nr_markers=len(col_names),
    latent_dim=LATENT_DIM,
    conditional_dim=cells["group"].nunique(),
    column_names=col_names,
    group="control",
)

Unnamed: 0,145Nd_CD4,146Nd_CD8a,147Sm_CD20,148Nd_CD16,151Eu_CD123,159Tb_CD11c,160Gd_CD14,169Tm_CD45RA,170Er_CD3,174Yb_HLA-DR,176Yb_CD56,209Bi_CD61,id,group
0,0.0,90.422554,53.260834,0.0,4.585601,0.0,56.233582,0.0,0.0,70.866409,60.575691,112.037247,1,control
1,0.0,82.556984,48.503777,0.0,3.966102,0.0,51.078922,0.0,0.0,64.835732,55.349056,102.013855,1,control
2,0.0,105.428802,62.352001,0.0,5.391577,0.0,65.665985,0.0,0.0,82.396088,70.411209,130.778259,1,control
3,0.0,70.906738,41.352383,0.0,3.022319,0.0,43.347652,0.0,0.0,55.980492,47.669392,87.064865,1,control
4,0.0,113.416939,66.260117,0.0,2.835961,0.0,66.892845,0.0,0.0,89.490562,75.266983,137.063843,1,control
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
399995,0.0,76.429535,44.484463,0.0,2.228112,0.0,45.511868,0.0,0.0,60.483295,51.065639,92.701370,20,control
399996,0.0,84.703880,49.787182,0.0,4.078473,0.0,52.415287,0.0,0.0,66.498039,56.766674,104.681229,20,control
399997,0.0,68.308121,39.504189,0.0,1.347192,0.0,39.885887,0.0,0.0,54.328674,45.633835,82.056732,20,control
399998,0.0,68.990768,39.948502,0.0,1.527784,0.0,40.489857,0.0,0.0,54.817314,46.108990,83.078445,20,control
