# GANs _Building_ and _Training_

###### Import libraries

In [1]:
import pdb
import os
import shutil
import time

In [2]:
import numpy as np
import pandas as pd

In [3]:
import tensorflow as tf

In [4]:
import sklearn.preprocessing

###### Preprocess training samples of a section

Fit a MinMaxScaler on the training set, then use it to transform the training set and the test set, writing the results into the corresponding "preprocessed" directory.

After preprocessing, the structure of the jobs directory looks like this:

- JOBXXXX
    - status
        - parts
            - part
                - section
                    - samples
                        - normal
                            - train
                                - samples.csv
                                - preprocessed
                                    - samples.csv
                            - test
                                - samples.csv
                                - preprocessed
                                    - samples.csv
        - raw
        - valid

In [69]:
def fn_makeMinMaxScaler(strPart, strSection, strJobsDir):
    """Fit a MinMaxScaler on the training samples of one section.

    The samples are gathered from the jobs directory by
    fn_getSectionsTrainSamples, and the scaler is fitted on their raw values.

    Args:
        strPart: part name used to build the samples path.
        strSection: section name used to build the samples path.
        strJobsDir: directory that holds the job directories.

    Returns:
        The fitted sklearn.preprocessing.MinMaxScaler.
    """
    pdDfAllTrainSamples = fn_getSectionsTrainSamples(strPart, strSection, strJobsDir)
    # fit() hands back the scaler itself, so construction and fitting are chained.
    return sklearn.preprocessing.MinMaxScaler().fit(pdDfAllTrainSamples.values)
def fn_getSectionsTrainSamples(strPart, strSection, strJobsDir):
    """Load and concatenate the training samples of one section from every job.

    For each job directory under strJobsDir, every sub-directory whose name
    contains "Demod" is treated as a status directory, and the file
    parts/<strPart>/<strSection>/samples/normal/train/samples.csv inside it
    is read.

    Args:
        strPart: part name used to build the samples path.
        strSection: section name used to build the samples path.
        strJobsDir: directory that holds the job directories.

    Returns:
        A pandas DataFrame with the rows of all samples files concatenated.

    Raises:
        ValueError: if no status directory is found under strJobsDir.
        FileNotFoundError: (from pandas) if a status directory has no
            samples file for the requested part/section.
    """
    listPdDfSectionTrainSamples = []
    # Sorted so the row order of the result does not depend on the
    # filesystem's arbitrary listing order.
    for job in sorted(os.listdir(strJobsDir)):
        strJobDir = os.path.join(strJobsDir, job)
        # Stray files in the jobs directory (notes, .DS_Store, ...) are not
        # jobs; listing them would raise NotADirectoryError.
        if not os.path.isdir(strJobDir):
            continue
        for name in sorted(os.listdir(strJobDir)):
            strStatusDir = os.path.join(strJobDir, name)
            if "Demod" not in name or not os.path.isdir(strStatusDir):
                continue
            strSectionTrainSamplesFile = os.path.join(
                strStatusDir, "parts", strPart, strSection,
                "samples", "normal", "train", "samples.csv")
            listPdDfSectionTrainSamples.append(pd.read_csv(strSectionTrainSamplesFile))

    if not listPdDfSectionTrainSamples:
        # pd.concat([]) would raise a ValueError with an opaque message;
        # keep the exception type but say what is actually missing.
        raise ValueError(
            "No 'Demod' status directories found under " + repr(strJobsDir)
            + " for part " + repr(strPart) + ", section " + repr(strSection))
    return pd.concat(listPdDfSectionTrainSamples)
        
def fn_preprocessSectionTrainSamples(strPart, strSection, oMinMaxScaler, strJobsDir):
    """Scale the training samples of one section and write them to disk.

    For each job directory under strJobsDir, every sub-directory whose name
    contains "Demod" is treated as a status directory. Its
    parts/<strPart>/<strSection>/samples/normal/train/samples.csv is read,
    transformed with oMinMaxScaler, and written to a "preprocessed"
    sub-directory next to it (as samples.csv, without the index column).
    An existing "preprocessed" directory is deleted and recreated.

    Args:
        strPart: part name used to build the samples path.
        strSection: section name used to build the samples path.
        oMinMaxScaler: fitted scaler; only its transform() method is used.
        strJobsDir: directory that holds the job directories.

    Raises:
        FileNotFoundError: (from pandas) if a status directory has no
            samples file for the requested part/section.
    """
    for job in sorted(os.listdir(strJobsDir)):
        strJobDir = os.path.join(strJobsDir, job)
        # Stray files in the jobs directory are not jobs; listing them would
        # raise NotADirectoryError.
        if not os.path.isdir(strJobDir):
            continue
        for name in sorted(os.listdir(strJobDir)):
            strStatusDir = os.path.join(strJobDir, name)
            if "Demod" not in name or not os.path.isdir(strStatusDir):
                continue

            strSectionTrainDir = os.path.join(
                strStatusDir, "parts", strPart, strSection, "samples", "normal", "train")
            pdDfSectionTrainSamples = pd.read_csv(os.path.join(strSectionTrainDir, "samples.csv"))

            # Start from a clean output directory so stale results never survive.
            strPreprocessedSectionTrainSamplesDir = os.path.join(strSectionTrainDir, "preprocessed")
            if os.path.exists(strPreprocessedSectionTrainSamplesDir):
                shutil.rmtree(strPreprocessedSectionTrainSamplesDir)
            os.mkdir(strPreprocessedSectionTrainSamplesDir)

            # An empty sample set would make the scaler's transform() fail, so
            # in that case write an empty frame that keeps the same columns.
            if pdDfSectionTrainSamples.empty:
                pdDfPreprocessedSectionTrainSamples = pd.DataFrame(
                    columns=pdDfSectionTrainSamples.columns)
            else:
                pdDfPreprocessedSectionTrainSamples = pd.DataFrame(
                    data=oMinMaxScaler.transform(pdDfSectionTrainSamples.values),
                    columns=pdDfSectionTrainSamples.columns)

            pdDfPreprocessedSectionTrainSamples.to_csv(
                os.path.join(strPreprocessedSectionTrainSamplesDir, "samples.csv"), index=False)

###### Get dataset

In [6]:
def fn_splitCsvLine(strLine):
    """Parse one CSV text line into a single float tensor.

    The line is decoded into g_nFeatures fields, each with a float default
    of 0., and the fields are stacked into one tensor.

    Args:
        strLine: a CSV-formatted line (string tensor).

    Returns:
        The stacked tensor of the decoded fields.
    """
    listFieldDefaults = [0.] * g_nFeatures
    return tf.stack(tf.io.decode_csv(strLine, record_defaults=listFieldDefaults))
def fn_getRealDataset(strPart, strSection, strJobsDir,
                      nReaders = 5, nReadThreads = 5, nParseTreads = 5, nShuffleBufferSize = 1000):
    """Build a batched tf.data pipeline over the preprocessed training samples.

    Every file matching
    <strJobsDir>/*/*/parts/<strPart>/<strSection>/samples/normal/train/preprocessed/samples.csv
    is read line by line (header skipped), parsed with fn_splitCsvLine,
    shuffled and grouped into batches of g_nBatchSize.

    Args:
        strPart: part name used in the file pattern.
        strSection: section name used in the file pattern.
        strJobsDir: directory that holds the job directories.
        nReaders: cycle length of the file interleave.
        nReadThreads: parallel calls used by the interleave.
        nParseTreads: parallel calls used when parsing lines.
        nShuffleBufferSize: size of the shuffle buffer.

    Returns:
        A tf.data.Dataset yielding batches of parsed samples.
    """
    strSamplesFilePattern = (strJobsDir + "/*/*/parts/" + strPart + "/" + strSection
                             + "/samples/normal/train/preprocessed/samples.csv")

    def fn_readSamplesFile(strSamplesFile):
        # The first line of each file is the CSV header, not a sample.
        return tf.data.TextLineDataset(strSamplesFile).skip(1)

    oDatasetLines = tf.data.Dataset.list_files(strSamplesFilePattern).interleave(
        fn_readSamplesFile, cycle_length=nReaders, num_parallel_calls=nReadThreads)
    oDatasetSamples = oDatasetLines.map(fn_splitCsvLine, nParseTreads)
    return oDatasetSamples.shuffle(nShuffleBufferSize).batch(g_nBatchSize)

## Input section

In [7]:
# Fit the min-max scaler on the training samples of part "framelock",
# section "input", gathered from every job under ../jobs.
oMinMaxScalerInput = fn_makeMinMaxScaler("framelock", "input", "../jobs")

In [8]:
# Scale the "input" section training samples and write them into the
# "preprocessed" directories, using the scaler fitted in the previous cell
# (oMinMaxScalerInput; the bare name oMinMaxScaler is never defined).
fn_preprocessSectionTrainSamples("framelock", "input", oMinMaxScalerInput, "../jobs")

Set global hyperparameters.

In [87]:
# Number of samples per batch; read by fn_getRealDataset when batching.
g_nBatchSize = 250
# Number of CSV fields per sample line; read by fn_splitCsvLine when decoding.
g_nFeatures = 3
# Batched dataset over the preprocessed "input" section samples of part "framelock".
oDatasetFramelockInput = fn_getRealDataset("framelock", "input", "../jobs")

### Building model

In [88]:
class InputGAN(object):
    """Container for the generator and discriminator networks of the input section.

    Attributes created on construction:
        oSeqGe: encoder "Ge" (two Dense(2) ReLU layers).
        oSeqGd: decoder "Gd" (Dense(2) ReLU, then Dense(3) sigmoid).
        oSeqGe1: second encoder "Ge1" (two Dense(2) ReLU layers).
        oSeqDInner: discriminator feature extractor "DInner" (two Dense(2) ReLU layers).
        oSeqD: full discriminator "D" (oSeqDInner followed by a Dense(1) sigmoid).
    """

    def __init__(self):
        self.fn_makeGenerator()
        self.fn_makeDiscriminator()

    def fn_makeGenerator(self):
        """Create the three generator sub-networks: Ge, Gd and Ge1."""
        fnRelu = tf.keras.activations.relu
        fnSigmoid = tf.keras.activations.sigmoid

        self.oSeqGe = tf.keras.Sequential([
            tf.keras.layers.Dense(2, activation=fnRelu),
            tf.keras.layers.Dense(2, activation=fnRelu),
        ], name="Ge")

        # The decoder's last layer has 3 sigmoid units.
        self.oSeqGd = tf.keras.Sequential([
            tf.keras.layers.Dense(2, activation=fnRelu),
            tf.keras.layers.Dense(3, activation=fnSigmoid),
        ], name="Gd")

        self.oSeqGe1 = tf.keras.Sequential([
            tf.keras.layers.Dense(2, activation=fnRelu),
            tf.keras.layers.Dense(2, activation=fnRelu),
        ], name="Ge1")

    def fn_makeDiscriminator(self):
        """Create the discriminator D and its reusable inner feature extractor."""
        fnRelu = tf.keras.activations.relu

        self.oSeqDInner = tf.keras.Sequential([
            tf.keras.layers.Dense(2, activation=fnRelu),
            tf.keras.layers.Dense(2, activation=fnRelu),
        ], name="DInner")

        # D wraps DInner, so both share the same hidden layers.
        oDenseOutput = tf.keras.layers.Dense(1, activation=tf.keras.activations.sigmoid)
        self.oSeqD = tf.keras.Sequential([self.oSeqDInner, oDenseOutput], name="D")

In [92]:
# Reset Keras' global state so that re-running this cell starts from a clean slate,
# then build a fresh, untrained set of networks.
tf.keras.backend.clear_session()
oInputGAN = InputGAN()

In [93]:
# Run name used as the prefix of the log directories.
# Alternative (timestamped run name):
#strPrefix = time.strftime("run_%Y_%m_%d_%H_%M_%S")
strPrefix = "epoch=20_batchsize=250_relu_change"
# One summary writer per logged loss; the suffix "D" / "A" is appended directly to the prefix.
oSummaryWriterInputDLoss = tf.summary.create_file_writer("logs/input/" + strPrefix + "D")
oSummaryWriterInputALoss = tf.summary.create_file_writer("logs/input/" + strPrefix + "A")

In [74]:
# Adam optimizer (learning rate 0.01) used by the training loop below.
oOptimizer = tf.keras.optimizers.Adam(learning_rate=0.01)

In [94]:
# Alternating training: for every batch, one discriminator step followed by
# one generator step. Both losses are logged to TensorBoard per step.

# The generator gets its own optimizer, cloned from oOptimizer's configuration.
# The discriminator and the generator update disjoint variable sets; newer
# Keras optimizers refuse to update variables they were not built with, and a
# shared instance would also share one step counter between both updates.
oOptimizerG = type(oOptimizer).from_config(oOptimizer.get_config())

nStep = 0
for nEphoch in range(20):
    for tensorBatch in oDatasetFramelockInput:
        #if nStep % 100 == 0:
        #   print(nStep)

        # ---- Train discriminator ----
        with tf.GradientTape() as oGradientTapeD:
            # Generate fake samples by encoding and decoding the real batch
            tensorEncodedSamples = oInputGAN.oSeqGe(tensorBatch)
            tensorFakeSamples = oInputGAN.oSeqGd(tensorEncodedSamples)

            # Real samples first (label 1), fake samples second (label 0)
            tensorTrainingSamples = tf.concat([tensorBatch, tensorFakeSamples], axis=0)
            tensorPreds = oInputGAN.oSeqD(tensorTrainingSamples)
            nBatch = tf.shape(tensorBatch)[0]
            tensorLabels = tf.concat([tf.ones([nBatch, 1]), tf.zeros([nBatch, 1])], axis=0)
            # oSeqD ends in a sigmoid layer, so tensorPreds are probabilities,
            # not logits. Use the probability form of binary cross-entropy;
            # sigmoid_cross_entropy_with_logits would apply the sigmoid twice.
            tensorDLoss = tf.reduce_mean(
                tf.keras.losses.binary_crossentropy(tensorLabels, tensorPreds))
        # Only the discriminator's variables are updated in this step
        listDGradients = oGradientTapeD.gradient(tensorDLoss, oInputGAN.oSeqD.trainable_variables)
        oOptimizer.apply_gradients(zip(listDGradients, oInputGAN.oSeqD.trainable_variables))
        with oSummaryWriterInputDLoss.as_default():
            tf.summary.scalar("D_loss", tensorDLoss, step=nStep)

        # ---- Train generator ----
        with tf.GradientTape() as oGradientTapeG:
            # Apparent loss: reconstruction error between real and generated samples
            tensorEncodedSamples = oInputGAN.oSeqGe(tensorBatch)
            tensorFakeSamples = oInputGAN.oSeqGd(tensorEncodedSamples)
            tensorApperantLoss = tf.reduce_mean(tf.losses.mean_absolute_error(tensorBatch, tensorFakeSamples))

            # Latent loss: re-encoding the generated samples should give the original encoding
            tensorEncoded1Samples = oInputGAN.oSeqGe1(tensorFakeSamples)
            tensorLatentLoss = tf.reduce_mean(
                tf.losses.mean_squared_error(tensorEncodedSamples, tensorEncoded1Samples))

            # Feature matching loss: discriminator features of fake vs. real samples
            tensorPredFeatures = oInputGAN.oSeqDInner(tensorFakeSamples)
            tensorTrueFeatures = oInputGAN.oSeqDInner(tensorBatch)
            tensorFeatureLoss = tf.reduce_mean(tf.losses.mean_squared_error(tensorPredFeatures, tensorTrueFeatures))

            tensorGLoss = tensorApperantLoss + tensorLatentLoss + tensorFeatureLoss

        # Insert all generator trainable variables into a list.
        # NOTE(review): collected inside the loop because the Sequential models
        # declare no input shape and so presumably create their weights on the
        # first call; confirm before hoisting this out of the loop.
        listTrainableGVariables = []
        listTrainableGVariables.extend(oInputGAN.oSeqGe.trainable_variables)
        listTrainableGVariables.extend(oInputGAN.oSeqGd.trainable_variables)
        listTrainableGVariables.extend(oInputGAN.oSeqGe1.trainable_variables)

        listGGradients = oGradientTapeG.gradient(tensorGLoss, listTrainableGVariables)
        oOptimizerG.apply_gradients(zip(listGGradients, listTrainableGVariables))
        with oSummaryWriterInputALoss.as_default():
            tf.summary.scalar("A_loss", tensorGLoss, step=nStep)

        nStep = nStep + 1
            
        