# GANs _Building_ and _Training_

###### Import libraries

In [1]:
import pdb
import os
import shutil
import time

In [2]:
import numpy as np
import pandas as pd

In [3]:
import tensorflow as tf

In [4]:
import sklearn.preprocessing

###### Preprocess training samples of a section

Construct MinMaxScaler using training set, then use this MinMaxScaler transform training set and test set putting

them into corresponding "preprocessed" directory. 

After preprocessing, the structure of jobs directory would be like below:

- JOBXXXX
    - status
        - parts
            - part
                - section
                    - samples
                        - normal
                            - train
                                - status.csv
                                - preprocessed
                                    - status.csv
                            - test
                                - status.csv
                                - preprocessed
                                    - status.csv
        - raw
        - valid

In [5]:
def fn_makeMinMaxScaler(strPart, strSection, strJobsDir):
    oMinMaxScaler = sklearn.preprocessing.MinMaxScaler()
    pdDfSectionsTrainSamples = fn_getSectionsTrainSamples(strPart, strSection, strJobsDir)
    oMinMaxScaler.fit(pdDfSectionsTrainSamples.values)
    return oMinMaxScaler
def fn_getSectionsTrainSamples(strPart, strSection, strJobsDir):
    listPdDfSectionTrainSamples = []
    for job in os.listdir(strJobsDir):
        strJobDir = os.path.join(strJobsDir, job)
        listStrStatusDirs = [os.path.join(strJobDir, name) for name in os.listdir(strJobDir) if "Demod" in name]
        for strStatusDir in listStrStatusDirs:
            strSectionTrainSamplesFile = os.path.join(strStatusDir, "parts/" + strPart + "/" + strSection
                                                    + "/samples/normal/train/samples.csv")
            pdDfSectionTrainSamples = pd.read_csv(strSectionTrainSamplesFile)
            listPdDfSectionTrainSamples.append(pdDfSectionTrainSamples)
    pdDfSectionsTrainSamples = pd.concat(listPdDfSectionTrainSamples)
    return pdDfSectionsTrainSamples
        
def fn_preprocessSectionTrainSamples(strPart, strSection, oMinMaxScaler, strJobsDir):
    for job in os.listdir(strJobsDir):
        strJobDir = os.path.join(strJobsDir, job)
        listStrStatusDirs = [os.path.join(strJobDir, name) for name in os.listdir(strJobDir) if "Demod" in name]
        for strStatusDir in listStrStatusDirs:
            strSectionTrainSamplesFile = os.path.join(strStatusDir, "parts/" + strPart + "/" + strSection +
                                                    "/samples/normal/train/samples.csv")
            pdDfSectionTrainSamples = pd.read_csv(strSectionTrainSamplesFile)
            strPreprocessedSectionTrainSamplesDir = os.path.join(os.path.split(strSectionTrainSamplesFile)[0], "preprocessed")
            if os.path.exists(strPreprocessedSectionTrainSamplesDir):
                shutil.rmtree(strPreprocessedSectionTrainSamplesDir)
            os.mkdir(strPreprocessedSectionTrainSamplesDir)
            
            """
            Since empty section train samples will cause a error in oMinMaxScaler, check it first.
            If section train samples is emtpy, the corresponding preprocessed directory contains 
            an empty dataframe with same columns.
            """
            if not pdDfSectionTrainSamples.empty:
                pdDfPreprocessedSectionTrainSamples = \
                pd.DataFrame(data = oMinMaxScaler.transform(pdDfSectionTrainSamples.values), 
                             columns = pdDfSectionTrainSamples.columns)
            else:
                pdDfPreprocessedSectionTrainSamples = \
                pd.DataFrame(columns=pdDfSectionTrainSamples.columns)
                
            strPreprocessedSectionTrainSamplesFile = os.path.join(strPreprocessedSectionTrainSamplesDir, 
                                                                 "samples.csv")
            pdDfPreprocessedSectionTrainSamples.to_csv(strPreprocessedSectionTrainSamplesFile, index = False)

###### Get dataset

In [6]:
def fn_splitCsvLine(strLine):
    tensorDefs = [0.] * g_nFeatures
    tensorRecord = tf.io.decode_csv(strLine, record_defaults=tensorDefs)
    tensorX = tf.stack(tensorRecord)
    return tensorX
def fn_getRealDataset(strPart, strSection, strJobsDir, 
                      nReaders = 5, nReadThreads = 5, nParseTreads = 5, nShuffleBufferSize = 1000):
    oDataset = tf.data.Dataset.list_files(strJobsDir + "/*/*/parts/" + strPart + "/"
                                         + strSection + "/samples/normal/train/preprocessed/samples.csv")
    oDataset = oDataset.interleave(lambda strSamplesFile: tf.data.TextLineDataset(strSamplesFile).skip(1), cycle_length=nReaders, 
                                  num_parallel_calls=nReadThreads)
    oDataset = oDataset.map(fn_splitCsvLine, nParseTreads)
    oDataset = oDataset.shuffle(nShuffleBufferSize)
    return oDataset.batch(g_nBatchSize)

## Input section

In [7]:
oMinMaxScalerInput = fn_makeMinMaxScaler("framelock", "input", "../jobs")

In [8]:
fn_preprocessSectionTrainSamples("framelock", "input", oMinMaxScaler, "../jobs")

set global hyperparameters

In [8]:
g_nBatchSize = 5
g_nFeatures = 3
oDatasetFramelockInput = fn_getRealDataset("framelock", "input", "../jobs")

### Building model

In [9]:
class InputGAN(object):
    def __init__(self):
        #self.fn_lossD = tf.losses.binary_crossentropy
        #self.fn_lossA = tf.losses.binary_crossentropy
        #self.fn_metricD = tf.metrics.binary_accuracy
        #self.fn_metricA = tf.metrics.binary_accuracy
        
        self.fn_makeGenerator()
        self.fn_makeDiscriminator()
        #self.fn_makeDiscriminatorTrainingModel()
        #self.fn_makeAdversariaTrainingModel()
        
    def fn_makeGenerator(self):
        self.oSeqGe = tf.keras.Sequential()
        self.oSeqGe.add(tf.keras.layers.Dense(g_nFeatures, activation = tf.keras.activations.relu))
        self.oSeqGe.add(tf.keras.layers.Dense(2))
        
        self.oSeqGd = tf.keras.Sequential()
        self.oSeqGd.add(tf.keras.layers.Dense(2, activation = tf.keras.activations.relu))
        self.oSeqGd.add(tf.keras.layers.Dense(g_nFeatures, activation = tf.keras.activations.sigmoid))
        
        self.oSeqGe1 = tf.keras.models.clone_model(self.oSeqGe)
        
        self.oSeqG = tf.keras.Sequential([
            self.oSeqGe,
            self.oSeqGd,
            self.oSeqGe1
        ])
    def fn_makeDiscriminator(self):
        self.oSeqDInner = tf.keras.Sequential()
        self.oSeqDInner.add(tf.keras.layers.Dense(2, activation = tf.keras.activations.relu))
        
        self.oSeqDOutput = tf.keras.Sequential()
        self.oSeqDOutput.add(tf.keras.layers.Dense(1, activation = tf.keras.activations.sigmoid))
    
        self.oSeqD = tf.keras.Sequential([
            self.oSeqDInner,
            self.oSeqDOutput
        ])
"""
    def fn_makeDiscriminatorTrainable(self):
        self.oSeqDiscriminatorModel = tf.keras.Sequential()
        self.oSeqDiscriminatorModel.add(self.oSeqDiscriminator)
        oOptimizer = tf.compat.v1.train.GradientDescentOptimizer(learning_rate=0.01)
        self.oSeqDiscriminatorModel.compile(loss=self.fn_lossD, optimizer=oOptimizer)
    def fn_makeAdversariaTrainingModel(self):
        self.oSeqAdversarialModel = tf.keras.Sequential()
        self.oSeqAdversarialModel.add(self.oSeqGenerator)
        self.oSeqAdversarialModel.add(self.oSeqDiscriminator)
        oOptimizer = tf.optimizers.Adam(learning_rate=0.01)
        self.oSeqDiscriminator.trainable = False
        self.oSeqAdversarialModel.compile(loss = self.fn_lossA, optimizer=oOptimizer)
"""

'\n    def fn_makeDiscriminatorTrainable(self):\n        self.oSeqDiscriminatorModel = tf.keras.Sequential()\n        self.oSeqDiscriminatorModel.add(self.oSeqDiscriminator)\n        oOptimizer = tf.compat.v1.train.GradientDescentOptimizer(learning_rate=0.01)\n        self.oSeqDiscriminatorModel.compile(loss=self.fn_lossD, optimizer=oOptimizer)\n    def fn_makeAdversariaTrainingModel(self):\n        self.oSeqAdversarialModel = tf.keras.Sequential()\n        self.oSeqAdversarialModel.add(self.oSeqGenerator)\n        self.oSeqAdversarialModel.add(self.oSeqDiscriminator)\n        oOptimizer = tf.optimizers.Adam(learning_rate=0.01)\n        self.oSeqDiscriminator.trainable = False\n        self.oSeqAdversarialModel.compile(loss = self.fn_lossA, optimizer=oOptimizer)\n'

In [10]:
tf.keras.backend.clear_session()
oInputGAN = InputGAN()

In [11]:
strPrefix = time.strftime("run_%Y_%m_%d_%H_%M_%S")
oSummaryWriterInputDLoss = tf.summary.create_file_writer("logs/input/" + strPrefix + "D")
oSummaryWriterInputALoss = tf.summary.create_file_writer("logs/input/" + strPrefix + "A")

In [12]:
oOptimizer = tf.keras.optimizers.Adam(learning_rate=0.01)

In [29]:
nStep = 0
for nEphoch in range(10):
    for tensorBatch in oDatasetFramelockInput:
        # Train discriminator
        with tf.GradientTape() as oGradientTapeD:
            # Generate fake samples
            tensorEncodedSamples = oInputGAN.oSeqGe(tensorBatch)
            tensorFakeSamples = oInputGAN.oSeqGd(tensorEncodedSamples)
        
            tensorTrainingSamples = tf.concat([tensorBatch, tensorFakeSamples], axis=0)
            tensorPreds = oInputGAN.oSeqD(tensorTrainingSamples)
            tensorLabels = tf.constant([[1.]] * g_nBatchSize + [[0.]] * g_nBatchSize)
            tensorDLoss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(tensorLabels, tensorPreds))
        listDGradients = oGradientTapeD.gradient(tensorDLoss, oInputGAN.oSeqD.trainable_variables)
        oOptimizer.apply_gradients(zip(listDGradients, oInputGAN.oSeqD.trainable_variables))
        with oSummaryWriterInputDLoss.as_default():
            tf.summary.scalar("D_loss", tensorDLoss, nStep)
        
        # Train genertor
        with tf.GradientTape() as oGradientTapeG:
            # Apperant loss
            tensorApperantLoss = tf.reduce_mean(tf.losses.mean_absolute_error(tensorBatch, tensorFakeSamples))
            
            # Latent loss
            tensorEncoded1Samples = oInputGAN.oSeqGe1(tensorFakeSamples)
            tensorLatentLoss = \
            tf.reduce_mean(tf.losses.mean_squared_error(tensorEncodedSamples, tensorEncoded1Samples))
            
            # Feature matching loss
            tensorPredFeatures = oInputGAN.oSeqDInner(tensorFakeSamples)
            tensorTrueFeatures = oInputGAN.oSeqDInner(tensorBatch)
            tensorFeatureLoss = tf.reduce_mean(tf.losses.mean_squared_error(tensorPredFeatures, tensorTrueFeatures))
            
            tensorGLoss = tensorApperantLoss + tensorLatentLoss + tensorFeatureLoss
        listGGradients = oGradientTapeG.gradient(tensorGLoss, oInputGAN.oSeqG.trainable_variables)
        oOptimizer.apply_gradients(zip(listGGradients, oInputGAN.oSeqG.trainable_variables))
        with oSummaryWriterInputALoss.as_default():
            tf.summary.scalar("A_loss", tensorGLoss, nStep)
            
        nStep = nStep + 1
            
        """
        tensorNoise = tf.random.normal(shape=[g_nBatchSize,  g_nCodingSize])
        tensorGeneratedSamples = oInputGAN.oSeqGenerator(tensorNoise)
        tensorRealAndFakeSamples = tf.concat([tensorBatch, tensorGeneratedSamples], axis=0)
        tensorLabels = tf.constant([[1.]] * g_nBatchSize + [[0.]] * g_nBatchSize)
        fDLoss = oInputGAN.oSeqDiscriminatorModel.train_on_batch(tensorRealAndFakeSamples, tensorLabels)
        with oSummaryWriterInputDLoss.as_default():
            tf.summary.scalar("discriminator_loss", fDLoss, nStep)
        '''
        with oSummaryWriterInputDAcc.as_default():
            tf.summary.scalar("discriminator_acc", fDAcc, nStep)
        '''
        tensorNoise = tf.random.normal(shape=[g_nBatchSize,  g_nCodingSize])
        tensorLabels = tf.constant([[1.]] * g_nBatchSize)
        fALoss = oInputGAN.oSeqAdversarialModel.train_on_batch(tensorNoise, tensorLabels)
        with oSummaryWriterInputALoss.as_default():
            tf.summary.scalar("adversarial_loss", fALoss, nStep)
        '''
        with oSummaryWriterInputAAcc.as_default():
            tf.summary.scalar("adversarial_acc", fAAcc, nStep)
        '''
        nStep += 1
        """

ValueError: Weights for model sequential_2 have not yet been created. Weights are created when the Model is first called on inputs or `build()` is called with an `input_shape`.

In [63]:
tf.random.normal(shape = [2, 3])

<tf.Tensor: id=28609693, shape=(2, 3), dtype=float32, numpy=
array([[-0.07572332, -0.94126445,  0.08374636],
       [-0.5398398 , -0.97281134,  1.2739346 ]], dtype=float32)>