# GANs _Building_ and _Training_

###### Import libraries

In [1]:
import pdb
import os
import shutil

In [2]:
import numpy as np
import pandas as pd

In [3]:
import tensorflow as tf

In [4]:
import sklearn.preprocessing

###### Preprocess training samples of a section

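Min-max scale the samples from __jobs/JOBXXXX/samples/sections/section/samples.csv__ 
and write the result to __jobs/JOBXXXX/samples/sections/section/preprocessed/samples.csv__.
The scaler itself is fitted on the __sections/section/samples.csv__ file of the separate samples directory passed to the function.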

In [5]:
def fn_preprocessJobsSectionSamples(strPart, strJobsDir, strJobsSamplesDir):
    """Min-max scale the samples of one section for every job.

    A scaler is fitted on ``<strJobsSamplesDir>/sections/<strPart>/samples.csv``
    and then applied to each job directory found directly under ``strJobsDir``.

    Args:
        strPart: Section name (e.g. ``"input"``), used as a path component.
        strJobsDir: Directory whose sub-directories are the individual jobs.
        strJobsSamplesDir: Directory holding the samples the scaler is fitted on.

    Returns:
        The fitted scaler, as returned by ``fn_makeScaler``.
    """
    strJobsSectionSamplesDir = os.path.join(strJobsSamplesDir, "sections/" + strPart)
    oMinMaxScaler = fn_makeScaler(strJobsSectionSamplesDir)

    for name in os.listdir(strJobsDir):
        strJobDir = os.path.join(strJobsDir, name)
        # os.listdir also returns plain files (e.g. ".DS_Store"); those are not
        # jobs and would make the per-job CSV read fail, so skip them.
        if not os.path.isdir(strJobDir):
            continue
        fn_preprocessJobSectionSamples(strPart, oMinMaxScaler, strJobDir)
    return oMinMaxScaler
    
def fn_makeScaler(strJobsSectionSamplesDir):
    """Return a ``MinMaxScaler`` fitted on ``samples.csv`` inside the given directory."""
    strCsvPath = os.path.join(strJobsSectionSamplesDir, "samples.csv")
    npNArrSamples = pd.read_csv(strCsvPath).values
    # fit() returns the scaler itself, so the fitted instance can be returned directly.
    return sklearn.preprocessing.MinMaxScaler().fit(npNArrSamples)
def fn_preprocessJobSectionSamples(strPart, oMinMaxScaler, strJobDir):
    """Scale one job's section samples and store them in a fresh ``preprocessed`` directory.

    Reads ``<strJobDir>/samples/sections/<strPart>/samples.csv``, passes its
    values through ``oMinMaxScaler.transform`` and writes the result, with the
    same column names and no index column, to ``preprocessed/samples.csv``
    next to the source file. An existing ``preprocessed`` directory is
    deleted first.
    """
    strSectionPrefix = "samples/sections/" + strPart

    pdDfRaw = pd.read_csv(os.path.join(strJobDir, strSectionPrefix + "/samples.csv"))
    pdDfScaled = pd.DataFrame(oMinMaxScaler.transform(pdDfRaw.values), columns=pdDfRaw.columns)

    # Recreate the output directory so nothing from an earlier run survives.
    strTargetDir = os.path.join(strJobDir, strSectionPrefix + "/preprocessed")
    if os.path.exists(strTargetDir):
        shutil.rmtree(strTargetDir)
    os.mkdir(strTargetDir)

    pdDfScaled.to_csv(os.path.join(strTargetDir, "samples.csv"), index = False)

###### Get dataset

In [6]:
def fn_splitCsvLine(strLine):
    """Parse one CSV text line into a single tensor of ``g_nFeatures`` float values."""
    # One float default per expected column tells decode_csv how many fields to read.
    lstColumns = tf.io.decode_csv(strLine, record_defaults=[0.] * g_nFeatures)
    return tf.stack(lstColumns)
def fn_getRealDataset(strPart, strJobsDir, nReaders = 5, nReadThreads = 5, nParseTreads = 5, nShuffleBufferSize = 1000):
    """Build a shuffled, batched dataset from every job's preprocessed section samples.

    Args:
        strPart: Section name used in the file glob.
        strJobsDir: Directory containing the job sub-directories.
        nReaders: ``cycle_length`` of the file interleave.
        nReadThreads: ``num_parallel_calls`` of the file interleave.
        nParseTreads: ``num_parallel_calls`` of the CSV parsing map.
        nShuffleBufferSize: Buffer size handed to ``shuffle``.

    Returns:
        A ``tf.data`` dataset yielding batches of ``g_nBatchSize`` parsed lines.
    """
    strFilePattern = strJobsDir + "/*/samples/sections/" + strPart + "/preprocessed/samples.csv"

    def fn_readDataLines(strSampleFile):
        # skip(1) drops the first line of each file (presumably the column
        # header written during preprocessing).
        return tf.data.TextLineDataset(strSampleFile).skip(1)

    oFiles = tf.data.Dataset.list_files(strFilePattern)
    oLines = oFiles.interleave(fn_readDataLines, cycle_length=nReaders,
                               num_parallel_calls=nReadThreads)
    oRecords = oLines.map(fn_splitCsvLine, num_parallel_calls=nParseTreads)
    return oRecords.shuffle(nShuffleBufferSize).batch(g_nBatchSize)

## Input section

Set the global hyperparameters.

In [7]:
# Samples per batch; read by fn_getRealDataset and by the training loop.
g_nBatchSize = 1
# Columns parsed from each CSV line; also the width of the generator's output layer.
g_nFeatures = 3
# Length of each random noise vector fed to the generator.
g_nCodingSize = 30

In [37]:
# Fit the scaler on samples/sections/input/samples.csv and write a scaled copy
# of the "input" samples for each job found under jobs/.
oMinMaxScalerInput = fn_preprocessJobsSectionSamples("input", "jobs", "samples")

In [8]:
# Shuffled, batched dataset over the preprocessed "input" samples of all jobs.
oDatasetInputReal = fn_getRealDataset("input", "jobs")

### Building model

In [135]:
class InputGAN(object):
    """Generator/discriminator pair plus the two compiled models used to train them.

    Attributes created by the constructor:
        oSeqGenerator: Dense(64, relu) -> Dense(g_nFeatures, sigmoid).
        oSeqDiscriminator: Dense(64, relu) -> Dense(1, sigmoid).
        oSeqDiscriminatorModel: compiled wrapper around the discriminator,
            used to train it on labelled samples.
        oSeqAdversarialModel: compiled generator -> discriminator stack,
            used to train the generator.
    """

    def __init__(self):
        # Loss/metric callables, kept as attributes; only the losses are passed
        # to compile() below.
        self.fn_lossD = tf.losses.binary_crossentropy
        self.fn_lossA = tf.losses.binary_crossentropy
        self.fn_metricD = tf.metrics.binary_accuracy
        self.fn_metricA = tf.metrics.binary_accuracy

        # Order matters: both sub-networks must exist before the training
        # models that wrap them are assembled.
        self.fn_makeGenerator()
        self.fn_makeDiscriminator()
        self.fn_makeDiscriminatorTrainingModel()
        self.fn_makeAdversariaTrainingModel()

    def fn_makeGenerator(self):
        """Create the generator network that emits ``g_nFeatures`` values per input row."""
        self.oSeqGenerator = tf.keras.Sequential()
        self.oSeqGenerator.add(tf.keras.layers.Dense(64, activation=tf.keras.activations.relu))
        self.oSeqGenerator.add(tf.keras.layers.Dense(g_nFeatures, activation=tf.keras.activations.sigmoid))

    def fn_makeDiscriminator(self):
        """Create the discriminator network that emits one sigmoid score per input row."""
        self.oSeqDiscriminator = tf.keras.Sequential()
        self.oSeqDiscriminator.add(tf.keras.layers.Dense(64, activation=tf.keras.activations.relu))
        self.oSeqDiscriminator.add(tf.keras.layers.Dense(1, activation=tf.keras.activations.sigmoid))

    def fn_makeDiscriminatorTrainingModel(self):
        """Wrap the discriminator in a model compiled for plain gradient descent."""
        self.oSeqDiscriminatorModel = tf.keras.Sequential()
        self.oSeqDiscriminatorModel.add(self.oSeqDiscriminator)
        # Native TF2 optimizer with the same update rule and learning rate as
        # the legacy compat.v1 GradientDescentOptimizer; matches the optimizer
        # family used for the adversarial model.
        oOptimizer = tf.optimizers.SGD(learning_rate=0.01)
        self.oSeqDiscriminatorModel.compile(loss=self.fn_lossD, optimizer=oOptimizer)

    def fn_makeAdversariaTrainingModel(self):
        """Stack generator and discriminator into the model that trains the generator."""
        self.oSeqAdversarialModel = tf.keras.Sequential()
        self.oSeqAdversarialModel.add(self.oSeqGenerator)
        self.oSeqAdversarialModel.add(self.oSeqDiscriminator)
        oOptimizer = tf.optimizers.Adam(learning_rate=0.01)
        # The discriminator is flagged non-trainable only after its own model
        # was compiled and before this one is compiled.
        # NOTE(review): this relies on Keras honoring the `trainable` values in
        # effect at compile time - confirm for the installed TF version.
        self.oSeqDiscriminator.trainable = False
        self.oSeqAdversarialModel.compile(loss=self.fn_lossA, optimizer=oOptimizer)

In [156]:
# Clear the Keras backend session before building a fresh set of models.
tf.keras.backend.clear_session()
oInputGAN = InputGAN()

In [157]:
# Delete the summary directories of earlier runs. shutil.rmtree replaces the
# notebook-only "!rm -rf" shell magic so the cell is plain, portable Python;
# ignore_errors=True keeps a missing directory from raising.
shutil.rmtree("logs/input/d_64_relu_1_sigmoid_loss", ignore_errors=True)
shutil.rmtree("logs/input/a_g_64_relu_3_sigmoid_loss", ignore_errors=True)

In [158]:
# One summary writer per loss curve; the training loop logs the discriminator
# loss to the first and the adversarial loss to the second.
oSummaryWriterInputDLoss = tf.summary.create_file_writer("logs/input/d_64_relu_1_sigmoid_loss")
oSummaryWriterInputALoss = tf.summary.create_file_writer("logs/input/a_g_64_relu_3_sigmoid_loss")

In [159]:
# Alternating GAN training: for every real batch, first update the
# discriminator on real + generated samples, then update the generator through
# the adversarial model. Both losses are written as summaries at every step.
# Only losses are logged: both models are compiled without metrics.
nStep = 0
for nEphoch in range(10):
    for tensorBatch in oDatasetInputReal:
        # A batch can hold fewer than g_nBatchSize rows (the dataset is batched
        # without dropping the remainder), so size the fake samples and labels
        # from the batch actually received.
        nRealSamples = int(tensorBatch.shape[0])

        # Discriminator step: real samples are labelled 1, generated ones 0.
        tensorNoise = tf.random.normal(shape=[nRealSamples, g_nCodingSize])
        tensorGeneratedSamples = oInputGAN.oSeqGenerator(tensorNoise)
        tensorRealAndFakeSamples = tf.concat([tensorBatch, tensorGeneratedSamples], axis=0)
        tensorLabels = tf.constant([[1.]] * nRealSamples + [[0.]] * nRealSamples)
        fDLoss = oInputGAN.oSeqDiscriminatorModel.train_on_batch(tensorRealAndFakeSamples, tensorLabels)
        with oSummaryWriterInputDLoss.as_default():
            tf.summary.scalar("discriminator_loss", fDLoss, step=nStep)

        # Adversarial step: fresh noise, labelled 1 so the generator is pushed
        # towards samples the discriminator scores as real.
        tensorNoise = tf.random.normal(shape=[nRealSamples, g_nCodingSize])
        tensorLabels = tf.constant([[1.]] * nRealSamples)
        fALoss = oInputGAN.oSeqAdversarialModel.train_on_batch(tensorNoise, tensorLabels)
        with oSummaryWriterInputALoss.as_default():
            tf.summary.scalar("adversarial_loss", fALoss, step=nStep)

        nStep += 1

In [63]:
# Scratch cell: draw a 2x3 tensor from tf.random.normal to inspect its output.
tf.random.normal(shape = [2, 3])

<tf.Tensor: id=28609693, shape=(2, 3), dtype=float32, numpy=
array([[-0.07572332, -0.94126445,  0.08374636],
       [-0.5398398 , -0.97281134,  1.2739346 ]], dtype=float32)>