In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
import pathlib
import sqlite3
import time
import os
from tensorflow.keras.layers import Dense, Conv1D, Conv1DTranspose, MaxPooling1D, LeakyReLU, BatchNormalization, Flatten


In [2]:
# --- Paths -------------------------------------------------------------------
# Paths are assembled from pathlib components instead of backslash-separated
# strings: a literal such as ".\\data\\Datasets" is split into folders only on
# Windows; on POSIX systems it is treated as one file name containing backslashes.
BASE_FOLDER = pathlib.Path("data") / "Datasets" / "Phase"
DATABASE_FILE = BASE_FOLDER / "database_phase_normalized.db"
MODEL_SAVE_FOLDER = BASE_FOLDER / "model_saves" / "V3"

# --- Database tables ---------------------------------------------------------
TRAIN_TABLE_NAME = "Train"
VALIDATION_TABLE_NAME = "Validation"
TEST_TABLE_NAME = "Test"
# Row counts passed to the data generators as `len_of_table`.
LEN_TRAIN = 9216                  ### Update on dataset change ###
LEN_TEST = 6567                   ### Update on dataset change ###
LEN_VAL = 2985                    ### Update on dataset change ###

# --- Training settings -------------------------------------------------------
BATCH_SIZE = 512
EPOCH_NUM = 10

# --- Previously saved models (handed to the model builders as `load_path`) ----
_SAVED_RUN_FOLDER = MODEL_SAVE_FOLDER / "new try at 2021-09-01 00 29" / "epoch 300 at 2021-09-01 00 37"
DESCRIMINATOR_PATH = _SAVED_RUN_FOLDER / "discriminator loss 0.5791367292404175"
ENCODER_PATH = _SAVED_RUN_FOLDER / "auto_encoder loss 0.823987603187561"

In [3]:
class DataGenerator(tf.keras.utils.Sequence):
    '''Keras Sequence that serves batches of rows read from a SQLite table.

    Rows are selected through the table's `myindex` column. The columns
    "folder_name", "file_name" and "label" are removed from the features, and
    the label is binarised (0 stays 0, every other value becomes 1).

    Args:
        sql_file: path of the SQLite database file.
        table_name: name of the table to read from.
        len_of_table: number of rows addressed by the generator; row indexes
            0 .. len_of_table - 1 are requested from the `myindex` column.
        feature_or_lable: "feature", "label" or "both" - selects what
            `__getitem__` returns.
        batch_size: number of rows per batch.
        shuffle: when true, the row order is reshuffled at construction and in
            every `on_epoch_end` call.
    '''
    def __init__(self, sql_file, table_name, len_of_table, feature_or_lable="feature", batch_size=128, shuffle=True):
        '''Stores the settings and builds the initial index order.'''
        # Let the Keras base class initialise its own state first.
        super().__init__()
        self.sql_file = sql_file
        self.table_name = table_name
        self.len_of_table = len_of_table
        self.feature_or_lable = feature_or_lable
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        '''Number of full batches per epoch (a trailing partial batch is not counted).'''
        return int(np.floor(self.len_of_table / self.batch_size))

    def create_connect_database(self, db_file):
        '''Opens a connection to the SQLite database `db_file`.

        Returns the connection, or None (after printing the error) when the
        connection could not be opened.
        '''
        conn = None
        try:
            conn = sqlite3.connect(db_file)
        except sqlite3.Error as e:
            print(e)
        if conn:
            return conn

    def fetch_data_by_index(self, indexes_to_get, conn):
        '''Reads the rows whose `myindex` is in `indexes_to_get`.

        Returns a DataFrame indexed by `myindex`.
        '''
        # The values are coerced to int before being placed in the statement,
        # and joining keeps the SQL well-formed even for an empty index list.
        index_list = ",".join(str(int(ind)) for ind in indexes_to_get)
        request_sql = f"SELECT * FROM '{self.table_name}' WHERE myindex IN ({index_list})"
        return pd.read_sql(request_sql, conn, index_col="myindex")

    def __getitem__(self, index):
        '''Generates one batch of data.

        Returns the feature array (rows, columns, 1) as float32, the label
        vector, or a (features, labels) tuple, depending on `feature_or_lable`.

        Raises:
            IndexError: if `index` does not address any row.
            RuntimeError: if the database connection cannot be opened.
            ValueError: if `feature_or_lable` is not a supported mode.
        '''
        start = index * self.batch_size
        if index < 0 or start >= self.len_of_table:
            # Raising IndexError gives plain sequence iteration a clean stop
            # instead of an invalid SQL request.
            raise IndexError(f"batch index {index} is out of range")
        # Slicing clips at the end of the array, which covers a shorter last batch.
        indexes = self.indexes[start : start + self.batch_size]

        connection = self.create_connect_database(self.sql_file)
        if connection is None:
            raise RuntimeError(f"could not open database {self.sql_file}")
        try:
            data_batch = self.fetch_data_by_index(indexes, connection)
        finally:
            # A connection is opened for every batch, so it must be released here.
            connection.close()

        # Shuffle the rows inside the batch; labels are derived afterwards so
        # they stay aligned with the features.
        data_batch = data_batch.sample(frac=1)
        # Binarise from the rows actually fetched, so the label vector always
        # has the same length as the feature batch.
        label_batch = (data_batch["label"] != 0).to_numpy(dtype=np.float64)
        feature_frame = data_batch.drop(columns=["folder_name", "file_name", "label"])
        feature_batch = np.expand_dims(np.array(feature_frame, dtype=np.float32), axis=2)

        if self.feature_or_lable == "feature":
            return feature_batch
        if self.feature_or_lable == "label":
            return label_batch
        if self.feature_or_lable == "both":
            return feature_batch, label_batch
        raise ValueError(
            f"feature_or_lable must be 'feature', 'label' or 'both', got {self.feature_or_lable!r}"
        )

    def on_epoch_end(self):
        '''Rebuilds the row index order, shuffling it when `shuffle` is set.'''
        self.indexes = np.arange(self.len_of_table)
        if self.shuffle:
            np.random.shuffle(self.indexes)
            print("shuffle worked")


In [43]:
# Main model trainer
class Ganamoly:
    '''Adversarial trainer pairing a 1D convolutional auto-encoder with a discriminator.

    The auto-encoder acts as the generator: its reconstructions are labelled
    fake (1) and the original signals real (0) for the discriminator.

    Args:
        model_save_folder: folder (pathlib.Path) under which checkpoints are written.
        generator_class: data generator class used by `load_data`; it is called
            with sql_file, table_name, len_of_table, feature_or_lable,
            batch_size and shuffle keyword arguments.
        batch_size: batch size requested from the data generator.
    '''
    def __init__(self, model_save_folder, generator_class, batch_size):
        self.input_shape = (122, 1,)
        self.model_save_folder = model_save_folder
        self.generator_class = generator_class
        self.batch_size = batch_size

        # Loss function (both models end in a sigmoid, hence from_logits=False)
        self.loss_fn = tf.keras.losses.BinaryCrossentropy(from_logits=False)
        # Create Discriminator
        self.discriminator = self.build_discriminator(load=False, load_path=DESCRIMINATOR_PATH)
        self.discriminator.summary()

        # Create Auto-Encoder
        self.auto_encoder = self.build_auto_encoder(load=False, load_path=ENCODER_PATH)
        self.auto_encoder.summary()

    def build_discriminator(self, load=False, load_path=None):
        '''Returns the discriminator, loaded from `load_path` when `load` is true, otherwise newly built.'''
        if load:
            model = tf.keras.models.load_model(load_path)
        else:
            input_layer_discriminator = tf.keras.Input(shape = self.input_shape)
            conv1 = Conv1D(filters=8, kernel_size=5, strides=3, padding='valid', activation='relu')(input_layer_discriminator)
            conv1 = MaxPooling1D(2)(conv1)
            conv1 = BatchNormalization()(conv1)
            conv2 = Conv1D(filters=16, kernel_size=3, padding='valid', activation='relu')(conv1)
            conv2 = BatchNormalization()(conv2)
            conv2 = MaxPooling1D(2)(conv2)
            dense = Flatten()(conv2)
            dense = Dense(12, activation='relu')(dense)
            dense = Dense(12, activation='relu')(dense)
            discriminator = Dense(1, activation='sigmoid')(dense)
            model = tf.keras.Model(input_layer_discriminator, discriminator, name="discriminator_model")
        return model

    def build_auto_encoder(self, load=False, load_path=None):
        '''Returns the auto-encoder, loaded from `load_path` when `load` is true, otherwise newly built.

        When the model is built here, the encoder output tensor is also stored
        as `self.encoder`.
        '''
        if load:
            model = tf.keras.models.load_model(load_path)
        else:
            input_layer_encoder = tf.keras.Input(shape = self.input_shape)
            conv1 = Conv1D(filters=6, kernel_size=5, strides=2, padding='valid')(input_layer_encoder)
            conv1 = LeakyReLU()(conv1)
            conv2 = Conv1D(filters=12, kernel_size=5, strides=2, padding='valid')(conv1)
            conv2 = LeakyReLU()(conv2)
            conv2 = BatchNormalization()(conv2)
            conv3 = Conv1D(filters=24, kernel_size=3, strides=2, padding='valid')(conv2)
            conv3 = LeakyReLU()(conv3)
            self.encoder = BatchNormalization(name='encoder')(conv3)

            # Decoder
            convt1 = Conv1DTranspose(filters=12, kernel_size=5, strides=2, padding='valid', activation='relu')(self.encoder)
            convt1 = BatchNormalization()(convt1)
            convt2 = Conv1DTranspose(filters=6, kernel_size=5, strides=2, padding='valid', activation='relu')(convt1)
            convt2 = BatchNormalization()(convt2)
            decoder = Conv1DTranspose(filters=1, kernel_size=5, strides=2, padding='same', activation='sigmoid', name='decoder')(convt2)

            model = tf.keras.Model(inputs=input_layer_encoder, outputs=decoder, name="auto_encoder")
        return model

    @tf.function
    def train_step(self, one_batch):
        '''Runs one discriminator update followed by one auto-encoder update.

        Returns:
            (d_loss, g_loss): the discriminator and auto-encoder losses for this batch.
        '''
        # NOTE(review): both models are called without `training=True` - confirm
        # whether the BatchNormalization layers are meant to run in inference
        # mode during this custom training loop.
        # Label tensors follow the batch that actually arrived instead of
        # self.batch_size, so a batch of any size is labelled correctly.
        n_samples = tf.shape(one_batch)[0]
        label_shape = tf.stack([n_samples, 1])

        # Reconstructions from the auto-encoder serve as the fake signals
        generated_sig = self.auto_encoder(one_batch)
        # Combine them with real signal
        combined_sig = tf.concat([generated_sig, one_batch], axis=0)
        # Assemble labels discriminating real(0) from fake(1)
        labels = tf.concat([tf.ones(label_shape), tf.zeros(label_shape)], axis=0)

        # Train the discriminator
        with tf.GradientTape() as tape:
            predictions = self.discriminator(combined_sig)
            d_loss = self.loss_fn(labels, predictions)
        grads = tape.gradient(d_loss, self.discriminator.trainable_weights)
        self.d_opt.apply_gradients(zip(grads, self.discriminator.trainable_weights))

        # Assemble labels that say "signals are real(0)"
        misleading_labels = tf.zeros(label_shape)

        # Train the generator (note that we should *not* update the weights of the discriminator)!
        with tf.GradientTape() as tape:
            predictions = self.discriminator(self.auto_encoder(one_batch))
            g_loss = self.loss_fn(misleading_labels, predictions)
        grads = tape.gradient(g_loss, self.auto_encoder.trainable_weights)
        self.g_opt.apply_gradients(zip(grads, self.auto_encoder.trainable_weights))
        return d_loss, g_loss

    def _reuse_or_create_optimizer(self, attr_name, learning_rate):
        '''Returns the optimizer stored under `attr_name`, updated to `learning_rate`, or a new SGD optimizer.'''
        optimizer = getattr(self, attr_name, None)
        if optimizer is None:
            return tf.keras.optimizers.SGD(learning_rate=learning_rate)
        # Assign in place: the traced train_step keeps referring to this object.
        optimizer.learning_rate = learning_rate
        return optimizer

    def train(self, epochs, save_interval=50, d_rate=0.004, g_rate=0.004):
        '''Trains both models for `epochs` epochs.

        Args:
            epochs: number of passes over the training generator.
            save_interval: models are saved whenever `epoch % save_interval == 0`.
            d_rate: learning rate of the discriminator optimizer.
            g_rate: learning rate of the auto-encoder optimizer.

        Raises:
            RuntimeError: if the data generator yields no batch in an epoch.
        '''
        # Two optimizers for generator and discriminator. They are created once
        # and reused on later calls: `train_step` is a tf.function that keeps
        # the optimizer objects seen when it was first traced, so replacing
        # them would leave the new learning rates without effect.
        self.d_opt = self._reuse_or_create_optimizer("d_opt", d_rate)
        self.g_opt = self._reuse_or_create_optimizer("g_opt", g_rate)

        for epoch in range(epochs):
            d_loss = g_loss = None
            X_train = self.load_data(batch_size=self.batch_size, fchr_or_lbl="feature")
            for step, one_batch in enumerate(X_train):
                d_loss, g_loss = self.train_step(one_batch)
                if step % 8 == 0:
                    # Print metrics every 8 steps
                    print("epoch ", epoch)
                    print("discriminator loss at step %d: %.2f" % (step, d_loss))
                    print("a e loss at step %d: %.2f" % (step, g_loss))
            if d_loss is None:
                # Without a single batch there is nothing to train on or to save.
                raise RuntimeError("the training data generator produced no batches")
            # If at save interval => save models
            if epoch % save_interval == 0:
                self.save_models(epoch, g_loss, d_loss)

    def save_models(self, epoch, encoder_loss, discriminator_loss):
        '''Writes both models and their text summaries into a timestamped epoch folder.'''
        folder_name = self.model_save_folder / (f"epoch {epoch} at " + str(time.strftime("%Y-%m-%d %H %M")))
        # makedirs also creates missing parent folders and accepts an existing one.
        os.makedirs(folder_name, exist_ok=True)

        with open(folder_name / "auto-encoder summary.txt", "w") as sum_file:
            self.auto_encoder.summary(print_fn=lambda x: sum_file.write(x + '\n'))
        self.auto_encoder.save(str(folder_name / f"auto_encoder loss {encoder_loss}"))

        with open(folder_name / "discriminator summary.txt", "w") as sum_file:
            self.discriminator.summary(print_fn=lambda x: sum_file.write(x + '\n'))
        self.discriminator.save(str(folder_name / f"discriminator loss {discriminator_loss}"))

    def load_data(self, batch_size, fchr_or_lbl):
        '''Creates a shuffled generator over the training table.

        Args:
            batch_size: number of rows per batch.
            fchr_or_lbl: forwarded as `feature_or_lable` to the generator class.
        '''
        data_generated = self.generator_class(sql_file=DATABASE_FILE, table_name=TRAIN_TABLE_NAME, len_of_table=LEN_TRAIN,
                                       feature_or_lable=fchr_or_lbl, batch_size=batch_size, shuffle=True)
        return data_generated

In [44]:
# Each run writes into its own timestamped folder under MODEL_SAVE_FOLDER.
save_folder_name = MODEL_SAVE_FOLDER / ("new try at " + str(time.strftime("%Y-%m-%d %H %M")))
# makedirs creates MODEL_SAVE_FOLDER as well when it does not exist yet
# (os.mkdir would fail on a fresh checkout) and accepts an existing folder.
os.makedirs(save_folder_name, exist_ok=True)

# Use the shared BATCH_SIZE constant instead of repeating the literal value.
ganamoly = Ganamoly(save_folder_name, generator_class = DataGenerator, batch_size = BATCH_SIZE)
#ganamoly.train(epochs=1000+1, save_interval=50)

Model: "discriminator_model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_11 (InputLayer)        [(None, 122, 1)]          0         
_________________________________________________________________
conv1d_25 (Conv1D)           (None, 40, 8)             48        
_________________________________________________________________
max_pooling1d_10 (MaxPooling (None, 20, 8)             0         
_________________________________________________________________
batch_normalization_25 (Batc (None, 20, 8)             32        
_________________________________________________________________
conv1d_26 (Conv1D)           (None, 18, 16)            400       
_________________________________________________________________
batch_normalization_26 (Batc (None, 18, 16)            64        
_________________________________________________________________
max_pooling1d_11 (MaxPooling (None, 9, 16)     

In [None]:
# Separate training run of the same model instance
ganamoly.train(
    epochs=551,
    save_interval=50,
    d_rate=0.005,
    g_rate=0.005,
)

In [36]:
# Evaluation data: features and labels read from the Test table
# (the variable is named `valid_dataset`, but it reads TEST_TABLE_NAME).
valid_dataset = DataGenerator(
    sql_file=DATABASE_FILE,
    table_name=TEST_TABLE_NAME,
    len_of_table=LEN_TEST,
    feature_or_lable="both",
    batch_size=512,
    shuffle=True,
)

shuffle worked


In [49]:
# Accuracy of the discriminator used as a classifier: a normalised score
# >= 0.5 counts as label 1, a score below 0.5 as label 0.
trues = 0
total_samples = 0
for fchr, lbl in valid_dataset:
    result = ganamoly.discriminator.predict(fchr)
    scores = np.asarray(result).reshape(-1)
    labels = np.asarray(lbl).reshape(-1)

    # Min-max normalise the scores of this batch to see bigger contrast in results.
    lowest, highest = scores.min(), scores.max()
    if highest > lowest:
        mean_result = (scores - lowest) / (highest - lowest)
    else:
        # All scores are equal: normalising would divide by zero, keep the raw scores.
        mean_result = scores

    # Count per batch with array operations; the number of samples comes from
    # the batch itself instead of a hard-coded 512.
    hits_positive = (mean_result >= 0.5) & (labels == 1)
    hits_negative = (mean_result < 0.5) & (labels == 0)
    trues += int(np.count_nonzero(hits_positive | hits_negative))
    total_samples += labels.shape[0]

print("all", trues / total_samples if total_samples else float("nan"))

all 0.4954427083333333
