In [None]:
import tensorflow as tf
import numpy as np
import random
import time
import sys
import os

In [None]:
class Generator:
    """Iterable over (sample, label) pairs read from a directory of CSV files.

    The dataset directory must be organised as one sub-directory per class,
    each holding that class's ``.csv`` samples::

        dataset_path/
            class 1/
                sample 1.csv
                sample 2.csv
                ...
            class 2/
                sample 1.csv
                ...

    Class sub-directories are sorted by name and labelled 0, 1, 2, ... in that
    order. Files that do not end in ``.csv`` are ignored.

    Args:
        dataset_path: Root directory of the dataset.
        shape: Shape every yielded array must have. ``shape[0]`` is the number
            of rows cropped from the middle of each CSV file.
        shuffle: When true (default), the sample order is re-shuffled at the
            start of every iteration. When false, samples are yielded in
            sorted class / sorted file-name order.

    Attributes:
        datapaths: List of CSV file paths.
        labels: Integer class label for each entry of ``datapaths``.
        num_sample: Number of CSV files found (before shape filtering).
    """

    def __init__(self, dataset_path, shape, shuffle=True):
        self.dataset_path = dataset_path
        self.datapaths = list()
        self.labels = list()
        self.read_samples()
        self.num_sample = len(self.datapaths)
        self.shape = shape
        self.shuffle = shuffle

    @staticmethod
    def _top_level_entries(path):
        """Return ``(dirnames, filenames)`` found directly inside ``path``."""
        entry = next(os.walk(path), None)
        if entry is None:
            # os.walk yields nothing when the directory cannot be listed.
            raise FileNotFoundError("cannot list dataset directory: {}".format(path))
        return entry[1], entry[2]

    def read_samples(self):
        """Append every CSV path under ``dataset_path`` and its label to the lists."""
        class_names, _ = self._top_level_entries(self.dataset_path)

        for label, class_name in enumerate(sorted(class_names)):
            class_dir = os.path.join(self.dataset_path, class_name)
            _, filenames = self._top_level_entries(class_dir)
            # Sort so that the unshuffled order is reproducible across
            # file systems (os.walk returns names in arbitrary order).
            for filename in sorted(filenames):
                if filename.endswith('.csv'):
                    self.datapaths.append(os.path.join(class_dir, filename))
                    self.labels.append(label)

    def combined_shuffle(self):
        """Shuffle ``datapaths`` and ``labels`` in place, keeping them paired."""
        if not self.datapaths:
            # zip(*[]) yields nothing to unpack; an empty dataset stays empty.
            return
        combined = list(zip(self.datapaths, self.labels))
        random.shuffle(combined)
        self.datapaths[:], self.labels[:] = zip(*combined)

    def __iter__(self):
        """Yield ``(data, label)`` for every sample whose centre crop fits ``shape``.

        Each CSV file is loaded as a numpy array and cropped to ``shape[0]``
        rows centred on the middle row. Samples whose cropped array does not
        have exactly ``shape`` (for example files that are too short) are
        skipped silently.
        """
        if self.shuffle:
            self.combined_shuffle()

        expected_shape = tuple(self.shape)
        rows = expected_shape[0]
        # Snapshot the order so a later in-place shuffle cannot disturb an
        # iteration that is still in progress.
        samples = list(zip(self.datapaths, self.labels))

        for path, label in samples:
            # Passing the path lets numpy open *and close* the file.
            data = np.loadtxt(path, delimiter=",")
            if data.ndim == 0:
                continue
            start = data.shape[0] // 2 - rows // 2
            if start < 0:
                # File has fewer rows than requested.
                continue
            # start + rows (rather than mid + rows // 2) also handles odd row counts.
            data = data[start:start + rows]
            if data.shape == expected_shape:
                yield data, label

In [None]:
def get_time(time):
    """Format a duration in seconds as a short progress-bar string.

    Durations below one minute are shown in seconds ("42s"), durations below
    one hour in minutes ("7m"), and anything longer in hours with two
    decimals ("1.50h"). Note that the parameter shadows the ``time`` module
    inside this function.
    """
    # below a minute: seconds
    if time < 60:
        return "{:.0f}s".format(time)

    # below an hour: minutes
    if time < 3600:
        return "{:.0f}m".format(time/60)

    # everything else: fractional hours
    return "{:.2f}h".format(time/60/60)

In [None]:
# see https://github.com/tensorflow/models/blob/master/tutorials/image/cifar10/cifar10_multi_gpu_train.py#L101
def average_gradients(tower_grads):
    """Average per-tower gradients into one (gradient, variable) list.

    Args:
        tower_grads: One entry per tower; each entry is a list of
            (gradient, variable) pairs, with the variables in the same order
            in every tower.

    Returns:
        A list of (averaged gradient, variable) pairs, one per variable.
    """
    def _merge(per_variable):
        # per_variable looks like:
        #   ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN))
        tower_gradients = [pair[0] for pair in per_variable]
        mean_gradient = tf.reduce_mean(tower_gradients, 0)
        # The variables are shared across towers, so the first tower's
        # reference stands in for all of them.
        shared_variable = per_variable[0][1]
        return (mean_gradient, shared_variable)

    # zip(*...) regroups the per-tower lists into one tuple per variable.
    return [_merge(per_variable) for per_variable in zip(*tower_grads)]

In [None]:
#Seq2Seq Autoencoder with a single LSTM for both encoder and decoder,
#along with optional rollout for the decoder LSTM
def autoencoder(i, inputs, rollout):
    """Build one tower of the sequence-to-sequence LSTM autoencoder.

    The encoder LSTM consumes ``inputs`` step by step; its final state seeds
    the decoder LSTM, whose outputs are projected back to ``input_width``
    through a sigmoid dense layer named "FC1".

    Reads the module-level globals ``sequence_length``, ``batch_size``,
    ``input_width`` and ``lstm_size``.

    Args:
        i: Tower index; only used to name the ``autoencoder_{i}`` and
            ``loss_{i}`` name scopes.
        inputs: Float tensor that is unstacked along its first axis into one
            entry per time step (the caller is expected to pass it as
            [sequence_length, batch_size, input_width] -- TODO confirm).
        rollout: Boolean tensor used as a ``tf.cond`` predicate. When true the
            decoder is fed its own projected previous output; when false it is
            fed the true input of the previous time step.

    Returns:
        A tuple ``(outputs, encoder_cell_states, loss)``: the reconstructed
        sequence, the final encoder state returned by ``tf.nn.raw_rnn``, and
        the mean squared error between ``inputs`` and ``outputs``.
    """
    
    def encoder_loop_fn(time, cell_output, cell_state, loop_state):
        """raw_rnn loop function for the encoder: feed inputs_ta step by step."""
        emit_output = cell_output  # == None for time == 0
        
        if cell_output is None:  # time == 0
            # first call: start from an all-zero LSTM state
            next_cell_state = encoder_cell.zero_state(batch_size, tf.float32)
        else:
            next_cell_state = cell_state
            
        elements_finished = (time >= sequence_length)
        finished = tf.reduce_all(elements_finished)
        
        # once past the end of the sequence feed zeros instead of reading
        # beyond the TensorArray
        next_input = tf.cond(finished,
                             lambda: tf.zeros([batch_size, input_width], dtype=tf.float32),
                             lambda: inputs_ta.read(time))
        
        next_loop_state = None
        return (elements_finished, next_input, next_cell_state, emit_output, next_loop_state)


    def decoder_loop_fn(time, cell_output, cell_state, loop_state):         
        """raw_rnn loop function for the decoder, seeded with the encoder state."""
        emit_output = cell_output
        
        if cell_output is None:  # time == 0
            # first call: start from the encoder's final state with a zero input
            next_cell_state = encoder_cell_states
            next_input = tf.zeros([batch_size, input_width], dtype=tf.float32)
        else:
            next_cell_state = cell_state
            # rollout: feed back the projected previous output;
            # otherwise: feed the true input of the previous time step
            # NOTE(review): this dense layer is created inside raw_rnn's loop
            # body -- confirm it resolves to the same "FC1" variables as the
            # output projection at the end of this function.
            next_input = tf.cond(rollout,
                                 lambda: tf.layers.dense(cell_output, input_width, activation=tf.nn.sigmoid, name="FC1", reuse=tf.AUTO_REUSE), 
                                 lambda: inputs_ta.read(time-1))
            
        elements_finished = (time >= sequence_length)
        
        next_loop_state = None
        return (elements_finished, next_input, next_cell_state, emit_output, next_loop_state)

    with tf.name_scope('autoencoder_{}'.format(i)):
        #convert sample into tensor array
        # clear_after_read=False because each entry may be read by both the
        # encoder and the decoder loop
        inputs_ta = tf.TensorArray(dtype=tf.float32, size=sequence_length, clear_after_read=False)
        inputs_ta = inputs_ta.unstack(inputs)

        with tf.name_scope('encoder'):
            #encoder lstm
            # NOTE(review): name_scope does not open a variable scope; whether
            # the encoder and decoder cells share weights depends on the
            # enclosing variable scope's reuse setting -- confirm intent.
            encoder_cell = tf.contrib.rnn.LSTMCell(lstm_size)
            _, encoder_cell_states, _ = tf.nn.raw_rnn(encoder_cell, encoder_loop_fn)

        with tf.name_scope('decoder'):
            #decoder lstm
            decoder_cell = tf.contrib.rnn.LSTMCell(lstm_size)
            decoder_hidden_states_ta, _, _ = tf.nn.raw_rnn(decoder_cell, decoder_loop_fn)

        #convert lstm output array into a tensor
        outputs = decoder_hidden_states_ta.stack()
        # project every decoder output back to the input width
        outputs = tf.layers.dense(outputs, input_width, activation=tf.nn.sigmoid, name="FC1", reuse=tf.AUTO_REUSE)

    with tf.name_scope("loss_{}".format(i)):       
        # mean squared reconstruction error
        loss = tf.reduce_mean(tf.square(inputs-outputs))
    
    return outputs, encoder_cell_states, loss

In [None]:
def model():
    """Build the multi-GPU autoencoder graph in the current default graph.

    Reads the module-level globals ``lr``, ``decay_steps``, ``decay_rate``,
    ``rollout_``, ``gen``, ``batch_size``, ``num_gpus``, ``sequence_length``
    and ``input_width``. One autoencoder tower is built per GPU; all towers
    share their variables and their gradients are averaged before being
    applied by a single Adam optimizer.

    Returns:
        A tuple ``(init, merged, saver, avg_loss, apply_gradient_op,
        encoder_cell_states, labels)`` where

        * ``init`` initialises all global variables,
        * ``merged`` is the merged summary op (learning rate, average loss),
        * ``saver`` saves/restores all global variables (keeps 5 checkpoints),
        * ``avg_loss`` is the mean of the per-tower losses,
        * ``apply_gradient_op`` performs one training step,
        * ``encoder_cell_states`` / ``labels`` are the final encoder state and
          the batch labels of the *last* tower built.

    Raises:
        ValueError: If ``num_gpus`` is smaller than 1.
    """
    if num_gpus < 1:
        # Without a tower there is no loss and nothing to return.
        raise ValueError("num_gpus must be at least 1, got {}".format(num_gpus))

    global_step = tf.train.get_or_create_global_step()
    learning_rate = tf.train.exponential_decay(lr,
                                               global_step,
                                               decay_steps,
                                               decay_rate,
                                               staircase=True)
    tf.summary.scalar("learning_rate", learning_rate)
    opt = tf.train.AdamOptimizer(learning_rate=learning_rate)

    rollout = tf.constant(rollout_, dtype=tf.bool)

    # from_generator is a static factory; Dataset itself is abstract and must
    # not be instantiated. repeat() comes before batch() so that an epoch
    # boundary never produces a short batch, which would contradict the fixed
    # batch_size asserted by set_shape below.
    dataset = (tf.data.Dataset.from_generator(lambda: gen, output_types=(tf.float32, tf.int32))
               .prefetch(2 * batch_size * num_gpus)
               .repeat(count=None)
               .batch(batch_size))
    iterator = dataset.make_one_shot_iterator()

    tower_grads = []
    losses = []

    with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
        for i in range(num_gpus):
            with tf.device('/gpu:%d' % i):
                with tf.name_scope('Tower_%d' % (i)):
                    # Dequeues one batch for the GPU
                    inputs, labels = iterator.get_next()
                    # [batch, input_width, sequence_length] -> time-major
                    # [sequence_length, batch, input_width]
                    inputs = tf.transpose(inputs, perm=[2, 0, 1])
                    inputs.set_shape([sequence_length, batch_size, input_width])

                    # autoencoder returns (outputs, encoder_cell_states, loss)
                    _, encoder_cell_states, loss = autoencoder(i, inputs, rollout)
                    grads = opt.compute_gradients(loss)
                    tower_grads.append(grads)
                    losses.append(loss)

    gradients = average_gradients(tower_grads)
    apply_gradient_op = opt.apply_gradients(gradients, global_step)

    avg_loss = tf.reduce_mean(losses)
    tf.summary.scalar("avg_loss", avg_loss)

    saver = tf.train.Saver(tf.global_variables(), max_to_keep=5)
    merged = tf.summary.merge_all()
    init = tf.global_variables_initializer()

    return init, merged, saver, avg_loss, apply_gradient_op, encoder_cell_states, labels

In [None]:
##Training
# Configuration read by Generator, autoencoder() and model() as module globals.
data_path = "/home/kalvik/shared/CSI_DATA/preprocessed_final/train"
weight_path = "/home/kalvik/shared/autoencoder/weights/mse/"
tensorboard_path = "/home/kalvik/shared/autoencoder/tensorboard/train_mse_8000_4000"
sequence_length = 540   # time steps per sample (columns of each CSV)
input_width = 8000      # features per time step (rows cropped from each CSV)
decay_rate = 0.9        # learning-rate decay factor, applied once per epoch
lstm_size = 4000        # LSTM units in encoder and decoder
rollout_ = False        # False: decoder is fed the true previous input
batch_size = 8          # samples per GPU tower
save_epoch = 5          # checkpoint every save_epoch epochs
num_gpus = 4
epochs = 5
lr = 5E-4
gen = Generator(data_path, (input_width, sequence_length), shuffle=True)
data_steps = int(gen.num_sample//(batch_size*num_gpus))
decay_steps = data_steps

if data_steps < 1:
    # Fewer samples than one multi-GPU step: nothing could be trained.
    raise ValueError("not enough samples ({}) for one step of {} x {}".format(
        gen.num_sample, batch_size, num_gpus))

# Saver.save does not create missing parent directories.
os.makedirs(weight_path, exist_ok=True)

tf.reset_default_graph()
with tf.Graph().as_default(), tf.device('/cpu:0'):
    init, merged, saver, avg_loss, apply_gradient_op, encoded_data, encoded_labels = model()

    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        # Resume from the newest checkpoint when there is one.
        latest_checkpoint = tf.train.latest_checkpoint(weight_path)
        if latest_checkpoint is not None:
            saver.restore(sess, latest_checkpoint)
        else:
            sess.run(init)

        writer = tf.summary.FileWriter(tensorboard_path, sess.graph)
        try:
            last_saved_epoch = None

            for epoch in range(1, epochs + 1):
                print("\nEpoch {}/{}".format(epoch, epochs))
                batch_time = []
                epoch_time = time.time()

                for step in range(1, data_steps + 1):
                    time_start = time.time()
                    _, batch_loss, summary = sess.run([apply_gradient_op, avg_loss, merged])
                    batch_time.append(time.time()-time_start)

                    writer.add_summary(summary, ((epoch-1) * data_steps) + step)

                    # last step: elapsed epoch time; otherwise: estimated time remaining
                    if (step == data_steps):
                        sys.stdout.write("\r - {}/{} - {} - loss: {:.4f}".format(step, data_steps, get_time(time.time()-epoch_time), batch_loss))
                    else:
                        sys.stdout.write("\r - {}/{} - {} - loss: {:.4f}".format(step, data_steps, get_time(np.mean(batch_time)*(data_steps-step)), batch_loss))
                    sys.stdout.flush()

                #save model
                if epoch % save_epoch == 0:
                    saver.save(sess, os.path.join(weight_path, "autoencoder_loss-{}_epoch-{}".format(batch_loss, epoch)))
                    last_saved_epoch = epoch

            #save the final model unless the periodic save just wrote it
            if epochs >= 1 and last_saved_epoch != epochs:
                saver.save(sess, os.path.join(weight_path, "autoencoder_loss-{}_epoch-{}".format(batch_loss, epoch)))
        finally:
            # flush pending summaries and release the event file
            writer.close()

        print("\nFinished!")          

In [None]:
##Testing
# Evaluates the newest checkpoint on the test split with decoder rollout enabled.
data_path = "/home/kalvik/shared/CSI_DATA/preprocessed_final/test"
weight_path = "/home/kalvik/shared/autoencoder/weights/mse/"
tensorboard_path = "/home/kalvik/shared/autoencoder/tensorboard/test_mse_rollout"
sequence_length = 540
input_width = 8000 
# decay_rate / lr / decay_steps are still required because model() always
# builds the optimizer, even though no training step is run here.
decay_rate = 0.9
lstm_size = 4000
rollout_ = True
batch_size = 8
num_gpus = 4
lr = 1E-4
gen = Generator(data_path, (input_width, sequence_length), shuffle=True)
data_steps = int(gen.num_sample//(batch_size*num_gpus))
decay_steps = data_steps

tf.reset_default_graph()
with tf.Graph().as_default(), tf.device('/cpu:0'):
    init, merged, saver, avg_loss, apply_gradient_op, encoded_data, encoded_labels = model()
    
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        latest_checkpoint = tf.train.latest_checkpoint(weight_path)
        if latest_checkpoint is None:
            raise FileNotFoundError("no checkpoint found in {}".format(weight_path))
        saver.restore(sess, latest_checkpoint)

        writer = tf.summary.FileWriter(tensorboard_path, sess.graph)
        try:
            batch_time = []
            # Defined here so the cell does not depend on a variable left
            # behind by the training cell.
            start_time = time.time()
            print("Started Testing")

            for step in range(1, data_steps + 1):
                time_start = time.time()
                batch_loss, summary = sess.run([avg_loss, merged])
                batch_time.append(time.time()-time_start)

                writer.add_summary(summary, step)

                # last step: total elapsed time; otherwise: estimated time remaining
                if (step == data_steps):
                    sys.stdout.write("\r - {}/{} - {} - loss: {:.4f}".format(step, data_steps, get_time(time.time()-start_time), batch_loss))
                else:
                    sys.stdout.write("\r - {}/{} - {} - loss: {:.4f}".format(step, data_steps, get_time(np.mean(batch_time)*(data_steps-step)), batch_loss))
                sys.stdout.flush()
        finally:
            # flush pending summaries and release the event file
            writer.close()

        print("\nFinished!")          

In [None]:
##Sampling
# Runs the encoder over the test split and collects the final encoder states
# together with their labels in X_data / y_data.
data_path = "/home/kalvik/shared/CSI_DATA/preprocessed_final/test"
weight_path = "/home/kalvik/shared/autoencoder/weights/mse/"
sequence_length = 540
input_width = 8000 
# decay_rate / lr / decay_steps are still required because model() always
# builds the optimizer, even though no training step is run here.
decay_rate = 0.9
lstm_size = 4000
rollout_ = True
batch_size = 10
num_gpus = 1
lr = 1E-4
gen = Generator(data_path, (input_width, sequence_length), shuffle=False)
data_steps = int(gen.num_sample//(batch_size*num_gpus))
decay_steps = data_steps

# One entry per batch: encoder states and the matching labels.
X_data, y_data = list(), list()

tf.reset_default_graph()
with tf.Graph().as_default(), tf.device('/cpu:0'):
    init, merged, saver, avg_loss, apply_gradient_op, encoded_data, encoded_labels = model()
    
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        latest_checkpoint = tf.train.latest_checkpoint(weight_path)
        if latest_checkpoint is None:
            raise FileNotFoundError("no checkpoint found in {}".format(weight_path))
        saver.restore(sess, latest_checkpoint)

        batch_time = []
        # Defined here so the cell does not depend on a variable left behind
        # by the training cell.
        start_time = time.time()
        print("Started Sampling")

        for step in range(1, data_steps + 1):
            time_start = time.time()
            # Fetch both in one run call so states and labels come from the same batch.
            data_temp, labels_temp = sess.run([encoded_data, encoded_labels])
            batch_time.append(time.time()-time_start)

            X_data.append(data_temp)
            y_data.append(labels_temp)

            # last step: total elapsed time; otherwise: estimated time remaining
            if (step == data_steps):
                sys.stdout.write("\r - {}/{} - {}".format(step, data_steps, get_time(time.time()-start_time)))
            else:
                sys.stdout.write("\r - {}/{} - {}".format(step, data_steps, get_time(np.mean(batch_time)*(data_steps-step))))
            sys.stdout.flush()

        print("\nFinished!")      

In [None]:
# Sampling on the train split.
# Appends to the X_data / y_data lists created by the previous sampling cell,
# so that cell must have been run first; the train encodings end up after the
# test encodings.
data_path = "/home/kalvik/shared/CSI_DATA/preprocessed_final/train"
weight_path = "/home/kalvik/shared/autoencoder/weights/mse/"
sequence_length = 540
input_width = 8000 
# decay_rate / lr / decay_steps are still required because model() always
# builds the optimizer, even though no training step is run here.
decay_rate = 0.8
lstm_size = 4000
rollout_ = True
batch_size = 10
num_gpus = 1
lr = 1E-4
gen = Generator(data_path, (input_width, sequence_length), shuffle=False)
data_steps = int(gen.num_sample//(batch_size*num_gpus))
decay_steps = data_steps

tf.reset_default_graph()
with tf.Graph().as_default(), tf.device('/cpu:0'):
    init, merged, saver, avg_loss, apply_gradient_op, encoded_data, encoded_labels = model()
    
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        latest_checkpoint = tf.train.latest_checkpoint(weight_path)
        if latest_checkpoint is None:
            raise FileNotFoundError("no checkpoint found in {}".format(weight_path))
        saver.restore(sess, latest_checkpoint)

        batch_time = []
        # Defined here so the cell does not depend on a variable left behind
        # by the training cell.
        start_time = time.time()
        print("Started Sampling")

        for step in range(1, data_steps + 1):
            time_start = time.time()
            # Fetch both in one run call so states and labels come from the same batch.
            data_temp, labels_temp = sess.run([encoded_data, encoded_labels])
            batch_time.append(time.time()-time_start)

            X_data.append(data_temp)
            y_data.append(labels_temp)

            # last step: total elapsed time; otherwise: estimated time remaining
            if (step == data_steps):
                sys.stdout.write("\r - {}/{} - {}".format(step, data_steps, get_time(time.time()-start_time)))
            else:
                sys.stdout.write("\r - {}/{} - {}".format(step, data_steps, get_time(np.mean(batch_time)*(data_steps-step))))
            sys.stdout.flush()

        print("\nFinished!")      

In [None]:
# Sanity check: array shapes of the per-batch encoder states and labels
# collected by the sampling cells above.
print(np.array(X_data).shape, np.array(y_data).shape)