In [None]:
import tensorflow as tf
import timesynth as ts
import numpy as np
import ujson as json
import tqdm

#### Data Generation

In [None]:
#Function for building the dictionary
def create_data_dictionary(phi, sigma, irregular_time_samples, signal):
    """Bundle one generated series and its parameters into a plain dict.

    ``phi`` and ``sigma`` are stored as given; the time stamps and the signal
    values are copied into Python lists with ``list``.

    Returns:
        dict with the keys 'phi', 'sigma', 'time_samples' and 'signal'.
    """
    return {
        'phi': phi,
        'sigma': sigma,
        'time_samples': list(irregular_time_samples),
        'signal': list(signal),
    }

In [None]:
def write_file(folder_name, file_name, data_dict):
    """Serialize ``data_dict`` as JSON to ``<folder_name>/<file_name>.json``.

    Args:
        folder_name: Directory to write into. A trailing path separator is
            optional.
        file_name: Base name of the file, without the ``.json`` extension.
        data_dict: Object handed to ``json.dump``.
    """
    import os  # local import so this cell does not depend on another cell's imports

    # os.path.join only inserts a separator when folder_name lacks one, so
    # 'dir' and 'dir/' resolve to the same file instead of 'dirname.json'.
    path = os.path.join(folder_name, file_name + '.json')
    with open(path, 'w') as fp:
        json.dump(data_dict, fp)

In [None]:
def generate_data(phi, sigma, num_points, stop_time=20, keep_percentage=50):
    """Simulate a first-order autoregressive series on an irregular time grid.

    The recursion starts from 0 and, for every gap ``dt`` between consecutive
    time stamps (the first gap is measured from t = 0), applies

        s[i+1] = phi**dt * s[i] + sigma * sqrt(1 - phi**(2*dt)) * noise[i]

    with ``noise`` drawn from ``np.random.normal``.

    Args:
        phi: Base of the decay factor ``phi**dt``.
        sigma: Scale of the noise term.
        num_points: Passed to ``TimeSampler.sample_irregular_time``.
        stop_time: Passed to ``ts.TimeSampler`` (default 20).
        keep_percentage: Passed to ``sample_irregular_time`` (default 50).

    Returns:
        Tuple ``(irregular_time_samples, signal)``; ``signal`` has one value
        per time stamp.

    Note:
        If ``1 - phi**(2*dt)`` is negative for some gap, the square root
        yields NaN for that step and every later one. Callers should check
        the result for NaN.
    """
    time_sampler = ts.TimeSampler(stop_time=stop_time)
    irregular_time_samples = time_sampler.sample_irregular_time(
        num_points=num_points, keep_percentage=keep_percentage)
    num_steps = len(irregular_time_samples)

    # Gap before each sample; prepending 0 makes the first gap start at t = 0.
    time_gaps = np.diff(np.append(0, irregular_time_samples))
    noise_samples = np.random.normal(size=num_steps)

    # Per-step coefficients do not depend on the running signal value, so
    # they are computed once for the whole series.
    decay = np.power(phi, time_gaps)
    noise_scale = sigma * np.sqrt(1 - np.power(phi, 2 * time_gaps))

    # signal[0] is the implicit zero starting value and is dropped on return.
    signal = np.zeros(num_steps + 1)
    for i in range(num_steps):
        signal[i + 1] = decay[i] * signal[i] + noise_scale[i] * noise_samples[i]
    return irregular_time_samples, signal[1:]

In [None]:
# Only run to generate datasets.
# Change OUTPUT_FOLDER to write the dataset somewhere else.
OUTPUT_FOLDER = '../data/corr/train/'
# num_points handed to generate_data; identical for every series.
POINTS_PER_SERIES = 600
num_samples = 10000
phi_samples = np.random.uniform(size=num_samples)
sigma_samples = np.random.uniform(size=num_samples)
for i in tqdm.tqdm(range(num_samples)):
    sigma = sigma_samples[i]
    phi = phi_samples[i]
    # Start from NaN so the loop body runs at least once, then re-draw the
    # series until it contains no NaN values.
    signal = np.nan
    while np.any(np.isnan(signal)):
        time_samples, signal = generate_data(phi, sigma, POINTS_PER_SERIES)
    data_dict = create_data_dictionary(phi, sigma, time_samples, signal)
    file_name = 'series_'+str(i)
    write_file(OUTPUT_FOLDER, file_name, data_dict)

#### Building model for learning autocorrelation of irregular time series residuals

In [1]:
import tensorflow as tf
from IPython import display
import matplotlib.pyplot as plt
import numpy as np
import ujson as json
import tqdm
%matplotlib inline
import sys
sys.path.append('/Users/abhishek/Projects/experimental-timeflow/')
import timeflow as tflow

In [2]:
def read_file(folder_name, file_name):
    """Load and return the JSON content of ``<folder_name>/<file_name>.json``.

    Args:
        folder_name: Directory containing the file. A trailing path separator
            is optional.
        file_name: Base name of the file, without the ``.json`` extension.

    Returns:
        The object produced by ``json.load``.
    """
    import os  # local import so this cell does not depend on another cell's imports

    # os.path.join only inserts a separator when folder_name lacks one.
    path = os.path.join(folder_name, file_name + '.json')
    with open(path, 'r') as fp:
        return json.load(fp)

In [3]:
class LSTMLayerBatch(object):
    """LSTM layer applied to a batch of sequences with ``tf.scan``.

    Every weight matrix and bias is created as a zero-initialised
    ``tf.Variable``. The recurrent state carried through the scan is the pair
    ``[hidden_state, memory_cell]`` stacked into a single tensor.
    """

    def __init__(self, input_size, hidden_layer_size, input_placeholder,
                 target_size=None):
        """Create the layer's variables and its initial state.

        Args:
            input_size: Size of the last dimension of ``input_placeholder``.
            hidden_layer_size: Number of hidden units.
            input_placeholder: Input tensor with shape [batch, seq, embeddings].
            target_size: Optional. Only stored on the instance; nothing in
                this class reads it. It is an explicit argument so that the
                layer does not depend on a module-level variable.
        """
        # Initialization of given values
        self.input_size = input_size
        self.hidden_layer_size = hidden_layer_size
        self.target_size = target_size

        # Weights and bias per gate. The creation order (W, U, b for the
        # input, forget, output and candidate gates) is kept fixed so that
        # automatically generated variable names do not change.
        self.Wi, self.Ui, self.bi = self._gate_parameters()
        self.Wf, self.Uf, self.bf = self._gate_parameters()
        self.Wog, self.Uog, self.bog = self._gate_parameters()
        self.Wc, self.Uc, self.bc = self._gate_parameters()

        # Placeholder for input vector with shape [batch, seq, embeddings]
        self._inputs = input_placeholder

        # Processing inputs to work with scan function
        self.processed_input = process_batch_input_for_RNN(self._inputs)

        # Multiplying the first time step by an all-zero matrix gives a
        # [batch, hidden] tensor of zeros whose batch dimension follows the
        # input, so no static batch size is needed.
        first_step = self._inputs[:, 0, :]
        zero_state = tf.matmul(
            first_step, tf.zeros([input_size, hidden_layer_size]))

        # Initial (hidden state, memory cell) pair, both zero.
        self.initial_hidden = tf.stack([zero_state, zero_state])

    def _gate_parameters(self):
        """Create one gate's zero-initialised (W, U, b) variables, in that order."""
        W = tf.Variable(tf.zeros(
            [self.input_size, self.hidden_layer_size]))
        U = tf.Variable(tf.zeros(
            [self.hidden_layer_size, self.hidden_layer_size]))
        b = tf.Variable(tf.zeros([self.hidden_layer_size]))
        return W, U, b

    # Function for LSTM cell.
    def forward_step(self, previous_hidden_memory_tuple, x):
        """Advance the LSTM by one time step.

        Args:
            previous_hidden_memory_tuple: Stacked ``[hidden_state, memory_cell]``
                tensor from the previous step.
            x: Input for the current step.

        Returns:
            Stacked ``[current_hidden_state, memory_cell]`` tensor.
        """
        previous_hidden_state, c_prev = tf.unstack(previous_hidden_memory_tuple)

        # Input Gate
        i = tf.sigmoid(
            tf.matmul(x, self.Wi) + tf.matmul(previous_hidden_state, self.Ui) + self.bi
        )

        # Forget Gate
        f = tf.sigmoid(
            tf.matmul(x, self.Wf) + tf.matmul(previous_hidden_state, self.Uf) + self.bf
        )

        # Output Gate
        o = tf.sigmoid(
            tf.matmul(x, self.Wog) + tf.matmul(previous_hidden_state, self.Uog) + self.bog
        )

        # New Memory Cell
        c_ = tf.nn.tanh(
            tf.matmul(x, self.Wc) + tf.matmul(previous_hidden_state, self.Uc) + self.bc
        )

        # Final Memory cell
        c = f * c_prev + i * c_

        # Current Hidden state
        current_hidden_state = o * tf.nn.tanh(c)

        return tf.stack([current_hidden_state, c])

    # Function for getting all hidden state.
    def get_outputs(self):
        """Run the scan over the sequence and return every hidden state.

        Returns:
            The hidden-state half of the scanned state: index 0 of the
            stacked pair is selected for every step.
        """
        # Getting all hidden state through time
        all_states = tf.scan(self.forward_step,
                             self.processed_input,
                             initializer=self.initial_hidden,
                             name='states')
        # Axis 1 indexes the stacked (hidden, memory) pair; keep the hidden state.
        all_hidden_states = all_states[:, 0, :, :]

        return all_hidden_states

# Function to convert batch input data to use scan ops of tensorflow.
def process_batch_input_for_RNN(batch_input):
    """
    Swap the first two axes of a rank-3 tensor, e.g. [5,3,2] -> [3,5,2]
    """
    # A single [1, 0, 2] permutation gives the same result as permuting with
    # [2, 0, 1] and then reversing all axes.
    return tf.transpose(batch_input, perm=[1, 0, 2])

In [4]:
class MeanLayerBatch(object):
    """Averages the output of an upstream layer along its first axis."""

    def __init__(self, input_layer):
        # The upstream tensor is fetched once, when the layer is constructed.
        upstream_output = input_layer.get_outputs()
        self.inputs = upstream_output

    def get_outputs(self):
        """Return the mean of ``self.inputs`` taken over axis 0."""
        averaged = tf.reduce_mean(self.inputs, reduction_indices=0)
        return averaged

In [5]:
class RegLayerBatch(object):
    """Linear read-out layer computing ``inputs . Wo + bo``."""

    def __init__(self, input_size, output_size, input_layer):
        """Fetch the upstream tensor and create the weight and bias variables.

        Both variables are initialised from a truncated normal distribution
        with mean 0 and standard deviation 0.01.
        """
        self.inputs = input_layer.get_outputs()
        self.input_size = input_size
        self.output_size = output_size

        # Weight first, then bias: keeps op/variable creation order unchanged.
        weight_init = tf.truncated_normal(
            [self.input_size, self.output_size], mean=0, stddev=.01)
        self.Wo = tf.Variable(weight_init)
        bias_init = tf.truncated_normal([self.output_size], mean=0, stddev=.01)
        self.bo = tf.Variable(bias_init)

    def get_outputs(self):
        """Return the affine transform of the upstream tensor."""
        return tf.matmul(self.inputs, self.Wo) + self.bo

In [6]:
# Model dimensions used by the graph-building cells.
input_size = 2          # size of the last dimension of the input placeholder
hidden_layer_size = 10  # number of LSTM hidden units
target_size = 1         # width of the regression output / target

In [7]:
# Input sequences; the first two dimensions (batch and sequence, per the
# LSTM layer's convention) are left dynamic.
input_placeholder = tf.placeholder(
    dtype=tf.float32, shape=[None, None, input_size], name='inputs')

In [8]:
# Assemble the network: LSTM over the input placeholder -> mean of the LSTM
# outputs over axis 0 -> linear regression layer. Each stage is created
# inside its own variable scope.
with tf.variable_scope('LSTM_layer'):
    lstm_layer = LSTMLayerBatch(input_size, hidden_layer_size, input_placeholder)
with tf.variable_scope('Mean_Layer'):
    mean_layer = MeanLayerBatch(lstm_layer)
with tf.variable_scope('Reg_Layer'):
    reg_layer = RegLayerBatch(hidden_layer_size, target_size, mean_layer)

In [9]:
# Regression targets with shape [batch, target_size]. Named 'targets' because
# the name 'inputs' is already used by the input placeholder.
y = tf.placeholder(tf.float32, shape=[None, target_size], name='targets')

In [10]:
# Prediction tensor produced by the regression layer.
outputs = reg_layer.get_outputs()

In [11]:
# Loss between predictions and targets, computed by timeflow's RMSE helper
# (presumably root-mean-squared error; the implementation lives in timeflow).
with tf.variable_scope('RMSE'):
    rmse = tflow.utils.metrics.RMSE(outputs, y)

In [12]:
# Register the loss as a scalar summary, then merge every registered summary
# into a single op that can be evaluated alongside the training step.
tf.summary.scalar("RMSE", rmse)
summary_op = tf.summary.merge_all()

In [13]:
# Training with the Adam optimizer (learning rate 0.02), minimizing the RMSE
train_step = tf.train.AdamOptimizer(learning_rate=0.02).minimize(rmse)

In [14]:
# Starting tensorflow session and initializing all variables created so far.
# The session is never closed in this notebook.
sess=tf.InteractiveSession()
tf.global_variables_initializer().run()

In [15]:
#Setting up log directory for tensorboard
logs_path = '../tmp/corr/1'
# tf.summary.FileWriter replaces the deprecated tf.train.SummaryWriter and
# belongs to the same tf.summary API used to define the summaries.
writer = tf.summary.FileWriter(logs_path, graph=tf.get_default_graph())

In [16]:
def build_random_batch_input(batch_size, num_files=10000,
                             folder_name='../data/corr/train/'):
    """Load a random batch of stored series together with their ``phi`` targets.

    Args:
        batch_size: Number of distinct series to draw (sampled without
            replacement).
        num_files: Size of the index pool; files are looked up as
            ``series_<index>`` for indices in ``[0, num_files)``.
        folder_name: Folder handed to ``read_file``.

    Returns:
        Tuple ``(inputs, targets)``. ``inputs`` has shape
        ``[batch_size, seq_len, 2]`` with the time stamps in column 0 and the
        signal in column 1. The batch axis is present even for
        ``batch_size == 1``. ``targets`` has shape ``[batch_size, 1]``.

    Raises:
        ValueError: If ``batch_size`` exceeds ``num_files``, if
            ``batch_size`` is 0, or if the drawn series do not all have the
            same length.
    """
    random_files = np.random.choice(num_files, size=batch_size, replace=False)
    targets = np.zeros(batch_size)
    sequences = []
    for k, file_index in enumerate(random_files):
        data_dict = read_file(folder_name, 'series_' + str(file_index))
        targets[k] = data_dict['phi']
        # One [seq_len, 2] array per series: (time stamp, signal value) rows.
        sequences.append(np.column_stack((data_dict['time_samples'],
                                          data_dict['signal'])))
    # Stacking once avoids re-copying the growing batch on every iteration.
    return np.stack(sequences), np.reshape(targets, (batch_size, 1))

In [17]:
# Number of series loaded into the training batch below.
batch_size = 100

In [18]:
# Load one random batch: input sequences and their phi targets.
input_vector, out_vector = build_random_batch_input(batch_size)

In [25]:
# Run 50 optimization steps and log the merged summary after each one.
# NOTE(review): the same pre-built batch (input_vector / out_vector) is fed on
# every step; no new batch is drawn inside the loop.
for k in tqdm.tqdm(range(50)):
    _, summary = sess.run([train_step, summary_op],
                         feed_dict={input_placeholder:input_vector,
                                    y:out_vector})
    writer.add_summary(summary, k)

100%|██████████| 50/50 [00:37<00:00,  1.27it/s]


#### Dataset processing and loading pipeline

In [38]:
# Define the integer flag 'threads' (default 1) used to split the file list.
tf.app.flags.DEFINE_integer('threads', 1, 'Number of threads to preprocess the files')

In [87]:
from tensorflow.python.framework import ops
from tensorflow.python.framework import dtypes

In [88]:
# Accessor object through which the defined flags are read (e.g. FLAGS.threads).
FLAGS = tf.app.flags.FLAGS

In [89]:
# Collect every path directly under the training directory via a glob pattern.
train_directory = '../data/corr/train'
file_path = '%s/*' % train_directory
filenames = tf.gfile.Glob(file_path)

In [90]:
# Evenly spaced integer boundaries over the file list, one more than the
# number of threads. The builtin int is used because np.int was only an alias
# for it and has been removed from recent NumPy releases.
spacing = np.linspace(0, len(filenames), FLAGS.threads + 1).astype(int)
# One [start, end] pair of consecutive boundaries per thread.
ranges = [[spacing[i], spacing[i + 1]] for i in range(len(spacing) - 1)]

In [91]:
# Read the first file in text mode. Despite its name, `image_data` holds the
# raw text of a series file, which is parsed as JSON in the next cell.
with tf.gfile.FastGFile(filenames[0], 'r') as f:
    image_data = f.read()

In [92]:
# Parse the file content into a dict.
str_dict = json.loads(image_data)

In [93]:
# Inspect which fields a stored series contains.
str_dict.keys()

[u'phi', u'sigma', u'signal', u'time_samples']

In [105]:
# Inspect the stored phi value of this series.
str_dict['phi']

0.5268461639

In [94]:
def _convert_to_numpy(str_dict):
    signal = np.reshape(np.array(str_dict['signal']), (len(str_dict['signal']), 1))
    time_samples = np.reshape(np.array(str_dict['time_samples']), (len(str_dict['time_samples']), 1))
    return np.concatenate((signal, time_samples), axis=1)

In [None]:
def _convert_to_list(str_dict):
    signal = str_dict['signal']
    time_samples = str_dict['time_samples']
    return [signal, time_samples]

In [95]:
# [n, 2] array for the parsed series: signal column, then time-stamp column.
data = _convert_to_numpy(str_dict)

In [99]:
class DataCoder(object):
    """Helper that runs array data through a TensorFlow session.

    Owns a ``tf.Session`` and a placeholder of a fixed dtype. ``decode_data``
    feeds an array into the placeholder and returns the evaluated result.
    """

    def __init__(self, dtype=None):
        """
        Args:
            dtype: TensorFlow dtype of the placeholder. Defaults to
                ``dtypes.float32``. An unsigned-integer dtype would truncate
                floating-point data fed through ``decode_data``.
        """
        if dtype is None:
            dtype = dtypes.float32
        self._sess = tf.Session()
        self._data = tf.placeholder(dtype=dtype)
        self._decode_data = ops.convert_to_tensor(self._data, dtype=dtype)

    def decode_data(self, data):
        """Feed ``data`` into the placeholder and return the evaluated array."""
        data_tensor = self._sess.run(self._decode_data, feed_dict={self._data: data})
        return data_tensor

    def close(self):
        """Release the session owned by this helper."""
        self._sess.close()

In [100]:
# Instantiate the helper; its constructor opens a tf.Session.
coder = DataCoder()

In [102]:
# Run the series array through the coder's session.
data_tensor = coder.decode_data(data)

In [107]:
def data_coder(str_dict):
    """Convert one parsed series record into a ``(tensor, label)`` pair.

    Args:
        str_dict: Series dict; it is passed to ``_convert_to_numpy`` and its
            'phi' entry is used as the label.

    Returns:
        Tuple ``(data_buffer, label)`` where ``data_buffer`` is the result of
        ``ops.convert_to_tensor`` applied to the converted array.
    """
    data = _convert_to_numpy(str_dict)
    label = str_dict['phi']
    # Encoding the data into a tensor. The converter must be called on the
    # array; returning the function object itself is useless to callers.
    data_buffer = ops.convert_to_tensor(data)
    return data_buffer, label

In [113]:
# Round-trip the array through its raw bytes. tobytes/frombuffer replace the
# deprecated tostring/fromstring. The dtype is passed explicitly so the bytes
# are read back with the type they were written with. The result is flat
# (the byte string does not carry the shape) and read-only.
str_data = data.tobytes()
data_check = np.frombuffer(str_data, dtype=data.dtype)

#### Testing reading the data

In [119]:
def read_and_decode_single_example(filename):
    """Build graph ops that read one serialized example from a TFRecord file.

    Args:
        filename: Path of the TFRecord file. It is queued with
            ``num_epochs=None``.

    Returns:
        Tuple ``(data, label)`` of scalar string tensors parsed from the
        'data' and 'label' features of the example.

    NOTE(review): 'label' is parsed as a string feature; confirm this matches
    how the records were written.
    """
    filename_queue = tf.train.string_input_producer([filename], num_epochs=None)

    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)

    features = tf.parse_single_example(
        serialized_example,
        features={
            'data': tf.FixedLenFeature([], tf.string),
            'label': tf.FixedLenFeature([], tf.string),
        })

    return features['data'], features['label']

In [123]:
def inputs(train_filename, batch_size):
    """Build shuffled ``(data, label)`` batch tensors from a TFRecord file.

    The single-example tensors come from ``read_and_decode_single_example``
    and are batched by ``tf.train.shuffle_batch`` with a queue capacity of
    2000 and at least 1000 elements kept after each dequeue.
    """
    example, target = read_and_decode_single_example(train_filename)
    batched = tf.train.shuffle_batch(
        [example, target],
        batch_size=batch_size,
        capacity=2000,
        min_after_dequeue=1000)
    data_batch, label_batch = batched
    return data_batch, label_batch

In [124]:
# Settings for the TFRecord reading test.
# NOTE: this rebinds `batch_size`, which an earlier cell set to 100.
batch_size = 1
train_filename = '../data/corr/records/train/train.tfrecords'

In [125]:
# Build the batched reading ops; `input_` is the pair returned by `inputs`.
input_ = inputs(train_filename, batch_size)

In [135]:
from tensorflow.examples.tutorials.mnist import input_data

In [137]:
# Exploratory: list the names exposed by the imported `input_data` module.
dir(input_data)

['__builtins__',
 '__doc__',
 '__file__',
 '__name__',
 '__package__',
 'absolute_import',
 'division',
 'gzip',
 'numpy',
 'os',
 'print_function',
 'read_data_sets',
 'tempfile',
 'tf',
 'urllib',
 'xrange']

In [129]:
# Exploratory: display the tf.train.Feature class.
tf.train.Feature

tensorflow.core.example.feature_pb2.Feature

In [138]:
# Exploratory: IPython help lookup (trailing `?`) for tf.train.input_producer.
tf.train.input_producer?