In [17]:
import utils_libs

In [9]:
# An alternative to the tf.nn.rnn_cell._linear function, which was removed in TensorFlow 1.0.1
# The highway layer is borrowed from https://github.com/mkroutikov/tf-lstm-char-cnn
def linear(input_, output_size, scope=None):
    """Apply the affine map ``input_ * Matrix^T + Bias`` to a 2D tensor.

    Args:
        input_: a single tensor whose static shape is 2D (batch x n) with a
            known, non-zero second dimension.
        output_size: int, number of columns of the result.
        scope: variable scope for the created variables; "SimpleLinear" is
            used when this is None (or otherwise falsy).

    Returns:
        ``tf.matmul(input_, transpose(Matrix)) + Bias`` where ``Matrix`` has
        shape [output_size, n] and ``Bias`` has shape [output_size].

    Raises:
        ValueError: if the static shape of ``input_`` is not 2D or its second
            dimension is unknown/zero.
    """
    dims = input_.get_shape().as_list()
    if len(dims) != 2:
        raise ValueError("Linear is expecting 2D arguments: %s" % str(dims))

    n_in = dims[1]
    if not n_in:
        raise ValueError("Linear expects shape[1] of arguments: %s" % str(dims))

    # Variables are created (or looked up) inside the requested scope; the
    # arithmetic itself happens outside of it.
    with tf.variable_scope(scope or "SimpleLinear"):
        weights = tf.get_variable("Matrix", [output_size, n_in], dtype=input_.dtype)
        offset = tf.get_variable("Bias", [output_size], dtype=input_.dtype)

    projected = tf.matmul(input_, tf.transpose(weights))
    return projected + offset

class myGRUCell(RNNCell):
  """Gated Recurrent Unit cell (cf. http://arxiv.org/abs/1406.1078).

  NOTE(review): `RNNCell`, `_Linear`, `math_ops`, `init_ops`, `array_ops` and
  `vs` are not imported in the visible part of this file; presumably they are
  TensorFlow's internal rnn_cell modules -- confirm before running this cell.

  Args:
    num_units: int, The number of units in the GRU cell.
    activation: Nonlinearity to use.  Default: `tanh`.
    reuse: (optional) Python boolean describing whether to reuse variables
     in an existing scope.  If not `True`, and the existing scope already has
     the given variables, an error is raised.
    kernel_initializer: (optional) The initializer to use for the weight and
     projection matrices.
    bias_initializer: (optional) The initializer to use for the bias.
  """

  def __init__(self,
               num_units,
               activation=None,
               reuse=None,
               kernel_initializer=None,
               bias_initializer=None):
    # super() must be given *this* class.  The original passed `GRUCell`,
    # which is either undefined here or not an ancestor of myGRUCell, so
    # construction always failed.
    super(myGRUCell, self).__init__(_reuse=reuse)
    self._num_units = num_units
    self._activation = activation or math_ops.tanh
    self._kernel_initializer = kernel_initializer
    self._bias_initializer = bias_initializer
    # The two linear maps are built lazily on the first call().
    self._gate_linear = None
    self._candidate_linear = None

  @property
  def state_size(self):
    """Size of the recurrent state (equal to `num_units`)."""
    return self._num_units

  @property
  def output_size(self):
    """Size of the cell output (equal to `num_units`)."""
    return self._num_units

  def call(self, inputs, state):
    """Gated recurrent unit (GRU) with nunits cells.

    Args:
      inputs: input tensor for the current step.
      state: previous state tensor.

    Returns:
      A pair `(new_h, new_h)`: the new state, used both as the cell output
      and as the next state.
    """
    if self._gate_linear is None:
      # Without an explicit bias initializer the gate biases start at 1.0.
      bias_ones = self._bias_initializer
      if self._bias_initializer is None:
        bias_ones = init_ops.constant_initializer(1.0, dtype=inputs.dtype)
      with vs.variable_scope("gates"):  # Reset gate and update gate.
        self._gate_linear = _Linear(
            [inputs, state],
            2 * self._num_units,
            True,
            bias_initializer=bias_ones,
            kernel_initializer=self._kernel_initializer)

    # One linear map yields both gates; split it in half along axis 1.
    value = math_ops.sigmoid(self._gate_linear([inputs, state]))
    r, u = array_ops.split(value=value, num_or_size_splits=2, axis=1)

    r_state = r * state
    if self._candidate_linear is None:
      with vs.variable_scope("candidate"):
        self._candidate_linear = _Linear(
            [inputs, r_state],
            self._num_units,
            True,
            bias_initializer=self._bias_initializer,
            kernel_initializer=self._kernel_initializer)
    c = self._activation(self._candidate_linear([inputs, r_state]))
    # Blend the previous state and the candidate with the update gate.
    new_h = u * state + (1 - u) * c
    return new_h, new_h

NameError: name '_Find' is not defined

In [13]:
# discriminative

class tsLSTM_discriminative(object):
    """Recurrent regressor that predicts one scalar per input window.

    The graph built in ``__init__`` is: stacked GRU cells -> hidden output at
    the last time step -> 128-unit ReLU layer -> 1-unit linear output
    (``self.py``).  ``train_ini`` adds a mean-squared-error cost and an Adam
    optimizer on top of it.

    Args:
        n_lstm_dim: number of units in each recurrent cell.
        n_steps: number of time steps in every input window.
        n_data_dim: number of features per time step.
        n_lstm_layers: number of stacked recurrent layers.
        session: TensorFlow session used for every ``run`` call.
        lr: learning rate handed to the optimizer.
        l2: L2 coefficient; stored, but currently not part of the cost.
        max_norm: accepted for interface compatibility; not used.
        bool_is_stateful: stored as ``is_state_full``; not used by the graph.
        n_batch_size: stored as ``n_batch_size``; not used by the graph.
    """

    def __init__(self, n_lstm_dim, n_steps, n_data_dim, n_lstm_layers, session,
                 lr, l2, max_norm, bool_is_stateful, n_batch_size):

        self.LEARNING_RATE = lr
        self.L2 = l2

        self.N_LSTM_LAYERS = n_lstm_layers
        self.N_LSTM_DIM = n_lstm_dim

        self.N_STEPS = n_steps
        self.N_DATA_DIM = n_data_dim

        self.x = tf.placeholder(tf.float32, [None, self.N_STEPS, self.N_DATA_DIM])
        self.y = tf.placeholder(tf.float32, [None, 1])

        # Fed by train_batch()/inference(), but not connected to any op yet.
        self.keep_prob = tf.placeholder(tf.float32)

        self.is_state_full = bool_is_stateful
        self.n_batch_size = n_batch_size

        self.sess = session

        with tf.variable_scope("lstm"):
            # Build one cell object per layer.  Repeating a single instance
            # with `[cell] * n` makes every layer share that object, which
            # newer TensorFlow 1.x releases reject or turn into tied weights.
            cells = [tf.contrib.rnn.GRUCell(self.N_LSTM_DIM)
                     for _ in range(self.N_LSTM_LAYERS)]
            stacked_lstm = tf.contrib.rnn.MultiRNNCell(cells, state_is_tuple=True)

            self.hiddens, self.state = tf.nn.dynamic_rnn(cell=stacked_lstm,
                                                         inputs=self.x,
                                                         dtype=tf.float32)

        # Make time the leading axis so that [-1] selects the last step.
        tmp_hiddens = tf.transpose(self.hiddens, [1, 0, 2])
        last_hidden = tmp_hiddens[-1]

        with tf.variable_scope("hidden"):
            # Normal init with stddev sqrt(2 / fan_in).
            w = tf.Variable(tf.random_normal([self.N_LSTM_DIM, 128],
                                             stddev=math.sqrt(2.0 / self.N_LSTM_DIM)))
            b = tf.Variable(tf.zeros([128]))

            self.regularization = tf.nn.l2_loss(w)

            h = tf.matmul(last_hidden, w) + b
            h = tf.nn.relu(h)

        with tf.variable_scope("output"):
            w = tf.Variable(tf.random_normal([128, 1],
                                             stddev=math.sqrt(2.0 / 128)))
            b = tf.Variable(tf.zeros([1]))

            self.regularization += tf.nn.l2_loss(w)

            self.py = tf.matmul(h, w) + b

    def train_ini(self):
        """Create the cost and optimizer ops and initialize all variables."""
        # Mean squared error.  The L2 penalty (self.L2 * self.regularization)
        # is deliberately left out of the cost for now.
        self.cost = tf.reduce_mean(tf.square(self.y - self.py))

        # Adadelta, RMSProp and plain gradient descent were tried here as
        # alternatives with the same learning rate.
        self.optimizer = tf.train.AdamOptimizer(
            learning_rate=self.LEARNING_RATE).minimize(self.cost)

        self.init = tf.global_variables_initializer()

        # Only the initializer is run.  The original also fetched
        # `self.state_variables`, which is never defined on this class.
        self.sess.run(self.init)

    def train_batch(self, x_batch, y_batch, keep_prob):
        """Run one optimizer step on a batch and return its cost."""
        # The original additionally fetched `self.state_update_op`, which is
        # never defined on this class and raised AttributeError.
        _, c = self.sess.run([self.optimizer, self.cost],
                             feed_dict={self.x: x_batch,
                                        self.y: y_batch,
                                        self.keep_prob: keep_prob})
        return c

    def inference_ini(self):
        """Create the RMSE op used by ``inference``."""
        self.rmse = tf.sqrt(tf.reduce_mean(tf.square(self.y - self.py)))

    def inference(self, x_test, y_test, keep_prob):
        """Return ``[rmse]`` of the predictions for the given data."""
        return self.sess.run([self.rmse], feed_dict={self.x: x_test,
                                                     self.y: y_test,
                                                     self.keep_prob: keep_prob})

    def test(self, x_test, y_test):
        """Debug helper: return ``[shape of the final RNN state]``.

        Note that this re-runs the global variable initializer, so any
        trained weights are reset.
        """
        self.init = tf.global_variables_initializer()

        tmpshape = tf.shape(self.state)
        self.sess.run(self.init)

        return self.sess.run([tmpshape],
                             feed_dict={self.x: x_test, self.y: y_test})
    

In [None]:
# discriminative

# ---- hyper-parameters ------------------------------------------------------
para_n_epoch = 1200

# tunable parameters

#   representation ability
para_lr = 0.001
para_lstm_dim = 256
para_lstm_layers = 1
#   regularization
para_batch_size = 32
para_l2 = 0.1

para_max_norm  = 4
para_keep_prob = 0.8

# fixed parameters
para_data_dim = 1
para_steps = 100

# evaluation parameters
para_eval_byepoch = 10

# ---- data ------------------------------------------------------------------
# Windows are reshaped to (samples, steps, features) using the same constants
# that are handed to the model, instead of a second hard-coded 100 / 1.
disc_xtrain = np.reshape( xtrain, [-1, para_steps, para_data_dim] )
disc_xtest  = np.reshape( xtest,  [-1, para_steps, para_data_dim] )

disc_ytrain = ytrain
disc_ytest  = ytest

# A single pre-formatted string prints identically with the Python 2 print
# statement and the Python 3 print function.
print("%s %s %s %s" % (np.shape(disc_xtrain), np.shape(disc_ytrain),
                       np.shape(disc_xtest), np.shape(disc_ytest)))

# ---- training --------------------------------------------------------------
# reset the environment
tf.reset_default_graph()

with tf.Session() as sess:

    reg = tsLSTM_discriminative(para_lstm_dim, para_steps, para_data_dim,
                                para_lstm_layers, sess,
                                para_lr, para_l2, para_max_norm,
                                True, para_batch_size)

    reg.train_ini()
    reg.inference_ini()

    total_cnt   = np.shape(disc_xtrain)[0]
    # Floor division: a trailing partial batch is dropped.
    total_batch = total_cnt // para_batch_size

    # np.random.shuffle works in place and needs a mutable sequence; a bare
    # range() is not one on Python 3.
    total_idx = list(range(total_cnt))

#   training cycle
    for epoch in range(para_n_epoch):

        tmpc = 0.0

#       shuffle training instances each epoch
        np.random.shuffle(total_idx)

#       loop over all batches
        for i in range(total_batch):

            batch_idx = total_idx[ i*para_batch_size: (i+1)*para_batch_size ]

            batch_x = disc_xtrain[ batch_idx ]
            batch_y = disc_ytrain[ batch_idx ]

            tmpc += reg.train_batch( batch_x, batch_y, para_keep_prob )

        if epoch%para_eval_byepoch != 0:
            continue

#       Evaluation is fed keep_prob = 1.0 (no dropout) rather than the
#       training keep probability.
        tmp_test_acc  = reg.inference( disc_xtest,  disc_ytest,  1.0 )
        tmp_train_acc = reg.inference( disc_xtrain, disc_ytrain, 1.0 )

        print("loss on epoch  %s  :  %s %s %s" % (epoch, 1.0*tmpc/total_batch,
                                                  tmp_test_acc, tmp_train_acc))

    print("Optimization Finished!")

In [None]:

from __future__ import print_function, division
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

# Configuration of the echo-task experiment in this cell.
num_epochs = 100  # a fresh series is generated for every epoch
total_series_length = 50000  # samples generated per epoch
truncated_backprop_length = 15  # time steps unrolled per training batch
state_size = 4  # width of the recurrent state vector
num_classes = 2  # number of output classes
echo_step = 3  # y is x shifted by this many positions
batch_size = 5  # number of rows the series is reshaped into
# Number of training windows per epoch (integer division).
num_batches = total_series_length//batch_size//truncated_backprop_length

def generateData():
    """Draw a random binary series and its delayed copy.

    Returns:
        A tuple ``(x, y)`` of arrays, each reshaped to ``batch_size`` rows.
        ``x`` holds ``total_series_length`` values drawn uniformly from
        {0, 1}; ``y`` is ``x`` shifted by ``echo_step`` positions with the
        first ``echo_step`` entries set to 0.
    """
    bits = np.random.choice(2, total_series_length, p=[0.5, 0.5])

    # np.roll wraps around, so the wrapped-in head is zeroed out.
    echoed = np.roll(bits, echo_step)
    echoed[:echo_step] = 0

    # Row-major reshape: each row is one contiguous sub-series.
    rows = (batch_size, -1)
    return (bits.reshape(rows), echoed.reshape(rows))

# Inputs: one truncated window of the series per row, plus the state carried
# over from the previous window.
batchX_placeholder = tf.placeholder(tf.float32, [batch_size, truncated_backprop_length])
batchY_placeholder = tf.placeholder(tf.int32, [batch_size, truncated_backprop_length])

init_state = tf.placeholder(tf.float32, [batch_size, state_size])

# Recurrent weights act on the concatenation [input, state], hence state_size+1 rows.
W = tf.Variable(np.random.rand(state_size+1, state_size), dtype=tf.float32)
b = tf.Variable(np.zeros((1,state_size)), dtype=tf.float32)

# Output projection from state to class logits.
W2 = tf.Variable(np.random.rand(state_size, num_classes),dtype=tf.float32)
b2 = tf.Variable(np.zeros((1,num_classes)), dtype=tf.float32)

# Unpack columns: one tensor per time step.
# (tf.unpack was renamed to tf.unstack in TensorFlow 1.0.)
inputs_series = tf.unstack(batchX_placeholder, axis=1)
labels_series = tf.unstack(batchY_placeholder, axis=1)

# Forward pass
current_state = init_state
states_series = []
for current_input in inputs_series:
    current_input = tf.reshape(current_input, [batch_size, 1])
    # TensorFlow >= 1.0 takes the values first and the axis second.
    input_and_state_concatenated = tf.concat([current_input, current_state], 1)  # Increasing number of columns

    next_state = tf.tanh(tf.matmul(input_and_state_concatenated, W) + b)  # Broadcasted addition
    states_series.append(next_state)
    current_state = next_state

logits_series = [tf.matmul(state, W2) + b2 for state in states_series]  # Broadcasted addition
predictions_series = [tf.nn.softmax(logits) for logits in logits_series]

# TensorFlow >= 1.0 only accepts named arguments for this op.
losses = [tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels)
          for logits, labels in zip(logits_series, labels_series)]
total_loss = tf.reduce_mean(losses)

train_step = tf.train.AdagradOptimizer(0.3).minimize(total_loss)

def plot(loss_list, predictions_series, batchX, batchY):
    """Redraw the loss curve and, for five batch rows, input/target/prediction bars.

    Args:
        loss_list: sequence of loss values plotted in the first panel.
        predictions_series: per-time-step predictions; converted with
            ``np.array`` and indexed as [time, batch row, class].
        batchX: input batch, indexed as [batch row, time].
        batchY: target batch, indexed as [batch row, time].
    """
    # Panel 1 of the 2x3 grid: loss history.
    plt.subplot(2, 3, 1)
    plt.cla()
    plt.plot(loss_list)

    stacked_predictions = np.array(predictions_series)
    bar_positions = range(truncated_backprop_length)

    # Panels 2..6: one per batch row.
    for row in range(5):
        class_probs = stacked_predictions[:, row, :]
        # Predict 1 whenever the first class probability is below 0.5.
        predicted_bits = np.where(class_probs[:, 0] < 0.5, 1, 0)

        plt.subplot(2, 3, row + 2)
        plt.cla()
        plt.axis([0, truncated_backprop_length, 0, 2])
        plt.bar(bar_positions, batchX[row, :], width=1, color="blue")
        plt.bar(bar_positions, batchY[row, :] * 0.5, width=1, color="red")
        plt.bar(bar_positions, predicted_bits * 0.3, width=1, color="green")

    plt.draw()
    plt.pause(0.0001)


with tf.Session() as sess:
    # tf.initialize_all_variables() is deprecated; this is its replacement.
    sess.run(tf.global_variables_initializer())
    plt.ion()
    plt.figure()
    plt.show()
    loss_list = []

    for epoch_idx in range(num_epochs):
        x,y = generateData()
        # The recurrent state starts from zeros at the beginning of every epoch.
        _current_state = np.zeros((batch_size, state_size))

        print("New data, epoch", epoch_idx)

        for batch_idx in range(num_batches):
            # Consecutive, non-overlapping windows along the time axis.
            start_idx = batch_idx * truncated_backprop_length
            end_idx = start_idx + truncated_backprop_length

            batchX = x[:,start_idx:end_idx]
            batchY = y[:,start_idx:end_idx]

            # The final state of this window is fetched and fed back in as
            # the initial state of the next one.
            _total_loss, _train_step, _current_state, _predictions_series = sess.run(
                [total_loss, train_step, current_state, predictions_series],
                feed_dict={
                    batchX_placeholder:batchX,
                    batchY_placeholder:batchY,
                    init_state:_current_state
                })

            loss_list.append(_total_loss)

            if batch_idx%100 == 0:
                print("Step",batch_idx, "Loss", _total_loss)
                plot(loss_list, _predictions_series, batchX, batchY)

plt.ioff()
plt.show()
# view raw: 1-10-vanilla-rnn.py (hosted with ❤ by GitHub)

In [None]:
# generative

def expand_y( x, y ):
    """Build per-step targets: each window shifted left by one, plus its label.

    For every index ``i`` the result row is ``x[i]`` without its first
    element, followed by ``y[i][0]``.

    Args:
        x: sequence of input windows.
        y: sequence of targets; only element ``[0]`` of each entry is used.

    Returns:
        A numpy array with one row per entry of ``x``.
    """
    shifted_rows = [ np.append( window[1:], y[i][0] )
                     for i, window in enumerate(x) ]
    return np.array( shifted_rows )

class tsLSTM_generative(object):
    """Recurrent model trained to predict a target value at every time step.

    The graph built in ``__init__`` is: stacked GRU cells -> shared 1-unit
    linear output applied to every time step (``self.py``, used for
    training) and to the last time step only (``self.test_py``, used for
    evaluation).

    Args:
        n_lstm_dim: number of units in each recurrent cell.
        n_steps: number of time steps in every input window.
        n_data_dim: number of features per time step.
        n_lstm_layers: number of stacked recurrent layers.
        session: TensorFlow session used for every ``run`` call.
        lr: learning rate handed to the optimizer.
        l2: L2 coefficient; stored, but not part of the cost.
        max_norm: accepted for interface compatibility; not used.
    """

    def __init__(self, n_lstm_dim, n_steps, n_data_dim, n_lstm_layers, session,
                 lr, l2, max_norm):

        self.LEARNING_RATE = lr
        self.L2 = l2

        self.N_LSTM_LAYERS = n_lstm_layers
        self.N_LSTM_DIM = n_lstm_dim

        self.N_STEPS = n_steps
        self.N_DATA_DIM = n_data_dim

        # Keep the session that was passed in; the original ignored this
        # argument and relied on a module-level variable named `sess`.
        self.sess = session

        self.x = tf.placeholder(tf.float32, [None, self.N_STEPS, self.N_DATA_DIM])
        self.y = tf.placeholder(tf.float32, [None, self.N_STEPS])
        # The original line was missing its closing parenthesis (SyntaxError).
        self.test_y = tf.placeholder(tf.float32, [None, 1])
        # Fed by train_batch()/inference(), but not connected to any op yet.
        self.keep_prob = tf.placeholder(tf.float32)

        with tf.variable_scope("lstm"):
            # Build one cell object per layer.  Repeating a single instance
            # with `[cell] * n` makes every layer share that object, which
            # newer TensorFlow 1.x releases reject or turn into tied weights.
            cells = [tf.contrib.rnn.GRUCell(self.N_LSTM_DIM)
                     for _ in range(self.N_LSTM_LAYERS)]
            stacked_lstm = tf.contrib.rnn.MultiRNNCell(cells, state_is_tuple=True)

            self.hiddens, self.state = tf.nn.dynamic_rnn(cell=stacked_lstm,
                                                         inputs=self.x,
                                                         dtype=tf.float32)

        with tf.variable_scope("output"):
            # Normal init with stddev sqrt(2 / fan_in).
            w = tf.Variable(tf.random_normal([self.N_LSTM_DIM, 1],
                                             stddev=math.sqrt(2.0 / self.N_LSTM_DIM)))
            b = tf.Variable(tf.zeros([1]))

            # Training head: flatten (batch, step) so the same projection is
            # applied to every step, then restore one row per sample.
            train_h = tf.reshape(self.hiddens, [-1, self.N_LSTM_DIM])
            train_py = tf.matmul(train_h, w) + b
            self.py = tf.reshape(train_py, [-1, self.N_STEPS])

            # Evaluation head: make time the leading axis so that [-1]
            # selects the last step.
            test_h = tf.transpose(self.hiddens, [1, 0, 2])
            self.test_py = tf.matmul(test_h[-1], w) + b

    def train_ini(self):
        """Create the cost and optimizer ops and initialize all variables."""
        self.cost = tf.nn.l2_loss(self.y - self.py)

        # Adadelta, RMSProp and plain gradient descent were tried here as
        # alternatives with the same learning rate.
        self.optimizer = tf.train.AdamOptimizer(
            learning_rate=self.LEARNING_RATE).minimize(self.cost)

        self.init = tf.global_variables_initializer()
        self.sess.run(self.init)

    def train_batch(self, x_batch, y_batch, keep_prob):
        """Run one optimizer step on a batch and return its cost."""
        _, c = self.sess.run([self.optimizer, self.cost],
                             feed_dict={self.x: x_batch,
                                        self.y: y_batch,
                                        self.keep_prob: keep_prob})
        return c

    def inference_ini(self):
        """Create the RMSE op (last-step prediction vs. ``test_y``)."""
        self.rmse = tf.sqrt(tf.reduce_mean(
            tf.square(self.test_y - self.test_py)))

    def inference(self, x_test, y_test, keep_prob):
        """Return ``[rmse]`` of the last-step predictions for the given data."""
        return self.sess.run([self.rmse], feed_dict={self.x: x_test,
                                                     self.test_y: y_test,
                                                     self.keep_prob: keep_prob})

    def test(self, x_test, y_test):
        """Debug helper: return ``[shape of the final RNN state]``.

        Note that this re-runs the global variable initializer, so any
        trained weights are reset.
        """
        self.init = tf.global_variables_initializer()

        tmpshape = tf.shape(self.state)
        self.sess.run(self.init)

        return self.sess.run([tmpshape],
                             feed_dict={self.x: x_test, self.y: y_test})
    

In [14]:
# Print every tenth integer from 0 up to (not including) 100.
# print(i) with a single argument behaves the same on Python 2 and 3.
for i in range(0, 100, 10):
    print(i)

0
10
20
30
40
50
60
70
80
90


In [None]:
# generative

# ---- hyper-parameters ------------------------------------------------------
para_n_epoch = 1200

# tunable parameters

#   representation ability
para_lr = 0.01
para_lstm_dim = 256
para_lstm_layers = 1
#   regularization
para_batch_size = 32
para_l2 = 0.1

para_max_norm = 4
para_keep_prob = 0.8

# fixed parameters
para_data_dim = 1
para_steps = 100

# evaluation parameters
para_eval_byepoch = 10

# ---- data ------------------------------------------------------------------
# Training targets are the per-step expanded series; evaluation (on both the
# test and the training windows) uses the original single-value targets.
gen_ytrain_test =  ytrain
gen_ytrain = expand_y( xtrain, ytrain )
gen_ytest  = ytest

# Windows are reshaped to (samples, steps, features) using the same constants
# that are handed to the model, instead of a second hard-coded 100 / 1.
gen_xtrain = np.reshape( xtrain, [-1, para_steps, para_data_dim] )
gen_xtest  = np.reshape( xtest,  [-1, para_steps, para_data_dim] )

# A single pre-formatted string prints identically with the Python 2 print
# statement and the Python 3 print function.
print("%s %s %s %s" % (np.shape(gen_xtrain), np.shape(gen_ytrain),
                       np.shape(gen_xtest), np.shape(gen_ytest)))

# ---- training --------------------------------------------------------------
# clean the graph
tf.reset_default_graph()

with tf.Session() as sess:

    reg = tsLSTM_generative( para_lstm_dim, para_steps, para_data_dim,
                             para_lstm_layers, sess,
                             para_lr, para_l2, para_max_norm )

    reg.train_ini()
    reg.inference_ini()

    total_cnt   = np.shape(gen_xtrain)[0]
    # Floor division: a trailing partial batch is dropped.
    total_batch = total_cnt // para_batch_size

    # np.random.shuffle works in place and needs a mutable sequence; a bare
    # range() is not one on Python 3.
    total_idx = list(range(total_cnt))

#   training cycle
    for epoch in range(para_n_epoch):

        tmpc = 0.0

#       shuffle training instances each epoch
        np.random.shuffle(total_idx)

#       loop over all batches
        for i in range(total_batch):

            batch_idx = total_idx[ i*para_batch_size: (i+1)*para_batch_size ]

            batch_x = gen_xtrain[ batch_idx ]
            batch_y = gen_ytrain[ batch_idx ]

            tmpc += reg.train_batch( batch_x, batch_y, para_keep_prob )

        if epoch%para_eval_byepoch != 0:
            continue

#       Evaluation is fed keep_prob = 1.0 (no dropout) rather than the
#       training keep probability.
        tmp_test_acc  = reg.inference( gen_xtest,  gen_ytest,       1.0 )
        tmp_train_acc = reg.inference( gen_xtrain, gen_ytrain_test, 1.0 )

        print("loss on epoch  %s  :  %s %s %s" % (epoch, 1.0*tmpc/total_batch,
                                                  tmp_test_acc, tmp_train_acc))

    print("Optimization Finished!")