In [0]:
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
from sklearn.neural_network import MLPClassifier
from keras.models import Sequential
from keras.layers import Dense, LSTM
from keras.preprocessing.sequence import pad_sequences
import matplotlib.pyplot as plt

In [0]:
def createXorData(samples=100000, size=50):
  """
  generates a dataset for the XOR problem in 
    https://blog.openai.com/requests-for-research-2/
  LHS are sequences of bools
  RHS computes the parity bit 
    (eg. wether the sequence has odd or even sum)
  """
  lhs = np.zeros((samples, size), dtype=np.float32)
  rhs = np.zeros(samples)
  for row in range(samples):
    lhs[row] = np.random.randint(0, 2, size)
    rhs[row] = lhs[row].sum() % 2
  return lhs, rhs

In [0]:
def next_batch(num, data, labels):
    '''
    Return a total of `num` random samples and labels. 
    '''
    idx = np.arange(0 , len(data))
    np.random.shuffle(idx)
    idx = idx[:num]
    data_shuffle = [data[ i] for i in idx]
    labels_shuffle = [labels[ i] for i in idx]

    return np.asarray(data_shuffle), np.asarray(labels_shuffle)

In [0]:
SIZE = 3
SAMPLES = 40000
LEARNING_RATE = 0.0001
BATCH_SIZE = 100
TRAIN_STEPS = 1500
HIDDEN_NEURONS_1 = 5
HIDDEN_NEURONS_2 = 50
HIDDEN_NEURONS_3 = 50
CELL_SIZE = HIDDEN_NEURONS_1
RNN_LAYERS = SIZE
DROPOUT_PROB = 0.0000
RNN_TIMESTEPS = 1
NUM_CATEGORIES = 2

Xtrain, Ytrain = createXorData(SAMPLES, SIZE)
Xtest, Ytest = createXorData(int(SAMPLES/2), SIZE)

In [0]:
#
# sklearn MLP
#
mlp = MLPClassifier(hidden_layer_sizes=(
                    HIDDEN_NEURONS_1, HIDDEN_NEURONS_2, HIDDEN_NEURONS_3),
                    learning_rate_init=LEARNING_RATE,
                    solver='adam',
                    activation="relu"
                   ).fit(Xtrain, Ytrain)
pred_score = mlp.score(Xtest, Ytest)
print(pred_score)

In [0]:
#
# MLP in TF
#
MLPsess = tf.Session()
tf.reset_default_graph()

with tf.device('/gpu:0'):
  # Create the neural network
  def graph_net(x, dropout, reuse, is_training, n_classes=NUM_CATEGORIES):
      # Define a scope for reusing the variables
      with tf.variable_scope('MLP', reuse=reuse):
          # NOTE: this fails to train without relu activation
          fc1 = tf.layers.dense(x, HIDDEN_NEURONS_1, activation=tf.nn.relu)
          fc2 = tf.layers.dense(fc1, HIDDEN_NEURONS_2, activation=tf.nn.relu)
          fc3 = tf.layers.dense(fc2, HIDDEN_NEURONS_3, activation=tf.nn.relu)
          # Output layer, class prediction
          out = tf.contrib.layers.fully_connected(fc3, n_classes)
      return out
    
    
  # Define the model function (following TF Estimator Template)
  def mlp_model(features, labels, mode):
      # Build the neural network
      # Because Dropout have different behavior at training and prediction time, we
      # need to create 2 distinct computation graphs that still share the same weights.
      mlp_train = graph_net(features, DROPOUT_PROB, reuse=False, is_training=True)
      mlp_test = graph_net(features, DROPOUT_PROB, reuse=True, is_training=False)
      # Predictions
      pred_classes = tf.argmax(mlp_test, axis=1)
      # If prediction mode, early return
      if mode == tf.estimator.ModeKeys.PREDICT:
          return tf.estimator.EstimatorSpec(mode, predictions=pred_classes)

      # Define loss and optimizer
      loss_op = tf.losses.sparse_softmax_cross_entropy(
        logits=mlp_train, 
        labels=tf.cast(labels, dtype=tf.int32))
      optimizer = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE)
      train_op = optimizer.minimize(loss_op,
                                    global_step=tf.train.get_global_step())

      # Evaluate the accuracy of the model
      acc_op = tf.metrics.accuracy(labels=labels, predictions=pred_classes)

      # TF Estimators requires to return a EstimatorSpec, that specify
      # the different ops for training, evaluating, ...
      estim_specs = tf.estimator.EstimatorSpec(
          mode=mode,
          predictions=pred_classes,
          loss=loss_op,
          train_op=train_op,
          eval_metric_ops={'accuracy': acc_op})
      return estim_specs
    
    
  # Build the Estimator
  mlp_m = tf.estimator.Estimator(mlp_model)
  # Define the input function for training
  input_fn = tf.estimator.inputs.numpy_input_fn(
      x=Xtrain, y=Ytrain,
      batch_size=BATCH_SIZE, num_epochs=None, shuffle=True)
  # Train the Model
  mlp_m.train(input_fn, steps=TRAIN_STEPS)
  # Evaluate the Model
  # Define the input function for evaluating
  test_fn = tf.estimator.inputs.numpy_input_fn(
      x=Xtest, y=Ytest,
      batch_size=BATCH_SIZE, shuffle=False)
  # Use the Estimator 'evaluate' method
  e = mlp_m.evaluate(test_fn)

  print("Testing Accuracy:", e['accuracy'])

In [0]:
def plot_model(history):
    ''' Plot model accuracy and loss
    Args:
        history: Keras dictionary contatining training/validation loss/acc
    Returns:
        Plots model's training/validation loss and accuracy history
    '''
    loss = history.history['loss']
    val_loss = history.history['val_loss']

    epochs = range(1, len(loss) + 1)

    plt.figure()
    plt.plot(epochs, loss, 'bo', label='Training loss')
    plt.plot(epochs, val_loss, 'b', label='Validation loss')
    plt.title('Training and validation loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()

    plt.figure()
    acc = history.history['acc']
    val_acc = history.history['val_acc']

    plt.plot(epochs, acc, 'bo', label='Training acc')
    plt.plot(epochs, val_acc, 'b', label='Validation acc')
    plt.title('Training and validation accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()

    plt.show()
    return

In [22]:
model = Sequential()
model.add(LSTM(32, input_shape=(SIZE, 1)))
model.add(Dense(1, activation='sigmoid'))
# Display model summary
model.summary()
model.compile('adam', loss='binary_crossentropy', metrics=['acc'])

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_3 (LSTM)                (None, 32)                4352      
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 33        
Total params: 4,385
Trainable params: 4,385
Non-trainable params: 0
_________________________________________________________________


In [0]:
# Need to reshape X into 
history = model.fit(Xtrain.reshape((SAMPLES, SIZE, 1)), 
                    Ytrain, epochs=20, 
                    batch_size=32,
                    validation_split=0.2, 
                    shuffle=True)
plot_model(history)

Train on 32000 samples, validate on 8000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
 7232/32000 [=====>........................] - ETA: 8s - loss: 0.0389 - acc: 1.0000

Epoch 4/20
Epoch 5/20

Epoch 6/20
Epoch 7/20

Epoch 8/20

In [9]:
#
# RNN in TF
#
RNNsess = tf.Session()
tf.reset_default_graph()
with tf.device('/gpu:0'):
  # X input is [BatchSize, MaxSequenceLength, 1]
  # Tensorflow dynamic_rnn takes in  [batch_size x num_timesteps x num_features]
  # see https://stackoverflow.com/questions/42513613/tensorflow-dynamic-rnn-regressor-valueerror-dimension-mismatch#42518245
  # (last 1 is because fed one at a time)
  X = tf.placeholder(tf.float32, [None, SIZE, 1], name="XbitVectors")
  # Y is a [BatchSize] vector of {0, 1} parity labels
  Y_ = tf.placeholder(tf.float32, [None], name="YparityLabels")
  # RNN state [BatchSize, CellSize*#Layers]
  Hin = tf.placeholder(tf.float32, [None, CELL_SIZE * RNN_LAYERS], name="HinitState")
  
#   l = tf.unstack(Hin, axis=0)
#   init_state = tuple([tf.nn.rnn_cell.LSTMStateTuple(l[idx][0], l[idx][1]) 
#                            for idx in range(RNN_LAYERS)])
  
  # RNN Model
  cell_ = tf.nn.rnn_cell.GRUCell(CELL_SIZE)
  m_cell = tf.nn.rnn_cell.MultiRNNCell([cell_]*SIZE, state_is_tuple=False)
  Hr, H = tf.nn.dynamic_rnn(m_cell, X, initial_state=Hin, time_major=False)
  
  # Softmax output layer
  Hf = tf.reshape(Hr, [-1, CELL_SIZE], name="rnnOutReshaping")
  Ylogits = tf.layers.linear(hf, NUM_CATEGORIES, name="Ylogits")
  # Prediction
  Y = tf.nn.softmax(Ylogits)
  Ypred = tf.argmax(Y, 1)
  Ypred = tf.reshape(Ypred, [BATCH_SIZE, -1])
  
  loss = tf.nn.softmax_cross_entropy_with_logits(Ylogits, Y_)
  train_step = tf.train.AdamOptimizer(LEARNING_RATE).minimize(loss)
  
  
  num_batches = SAMPLES//BATCH_SIZE
  for epoch in range(20):
    inH = np.zeros([BATCH_SIZE, CELL_SIZE * SIZE])
    for i in range(0, num_batches):
      x, y_ = next_batch(BATCH_SIZE, Xtrain,  Ytrain)
      x = x.reshape(BATCH_SIZE, SIZE, 1)
      dic = {X: x, Y_: y_, Hin:inH}
      _,y,outH = sess.run([train_step, Yp,H,], feed_dict-dic)
      inH=outH

ValueError: ignored

In [36]:
#
# RNN in TF
#
RNNsess = tf.Session()
tf.reset_default_graph()

with tf.device('/gpu:0'):
  # Create the neural network
  def graph_rnn(x, dropout, reuse, is_training, n_classes=NUM_CATEGORIES):
      """
      Training data is shaped as a normal matrix
      [[0,1,1], [1,0,0], ..., [1,1,1]]

      The RNN cell takes as input a sequence of sequences,
      so to have a rnn sequence length of [# features] 
      the data needs to be reshaped as
      [[[0],[1],[1]], [[1],[0],[0]], ..., [[1],[1],[1]]]

      (alternatively the data can be further split 
       in more sub-array of sequences in the features)
      """
      # Define a scope for reusing the variables
      with tf.variable_scope('RNN', reuse=reuse):
        # Reshape each row into array of arrays
        # note: x.get_shape()[0] is loltensorflowcrap for len(x)
        x = tf.reshape(x, (x.get_shape()[0], RNN_TIMESTEPS, int(SIZE/RNN_TIMESTEPS)))
        
        # Single RNN Cell net
        rnn_cell = tf.nn.rnn_cell.GRUCell(HIDDEN_NEURONS_1)
        rnn_layers = tf.nn.rnn_cell.MultiRNNCell([rnn_cell] * SIZE, 
                                                 state_is_tuple=False)
        # 'outputs' is a tensor of shape [batch_size, max_time, cell_state_size]
        # 'state' is a tensor of shape [batch_size, cell_state_size]
        initial_state = tf.placeholder(tf.float32, [None, SIZE * HIDDEN_NEURONS_1])
#        initial_state = rnn_cell.zero_state((BATCH_SIZE), dtype=tf.float32)
        outputs, state = tf.nn.dynamic_rnn(rnn_layers, x,
                                          initial_state=initial_state,
                                          dtype=tf.float32,
#                                           time_major=False
                                         )
        # get last output of rnn
        indices = SIZE - 1
        rnn_out = tf.gather(outputs, indices, axis=1)
        formatted_rnn_out = tf.reshape(tf.squeeze(rnn_out),
                                   (-1, HIDDEN_NEURONS_1))
        # Output layer, class prediction
        out = tf.layers.dense(formatted_rnn_out, n_classes, activation=tf.nn.relu)
      return out
    
    
  # Define the model function (following TF Estimator Template)
  def rnn_model(features, labels, mode):   
    # Build the graph
    rnn_train = graph_rnn(features, DROPOUT_PROB, reuse=False, is_training=True)
    rnn_test = graph_rnn(features, DROPOUT_PROB, reuse=True, is_training=False)
    # Predictions
    pred_classes = tf.argmax(rnn_test, axis=1)
    # If prediction mode, early return
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode, predictions=pred_classes)

    # Define loss and optimizer
    loss_op = tf.losses.sparse_softmax_cross_entropy(
      logits=rnn_train, 
      labels=tf.cast(labels, dtype=tf.int32))
    optimizer = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE)
    train_op = optimizer.minimize(loss_op,
                                  global_step=tf.train.get_global_step())
    # Evaluate the accuracy of the model
    acc_op = tf.metrics.accuracy(labels=labels, predictions=pred_classes)
    # TF Estimators requires to return a EstimatorSpec, that specify
    # the different ops for training, evaluating, ...
    estim_specs = tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=pred_classes,
        loss=loss_op,
        train_op=train_op,
        eval_metric_ops={'accuracy': acc_op})
    return estim_specs
    

  # Build the Estimator
  rnn_m = tf.estimator.Estimator(rnn_model)
  # Define the input function for training
  input_fn = tf.estimator.inputs.numpy_input_fn(
      # Reshape data to get arrays of sequences 
      # instead of raw array inputs which rnn will reject
      x=Xtrain,
      y=Ytrain,
      batch_size=BATCH_SIZE, num_epochs=None, shuffle=True)
  # Train the Model
  rnn_m.train(input_fn, steps=TRAIN_STEPS)
  # Evaluate the Model
  # Define the input function for evaluating
  test_fn = tf.estimator.inputs.numpy_input_fn(
      x=Xtest,
      y=Ytest,
      batch_size=BATCH_SIZE, shuffle=False)
  # Use the Estimator 'evaluate' method
  e = rnn_m.evaluate(test_fn)

  print("Testing Accuracy:", e['accuracy'])

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/tmp/tmpkjadpdas', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f1f361f6240>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Calling model_fn.


ValueError: ignored

In [21]:
#
# RNN in TF (old, trains but diverges)
#
RNNsess = tf.Session()
tf.reset_default_graph()

with tf.device('/gpu:0'):
  # Create the neural network
  def graph_rnn(x, dropout, reuse, is_training, n_classes=2):
      """
      Training data is shaped as a normal matrix
      [[0,1,1], [1,0,0], ..., [1,1,1]]

      The RNN cell takes as input a sequence of sequences,
      so to have a rnn sequence length of [# features] 
      the data needs to be reshaped as 
      [[[0,1,1]], [[1,0,0]], ..., [[1,1,1]]]

      (alternatively the data can be further split 
       in more sub-array of sequences in the features)
      

      """
      # Define a scope for reusing the variables
      with tf.variable_scope('RNN', reuse=reuse):
        # Reshape each row into array of arrays
        # note: x.get_shape()[0] is loltensorflowcrap for len(x)
        x = tf.reshape(x, (x.get_shape()[0], RNN_TIMESTEPS, 
                           SIZE//RNN_TIMESTEPS))
        # Single RNN Cell net
        rnn_cell = tf.nn.rnn_cell.BasicLSTMCell(HIDDEN_NEURONS_3)
#         rnn_layers = tf.nn.rnn_cell.MultiRNNCell([rnn_cell] * SIZE, state_is_tuple=True)
        # 'outputs' is a tensor of shape [batch_size, max_time, cell_state_size]
        # 'state' is a tensor of shape [batch_size, cell_state_size]
        initial_state = rnn_cell.zero_state(BATCH_SIZE, dtype=tf.float32)
        outputs, state = tf.nn.dynamic_rnn(rnn_cell, x,
                                          initial_state=initial_state,
#                                           dtype=tf.float32,
                                          time_major=False)
        # When performing dynamic calculation, we must retrieve the last
        # dynamically computed output, i.e., if a sequence length is 10, we need
        # to retrieve the 10th output.
        # However TensorFlow doesn't support advanced indexing yet, so we build
        # a custom op that for each sample in batch size, get its length and
        # get the corresponding relevant output.
        # 'outputs' is a list of output at every timestep, we pack them in a Tensor
        # and change back dimension to [batch_size, n_step, n_input]
        outputs = tf.stack(outputs)
        outputs = tf.transpose(outputs, [1, 0, 2])
        # Hack to build the indexing and retrieve the right output.
        batch_size = tf.shape(outputs)[0]
        # Start indices for each sample
        index = tf.range(0, BATCH_SIZE) * SIZE + (SIZE - 1)
        # Indexing
        outputs = tf.gather(tf.reshape(outputs, [-1, HIDDEN_NEURONS_3]), index)
        # Output layer, class prediction
        out = tf.layers.dense(outputs, n_classes, activation=tf.nn.relu)
      return out
    
    
  # Define the model function (following TF Estimator Template)
  def rnn_model(features, labels, mode):   
    # Build the graph
    rnn_train = graph_rnn(features, DROPOUT_PROB, reuse=False, is_training=True)
    rnn_test = graph_rnn(features, DROPOUT_PROB, reuse=True, is_training=False)
    # Predictions
    pred_classes = tf.argmax(rnn_test, axis=1)
    # If prediction mode, early return
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode, predictions=pred_classes)

    # Define loss and optimizer
    loss_op = tf.losses.sparse_softmax_cross_entropy(
      logits=rnn_train, 
      labels=tf.cast(labels, dtype=tf.int32))
    optimizer = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE)
    train_op = optimizer.minimize(loss_op,
                                  global_step=tf.train.get_global_step())
    # Evaluate the accuracy of the model
    acc_op = tf.metrics.accuracy(labels=labels, predictions=pred_classes)
    # TF Estimators requires to return a EstimatorSpec, that specify
    # the different ops for training, evaluating, ...
    estim_specs = tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=pred_classes,
        loss=loss_op,
        train_op=train_op,
        eval_metric_ops={'accuracy': acc_op})
    return estim_specs
    

  # Build the Estimator
  rnn_m = tf.estimator.Estimator(rnn_model)
  # Define the input function for training
  input_fn = tf.estimator.inputs.numpy_input_fn(
      # Reshape data to get arrays of sequences 
      # instead of raw array inputs which rnn will reject
      x=Xtrain,
      y=Ytrain,
      batch_size=BATCH_SIZE, num_epochs=None, shuffle=True)
  # Train the Model
  rnn_m.train(input_fn, steps=TRAIN_STEPS)
  # Evaluate the Model
  # Define the input function for evaluating
  test_fn = tf.estimator.inputs.numpy_input_fn(
      x=Xtest,
      y=Ytest,
      batch_size=BATCH_SIZE, shuffle=False)
  # Use the Estimator 'evaluate' method
  e = rnn_m.evaluate(test_fn)

  print("Testing Accuracy:", e['accuracy'])

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/tmp/tmp2hk5vzap', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f1f359ac198>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 1 into /tmp/tmp2hk5vzap/model.ckpt.
INFO:tensorflow:loss = 0.692851, step = 1
INFO:tensorfl

TypeError: ignored