In [0]:
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
from sklearn.neural_network import MLPClassifier


def createXorData(samples=100000, size=50):
  """
  generates a dataset for the XOR problem in 
    https://blog.openai.com/requests-for-research-2/
  LHS are sequences of bools
  RHS computes the parity bit 
    (eg. wether the sequence has odd or even sum)
  """
  lhs = np.zeros((samples, size), dtype=np.int8)
  rhs = np.zeros(samples)
  for row in range(samples):
    lhs[row] = np.random.randint(0, 2, size)
    rhs[row] = lhs[row].sum() % 2
  return lhs, rhs


def next_batch(num, data, labels):
    '''
    Return a total of `num` random samples and labels from an array 
    '''
    idx = np.arange(0 , len(data))
    np.random.shuffle(idx)
    idx = idx[:num]
    data_shuffle = [data[ i] for i in idx]
    labels_shuffle = [labels[ i] for i in idx]

    return np.asarray(data_shuffle), np.asarray(labels_shuffle)

In [0]:
SIZE = 3
SAMPLES = 20000
LEARNING_RATE = 0.001
TRAIN_STEPS = 2500
BATCH_SIZE = 1000

Xtrain, Ytrain = createXorData(SAMPLES, SIZE)
Xtest, Ytest = createXorData(SAMPLES//2, SIZE)

X_lenghts = np.ones((SAMPLES, 1)) * SIZE

In [61]:
#
# overkill MLP
# Seems to work up to input length of 22
#
mlp = MLPClassifier(hidden_layer_sizes=(25, 25,25), 
                    activation="relu"
                   ).fit(Xtrain, Ytrain)
pred_score = mlp.score(Xtest, Ytest)
print(pred_score)

0.9917


In [0]:
class LSTM(object):
    def __init__(self, hidden_units, num_classes=2, 
                 max_sequence_length=SIZE, 
                 random_seed=np.random.randint(150)):
        self.hidden_units = hidden_units
        self.num_classes = num_classes
        self.max_sequence_length = max_sequence_length
        self.random_seed = random_seed
        
        self.g = tf.Graph()
        with self.g.as_default():
            self.X = tf.placeholder(tf.float32, 
                                    (None, self.max_sequence_length, 1), 
                                    name='sequences')
            self.X_len = tf.placeholder(tf.int32,
                                        (None, 1),
                                        name='lengths')
            self.Y = tf.placeholder(tf.int32, 
                                    (None),
                                    name='parity_labels')

            batch_dim = tf.shape(self.X)[0]
            with tf.variable_scope("LSTM"):
                tf.set_random_seed(self.random_seed)
                cell = tf.contrib.rnn.LSTMCell(self.hidden_units)
                initial_state = cell.zero_state(batch_dim, tf.float32)
                outputs, state = tf.nn.dynamic_rnn(cell, 
                                                   self.X, 
                                                   initial_state=initial_state, 
                                                   dtype=tf.float32)
            # get last output of rnn
            indices = self.X_len[-1] - 1
            rnn_out = tf.gather(outputs, indices, axis=1)
            self.final_rnn_out = tf.reshape(tf.squeeze(rnn_out),
                                            (-1, self.hidden_units))
            with tf.variable_scope("Linear"):
                W = tf.Variable(tf.random_normal((self.hidden_units, num_classes), 
                                                  stddev=0.1, 
                                                 seed=self.random_seed))
                b = tf.Variable(tf.zeros((num_classes)))
                self.h = tf.add(tf.matmul(self.final_rnn_out, W), b)

            self.loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=self.h, labels=self.Y))
            self.train_op = tf.train.AdamOptimizer(LEARNING_RATE).minimize(self.loss)

            self.predictions = tf.reshape(tf.squeeze(
                                 tf.argmax(self.h, axis=1)), (-1, 1))

In [0]:
def train_model(model, Xtrain, Ytrain, Xtest, Ytest, X_lenghts,
                epochs=100, batch_size=BATCH_SIZE, dataset_size=SAMPLES):
    """
    """
    num_batches = dataset_size//batch_size
    with model.g.as_default():
        sess = tf.Session()
        init = tf.global_variables_initializer()
        sess.run(init)
        for t in range(epochs):
            print("Epoch {}\n".format(t+1))
            for i in range(0, num_batches):
                X_batch = Xtrain[i:i+batch_size]
                Y_batch = Ytrain[i:i+batch_size]
                X_len_batch = X_lenghts[i:i+batch_size]
                _ = sess.run(model.train_op, 
                             feed_dict={model.X: X_batch, 
                                        model.Y: Y_batch, 
                                        model.X_len: X_len_batch})
                if i % 100 == 0:
                    loss_ = sess.run(model.loss,
                                     feed_dict={model.X: Xtrain, 
                                                model.Y: Ytrain,
                                                model.X_len: X_len_batch})
                    pred = sess.run(model.predictions, 
                                    feed_dict={model.X: Xtest, 
                                               model.Y: Ytest,
                                               model.X_len: X_len_batch})
                    accuracy = np.mean(pred == Ytest)
                    print('iteration: {}, loss: {},  accuracy: {}'.format(i+1, loss_, accuracy))



In [131]:
k_lstm_units = LSTM(SIZE)

Xtrain_r = Xtrain.reshape((SAMPLES, SIZE, 1))
Xtest_r = Xtest.reshape((SAMPLES//2, SIZE, 1))

train_model(k_lstm_units, Xtrain_r, Ytrain, Xtest_r, Ytest, X_lenghts)

Epoch 1

iteration: 1, loss: 0.6931548118591309,  accuracy: 0.49781818
Epoch 2

iteration: 1, loss: 0.6934610605239868,  accuracy: 0.4957
Epoch 3

iteration: 1, loss: 0.6933797597885132,  accuracy: 0.4957
Epoch 4

iteration: 1, loss: 0.693368136882782,  accuracy: 0.4957
Epoch 5

iteration: 1, loss: 0.693374752998352,  accuracy: 0.4957
Epoch 6

iteration: 1, loss: 0.6933584809303284,  accuracy: 0.49679306
Epoch 7

iteration: 1, loss: 0.6933428049087524,  accuracy: 0.49679306
Epoch 8

iteration: 1, loss: 0.6933079957962036,  accuracy: 0.49679306
Epoch 9

iteration: 1, loss: 0.6932434439659119,  accuracy: 0.49679306
Epoch 10

iteration: 1, loss: 0.693108856678009,  accuracy: 0.49679306
Epoch 11

iteration: 1, loss: 0.6928402185440063,  accuracy: 0.49679306
Epoch 12

iteration: 1, loss: 0.6922440528869629,  accuracy: 0.49679306
Epoch 13

iteration: 1, loss: 0.6910783052444458,  accuracy: 0.49790504
Epoch 14

iteration: 1, loss: 0.6888827085494995,  accuracy: 0.49790504
Epoch 15

iteration:

iteration: 1, loss: 0.2573140561580658,  accuracy: 0.50108876
Epoch 54

iteration: 1, loss: 0.24483217298984528,  accuracy: 0.50108876
Epoch 55

iteration: 1, loss: 0.23232348263263702,  accuracy: 0.50108876
Epoch 56

iteration: 1, loss: 0.21993906795978546,  accuracy: 0.50003698
Epoch 57

iteration: 1, loss: 0.20779502391815186,  accuracy: 0.50003698
Epoch 58

iteration: 1, loss: 0.19600330293178558,  accuracy: 0.50003698
Epoch 59

iteration: 1, loss: 0.1846395581960678,  accuracy: 0.50003698
Epoch 60

iteration: 1, loss: 0.17376267910003662,  accuracy: 0.50003698
Epoch 61

iteration: 1, loss: 0.16340188682079315,  accuracy: 0.50003698
Epoch 62

iteration: 1, loss: 0.15358573198318481,  accuracy: 0.50003698
Epoch 63

iteration: 1, loss: 0.14431868493556976,  accuracy: 0.50003698
Epoch 64

iteration: 1, loss: 0.13560490310192108,  accuracy: 0.50003698
Epoch 65

iteration: 1, loss: 0.12743954360485077,  accuracy: 0.50003698
Epoch 66

iteration: 1, loss: 0.11980558931827545,  accuracy: 0