In [1]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

%matplotlib inline



In [6]:
def reset_graph(seed = 42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

### Manual

In [9]:
reset_graph()

n_inputs = 3 # inputs for time slot
n_neurons = 5 # hidden neurons in just 1 time slot

# tensor shape -> (n_batches x n_inputs)
X0 = tf.placeholder(tf.float32, [None, n_inputs])
X1 = tf.placeholder(tf.float32, [None, n_inputs])

Wx = tf.Variable(tf.random_normal(shape = [n_inputs, n_neurons]), 
                 dtype = tf.float32)
Wy = tf.Variable(tf.random_normal(shape = [n_neurons, n_neurons]), 
                 dtype = tf.float32)
b = tf.Variable(tf.zeros([1, n_neurons]), dtype = tf.float32)

Y0 = tf.tanh(tf.matmul(X0, Wx) + b)
Y1 = tf.tanh(tf.matmul(Y0, Wy) + tf.matmul(X1, Wx) + b)

init = tf.global_variables_initializer()

In [10]:
# t = 0
X0_batch = np.array([[0.0, 0.1, 0.2],   # instance 0
                     [0.3, 0.4, 0.5],   # instance 1
                     [0.6, 0.7, 0.8],   # instance 2 
                     [0.9, 1.0, 1.1],   # instance 3
                    ])
# t = 1
X1_batch = np.array([[1.1, 1.2, 1.3],   # instance 0
                     [1.4, 1.5, 1.6],   # instance 1
                     [1.7, 1.8, 1.9],   # instance 2 
                     [2.0, 2.1, 2.2],   # instance 3
                    ])

with tf.Session() as s:
    init.run()
    Y0_val, Y1_val = s.run([Y0, Y1],
                          feed_dict = {X0: X0_batch, X1: X1_batch})

print Y0_val
print Y1_val

[[-0.00664975  0.195439    0.08291762  0.08832355 -0.14525607]
 [ 0.32749006 -0.09047261 -0.30807865  0.20217191 -0.45717433]
 [ 0.59584385 -0.36221007 -0.61687893  0.31080624 -0.68643111]
 [ 0.77524525 -0.58375102 -0.80808318  0.41188511 -0.83014971]]
[[ 0.83347863 -0.4676269  -0.87743753  0.15186396 -0.87976229]
 [ 0.91412014 -0.54188269 -0.82953322 -0.37742507 -0.95792758]
 [ 0.95737088 -0.68740845 -0.8081519  -0.65423918 -0.98491126]
 [ 0.97921664 -0.83296072 -0.8362062  -0.75839293 -0.99425137]]


### Usando `static_rnn`

In [11]:
reset_graph()

n_inputs = 3 # inputs for time slot
n_neurons = 5 # hidden neurons in just 1 time slot

# tensor shape -> (n_batches x n_inputs)
X0 = tf.placeholder(tf.float32, [None, n_inputs])
X1 = tf.placeholder(tf.float32, [None, n_inputs])

# com API de mais alto nivel
basic_cell = tf.contrib.rnn.BasicRNNCell(num_units = n_neurons)
outputs, state = tf.contrib.rnn.static_rnn(basic_cell, [X0, X1],
                                           dtype =tf.float32)
Y0, Y1 = outputs

init = tf.global_variables_initializer()

In [12]:
# t = 0
X0_batch = np.array([[0.0, 0.1, 0.2],   # instance 0
                     [0.3, 0.4, 0.5],   # instance 1
                     [0.6, 0.7, 0.8],   # instance 2 
                     [0.9, 1.0, 1.1],   # instance 3
                    ])
# t = 1
X1_batch = np.array([[1.1, 1.2, 1.3],   # instance 0
                     [1.4, 1.5, 1.6],   # instance 1
                     [1.7, 1.8, 1.9],   # instance 2 
                     [2.0, 2.1, 2.2],   # instance 3
                    ])

with tf.Session() as s:
    init.run()
    Y0_val, Y1_val = s.run([Y0, Y1],
                          feed_dict = {X0: X0_batch, X1: X1_batch})

print Y0_val
print Y1_val

[[-0.11336877 -0.04618133 -0.04251913  0.08736697  0.14434618]
 [-0.26662004 -0.18219495  0.02026685  0.23455885  0.54267317]
 [-0.40747947 -0.31157607  0.08289339  0.37172768  0.7896542 ]
 [-0.5312956  -0.43035144  0.14487195  0.49438283  0.9109515 ]]
[[-0.66259533 -0.50169367  0.22397666  0.52975714  0.94856107]
 [-0.84929156 -0.5857386   0.39825439  0.45898128  0.98381126]
 [-0.92375982 -0.65609491  0.5346204   0.44486007  0.9942531 ]
 [-0.95567167 -0.71526313  0.63692611  0.48122585  0.99774879]]


### Sem necessidade de descrever explicitamente a entrada ao longo do tempo

In [15]:
reset_graph()

n_inputs = 3 # inputs for time slot
n_neurons = 5 # hidden neurons in just 1 time slot
n_steps = 2

# tensor shape -> (n_batches x n_inputs)
X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
# get list of tensors with shape [None, n_inputs]
Xseqs = tf.unstack(tf.transpose(X, perm = [1, 0, 2]))

# com API de mais alto nivel
basic_cell = tf.contrib.rnn.BasicRNNCell(num_units = n_neurons)
output_seqs, state = tf.contrib.rnn.static_rnn(basic_cell, Xseqs,
                                           dtype =tf.float32)
# back to the original shape
outputs = tf.transpose(tf.stack(output_seqs), perm = [1, 0, 2])

init = tf.global_variables_initializer()

In [16]:
X_batch = np.array([[[0.0, 0.1, 0.2], [1.1, 1.2, 1.3]],   # instance 0
                    [[0.3, 0.4, 0.5], [1.4, 1.5, 1.6]],   # instance 1
                    [[0.6, 0.7, 0.8], [1.7, 1.8, 1.9]],   # instance 2 
                    [[0.9, 1.0, 1.1], [2.0, 2.1, 2.2]],   # instance 3
                    ])

with tf.Session() as s:
    init.run()
    out_vals = s.run(outputs, feed_dict = {X: X_batch})

print out_vals

[[[-0.15318885  0.12051394 -0.14256774  0.11027262 -0.05871544]
  [-0.90414226  0.89195144 -0.63326883  0.70085257 -0.44146615]]

 [[-0.48086599  0.39384031 -0.28806657  0.26011854 -0.17563742]
  [-0.94593662  0.97297293 -0.67838675  0.859218   -0.56859499]]

 [[-0.71327341  0.61166167 -0.42137903  0.39840218 -0.28780583]
  [-0.96981335  0.9921385  -0.73931801  0.93104404 -0.65325063]]

 [[-0.85203207  0.76443809 -0.53866905  0.52067178 -0.39259726]
  [-0.98340499  0.99724239 -0.80625534  0.96356934 -0.70796287]]]


### Desdobramento dinâmico

In [17]:
reset_graph()

n_inputs = 3 # inputs for time slot
n_neurons = 5 # hidden neurons in just 1 time slot
n_steps = 2

# tensor shape -> (n_batches x n_inputs)
X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])

# com API de mais alto nivel
basic_cell = tf.contrib.rnn.BasicRNNCell(num_units = n_neurons)
outputs, state = tf.nn.dynamic_rnn(basic_cell, X,
                                   dtype =tf.float32)
init = tf.global_variables_initializer()

In [18]:
X_batch = np.array([[[0.0, 0.1, 0.2], [1.1, 1.2, 1.3]],   # instance 0
                    [[0.3, 0.4, 0.5], [1.4, 1.5, 1.6]],   # instance 1
                    [[0.6, 0.7, 0.8], [1.7, 1.8, 1.9]],   # instance 2 
                    [[0.9, 1.0, 1.1], [2.0, 2.1, 2.2]],   # instance 3
                    ])

with tf.Session() as s:
    init.run()
    out_vals = s.run(outputs, feed_dict = {X: X_batch})

print out_vals

[[[ 0.14834246  0.0544144  -0.12870833  0.04149501 -0.0309626 ]
  [ 0.88367331  0.2528224  -0.92533368  0.48299512 -0.16013345]]

 [[ 0.45225543  0.10361931 -0.47149643  0.210849   -0.10643698]
  [ 0.93180227  0.42531857 -0.9779222   0.35922     0.04373024]]

 [[ 0.67811656  0.15232225 -0.71363842  0.36842591 -0.18070443]
  [ 0.9635635   0.5406754  -0.99264485  0.29947734  0.17718439]]

 [[ 0.82224393  0.20029651 -0.8557173   0.50734007 -0.25296682]
  [ 0.98211133  0.60967129 -0.99719095  0.31619012  0.24006009]]]


### Tamanhos variáveis na Entrada

In [19]:
reset_graph()

n_inputs = 3 # inputs for time slot
n_neurons = 5 # hidden neurons in just 1 time slot
n_steps = 2

# tensor shape -> (n_batches x n_inputs)
X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
seq_lens = tf.placeholder(tf.float32, [None])

# com API de mais alto nivel
basic_cell = tf.contrib.rnn.BasicRNNCell(num_units = n_neurons)
outputs, state = tf.nn.dynamic_rnn(basic_cell, X, dtype =tf.float32,
                                   sequence_length = seq_lens)
init = tf.global_variables_initializer()

In [21]:
X_batch = np.array([[[0.0, 0.1, 0.2], [1.1, 1.2, 1.3]], # instance 0
                    [[0.3, 0.4, 0.5], [0.0, 0.0, 0.0]], # padded instance 1
                    [[0.6, 0.7, 0.8], [1.7, 1.8, 1.9]], # instance 2 
                    [[0.9, 1.0, 1.1], [2.0, 2.1, 2.2]], # instance 3
                    ])

with tf.Session() as s:
    init.run()
    out_vals, st_vals = s.run([outputs, state], 
                              feed_dict = {X: X_batch, 
                                           seq_lens: [2,1,2,2]})

print out_vals
print st_vals

[[[-0.05815748 -0.02042178  0.0261902  -0.0287376   0.00827101]
  [-0.25962549 -0.59064484 -0.52196658  0.3489787   0.2417295 ]]

 [[-0.11885347 -0.19865772 -0.13991296  0.07099477  0.06549278]
  [ 0.          0.          0.          0.          0.        ]]

 [[-0.17867406 -0.36465546 -0.29849792  0.16932763  0.12228704]
  [-0.36642525 -0.73754156 -0.7765432   0.62759364  0.38315642]]

 [[-0.23720218 -0.51031458 -0.44215211  0.26440096  0.17829153]
  [-0.41150665 -0.79461533 -0.84945899  0.72448599  0.44811308]]]
[[-0.25962549 -0.59064484 -0.52196658  0.3489787   0.2417295 ]
 [-0.11885347 -0.19865772 -0.13991296  0.07099477  0.06549278]
 [-0.36642525 -0.73754156 -0.7765432   0.62759364  0.38315642]
 [-0.41150665 -0.79461533 -0.84945899  0.72448599  0.44811308]]


## Classificação para MNIST

In [22]:
n_steps = 28
n_inputs = 28

In [24]:
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets('data/MNIST_data')
X_test = mnist.test.images.reshape((-1, n_steps, n_inputs))
y_test = mnist.test.labels

Extracting data/MNIST_data/train-images-idx3-ubyte.gz
Extracting data/MNIST_data/train-labels-idx1-ubyte.gz
Extracting data/MNIST_data/t10k-images-idx3-ubyte.gz
Extracting data/MNIST_data/t10k-labels-idx1-ubyte.gz


In [26]:
reset_graph()
n_neurons = 150
n_outputs = 10
learning_rate = 0.001

X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
y = tf.placeholder(tf.int32, [None])
basic_cell = tf.contrib.rnn.BasicRNNCell(num_units = n_neurons)
outputs, state = tf.nn.dynamic_rnn(basic_cell, X, dtype =tf.float32)

logits = tf.layers.dense(state, n_outputs)
xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels = y,
                                                         logits = logits)
loss = tf.reduce_mean(xentropy)
optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate)
train_op = optimizer.minimize(loss)
acc = tf.reduce_mean(tf.cast(tf.nn.in_top_k(logits, y, 1), tf.float32))

init = tf.global_variables_initializer()

In [27]:
n_epochs = 10
batch_size = 150

with tf.Session() as s:
    init.run()
    for e in range(n_epochs):
        for i in range(mnist.train.num_examples // batch_size):
            Xb, yb = mnist.train.next_batch(batch_size)
            Xb = Xb.reshape(-1, n_steps, n_inputs)
            s.run(train_op, feed_dict = {X: Xb, y: yb})
        acc_train = acc.eval(feed_dict = {X: Xb, y: yb})
        acc_test = acc.eval(feed_dict = {X: X_test, y: y_test})
        print '%d - acc tr: %.6f test: %.6f' % (e, acc_train, acc_test)

0 - acc tr: 0.940000 test: 0.930800
1 - acc tr: 0.933333 test: 0.943100
2 - acc tr: 0.940000 test: 0.953500
3 - acc tr: 0.966667 test: 0.962300
4 - acc tr: 0.953333 test: 0.968500
5 - acc tr: 0.960000 test: 0.965900
6 - acc tr: 0.980000 test: 0.970600
7 - acc tr: 0.980000 test: 0.971500
8 - acc tr: 0.960000 test: 0.971500
9 - acc tr: 0.986667 test: 0.969200


In [28]:
x = np.array([1,2,3,4])
x.shape

(4,)

In [30]:
xn = x.reshape((-1, 4))

In [31]:
print xn

[[1 2 3 4]]


In [32]:
xn.shape

(1, 4)

In [35]:
xn = x.reshape((4, 1))

In [36]:
print xn

[[1]
 [2]
 [3]
 [4]]
