In [2]:
import os

import numpy as np
import tensorflow as tf
from nltk.tokenize import word_tokenize
from tensorflow.contrib import rnn
# Start from an empty default graph so re-running the notebook does not
# stack duplicate ops/variables on top of a previously built graph.
tf.reset_default_graph()

In [3]:
# Read the whole corpus into memory as one string. The context manager
# guarantees the file handle is closed instead of leaking until GC.
with open("../sherlock.txt") as corpus_file:
    dataset = corpus_file.read()

In [4]:
# Vocabulary: the sorted unique *characters* of the corpus (iterating a str
# yields characters, so despite the name these are not word tokens).
words = sorted(set(dataset))
# Vocabulary size; also the width of every one-hot vector.
len_words = len(words)
# Number of character rows kept per token in encode_one_hot.
max_length = 16
# Length (in characters) of each input window cut from the corpus.
length_seq = 100
# Samples per minibatch; also baked into the graph's reshape / LSTM state.
batch_size = 64
# Fraction of the batches used for training; the remainder is used for testing.
training_test = 0.95

In [5]:
# Two-way lookup between a character and its index in the sorted vocabulary.
int_to_char = dict(enumerate(words))
char_to_int = {char: index for index, char in enumerate(words)}

In [6]:
# Accumulators for the encoded input windows (dataX) and their one-hot
# next-character targets (dataY); two distinct, initially empty lists.
dataX, dataY = [], []

In [15]:
def encode_one_hot(sentence):
    """Encode a sentence as one one-hot character matrix per token.

    The sentence is split with ``word_tokenize``. Each token becomes a
    ``(max_length, len_words)`` float32 matrix whose row ``i`` is the one-hot
    vector of the token's ``i``-th character, looked up in ``char_to_int``.

    Two cases are handled explicitly (previously they were hidden behind a
    blanket ``except Exception: pass``):

    * characters past position ``max_length`` are dropped (token truncated);
    * characters missing from ``char_to_int`` leave their row all zeros.

    Args:
        sentence: text to encode.

    Returns:
        float32 array of shape ``(n_tokens, max_length, len_words)``. When the
        sentence yields no tokens the result has shape
        ``(0, max_length, len_words)`` so it can still be concatenated with
        the encodings of other sentences.
    """
    max_word_length = max_length
    tokens = list(word_tokenize(sentence))

    # Allocate the full result once; untouched rows stay zero (padding).
    sent = np.zeros(shape=(len(tokens), max_word_length, len_words), dtype='float32')

    for word_idx, word in enumerate(tokens):
        # Slicing enforces the per-token character limit up front.
        for char_idx, char in enumerate(word[:max_word_length]):
            char_encoding = char_to_int.get(char)
            if char_encoding is None:
                # Out-of-vocabulary character: keep the all-zero row.
                continue
            sent[word_idx, char_idx, char_encoding] = 1.0

    return sent

In [9]:
# Slide a window of `length_seq` characters over the corpus one character at
# a time; the character right after each window is the prediction target.
for start in range(len(dataset) - length_seq):
    window = dataset[start:start + length_seq]
    next_char = dataset[start + length_seq]

    dataX.append(encode_one_hot(window))

    # One-hot target vector over the character vocabulary.
    target = np.zeros(len_words)
    target[char_to_int[next_char]] = 1
    dataY.append(target)

In [11]:
def numpy_fillna(data):
    """Stack variable-length samples into one zero-padded float32 array.

    Args:
        data: sequence of arrays, each of shape
            ``(n_i, max_length, len_words)`` where ``n_i`` may differ.

    Returns:
        float32 array of shape ``(len(data), max(n_i), max_length, len_words)``
        in which sample ``k`` occupies the first ``n_k`` slots along axis 1
        and the remaining slots are zero.
    """
    lengths = np.array(list(map(len, data)))
    longest = lengths.max()

    # valid[k, j] is True exactly when sample k has an entry at position j.
    positions = np.arange(longest)
    valid = positions[None, :] < lengths[:, None]

    padded = np.zeros(shape=(valid.shape + (max_length, len_words)), dtype='float32')
    # Row-major order of the True cells matches the order of the concatenated
    # samples, so one boolean assignment places every entry.
    padded[valid] = np.concatenate(data)
    return padded

In [33]:
def load_to_ram(batch_size, init):
    """Collect one minibatch of samples starting at index ``init``.

    Args:
        batch_size: number of samples a complete batch must contain.
        init: index into ``dataX`` / ``dataY`` of the batch's first sample.

    Returns:
        A tuple ``(complete, batch_x, batch_y)``:

        * complete batch: ``(True, padded_x, y_list)`` where ``padded_x`` is
          the result of ``numpy_fillna`` on the collected inputs;
        * incomplete batch (ran past the end of ``dataX``):
          ``(False, x_list, y_list)`` with whatever samples were available,
          unpadded. (The original returned the targets twice here.)
    """
    # Clamp the window to the data actually available.
    stop = min(init + batch_size, len(dataX))
    indices = range(init, stop)

    actDataX = [dataX[i] for i in indices]
    actDataY = [dataY[i] for i in indices]

    if len(indices) == batch_size:
        return True, numpy_fillna(actDataX), actDataY
    return False, actDataX, actDataY

In [18]:
def iterate_minibatch(batch_size, test=False):
    """Yield ``(inputs, targets)`` minibatches from the train or test split.

    Batches are numbered from 0. The first
    ``int(len(dataX) * training_test // batch_size)`` batches form the
    training split; the batches from there up to
    ``int(len(dataX) // batch_size)`` form the test split.

    Args:
        batch_size: number of samples per batch.
        test: when true iterate over the test split, otherwise the train split.

    Yields:
        ``(dx, dy)`` as returned by ``load_to_ram`` for every batch that
        ``load_to_ram`` reports as complete.
    """
    split_at = int(len(dataX) * training_test // batch_size)
    if test:
        batch_ids = range(split_at, int(len(dataX) // batch_size), 1)
    else:
        batch_ids = range(split_at)

    for batch_id in batch_ids:
        complete, dx, dy = load_to_ram(batch_size, batch_id * batch_size)
        if not complete:
            continue
        yield dx, dy

In [22]:
def linear(input_, output_size, scope=None):
    """Affine projection ``input_ @ Matrix^T + Bias``.

    Args:
        input_: 2-D tensor; its second dimension must be statically known
            because it sizes the weight matrix.
        output_size: number of output units.
        scope: variable scope name; defaults to ``"SimpleLinear"``.

    Returns:
        Tensor with ``output_size`` columns.
    """
    input_size = input_.get_shape().as_list()[1]
    with tf.variable_scope(scope or "SimpleLinear"):
        weights = tf.get_variable("Matrix", [output_size, input_size], dtype=input_.dtype)
        offset = tf.get_variable("Bias", [output_size], dtype=input_.dtype)
    # Weights are stored as (out, in), hence the transposed matmul.
    projected = tf.matmul(input_, weights, transpose_b=True)
    return tf.add(projected, offset)


def softmax(input_, out_dim, scope=None):
    """Dense layer followed by a softmax over ``out_dim`` classes.

    Args:
        input_: 2-D tensor whose second dimension sizes the weight matrix.
        out_dim: number of output classes.
        scope: variable scope name; defaults to ``"Softmax"``.

    Returns:
        Tensor of softmax probabilities with ``out_dim`` columns.
    """
    in_dim = input_.get_shape()[1]
    with tf.variable_scope(scope or "Softmax"):
        weights = tf.get_variable('W', [in_dim, out_dim])
        offset = tf.get_variable('b', [out_dim])
    logits = tf.add(tf.matmul(input_, weights), offset)
    return tf.nn.softmax(logits)


def conv2d(input_, output_dim, k_h, k_w, name="conv2d"):
    """Stride-1, ``VALID``-padded 2-D convolution plus bias.

    Args:
        input_: 4-D input tensor; its last dimension is used as the number of
            input channels of the filter.
        output_dim: number of output channels (filters).
        k_h: filter height.
        k_w: filter width.
        name: variable scope holding the filter ``w`` and bias ``b``.

    Returns:
        The convolved tensor with the bias added.
    """
    in_channels = input_.get_shape()[-1]
    with tf.variable_scope(name):
        filters = tf.get_variable('w', [k_h, k_w, in_channels, output_dim])
        offset = tf.get_variable('b', [output_dim])
    convolved = tf.nn.conv2d(input_, filters, strides=[1, 1, 1, 1], padding='VALID')
    return tf.add(convolved, offset)

In [23]:
def highway(input_, size, num_layers=1, bias=2.0, activation=tf.nn.relu, scope='Highway'):
    """Stack of highway layers: ``y = t * g(x) + (1 - t) * x``.

    For each layer, ``g`` is ``activation(linear(x))`` and the transform gate
    ``t`` is ``sigmoid(linear(x) + bias)``.

    Args:
        input_: 2-D input tensor. Because of the ``(1 - t) * x`` carry term
            its width must match ``size``.
        size: number of units of every layer.
        num_layers: how many highway layers to stack. With ``0`` the input is
            returned unchanged (the original raised ``UnboundLocalError``).
        bias: constant added to the gate pre-activation.
        activation: non-linearity applied to the transform branch.
        scope: enclosing variable scope name.

    Returns:
        Output tensor of the last layer, same shape as ``input_``.
    """
    # Bind the result up front so a zero-layer stack is the identity.
    output = input_
    with tf.variable_scope(scope):
        for idx in range(num_layers):
            g = activation(linear(output, size, scope="highway_lin_%d" % idx))
            t = tf.sigmoid(linear(output, size, scope="highway_gate_%d" % idx) + bias)
            # Gate between the transformed signal and the untouched input.
            output = t * g + (1.0 - t) * output
    return output

In [24]:
def tdnn(input_, kernels, kernel_features, scope="TDNN"):
    """Character-level convolution with max-over-time pooling per token.

    The input is flattened to ``[-1, max_length, len_words]`` (one row per
    token) and given a singleton axis at position 1. For every kernel width a
    ``1 x width`` convolution is applied, passed through ``tanh`` and
    max-pooled over all remaining character positions.

    Args:
        input_: tensor reshapeable to ``[-1, max_length, len_words]``.
        kernels: kernel widths (in characters).
        kernel_features: number of filters for each kernel width, paired
            positionally with ``kernels``.
        scope: enclosing variable scope name.

    Returns:
        The per-kernel pooled features, concatenated along axis 1 when more
        than one kernel is given, otherwise the single kernel's features.
    """
    tokens = tf.reshape(input_, [-1, max_length, len_words])
    tokens = tf.expand_dims(tokens, 1)

    pooled = []
    with tf.variable_scope(scope):
        for width, n_filters in zip(kernels, kernel_features):
            conv = conv2d(tokens, n_filters, 1, width, name="kernel_%d" % width)
            # A VALID convolution leaves max_length - width + 1 positions;
            # pooling over all of them keeps one value per filter.
            remaining = max_length - width + 1
            pool = tf.nn.max_pool(tf.tanh(conv), [1, 1, remaining, 1], [1, 1, 1, 1], 'VALID')
            pooled.append(tf.squeeze(pool, [1, 2]))

        if len(kernels) > 1:
            return tf.concat(pooled, 1)
        return pooled[0]

In [25]:
# Model input: axes are (batch, tokens per sample, max_length, len_words);
# the first two are left dynamic (None).
X = tf.placeholder('float32', shape=[None, None, max_length, len_words], name='X')
# Target: one row of len_words values per sample (one-hot next character).
Y = tf.placeholder('float32', shape=[None, len_words], name='Y')

In [26]:
# Convolution kernel widths (in characters) used by tdnn.
kernels = [1, 2, 3, 4, 5, 6, 7]
# Number of filters per kernel width, paired positionally with `kernels`.
kernel_features = [25, 50, 75, 100, 125, 150, 175]
# Highway layer width; equals sum(kernel_features) = 700, the width of the
# concatenated tdnn output that is fed into the highway layer.
size = 700
# Number of units in the LSTM cell.
rnn_size = 650
# Dropout rate on the LSTM output; 0.0 disables the DropoutWrapper.
dropout = 0.0

In [27]:
# Per-token character-CNN features, refined by one highway layer ...
token_features = tdnn(X, kernels, kernel_features)
token_features = highway(token_features, size)
# ... then regrouped into (batch_size, tokens, size) sequences for the RNN.
cnn = tf.reshape(token_features, [batch_size, -1, size])

In [28]:
# Recurrent part of the model: a single LSTM layer run over the per-token
# feature sequence, followed by a softmax over the character vocabulary.
with tf.variable_scope("LSTM"):
    def create_rnn_cell():
        """Build one BasicLSTMCell of `rnn_size` units, wrapped in output dropout when `dropout` > 0."""
        cell = rnn.BasicLSTMCell(rnn_size, state_is_tuple=True, forget_bias=0.0, reuse=False)
        if dropout > 0.0:
            cell = rnn.DropoutWrapper(cell, output_keep_prob=1. - dropout)
        return cell
    cell = create_rnn_cell()
    # The zero state is sized with the fixed batch_size, so every fed batch
    # must contain exactly batch_size samples.
    initial_rnn_state = cell.zero_state(batch_size, dtype="float32")
    
    outputs, final_rnn_state = tf.nn.dynamic_rnn(cell, cnn, initial_state=initial_rnn_state, dtype=tf.float32)
    # Move the time axis first so that indexing [-1] selects the last time step.
    outputs = tf.transpose(outputs, [1, 0, 2])
    # NOTE(review): no sequence_length is passed to dynamic_rnn, so for samples
    # shorter than the padded batch length this looks like the output at a
    # zero-padded step rather than at the last real token - confirm intent.
    last = outputs[-1]

# Probability distribution over the next character.
prediction = softmax(last, len_words)

In [29]:
epochs = 1  # A single epoch keeps the demo short; raise it for real training
learning_rate = 0.0001
pred = prediction

# Cross-entropy summed over the batch; probabilities are clipped away from
# zero so the logarithm stays finite.
clipped_pred = tf.clip_by_value(pred, 1e-10, 1.0)
cost = - tf.reduce_sum(Y * tf.log(clipped_pred))

# Fraction of samples whose most probable class matches the target.
predicted_class = tf.argmax(pred, 1)
target_class = tf.argmax(Y, 1)
predictor = tf.equal(predicted_class, target_class)
acc = tf.reduce_mean(tf.cast(predictor, 'float32'))

adam = tf.train.AdamOptimizer(learning_rate=learning_rate)
optimizer = adam.minimize(cost)

In [31]:
# Checkpoint writer; created after the graph is built so that it sees the
# variables defined above.
saver = tf.train.Saver()

In [34]:
# Train for `epochs` passes over the training split, evaluate on the test
# split after the first epoch and every tenth one, and checkpoint each epoch.
save_path = "./tmp/model"
# tf.train.Saver.save fails when the checkpoint's parent directory is
# missing, so make sure it exists before training starts.
os.makedirs(os.path.dirname(save_path), exist_ok=True)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    best_acc = 0.0  # kept for compatibility; not updated in this cell
    act_epoch = 0

    while act_epoch < epochs:
        loss = 0.0
        accuracy = 0.0
        batch = 1
        act_epoch += 1
        for data_in, data_out in iterate_minibatch(batch_size):
            _, c, a = sess.run([optimizer, cost, acc], feed_dict={X: data_in, Y: data_out})
            loss += c
            accuracy += a
            # Running averages over the batches seen so far in this epoch.
            print("act_epoch:", act_epoch, "batch:", batch, "loss:", loss/batch, "acc:", accuracy/batch)
            batch += 1

        if act_epoch == 1 or act_epoch % 10 == 0:
            accuracy_test = 0.0
            batch_test = 0
            for data_in, data_out in iterate_minibatch(batch_size, test=True):
                a = sess.run(acc, feed_dict={X: data_in, Y: data_out})
                accuracy_test += a
                batch_test += 1
            # A small corpus can leave the test split without a single full
            # batch; report that instead of dividing by zero.
            if batch_test > 0:
                print("Accuracy Test:", accuracy_test/batch_test)
            else:
                print("Accuracy Test: no complete test batch available")

        path = saver.save(sess, save_path)
        print("Saving in Epoch", act_epoch, "in path", path)

act_epoch: 1 batch: 1 loss: 276.936767578125 acc: 0.0
act_epoch: 1 batch: 2 loss: 275.8157043457031 acc: 0.0
act_epoch: 1 batch: 3 loss: 275.30810546875 acc: 0.015625
act_epoch: 1 batch: 4 loss: 274.83326721191406 acc: 0.0234375
act_epoch: 1 batch: 5 loss: 274.1716247558594 acc: 0.034375
act_epoch: 1 batch: 6 loss: 274.1794738769531 acc: 0.033854166666666664
act_epoch: 1 batch: 7 loss: 274.09073311941967 acc: 0.029017857142857144
act_epoch: 1 batch: 8 loss: 273.6947250366211 acc: 0.04296875
act_epoch: 1 batch: 9 loss: 272.74830457899304 acc: 0.04861111111111111
act_epoch: 1 batch: 10 loss: 272.4758544921875 acc: 0.0609375
act_epoch: 1 batch: 11 loss: 271.8074951171875 acc: 0.07102272727272728
act_epoch: 1 batch: 12 loss: 270.8001963297526 acc: 0.08463541666666667
act_epoch: 1 batch: 13 loss: 269.8661874624399 acc: 0.09375
act_epoch: 1 batch: 14 loss: 268.9802812848772 acc: 0.09821428571428571
act_epoch: 1 batch: 15 loss: 267.68515625 acc: 0.10520833333333333
act_epoch: 1 batch: 16 loss

act_epoch: 1 batch: 118 loss: 214.47042613918498 acc: 0.1517478813559322
act_epoch: 1 batch: 119 loss: 214.36064904477416 acc: 0.15152310924369747
act_epoch: 1 batch: 120 loss: 214.1375026702881 acc: 0.15182291666666667
act_epoch: 1 batch: 121 loss: 213.99678342204447 acc: 0.1518595041322314
act_epoch: 1 batch: 122 loss: 213.91456203773373 acc: 0.15176741803278687
act_epoch: 1 batch: 123 loss: 213.81012732032838 acc: 0.1521849593495935
act_epoch: 1 batch: 124 loss: 213.66326781242125 acc: 0.15209173387096775
act_epoch: 1 batch: 125 loss: 213.6062724609375 acc: 0.152125
act_epoch: 1 batch: 126 loss: 213.52723839169457 acc: 0.15215773809523808
act_epoch: 1 batch: 127 loss: 213.31397854061578 acc: 0.15231299212598426
act_epoch: 1 batch: 128 loss: 213.1184492111206 acc: 0.152587890625
act_epoch: 1 batch: 129 loss: 212.94167452080305 acc: 0.1527374031007752
act_epoch: 1 batch: 130 loss: 212.87886681189903 acc: 0.1527644230769231
act_epoch: 1 batch: 131 loss: 212.75014466729783 acc: 0.152910

act_epoch: 1 batch: 231 loss: 208.08402526327026 acc: 0.15739989177489178
act_epoch: 1 batch: 232 loss: 208.0532023988921 acc: 0.15746228448275862
act_epoch: 1 batch: 233 loss: 207.99548418429788 acc: 0.15745708154506438
act_epoch: 1 batch: 234 loss: 207.91334859733908 acc: 0.15751869658119658
act_epoch: 1 batch: 235 loss: 207.9443416514295 acc: 0.15764627659574468
act_epoch: 1 batch: 236 loss: 207.98007441375216 acc: 0.15750794491525424
act_epoch: 1 batch: 237 loss: 207.97498324752357 acc: 0.15763449367088608
act_epoch: 1 batch: 238 loss: 208.03111183743516 acc: 0.15743172268907563
act_epoch: 1 batch: 239 loss: 208.087757381934 acc: 0.15723064853556484
act_epoch: 1 batch: 240 loss: 208.02350686391193 acc: 0.15709635416666667
act_epoch: 1 batch: 241 loss: 207.96015100756128 acc: 0.15715767634854771
act_epoch: 1 batch: 242 loss: 207.886010288207 acc: 0.1572184917355372
act_epoch: 1 batch: 243 loss: 207.82083789213203 acc: 0.15727880658436214
act_epoch: 1 batch: 244 loss: 207.76221172145

NameError: name 'iterator_minibatch' is not defined