In [3]:
# load GloVe encodings

import tensorflow as tf
import numpy as np

print("Loading word vectors")
embeddings_index = {}
#dct = {}

with open("glove300.txt", 'r') as fh:
    for line in fh:
        values = line.split()
        word = values[0]
        coefs = np.asarray(values[1:], dtype='float32')
        embeddings_index[word] = coefs
        #dct[word] = values[1:]
        

print('Found %s word vectors.' % len(embeddings_index))


Loading word vectors
Found 400000 word vectors.


In [4]:
# load training data

print("loading training data...")
texts = []
titles = []
subtitles = []
with open('data/NewsArticles.csv', 'rb') as fh:
    lines = fh.readlines()
    #print(len(lines))
    for line in lines[1:129]:  #TODO change me back full dataset
        
        l = str(line)[2:]
        ID, date, link, rest =  l.split(",", 3)
        
        if rest.startswith('"'):
            [scrap, title, rest] = rest.split('"', 2)
        else:
            title, rest = rest.split(",", 1)
        
        if rest.startswith('"'):
            [scrap, subtitle, text] = rest.split('"', 2)
        else:
            subtitle, text = rest.split(",", 1)
        
        text = text.strip(",\\r\\n'")
       
        titles.append(title)
        subtitles.append(subtitle)
        texts.append(text)

num_samples = len(texts)
print("found {} texts".format(num_samples))
print("\n",texts[0])

loading training data...
found 128 texts

 "Michigan billionaire education activist Betsy DeVos was confirmed today to serve as the secretary of education in President Trump\'s administration, after Vice President Mike Pence cast a tie-breaking vote in the Senate. The Senate voted on DeVos""?highly contentious nomination this afternoon, and the tally was split evenly, requiring Pence to use his authority as president of the upper chamber of Congress to break the impasse. This was the first time that a vice president has broken a tie to confirm a Cabinet nominee. Pence read the vote count 50-50 and then voted himself, rendering the tally 51-50. The day before the vote, Democrats staged a 24-hour marathon of speeches, with more than 30 lawmakers taking to the floor to urge at least one additional Republican to vote against DeVos and block her confirmation. ""It is hard to imagine a worse choice,""?Sen. Elizabeth Warren, D-Mass., said before she read letters from constituents urging her t

In [5]:
# convert input sequences into tensors with padding

from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

MAX_NUM_WORDS = 1024 # total number of words to work with (most common words from dataset)
MAX_SEQUENCE_LENGTH = 1024
test_percentage = 10

tokenizer = Tokenizer(num_words=MAX_NUM_WORDS)
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts) # list of sequences (one per text input)
data = pad_sequences(sequences, maxlen=MAX_SEQUENCE_LENGTH)
word_index = tokenizer.word_index  # dictionary mapping words (str) to their rank/index (int)

print('Found %s unique tokens.' % len(word_index))
print('Shape of data tensor:', data.shape)
print("\n", data[1])

# shuffle
#indices = np.arange(data.shape[0])
#np.random.shuffle(indices)
#data = data[indices]

# separate test and training examples
#split = int(num_samples * ((100-test_percentage) / 100))
#res = np.array_split(data, split)
#training_samples = res[0]
#test_samples = res[1]

Found 9938 unique tokens.
Shape of data tensor: (128, 1024)

 [  0   0   0 ...,   4  35 661]


In [6]:
# variables

max_len = 1024
hidden_dimension = 300
epochs = 1
learning_rate = 0.01
EMBEDDING_DIM = 300
batch_size = 128

In [7]:
# compute embedding matrix
from keras.layers import Embedding

embedding_matrix = np.zeros((len(word_index) + 1, EMBEDDING_DIM))

for word, i in word_index.items():
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        # words not found in embedding index will be all-zeros.
        embedding_matrix[i] = embedding_vector
        
print(embedding_matrix.shape)
print(embedding_matrix)

# embedding layer

embedding_layer = Embedding(len(word_index) + 1,
                            EMBEDDING_DIM,
                            name="Preprocessing",
                            weights=[embedding_matrix],
                            input_length=MAX_SEQUENCE_LENGTH,
                            trainable=False,
                            batch_input_shape=(batch_size, 1024))

#print(embedding_layer.get_config())


(9939, 300)
[[ 0.          0.          0.         ...,  0.          0.          0.        ]
 [ 0.04656     0.21318001 -0.0074364  ...,  0.0090611  -0.20988999
   0.053913  ]
 [-0.25756001 -0.057132   -0.67189997 ..., -0.16043     0.046744   -0.070621  ]
 ..., 
 [-0.34492001 -0.14936    -0.27792001 ..., -0.02331     0.18252    -0.44894999]
 [ 0.32444    -0.13552999 -0.36107999 ..., -0.33045    -0.072577
  -0.49854001]
 [-0.069605    0.052601   -0.22163001 ..., -0.25538     0.14901     0.18018   ]]


In [8]:
# 2-layer RNN, each layer has 1024 GRU cells
# length of embeddings vectors = 1024
# dropout probability for cells in output layer of first layer = 0.2 
# mini-batch stochastic gradient descent
# anneal the learning rate regularly when validation set fails to improve
# gradients are backpropaged 35 time steps and clipped to max value = 0.25
# formula (2)
from keras.models import Sequential
from keras.layers import GRU, Dropout, Dense, Activation
from keras.optimizers import SGD
from keras.models import load_model

load_from_file = True
if load_from_file:
    model = load_model("saves/model-1513008967.3425572.hdf5")

else:
    dummy_labels = np.zeros((batch_size, 300)) # shouldn't need these

    print('Building STATEFUL model...')
    model = Sequential()
    model.add(embedding_layer)  # input is 2D (samples, indices), output is 3D (samples, sequence_length, embedding_dim)

    # Layer 1
    L1 = GRU(hidden_dimension, name="Layer_1", input_shape=(max_len, 300), return_sequences=True, stateful=True) # unroll=True ?
    #print(L1.get_config())
    model.add(L1)

    # Layer 2
    L2 = GRU(hidden_dimension, name="Layer_2", input_shape=(max_len, 300), return_sequences=False, stateful=True, dropout=0.2)
    model.add(L2)
    # Output logits
    #L3 = Dense(1, name="Logits_layer", activation='softmax')
    #L3 = Activation('softmax')
    #model.add(L3) # outputs result of softmax(dot(input, kernel) + bias)


    print("Input data shape: ", data.shape)
    print("Layer 0 input shape: ", embedding_layer.input_shape)
    print("Layer 0 output shape: ", embedding_layer.output_shape)
    print("Layer 1 input shape: ", L1.input_shape)
    print("Layer 1 output shape: ", L1.output_shape)
    print("Layer 2 input shape: ", L2.input_shape)
    print("Layer 2 output shape: ", L2.output_shape)




    #model.add(Dropout(0.2))
    #model.add(GRU(hidden_dimension, input_shape=(max_len, 300), return_sequences=False, stateful=False))
    #model.add(Dense(1024, activation='softmax')) # preds

    sgd = SGD(learning_rate, momentum=0.0, decay=0.0, nesterov=False)
    model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])

    # train
    output = model.fit(data, dummy_labels, batch_size=batch_size, epochs=epochs, shuffle=False)

    import time
    now = int(time.time())
    model.save("saves/model-{}.hdf5".format(str(now)))
    
print("\n", model.summary())

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
Preprocessing (Embedding)    (128, 1024, 300)          2981700   
_________________________________________________________________
Layer_1 (GRU)                (128, 1024, 300)          540900    
_________________________________________________________________
Layer_2 (GRU)                (128, 300)                540900    
Total params: 4,063,500
Trainable params: 1,081,800
Non-trainable params: 2,981,700
_________________________________________________________________

 None


In [11]:
# Re-define model for predictions with batch size 1

n_batch = 1
new_model = Sequential()

new_embedding_layer = Embedding(len(word_index) + 1,
                            EMBEDDING_DIM,
                            name="Preprocessing_new",
                            weights=[embedding_matrix],
                            input_length=MAX_SEQUENCE_LENGTH,
                            trainable=False,
                            batch_input_shape=(n_batch, 1024))

new_model.add(new_embedding_layer)  # input is 2D (samples, indices), output is 3D (samples, sequence_length, embedding_dim)

# Layer 1
new_L1 = GRU(hidden_dimension, name="Layer_1_new", input_shape=(max_len, 300), return_sequences=True, stateful=True) # unroll=True ?
new_model.add(new_L1)

# Layer 2
new_L2 = GRU(hidden_dimension, name="Layer_2_new", input_shape=(max_len, 300), return_sequences=False, stateful=True, dropout=0.2)
new_model.add(new_L2)

sgd = SGD(learning_rate, momentum=0.0, decay=0.0, nesterov=False)

# copy weights
old_weights = model.get_weights()
new_model.set_weights(old_weights)
new_model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])
print("done")

done


In [13]:
# make predictions 
# do beam search here instead

import keras.preprocessing.text

print("input: ", data[0])
prediction = new_model.predict(np.array(data[0])[np.newaxis])[0]
print("prediction: ", prediction)

items_iterator = embeddings_index.items()
inverse_dictionary = dct #{v: k for k, v in items_iterator}

word_vectors = embeddings_index.values()

dst = (np.dot(word_vectors, prediction)
               / np.linalg.norm(word_vectors, axis=1)
               / np.linalg.norm(prediction))

word_ids = np.argsort(-dst)

x = prediction
print([(inverse_dictionary[x], dst[x]) for x in word_ids[:5] if x in self.inverse_dictionary])


input:  [  0   0   0 ...,   2  29 150]
prediction:  [ 0.04572431  0.01570713  0.05382569 -0.11145879  0.1462418   0.04722214
 -0.04859181 -0.10704654 -0.09107925 -0.01094797 -0.10073339 -0.04246584
  0.13497752 -0.10365248 -0.23515652 -0.13684878 -0.20667528  0.00721037
  0.17974156 -0.15897979  0.14913592  0.14937566  0.01850339  0.11201054
  0.2268997  -0.14638424 -0.20244944  0.24165636  0.36174268  0.04289497
 -0.00321247 -0.09621538 -0.01938743 -0.03167554 -0.17574283  0.14660315
 -0.05914986 -0.06970091 -0.10216457  0.0587291   0.0356455  -0.008211
 -0.22130033 -0.04675016 -0.11456672 -0.10555627 -0.09474204 -0.04943121
  0.14787239 -0.11926004  0.06533959  0.27145827 -0.21120021 -0.07077575
  0.00621931 -0.071443    0.09603091  0.1817199   0.17582867 -0.00249821
 -0.05660181  0.27953646 -0.05721919  0.2707743   0.06069357 -0.16527149
 -0.16364256  0.0371842  -0.26538384 -0.04680658 -0.03583626 -0.04456161
  0.14272247  0.05205672 -0.20102808  0.02758783 -0.06952568  0.17310834
 

TypeError: unsupported operand type(s) for *: 'dict_values' and 'float'

In [33]:
# beam search decoder
sequence_length=1024

decoded, log_prob = tf.nn.ctc_beam_search_decoder(
    output,
    sequence_length,
    beam_width=8,
    top_paths=1,
    merge_repeated=True
)

#K.get_value(dec[0])

TypeError: Expected float32 passed to parameter 'inputs' of op 'CTCBeamSearchDecoder', got <keras.callbacks.History object at 0x7f4132f51358> of type 'History' instead.

In [None]:
# use reverse of word_index.items() to lookup words from integer ID

def keras_rnn_predict(samples, empty=0, rnn_model=new_model):
    """for every sample, calculate probability for every possible label
    you need to supply your RNN model and maxlen - the length of sequences it can handle
    """
    res = []
    inputs = pad_sequences(samples, maxlen=MAX_SEQUENCE_LENGTH)
    for i in inputs:
        res.append(rnn_model.predict(np.array(i)[np.newaxis], verbose=0))
    
    return np.array(res)

def beamsearch(predict=keras_rnn_predict, k=8, maxsample=400, use_unk=False, oov=None, empty=1, eos="eos"):
    """return k samples (beams) and their NLL scores, each sample is a sequence of labels,
    all samples starts with an `empty` label and end with `eos` or truncated to length of `maxsample`.
    You need to supply `predict` which returns the label probability of each sample.
    `use_unk` allow usage of `oov` (out-of-vocabulary) label in samples
    """
    
    dead_k = 0 # samples that reached eos
    dead_samples = []
    dead_scores = []
    live_k = 1 # samples that did not yet reached eos
    live_samples = [[empty]]
    live_scores = [0]

    while live_k and dead_k < k:
        # for every possible live sample calc prob for every possible label 
        probs = predict(live_samples, empty=empty)

        # total score for every sample is sum of -log of word prb
        cand_scores = np.array(live_scores)[:,None] - np.log(probs)
        if not use_unk and oov is not None:
            cand_scores[:,oov] = 1e20
        cand_flat = cand_scores.flatten()

        # find the best (lowest) scores we have from all possible samples and new words
        ranks_flat = cand_flat.argsort()[:(k-dead_k)]
        live_scores = cand_flat[ranks_flat]

        # append the new words to their appropriate live sample
        voc_size = probs.shape[1]
        live_samples = [live_samples[r//voc_size]+[r%voc_size] for r in ranks_flat]

        # live samples that should be dead are...
        zombie = [s[-1] == eos or len(s) >= maxsample for s in live_samples]
        
        # add zombies to the dead
        dead_samples += [s for s,z in zip(live_samples,zombie) if z]  # remove first label == empty
        dead_scores += [s for s,z in zip(live_scores,zombie) if z]
        dead_k = len(dead_samples)
        # remove zombies from the living 
        live_samples = [s for s,z in zip(live_samples,zombie) if not z]
        live_scores = [s for s,z in zip(live_scores,zombie) if not z]
        live_k = len(live_samples)

    return dead_samples + live_samples, dead_scores + live_scores 

print(beamsearch())

In [47]:
# encode


# 2-layer RNN, each layer has 1024 GRU cells
# length of embeddings vectors = 1024
# dropout probability for cells in output layer of first layer = 0.2 
# mini-batch stochastic gradient descent
# anneal the learning rate regularly when validation set fails to improve
# gradients are backpropaged 35 time steps and clipped to max value = 0.25
# formula (2)

from keras.models import Sequential
from keras.layers import GRUCell
from keras.layers import RNN
from keras.optimizers import SGD
from keras.layers import Input
import keras

learning_rate = 1 #0.01 #1e-6
clip_norm = 1.0 # ???


sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
embedded_sequences = embedding_layer(sequence_input)


# Model
#model = Sequential()

# Cells
layer_one_cells = []
for _ in range(0, 1024):
    layer_one_cells.append(GRUCell(300, activation='tanh', recurrent_activation='hard_sigmoid', use_bias=True, kernel_initializer='glorot_uniform', recurrent_initializer='orthogonal', bias_initializer='zeros', dropout=0.2, recurrent_dropout=0.0))

layer_two_cells = []  
for _ in range(0, 1024):
    layer_two_cells.append(GRUCell(300, activation='tanh', recurrent_activation='hard_sigmoid', use_bias=True, kernel_initializer='glorot_uniform', recurrent_initializer='orthogonal', bias_initializer='zeros'))

                           
# Layers
#one = RNN(layer_one_cells, return_sequences=False, return_state=False, go_backwards=False, stateful=False, unroll=False)
#two = RNN(layer_two_cells, return_sequences=False, return_state=False, go_backwards=False, stateful=False, unroll=False)

inputs = keras.Input((60, 300))
one = keras.layers.RNN(layer_one_cells)(inputs)
#two = keras.layers.RNN(layer_two_cells)(inputs)

model = keras.models.Model(inputs, one)

#model.add(embedding_layer)
#model.add(one)
#model.add(two)



#rnn.add(GRU(1024, activation='tanh', recurrent_activation='hard_sigmoid', use_bias=True, kernel_initializer='glorot_uniform', recurrent_initializer='orthogonal', bias_initializer='zeros', dropout=0.0, recurrent_dropout=0.0))

sgd = SGD(learning_rate, momentum=0.0, decay=0.0, nesterov=False)

model.compile(loss='mse',
            optimizer=sgd,
            metrics=['accuracy'])

model.fit(embedded_sequences, None, epochs=1, batch_size=1024, verbose=2)

#scores = rnn.evaluate(x_test, y_test, verbose=0)
#print('IRNN test score:', scores[0])
#print('IRNN test accuracy:', scores[1])

TypeError: 'NoneType' object is not subscriptable

NameError: name 'ctc_beam_search_decoder' is not defined

In [1]:
texts = []
with open('data/NewsArticles.csv', 'rb') as fh:
    lines = fh.readlines()
    #print(len(lines))
    for line in lines:
        
        l = str(line)[2:]
        ID, date, link, rest =  l.split(",", 3)
        
        if rest.startswith('"'):
            [scrap, title, rest] = rest.split('"', 2)
        else:
            title, rest = rest.split(",", 1)
        
        if rest.startswith('"'):
            [scrap, subtitle, text] = rest.split('"', 2)
        else:
            subtitle, text = rest.split(",", 1)
        
        text = text.strip(",\\r\\n'")
       
        print(title)
        texts.append(text)
    #print(ID, date, link, title) 
    #print(subtitle, text)

title
Betsy DeVos Confirmed as Education Secretary, With Pence Casting Historic Tie-Breaking Vote
Melania Trump Says White House Could Mean Millions for Brand
As Trump Fears Fraud, GOP Eliminates Election Commission
Appeals Court to Decide on Challenge to Trump\'s Immigration Executive Order
At Least 4 Tornadoes Reported in Southeast Louisiana
Mother of Backpacker Slain in Australia Criticizes Trump
Trump\'s Labor Secretary Pick Andrew Puzder Admits to Employing Undocumented Worker
Iran\'s Top Leader Mocks \'Newcomer\' Trump
EU to Britain: Pay Up for What You Ordered Before Leaving
Multi-State Manhunt in Southeast Intensifies for Alleged Murderer and Accomplice
Flu Takes a Toll in NYC With 4 Children Reported Dead in Seasonal Outbreak
Romania Protests Endure as President Says Country in Crisis
Hillary Clinton Releases Video Statement: \'Future Is Female\'
Homeland Security Secretary John Kelly Defends Travel Ban but Regrets Quick Rollout
2 Other Times Kellyanne Conway Referred to Bowli

ANALYSIS: President Trump brings new tone to old promises
Lawmakers react to Trump\'s joint address to Congress
Celebrities, other public figures react to President Trump\'s joint address to Congress
Rosie O\'Donnell leads anti-Trump protest in DC ahead of congressional address
Chicago mayor slams Trump over remarks about city\'s murder rate during congressional address
Ex-governor Steve Beshear defends Obamacare in response to Trump\'s speech
House GOP rejects Dem effort for info on Trump-Russia probes
Houston police search for suspect \'potentially\' linked to officers\' shooting
Family was aboard the plane that crashed into a Southern California home, killing 3, authorities say
Man accused in fatal 2014 moviegoer shooting: \'It was his life or mine\'
Efforts to repeal NC \'bathroom bill\' are an at impasse again
Fallen Navy SEAL\'s widow receives standing ovation during Trump\'s address
Trump responds to father of Navy SEAL killed in Yemen raid
Yemen raid yielded \'valuable intellig

Nunes: Intel at White House came from Executive Branch
Kushner offers to meet with Senate Intel Committee over Russia meetings
Cheney: Putin made \'a very serious effort\' to interfere in US election
WikiLeaks denies Roger Stone\'s claim of backchannel to the group
North Korea conducts another missile engine test, US officials say
Cyclone Debbie prompts evacuations in Queensland, Australia
First on CNN: Border wall ask: $1 billion for 62 miles
Hong Kong escalator malfunction: 2 mechanics arrested
Elon Musk Just Launched A New Startup
In Breakthrough Discovery, Scientists Mass-Produce Artificial Blood
White Man Accused Of Fatally Stabbing Black New Yorker Is Charged With Terrorism
Alyssa Milano Wants To Drive You To The Polls In Georgia
Katie Cassidy Is Coming Back To \'Arrow,\' But It\'s Not Quite How You\'d Think
These Women are Bringing Disabilities Education to YouTube
Maybe Getting Tight With Rep. Devin Nunes Wasn
Barely Anyone Is Mourning The Demise Of The GOP\'s Health Care Bill


In [2]:
'''Example script to generate text from Nietzsche's writings.
At least 20 epochs are required before the generated text"
starts sounding coherent.
It is recommended to run this script on GPU, as recurrent
networks are quite computationally intensive.
If you try this script on new data, make sure your corpus
has at least ~100k characters. ~1M is better.

source: https://github.com/fchollet/keras/blob/master/examples/lstm_text_generation.py
'''

from __future__ import print_function
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.layers import GRU
from keras.optimizers import SGD
from keras.utils.data_utils import get_file
import numpy as np
import random
import sys

#path = get_file('nietzsche.txt', origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt')
#text = open(path).read().lower()

text = " ".join(texts)
print('corpus length:', len(text))


chars = sorted(list(set(text)))
print('total chars:', len(chars))
char_indices = dict((c, i) for i, c in enumerate(chars))
indices_char = dict((i, c) for i, c in enumerate(chars))

# cut the text in semi-redundant sequences of maxlen characters
maxlen = 40
step = 3
sentences = []
next_chars = []
for i in range(0, len(text) - maxlen, step):
    sentences.append(text[i: i + maxlen])
    next_chars.append(text[i + maxlen])
print('nb sequences:', len(sentences))

print('Vectorization...')
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool)
y = np.zeros((len(sentences), len(chars)), dtype=np.bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1

Using TensorFlow backend.


corpus length: 12808990
total chars: 94
nb sequences: 4269650
Vectorization...


In [None]:



# build the model: a single LSTM
print('Build model...')
model = Sequential()
model.add(GRU(1024, input_shape=(maxlen, len(chars))))#, dropout=0.2)
model.add(GRU(1024, input_shape=(maxlen, len(chars))))
model.add(Dense(len(chars)))
model.add(Activation('softmax'))

#optimizer = RMSprop(lr=0.01)
sgd = SGD(lr=0.01, momentum=0.0, decay=0.0, nesterov=False)
model.compile(loss='categorical_crossentropy', optimizer=sgd)


def sample(preds, temperature=1.0):
    # helper function to sample an index from a probability array
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

# train the model, output generated text after each iteration
for iteration in range(1, 60):
    print()
    print('-' * 50)
    print('Iteration', iteration)
    model.fit(x, y,
              batch_size=128,
              epochs=1)

    start_index = random.randint(0, len(text) - maxlen - 1)

    for diversity in [0.2, 0.5, 1.0, 1.2]:
        print()
        print('----- diversity:', diversity)

        generated = ''
        sentence = text[start_index: start_index + maxlen]
        generated += sentence
        print('----- Generating with seed: "' + sentence + '"')
        sys.stdout.write(generated)

        for i in range(400):
            x_pred = np.zeros((1, maxlen, len(chars)))
            for t, char in enumerate(sentence):
                x_pred[0, t, char_indices[char]] = 1.

            preds = model.predict(x_pred, verbose=0)[0]
            next_index = sample(preds, diversity)
            next_char = indices_char[next_index]

            generated += next_char
            sentence = sentence[1:] + next_char

            sys.stdout.write(next_char)
            sys.stdout.flush()
        print()

In [4]:

import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.layers import SimpleRNN
from keras import initializers
from keras.optimizers import RMSprop

batch_size = 32
num_classes = 10
epochs = 50
hidden_units = 100

learning_rate = 1e-6
clip_norm = 1.0

# the data, shuffled and split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()

x_train = x_train.reshape(x_train.shape[0], -1, 1)
x_test = x_test.reshape(x_test.shape[0], -1, 1)
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

print('Evaluate IRNN...')
model = Sequential()
model.add(SimpleRNN(hidden_units,
                    kernel_initializer=initializers.RandomNormal(stddev=0.001),
                    recurrent_initializer=initializers.Identity(gain=1.0),
                    activation='relu',
                    input_shape=x_train.shape[1:]))
model.add(Dense(num_classes))
model.add(Activation('softmax'))
rmsprop = RMSprop(lr=learning_rate)
model.compile(loss='categorical_crossentropy',
              optimizer=rmsprop,
              metrics=['accuracy'])

model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          validation_data=(x_test, y_test))

scores = model.evaluate(x_test, y_test, verbose=0)
print('IRNN test score:', scores[0])
print('IRNN test accuracy:', scores[1])

x_train shape: (60000, 784, 1)
60000 train samples
10000 test samples
Evaluate IRNN...
Train on 60000 samples, validate on 10000 samples
Epoch 1/1
IRNN test score: 1.99227425728
IRNN test accuracy: 0.2543


In [None]:
# length model
length_model = Sequential()

In [None]:
# repetition model
repetition_model = Sequential()

In [None]:
# ...

In [None]:
# entailment model
# rate=0.0005, batch size=128
#tf.train.AdamOptimizer
entailment_model = Sequential()

In [None]:
# relevance model
# train with ADAM (rate=0.001, dropout=0.5)
# 1-D convolution layer with filter size 3, stride 1
# pad sequences so that output is same length as input

In [None]:
# scrap

# 2-layer RNN, each layer has 1024 GRU cells
"""
units_per_layer = 1024
units_per_cell = 1024 # maybe should be 300?
layer_one_dropout = 0.2

#learning_rate = 1 # find init value, anneal

layer_one_cells = []
layer_two_cells = []

for _ in range(0, units_per_layer):
    cell = tf.contrib.rnn.DropoutWrapper(tf.contrib.rnn.GRUCell(units_per_cell), output_keep_prob=1.0-layer_one_dropout)
    layer_one_cells.append(cell)

for _ in range(0, units_per_layer):
    cell = tf.contrib.rnn.GRUCell(units_per_cell)
    layer_two_cells.append(cell)
    
first = tf.contrib.rnn.MultiRNNCell(layer_one_cells)
second = tf.contrib.rnn.MultiRNNCell(layer_two_cells)

# TODO connect layers
"""

In [6]:
# embed words into 300-D vectors initialized with GloVe encodings
# embedding_matrix is a tensor of shape [vocabulary_size, embedding size] => [?????, 300]

word_embeddings = tf.nn.embedding_lookup(vectors, word_ids)  # returns A Tensor with the same type as the tensors in params

NameError: name 'word_ids' is not defined