In [1]:
import pyprind
import pandas as pd
from string import punctuation
import re
import numpy as np

In [2]:
df = pd.read_csv('train.csv', encoding='utf-8')
df = df.drop(['id'], axis=1)
print(df.head())

                                                text author
0  This process, however, afforded me no means of...    EAP
1  It never once occurred to me that the fumbling...    HPL
2  In his left hand was a gold snuff box, from wh...    EAP
3  How lovely is spring As we looked from Windsor...    MWS
4  Finding nothing else, not even gold, the Super...    HPL


In [3]:
df.shape

(19579, 2)

In [4]:
author_mapping = {label:idx for idx,label in enumerate(np.unique(df['author']))}
author_mapping

{u'EAP': 0, u'HPL': 1, u'MWS': 2}

In [5]:
df['author'] = df['author'].map(author_mapping)
print(df.head())

                                                text  author
0  This process, however, afforded me no means of...       0
1  It never once occurred to me that the fumbling...       1
2  In his left hand was a gold snuff box, from wh...       0
3  How lovely is spring As we looked from Windsor...       2
4  Finding nothing else, not even gold, the Super...       1


In [6]:
## Preprocessing the data:
## Separate words and 
## count each word's occurrence

from collections import Counter

counts = Counter()
pbar = pyprind.ProgBar(len(df['text']),
                       title='Counting words occurences')
for i,review in enumerate(df['text']):
    text = ''.join([c if c not in punctuation else ' '+c+' ' \
                    for c in review]).lower()
    df.loc[i,'text'] = text
    pbar.update()
    counts.update(text.split())

Counting words occurences
0% [##############################] 100% | ETA: 00:00:00
Total time elapsed: 00:00:18


In [7]:
## Create a mapping:
## Map each unique word to an integer

word_counts = sorted(counts, key=counts.get, reverse=True)
print(word_counts[:5])
word_to_int = {word: ii for ii, word in enumerate(word_counts, 1)}


mapped_reviews = []
pbar = pyprind.ProgBar(len(df['text']),
                       title='Map reviews to ints')
for review in df['text']:
    mapped_reviews.append([word_to_int[word] for word in review.split()])
    pbar.update()

Map reviews to ints


[u',', u'the', u'of', u'.', u'and']


0% [##############################] 100% | ETA: 00:00:00
Total time elapsed: 00:00:00


In [8]:
## Define fixed-length sequences:
## Use the last 200 elements of each sequence
## if sequence length < 200: left-pad with zeros

sequence_length = 300  ## sequence length (or T in our formulas)
sequences = np.zeros((len(mapped_reviews), sequence_length), dtype=int)
for i, row in enumerate(mapped_reviews):
    review_arr = np.array(row)
    sequences[i, -len(row):] = review_arr[-sequence_length:]

X_train = sequences[:15500, :]
y_train = df.loc[:15500, 'author'].values
X_test = sequences[15500:, :]
y_test = df.loc[15500:, 'author'].values


np.random.seed(123) # for reproducibility

## Function to generate minibatches:
def create_batch_generator(x, y=None, batch_size=64):
    n_batches = len(x)//batch_size
    x= x[:n_batches*batch_size]
    if y is not None:
        y = y[:n_batches*batch_size]
    for ii in range(0, len(x), batch_size):
        if y is not None:
            yield x[ii:ii+batch_size], y[ii:ii+batch_size]
        else:
            yield x[ii:ii+batch_size]

In [9]:
import tensorflow as tf


class SentimentRNN(object):
    def __init__(self, n_words, seq_len=200,
                 lstm_size=256, num_layers=1, batch_size=64,
                 learning_rate=0.0001, embed_size=200):
        self.n_words = n_words
        self.seq_len = seq_len
        self.lstm_size = lstm_size   ## number of hidden units
        self.num_layers = num_layers
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.embed_size = embed_size

        self.g = tf.Graph()
        with self.g.as_default():
            tf.set_random_seed(123)
            self.build()
            self.saver = tf.train.Saver()
            self.init_op = tf.global_variables_initializer()

    def build(self):
        ## Define the placeholders
        tf_x = tf.placeholder(tf.int32,
                    shape=(self.batch_size, self.seq_len),
                    name='tf_x')
        tf_y = tf.placeholder(tf.float32,
                    shape=(self.batch_size),
                    name='tf_y')
        tf_keepprob = tf.placeholder(tf.float32,
                    name='tf_keepprob')
        ## Create the embedding layer
        embedding = tf.Variable(
                    tf.random_uniform(
                        (self.n_words, self.embed_size),
                        minval=-1, maxval=1),
                    name='embedding')
        embed_x = tf.nn.embedding_lookup(
                    embedding, tf_x, 
                    name='embeded_x')

        ## Define LSTM cell and stack them together
        cells = tf.contrib.rnn.MultiRNNCell(
                [tf.contrib.rnn.DropoutWrapper(
                   tf.contrib.rnn.BasicLSTMCell(self.lstm_size),
                   output_keep_prob=tf_keepprob)
                 for i in range(self.num_layers)])

        ## Define the initial state:
        self.initial_state = cells.zero_state(
                 self.batch_size, tf.float32)
        print('  << initial state >> ', self.initial_state)

        lstm_outputs, self.final_state = tf.nn.dynamic_rnn(
                 cells, embed_x,
                 initial_state=self.initial_state)
        ## Note: lstm_outputs shape: 
        ##  [batch_size, max_time, cells.output_size]
        print('\n  << lstm_output   >> ', lstm_outputs)
        print('\n  << final state   >> ', self.final_state)

        ## Apply a FC layer after on top of RNN output:
        logits = tf.layers.dense(
                 inputs=lstm_outputs[:, -1],
                 units=1, activation=None,
                 name='logits')
        
        logits = tf.squeeze(logits, name='logits_squeezed')
        print ('\n  << logits        >> ', logits)
        
        y_proba = tf.nn.sigmoid(logits, name='probabilities')
        predictions = {
            'probabilities': y_proba,
            'labels' : tf.cast(tf.round(y_proba), tf.int32,
                 name='labels')
        }
        print('\n  << predictions   >> ', predictions)

        ## Define the cost function
        cost = tf.reduce_mean(
                 tf.nn.sigmoid_cross_entropy_with_logits(
                 labels=tf_y, logits=logits),
                 name='cost')
        
        ## Define the optimizer
        optimizer = tf.train.AdamOptimizer(self.learning_rate)
        train_op = optimizer.minimize(cost, name='train_op')

    def train(self, X_train, y_train, num_epochs):
        with tf.Session(graph=self.g) as sess:
            sess.run(self.init_op)
            iteration = 1
            for epoch in range(num_epochs):
                state = sess.run(self.initial_state)
                
                for batch_x, batch_y in create_batch_generator(
                            X_train, y_train, self.batch_size):
                    feed = {'tf_x:0': batch_x,
                            'tf_y:0': batch_y,
                            'tf_keepprob:0': 0.5,
                            self.initial_state : state}
                    loss, _, state = sess.run(
                            ['cost:0', 'train_op', 
                             self.final_state],
                            feed_dict=feed)

                    if iteration % 20 == 0:
                        print("Epoch: %d/%d Iteration: %d "
                              "| Train loss: %.5f" % (
                               epoch + 1, num_epochs,
                               iteration, loss))

                    iteration +=1
                if (epoch+1)%10 == 0:
                    self.saver.save(sess,
                        "model/spooky-%d.ckpt" % epoch)

    def predict(self, X_data, return_proba=False):
        preds = []
        with tf.Session(graph = self.g) as sess:
            self.saver.restore(
                sess, tf.train.latest_checkpoint('model/'))
            test_state = sess.run(self.initial_state)
            for ii, batch_x in enumerate(
                create_batch_generator(
                    X_data, None, batch_size=self.batch_size), 1):
                feed = {'tf_x:0' : batch_x,
                        'tf_keepprob:0': 1.0,
                        self.initial_state : test_state}
                if return_proba:
                    pred, test_state = sess.run(
                        ['probabilities:0', self.final_state],
                        feed_dict=feed)
                else:
                    pred, test_state = sess.run(
                        ['labels:0', self.final_state],
                        feed_dict=feed)
                    
                preds.append(pred)
                
        return np.concatenate(preds)

In [10]:
## Train:

n_words = max(list(word_to_int.values())) + 1

rnn = SentimentRNN(n_words=n_words, 
                   seq_len=sequence_length,
                   embed_size=256, 
                   lstm_size=128, 
                   num_layers=1, 
                   batch_size=100, 
                   learning_rate=0.001)

('  << initial state >> ', (LSTMStateTuple(c=<tf.Tensor 'MultiRNNCellZeroState/DropoutWrapperZeroState/BasicLSTMCellZeroState/zeros:0' shape=(100, 128) dtype=float32>, h=<tf.Tensor 'MultiRNNCellZeroState/DropoutWrapperZeroState/BasicLSTMCellZeroState/zeros_1:0' shape=(100, 128) dtype=float32>),))
('\n  << lstm_output   >> ', <tf.Tensor 'rnn/transpose:0' shape=(100, 300, 128) dtype=float32>)
('\n  << final state   >> ', (LSTMStateTuple(c=<tf.Tensor 'rnn/while/Exit_2:0' shape=(100, 128) dtype=float32>, h=<tf.Tensor 'rnn/while/Exit_3:0' shape=(100, 128) dtype=float32>),))
('\n  << logits        >> ', <tf.Tensor 'logits_squeezed:0' shape=(100,) dtype=float32>)
('\n  << predictions   >> ', {'probabilities': <tf.Tensor 'probabilities:0' shape=(100,) dtype=float32>, 'labels': <tf.Tensor 'labels:0' shape=(100,) dtype=int32>})


In [38]:
rnn.train(X_train, y_train, num_epochs=200)

Epoch: 1/200 Iteration: 20 | Train loss: 0.69409
Epoch: 1/200 Iteration: 40 | Train loss: -0.82173
Epoch: 1/200 Iteration: 60 | Train loss: 1.42024
Epoch: 1/200 Iteration: 80 | Train loss: 0.14882
Epoch: 1/200 Iteration: 100 | Train loss: -0.23103
Epoch: 1/200 Iteration: 120 | Train loss: -0.44219
Epoch: 1/200 Iteration: 140 | Train loss: -0.88181
Epoch: 2/200 Iteration: 160 | Train loss: -2.65125
Epoch: 2/200 Iteration: 180 | Train loss: -1.74752
Epoch: 2/200 Iteration: 200 | Train loss: -1.28096
Epoch: 2/200 Iteration: 220 | Train loss: -0.95793
Epoch: 2/200 Iteration: 240 | Train loss: -1.11271
Epoch: 2/200 Iteration: 260 | Train loss: -2.00011
Epoch: 2/200 Iteration: 280 | Train loss: -3.62694
Epoch: 2/200 Iteration: 300 | Train loss: -4.84606
Epoch: 3/200 Iteration: 320 | Train loss: -8.56320
Epoch: 3/200 Iteration: 340 | Train loss: -9.22080
Epoch: 3/200 Iteration: 360 | Train loss: -9.36788
Epoch: 3/200 Iteration: 380 | Train loss: -5.81568
Epoch: 3/200 Iteration: 400 | Train lo

Epoch: 21/200 Iteration: 3120 | Train loss: -84.72800
Epoch: 21/200 Iteration: 3140 | Train loss: -158.84778
Epoch: 21/200 Iteration: 3160 | Train loss: -102.89279
Epoch: 21/200 Iteration: 3180 | Train loss: -112.02808
Epoch: 21/200 Iteration: 3200 | Train loss: -151.55048
Epoch: 21/200 Iteration: 3220 | Train loss: -116.63463
Epoch: 21/200 Iteration: 3240 | Train loss: -149.84395
Epoch: 22/200 Iteration: 3260 | Train loss: -156.54411
Epoch: 22/200 Iteration: 3280 | Train loss: -116.06241
Epoch: 22/200 Iteration: 3300 | Train loss: -118.04439
Epoch: 22/200 Iteration: 3320 | Train loss: -98.18291
Epoch: 22/200 Iteration: 3340 | Train loss: -149.24576
Epoch: 22/200 Iteration: 3360 | Train loss: -86.41217
Epoch: 22/200 Iteration: 3380 | Train loss: -138.67598
Epoch: 22/200 Iteration: 3400 | Train loss: -138.65640
Epoch: 23/200 Iteration: 3420 | Train loss: -140.86830
Epoch: 23/200 Iteration: 3440 | Train loss: -160.39996
Epoch: 23/200 Iteration: 3460 | Train loss: -154.43222
Epoch: 23/200

Epoch: 40/200 Iteration: 6120 | Train loss: -210.45137
Epoch: 40/200 Iteration: 6140 | Train loss: -203.13275
Epoch: 40/200 Iteration: 6160 | Train loss: -238.79562
Epoch: 40/200 Iteration: 6180 | Train loss: -241.44379
Epoch: 40/200 Iteration: 6200 | Train loss: -259.45413
Epoch: 41/200 Iteration: 6220 | Train loss: -168.92656
Epoch: 41/200 Iteration: 6240 | Train loss: -312.66031
Epoch: 41/200 Iteration: 6260 | Train loss: -204.52660
Epoch: 41/200 Iteration: 6280 | Train loss: -207.61021
Epoch: 41/200 Iteration: 6300 | Train loss: -286.19144
Epoch: 41/200 Iteration: 6320 | Train loss: -234.68797
Epoch: 41/200 Iteration: 6340 | Train loss: -293.24850
Epoch: 42/200 Iteration: 6360 | Train loss: -297.83884
Epoch: 42/200 Iteration: 6380 | Train loss: -214.51717
Epoch: 42/200 Iteration: 6400 | Train loss: -221.60999
Epoch: 42/200 Iteration: 6420 | Train loss: -178.72217
Epoch: 42/200 Iteration: 6440 | Train loss: -282.05109
Epoch: 42/200 Iteration: 6460 | Train loss: -164.21152
Epoch: 42/

Epoch: 59/200 Iteration: 9100 | Train loss: -305.66269
Epoch: 59/200 Iteration: 9120 | Train loss: -378.29504
Epoch: 59/200 Iteration: 9140 | Train loss: -421.01102
Epoch: 60/200 Iteration: 9160 | Train loss: -328.42242
Epoch: 60/200 Iteration: 9180 | Train loss: -352.84875
Epoch: 60/200 Iteration: 9200 | Train loss: -378.16086
Epoch: 60/200 Iteration: 9220 | Train loss: -309.84204
Epoch: 60/200 Iteration: 9240 | Train loss: -292.35358
Epoch: 60/200 Iteration: 9260 | Train loss: -362.27985
Epoch: 60/200 Iteration: 9280 | Train loss: -359.94772
Epoch: 60/200 Iteration: 9300 | Train loss: -370.31821
Epoch: 61/200 Iteration: 9320 | Train loss: -257.53232
Epoch: 61/200 Iteration: 9340 | Train loss: -461.70844
Epoch: 61/200 Iteration: 9360 | Train loss: -317.67194
Epoch: 61/200 Iteration: 9380 | Train loss: -312.26242
Epoch: 61/200 Iteration: 9400 | Train loss: -413.22437
Epoch: 61/200 Iteration: 9420 | Train loss: -342.39243
Epoch: 61/200 Iteration: 9440 | Train loss: -418.99228
Epoch: 62/

Epoch: 78/200 Iteration: 12060 | Train loss: -472.04272
Epoch: 78/200 Iteration: 12080 | Train loss: -449.42239
Epoch: 79/200 Iteration: 12100 | Train loss: -484.46829
Epoch: 79/200 Iteration: 12120 | Train loss: -574.09436
Epoch: 79/200 Iteration: 12140 | Train loss: -563.89276
Epoch: 79/200 Iteration: 12160 | Train loss: -407.59720
Epoch: 79/200 Iteration: 12180 | Train loss: -281.75214
Epoch: 79/200 Iteration: 12200 | Train loss: -414.62915
Epoch: 79/200 Iteration: 12220 | Train loss: -514.49390
Epoch: 79/200 Iteration: 12240 | Train loss: -551.14655
Epoch: 80/200 Iteration: 12260 | Train loss: -457.55875
Epoch: 80/200 Iteration: 12280 | Train loss: -482.75485
Epoch: 80/200 Iteration: 12300 | Train loss: -493.55380
Epoch: 80/200 Iteration: 12320 | Train loss: -415.52945
Epoch: 80/200 Iteration: 12340 | Train loss: -392.02536
Epoch: 80/200 Iteration: 12360 | Train loss: -471.07785
Epoch: 80/200 Iteration: 12380 | Train loss: -470.03455
Epoch: 80/200 Iteration: 12400 | Train loss: -48

Epoch: 97/200 Iteration: 15000 | Train loss: -540.07849
Epoch: 97/200 Iteration: 15020 | Train loss: -682.94702
Epoch: 98/200 Iteration: 15040 | Train loss: -712.49127
Epoch: 98/200 Iteration: 15060 | Train loss: -512.57642
Epoch: 98/200 Iteration: 15080 | Train loss: -507.34726
Epoch: 98/200 Iteration: 15100 | Train loss: -431.35477
Epoch: 98/200 Iteration: 15120 | Train loss: -643.26361
Epoch: 98/200 Iteration: 15140 | Train loss: -404.47015
Epoch: 98/200 Iteration: 15160 | Train loss: -570.63879
Epoch: 98/200 Iteration: 15180 | Train loss: -583.71075
Epoch: 99/200 Iteration: 15200 | Train loss: -599.40820
Epoch: 99/200 Iteration: 15220 | Train loss: -747.99915
Epoch: 99/200 Iteration: 15240 | Train loss: -709.73853
Epoch: 99/200 Iteration: 15260 | Train loss: -497.85129
Epoch: 99/200 Iteration: 15280 | Train loss: -367.35342
Epoch: 99/200 Iteration: 15300 | Train loss: -533.15643
Epoch: 99/200 Iteration: 15320 | Train loss: -620.99860
Epoch: 99/200 Iteration: 15340 | Train loss: -70

Epoch: 116/200 Iteration: 17900 | Train loss: -598.86212
Epoch: 116/200 Iteration: 17920 | Train loss: -570.36224
Epoch: 116/200 Iteration: 17940 | Train loss: -696.55029
Epoch: 116/200 Iteration: 17960 | Train loss: -701.47937
Epoch: 116/200 Iteration: 17980 | Train loss: -715.45264
Epoch: 117/200 Iteration: 18000 | Train loss: -496.88962
Epoch: 117/200 Iteration: 18020 | Train loss: -848.84564
Epoch: 117/200 Iteration: 18040 | Train loss: -587.35052
Epoch: 117/200 Iteration: 18060 | Train loss: -604.29382
Epoch: 117/200 Iteration: 18080 | Train loss: -779.87439
Epoch: 117/200 Iteration: 18100 | Train loss: -632.20349
Epoch: 117/200 Iteration: 18120 | Train loss: -806.29626
Epoch: 118/200 Iteration: 18140 | Train loss: -842.49969
Epoch: 118/200 Iteration: 18160 | Train loss: -591.38593
Epoch: 118/200 Iteration: 18180 | Train loss: -613.04584
Epoch: 118/200 Iteration: 18200 | Train loss: -510.23962
Epoch: 118/200 Iteration: 18220 | Train loss: -784.87531
Epoch: 118/200 Iteration: 18240

Epoch: 135/200 Iteration: 20780 | Train loss: -805.47461
Epoch: 135/200 Iteration: 20800 | Train loss: -1005.35205
Epoch: 135/200 Iteration: 20820 | Train loss: -950.52393
Epoch: 135/200 Iteration: 20840 | Train loss: -699.00720
Epoch: 135/200 Iteration: 20860 | Train loss: -473.12469
Epoch: 135/200 Iteration: 20880 | Train loss: -716.05420
Epoch: 135/200 Iteration: 20900 | Train loss: -853.00763
Epoch: 135/200 Iteration: 20920 | Train loss: -984.74249
Epoch: 136/200 Iteration: 20940 | Train loss: -778.57794
Epoch: 136/200 Iteration: 20960 | Train loss: -805.37689
Epoch: 136/200 Iteration: 20980 | Train loss: -840.93237
Epoch: 136/200 Iteration: 21000 | Train loss: -707.35883
Epoch: 136/200 Iteration: 21020 | Train loss: -675.44141
Epoch: 136/200 Iteration: 21040 | Train loss: -811.01672
Epoch: 136/200 Iteration: 21060 | Train loss: -801.64929
Epoch: 136/200 Iteration: 21080 | Train loss: -831.47302
Epoch: 137/200 Iteration: 21100 | Train loss: -568.95062
Epoch: 137/200 Iteration: 2112

Epoch: 153/200 Iteration: 23660 | Train loss: -1053.26440
Epoch: 153/200 Iteration: 23680 | Train loss: -803.59607
Epoch: 153/200 Iteration: 23700 | Train loss: -1063.66589
Epoch: 154/200 Iteration: 23720 | Train loss: -1122.58496
Epoch: 154/200 Iteration: 23740 | Train loss: -799.53943
Epoch: 154/200 Iteration: 23760 | Train loss: -788.27515
Epoch: 154/200 Iteration: 23780 | Train loss: -702.53766
Epoch: 154/200 Iteration: 23800 | Train loss: -1039.25452
Epoch: 154/200 Iteration: 23820 | Train loss: -606.23755
Epoch: 154/200 Iteration: 23840 | Train loss: -906.50079
Epoch: 154/200 Iteration: 23860 | Train loss: -910.06702
Epoch: 155/200 Iteration: 23880 | Train loss: -957.58765
Epoch: 155/200 Iteration: 23900 | Train loss: -1115.42810
Epoch: 155/200 Iteration: 23920 | Train loss: -1107.80371
Epoch: 155/200 Iteration: 23940 | Train loss: -788.22211
Epoch: 155/200 Iteration: 23960 | Train loss: -557.32623
Epoch: 155/200 Iteration: 23980 | Train loss: -857.82935
Epoch: 155/200 Iteration:

Epoch: 172/200 Iteration: 26520 | Train loss: -981.77795
Epoch: 172/200 Iteration: 26540 | Train loss: -1017.40845
Epoch: 172/200 Iteration: 26560 | Train loss: -1088.70239
Epoch: 172/200 Iteration: 26580 | Train loss: -872.45093
Epoch: 172/200 Iteration: 26600 | Train loss: -857.97546
Epoch: 172/200 Iteration: 26620 | Train loss: -1020.15625
Epoch: 172/200 Iteration: 26640 | Train loss: -1006.73468
Epoch: 172/200 Iteration: 26660 | Train loss: -1042.17236
Epoch: 173/200 Iteration: 26680 | Train loss: -730.95612
Epoch: 173/200 Iteration: 26700 | Train loss: -1293.93799
Epoch: 173/200 Iteration: 26720 | Train loss: -849.49377
Epoch: 173/200 Iteration: 26740 | Train loss: -878.72626
Epoch: 173/200 Iteration: 26760 | Train loss: -1196.79419
Epoch: 173/200 Iteration: 26780 | Train loss: -934.19232
Epoch: 173/200 Iteration: 26800 | Train loss: -1171.27979
Epoch: 174/200 Iteration: 26820 | Train loss: -1247.13818
Epoch: 174/200 Iteration: 26840 | Train loss: -886.60437
Epoch: 174/200 Iterati

Epoch: 190/200 Iteration: 29380 | Train loss: -1274.02026
Epoch: 190/200 Iteration: 29400 | Train loss: -763.46594
Epoch: 190/200 Iteration: 29420 | Train loss: -1129.92249
Epoch: 190/200 Iteration: 29440 | Train loss: -1123.41724
Epoch: 191/200 Iteration: 29460 | Train loss: -1183.19385
Epoch: 191/200 Iteration: 29480 | Train loss: -1459.72424
Epoch: 191/200 Iteration: 29500 | Train loss: -1357.76636
Epoch: 191/200 Iteration: 29520 | Train loss: -965.09253
Epoch: 191/200 Iteration: 29540 | Train loss: -705.84253
Epoch: 191/200 Iteration: 29560 | Train loss: -1030.45947
Epoch: 191/200 Iteration: 29580 | Train loss: -1216.89465
Epoch: 191/200 Iteration: 29600 | Train loss: -1349.62939
Epoch: 192/200 Iteration: 29620 | Train loss: -1122.09351
Epoch: 192/200 Iteration: 29640 | Train loss: -1131.31409
Epoch: 192/200 Iteration: 29660 | Train loss: -1223.70239
Epoch: 192/200 Iteration: 29680 | Train loss: -982.32898
Epoch: 192/200 Iteration: 29700 | Train loss: -963.11804
Epoch: 192/200 Iter

In [37]:
## Test: 
preds = rnn.predict(X_test)
count = 0.0
for i, guess in enumerate(preds):
    if guess == y_test[i]:
        count += 1.0
        
print count
print len(preds)
print (count/len(preds))*100.0

INFO:tensorflow:Restoring parameters from model/spooky-39.ckpt
2148.0
4000
53.7


In [31]:
## Get probabilities:
proba = rnn.predict(X_test, return_proba=True)

INFO:tensorflow:Restoring parameters from model/spooky-39.ckpt
[1.5822611e-37 1.0000000e+00 2.3078283e-31 ... 1.0000000e+00 1.0000000e+00
 1.0000000e+00]


In [34]:
print len(X_test)

4079


In [35]:
print len(preds)

4000
