# Testing a Pre-Trained Network

We apply a pre-trained network to some documents. The word vectors were downloaded from [GloVe](http://nlp.stanford.edu/projects/glove/), and the training data was built from the Thomson Reuters News Archive covering 2003 to 2012.

In [1]:
import numpy as np
import tensorflow as tf
import re

In [2]:
# Hyperparameters; they fix the tensor shapes used when the graph is built.
numDimensions = 200  # word-vector dimensionality; used for the shape of the `data` variable
maxSeqLength = 550  # number of word slots per document in the input matrix
batchSize = 24  # number of rows the input placeholders expect
lstmUnits = 64  # number of units in the LSTM cell
numClasses = 2  # number of output classes
iterations = 100050  # not referenced by the code visible here; presumably the training iteration count

In [3]:
# Read the vocabulary (each stripped line becomes one entry) and load the
# word-vector array from disk.
with open('wordsList_glove_dim200.txt') as f:
    wordsList = [entry.strip() for entry in f]
wordVectors = np.load('wordVectors_glove_dim200.npy')

In [4]:
# Start from an empty default graph before defining the network.
tf.reset_default_graph()

# Fixed-shape placeholders: per-class labels (argmax over axis 1 is taken below)
# and the matrix of word indices for a batch of documents.
labels = tf.placeholder(tf.float32, [batchSize, numClasses])
input_data = tf.placeholder(tf.int32, [batchSize, maxSeqLength])

# NOTE(review): this Variable is rebound by the next line and never used through
# the name `data`, but it is still created in the graph. It presumably affects
# the auto-generated variable names and the set of variables the Saver restores,
# so confirm against the checkpoint before removing it.
data = tf.Variable(tf.zeros([batchSize, maxSeqLength, numDimensions]),dtype=tf.float32)
# Replace every word index in the batch by its row of `wordVectors`.
data = tf.nn.embedding_lookup(wordVectors,input_data)

# Single LSTM cell with dropout applied to its outputs.
# NOTE(review): output_keep_prob=0.25 stays in effect when this graph is run for
# prediction — confirm whether dropout is intended at test time.
lstmCell = tf.contrib.rnn.BasicLSTMCell(lstmUnits)
lstmCell = tf.contrib.rnn.DropoutWrapper(cell=lstmCell, output_keep_prob=0.25)
value, _ = tf.nn.dynamic_rnn(lstmCell, data, dtype=tf.float32)

# Output layer parameters mapping the LSTM output to class scores.
weight = tf.Variable(tf.truncated_normal([lstmUnits, numClasses]))
bias = tf.Variable(tf.constant(0.1, shape=[numClasses]))
# Swap the first two axes so the step axis comes first, then pick the output of
# the final step (index maxSeqLength - 1) for every row of the batch.
value = tf.transpose(value, [1, 0, 2])
last = tf.gather(value, int(value.get_shape()[0]) - 1)
# Unnormalized class scores, one row per batch entry.
prediction = (tf.matmul(last, weight) + bias)

# Fraction of batch rows whose highest-scoring class matches the label.
correctPred = tf.equal(tf.argmax(prediction,1), tf.argmax(labels,1))
accuracy = tf.reduce_mean(tf.cast(correctPred, tf.float32))

The trained network's checkpoint is stored in the `models` directory; we restore the latest one.

In [5]:
# Open an interactive session and load the newest checkpoint found under
# 'models' into the graph's variables.
sess = tf.InteractiveSession()
latestCheckpoint = tf.train.latest_checkpoint('models')
saver = tf.train.Saver()
saver.restore(sess, latestCheckpoint)

INFO:tensorflow:Restoring parameters from models/pretrained_lstm.ckpt-100000


In [6]:
# Matches every run of characters that is not an ASCII letter, digit or space.
strip_special_chars = re.compile("[^A-Za-z0-9 ]+")


def cleanSentences(string):
    """Normalize raw text before it is split into words.

    The text is lower-cased, each "<br />" tag is replaced by a single
    space, and all remaining characters other than ASCII letters, digits
    and spaces are removed.

    Args:
        string: Raw input text.

    Returns:
        The cleaned, lower-case text.
    """
    lowered = string.lower()
    withoutBreaks = lowered.replace("<br />", " ")
    return strip_special_chars.sub("", withoutBreaks)

def getSentenceMatrix(sentence):
    """Convert raw text into a batch-shaped matrix of word indices.

    The text is cleaned with ``cleanSentences``, split on whitespace, and
    each word is replaced by its position in ``wordsList``. Only row 0 of
    the result is filled; the other rows stay zero so that the array has
    the fixed ``[batchSize, maxSeqLength]`` shape.

    Args:
        sentence: Raw text to encode.

    Returns:
        An int32 NumPy array of shape ``[batchSize, maxSeqLength]``.
    """
    sentenceMatrix = np.zeros([batchSize, maxSeqLength], dtype='int32')
    # Keep at most maxSeqLength words: writing past the last column would
    # raise IndexError for long documents.
    words = cleanSentences(sentence).split()[:maxSeqLength]
    unknownIndex = len(wordsList) - 1  # Vector for unknown words
    for indexCounter, word in enumerate(words):
        try:
            # NOTE: list.index is a linear scan of the vocabulary per word.
            sentenceMatrix[0, indexCounter] = wordsList.index(word)
        except ValueError:
            sentenceMatrix[0, indexCounter] = unknownIndex
    return sentenceMatrix

We apply the model to the prepared test texts.

In [7]:
# Classify every line of 'test/p_test.txt' and write "<label>\t<line>" to the
# result file, where the label is 1.0 when the first class score is larger
# than the second and -1.0 otherwise.
# Both files are opened in a `with` block so they are closed even if a
# prediction fails part-way through.
# NOTE(review): 'p__result.txt' has a double underscore, unlike
# 'n_result.txt' used for the other test file — confirm it is intended.
with open('test/p_test.txt', 'r') as f, \
        open('test_result/p__result.txt', 'w') as fout:
    for line in f:
        inputText = line
        inputMatrix = getSentenceMatrix(inputText)
        # Only row 0 of the batch carries the document, so keep its scores.
        predictedSentiment = sess.run(prediction, {input_data: inputMatrix})[0]
        if predictedSentiment[0] > predictedSentiment[1]:
            label = '1.0\t'
        else:
            label = '-1.0\t'
        fout.write(label)
        fout.write(line)

In [8]:
# Classify every line of 'test/n_test.txt' and write "<label>\t<line>" to the
# result file, where the label is 1.0 when the first class score is larger
# than the second and -1.0 otherwise.
# Both files are opened in a `with` block so they are closed even if a
# prediction fails part-way through.
with open('test/n_test.txt', 'r') as f, \
        open('test_result/n_result.txt', 'w') as fout:
    for line in f:
        inputText = line
        inputMatrix = getSentenceMatrix(inputText)
        # Only row 0 of the batch carries the document, so keep its scores.
        predictedSentiment = sess.run(prediction, {input_data: inputMatrix})[0]
        if predictedSentiment[0] > predictedSentiment[1]:
            label = '1.0\t'
        else:
            label = '-1.0\t'
        fout.write(label)
        fout.write(line)