In [34]:
import tensorflow as tf
import pandas as pd
import numpy as np
import math
from sklearn.model_selection import train_test_split

# Reading Text file , extracting Notes content
def getProcessedText(data):
    """Return *data* as a native string with all non-ASCII characters removed.

    The original ``str(data).decode(...)`` form only works on Python 2
    (Python 3 ``str`` has no ``decode``) and raises on Python 2 unicode
    input that contains non-ASCII characters. This version behaves the
    same for plain ASCII/byte input and works on both interpreters.

    Args:
        data: A text string, a byte string, or any object that can be
            rendered as text (e.g. a number).

    Returns:
        A native ``str`` holding only the ASCII characters of *data*.
    """
    if isinstance(data, bytes):
        # Byte input: silently drop every byte that is not valid ASCII.
        ascii_bytes = data.decode('ascii', 'ignore').encode('ascii')
    else:
        # Text or arbitrary object: render as text first, then drop any
        # character that cannot be represented in ASCII.
        ascii_bytes = u'{}'.format(data).encode('ascii', 'ignore')
    # Native ``str`` is bytes on Python 2 and text on Python 3.
    return ascii_bytes if str is bytes else ascii_bytes.decode('ascii')

# Load the space-separated notes file; "Notes" holds the text and
# "Autoclose" holds the class label.
dataframe = pd.read_csv("notesOutput.txt", sep=' ', index_col=None)
data = [getProcessedText(doc) for doc in dataframe["Notes"]]

# Binary target: 1 when the Autoclose value starts with 'Pos', else 0.
# Built as a real list (not a lazy ``map`` object) so that np.array() below
# yields a proper 1-D integer array on Python 3 as well as Python 2.
labels = [1 if label.startswith('Pos') else 0 for label in dataframe["Autoclose"]]

# Average number of space-separated tokens per note, used as the fixed
# sequence length. Floor division keeps the result an int on Python 3,
# which is required for the vocabulary processor and the placeholder shape.
max_seq_length = sum(len(note.split(" ")) for note in data) // len(data)

# Map every word to a unique integer id; each note becomes a row of
# max_seq_length ids (VocabularyProcessor trims longer documents and pads
# shorter ones to that length).
vocab_processor = tf.contrib.learn.preprocessing.VocabularyProcessor(max_seq_length)
x_data = np.array(list(vocab_processor.fit_transform(data)))
y_output = np.array(labels)

vocab_size = len(vocab_processor.vocabulary_)


# Divide the data into train and test sets (70:30), stratified on the label
# and seeded so the split is reproducible.
train_data, test_data, train_target, test_target = train_test_split(
    x_data,
    y_output,
    test_size=0.3,
    random_state=21,
    stratify=y_output,
)


# Start from an empty default graph so that re-running this cell does not
# accumulate duplicate ops and variables.
tf.reset_default_graph()

# Hyper-parameters.
num_epochs = 20
batch_size = 25
embedding_size = 50  # length of one word vector (also the LSTM unit count)
max_label = 2        # number of output classes

# Inputs: a batch of word-id sequences and their integer class labels.
x = tf.placeholder(tf.int32, shape=[None, max_seq_length])
y = tf.placeholder(tf.int32, shape=[None])

# Trainable embedding table: one embedding_size-long vector per vocabulary
# entry, initialised uniformly in [-1.0, 1.0).
embedding_matrix = tf.Variable(
    tf.random_uniform([vocab_size, embedding_size], minval=-1.0, maxval=1.0)
)
embeddings = tf.nn.embedding_lookup(embedding_matrix, x)

# Single LSTM layer with dropout on the cell outputs.
# NOTE(review): output_keep_prob affects the per-step outputs only, while
# the logits below are built from the final state, so this dropout
# presumably never reaches the classifier. The keep probability is also a
# constant, so it is applied at evaluation time too - confirm the intent.
lstmCell = tf.contrib.rnn.DropoutWrapper(
    cell=tf.contrib.rnn.BasicLSTMCell(embedding_size),
    output_keep_prob=0.75,
)

# The final state is unpacked into two parts; only the first (named as the
# memory cell) is used for classification.
output, (final_state_memory_cell, otherInfo) = tf.nn.dynamic_rnn(
    lstmCell,
    embeddings,
    dtype=tf.float32,
)

# Softmax layer: project the final state to one logit per class.
logits = tf.layers.dense(final_state_memory_cell, max_label, activation=None)
cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
    labels=y,
    logits=logits,
)
loss = tf.reduce_mean(cross_entropy)

# Accuracy: fraction of examples whose arg-max logit equals the label.
prediction = tf.equal(tf.argmax(logits, 1), tf.cast(y, tf.int64))
accuracy = tf.reduce_mean(tf.cast(prediction, tf.float32))

optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
train_step = optimizer.minimize(loss)

init = tf.global_variables_initializer()

# Train for num_epochs passes over the training set and report the test
# loss/accuracy after every epoch.
with tf.Session() as session:
    init.run()

    num_train = len(train_data)
    # The evaluation feed never changes, so build it once.
    test_dict = {x: test_data, y: test_target}

    for epoch in range(num_epochs):
        # Walk the training set in batch_size strides. range() stops before
        # num_train, so the last slice is never empty; the previous
        # "len // batch_size + 1" count ran one extra training step on an
        # empty batch whenever num_train was an exact multiple of batch_size.
        for min_ix in range(0, num_train, batch_size):
            max_ix = min(num_train, min_ix + batch_size)

            train_dict = {
                x: train_data[min_ix:max_ix],
                y: train_target[min_ix:max_ix],
            }
            # Fetch the batch loss/accuracy in the same run as the update
            # instead of paying for a second forward pass per batch. The
            # values therefore come from the forward pass of this step.
            _step, train_loss, train_acc = session.run(
                [train_step, loss, accuracy], feed_dict=train_dict
            )

        test_loss, test_acc = session.run([loss, accuracy], feed_dict=test_dict)
        print('Epoch:{}, Test Loss: {:.2} , Test Acc: {:.5}'.format(epoch+1, test_loss , test_acc))
            




Epoch:1, Test Loss: 0.67 , Test Acc: 0.66667
Epoch:2, Test Loss: 0.72 , Test Acc: 0.66667
Epoch:3, Test Loss: 0.77 , Test Acc: 0.68333
Epoch:4, Test Loss: 0.89 , Test Acc: 0.73333
Epoch:5, Test Loss: 0.77 , Test Acc: 0.66667
Epoch:6, Test Loss: 0.98 , Test Acc: 0.7
Epoch:7, Test Loss: 1.3 , Test Acc: 0.68333
Epoch:8, Test Loss: 1.7 , Test Acc: 0.66667
Epoch:9, Test Loss: 3.9 , Test Acc: 0.68333
Epoch:10, Test Loss: 0.83 , Test Acc: 0.68333
Epoch:11, Test Loss: 1.1 , Test Acc: 0.68333
Epoch:12, Test Loss: 1.6 , Test Acc: 0.66667
Epoch:13, Test Loss: 1.8 , Test Acc: 0.7
Epoch:14, Test Loss: 1.9 , Test Acc: 0.7
Epoch:15, Test Loss: 2.0 , Test Acc: 0.7
Epoch:16, Test Loss: 2.0 , Test Acc: 0.7
Epoch:17, Test Loss: 2.1 , Test Acc: 0.7
Epoch:18, Test Loss: 2.1 , Test Acc: 0.7
Epoch:19, Test Loss: 2.1 , Test Acc: 0.7
Epoch:20, Test Loss: 2.1 , Test Acc: 0.71667
