In [None]:
from nltk.tokenize import word_tokenize 
from nltk.stem import WordNetLemmatizer 
import numpy as np 
import random 
import pickle 
from collections import Counter 

import tensorflow as tf 


# Corpus files; every line of a file is treated as one sample of that class.
pos = 'Data/sentences/positive.txt'
neg = 'Data/sentences/negative.txt'

# Upper bound on the number of lines read from each corpus file.
max_lines = 10 ** 7

# Single WordNet lemmatizer instance shared by the lexicon and feature builders.
lemmatizer = WordNetLemmatizer()

def create_lexicon(pos, neg, min_count=50, max_count=1000):
    """Build the vocabulary used to vectorise sentences.

    Each line of both files (at most the module-level ``max_lines`` lines per
    file) is lower-cased, tokenised and lemmatised; lemma frequencies are
    accumulated over both files together.

    Args:
        pos: Path of the first text file to scan.
        neg: Path of the second text file to scan.
        min_count: Exclusive lower bound on a lemma's total frequency.
        max_count: Exclusive upper bound on a lemma's total frequency.

    Returns:
        List of lemmas whose total frequency satisfies
        ``min_count < frequency < max_count``, in the counter's iteration
        order.
    """
    w_counts = Counter()
    for fi in [pos, neg]:
        with open(fi, 'r') as f:
            # Stream the file line by line so neither the raw lines nor the
            # full token list have to be held in memory before counting.
            for line_no, line in enumerate(f):
                if line_no >= max_lines:
                    break
                w_counts.update(
                    lemmatizer.lemmatize(token)
                    for token in word_tokenize(line.lower())
                )

    # Keep only mid-frequency lemmas: both bounds are exclusive.
    return [w for w, count in w_counts.items() if max_count > count > min_count]
 
def sample_handling(sample,lexicon,classification):
    """Turn every line of a text file into a bag-of-words feature vector.

    Each line (at most the module-level ``max_lines`` lines) is lower-cased,
    tokenised and lemmatised; every lemma found in ``lexicon`` increments the
    counter at that lemma's position.

    Args:
        sample: Path of the text file to vectorise.
        lexicon: Sequence of lemmas; position in the sequence is the feature
            index. If an entry occurs more than once, its first position is
            used.
        classification: Label stored alongside every vector produced from
            this file (the same object is reused for every row).

    Returns:
        List of ``[features, classification]`` pairs, one per line read, where
        ``features`` is a list of ``len(lexicon)`` counts.
    """
    # Build the lemma -> index table once; a dict lookup per token avoids
    # rescanning the whole lexicon list (membership test plus .index()).
    # setdefault keeps the first position, matching list.index semantics.
    position = {}
    for idx, entry in enumerate(lexicon):
        position.setdefault(entry, idx)

    featureset = []
    with open(sample,'r') as f:
        # Stream the file instead of loading every line up front.
        for line_no, line in enumerate(f):
            if line_no >= max_lines:
                break
            features = np.zeros(len(lexicon))
            for token in word_tokenize(line.lower()):
                idx = position.get(lemmatizer.lemmatize(token).lower())
                if idx is not None:
                    features[idx] += 1

            featureset.append([list(features), classification])

    return featureset
 
# Build the vocabulary, then vectorise both corpora against it:
# label [1, 0] for the positive file, [0, 1] for the negative file.
lexicon = create_lexicon(pos, neg)
features = []
features += sample_handling(pos, lexicon, [1, 0])
features += sample_handling(neg, lexicon, [0, 1])

# Shuffle so both classes are mixed before the split. The global `random`
# generator is not seeded here, so the split differs from run to run.
random.shuffle(features)

# Hold out the last 10% of the shuffled samples for testing. An explicit
# split index is used because negative slicing breaks when testing_size is 0
# ([:-0] is empty, [-0:] is everything).
testing_size = int(0.1 * len(features))
split_at = len(features) - testing_size

# Split the plain Python list directly; each row is [vector, label].
X_train = [vector for vector, _ in features[:split_at]]
y_train = [label for _, label in features[:split_at]]
X_test = [vector for vector, _ in features[split_at:]]
y_test = [label for _, label in features[split_at:]]

# Keep `features` available as an array. Each row pairs a len(lexicon)-long
# vector with a 2-long label, i.e. a ragged sequence, so dtype=object must be
# requested explicitly (recent NumPy raises ValueError otherwise).
features = np.array(features, dtype=object)

In [None]:
# Training schedule: passes over the training set and samples per update.
n_epochs, batch_size = 10, 128

# Width of the two hidden layers (same size for both).
h1 = h2 = 500

# Size of the one-hot label vectors ([1, 0] / [0, 1]).
n_classes = 2

In [None]:
# Width of one input vector (one entry per lexicon word).
n_features = len(X_train[0])

# Placeholders carry explicit static shapes so a batch with the wrong feature
# or label width is rejected when it is fed, and downstream ops get full shape
# inference. The leading dimension is None so any batch size is accepted.
x_input = tf.placeholder('float', shape=[None, n_features])
y_input = tf.placeholder('float', shape=[None, n_classes])

# Parameters of each fully connected layer, initialised from a standard
# normal distribution. Weight shape is [inputs, outputs]; bias is [outputs].
hidden_1 = {'weight': tf.Variable(tf.random_normal([n_features, h1])),
            'bias': tf.Variable(tf.random_normal([h1]))}

hidden_2 = {'weight': tf.Variable(tf.random_normal([h1, h2])),
            'bias': tf.Variable(tf.random_normal([h2]))}

output_layer = {'weight': tf.Variable(tf.random_normal([h2, n_classes])),
                'bias': tf.Variable(tf.random_normal([n_classes]))}

In [None]:
# First hidden layer: affine transform of the input batch, then ReLU.
l1 = tf.nn.relu(
    tf.add(tf.matmul(x_input, hidden_1['weight']), hidden_1['bias'])
)

# Second hidden layer: same pattern, fed by the first layer's activations.
l2 = tf.nn.relu(
    tf.add(tf.matmul(l1, hidden_2['weight']), hidden_2['bias'])
)

# Output layer: affine transform only, no activation is applied here.
output = (
    tf.matmul(l2, output_layer['weight'])
    + output_layer['bias']
)

In [None]:
# Objective: softmax cross-entropy between the network output and the fed
# labels, averaged over the batch, minimised with Adam at default settings.
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_input, logits=output)
)
opt = tf.train.AdamOptimizer().minimize(loss)


with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    for epoch in range(n_epochs):
        # Running sum of the per-batch mean losses for this epoch.
        epoch_loss = 0
        # Walk the training set in consecutive slices of batch_size samples;
        # the final slice may be shorter.
        for start in range(0, len(X_train), batch_size):
            end = start + batch_size
            feed = {
                x_input: np.array(X_train[start:end]),
                y_input: np.array(y_train[start:end]),
            }

            # One optimiser step; the loss is fetched in the same run call.
            _, batch_loss = sess.run([opt, loss], feed_dict=feed)
            epoch_loss += batch_loss

        print('Epoch {}: loss {}'.format(epoch, epoch_loss))