# Predicting movie reviews with a three-layer neural network (from scratch)

In [1]:
import sys
import numpy as np

np.random.seed(1)

In [2]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

In [3]:
f = open('reviews.txt', 'r')
review_tokens = list(filter(lambda x: x not in ['', '\n'], map(lambda x: set(x.split(' ')), f.readlines())))
f.close()

f = open('labels.txt', 'r')
labels = [1 if label == 'positive\n' else 0 for label in f.readlines()]
f.close()

vocab = set()
for sentence in review_tokens:
    for word in sentence:
        if len(word) > 0:
            vocab.add(word)
vocab = list(vocab)

word_to_index = {}
for word_index, word in enumerate(vocab):
    word_to_index[word] = word_index

reviews = []
for sentence in review_tokens:
    sentence_indices = []
    for word in sentence:
        try:
            sentence_indices.append(word_to_index[word])
        except KeyError:
            pass
    reviews.append(list(set(sentence_indices)))

# features = np.array([[1 if word_index == word_to_index[word] else 0 for word_index, word in enumerate(vocab)] for review in reviews])
one_hot_matrix = []
for review in reviews:
    one_hot_vector = [0 for _ in range(len(vocab))]
    for word_index in review:
        one_hot_vector[word_index] = 1
    one_hot_matrix.append(one_hot_vector)
features = np.array(one_hot_matrix)

In [34]:
# np.save('review_features', features)
# np.save('review_labels', labels)

In [35]:
# features = np.load('review_features')
# labels = np.load('review_labels')

In [36]:
alpha, num_of_epochs, input_size, hidden_size, output_size, correct, total = (0.01, 300, len(vocab), 5, 1, 0, 0)
weights_0_1 = 2 * np.random.random((input_size, hidden_size)) - 1
weights_1_2 = 2 * np.random.random((hidden_size, output_size)) - 1

In [37]:
for epoch in range(num_of_epochs):
    error = 0
    total_error_squared = 0
    for i in range(int(len(features) * 0.8)):
        layer_0 = features[i:i + 1]
        layer_1 = sigmoid(np.dot(layer_0, weights_0_1))
        layer_2 = np.dot(layer_1, weights_1_2)

        error = layer_2 - labels[i:i + 1]
        layer_2_delta = error

        layer_1_delta = np.dot(layer_2_delta, weights_1_2.T)

        weights_1_2 -= alpha * np.dot(layer_1.T, layer_2_delta)
        weights_0_1 -= alpha * np.dot(layer_0.T, layer_1_delta)

        if np.abs(layer_2_delta) < 0.5:
            correct += 1
        total += 1

        if i % 10 == 9:
            progress = str(i/float(len(features)))
            sys.stdout.write('\rIteration:'+str(iter)\
                             +' Progress:'+progress[2:4]\
                             +'.'+progress[4:6]\
                             +'% Training Accuracy:'\
                             + str(correct/float(total)) + '%')
    print()

correct, total = (0,0)
for i in range(int(len(features) * 0.8), len(features)):

    x = features[i:i + 1]
    y = labels[i:i + 1]

    layer_1 = sigmoid(np.sum(weights_0_1, axis=0))
    layer_2 = sigmoid(np.dot(layer_1, weights_1_2))

    if np.abs(layer_2 - y) < 0.5:
        correct += 1
    total += 1
print("Test Accuracy:" + str(correct / float(total)))

Iteration:<built-in function iter> Progress:79.% Training Accuracy:0.5%42857142857142%%
Iteration:<built-in function iter> Progress:79.% Training Accuracy:0.55%3333333333333%
Iteration:<built-in function iter> Progress:79.% Training Accuracy:0.5791666666666667%
Iteration:<built-in function iter> Progress:79.% Training Accuracy:0.60625%806451613%%
Iteration:<built-in function iter> Progress:79.% Training Accuracy:0.635%333333333333%
Iteration:<built-in function iter> Progress:79.% Training Accuracy:0.6666666666666666%
Iteration:<built-in function iter> Progress:79.% Training Accuracy:0.6910714285714286%
Iteration:<built-in function iter> Progress:79.% Training Accuracy:0.7125%38095238096%
Iteration:<built-in function iter> Progress:79.% Training Accuracy:0.7291666666666666%
Iteration:<built-in function iter> Progress:79.% Training Accuracy:0.7425%6329113924%%
Iteration:<built-in function iter> Progress:79.% Training Accuracy:0.7556818181818182%
Iteration:<built-in function iter> Progres