# Sentiment classification of TV reviews using ReLU  
Sentiment analysis of "[Tatort](https://en.wikipedia.org/wiki/Tatort)" Reviews.  
Data source: Facebook comments pulled from weekly 'poll' postings on the 'dasErste' FB page.

1 = positive review  
2 = negative review

## Import and review the data

In [185]:
import pandas as pd
import numpy as np
import tensorflow as tf
import tflearn
from tflearn.data_utils import to_categorical, pad_sequences

tatortreviews = pd.read_csv('data/tatort_reviews_labeled.csv')
print("Number of reviews: ", len(tatortreviews))

tatortreviews.head()

Number of reviews:  2000


Unnamed: 0,Review,Evalu
0,schlechteste ever 😣,0
1,6 absolut kein thema mehr,0
2,"ein sehr guter tatort (y), das franken-team so...",1
3,absolute 6!,0
4,"kann nicht mitreden. ""x-men: apocalypse"" auf s...",0


In [186]:
reviews = tatortreviews['Review']
labels = tatortreviews['Evalu']

#### Bag of Words

In [187]:
from collections import Counter

total_wordcount = Counter()

for row in reviews:
    total_wordcount.update(row.split(" "))
    
print("Total word count: ", len(total_wordcount))

Total word count:  6016


In [188]:
vocabulary = sorted(total_wordcount, key=total_wordcount.get, reverse=True)[:6000]
print("Num of words: ", len(vocabulary))
print(vocabulary)

Num of words:  6000
['und', 'ich', 'die', 'der', 'war', 'tatort', 'nicht', 'das', 'sehr', 'ein', 'mal', 'gut', 'eine', 'den', 'es', 'aber', 'zu', 'wieder', '1', 'mit', 'mir', 'so', 'auch', 'in', 'note', 'hat', 'von', 'ist', 'für', 'fand', 'nur', 'wie', 'super', 'man', 'immer', '2', 'noch', 'was', 'mich', '6', 'guter', '-', 'zum', 'glatte', 'thema', 'spannend', 'endlich', 'schon', 'ihn', 'mehr', 'ganz', 'habe', 'im', 'als', 'auf', 'er', '!', 'sich', 'tatort.', 'einen', 'da', 'dem', 'aus', 'dass', 'sind', 'nach', 'einfach', '...', 'dann', 'an', 'bis', 'etwas', 'richtig', 'wenn', 'kann', 'bin', 'doch', 'um', 'seit', 'gut.', 'klasse', 'leider', 'gute', 'ne', ',', '👍', 'wirklich', 'bei', 'viel', 'ende', 'ja', 'wird', 'am', 'einer', 'waren', 'oder', 'gar', 'tatort,', 'wir', 'geht', 'sie', 'letzten', '.', 'alle', 'kein', 'keine', 'echt', 'ohne', '5', 'einem', 'dieser', 'gut,', 'schauspieler', '1.', 'eigentlich', 'hätte', '3', 'team', 'gefallen', 'hab', 'haben', 'beiden', 'krimi', '4', 'bitte'

In [189]:
wordindex = {word: i for i, word in enumerate(vocabulary)}
wordindex

{'und': 0,
 'ich': 1,
 'die': 2,
 'der': 3,
 'war': 4,
 'tatort': 5,
 'nicht': 6,
 'das': 7,
 'sehr': 8,
 'ein': 9,
 'mal': 10,
 'gut': 11,
 'eine': 12,
 'den': 13,
 'es': 14,
 'aber': 15,
 'zu': 16,
 'wieder': 17,
 '1': 18,
 'mit': 19,
 'mir': 20,
 'so': 21,
 'auch': 22,
 'in': 23,
 'note': 24,
 'hat': 25,
 'von': 26,
 'ist': 27,
 'für': 28,
 'fand': 29,
 'nur': 30,
 'wie': 31,
 'super': 32,
 'man': 33,
 'immer': 34,
 '2': 35,
 'noch': 36,
 'was': 37,
 'mich': 38,
 '6': 39,
 'guter': 40,
 '-': 41,
 'zum': 42,
 'glatte': 43,
 'thema': 44,
 'spannend': 45,
 'endlich': 46,
 'schon': 47,
 'ihn': 48,
 'mehr': 49,
 'ganz': 50,
 'habe': 51,
 'im': 52,
 'als': 53,
 'auf': 54,
 'er': 55,
 '!': 56,
 'sich': 57,
 'tatort.': 58,
 'einen': 59,
 'da': 60,
 'dem': 61,
 'aus': 62,
 'dass': 63,
 'sind': 64,
 'nach': 65,
 'einfach': 66,
 '...': 67,
 'dann': 68,
 'an': 69,
 'bis': 70,
 'etwas': 71,
 'richtig': 72,
 'wenn': 73,
 'kann': 74,
 'bin': 75,
 'doch': 76,
 'um': 77,
 'seit': 78,
 'gut.': 79,
 '

## Mapping text to vector

In [190]:
def text_to_vector(text):
    word_vector = np.zeros(len(vocabulary), dtype = np.int_)
    for word in text.split(' '):
        idx = wordindex.get(word, None)
        if idx is None:
            continue
        else:
            word_vector[idx] += 1
    return np.array(word_vector)
 
word_vectors = np.zeros((len(reviews), len(vocabulary)), dtype=np.int_)    
for ii, text in enumerate(reviews):
    word_vectors[ii] = text_to_vector(text)

### Split data into Train, Validation, Test sets

In [191]:
#from sklearn.model_selection import train_test_split
#X_train, X_test, Y_train, Y_test = train_test_split(reviews, labels, test_size=0.1, random_state=666)

#print(len(X_train))
#print(len(Y_train))

Y = labels
records = len(labels)

shuffle = np.arange(records)
np.random.shuffle(shuffle)
test_fraction = 0.9

train_split, test_split = shuffle[:int(records*test_fraction)], shuffle[int(records*test_fraction):]
X_train, Y_train = word_vectors[train_split], to_categorical(Y.values[train_split], 2)
X_test, Y_test = word_vectors[test_split], to_categorical(Y.values[test_split], 2)

## Building the network

In [205]:
def build_network_model():
    tf.reset_default_graph()
    
    #Inputs
    net = tflearn.input_data([None, 6000])
    
    #Hidden layer
    net = tflearn.fully_connected(net, 120, activation='ReLU')
    
    #Output Layer
    net = tflearn.fully_connected(net, 2, activation='softmax')
    net = tflearn.regression(net, optimizer='sgd', learning_rate=0.1, loss='categorical_crossentropy')
    
    model = tflearn.DNN(net)
    return model


# Now, build it!
model = build_network_model()

## Training the network


In [206]:
model.fit(X_train, Y_train, validation_set = 0.1, show_metric=True, batch_size=64, n_epoch=50)

Training Step: 1299  | total loss: [1m[32m0.10573[0m[0m | time: 0.205s
| SGD | epoch: 050 | loss: 0.10573 - acc: 0.9878 -- iter: 1600/1620
Training Step: 1300  | total loss: [1m[32m0.10191[0m[0m | time: 1.216s
| SGD | epoch: 050 | loss: 0.10191 - acc: 0.9890 | val_loss: 0.29371 - val_acc: 0.8722 -- iter: 1620/1620
--


## Testing 

In [208]:
predictions = (np.array(model.predict(X_test)) >= 0.5).astype(np.int_)
test_accuracy = np.mean(predictions == Y_test, axis=0)
print("Test accuracy: ", test_accuracy)

Test accuracy:  [ 0.85  0.85]


In [223]:
# Check with a custom sentence.
# res = the result of the prediction, containing the positive and negative probability.
res = model.predict([text_to_vector("eher doof")])[0]
print(res)
if(res[1] > res[0]):
    print('Positiv!')
else:
    print('Negativ!')

[0.8736256957054138, 0.1263742595911026]
Negativ!
