In [55]:
# standard library
import math
import os
import string

# working with data
import pandas as pd
from matplotlib import pyplot as plt

# text preprocessing libraries
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize

In [56]:
# read in the data
# The data directory is resolved relative to the working directory: <cwd>/../data.
# Every component goes through os.path.join instead of hand-concatenated separators.
path = os.path.join(os.path.normpath(os.path.join(os.getcwd(), os.pardir)), "data")

# NOTE(review): read_csv's default NA handling turns cells such as "nan" or "null"
# into missing values; if the vocabulary can contain those words they would lose
# their index label here -- confirm against the CSV.
ml_model = pd.read_csv(os.path.join(path, 'cleaned', 'ml_model.csv'))

# The first CSV column holds the words; make it the index so a word can be looked
# up by label. Rebinding (rather than inplace=True) keeps the step a plain
# expression that can be chained or re-run safely.
ml_model = ml_model.set_index(ml_model.columns[0])

print(ml_model.sort_values("NEG_BAYES"))


            POS_BAYES  NEG_BAYES
Unnamed: 0                      
timeless     0.994118   0.005882
fallout      0.992701   0.007299
selma        0.992424   0.007576
superbly     0.991736   0.008264
ronan        0.991379   0.008621
...               ...        ...
roommate     0.007084   0.992916
seagal       0.006210   0.993790
gotti        0.005737   0.994263
flatliners   0.005150   0.994850
sara         0.004672   0.995328

[40477 rows x 2 columns]


In [57]:
# Translation table that deletes every ASCII punctuation character; it never
# changes, so it is built once instead of on every clean() call.
_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)

# Lazily filled by _clean_resources(); holds the lemmatizer and the stop word set.
_CLEAN_CACHE = {}


def _clean_resources():
    '''
    Returns the (lemmatizer, stop word set) pair, building it on first use

    Neither object depends on the text being cleaned, so they are created once
    and reused by every later clean() call.
    '''
    if not _CLEAN_CACHE:
        # Build both before storing either, so a failure (e.g. missing nltk
        # data) cannot leave the cache half-populated.
        lemmatizer = WordNetLemmatizer()
        stop_words = frozenset(stopwords.words('english'))
        _CLEAN_CACHE.update(lemmatizer=lemmatizer, stop_words=stop_words)
    return _CLEAN_CACHE['lemmatizer'], _CLEAN_CACHE['stop_words']


def clean(sample_text):
    '''
    casefolding, punctuation removal, tokenisation, lemmatisation, stopword removal

    sample_text: any object; it is converted with str() before processing
    returns: list of lowercase, lemmatised tokens with english stopwords removed
    '''
    lemmatizer, stop_words = _clean_resources()

    sample_text = str(sample_text).lower()
    sample_text = sample_text.translate(_PUNCTUATION_TABLE)
    # Consider using regex to address back to back punctuation, like day-to-day

    tokens = [lemmatizer.lemmatize(word) for word in word_tokenize(sample_text)]

    #consider changing this later, to take care of bi grams, where words such as 'not' may be important
    # NOTE(review): stopwords are filtered after lemmatisation, so a stopword whose
    # lemma differs from its surface form would slip through -- confirm this order
    # matches how the model vocabulary was built before changing it.
    return [word for word in tokens if word not in stop_words]

In [58]:
def _log_probability(probability):
    '''
    Natural log of a probability; values <= 0 map to -inf instead of raising
    '''
    if probability <= 0:
        return float("-inf")
    return math.log(probability)


def review_scorer(ml_model, review, dampener=20):
    '''
    Generates a score from -100 to 100, based on how strongly positive or negative the ml model predicts a review is

    ml_model: DataFrame indexed by word with "POS_BAYES" and "NEG_BAYES" columns
    review: text to score; it is passed through clean() first
    dampener: a word only contributes when 100 * |POS_BAYES - NEG_BAYES| exceeds this value

    The score is 100 * (P - N) / (P + N), where P and N are the products of the
    contributing words' POS_BAYES / NEG_BAYES values. Words missing from the model
    index are ignored. Returns 0.0 when no word contributes, or when P and N are equal.
    '''
    # Accumulate log-probabilities instead of multiplying raw probabilities: on a
    # long review both products underflow to 0.0 and the ratio becomes 0/0.
    log_pos = 0.0
    log_neg = 0.0
    for word in clean(review):
        if word not in ml_model.index:
            continue
        pos_prob = ml_model.at[word, "POS_BAYES"]
        neg_prob = ml_model.at[word, "NEG_BAYES"]
        certainty = 100 * abs(pos_prob - neg_prob)
        if certainty > dampener:
            log_pos += _log_probability(pos_prob)
            log_neg += _log_probability(neg_prob)

    # Equal evidence on both sides (including "no contributing words" and the
    # degenerate case where both products are zero) is a neutral score.
    if log_pos == log_neg:
        return 0.0
    # (P - N) / (P + N) == tanh((ln P - ln N) / 2), which stays finite in log space.
    return 100 * math.tanh((log_pos - log_neg) / 2)

def review_classifier(ml_model, review, dampener=20, confidence=30):
    '''
    Labels a movie review "POSITIVE", "NEGATIVE" or "UNSURE" from its review_scorer score

    A score of at least `confidence` is POSITIVE, a score of at most `-confidence`
    is NEGATIVE, and anything in between is UNSURE.
    '''
    verdict_score = review_scorer(ml_model, review, dampener)

    # The positive check comes first, so it wins if both thresholds are met.
    if verdict_score >= confidence:
        return "POSITIVE"
    if verdict_score <= -confidence:
        return "NEGATIVE"
    return "UNSURE"

In [59]:
# score and classify a few short sample reviews, one after another
for review in (
    "Watching the film is like bathing in the sun",
    "It was like drinking bleach",
    "It was like drinking wine",
):
    print(review_scorer(ml_model, review))
    print(review_classifier(ml_model, review))


31.511276369736056
POSITIVE
-51.67771361698523
NEGATIVE
55.59934491457741
POSITIVE


In [60]:
# two reviews of the same film with opposite verdicts
for review in (
    "Is there anything hugely wrong with Aquaman? Not really. Is this a missed opportunity? Yes.",
    "Aquaman works because it isn't laughing at itself-it's both joyously whimsical and confident in its own sea-worthiness.",
):
    print(review_scorer(ml_model, review))
    print(review_classifier(ml_model, review))

-79.41864765945644
NEGATIVE
99.84215773652208
POSITIVE


In [61]:
# a review containing words the model may not know
review = "the movie made me feel sigma and tingly inside"
print(
    review_scorer(ml_model, review),
    review_classifier(ml_model, review),
    sep="\n",
)

54.40740647547831
POSITIVE


In [62]:
# a review whose meaning hinges on word order ("love to never watch")
review = "I'd love to never watch this movie again"
print(
    review_scorer(ml_model, review),
    review_classifier(ml_model, review),
    sep="\n",
)

7.712161496686293
UNSURE


In [137]:
# Evaluation of Ml_model
import ast

# test set; eval_model reads its review_text and sentiment columns
test_data = pd.read_csv(f"{path}/test/test.csv")

def eval_model(ml_model, test_data, confidence, dampener=20):
    '''
    Counts how review_classifier's predictions compare with the labelled sentiments

    ml_model: model passed through to review_classifier
    test_data: DataFrame with "review_text" and "sentiment" columns
    confidence: confidence threshold passed to review_classifier
    dampener: dampener passed to review_classifier (defaults to 20)

    returns: (true_pos, true_neg, false_pos, false_neg, unsure), where unsure counts
    every review predicted "UNSURE" and the other four split the POSITIVE / NEGATIVE
    predictions by whether they match the labelled sentiment
    '''
    true_pos = true_neg = false_pos = false_neg = unsure = 0

    for review_text, sentiment in zip(test_data["review_text"], test_data["sentiment"]):
        prediction = review_classifier(ml_model, review_text, dampener=dampener, confidence=confidence)
        if prediction == "POSITIVE":
            if sentiment == prediction:
                true_pos += 1
            else:
                false_pos += 1
        elif prediction == "NEGATIVE":
            if sentiment == prediction:
                true_neg += 1
            else:
                false_neg += 1
        else:
            # any other prediction ("UNSURE") is an abstention, whatever the label
            unsure += 1

    return (true_pos, true_neg, false_pos, false_neg, unsure)

# baseline run: confidence 0, so any score >= 0 counts as POSITIVE and the rest as NEGATIVE
(true_pos, true_neg, false_pos, false_neg, indecision) = eval_model(
    ml_model=ml_model,
    test_data=test_data,
    confidence=0,
)

In [138]:
#ml_model evaluation metrics
def _pct(part, whole):
    '''
    part as a percentage of whole
    '''
    return 100 * part / whole


def _f1(precision, recall):
    '''
    harmonic mean of precision and recall
    '''
    return 2 * precision * recall / (recall + precision)


# only reviews that received a POSITIVE / NEGATIVE prediction count as decisions
total_decisions = false_pos + false_neg + true_pos + true_neg
accuracy = _pct(true_pos + true_neg, total_decisions)

# positive class
pos_precision = _pct(true_pos, true_pos + false_pos)
pos_recall = _pct(true_pos, true_pos + false_neg)
pos_f1 = _f1(pos_precision, pos_recall)

# negative class
neg_precision = _pct(true_neg, true_neg + false_neg)
neg_recall = _pct(true_neg, true_neg + false_pos)
neg_f1 = _f1(neg_precision, neg_recall)

# fraction (not percentage) of reviews the model committed to a label for
reliability = total_decisions / (total_decisions + indecision)

print(accuracy, pos_f1, neg_f1)
print(pos_precision, pos_recall, neg_precision, neg_recall)
print(reliability)

72.02998846597463 76.37603507062835 65.72438162544168
73.20261437908496 79.83706720977597 70.13574660633485 61.83510638297872
1.0


In [141]:
# number of test reviews labelled POSITIVE, then NEGATIVE
print(
    len(test_data[test_data["sentiment"] == "POSITIVE"]),
    len(test_data[test_data["sentiment"] == "NEGATIVE"]),
    sep="\n",
)

982
752


In [143]:
# testing the self-awareness of ml_model
# result: positive correlation between ml_model confidence score and its evaluation metrics

def _safe_ratio(numerator, denominator):
    '''
    numerator / denominator, or NaN when the denominator is zero
    '''
    return numerator / denominator if denominator else float("nan")


# Each iteration calls eval_model, which classifies every test review again,
# so this sweep costs 100 full passes over test_data.
for confidence in range(100):
    true_pos, true_neg, false_pos, false_neg, indecision = eval_model(ml_model, test_data, confidence)
    total_decisions = false_pos + false_neg + true_pos + true_neg

    # A threshold can leave no POSITIVE or no NEGATIVE decisions at all; report
    # NaN for the affected metrics instead of aborting the sweep with a
    # ZeroDivisionError.
    accuracy = _safe_ratio(100 * (true_pos + true_neg), total_decisions)
    pos_precision = _safe_ratio(100 * true_pos, true_pos + false_pos)
    pos_recall = _safe_ratio(100 * true_pos, true_pos + false_neg)
    pos_f1 = _safe_ratio(2 * pos_precision * pos_recall, pos_recall + pos_precision)
    neg_precision = _safe_ratio(100 * true_neg, true_neg + false_neg)
    neg_recall = _safe_ratio(100 * true_neg, true_neg + false_pos)
    neg_f1 = _safe_ratio(2 * neg_precision * neg_recall, neg_recall + neg_precision)
    reliability = _safe_ratio(total_decisions, total_decisions + indecision)

    print(confidence, accuracy, pos_f1, neg_f1, reliability)

0 72.02998846597463 76.37603507062835 65.72438162544168 1.0
1 72.2189866045428 76.46768623581647 66.09808102345416 0.9901960784313726
2 72.34915055653192 76.58730158730158 66.23748211731044 0.9844290657439446
3 72.44418331374852 76.67826951765291 66.33165829145729 0.9815455594002307
4 72.59870359457867 76.80798004987531 66.52267818574514 0.9786620530565168
5 72.91049199762892 77.09273182957394 66.8600435097897 0.9728950403690888
6 72.94887039239 77.14716223003516 66.86088856518572 0.9700115340253749
7 73.00357568533968 77.20181177654756 66.91015339663988 0.9677047289504037
8 73.14593301435407 77.33467945482082 67.05796038151138 0.9642445213379469
9 73.22929171668667 77.42914979757084 67.10914454277285 0.9607843137254902
10 73.32931968693558 77.52409944190765 67.20947446336046 0.9579008073817762
11 73.35744424352019 77.54065040650406 67.25925925925925 0.9567474048442907
12 73.4422262552934 77.63627101375447 67.31198808637379 0.9532871972318339
13 73.60097323600974 77.78915046059366 67.4