# Import Needed Python Modules

In [1]:
import pandas as pd
import numpy as np
import nltk
from textblob import TextBlob
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from nltk.sentiment import SentimentIntensityAnalyzer
from nltk.corpus import opinion_lexicon
from sklearn.metrics import confusion_matrix

# Overall Data Preparation

In [2]:
# Load the review sample and give its three columns explicit names. Because
# `names` is supplied, the first line of the file is read as data, not as a header.
# No lowercasing step is applied here: the file already appears to be all lowercase.
movies = pd.read_csv(
    'MovieReview-Sample.csv',
    names=['Review_Number', 'Review', 'Positive_or_Negative'],
)

# Keep only what the analysis needs: the review text and its true label.
# The 'Review_Number' column is dropped implicitly by never being referenced again.
reviews = movies['Review']
actual_classifications = movies['Positive_or_Negative']

# Question 1: Four Performance Comparisons

## Question 1a: Bing Liu's Lexicon

### Creation of the "Positive and Negative Words Counter" Function (stolen directly from Lab)

In [3]:
def count_positive_and_negative_words(data, positive_words_dictionary, negative_words_dictionary):
    """Score each text by how many lexicon entries it contains.

    For every text in ``data`` this counts the entries of each lexicon that occur
    in the text and records the net score (positive count minus negative count).

    Matching uses Python's substring test (``word in text``), so a lexicon entry
    is counted at most once per text and also matches inside longer words.

    Parameters
    ----------
    data : sequence of str
        The texts to score, e.g. a pandas Series or a list of strings.
    positive_words_dictionary : collection of str
        Lexicon entries treated as positive. Iterated once per text.
    negative_words_dictionary : collection of str
        Lexicon entries treated as negative. Iterated once per text.

    Returns
    -------
    pandas.DataFrame
        One row per text with the columns 'Review', 'Postive_Words_Count',
        'Negative_Words_Count' and 'Overall_Sentiment'.
    """
    positive_count = []
    negative_count = []
    net_assessment = []

    # Walk the values directly rather than looking up data[0], data[1], ...:
    # on a Series that lookup is label-based and fails when the index is not 0..n-1.
    for text in data:
        n_positive = sum(1 for word in positive_words_dictionary if word in text)
        n_negative = sum(1 for word in negative_words_dictionary if word in text)

        positive_count.append(n_positive)
        negative_count.append(n_negative)
        net_assessment.append(n_positive - n_negative)

    # The 'Review' column comes from the `data` argument so the function scores
    # whatever it was given instead of depending on a notebook-level variable.
    # The 'Postive_Words_Count' spelling is kept as-is because callers may select
    # the column by that exact name.
    results = {
        'Review': data,
        'Postive_Words_Count': positive_count,
        'Negative_Words_Count': negative_count,
        'Overall_Sentiment': net_assessment,
    }

    return pd.DataFrame(results)

### Apply Bing Liu's Lexicon to the movie reviews data and compare with the true labels

In [4]:
# Bing Liu's lexicon is read through `opinion_lexicon`, imported with the other modules.
# If the corpus is not available locally yet, run the download below once.
#nltk.download("opinion_lexicon")

# Positive and negative word sets taken from Bing Liu's lexicon
positive_list_BL = set(opinion_lexicon.positive())
negative_list_BL = set(opinion_lexicon.negative())

# Score every review with the counter function; the result is a new data frame
# holding the per-review counts and the net sentiment.
Bing_Liu_Lexicon_dataframe = count_positive_and_negative_words(reviews, positive_list_BL, negative_list_BL)

# Convert the net score ('Overall_Sentiment') into a 1/0 label.
# A net score of exactly 0 goes into the negative bucket (coded 0): a movie that
# generates a totally neutral sentiment is considered a flop.
Bing_Liu_model_classifications = [
    1 if net_score > 0 else 0
    for net_score in Bing_Liu_Lexicon_dataframe['Overall_Sentiment']
]
Bing_Liu_Lexicon_dataframe['Bing_Liu_Model_Classifications'] = Bing_Liu_model_classifications

# Put the true labels next to the predictions so the two can be compared
Bing_Liu_Lexicon_dataframe['Actual_Classifications'] = actual_classifications


### Calculate the performance of the Bing Liu Lexicon Model

In [5]:
# Confusion matrix of the true labels against the Bing Liu predictions.
# Call signature: confusion_matrix(y_true, y_pred)
#
# Cell layout for binary classification, per the scikit-learn documentation:
#   M[0,0] = true negatives    M[0,1] = false positives
#   M[1,0] = false negatives   M[1,1] = true positives
Bing_Liu_confusion_matrix = confusion_matrix(
    Bing_Liu_Lexicon_dataframe['Actual_Classifications'],
    Bing_Liu_Lexicon_dataframe['Bing_Liu_Model_Classifications'],
)

# Pull out the four cells, one matrix row per line
Bing_Liu_true_negative, Bing_Liu_false_positive = Bing_Liu_confusion_matrix[0, 0], Bing_Liu_confusion_matrix[0, 1]
Bing_Liu_false_negative, Bing_Liu_true_positive = Bing_Liu_confusion_matrix[1, 0], Bing_Liu_confusion_matrix[1, 1]

# Precision per class: correct predictions of a class / all predictions of that class
Bing_Liu_positive_precision = Bing_Liu_true_positive / (Bing_Liu_true_positive + Bing_Liu_false_positive)
Bing_Liu_negative_precision = Bing_Liu_true_negative / (Bing_Liu_true_negative + Bing_Liu_false_negative)

# Recall per class: correct predictions of a class / all reviews truly in that class
Bing_Liu_positive_recall = Bing_Liu_true_positive / (Bing_Liu_true_positive + Bing_Liu_false_negative)
Bing_Liu_negative_recall = Bing_Liu_true_negative / (Bing_Liu_true_negative + Bing_Liu_false_positive)

# Unweighted mean of the positive and negative class values
Bing_Liu_average_precision = (Bing_Liu_positive_precision + Bing_Liu_negative_precision) / 2
Bing_Liu_average_recall = (Bing_Liu_positive_recall + Bing_Liu_negative_recall) / 2

# Average F score: harmonic mean of the averaged precision and the averaged recall
Bing_Liu_average_F_score = (
    2 * Bing_Liu_average_precision * Bing_Liu_average_recall
    / (Bing_Liu_average_precision + Bing_Liu_average_recall)
)

## Question 1b: Loughran-McDonald (LM) Dictionary

### Create the LM Dictionary from the two lab files, "negative-words-LM" and "positive-words-LM" (stolen directly from Lab)

In [6]:
def read_local_dictionary(file):
    """Read a word list from a text file that holds one entry per line.

    Every line is stripped of surrounding whitespace and lowercased. Lines that
    are empty after stripping are skipped: an empty string is a substring of
    every text, so keeping it would make a substring-based word counter record
    a spurious hit for every single review.

    Parameters
    ----------
    file : str or path-like
        Path of the text file to read.

    Returns
    -------
    list of str
        The cleaned entries, in the order they appear in the file.
    """
    with open(file, "r") as f:
        cleaned_lines = (line.strip().lower() for line in f)
        # Consume the generator while the file is still open
        return [entry for entry in cleaned_lines if entry]

### Apply LM Dictionary to the movie reviews data and compare with the true labels

In [7]:
# Positive and negative word lists read from the two LM files
positive_list_LM = read_local_dictionary('positive-words-LM.txt')
negative_list_LM = read_local_dictionary('negative-words-LM.txt')

# Score every review with the counter function; the result is a new data frame
# holding the per-review counts and the net sentiment.
LM_dataframe = count_positive_and_negative_words(reviews, positive_list_LM, negative_list_LM)

# Convert the net score ('Overall_Sentiment') into a 1/0 label.
# A net score of exactly 0 goes into the negative bucket (coded 0): a movie that
# generates a totally neutral sentiment is considered a flop.
LM_model_classifications = [
    1 if net_score > 0 else 0
    for net_score in LM_dataframe['Overall_Sentiment']
]
LM_dataframe['LM_Model_Classifications'] = LM_model_classifications

# Put the true labels next to the predictions so the two can be compared
LM_dataframe['Actual_Classifications'] = actual_classifications

### Calculate the performance of the LM Dictionary Model

In [8]:
# Confusion matrix of the true labels against the LM Dictionary predictions.
# Call signature: confusion_matrix(y_true, y_pred)
#
# Cell layout for binary classification, per the scikit-learn documentation:
#   M[0,0] = true negatives    M[0,1] = false positives
#   M[1,0] = false negatives   M[1,1] = true positives
LM_confusion_matrix = confusion_matrix(
    LM_dataframe['Actual_Classifications'],
    LM_dataframe['LM_Model_Classifications'],
)

# Pull out the four cells, one matrix row per line
LM_true_negative, LM_false_positive = LM_confusion_matrix[0, 0], LM_confusion_matrix[0, 1]
LM_false_negative, LM_true_positive = LM_confusion_matrix[1, 0], LM_confusion_matrix[1, 1]

# Precision per class: correct predictions of a class / all predictions of that class
LM_positive_precision = LM_true_positive / (LM_true_positive + LM_false_positive)
LM_negative_precision = LM_true_negative / (LM_true_negative + LM_false_negative)

# Recall per class: correct predictions of a class / all reviews truly in that class
LM_positive_recall = LM_true_positive / (LM_true_positive + LM_false_negative)
LM_negative_recall = LM_true_negative / (LM_true_negative + LM_false_positive)

# Unweighted mean of the positive and negative class values
LM_average_precision = (LM_positive_precision + LM_negative_precision) / 2
LM_average_recall = (LM_positive_recall + LM_negative_recall) / 2

# Average F score: harmonic mean of the averaged precision and the averaged recall
LM_average_F_score = (
    2 * LM_average_precision * LM_average_recall
    / (LM_average_precision + LM_average_recall)
)

## Question 1c: Textblob

### Make Textblob Dataframe (stolen directly from Lab) and compare with true labels

In [9]:
# Start from the review text and attach TextBlob's polarity score to each review.
# Note: TextBlob's polarity is a float within the range [-1.0, 1.0], where -1.0 is a
#       negative polarity and 1.0 is positive. It can also be exactly 0, which stands
#       for a neutral evaluation (the statement contains no words TextBlob knows).
textblob_dataframe = pd.DataFrame({'Review': reviews})
textblob_dataframe["Textblob_Score"] = textblob_dataframe["Review"].apply(
    lambda review_text: TextBlob(review_text).sentiment.polarity
)

# Convert the polarity ('Textblob_Score') into a 1/0 label.
# A score of exactly 0 goes into the negative bucket (coded 0): a movie that
# generates a totally neutral sentiment is considered a flop.
textblob_dataframe_classifications = [
    1 if polarity > 0 else 0
    for polarity in textblob_dataframe['Textblob_Score']
]
textblob_dataframe['Textblob_Model_Classifications'] = textblob_dataframe_classifications

# Put the true labels next to the predictions so the two can be compared
textblob_dataframe['Actual_Classifications'] = actual_classifications

### Calculate the performance of the Textblob Model

In [10]:
# Confusion matrix of the true labels against the TextBlob predictions.
# Call signature: confusion_matrix(y_true, y_pred)
#
# Cell layout for binary classification, per the scikit-learn documentation:
#   M[0,0] = true negatives    M[0,1] = false positives
#   M[1,0] = false negatives   M[1,1] = true positives
textblob_confusion_matrix = confusion_matrix(
    textblob_dataframe['Actual_Classifications'],
    textblob_dataframe['Textblob_Model_Classifications'],
)

# Pull out the four cells, one matrix row per line
textblob_true_negative, textblob_false_positive = textblob_confusion_matrix[0, 0], textblob_confusion_matrix[0, 1]
textblob_false_negative, textblob_true_positive = textblob_confusion_matrix[1, 0], textblob_confusion_matrix[1, 1]

# Precision per class: correct predictions of a class / all predictions of that class
textblob_positive_precision = textblob_true_positive / (textblob_true_positive + textblob_false_positive)
textblob_negative_precision = textblob_true_negative / (textblob_true_negative + textblob_false_negative)

# Recall per class: correct predictions of a class / all reviews truly in that class
textblob_positive_recall = textblob_true_positive / (textblob_true_positive + textblob_false_negative)
textblob_negative_recall = textblob_true_negative / (textblob_true_negative + textblob_false_positive)

# Unweighted mean of the positive and negative class values
textblob_average_precision = (textblob_positive_precision + textblob_negative_precision) / 2
textblob_average_recall = (textblob_positive_recall + textblob_negative_recall) / 2

# Average F score: harmonic mean of the averaged precision and the averaged recall
textblob_average_F_score = (
    2 * textblob_average_precision * textblob_average_recall
    / (textblob_average_precision + textblob_average_recall)
)

## Question 1d: VADER - from NLTK (stolen mostly from Lab)

### Make VADER dataframe and compare with true labels

In [11]:
# Build the analyzer once and reuse it for every review; constructing a fresh
# SentimentIntensityAnalyzer per review repeats its setup work without changing the scores.
vader_analyzer = SentimentIntensityAnalyzer()

# VADER scores, one Python dictionary per review with the keys
# "neg", "neu", "pos" and "compound"
scores = [vader_analyzer.polarity_scores(sentence) for sentence in reviews]

# Separate the sentiments into their respective buckets, per the dictionary keys above
negative_sentiment = [score["neg"] for score in scores]
neutral_sentiment = [score["neu"] for score in scores]
positive_sentiment = [score["pos"] for score in scores]
compound_sentiment = [score["compound"] for score in scores]

# VADER data frame: review text plus the "compound sentiment" score
VADER_dataframe = pd.DataFrame({'Review': reviews})
VADER_dataframe['VADER_Compound_Score'] = compound_sentiment

# Convert the compound score into a 1/0 label.
# Per the guidance in the Lab, a review is positive when its compound score is >= .05.
# Everything else, neutral scores included, goes into the negative bucket (coded 0):
# a movie that generates a totally neutral sentiment is considered a flop.
VADER_dataframe_classifications = [
    1 if compound >= .05 else 0
    for compound in VADER_dataframe['VADER_Compound_Score']
]
VADER_dataframe['VADER_Model_Classifications'] = VADER_dataframe_classifications

# Put the true labels next to the predictions so the two can be compared
VADER_dataframe['Actual_Classifications'] = actual_classifications

### Calculate the performance of the VADER Model

In [12]:
# Confusion matrix of the true labels against the VADER predictions.
# Call signature: confusion_matrix(y_true, y_pred)
#
# Cell layout for binary classification, per the scikit-learn documentation:
#   M[0,0] = true negatives    M[0,1] = false positives
#   M[1,0] = false negatives   M[1,1] = true positives
VADER_confusion_matrix = confusion_matrix(
    VADER_dataframe['Actual_Classifications'],
    VADER_dataframe['VADER_Model_Classifications'],
)

# Pull out the four cells, one matrix row per line
VADER_true_negative, VADER_false_positive = VADER_confusion_matrix[0, 0], VADER_confusion_matrix[0, 1]
VADER_false_negative, VADER_true_positive = VADER_confusion_matrix[1, 0], VADER_confusion_matrix[1, 1]

# Precision per class: correct predictions of a class / all predictions of that class
VADER_positive_precision = VADER_true_positive / (VADER_true_positive + VADER_false_positive)
VADER_negative_precision = VADER_true_negative / (VADER_true_negative + VADER_false_negative)

# Recall per class: correct predictions of a class / all reviews truly in that class
VADER_positive_recall = VADER_true_positive / (VADER_true_positive + VADER_false_negative)
VADER_negative_recall = VADER_true_negative / (VADER_true_negative + VADER_false_positive)

# Unweighted mean of the positive and negative class values
VADER_average_precision = (VADER_positive_precision + VADER_negative_precision) / 2
VADER_average_recall = (VADER_positive_recall + VADER_negative_recall) / 2

# Average F score: harmonic mean of the averaged precision and the averaged recall
VADER_average_F_score = (
    2 * VADER_average_precision * VADER_average_recall
    / (VADER_average_precision + VADER_average_recall)
)

## Question 1e: Make Performance Comparison Table of the Four Previous Models

In [13]:
# One tuple per model: (name, average precision, average recall, average F score).
# Keeping each model's numbers on one line makes it hard to misalign a column.
per_model_metrics = [
    ('Bing Liu Dictionary', Bing_Liu_average_precision, Bing_Liu_average_recall, Bing_Liu_average_F_score),
    ('LM Dictionary', LM_average_precision, LM_average_recall, LM_average_F_score),
    ('Textblob', textblob_average_precision, textblob_average_recall, textblob_average_F_score),
    ('VADER', VADER_average_precision, VADER_average_recall, VADER_average_F_score),
]

# Transpose the per-model tuples into one list per table column
model_names, precision_list, recall_list, F_score_list = (
    list(column) for column in zip(*per_model_metrics)
)

data = {
    'Model Name': model_names,
    'Average Precision': precision_list,
    'Average Recall': recall_list,
    'Average F-Score': F_score_list,
}

model_comparisons = pd.DataFrame(data)

In [14]:
# Display the four-model comparison table (the cell's last expression is rendered as its output)
model_comparisons

Unnamed: 0,Model Name,Average Precision,Average Recall,Average F-Score
0,Bing Liu Dictionary,0.661706,0.5495,0.600406
1,LM Dictionary,0.612191,0.5455,0.576924
2,Textblob,0.667701,0.5975,0.630653
3,VADER,0.607468,0.6025,0.604974


# Question 2: Ensembling to Increase Model Performance

## Step 1: Assemble a dataframe of the raw scores from the three best models and standardize the values ("best" is defined by F-score, since precision and recall are of approximately equal importance in movie review classification)

In [15]:
# Ranking the models by F-score leads to dropping the LM Dictionary; the raw
# scores of the remaining three models are placed side by side.
calculations_list = ['Bing_Liu_Model_Final_Sentiment', 'Textblob_Model_Final_Sentiment', 'VADER_Model_Final_Sentiment']

side_by_side_model_calculations = pd.DataFrame({
    'Reviews': reviews,
    'Bing_Liu_Model_Final_Sentiment': Bing_Liu_Lexicon_dataframe['Overall_Sentiment'],
    'Textblob_Model_Final_Sentiment': textblob_dataframe['Textblob_Score'],
    'VADER_Model_Final_Sentiment': VADER_dataframe['VADER_Compound_Score'],
})

# Put the three score columns on a common scale: subtract each column's mean and
# divide by its standard deviation (pandas' default .std(), i.e. the sample estimate).
original_column_values = side_by_side_model_calculations[calculations_list]
sample_mean = side_by_side_model_calculations[calculations_list].mean()
sample_stddev = side_by_side_model_calculations[calculations_list].std()

# Work on a copy so the unscaled scores stay available in side_by_side_model_calculations
standardized_calculations = side_by_side_model_calculations.copy()
standardized_calculations[calculations_list] = (original_column_values - sample_mean) / sample_stddev

In [16]:
# Preview the first rows of the standardized scores
standardized_calculations.head()

Unnamed: 0,Reviews,Bing_Liu_Model_Final_Sentiment,Textblob_Model_Final_Sentiment,VADER_Model_Final_Sentiment
0,films adapted from comic books have had plenty...,-0.703362,-1.544088,-1.401508
1,every now and then a movie comes along from a ...,0.114322,0.056327,0.68892
2,you've got mail works alot better than it dese...,1.749691,-0.012745,0.906751
3,jaws is a rare film that grabs your attent...,-0.805572,-0.176215,0.848942
4,moviemaking is a lot like being the general ma...,0.625375,-0.301433,0.89731


## Step 2: Create rules for a new classification column

In [17]:
# Ensemble rule: add up the three standardized scores of each review and label the
# review positive (1) when the total is above 0, negative (0) otherwise.
ensemble_score = (
    standardized_calculations['Bing_Liu_Model_Final_Sentiment']
    + standardized_calculations['Textblob_Model_Final_Sentiment']
    + standardized_calculations['VADER_Model_Final_Sentiment']
)

ensembled_classifications = [1 if total > 0 else 0 for total in ensemble_score]

# Attach the new labels to the existing data frame
standardized_calculations['Ensembled_Model_Classifications'] = ensembled_classifications

# Finally, add the true labels for comparison
standardized_calculations['Actual_Classifications'] = actual_classifications

## Step 3: Compute Performance of Ensemble Model

In [18]:
# Confusion matrix of the true labels against the ensemble predictions.
# Call signature: confusion_matrix(y_true, y_pred)
#
# Cell layout for binary classification, per the scikit-learn documentation:
#   M[0,0] = true negatives    M[0,1] = false positives
#   M[1,0] = false negatives   M[1,1] = true positives
ensemble_confusion_matrix = confusion_matrix(
    standardized_calculations['Actual_Classifications'],
    standardized_calculations['Ensembled_Model_Classifications'],
)

# Pull out the four cells, one matrix row per line
ensemble_true_negative, ensemble_false_positive = ensemble_confusion_matrix[0, 0], ensemble_confusion_matrix[0, 1]
ensemble_false_negative, ensemble_true_positive = ensemble_confusion_matrix[1, 0], ensemble_confusion_matrix[1, 1]

# Precision per class: correct predictions of a class / all predictions of that class
ensemble_positive_precision = ensemble_true_positive / (ensemble_true_positive + ensemble_false_positive)
ensemble_negative_precision = ensemble_true_negative / (ensemble_true_negative + ensemble_false_negative)

# Recall per class: correct predictions of a class / all reviews truly in that class
ensemble_positive_recall = ensemble_true_positive / (ensemble_true_positive + ensemble_false_negative)
ensemble_negative_recall = ensemble_true_negative / (ensemble_true_negative + ensemble_false_positive)

# Unweighted mean of the positive and negative class values
ensemble_average_precision = (ensemble_positive_precision + ensemble_negative_precision) / 2
ensemble_average_recall = (ensemble_positive_recall + ensemble_negative_recall) / 2

# Average F score: harmonic mean of the averaged precision and the averaged recall
ensemble_average_F_score = (
    2 * ensemble_average_precision * ensemble_average_recall
    / (ensemble_average_precision + ensemble_average_recall)
)

## Recreate Performance Comparison Table

In [19]:
# One tuple per model: (name, average precision, average recall, average F score).
# The LM Dictionary is left out and the ensemble is added as the last row.
per_model_metrics_2 = [
    ('Bing Liu Dictionary', Bing_Liu_average_precision, Bing_Liu_average_recall, Bing_Liu_average_F_score),
    ('Textblob', textblob_average_precision, textblob_average_recall, textblob_average_F_score),
    ('VADER', VADER_average_precision, VADER_average_recall, VADER_average_F_score),
    ('Ensemble Model', ensemble_average_precision, ensemble_average_recall, ensemble_average_F_score),
]

# Transpose the per-model tuples into one list per table column
model_names_2, precision_list_2, recall_list_2, F_score_list_2 = (
    list(column) for column in zip(*per_model_metrics_2)
)

data_2 = {
    'Model Name': model_names_2,
    'Average Precision': precision_list_2,
    'Average Recall': recall_list_2,
    'Average F-Score': F_score_list_2,
}

model_comparisons_2 = pd.DataFrame(data_2)

In [20]:
# Display the comparison table that now includes the ensemble model
model_comparisons_2

Unnamed: 0,Model Name,Average Precision,Average Recall,Average F-Score
0,Bing Liu Dictionary,0.661706,0.5495,0.600406
1,Textblob,0.667701,0.5975,0.630653
2,VADER,0.607468,0.6025,0.604974
3,Ensemble Model,0.65122,0.6505,0.65086


## Make table of percentage improvements

In [21]:
# Baseline rows: every model in the comparison table except the ensemble itself
is_baseline_model = model_comparisons_2['Model Name'] != 'Ensemble Model'
model_comparisons_3 = model_comparisons_2[is_baseline_model]

# Gap between the ensemble and each baseline, as a percentage of the ENSEMBLE's value.
# NOTE(review): a conventional "improvement over the baseline" divides by the baseline's
# value instead; confirm which definition is intended before quoting these numbers.
precision_improvement = [
    ((ensemble_average_precision - baseline_value) / ensemble_average_precision) * 100
    for baseline_value in model_comparisons_3['Average Precision']
]

recall_improvement = [
    ((ensemble_average_recall - baseline_value) / ensemble_average_recall) * 100
    for baseline_value in model_comparisons_3['Average Recall']
]

F_score_improvement = [
    ((ensemble_average_F_score - baseline_value) / ensemble_average_F_score) * 100
    for baseline_value in model_comparisons_3['Average F-Score']
]

# Collect the percentages in a new data frame, one row per baseline model
data_3 = {
    'Inferior Model Name': model_comparisons_3['Model Name'],
    'Precision Improvement of Ensemble Over Inferior Model (%)': precision_improvement,
    'Recall Improvement of Ensemble Over Inferior Model (%)': recall_improvement,
    'F-Score Improvement of Ensemble Over Inferior Model (%)': F_score_improvement,
}

percentage_improvements = pd.DataFrame(data_3)

In [22]:
# Display the table of percentage differences between the ensemble and each baseline model
percentage_improvements

Unnamed: 0,Inferior Model Name,Precision Improvement of Ensemble Over Inferior Model (%),Recall Improvement of Ensemble Over Inferior Model (%),F-Score Improvement of Ensemble Over Inferior Model (%)
0,Bing Liu Dictionary,-1.610222,15.526518,7.751921
1,Textblob,-2.530834,8.147579,3.104625
2,VADER,6.718508,7.378939,7.050079
