In [1]:
import pandas as pd

# Sentiment Analysis
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Evaluation of the Sentiment Analysis
# These are imported from the public `sklearn.metrics` namespace: the
# underscore-prefixed `sklearn.metrics._classification` module is private and
# its location is not guaranteed to stay the same between scikit-learn releases.
from sklearn.metrics import (
    ConfusionMatrixDisplay,
    classification_report,
    confusion_matrix,
    f1_score,
)

In [2]:
# Creators that are part of the evaluation sample: MKBHD, Jeremy Jahns, James Charles
creator = "MKBHD"  # Put the name of the creator here

# Spreadsheet with the sampled comments of the selected creator
# (path is relative to the notebook's working directory).
spreadsheet_path = f"./comments_spreadsheets/sample_extracted_comments_{creator}.xlsx"
output_df = pd.read_excel(spreadsheet_path)

FileNotFoundError: [Errno 2] No such file or directory: './comments_spreadsheets/sample_extracted_comments_MKBHD.xlsx'

In [None]:
# Boolean mask that is True for every repeated occurrence of a comment text
# (the first occurrence of each text is not flagged).
duplicate_mask = output_df.duplicated(subset="Comments")
duplicates = output_df.loc[duplicate_mask]
duplicate_count = len(duplicates)

print("Count of duplicate comments in dataframe", duplicate_count)
print("Count of unique comments in dataframe", len(output_df) - duplicate_count)

# Keep only the first occurrence of each comment
unique_df = output_df.loc[~duplicate_mask]

# reset_index() renumbers the rows and keeps the previous row labels in a new
# "index" column; the replace() then swaps every line return "\n" found in a
# string cell for a single space.
df = unique_df.reset_index().replace(to_replace=r'\n', value=' ', regex=True)

display(df)

# Sentiment Analysis

In [None]:
# Cut-offs applied to VADER's compound score: at or above the positive
# threshold is 'Positive', at or below the negative threshold is 'Negative',
# and everything strictly in between is 'Neutral'.
POSITIVE_THRESHOLD = 0.05
NEGATIVE_THRESHOLD = -0.05


def classifyCompound(compound):
    """Return the sentiment label for a VADER compound score.

    Parameters
    ----------
    compound : float
        The 'compound' value returned by `polarity_scores`.

    Returns
    -------
    str
        'Positive', 'Negative' or 'Neutral'.
    """
    if compound >= POSITIVE_THRESHOLD:
        return 'Positive'
    if compound <= NEGATIVE_THRESHOLD:
        return 'Negative'
    return 'Neutral'


sentimentAnalyser = SentimentIntensityAnalyzer()

# Cells read from Excel are not guaranteed to be strings (empty cells come back
# as NaN, purely numeric comments as numbers). The analyser works on text, so
# missing values are scored as an empty string and everything else is cast to
# str. The "Comments" column itself is left untouched.
commentTexts = df["Comments"].fillna("").astype(str)

# One compound score per comment, in row order, then one label per score.
sentimentScoreList = [
    sentimentAnalyser.polarity_scores(text)['compound'] for text in commentTexts
]
sentimentLabelList = [classifyCompound(score) for score in sentimentScoreList]

df["Sentiment"] = sentimentLabelList
df["Sentiment Score"] = sentimentScoreList

In [None]:
# Number of rows in df; used as the denominator of every percentage below.
totalComments = len(df)

# Count the rows carrying each label (int() turns the pandas sum back into a
# plain Python integer).
numbersOfPositiveComments = int((df["Sentiment"] == "Positive").sum())
numbersOfNegativeComments = int((df["Sentiment"] == "Negative").sum())
numbersOfNeutralComments = int((df["Sentiment"] == "Neutral").sum())

# Display the number of positive, negative and neutral comments
print("Number of positive comments: ", numbersOfPositiveComments)
print("Number of negative comments: ", numbersOfNegativeComments)
print("Number of neutral comments: ", numbersOfNeutralComments)

# Display the percentage of positive, negative and neutral comments
print("Percentage of positive comments: ", numbersOfPositiveComments / totalComments * 100)
print("Percentage of negative comments: ", numbersOfNegativeComments / totalComments * 100)
print("Percentage of neutral comments: ", numbersOfNeutralComments / totalComments * 100)

# Sentiment Analysis Evaluation

In [None]:
# Single, fixed class order shared by the confusion matrix, its plot and both
# reports, so that they always describe the same three classes.
polarityLabels = ['Negative', 'Neutral', 'Positive']

# Manually annotated polarity vs. the label predicted from the VADER score.
y_actual = df["Actual Polarity"].tolist()
y_predicted = df['Sentiment'].tolist()

# labels= pins the rows/columns of the matrix to polarityLabels. Without it the
# matrix only contains the classes that occur in the data, so a sample missing
# one class would produce a smaller matrix that no longer matches the three
# display labels.
confusionMatrix = confusion_matrix(y_actual, y_predicted, labels=polarityLabels)
disp = ConfusionMatrixDisplay(confusionMatrix, display_labels=polarityLabels)
disp.plot()

print(classification_report(y_actual, y_predicted, labels=polarityLabels))
print()
# Weighted F1: per-class F1 averaged with each class weighted by its support.
print(f1_score(y_actual, y_predicted, labels=polarityLabels, average='weighted'))