In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()

import text_cleaner as cln

from wordcloud import WordCloud, ImageColorGenerator
from PIL import Image

from textblob import TextBlob
import flair

In [None]:
# Load the raw comment export into a DataFrame.
input_file = "comments.csv"
raw_comments = pd.read_csv(
    filepath_or_buffer=input_file,
    # Only '\n' is treated as the row terminator.
    # NOTE(review): presumably so carriage returns inside comment text do not
    # end a row early -- confirm against the export format.
    lineterminator='\n',
)

In [None]:
# Discard the video/comment identifier columns and the publication timestamp;
# every other column of the raw frame is kept.
data = raw_comments.drop(columns=['videoId', 'commentId', 'publishedAt'])

# Preprocessing

In [None]:
# Run the project-local TextCleaner (module `text_cleaner`) over the comment text.
# The 'text' column is cast to str first so clean() only ever receives strings.
# NOTE(review): astype(str) turns missing values into the literal text 'nan' --
# confirm that the cleaner (or a later step) is expected to cope with that.
cleaner = cln.TextCleaner()
cleaned_comments = cleaner.clean(data['text'].astype(str))

In [None]:
# New frame = the trimmed raw data plus a 'cleaned_comments' column.
# assign() works on a copy, so `data` itself is left untouched.
cleaned_data = data.assign(cleaned_comments=cleaned_comments)

In [None]:
# Persist the cleaned frame next to the notebook; the row index is not written.
cleaned_data.to_csv(path_or_buf='cleaned_comments.csv', index=False)

# Data Visualization

In [None]:
# Read the mask picture and convert it to a NumPy array. The array is reused
# below both as the WordCloud mask and as the source for ImageColorGenerator.
with Image.open('reeves.png') as mask_image:
    mask = np.array(mask_image)

In [None]:
# Build the single text blob that WordCloud.generate() will tokenise.
#
# Each entry is stringified and stripped of list-repr punctuation (leading and
# trailing brackets, quotes, commas). Entries are joined with a space: without
# a separator the last word of one comment fuses with the first word of the
# next and shows up in the cloud as a bogus token. One join over a generator
# also avoids rebuilding the growing string on every iteration.
words = " ".join(
    str(comment).strip("[]").replace("'", "").replace(",", "")
    for comment in cleaned_data['cleaned_comments']
)

In [None]:
# Configure the cloud: white background, at most 10000 words, shaped by `mask`.
# random_state is pinned so the layout is the same on every run.
wordcloud = WordCloud(
    background_color="white",
    max_words=10000,
    random_state=42,
    mask=mask,
)
# Populate the cloud from the concatenated comment text.
wordcloud = wordcloud.generate(words)

In [None]:
# Colour function derived from the mask picture itself.
image_colors = ImageColorGenerator(mask)

# Draw the recoloured cloud on a large square canvas with the axes hidden.
fig, ax = plt.subplots(figsize=[15, 15])
ax.imshow(wordcloud.recolor(color_func=image_colors), interpolation='bilinear')
ax.axis("off")
plt.show()

# Sentiment Analysis

In [None]:
# Independent copy of the cleaned frame; the sentiment columns are added to
# this copy so `cleaned_data` stays as it was.
sentiment_analysis = cleaned_data.copy(deep=True)

## TextBlob pre-trained model

In [None]:
# Score every cleaned comment with TextBlob and tally the results.
#   summary       -- number of comments per bucket, decided by the first
#                    element of the sentiment result (> 0, == 0, otherwise)
#   sentiment_val -- the full TextBlob sentiment result per comment, in row order
summary = {"positive": 0, "neutral": 0, "negative": 0}
sentiment_val = []
for comment in cleaned_data['cleaned_comments']:
    blob_sentiment = TextBlob(comment).sentiment
    sentiment_val.append(blob_sentiment)

    score = blob_sentiment[0]
    if score > 0.0:
        bucket = "positive"
    elif score == 0.0:
        bucket = "neutral"
    else:
        bucket = "negative"
    summary[bucket] += 1
print(summary)

In [None]:
# Pie chart of how the comments split across the sentiment buckets in `summary`.
#
# The dict views are materialised as lists: NumPy does not treat dict views as
# sequences, so handing `summary.values()` straight to pie() is fragile.
labels = list(summary.keys())
# Raw counts per bucket; autopct below renders them as percentages.
percentages = list(summary.values())
# Pull the first wedge out slightly; sized from the data so the tuple always
# matches the number of buckets.
explode = (0.1,) + (0,) * (len(labels) - 1)

fig, ax = plt.subplots()
ax.pie(percentages, explode=explode, labels=labels, autopct='%1.1f%%',
        shadow=True, startangle=90)
# Equal aspect ratio so the pie is drawn as a circle.
ax.axis('equal')
ax.set_title('TextBlob sentiment distribution')

plt.show()

In [None]:
# Attach the per-comment TextBlob results as the 'text_blob' column.
sentiment_analysis = sentiment_analysis.assign(text_blob=sentiment_val)

## Flair pre-trained model

In [None]:
# Load Flair's 'en-sentiment' text classifier once; the same instance is used
# to score every comment in the loop below.
flair_sentiment = flair.models.TextClassifier.load('en-sentiment')

In [None]:
# Score every cleaned comment with the Flair classifier and tally the results.
#   summary       -- number of comments labelled POSITIVE vs. anything else
#   sentiment_val -- one entry per comment, in row order: [label, score] with
#                    the score as a 4-decimal string, or None when the model
#                    returned no label for that comment
#
# The label is read through its `value` / `score` attributes instead of
# splitting str(label), whose format is not a stable interface. The former
# "neutral" branch compared a string to 0.0, so it could never run (and would
# have raised KeyError if it had); it is removed.
summary = {"positive":0, "negative":0}
sentiment_val = []
for x in cleaned_data['cleaned_comments']:
    sentiment = flair.data.Sentence(x)
    flair_sentiment.predict(sentiment)

    if not sentiment.labels:
        # No prediction came back (e.g. the comment has no tokens left after
        # cleaning). Keep a placeholder so sentiment_val stays aligned with the
        # DataFrame rows; the comment is not counted in `summary`.
        sentiment_val.append(None)
        continue

    label = sentiment.labels[0]
    # Same [label, score-as-text] pair the string parsing used to yield.
    res = [label.value, str(round(label.score, 4))]
    sentiment_val.append(res)
    if res[0] == "POSITIVE":
        summary["positive"] +=1
    else:
        summary["negative"] +=1
print(summary)

In [None]:
# Pie chart of how the comments split across the sentiment buckets in `summary`.
#
# The dict views are materialised as lists: NumPy does not treat dict views as
# sequences, so handing `summary.values()` straight to pie() is fragile.
labels = list(summary.keys())
# Raw counts per bucket; autopct below renders them as percentages.
percentages = list(summary.values())
# Pull the first wedge out slightly; sized from the data so the tuple always
# matches the number of buckets.
explode = (0.1,) + (0,) * (len(labels) - 1)

fig, ax = plt.subplots()
ax.pie(percentages, explode=explode, labels=labels, autopct='%1.1f%%',
        shadow=True, startangle=90)
# Equal aspect ratio so the pie is drawn as a circle.
ax.axis('equal')
ax.set_title('Flair sentiment distribution')

plt.show()

In [None]:
# Attach the per-comment Flair results as the 'flair' column.
sentiment_analysis = sentiment_analysis.assign(flair=sentiment_val)

In [None]:
# Write the frame with both sentiment columns to disk; the row index is not written.
sentiment_analysis.to_csv(path_or_buf='sentiment_analysis.csv', index=False)