In [46]:
# Standard data manipulation/visualization libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set()

# Custom text cleaning tool
import text_cleaner as cln

# Word clound library tools
from wordcloud import WordCloud, ImageColorGenerator
from PIL import Image

# Sentiment analysis libraries
from textblob import TextBlob
import flair

# Interactive dashboard library
import streamlit

import csv_to_sqlite 

In [9]:
input_file = "../data/raw_data/comments.csv"
raw_comments = pd.read_csv(input_file, lineterminator='\n')
raw_comments_replies = pd.read_csv('../data/raw_data/comment_replies.csv', lineterminator='\n')

In [14]:
data = raw_comments.drop(['author'], axis=1)
reply_data = raw_comments_replies.drop(['author'], axis=1)

# Preprocessing

In [16]:
cleaner = cln.TextCleaner()
cleaned_comments = cleaner.clean(data['text'].astype(str))
cleaned_replies = cleaner.clean(reply_data['replyText'].astype(str))

In [17]:
cleaned_data = data.copy()
cleaned_data['cleaned_comments'] = cleaned_comments

cleaned_reply_data = reply_data.copy()
cleaned_reply_data['cleaned_replies'] = cleaned_replies

In [None]:
cleaned_data.to_json('../data/cleaned_comments.csv')
cleaned_reply_data.to_json('../data/cleaned_replies.csv')

# Sentiment Analysis

In [None]:
sentiment_analysis = cleaned_data.copy()

## TextBlob pre-trained model

In [None]:
textblob_results = {"positive":0,"neutral":0,"negative":0}
sentiment_polarity = []
sentiment_subj = []
for x in cleaned_data['cleaned_comments']: 
    res = TextBlob(x).sentiment
    sentiment_polarity.append(res[0])
    sentiment_subj.append(res[1])
    if res[0] == 0.0: 
        textblob_results["neutral"] +=1
    elif res[0] > 0.0:
        textblob_results["positive"] +=1
    else:
        textblob_results["negative"] +=1
print(textblob_results)

In [None]:
labels = textblob_results.keys()
percentages = textblob_results.values()
explode = (0.1, 0, 0)

fig, ax = plt.subplots()
ax.pie(percentages, explode=explode, labels=labels, autopct='%1.1f%%',
        shadow=True, startangle=90)
ax.axis('equal')

plt.show()

In [None]:
sentiment_analysis['textblob_polarity'] = sentiment_polarity
sentiment_analysis['textblob_subjectivity'] = sentiment_subj

## Flair pre-built model

In [None]:
flair_sentiment = flair.models.TextClassifier.load('en-sentiment')

In [None]:
flair_results = {"positive":0, "negative":0}
sentiment_val = []
sentiment_score = []
for x in cleaned_data['cleaned_comments']: 
    sentiment = flair.data.Sentence(x)
    flair_sentiment.predict(sentiment)
    res = sentiment.labels
    res = str(res[0]).replace('(', '').replace(')', '').split(' ')
    sentiment_val.append(res[0])
    sentiment_score.append(res[1])
    if res[0] == "POSITIVE":
        flair_results["positive"] +=1
    else:
        flair_results["negative"] +=1
print(flair_results)

In [None]:
labels = flair_results.keys()
percentages = flair_results.values()
explode = (0.1, 0)

fig, ax = plt.subplots()
ax.pie(percentages, explode=explode, labels=labels, autopct='%1.1f%%',
        shadow=True, startangle=90)
ax.axis('equal')

plt.show()

In [None]:
sentiment_analysis['flair_sentiment'] = sentiment_val
sentiment_analysis['flair_score'] = sentiment_score

In [None]:
sentiment_analysis.to_json('../data/sentiment_analysis.csv')

In [None]:
model_results = pd.DataFrame()
model_results = model_results.append(pd.DataFrame(data=[textblob_results.values()], columns=['positive', 'neutral', 'negative'], index=['textblob']))
model_results = model_results.append(pd.DataFrame(data=[flair_results.values()], columns=['positive', 'negative'], index=['flair']))
model_results = model_results.T
model_results.to_csv('../data/model_results.csv')

# Word Blob

In [None]:
df = pd.read_json("../data/sentiment_analysis.csv")

In [None]:
mask = np.array(Image.open('../images/reeves.png'))

In [None]:
words = ''
for x in df['cleaned_comments']:
    words += "".join(str(x).strip("[]").replace("'", "").replace(",", ""))

In [None]:
wordcloud = WordCloud(background_color="white", max_words=10000, random_state=42, mask=mask).generate(words)

In [None]:
image_colors = ImageColorGenerator(mask)
plt.figure(figsize=[15,15])
plt.imshow(wordcloud.recolor(color_func=image_colors), interpolation='bilinear')
plt.axis("off")
plt.show()

# Creating SQLite Database File

In [None]:
df = pd.read_json("../data/sentiment_analysis.csv")
df.to_json('../data/sentiment_analysis.csv')

In [None]:
df.to_csv('../data/csv_results.csv', index=False)
options = csv_to_sqlite.CsvOptions(typing_style="full", encoding="utf-8") 
csv_to_sqlite.write_csv(['../data/csv_results.csv'], "sentiment_analysis_db.sqlite", options)

In [1]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import pandas as pd

In [65]:
df = pd.read_json("../data/sentiment_analysis.csv")
vid = pd.read_csv('../data/raw_data/videos.csv')

In [67]:
vid.head()

Unnamed: 0,channelId,videoId,categoryId,title,viewCount,likeCount,dislikeCount,commentCount,publishedAt,description
0,UC4zyoIAzmdsgpDZQfO1-lSA,aWbQ5WcB0m4,20,Announcement Trailer - Cyber Up Your PC! Cyber...,551793,14324,740,1492,2020-04-07T14:45:17Z,Design the Cyberpunk 2077 PC Case of Your Drea...
1,UC4zyoIAzmdsgpDZQfO1-lSA,mrZC1Jcv0dw,20,Grimes – 4ÆM,2447237,104973,1958,7342,2019-12-19T15:06:33Z,"oo-\naphrodite, i wrote your constellation\nin..."
2,UC4zyoIAzmdsgpDZQfO1-lSA,Q4ZdkEiYvK0,20,Cyberpunk 2077 – Grimes performing 4ÆM live at...,3618884,34040,962,2758,2019-12-13T04:51:20Z,Watch Grimes – who will be also voicing Lizzy ...
3,UC4zyoIAzmdsgpDZQfO1-lSA,aZ_ARLDWK9Y,20,Cyberpunk 2077 – Behind The Music,413220,23218,340,2702,2019-12-13T02:57:19Z,Meet some of the artists behind Cyberpunk 2077...
4,UC4zyoIAzmdsgpDZQfO1-lSA,cgFvZmfjTYc,20,Cyberpunk 2077 — Official E3 2019 Cinematic Tr...,1015116,67731,465,3945,2019-09-11T14:01:45Z,CD PROJEKT RED and Goodbye Kansas present: the...


In [64]:
t = df['textblob_polarity'] > 0
t['videoId'] = df.videoId
t = t.groupby('videoId').count()
t

videoId
8X2kIfS6fb8    24563
AN1RJF55NXI     1534
FknHjl7eQ6o    10557
Igq3d6XA75Y     6063
P99qJGrPNLs    27815
Q4ZdkEiYvK0     1529
SVAryZ0GLwE     1352
aWbQ5WcB0m4      987
aZ_ARLDWK9Y     1736
cGmWwFpNIHg      836
cgFvZmfjTYc     2694
dXt1m7gg12c     1248
j7-qaRs3XxQ     1362
mrZC1Jcv0dw     4263
qIcTM8WXFjk    25077
qU3-4IrZ7tk      833
vjF9GgrY9c0    54523
wwr6TlEbiuU     3131
xYxt7cwDk4E     1678
Name: textblob_polarity, dtype: int64

In [70]:
t = pd.DataFrame(data=t)

In [74]:
t

Unnamed: 0_level_0,textblob_polarity
videoId,Unnamed: 1_level_1
8X2kIfS6fb8,24563
AN1RJF55NXI,1534
FknHjl7eQ6o,10557
Igq3d6XA75Y,6063
P99qJGrPNLs,27815
Q4ZdkEiYvK0,1529
SVAryZ0GLwE,1352
aWbQ5WcB0m4,987
aZ_ARLDWK9Y,1736
cGmWwFpNIHg,836


In [77]:
t.merge(vid, left_on='videoId', right_on='videoId').drop(['categoryId', 'channelId', 'description'], axis=1)

Unnamed: 0,videoId,textblob_polarity,title,viewCount,likeCount,dislikeCount,commentCount,publishedAt
0,8X2kIfS6fb8,24563,Cyberpunk 2077 – Official E3 2018 Trailer,18831679,412419,5419,41710,2018-06-10T21:40:15Z
1,AN1RJF55NXI,1534,Cyberpunk 2077 — Never Fade Away by SAMURAI (R...,1105052,43491,630,3090,2019-08-23T10:00:10Z
2,FknHjl7eQ6o,10557,Cyberpunk 2077 – Deep Dive Video,6234729,211715,4016,22104,2019-08-30T18:33:17Z
3,Igq3d6XA75Y,6063,Cyberpunk 2077 — Chippin’ In by SAMURAI (Refused),4590029,148995,1542,9986,2019-07-02T15:00:12Z
4,P99qJGrPNLs,27815,Cyberpunk 2077 Teaser Trailer,16793930,274059,4852,39153,2013-01-10T20:58:07Z
5,Q4ZdkEiYvK0,1529,Cyberpunk 2077 – Grimes performing 4ÆM live at...,3618884,34040,962,2758,2019-12-13T04:51:20Z
6,SVAryZ0GLwE,1352,Cyberpunk 2077 – Deep Dive Video + Q&A panel w...,515688,19717,429,2771,2019-08-30T19:13:31Z
7,aWbQ5WcB0m4,987,Announcement Trailer - Cyber Up Your PC! Cyber...,551793,14324,740,1492,2020-04-07T14:45:17Z
8,aZ_ARLDWK9Y,1736,Cyberpunk 2077 – Behind The Music,413220,23218,340,2702,2019-12-13T02:57:19Z
9,cGmWwFpNIHg,836,Cyberpunk 2077 title reveal,1128543,18364,227,1622,2012-10-19T08:13:43Z
