In [15]:
!pip install text2emotion



In [10]:
import pandas as pd
import re
import glob 

import nltk
from nltk import tokenize
from nltk.sentiment.vader import SentimentIntensityAnalyzer
nltk.download('vader_lexicon')


import text2emotion as te

import plotly.graph_objects as go

FILE_DIR = "Data/tweets/"
FILE_DIR_UNPOP = "Data/unpop_tweets/"

[nltk_data] Downloading package vader_lexicon to C:\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


# Generate Emotions and Sentiments

In [6]:

def getJSONData(file):
    df = pd.read_json(file, lines=True)
    data = df[df.lang == "en"].loc[:, ['content']]
    data["sentiments"] = data.content.apply(lambda x: getSentimentEmotion(x))
    data[['neg','neu', 'pos','comp', 'angry', 'fear', 'happy', 'sad', 'surprise']] = data['sentiments'].str.split(',',expand=True)
    data.drop(["sentiments"], inplace=True, axis = 1)
    return data

In [7]:

def getSentimentEmotion(tweet):
    sid = SentimentIntensityAnalyzer()
    tweet_words = [t.lower() for t in tweet.split() if re.match(r"(\w+)", t)]
    text = " ".join(tweet_words)
    ss = sid.polarity_scores(text)
    emo = te.get_emotion(text)
    return f'{ss["neg"]}, {ss["neu"]}, {ss["pos"]}, {ss["compound"]}, {emo["Angry"]}, {emo["Fear"]}, {emo["Happy"]}, {emo["Sad"]}, {emo["Surprise"]}'

In [39]:

json_files = glob.glob(f"{FILE_DIR}*.json")
for file in json_files: 
    print (file)
    data = getJSONData(file)
    data.to_csv(f"{file.split('.')[0]}.csv")


Data/tweets\13 reasons why.json
Data/tweets\all of us are dead.json
Data/tweets\bridgerton.json
Data/tweets\maid.json
Data/tweets\money heist.json
Data/tweets\red notice.json
Data/tweets\squid game.json
Data/tweets\stranger things.json
Data/tweets\the witcher.json
Data/tweets\you.json


In [9]:

json_files = glob.glob(f"{FILE_DIR_UNPOP}*.json")
for file in json_files: 
    print (file)
    data = getJSONData(file)
    data.to_csv(f"{file.split('.')[0]}.csv")


[]


# Charts

In [75]:
def getCSVdata(file):
    data = pd.read_csv(file)
    data = data.mean()
    data['title'] = file.split("/")[-1].split('.')[0]
    return data

In [76]:
df = pd.DataFrame()
csv_files = glob.glob(f"{FILE_DIR}processed/*.csv")
for file in csv_files: 
    print (file)
    data = getCSVdata(file)
    df = df.append(data, ignore_index=True)

df.drop("Unnamed: 0", axis = 1, inplace = True)

Data/tweets/processed/squid game.csv



Dropping of nuisance columns in DataFrame reductions (with 'numeric_only=None') is deprecated; in a future version this will raise TypeError.  Select only valid columns before calling the reduction.



Data/tweets/processed/red notice.csv
Data/tweets/processed/stranger things.csv
Data/tweets/processed/maid.csv
Data/tweets/processed/the witcher.csv
Data/tweets/processed/bridgerton.csv
Data/tweets/processed/all of us are dead.csv
Data/tweets/processed/money heist.csv
Data/tweets/processed/you.csv
Data/tweets/processed/13 reasons why.csv


In [98]:
emotions_df = df[['angry', 'fear', 'happy',
       'sad', 'surprise', 'title']]

sentiment_df = df[['pos', 'neu', 'neg',
       'comp']]

In [46]:
categories = ['angry', 'fear', 'happy',
       'sad', 'surprise']

fig = go.Figure()

for i, row in emotions_df.iterrows(): 
  fig.add_trace(go.Scatterpolar(
        r=row[:5],
        theta=categories,
        fill='toself',
        name=row[-1]
  ))

fig.update_layout(
  polar=dict(
    radialaxis=dict(
      visible=True
    ),
  ),
  showlegend=False
)

fig.show()

In [54]:
categories = ['pos', 'neu', 'neg',
       'comp']

fig = go.Figure()

for i, row in sentiment_df.iterrows(): 
  fig.add_trace(go.Scatterpolar(
        r=row[:5],
        theta=categories,
        fill='toself',
        name=row[-1]
  ))

fig.update_layout(
  polar=dict(
    radialaxis=dict(
      visible=True
    ),
  ),
  showlegend=False
)

fig.show()

In [99]:
# sentiment_df.columns

sentiment_df = sentiment_df[['comp']]

In [100]:
import plotly.express as px


fig = px.imshow(sentiment_df,
                labels=dict(x="Sentiment", y="Show", color="Value"),
                y=['s1', '2', '3', '4','5','6','7','8','9','10'],
                x=['comp']
               )
fig.update_xaxes(side="top")
fig.show()