In [15]:
!pip install text2emotion



In [1]:
import pandas as pd
import re
import glob 

import nltk
from nltk import tokenize
from nltk.sentiment.vader import SentimentIntensityAnalyzer
nltk.download('vader_lexicon')


import text2emotion as te

import plotly.graph_objects as go

FILE_DIR = "Data/tweets/"
FILE_DIR_UNPOP = "Data/unpop_tweets/"

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/mr/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package stopwords to /Users/mr/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /Users/mr/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /Users/mr/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


# Generate Emotions and Sentiments

In [6]:

def getJSONData(file):
    df = pd.read_json(file, lines=True)
    data = df[df.lang == "en"].loc[:, ['content']]
    data["sentiments"] = data.content.apply(lambda x: getSentimentEmotion(x))
    data[['neg','neu', 'pos','comp', 'angry', 'fear', 'happy', 'sad', 'surprise']] = data['sentiments'].str.split(',',expand=True)
    data.drop(["sentiments"], inplace=True, axis = 1)
    return data

In [7]:

def getSentimentEmotion(tweet):
    sid = SentimentIntensityAnalyzer()
    tweet_words = [t.lower() for t in tweet.split() if re.match(r"(\w+)", t)]
    text = " ".join(tweet_words)
    ss = sid.polarity_scores(text)
    emo = te.get_emotion(text)
    return f'{ss["neg"]}, {ss["neu"]}, {ss["pos"]}, {ss["compound"]}, {emo["Angry"]}, {emo["Fear"]}, {emo["Happy"]}, {emo["Sad"]}, {emo["Surprise"]}'

In [39]:

json_files = glob.glob(f"{FILE_DIR}*.json")
for file in json_files: 
    print (file)
    data = getJSONData(file)
    data.to_csv(f"{file.split('.')[0]}.csv")


Data/tweets\13 reasons why.json
Data/tweets\all of us are dead.json
Data/tweets\bridgerton.json
Data/tweets\maid.json
Data/tweets\money heist.json
Data/tweets\red notice.json
Data/tweets\squid game.json
Data/tweets\stranger things.json
Data/tweets\the witcher.json
Data/tweets\you.json


In [9]:

json_files = glob.glob(f"{FILE_DIR_UNPOP}*.json")
for file in json_files: 
    print (file)
    data = getJSONData(file)
    data.to_csv(f"{file.split('.')[0]}.csv")


[]


# Charts

In [2]:
def getCSVdata(file):
    data = pd.read_csv(file)
    data = data.mean()
    data['title'] = file.split("/")[-1].split('.')[0]
    return data

In [3]:
df = pd.DataFrame()
csv_files = glob.glob(f"{FILE_DIR}processed/*.csv")
for file in csv_files: 
    print (file)
    data = getCSVdata(file)
    df = df.append(data, ignore_index=True)

df.drop("Unnamed: 0", axis = 1, inplace = True)

unpop_df = pd.DataFrame()
csv_files = glob.glob(f"{FILE_DIR_UNPOP}processed/*.csv")
for file in csv_files: 
    print (file)
    data = getCSVdata(file)
    unpop_df = unpop_df.append(data, ignore_index=True)

unpop_df.drop("Unnamed: 0", axis = 1, inplace = True)

Data/tweets/processed/squid game.csv


  data = data.mean()


Data/tweets/processed/red notice.csv
Data/tweets/processed/stranger things.csv
Data/tweets/processed/maid.csv
Data/tweets/processed/the witcher.csv
Data/tweets/processed/bridgerton.csv
Data/tweets/processed/all of us are dead.csv
Data/tweets/processed/money heist.csv
Data/tweets/processed/you.csv
Data/tweets/processed/13 reasons why.csv
Data/unpop_tweets/processed/freud.csv
Data/unpop_tweets/processed/game on! a comedy crossover event.csv
Data/unpop_tweets/processed/resident evil infinite darkness.csv
Data/unpop_tweets/processed/jinn.csv
Data/unpop_tweets/processed/tiny creatures.csv
Data/unpop_tweets/processed/too hot to handle.csv
Data/unpop_tweets/processed/away.csv
Data/unpop_tweets/processed/racket boys.csv
Data/unpop_tweets/processed/reboot the guardian code.csv
Data/unpop_tweets/processed/hache.csv
Data/unpop_tweets/processed/sexy beasts.csv


In [4]:
emotions_df = df[['angry', 'fear', 'happy',
       'sad', 'surprise', 'title']]

sentiment_df = df[['pos', 'neu', 'neg',
       'comp','title']]

unpop_emotions_df = unpop_df[['angry', 'fear', 'happy',
       'sad', 'surprise', 'title']]

unpop_sentiment_df = unpop_df[['pos', 'neu', 'neg',
       'comp','title']]

In [5]:

categories = ['angry', 'fear', 'happy',
    'sad', 'surprise']

emotions_df = df[categories]
emotions_df["title"] = df.title

print (emotions_df)

      angry      fear     happy       sad  surprise               title
0  0.045700  0.169132  0.075885  0.113403  0.178198          squid game
1  0.300108  0.419029  0.069952  0.074270  0.123461          red notice
2  0.023986  0.131804  0.073044  0.094017  0.642423     stranger things
3  0.037159  0.169072  0.171680  0.173442  0.305763                maid
4  0.044983  0.233445  0.109411  0.148591  0.223481         the witcher
5  0.039466  0.175201  0.139073  0.164688  0.226760          bridgerton
6  0.019843  0.124824  0.108340  0.119612  0.302505  all of us are dead
7  0.045836  0.152156  0.087371  0.133690  0.231778         money heist
8  0.042213  0.247536  0.140547  0.193759  0.287570                 you
9  0.027593  0.131541  0.085609  0.599204  0.145017      13 reasons why


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  emotions_df["title"] = df.title


In [14]:
categories = ['angry', 'fear', 'happy',
       'sad', 'surprise']

fig = go.Figure()

for i, row in emotions_df.iterrows(): 
  
  fig.add_trace(go.Scatterpolar(
        r=row.values[0:5],
        theta=categories,
        fill='toself',
        name=row[-1]
  ))

fig.update_layout(
  polar=dict(
    radialaxis=dict(
      visible=True
    ),
  ),
  showlegend=False
)

fig.show()

In [13]:
print(row.values[0:5], row[:5])

[0.027592983745063776 0.13154100468362653 0.08560933051703598
 0.5992037836348546 0.1450169896225553] angry       0.027593
fear        0.131541
happy       0.085609
sad         0.599204
surprise    0.145017
Name: 9, dtype: object


In [16]:

fig = go.Figure()

for i, row in unpop_emotions_df.iterrows(): 
  fig.add_trace(go.Scatterpolar(
        r=row[:5],
        theta=categories,
        fill='toself',
        name=row[-1]
  ))

fig.update_layout(
  polar=dict(
    radialaxis=dict(
      visible=True
    ),
  ),
  showlegend=False
)

fig.show()

In [17]:
categories = ['pos', 'neu', 'neg',
       'comp']

fig = go.Figure()

for i, row in sentiment_df.iterrows(): 
  fig.add_trace(go.Scatterpolar(
        r=row[:5],
        theta=categories,
        fill='toself',
        name=row[-1]
  ))

fig.update_layout(
  polar=dict(
    radialaxis=dict(
      visible=True
    ),
  ),
  showlegend=False
)

fig.show()

In [19]:
categories = ['pos', 'neu', 'neg',
       'comp']

fig = go.Figure()

for i, row in unpop_sentiment_df.iterrows(): 
  fig.add_trace(go.Scatterpolar(
        r=row[:5],
        theta=categories,
        fill='toself',
        name=row[-1]
  ))

fig.update_layout(
  polar=dict(
    radialaxis=dict(
      visible=True
    ),
  ),
  showlegend=False
)

fig.show()