## Sentiment Analysis Score

Predict a sentiment polarity score with a deep learning model for the texts collected for each query listed in the relations file, then summarize the scores per query and per emotion

In [1]:
# Make the project root (the parent of the current working directory) importable
# by adding it to sys.path

import os
import sys
from pathlib import Path

project_root = Path.cwd().joinpath('..').resolve().as_posix()

# Guard against duplicate entries when this cell is re-run in the same kernel
if project_root not in sys.path:
    sys.path.append(project_root)

## Load Tokenizer

Import and load the tokenizer from a `.pickle` file

In [2]:
import pickle
from pathlib import Path

In [3]:
# Deserialize the saved tokenizer.
# NOTE: unpickling can execute arbitrary code - only load files from a trusted source.
tokenizer_file = Path('../datasets/sentiment140/tokenizer.pickle').resolve()
tokenizer = pickle.loads(tokenizer_file.read_bytes())

## Load Model

Rebuild the sentiment analysis model architecture and load the saved weights into it

In [4]:
from tensorflow.keras.layers import Input, Embedding, GRU
from tensorflow.keras.layers import Dropout, GlobalMaxPooling1D
from tensorflow.keras.layers import Bidirectional, Dense
from tensorflow.keras.models import Sequential

In [5]:
# Hyperparameters of the network built below (presumably the values the saved
# weights were trained with - the weights only load if the shapes match).

# Size of the embedding vocabulary. `tokenizer.num_words` is None when the
# tokenizer was created without a vocabulary cap; `min(None, ...)` would raise
# a TypeError, so fall back to the full word index in that case.
vocabulary_size = len(tokenizer.word_index) + 1
if tokenizer.num_words is None:
    input_dim = vocabulary_size
else:
    input_dim = min(tokenizer.num_words, vocabulary_size)

embedding_dim = 200        # width of each embedding vector
input_length = 100         # number of tokens per (padded) input sequence
gru_units = 128            # units per GRU direction
gru_dropout = 0.1          # dropout on the GRU inputs
recurrent_dropout = 0.1    # dropout on the GRU recurrent state
dropout = 0.1              # dropout before the output layer

In [6]:
# Architecture: embedding -> bidirectional GRU (full sequence output) ->
# global max pooling over time -> small dense head -> single sigmoid unit.
model = Sequential([
    Embedding(
        input_dim=input_dim,
        output_dim=embedding_dim,
        input_shape=(input_length,),
    ),
    Bidirectional(
        GRU(
            gru_units,
            return_sequences=True,
            dropout=gru_dropout,
            recurrent_dropout=recurrent_dropout,
        )
    ),
    GlobalMaxPooling1D(),
    Dense(32, activation='relu'),
    Dropout(dropout),
    Dense(1, activation='sigmoid'),
])

In [7]:
# Restore the saved parameters into the architecture built above.
weights_path = Path('../models/sentiment_analysis/model_weights.h5')
weights_path = weights_path.resolve()
model.load_weights(weights_path.as_posix())

## Load Query Relations

Load the relations between queries and emotions from a `.json` file

In [8]:
import json

In [9]:
# Mapping from query (hashtag or emoji shortcode) to its emotion label.
# The path is resolved like the other paths in this notebook, and the file is
# decoded explicitly as UTF-8 so the result does not depend on the platform's
# default locale encoding.
relations_path = Path('../query_relations.json').resolve()
with relations_path.open('r', encoding='utf-8') as file:
    relations = json.load(file)

## Predict polarity

Predict the polarity of the texts, using the sentiment analysis model

In [10]:
from tensorflow.keras.preprocessing.sequence import pad_sequences
from nlp import preprocess
from tqdm import tqdm
import pandas as pd
import numpy as np
import re

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/vladislavklyuev/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [11]:
# Directory whose files are read one by one in the prediction loop below.
dataset_dir = Path('../datasets/tweepy')
dataset_dir = dataset_dir.resolve()

In [12]:
# Score every query file with the sentiment model and collect summary statistics.
# `query_dict` receives one row per query file; `data_dict` maps each emotion to
# the concatenated raw model outputs of all queries related to that emotion.
data_dict = {}

query_dict = {
    'query': [],
    'mean': [],
    'max': [],
    'min': [],
    'std': [],
    'count': [],
    'emotion': []
}

# The query (a `#hashtag` or an `:emoji_shortcode:`) is encoded in the file name.
query_pattern = re.compile(r'(#[^.]+|:.+:)')

# Sort the listing for a reproducible row order (os.listdir order is arbitrary)
# and skip entries whose name carries no query (e.g. `.DS_Store`) instead of
# failing on them with an IndexError.
query_files = []
for filename in sorted(os.listdir(dataset_dir)):
    query_match = query_pattern.search(filename)
    if query_match is not None:
        query_files.append((filename, query_match.group(0)))

# Per-emotion prediction chunks; they are concatenated once after the loop
# rather than re-copying a growing array on every iteration.
emotion_chunks = {}

for filename, query in tqdm(query_files):
    dataset = pd.read_csv(os.path.join(dataset_dir, filename))
    cleaned_texts = preprocess(dataset.text, quiet=True)

    predict_sequences = [text.split() for text in cleaned_texts]
    list_tokenized_predict = tokenizer.texts_to_sequences(predict_sequences)
    # Pad/truncate to the same length the model's input layer was built with.
    x_predict = pad_sequences(list_tokenized_predict, maxlen=input_length)

    result = model.predict(x_predict)

    # A query missing from the relations file raises KeyError on purpose:
    # it means the relations file and the dataset directory are out of sync.
    emotion = relations[query]
    query_dict['query'].append(query)
    query_dict['mean'].append(np.mean(result))
    query_dict['max'].append(np.amax(result))
    query_dict['min'].append(np.amin(result))
    query_dict['count'].append(len(dataset))
    query_dict['std'].append(np.std(result))
    query_dict['emotion'].append(emotion)

    emotion_chunks.setdefault(emotion, []).append(result)

data_dict = {
    emotion_name: np.concatenate(chunks)
    for emotion_name, chunks in emotion_chunks.items()
}

100%|██████████| 29/29 [02:48<00:00,  5.81s/it]


## Print Results

Display the per-query statistics grouped by emotion, followed by the aggregated statistics for each emotion

In [13]:
# Per-query statistics, rendered as one table per emotion
# (emotions appear in order of first occurrence).
df = pd.DataFrame(data=query_dict)
for emotion in df['emotion'].unique():
    has_emotion = df['emotion'] == emotion
    display(df.loc[has_emotion])

Unnamed: 0,query,mean,max,min,std,count,emotion
0,:grinning_face_with_smiling_eyes:,0.69014,0.995362,0.016203,0.242131,4682,joy
6,#joy,0.822292,0.994438,0.047048,0.178356,2829,joy
13,#happiness,0.859708,0.994822,0.064436,0.155626,4578,joy
15,#excited,0.884466,0.994933,0.145889,0.137462,1613,joy
21,:red_heart:,0.781182,0.994983,0.020156,0.222091,4591,joy
27,:smiling_face_with_smiling_eyes:,0.785217,0.994891,0.026256,0.220473,4484,joy


Unnamed: 0,query,mean,max,min,std,count,emotion
1,:face_with_steam_from_nose:,0.525856,0.991684,0.007834,0.266844,9000,anger
4,#mad,0.387373,0.966865,0.017412,0.21947,484,anger
7,#angry,0.330573,0.974755,0.015015,0.222967,450,anger
9,#furious,0.487592,0.9493,0.04474,0.257909,77,anger
10,#hateyou,0.372692,0.785669,0.123487,0.197314,11,anger
16,#pissed,0.240259,0.935397,0.018089,0.175839,296,anger
18,#pissedoff,0.342486,0.925229,0.016533,0.23002,117,anger
19,:face_with_symbols_on_mouth:,0.416722,0.989133,0.008382,0.251025,9000,anger
20,:pouting_face:,0.454024,0.992345,0.005212,0.256322,9000,anger
25,:anger_face:,0.444339,0.985892,0.026906,0.223909,1789,anger


Unnamed: 0,query,mean,max,min,std,count,emotion
2,#depressed,0.13703,0.919184,0.005528,0.12763,644,sadness
8,#sad,0.104854,0.964132,0.001725,0.109697,8996,sadness
22,#depression,0.290758,0.984563,0.00662,0.19848,8920,sadness
28,:broken_heart:,0.401407,0.99108,0.005458,0.274192,5754,sadness


Unnamed: 0,query,mean,max,min,std,count,emotion
3,#fear,0.57331,0.985692,0.007214,0.209829,2256,fear
5,#worried,0.325823,0.894836,0.013049,0.229899,157,fear
11,#scared,0.303335,0.92572,0.010762,0.213827,543,fear
12,#scary,0.491123,0.971156,0.015015,0.221699,1692,fear
14,:face_screaming_in_fear:,0.551556,0.995031,0.014395,0.251346,8902,fear
17,#anxious,0.493215,0.990066,0.017704,0.257176,292,fear
23,#afraid,0.412652,0.932353,0.050459,0.235034,172,fear
24,:anxious_face_with_sweat:,0.449569,0.9917,0.005922,0.262181,8797,fear
26,:fearful_face:,0.491166,0.994414,0.008334,0.251725,8204,fear


In [14]:
# Aggregate the raw predictions of each emotion into one summary row.
# Each column name maps to the reducer applied to an emotion's prediction array;
# the insertion order below fixes the column order of the resulting table.
summary_reducers = {
    'mean': np.mean,
    'max': np.amax,
    'min': np.amin,
    'std': np.std,
    'count': len,
}

emotion_dict = {'emotion': list(data_dict.keys())}
for column, reducer in summary_reducers.items():
    emotion_dict[column] = [reducer(values) for values in data_dict.values()]

emotion_df = pd.DataFrame(data=emotion_dict)
display(emotion_df)

Unnamed: 0,emotion,mean,max,min,std,count
0,joy,0.791465,0.995362,0.016203,0.213368,22777
1,anger,0.458358,0.992345,0.005212,0.259689,30224
2,sadness,0.244089,0.99108,0.001725,0.226408,24314
3,fear,0.498132,0.995031,0.005922,0.255033,31015
