In [1]:
import re, os
import json
import torch
import requests
import numpy    as np
import pandas   as pd
import seaborn  as sns
import matplotlib.pyplot   as plt
import tensorflow_datasets as tfds

from tqdm     import tqdm
from torch    import nn
from dotenv   import load_dotenv
from pathlib  import Path
from datasets import load_dataset
from transformers import AutoModel, AutoTokenizer, AdamW
from sklearn.metrics import f1_score, accuracy_score
from torch.utils.data import Dataset, DataLoader

# NLP APIs
import nltk
from wordcloud   import WordCloud
from nltk.corpus import stopwords

In [2]:
# Read DATA_PATH, MODEL_PATH and CEBAB_PATH from the environment.
# load_dotenv() first merges any variables declared in a .env file into
# os.environ, so the values may come from either place.
load_dotenv()
data_path  = os.getenv("DATA_PATH")
model_path = os.getenv("MODEL_PATH")
cebab_path = os.getenv("CEBAB_PATH")

# os.getenv returns None for an unset variable. DATA_PATH is concatenated
# into a file path by the data-loading cell, where None would only surface
# as an opaque TypeError, so fail here with an actionable message instead.
if data_path is None:
    raise RuntimeError(
        "DATA_PATH is not set; define it in the environment or in a .env file."
    )

In [3]:
# Load the goemo sample CSV that lives directly under data_path.
df_goemo = pd.read_csv("/".join((data_path, "goemo_samples_1000.csv")))

In [4]:
# Emotion columns of df_goemo that are summarised below.
concepts = [
    'admiration', 'amusement', 'anger', 'annoyance',
    'approval', 'caring', 'confusion', 'curiosity', 'desire',
    'disappointment', 'disapproval', 'disgust', 'embarrassment',
    'excitement', 'fear', 'gratitude', 'grief', 'joy', 'love',
    'nervousness', 'optimism', 'pride', 'realization', 'relief',
    'remorse', 'sadness', 'surprise', 'neutral',
]

# Number of rows in the sample; denominator for every percentage.
total_counts = len(df_goemo)

# Per-emotion tallies of rows labelled 'Positive' and rows labelled 'unknown'
# (the two label spellings are matched exactly as written here).
positive_tallies = [(df_goemo[name] == 'Positive').sum() for name in concepts]
unknown_tallies = [(df_goemo[name] == 'unknown').sum() for name in concepts]

# Column-oriented summary: one entry per emotion in every list.
emotion_stats = {
    'Emotion': list(concepts),
    'Positive Count': positive_tallies,
    'Positive Percentage': [(tally / total_counts) * 100 for tally in positive_tallies],
    'Unknown Count': unknown_tallies,
    'Unknown Percentage': [(tally / total_counts) * 100 for tally in unknown_tallies],
}

# Tabulate the summary and print it as plain text.
df_emotion_stats = pd.DataFrame(emotion_stats)
print(df_emotion_stats)


           Emotion  Positive Count  Positive Percentage  Unknown Count  \
0       admiration             106                 10.6            894   
1        amusement              31                  3.1            969   
2            anger              49                  4.9            951   
3        annoyance              61                  6.1            939   
4         approval             114                 11.4            886   
5           caring              35                  3.5            965   
6        confusion              43                  4.3            957   
7        curiosity              36                  3.6            964   
8           desire              10                  1.0            990   
9   disappointment              27                  2.7            973   
10     disapproval              58                  5.8            942   
11         disgust              27                  2.7            973   
12   embarrassment              16    