In [None]:
import pandas as pd
from nrclex import NRCLex

In [8]:
# Load the AZ dataset from its Parquet file
AZ = pd.read_parquet(path='data/AZ.parquet')

# Preview the leading rows (the bare expression is rendered as the cell output)
AZ.head(n=5)

Unnamed: 0,station,range_key,text
0,KFNN,2024-07-31 12:00:00,I just paid for my membership.It's unbelievab...
1,KFNN,2024-07-31 15:00:00,and he put his hat in the ring.He was the fir...
2,KFNN,2024-08-01 03:00:00,It's towards the goal of eliminating and neut...
3,KFNN,2024-08-01 06:00:00,acts the israeli military said it has confirm...
4,KFNN,2024-08-01 09:00:00,Why would they do this? Cash is an asset and ...


In [9]:
# A short sample sentence used to sanity-check the lexicon lookup
text = "I feel so happy and excited about this wonderful news!"

# Build the NRCLex analysis object for the sample sentence
emotion = NRCLex(text)

# Print the per-category counts (a dict) and then the top-scoring
# categories (a list of (name, score) tuples), one per line
print(emotion.raw_emotion_scores, emotion.top_emotions, sep="\n")

{'anticipation': 2, 'joy': 3, 'positive': 3, 'trust': 3, 'surprise': 2}
[('trust', 0.23076923076923078), ('positive', 0.23076923076923078), ('joy', 0.23076923076923078)]


In [10]:
# Function to get emotion scores for a text
def get_emotion_scores(text):
    """Return the raw NRC lexicon category counts for one piece of text.

    Parameters
    ----------
    text : str
        The text to score. Any non-string value (for example ``None`` or a
        float ``NaN`` coming from a missing DataFrame cell) and any string
        that is empty or whitespace-only is treated as "nothing to score".

    Returns
    -------
    dict
        ``NRCLex(text).raw_emotion_scores`` -- a dictionary mapping each
        matched category name to its count. An empty dict is returned when
        there is nothing to score.
    """
    # Missing cells reach `.apply` as None/NaN rather than str; short-circuit
    # them (and blank strings) so one bad row cannot abort scoring the whole
    # column, and so downstream code can always call `.items()` on the result.
    if not isinstance(text, str) or not text.strip():
        return {}
    return NRCLex(text).raw_emotion_scores

# Score every row's text and store the resulting dictionaries in a new column
AZ['emotion_scores'] = AZ['text'].apply(get_emotion_scores)

# Preview the text next to its scores
print(AZ.loc[:, ['text', 'emotion_scores']].head())

                                                text  \
0   I just paid for my membership.It's unbelievab...   
1   and he put his hat in the ring.He was the fir...   
2   It's towards the goal of eliminating and neut...   
3   acts the israeli military said it has confirm...   
4   Why would they do this? Cash is an asset and ...   

                                      emotion_scores  
0  {'surprise': 173, 'negative': 288, 'anger': 18...  
1  {'anticipation': 161, 'joy': 115, 'positive': ...  
2  {'fear': 156, 'positive': 380, 'anger': 122, '...  
3  {'fear': 398, 'positive': 1057, 'trust': 694, ...  
4  {'positive': 1203, 'trust': 800, 'anger': 260,...  


In [11]:
# List of emotions and sentiments in the NRC lexicon
emotions = ['fear', 'anger', 'anticipation', 'trust', 'surprise', 'sadness', 'disgust', 'joy']
sentiments = ['positive', 'negative']

# Expand the per-row score dictionaries into one integer column per category.
# Building the table in a single step replaces the row-by-row
# iterrows()/.at loop, and it no longer rebinds the notebook-level name
# `emotion` (used earlier for the NRCLex example object) as a loop variable.
nrc_categories = emotions + sentiments
score_table = (
    pd.DataFrame(AZ['emotion_scores'].tolist(), index=AZ.index)
    # Fix the column set and order; keys outside the lists are dropped and
    # categories that never occur are created as 0.
    .reindex(columns=nrc_categories, fill_value=0)
    # A category absent from a given row's dictionary counts as 0.
    .fillna(0)
    .astype('int64')
)

# Assign by position so the result does not depend on index labels being unique.
for category in nrc_categories:
    AZ[category] = score_table[category].to_numpy()

# Display the updated DataFrame
print(AZ.head())

  station           range_key  \
0    KFNN 2024-07-31 12:00:00   
1    KFNN 2024-07-31 15:00:00   
2    KFNN 2024-08-01 03:00:00   
3    KFNN 2024-08-01 06:00:00   
4    KFNN 2024-08-01 09:00:00   

                                                text  \
0   I just paid for my membership.It's unbelievab...   
1   and he put his hat in the ring.He was the fir...   
2   It's towards the goal of eliminating and neut...   
3   acts the israeli military said it has confirm...   
4   Why would they do this? Cash is an asset and ...   

                                      emotion_scores  fear  anger  \
0  {'surprise': 173, 'negative': 288, 'anger': 18...   184    185   
1  {'anticipation': 161, 'joy': 115, 'positive': ...    76     78   
2  {'fear': 156, 'positive': 380, 'anger': 122, '...   156    122   
3  {'fear': 398, 'positive': 1057, 'trust': 694, ...   398    262   
4  {'positive': 1203, 'trust': 800, 'anger': 260,...   329    260   

   anticipation  trust  surprise  sadness  disgus

In [19]:
# Output file name for the scored dataset (edit to taste)
parquet_file_name = 'AZ_with_emotion_score.parquet'  # Change this to your desired file name

# Write the whole DataFrame to Parquet, leaving the index out of the file
AZ.to_parquet(path=parquet_file_name, index=False)

print(f"DataFrame saved as {parquet_file_name}")

DataFrame saved as AZ_with_emotion_score.parquet
