In [4]:
import pandas as pd
from nrclex import NRCLex
from textblob import TextBlob
import os

In [5]:
# Load the dataset
input_path = 'stratifiedSentimentData/stratified_radio_sample.csv'
data = pd.read_csv(input_path)

# Get the directory from input path to save output in same location
output_dir = os.path.dirname(input_path)

# Derive the output filename from the input filename (same base name plus a
# "_with_sentiment" suffix, same extension) so the two cannot drift apart when
# input_path is changed.
input_stem, input_ext = os.path.splitext(os.path.basename(input_path))
output_filename = os.path.join(output_dir, f'{input_stem}_with_sentiment{input_ext}')

# Emotion and Sentiment Scoring Using NRCLex

In [6]:
def get_emotion_scores(text):
    """
    Score a single piece of text against the NRC Emotion Lexicon via NRCLex.

    The lexicon links words to eight emotions (anger, fear, anticipation,
    trust, surprise, sadness, joy, disgust) and two sentiments (positive,
    negative). The value returned is NRCLex's ``raw_emotion_scores`` for the
    text, i.e. raw per-category counts rather than normalized frequencies.

    Parameters:
        text: Value to analyze. It is converted with ``str()`` before scoring.

    Returns:
        dict: Category name -> count, e.g. {'joy': 3, 'positive': 2, 'trust': 1}.
              An empty dict is returned when ``text`` is missing (per
              ``pd.isna``) or is empty/whitespace-only once converted to str.
    """
    # Missing values carry nothing to score.
    if pd.isna(text):
        return {}

    as_string = str(text)
    # Blank or whitespace-only input is treated the same as missing input.
    if as_string.strip() == "":
        return {}

    return NRCLex(as_string).raw_emotion_scores

# Apply NRCLex to the dataset
data['emotion_scores'] = data['text'].apply(get_emotion_scores)

# List of emotions and sentiments in the NRC lexicon
emotions = ['fear', 'anger', 'anticipation', 'trust', 'surprise', 'sadness', 'disgust', 'joy']
sentiments = ['positive', 'negative']

# Expand the per-row score dicts into one integer column per category.
# Building the whole table once avoids an iterrows() loop with per-cell writes,
# and assigning positionally (to_numpy) keeps rows correct even if the index
# is not unique. Categories absent from a row's dict, and rows whose value is
# not a dict, are recorded as 0; keys outside the two lists are ignored.
score_columns = emotions + sentiments
score_records = [
    {
        category: (row_scores.get(category, 0) if isinstance(row_scores, dict) else 0)
        for category in score_columns
    }
    for row_scores in data['emotion_scores']
]
score_table = pd.DataFrame(
    score_records, index=data.index, columns=score_columns
).astype('int64')

for category in score_columns:
    data[category] = score_table[category].to_numpy()

In [7]:
# Preview the first rows to check that the emotion/sentiment columns were populated
print(data.head())

  station            range_key  \
0    KAWC  2024-11-30 09:00:00   
1    KAWC  2024-09-26 06:00:00   
2    KVOI  2024-11-15 15:00:00   
3    KQNA  2024-11-03 21:00:00   
4    KVOI  2024-09-01 00:00:00   

                                                text state  \
0   Weather forecast for Yuma today is mostly sun...    AZ   
1   K-A-W-C.Classical music of Peter Van de Graaf...    AZ   
2   At Cortero, Tucson's only live local news and...    AZ   
3   Can't get enough of YCCA's Hammer Time show?W...    AZ   
4   Under the Gaza city of Rafa, including that o...    AZ   

                                      emotion_scores  fear  anger  \
0  {'anticipation': 254, 'trust': 325, 'joy': 235...   141     92   
1  {'joy': 311, 'positive': 796, 'sadness': 209, ...   275    206   
2  {'positive': 1104, 'trust': 786, 'joy': 509, '...   312    315   
3  {'trust': 590, 'positive': 937, 'joy': 407, 'a...   286    229   
4  {'positive': 812, 'trust': 552, 'anticipation'...   328    245   

   anti

# Additional Sentiment Scoring Using TextBlob

In [None]:
def analyze_sentiment(df, text_column='text', threshold=0.1):
    """
    Add per-entry TextBlob sentiment aggregates to a DataFrame.

    Each entry's text is split into "sentences" on the '.' character only
    (a naive split: '?' and '!' do not end a sentence, and decimals or
    abbreviations do), and every non-empty piece is scored with TextBlob's
    polarity. The pieces are then aggregated per entry:

    - 'positive_sum': sum of polarities of pieces with polarity > threshold
    - 'negative_sum': sum of absolute polarities of pieces with
      polarity < -threshold
    - 'neutral_sum':  COUNT of pieces whose polarity lies within
      [-threshold, threshold] (a count, not a polarity sum)

    Missing, empty, or sentence-less entries get 0 in all three columns.

    Parameters:
        df (pd.DataFrame): Frame holding the text; it is modified in place.
        text_column (str): Name of the column containing the text.
        threshold (float): Half-width of the neutral polarity band.
            Defaults to 0.1.

    Returns:
        pd.DataFrame: The same ``df`` object, with the three columns added
        (or overwritten if they already exist).
    """
    positive_sums = []
    negative_sums = []
    neutral_sums = []

    for text in df[text_column]:
        # Check pd.isna first: truth-testing pd.NA raises TypeError, so the
        # missing-value test must come before the emptiness test.
        if pd.isna(text) or not text:
            sentences = []
        else:
            # Split text into sentences, dropping blank fragments
            sentences = [part.strip() for part in str(text).split('.') if part.strip()]

        # With no sentences the loop below does not run and all sums stay 0.
        positive_sum = 0
        negative_sum = 0
        neutral_sum = 0

        for sentence in sentences:
            # Analyze sentiment for the sentence
            polarity = TextBlob(sentence).sentiment.polarity

            # Score the sentence for positivity, negativity, and neutrality
            if polarity > threshold:
                positive_sum += polarity
            elif polarity < -threshold:
                negative_sum += abs(polarity)
            else:
                neutral_sum += 1

        # Append sums for this entry
        positive_sums.append(positive_sum)
        negative_sums.append(negative_sum)
        neutral_sums.append(neutral_sum)

    # Add sentiment results to the DataFrame
    df['positive_sum'] = positive_sums
    df['negative_sum'] = negative_sums
    df['neutral_sum'] = neutral_sums

    return df

# Apply TextBlob sentiment analysis; analyze_sentiment adds the 'positive_sum',
# 'negative_sum' and 'neutral_sum' columns to the frame it is given and returns it
data = analyze_sentiment(data)

In [9]:
# Save to same directory as input; index=False keeps the DataFrame index out of the CSV
data.to_csv(output_filename, index=False)
print(f"Analysis complete. Output saved to:\n{output_filename}")

Analysis complete. Output saved to:
stratifiedSentimentData\stratified_radio_sample_with_sentiment.csv
