## Load and format data

In [6]:
import pandas as pd
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

In [7]:
# Load VADER sentiment analyzer
analyzer = SentimentIntensityAnalyzer()

# Load the JSON file into a DataFrame
file_path = 'data/sarcasm_data.json'
df = pd.read_json(file_path).transpose()

# Reset the index to turn the first element into a new column
df = df.reset_index()

# Define a function to apply sentiment analysis to a text
def get_sentiment(text):
    return analyzer.polarity_scores(text)

# Apply sentiment analysis to the 'utterance' column
df['sentiment_utterance'] = df['utterance'].apply(get_sentiment)

# Apply sentiment analysis to the 'context' column
df['sentiment_context_all'] = df['context'].apply(get_sentiment)

# Apply sentiment analysis to each sentence in the 'context' column
df['sentiment_context_per_sentence'] = df['context'].apply(lambda context: [get_sentiment(sentence) for sentence in context])

# Visualize the DataFrame
df

Unnamed: 0,index,utterance,speaker,context,context_speakers,show,sarcasm,sentiment_utterance,sentiment_context_all,sentiment_context_per_sentence
0,160,It's just a privilege to watch your mind at work.,SHELDON,[I never would have identified the fingerprint...,"[LEONARD, SHELDON]",BBT,True,"{'neg': 0.0, 'neu': 0.783, 'pos': 0.217, 'comp...","{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...","[{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compoun..."
1,170,I don't think I'll be able to stop thinking ab...,PENNY,[This is one of my favorite places to kick bac...,"[HOWARD, PENNY, HOWARD, HOWARD, HOWARD, PENNY,...",BBT,True,"{'neg': 0.18, 'neu': 0.82, 'pos': 0.0, 'compou...","{'neg': 0.0, 'neu': 0.871, 'pos': 0.129, 'comp...","[{'neg': 0.0, 'neu': 0.705, 'pos': 0.295, 'com..."
2,180,"Since it's not bee season, you can have my epi...",SHELDON,"[Here we go. Pad thai, no peanuts., But does i...","[LEONARD, HOWARD, LEONARD]",BBT,False,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...","{'neg': 0.143, 'neu': 0.857, 'pos': 0.0, 'comp...","[{'neg': 0.268, 'neu': 0.732, 'pos': 0.0, 'com..."
3,190,"Lois Lane is falling, accelerating at an initi...",SHELDON,[A marathon? How many Superman movies are ther...,"[PENNY, SHELDON, PENNY, SHELDON, SHELDON, PENN...",BBT,False,"{'neg': 0.058, 'neu': 0.851, 'pos': 0.091, 'co...","{'neg': 0.0, 'neu': 0.906, 'pos': 0.094, 'comp...","[{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compoun..."
4,1105,I'm just inferring this is a couch because the...,SHELDON,"[Great Caesar's ghost, look at this place., So...","[SHELDON, LEONARD, SHELDON, SHELDON, SHELDON, ...",BBT,True,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...","{'neg': 0.097, 'neu': 0.815, 'pos': 0.088, 'co...","[{'neg': 0.202, 'neu': 0.439, 'pos': 0.36, 'co..."
...,...,...,...,...,...,...,...,...,...,...
685,2169,"Hes not right for the part, and if I suggest h...",CHANDLER,"[What am I gonna do now?, Just pass the tape a...","[CHANDLER, RACHEL]",FRIENDS,True,"{'neg': 0.102, 'neu': 0.898, 'pos': 0.0, 'comp...","{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...","[{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compoun..."
686,2235,"Oh yeah he has a caretaker his older brother, ...",CHANDLER,"[Helo! Anybody in there order a celebrity?, Wh...","[JOEY, PERSON, CHANDLER, PERSON]",FRIENDS,False,"{'neg': 0.0, 'neu': 0.858, 'pos': 0.142, 'comp...","{'neg': 0.062, 'neu': 0.751, 'pos': 0.187, 'co...","[{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compoun..."
687,234,Is it me or the greetings gone downhill around...,CHANDLER,"[Hey, You son of a bitch!]","[CHANDLER, JOEY]",FRIENDS,True,"{'neg': 0.0, 'neu': 0.763, 'pos': 0.237, 'comp...","{'neg': 0.506, 'neu': 0.494, 'pos': 0.0, 'comp...","[{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compoun..."
688,2608,"You are right, by saying nice, I am virtually ...",CHANDLER,"[Did I go to this school?, Hey, there's Missy ...","[CHANDLER, ROSS, CHANDLER, ROSS]",FRIENDS,True,"{'neg': 0.0, 'neu': 0.781, 'pos': 0.219, 'comp...","{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...","[{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compoun..."


In [9]:
"""# TO DO: handle warning
import warnings

# Suppress all warnings
warnings.filterwarnings("ignore")


import json
import pandas as pd
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer # add pip install vaderSentiment to readme

# Load VADER sentiment analyzer
analyzer = SentimentIntensityAnalyzer()

# Open the JSON file
with open('data/sarcasm_data.json', 'r') as file:
    # Load JSON data from the file
    json_data = json.load(file)

# Define a list of labels (columns) you want to extract
labels = ['utterance', 'speaker', 'context', 'context_speakers', 'show', 'sarcasm']

# Create an empty DataFrame
df = pd.DataFrame(columns=labels)

# Iterate over each key-value pair in the JSON data
for key, value in json_data.items():
    # Extract only the desired labels from the JSON data and add a new row to the DataFrame
    row_data = {label: value[label] for label in labels}
    
    # Perform SA on the 'utterance' 
    sentiment_score_utterance = analyzer.polarity_scores(row_data['utterance'])
    # Perform SA on all the context dialog  
    sentiment_score_context_all = analyzer.polarity_scores(row_data['context'])

    # List to store SA for every sentence in context dialog
    sentiment_score_context_sentences = []
    # Extract context dialog
    context_sentences = row_data['context']

    # Iterates over each sentence of the context dialog
    for sentence in context_sentences:
        # Perform SA
        sentiment_score = analyzer.polarity_scores(sentence)
        # Add sentence sentiment score to the list
        sentiment_score_context_sentences.append(sentiment_score)

        # If we want to match the sentence to the score:
        #sentiment_score_context_sentences.append({'sentence': sentence, 'sentiment_score': sentiment_score})
        
    
    # Add sentiment scores to the row data
    row_data['sentiment_utterance'] = sentiment_score_utterance                     # Utterance
    row_data['sentiment_context_all'] = sentiment_score_context_all                 # Context overall
    row_data['sentiment_context_per_sentence'] = sentiment_score_context_sentences  # Context per sentence
    
    # Append the row to the DataFrame
    df = df.append(row_data, ignore_index=True)

# Visualize final dataframe df
df
"""



### Visualisation format SA context and utterance

In [8]:
df_context = df['sentiment_context_all']

print("context all sequences:", df['sentiment_context_all'][0])
df['sentiment_context_all'][0]
print("context per sequence:", df['sentiment_context_per_sentence'][0])
print("utterance:", df['sentiment_utterance'][0] )

context all sequences: {'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
context per sequence: [{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}, {'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}]
utterance: {'neg': 0.0, 'neu': 0.783, 'pos': 0.217, 'compound': 0.3612}
