In [2]:
import pandas as pd

from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

In [8]:
def get_original_data(file_path='data/sarcasm_data.json'):
    """Load the sarcasm dataset and attach VADER sentiment scores.

    Parameters
    ----------
    file_path : str, optional
        Location of the JSON file to read. Defaults to
        'data/sarcasm_data.json'.

    Returns
    -------
    pandas.DataFrame
        The JSON content transposed, with the former index turned into a
        regular column by ``reset_index()``, plus three added columns:

        * ``sentiment_utterance`` - ``polarity_scores`` result for the
          'utterance' value.
        * ``sentiment_context_all`` - ``polarity_scores`` result for the
          whole context, scored as one text.
        * ``sentiment_context_per_sentence`` - list with one
          ``polarity_scores`` result per item of 'context'.
    """
    # Load VADER sentiment analyzer
    analyzer = SentimentIntensityAnalyzer()

    # Load the JSON file into a DataFrame and turn the index into a column
    df = pd.read_json(file_path).transpose().reset_index()

    def get_sentiment(text):
        """Score a single text string with the shared analyzer."""
        return analyzer.polarity_scores(text)

    def get_context_sentiment(context):
        """Score a whole context as one text.

        'context' is iterated sentence by sentence below, i.e. it is a
        collection of strings, while ``polarity_scores`` is meant to receive
        one text string. Join the sentences with a space so words at sentence
        boundaries stay separate tokens.
        """
        if isinstance(context, str):
            return get_sentiment(context)
        return get_sentiment(' '.join(context))

    # Apply sentiment analysis to the 'utterance' column
    df['sentiment_utterance'] = df['utterance'].apply(get_sentiment)

    # Apply sentiment analysis to the full text of the 'context' column
    df['sentiment_context_all'] = df['context'].apply(get_context_sentiment)

    # Apply sentiment analysis to each sentence in the 'context' column
    df['sentiment_context_per_sentence'] = df['context'].apply(
        lambda context: [get_sentiment(sentence) for sentence in context]
    )

    return df

## Add one extra predictor as an array called "sentiment_features" (logic then is onehot-encoded)

In [20]:
# U : Utterance sentiment
# O : Overall context sentiment
# C : With compound information

### Utterance & overall context sentiments with pos, neu, neg and compound

In [39]:
# "UOC" variant: utterance + overall-context scores, compound included.
df = get_original_data()

# Each score dict becomes a plain list of its values; adding the two Series
# concatenates the lists row by row (utterance values first, context second).
utterance_scores = df['sentiment_utterance'].apply(lambda scores: list(scores.values()))
context_scores = df['sentiment_context_all'].apply(lambda scores: list(scores.values()))
df['sentiment_features'] = utterance_scores + context_scores

# Swap rows and columns before exporting.
# NOTE(review): get_original_data() calls reset_index(), so after this
# transpose the columns are row positions rather than the original record
# keys - confirm that is the JSON layout downstream readers expect.
df = df.T

df.to_json('sarcasm_data_sentiment_UOC.json')

### Utterance & overall context sentiments with pos, neu, neg

In [38]:
# Rebuild the scored frame from the source file; the other export cells
# reassign `df` to its transpose, so it cannot be reused here.
df = get_original_data()

def remove_compound(dict):
    """Return a new dict holding every entry of the input except 'compound'.

    The input mapping is left untouched and the order of the remaining keys
    is preserved.

    NOTE(review): the parameter name shadows the built-in ``dict``; it is
    kept unchanged so existing callers are unaffected.
    """
    trimmed = {}
    for label, score in dict.items():
        if label != 'compound':
            trimmed[label] = score
    return trimmed

# Strip the 'compound' entry from both sentiment columns.
for sentiment_col in ('sentiment_utterance', 'sentiment_context_all'):
    df[sentiment_col] = df[sentiment_col].apply(remove_compound)

# Turn each remaining dict into a list of its values and join the two lists
# per row (utterance values first, then context values).
utterance_scores = df['sentiment_utterance'].apply(lambda scores: list(scores.values()))
context_scores = df['sentiment_context_all'].apply(lambda scores: list(scores.values()))
df['sentiment_features'] = utterance_scores + context_scores

# Swap rows and columns before exporting.
# NOTE(review): get_original_data() calls reset_index(), so after this
# transpose the columns are row positions rather than the original record
# keys - confirm that is the JSON layout downstream readers expect.
df = df.T

df.to_json('sarcasm_data_sentiment_UO.json')

### Utterance sentiments with pos, neu, neg and compound

In [4]:
# "UC" variant: utterance scores only, compound included.
df = get_original_data()

# Flatten every utterance score dict into a list of its values.
df['sentiment_features'] = df['sentiment_utterance'].apply(
    lambda scores: list(scores.values())
)

# Swap rows and columns before exporting.
# NOTE(review): get_original_data() calls reset_index(), so after this
# transpose the columns are row positions rather than the original record
# keys - confirm that is the JSON layout downstream readers expect.
df = df.T

df.to_json('sarcasm_data_sentiment_UC.json')

### Utterance sentiments with pos, neu, neg

In [5]:
# Rebuild the scored frame from the source file; the other export cells
# reassign `df` to its transpose, so it cannot be reused here.
df = get_original_data()

def remove_compound(dict):
    """Copy a score mapping into a new dict, leaving out the 'compound' key.

    Key order of the kept entries follows the input; the input itself is not
    modified.

    NOTE(review): an identical ``remove_compound`` is defined in an earlier
    cell of this notebook, and the parameter name shadows the built-in
    ``dict``; both are left as-is to keep the interface unchanged.
    """
    kept = {}
    for key, value in dict.items():
        if key == 'compound':
            continue
        kept[key] = value
    return kept

# Strip the 'compound' entry from the utterance scores.
utterance_no_compound = df['sentiment_utterance'].apply(remove_compound)
df['sentiment_utterance'] = utterance_no_compound

# Flatten every remaining score dict into a list of its values.
df['sentiment_features'] = df['sentiment_utterance'].apply(
    lambda scores: list(scores.values())
)

# Swap rows and columns before exporting.
# NOTE(review): get_original_data() calls reset_index(), so after this
# transpose the columns are row positions rather than the original record
# keys - confirm that is the JSON layout downstream readers expect.
df = df.T

df.to_json('sarcasm_data_sentiment_U.json')

# Add columns

In [31]:
# Build a flat table with one column per sentiment score and save it as CSV.
df = get_original_data()

# Expand each score dict into one column per key, prefixed by its source.
df_utterance = pd.DataFrame(df['sentiment_utterance'].tolist()).add_prefix('utterance_')
df_context = pd.DataFrame(df['sentiment_context_all'].tolist()).add_prefix('context_')

# Place the utterance and context score columns side by side.
df_sentiment = pd.concat([df_utterance, df_context], axis=1)

df_sentiment.to_csv('sarcasm_data_sentiment_all.csv')

# Preview the first rows. This has to be the cell's last expression: a bare
# expression in the middle of a cell is evaluated but never displayed.
df_sentiment.head()