In [2]:
from sklearn.metrics import confusion_matrix
import pandas as pd
from statsmodels.stats import inter_rater as irr
import json

In [24]:
### Convert the annotated csv file into jsonline file in the required format  
### Used for tweets

# Load the CSV file
csv_file = "data/drone/responses/all_tweets_full_responses.csv"
df = pd.read_csv(csv_file)
# print(df.describe())
output_path = "data/drone/responses/all_tweets_annotated.jsonl"

# Open a file to write the JSON Lines
with open(output_path, "w") as jsonl_file:
    # Iterate through each row in the dataframe
    for _, row in df.iterrows():
        # Create the JSON structure for each row
        json_data = {
            "text": row['text'],
            "emotion": row['voted_emotion'],
            "sentiment": row['voted_sentiment'],
            "annotations": {
                "emotion": [row['emotion_sw'], row['emotion_sn'], row['emotion_do']],
                "sentiment": [row['overall_sentiment_sw'], row['overall_sentiment_sn'], row['overall_sentiment_do']]
            }
        }
        # Write the JSON data as a line in the JSON Lines file
        jsonl_file.write(json.dumps(json_data) + "\n")

print("Conversion to JSON Lines completed.")


       Unnamed: 15
count          0.0
mean           NaN
std            NaN
min            NaN
25%            NaN
50%            NaN
75%            NaN
max            NaN
Conversion to JSON Lines completed.


In [5]:
### Converts csv file to required jsonline format
### Used for reddit

import pandas as pd
import json

# Load the CSV file
csv_file = "data/energy/responses/full_energy_annotated.csv"
df = pd.read_csv(csv_file)

# Load the existing JSON Lines file
jsonl_file = "data/energy/shuffled_CandD.jsonl"   #only use up to 290 convo for energy
with open(jsonl_file, "r") as file:
    jsonl_data = [json.loads(line) for line in file]

# Function to determine agreement level
def get_annotations(row):
    return {
        "emotion": [row['emotion_sw'], row['emotion_sn'], row['emotion_do']],
        "sentiment": [row['overall_sentiment_sw'], row['overall_sentiment_sn'], row['overall_sentiment_do']]
    }

grouped = df.groupby((df['speaker'].str.startswith('Dialogue')).cumsum())
# Write the combined data to a new JSON Lines file
output_jsonl_file = "full_energy_annotated.jsonl"

# Iterate through each conversation in jsonl_data and corresponding dialogue group in the dataframe
with open(output_jsonl_file, "w") as output_file:
    for i, (name, group) in enumerate(grouped):
        if i >= len(jsonl_data):
            break
        # print(f"Convo {i}")
        jsonl_conversation = jsonl_data[i]
        first_utterance = jsonl_conversation['conversation'][0]['utterance']
        # Create the conversation list
        conversation_list = []
        for j, row in group.iterrows():
            # check if first utterance is the same
            if j == 1 and row['text'] != first_utterance:
                raise AssertionError(f"first utterance of convo {i} does not match!")
            # print(f"row {_} of dialogue {i}")
            if row['speaker'].startswith("Dialogue"):
                continue

            conversation_list.append({
                "utterance": row['text'],
                "speaker": row['speaker'],
                "emotion": row['voted_emotion'],
                "sentiment": row['voted_sentiment'],
                "annotations": get_annotations(row)
            })

        # Update the conversation in jsonl_conversation
        jsonl_conversation['conversation'] = conversation_list
        
        # Write the updated conversation to the output file
        output_file.write(json.dumps(jsonl_conversation) + "\n")

print("Conversion to combined JSON Lines completed.")


Conversion to combined JSON Lines completed.


for confusion matrix VV

In [None]:
df = pd.read_csv("Batch1_tweets_responses.csv")

# confusion_matrix(df['golden_sentiment'], df['sentiment_sw'], labels=['anger', 'disgust', 'fear', 'happiness', 'sadness', 'surprise', 'other'])
confusion_matrix(df['golden_sentiment'], df['overall_sentiment_do'], labels=['positive', 'neutral', 'negative'])
confusion_matrix(df['golden_sentiment'], df['sentiment_sw'])

In [10]:
df = pd.read_csv("data/energy/responses/full_energy_annotated.csv")
df = df[~df['text'].isna()]

emotions = df[['emotion_sw', 'emotion_sn', 'emotion_do']]
agg_emotions = irr.aggregate_raters(emotions)
print(irr.fleiss_kappa(agg_emotions[0], method='fleiss'))

0.3340581531758013


In [12]:
sentiments = df[['overall_sentiment_sw', 'overall_sentiment_sn', 'overall_sentiment_do']]
agg_sentiments = irr.aggregate_raters(sentiments)
print(irr.fleiss_kappa(agg_sentiments[0], method='fleiss'))

0.4690062436824931


In [20]:
sentiments[sentiments['overall_sentiment_sn'].isna()]

Unnamed: 0,overall_sentiment_sw,overall_sentiment_sn,overall_sentiment_do
71,neutral,,neutral


In [11]:
%cd ..

/home/chaoming/emotion_classification


In [2]:
import pandas as pd
import numpy as np
from sklearn.metrics import (accuracy_score, 
                             classification_report, 
                             confusion_matrix)

datafile = "data/drone/responses/all_tweets_full_responses.csv"
df = pd.read_csv(datafile, encoding="utf-8", encoding_errors="replace")

emotions = ["happiness", "anger", "disgust", "fear", "sadness", "surprise", "other"]
sentiments = ["positive", "negative", "neutral"]
sent_mapping = {'positive': 2, 'neutral': 1, 'negative': 0}
emotion_mapping = {"happiness":0, "anger":1, "disgust":2, "fear":3, "sadness":4, "surprise":5, "other":6}

df["emotion"] = df["voted_emotion"]
df["sentiment"] = df["voted_sentiment"]

# pred_path = "output/drone/local_llama3_8B/test/masked_all_tweets_llama3.csv"
# preds_df = pd.read_csv(pred_path)
# preds_df["emotion"] = preds_df["llama3_emotion"]
# preds_df["sentiment"] = preds_df["llama3_sentiment"]

# def map_func(x):
#     return mapping.get(x, 1)

def evaluate(y_true, y_pred, labels):
    
    y_true = y_true.tolist()
    y_pred = y_pred.tolist()
    # Calculate accuracy
    accuracy = accuracy_score(y_true=y_true, y_pred=y_pred)
    print(f'Accuracy: {accuracy:.5f}')
    
    # Generate accuracy report
    unique_labels = set(y_true)  # Get unique labels
    
    for label in labels:
        label_indices = [i for i in range(len(y_true)) 
                         if y_true[i] == label]
        label_y_true = [y_true[i] for i in label_indices]
        label_y_pred = [y_pred[i] for i in label_indices]
        accuracy = accuracy_score(label_y_true, label_y_pred)
        print(f'Accuracy for label {label}: {accuracy:.5f}')
        
    # Generate classification report
    class_report = classification_report(y_true=y_true, y_pred=y_pred, digits=5)
    print('\nClassification Report:')
    print(class_report)
    
    # Generate confusion matrix
    conf_matrix = confusion_matrix(y_true=y_true, y_pred=y_pred, labels=labels)
    print('\nConfusion Matrix:')
    print(conf_matrix)



In [5]:
preds_df["llama3_emotion"].value_counts()

llama3_emotion
other        1355
anger         540
happiness     317
surprise      109
fear           63
sadness        34
disgust        15
Name: count, dtype: int64

In [6]:
# print(df['emotion'].describe())
evaluate(df['emotion'], preds_df['emotion'], emotions)
evaluate(df['sentiment'], preds_df['sentiment'], sentiments)

Accuracy: 0.58137
Accuracy for label happiness: 0.92000
Accuracy for label anger: 0.97872
Accuracy for label disgust: 0.01471
Accuracy for label fear: 0.23077
Accuracy for label sadness: 0.33333
Accuracy for label surprise: 0.30508
Accuracy for label other: 0.59465

Classification Report:
              precision    recall  f1-score   support

       anger    0.08519   0.97872   0.15673        47
     disgust    0.06667   0.01471   0.02410        68
        fear    0.09524   0.23077   0.13483        26
   happiness    0.14511   0.92000   0.25068        50
         nan    0.00000   0.00000   0.00000         0
       other    0.98524   0.59465   0.74167      2245
     sadness    0.05882   0.33333   0.10000         6
    surprise    0.16514   0.30508   0.21429        59

    accuracy                        0.58137      2501
   macro avg    0.20018   0.42216   0.20279      2501
weighted avg    0.89573   0.58137   0.68106      2501


Confusion Matrix:
[[  46    1    0    0    1    0    2]
 [

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
from evaluate import evaluate
from pathlib import Path
import pandas as pd

emotion_labels = ["anger", "joy", "optimism", "sadness"]
sentiment_labels = ["negative", "neutral", "positive"]
pred_path = Path("output/tweet_eval_emotion/tweet_eval-emotion-1.0-lora/max_new_toks=30-temp=0.1-rep_pen=1.2-combine_prompts=False-few_shots=True")
golden = pd.read_csv("data/tweet_eval/emotion/test.csv")
pred_file = Path(pred_path / "predictions.csv")
predictions = pd.read_csv(pred_file)

#### TODO: PUT PRINTED RESULTS IN THE OUTPUT_PATH log file########
emotion_report = evaluate(golden["label"],predictions["emotion"], emotion_labels)
# sentiment_report = evaluate(golden["label"],predictions["sentiment"], sentiment_labels)

In [None]:


with open(pred_path / "emotion_report.json", "w") as f2:
    json.dump(emotion_report, f2, indent=2, ensure_ascii=False)

# with open(output_path / "sentiment_report.json", "w") as f2:
#     json.dump(sentiment_report, f2, indent=2, ensure_ascii=False)