In [45]:
import pandas as pd
import numpy as np

SENTIMENTS = "anger fear joy sadness".split()

def load_predictions(folder_path: str) -> dict:
    result = {}
    for sen in SENTIMENTS:
        path = f"{folder_path}/{sen}-pred.txt"
        result[sen] = pd.read_csv(path, sep='\t', names=['id', 'low', 'medium', 'high'])
        result[sen].index()
    return result

def get_comparison(a: dict, b: dict, sentiment=None) -> [dict, pd.DataFrame]:
    result = {}
    for sen in SENTIMENTS:
        intensity_a = a[sen].apply(lambda x: np.argmax(x[1:]), axis=1)
        intensity_b = b[sen].apply(lambda x: np.argmax(x[1:]), axis=1)
        different = intensity_a != intensity_b
        result[sen] = pd.DataFrame({"id": a[sen]["id"], "a": intensity_a, "b": intensity_b, "different": different}).set_index("id")
    if sentiment is not None:
        return result[sentiment]
    percentage = {}
    equal_count = {}
    different_count = {}
    for sen in SENTIMENTS:
        df = result[sen]
        equal_count[sen] = df[df["different"] == True]["different"].count()
        different_count[sen] = df[df["different"] == False]["different"].count()
        percentage[sen] = equal_count[sen] / len(df.index)
    meta_result = {"result": result, "different_count": different_count, "equal_count": equal_count, "percentage": percentage}
    return meta_result

In [4]:
base_predictions = load_predictions("../assignment_1/predictions")
felipe_benja_predictions = load_predictions("../felipe-benja/predictions")

In [46]:
get_comparison(base_predictions, felipe_benja_predictions, "anger")

Unnamed: 0_level_0,a,b,different
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
10941,1,1,False
10942,0,0,False
10943,1,0,True
10944,1,1,False
10945,1,0,True
...,...,...,...
11696,1,1,False
11697,1,1,False
11698,1,0,True
11699,1,0,True


In [48]:
get_comparison(base_predictions, felipe_benja_predictions)["equal_count"]["anger"]

230