In [59]:
import pandas as pd
import numpy as np
from sklearn.metrics import f1_score, recall_score, precision_score

import statsmodels.api as sm
from statsmodels.formula.api import ols, logit

foundations = {"mftc": {
                    "binding": ["individual", "binding", "non-moral"], 
                    "moral": ["moral", "non-moral"],
                    "full": ["care", "fairness", "loyalty", "authority", "purity", "non-moral"],
                    "complete": ["care", "harm", "fairness", "cheating", "loyalty", "betrayal", "authority", "subversion", "purity", "degradation", "non-moral"]
                },
               "mfrc":  {
                    "binding": ["individual", "binding", "proportionality", "thin morality", "non-moral"], 
                    "moral": ["moral", "thin morality", "non-moral"],
                    "full": ["care", "proportionality", "loyalty", "authority", "purity", "equality", "thin morality", "non-moral"],
                    "complete": ["care", "harm", "equality", "proportionality", "loyalty", "betrayal", "authority", "subversion", "purity", "degradation", "thin morality", "non-moral"]
               }
              }

foundations_dict = {
                    "full": {"harm": "care", "care": "care", "degradation": "purity", 
                            "purity": "purity", "betrayal": "loyalty", "loyalty": "loyalty", 
                            "subversion": "authority", "authority": "authority",
                            "cheating": "fairness", "fairness": "fairness", "equality": "fairness",
                            "non-moral": "non-moral", "nm": "non-moral", "thin morality": "thin morality", "proportionality": "fairness"},
}  

General Parameters

In [132]:
corp = "mftc"
mode = "full"
training = "cross"

if corp == "mftc":
    if training == "cross":
        eval = "mfrc"
    else:
        eval = "mftc"
elif corp == "mfrc":
    if training == "cross":
        eval = "mftc"
    else:
        eval = "mfrc"

Load Data

In [133]:
# load samples (groundtruth)
if training == "cross":
    df_groundtruth = pd.read_csv("../data/preprocessed/" + eval + "_cleaned_" + mode + ".csv")
    df_meta = pd.read_csv("../data/preprocessed/" + eval + "_eval_" + mode + ".csv")
elif training == "normal":
    df_groundtruth = pd.read_csv("../data/preprocessed/" + corp + "_sample_" + mode + ".csv")   
    df_meta = pd.read_csv("../data/preprocessed/" + corp + "_meta_sample_" + mode + ".csv")   
else:
    pass

df_pred = pd.read_csv("../results/predictions/" + corp + "_labels_" + training + "_" + mode + ".csv")

# load eval data
# convert wide to long (convert dummy to label)

# analyze
    # accuracy per annotator
    # regression model: label ~ annotator

Process Data

In [134]:
# reformat labels for cross corpus classifications (because they are trained on slightly different labels)
if training == "cross":
    if corp == "mftc": #if an mftc classifier predicts mfrc -> combine eq and prop to fairness & drop thin morality because the classifier was not trained on these labels
        df_groundtruth["fairness"] = (df_groundtruth.equality + df_groundtruth.proportionality) > 0
        df_groundtruth.fairness = df_groundtruth.fairness.astraining(int)
        df_groundtruth = df_groundtruth.drop(["thin morality", "equality", "proportionality"], axis = 1)
        
        df_meta.annotation = df_meta.annotation.replace(foundations_dict[mode])
        df_meta = df_meta[df_meta.annotation != "thin morality"].reset_index(drop=True)
    elif corp == "mfrc": # if mfrc classifier predicts mftc -> combine eq&prop predictions and drop thin morality predictions because these labels are not in groundtruth
        df_pred["fairness"] = (df_pred.equality + df_pred.proportionality) > 0
        df_pred.fairness = df_pred.fairness.astraining(int)
        df_pred = df_pred.drop(["thin morality", "equality", "proportionality"], axis = 1)
        
        df_meta.annotation = df_meta.annotation.replace(foundations_dict[mode])
        df_meta = df_meta[df_meta.annotation != "thin morality"].reset_index(drop=True)
    else:
        pass
elif training == "normal":
    pass
else:
    pass

Get Performance

Create dataframes for analyses (Matching of annotator and classifier)

In [136]:
# for cross domain predictions -> transform to compatible classes (e.g., MFQ1 from MFQ2)
if training == "cross":
    cols = foundations["mftc"][mode]
else:
    cols = foundations[corp][mode]

y_true = df_groundtruth[cols].values
y_pred = df_pred[cols]

print(f1_score(y_true, y_pred, average="macro"))

df_total = df_meta.merge(df_pred[cols + ["text"]], on="text")
df_total = df_total[df_total.annotation!="nh"].reset_index(drop=True)
df_total["success"] = df_total.apply(lambda x: x[x["annotation"]] == 1, axis = 1)

df_total.to_csv("../results/evals/" + corp + "_success_" + training + "_" + mode + ".csv", index = False) # uncomment for analyses

0.4088107417900318


### Additional Information

Average number of words in texts

In [137]:
# average number of words in texts
df_pred.text.str.split("\\s+").str.len().mean()

33.88431359399101

In [138]:
# search for entries
i = 15
print(df_pred.text[i])
# print(df_pred.annotation[i])
print(df_pred.iloc[i])

  people just WORRIED because they know that Obama and other foreign countries interfered in the French  election

Macron can't possibly be seen as legitimate yntil we have an investigation into what Obama did to  help him while president

I've heard rumours  Obama might have even used the NSA to spy on le pen  to find out her strategies
text                 people just WORRIED because they know that O...
care                                                             0.0
proportionality                                                    0
loyalty                                                          0.0
authority                                                        0.0
purity                                                           0.0
equality                                                           0
thin morality                                                      0
non-moral                                                        1.0
fairness                               

Accuracy by annotator

In [139]:
df_total["success"].groupby(df_total['annotator']).mean()

annotator
annotator00    0.731435
annotator01    0.603389
annotator02    0.723866
annotator03    0.783196
annotator04    0.191821
annotator05    0.303060
Name: success, dtype: float64

Distribution of foundations across predictions

In [140]:
if training == "cross":
    print(df_total[foundations["mftc"][mode]].sum(0)/df_total.shape[0])
else:
    print(df_total[foundations[corp][mode]].sum(0)/df_total.shape[0])

care         0.059409
fairness     0.107755
loyalty      0.022542
authority    0.037052
purity       0.022842
non-moral    0.758314
dtype: float64


In [141]:
# compare distribution of foundations over predictions and groundtruth (are they similar? -> better classifier)
if training == "cross":
    print(df_groundtruth[foundations["mftc"][mode]].sum(0)/df_groundtruth.shape[0])
else:
    print(df_groundtruth[foundations[corp][mode]].sum(0)/df_groundtruth.shape[0])

care         0.117363
fairness     0.105560
loyalty      0.032057
authority    0.049292
purity       0.018309
non-moral    0.650526
dtype: float64
