In [1]:
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
sent_df = pd.read_pickle("./data/processed_data.pkl")

In [3]:
class_cols = list(sent_df.iloc[:,4:23].columns)
mixed_cols = list(sent_df.iloc[:,24:].columns)

In [4]:
def class_results(row, col_name):
    if row[col_name] == "PARSE_FAIL":
        return "PARSE_FAIL"
    elif row["gold_label"]:
        if row[col_name]:
            return "True Positive"
        else:
            return "False Negative"
    else:
        if row[col_name]:
            return "False Positive"
        else:
            return "True Negative"

In [5]:
classical_df = pd.DataFrame(columns=class_cols)
for col in class_cols:
    classical_df[col] = sent_df.apply(lambda row: class_results(row, col), axis=1)
classical_df.head()

Unnamed: 0,classical@0.05,classical@0.10,classical@0.15,classical@0.20,classical@0.25,classical@0.30,classical@0.35,classical@0.40,classical@0.45,classical@0.50,classical@0.55,classical@0.60,classical@0.65,classical@0.70,classical@0.75,classical@0.80,classical@0.85,classical@0.90,classical@0.95
0,True Positive,True Positive,True Positive,True Positive,True Positive,True Positive,True Positive,True Positive,True Positive,True Positive,False Negative,False Negative,False Negative,False Negative,False Negative,False Negative,False Negative,False Negative,False Negative
1,True Negative,True Negative,True Negative,True Negative,True Negative,True Negative,True Negative,True Negative,True Negative,True Negative,True Negative,True Negative,True Negative,True Negative,True Negative,True Negative,True Negative,True Negative,True Negative
2,True Negative,True Negative,True Negative,True Negative,True Negative,True Negative,True Negative,True Negative,True Negative,True Negative,True Negative,True Negative,True Negative,True Negative,True Negative,True Negative,True Negative,True Negative,True Negative
3,True Positive,True Positive,True Positive,True Positive,True Positive,True Positive,True Positive,True Positive,True Positive,True Positive,True Positive,True Positive,True Positive,True Positive,True Positive,True Positive,True Positive,True Positive,True Positive
4,False Negative,False Negative,False Negative,False Negative,False Negative,False Negative,False Negative,False Negative,False Negative,False Negative,False Negative,False Negative,False Negative,False Negative,False Negative,False Negative,False Negative,False Negative,False Negative


In [6]:
mixed_df = pd.DataFrame(columns=mixed_cols)
for col in mixed_cols:
    mixed_df[col] = sent_df.apply(lambda row: class_results(row, col), axis=1)
mixed_df.head()

Unnamed: 0,mixed@0.60,mixed@0.61,mixed@0.62,mixed@0.63,mixed@0.64,mixed@0.65,mixed@0.66,mixed@0.67,mixed@0.68,mixed@0.69,...,mixed@0.90,mixed@0.91,mixed@0.92,mixed@0.93,mixed@0.94,mixed@0.95,mixed@0.96,mixed@0.97,mixed@0.98,mixed@0.99
0,False Negative,False Negative,False Negative,False Negative,False Negative,False Negative,False Negative,False Negative,False Negative,False Negative,...,False Negative,False Negative,False Negative,False Negative,False Negative,False Negative,False Negative,False Negative,False Negative,False Negative
1,True Negative,True Negative,True Negative,True Negative,True Negative,True Negative,True Negative,True Negative,True Negative,True Negative,...,True Negative,True Negative,True Negative,True Negative,True Negative,True Negative,True Negative,True Negative,True Negative,True Negative
2,True Negative,True Negative,True Negative,True Negative,True Negative,True Negative,True Negative,True Negative,True Negative,True Negative,...,True Negative,True Negative,True Negative,True Negative,True Negative,True Negative,True Negative,True Negative,True Negative,True Negative
3,True Positive,True Positive,True Positive,True Positive,True Positive,True Positive,True Positive,True Positive,True Positive,True Positive,...,True Positive,True Positive,True Positive,True Positive,True Positive,True Positive,True Positive,True Positive,True Positive,True Positive
4,False Negative,False Negative,False Negative,False Negative,False Negative,False Negative,False Negative,False Negative,False Negative,False Negative,...,False Negative,False Negative,False Negative,False Negative,False Negative,False Negative,False Negative,False Negative,False Negative,False Negative


In [7]:
def get_scores(dist):
    vals = dist.value_counts()
    tp = vals["True Positive"]
    fp = vals["False Positive"]
    tn = vals["True Negative"]
    fn = vals["False Negative"]
    pf = vals["PARSE_FAIL"]
    acc = (tp + tn) / (tp + tn + fp + fn + pf)
    acc_wo_pf = (tp + tn) / (tp + tn + fp + fn)
    prec = tp / (tp + fp)
    rec = tp / (tp + fn)
    f1 = 2 * ((prec * rec) / (prec + rec))
    return acc, acc_wo_pf, prec, rec, f1


In [8]:
mixed_acc_df = mixed_df.apply(lambda col: get_scores(col), axis=0).transpose()
mixed_acc_df.columns = ["Accuracy", "Accuracy_wo_parsefails", "Precision", "Recall", "F1"]
mixed_acc_df


Unnamed: 0,Accuracy,Accuracy_wo_parsefails,Precision,Recall,F1
mixed@0.60,0.797042,0.798002,0.962168,0.606406,0.743942
mixed@0.61,0.796526,0.797486,0.962104,0.605338,0.743119
mixed@0.62,0.795494,0.796453,0.9625,0.602847,0.741357
mixed@0.63,0.793774,0.79473,0.962286,0.599288,0.738596
mixed@0.64,0.79257,0.793525,0.962134,0.596797,0.736657
mixed@0.65,0.792054,0.793008,0.962069,0.59573,0.735824
mixed@0.66,0.79085,0.791803,0.961916,0.593238,0.733876
mixed@0.67,0.789302,0.790253,0.961717,0.590036,0.731363
mixed@0.68,0.789302,0.790253,0.962791,0.589324,0.731126
mixed@0.69,0.787754,0.788703,0.962595,0.586121,0.7286


In [10]:
class_acc_df = classical_df.apply(lambda col: get_scores(col), axis=0).transpose()
class_acc_df.columns = ["Accuracy", "Accuracy_wo_parsefails", "Precision", "Recall", "F1"]
class_acc_df

Unnamed: 0,Accuracy,Accuracy_wo_parsefails,Precision,Recall,F1
classical@0.05,0.847953,0.848975,0.922606,0.75089,0.827938
classical@0.10,0.840901,0.841915,0.935945,0.722776,0.815663
classical@0.15,0.833677,0.834682,0.939221,0.703915,0.804719
classical@0.20,0.828001,0.828999,0.943387,0.6879,0.795637
classical@0.25,0.824217,0.825211,0.945852,0.67758,0.78955
classical@0.30,0.818885,0.819873,0.952308,0.660854,0.780252
classical@0.35,0.814757,0.81574,0.953125,0.651246,0.773784
classical@0.40,0.809598,0.810573,0.955272,0.638434,0.765358
classical@0.45,0.805298,0.806268,0.956145,0.62847,0.758428
classical@0.50,0.80031,0.801274,0.95898,0.615658,0.749892


In [5]:
def print_scores(dist):
    acc, acc_wo_pf, prec, rec, f1 = get_scores(dist)
    print("Accuracy: {:.4f}".format(acc))
    print("Accuracy (w/o parse failures): {:.4f}".format(acc_wo_pf))
    print("Percentage of parse failures: {:.4f}".format(pf / (tp + tn + fp + fn + pf)))
    print("Precision: {:.4f}".format(prec))
    print("Recall: {:.4f}".format(rec))
    print("F1 Score: {:.4f}".format(f1))

In [30]:
sent_df.loc[:,"classical@0.60"].compare(sent_df.loc[:,"classical@0.99"])

Unnamed: 0,self,other


In [12]:
max_class = max(classical_accs, key=lambda key: classical_accs[key][4])
max_mix = max(mixed_accs, key=lambda key: mixed_accs[key][4])
print(max_class, max_mix)

classical_dist = classical_res[max_class]
mixed_dist = mixed_res[max_mix]

classical@0.60 mixed@0.60


In [None]:
classical_dist.value_counts()

In [None]:
mixed_dist.value_counts()

In [None]:
get_scores(classical_dist)

In [None]:
get_scores(mixed_dist)


In [None]:
true_results = list(pd.to_numeric(sent_df.query("gold_label==True").loc[:,"fuzzy_result"], errors="coerce").dropna())
false_results = list(pd.to_numeric(sent_df.query("gold_label==False").loc[:,"fuzzy_result"], errors="coerce").dropna())

data = [true_results, false_results]
fig, ax = plt.subplots()
ax.violinplot(data)
ax.yaxis.grid(True)
ax.set_xticks([y + 1 for y in range(len(data))])
ax.set_xlabel("Gold label")
ax.set_ylabel("Generated probability")

plt.setp(ax, xticks=[y + 1 for y in range(len(data))], xticklabels=["True", "False"])

plt.savefig("./images/final_fuzzy_violin.png", format="png", bbox_inches="tight")
plt.show()