# Ružička: Authorship Verification in Python

In [44]:
import logging

logging.basicConfig(level="INFO")
logger = logging.getLogger("ruzicka")

In [45]:
import numpy as np
from sklearn.pipeline import make_pipeline, Pipeline
from ruzicka.Order2Verifier import Order2Verifier
from ruzicka.utilities import *
from sklearn.preprocessing import LabelEncoder
from ruzicka.score_shifting import ScoreShifter
from ruzicka.evaluation import pan_metrics
from ruzicka.BDIVerifier import BDIVerifier
from typing import Union
import pandas as pd

In [63]:
PREFIX = "../data/2014/"
CORPORA = [
    "du_essays",
    "du_reviews",
    "en_essays",
    "en_novels",
    "gr_articles",
    "sp_articles",
]

In [64]:
def fit_shifter(
    corpus: str,
    vectorizer: Pipeline,
    verifier: Union[BDIVerifier, Order2Verifier],
    min_spread: float = 0.0,
) -> ScoreShifter:
    logger.info("Fitting shifter")
    train_data, test_data = load_pan_dataset(os.path.join(PREFIX, corpus, "train"))
    train_labels, train_documents = zip(*train_data)
    test_labels, test_documents = zip(*test_data)
    train_X = vectorizer.fit_transform(train_documents)
    test_X = vectorizer.transform(test_documents)
    label_encoder = LabelEncoder()
    label_encoder.fit(train_labels + test_labels)
    train_y = np.array(label_encoder.transform(train_labels), dtype="int")
    test_y = np.array(label_encoder.transform(test_labels), dtype="int")
    verifier.fit(train_X, train_y)
    test_scores = verifier.predict_proba(
        test_X=test_X, test_y=np.array(test_y), nb_imposters=30
    )
    gt_scores = load_ground_truth(
        filepath=os.path.join(PREFIX, corpus, "train", "truth.txt"), labels=test_labels
    )
    shifter = ScoreShifter(min=0.053, max=0.947, min_spread=min_spread)
    shifter.fit(predicted_scores=test_scores, ground_truth_scores=gt_scores)
    return shifter

In [65]:
def evaluate(corpus, vectorizer, verifier, shifter):
    logger.info("Loading evaluation corpus")
    train_data, test_data = load_pan_dataset(os.path.join(PREFIX, corpus, "test"))
    train_labels, train_documents = zip(*train_data)
    test_labels, test_documents = zip(*test_data)
    train_X = vectorizer.fit_transform(train_documents)
    test_X = vectorizer.transform(test_documents)
    label_encoder = LabelEncoder()
    label_encoder.fit(train_labels + test_labels)
    train_y = np.array(label_encoder.transform(train_labels), dtype="int")
    test_y = np.array(label_encoder.transform(test_labels), dtype="int")
    verifier.fit(train_X, train_y)
    test_scores = verifier.predict_proba(
        test_X=test_X, test_y=np.array(test_y), nb_imposters=30
    )
    gt_scores = load_ground_truth(
        filepath=os.path.join(PREFIX, corpus, "test", "truth.txt"), labels=test_labels
    )
    unanswered = (
        (shifter.optimal_p1 < test_scores) & (test_scores < shifter.optimal_p2)
    ).sum()
    high_conf = ((0.055 > test_scores) | (test_scores > 0.945)).sum()
    test_scores = np.array(shifter.transform(test_scores))
    badness = np.abs(test_scores - gt_scores).sum()
    fps = ((test_scores - gt_scores) > 0.5).sum()
    acc_score, auc_score, c_at_1_score = pan_metrics(
        prediction_scores=test_scores, ground_truth_scores=gt_scores
    )

    logger.info(f"Unanswered: {unanswered}")
    logger.info(f"High Confidence: {high_conf}")
    logger.info(f"False Positives: {fps}")
    logger.info(f"Badness: {badness:.3f}")
    logger.info(f"Accuracy: {acc_score:.3f}")
    logger.info(f"AUC: {auc_score:.3f}")
    logger.info(f"c@1: {c_at_1_score:.3f}")
    logger.info(f"AUC x c@1: {auc_score * c_at_1_score:.3f}")
    return (
        {
            "test_size": len(test_scores),
            "unanswed": unanswered,
            "high_conf": high_conf,
            "badness": badness,
            "p1": shifter.optimal_p1,
            "p2": shifter.optimal_p2,
            "false_positives": fps,
            "accuracy": acc_score,
            "auc": auc_score,
            "c@1": c_at_1_score,
            "final_score": auc_score * c_at_1_score,
        },
        (test_scores, gt_scores),
    )

In [52]:
vectorizer_35 = make_pipeline(
    TfidfVectorizer(
        sublinear_tf=True,
        use_idf=False,
        norm="l2",
        analyzer="char",
        ngram_range=(3, 5),
        max_features=10000,
    ),
    StandardScaler(with_mean=False),
    FunctionTransformer(lambda x: x.todense(), accept_sparse=True),
)

vectorizer_24 = make_pipeline(
    TfidfVectorizer(
        sublinear_tf=True,
        use_idf=False,
        norm="l2",
        analyzer="char",
        ngram_range=(2, 4),
        max_features=10000,
    ),
    StandardScaler(with_mean=False),
    FunctionTransformer(lambda x: x.todense(), accept_sparse=True),
)

vectorizer_25 = make_pipeline(
    TfidfVectorizer(
        sublinear_tf=True,
        use_idf=False,
        norm="l2",
        analyzer="char",
        ngram_range=(2, 5),
        max_features=10000,
    ),
    StandardScaler(with_mean=False),
    FunctionTransformer(lambda x: x.todense(), accept_sparse=True),
)

vecs = [
    ("2,3,4-grams", vectorizer_24),
    ("2,3,4,5-grams", vectorizer_25),
]

In [53]:
o2v_mm = Order2Verifier(
    metric="minmax", base="instance", rank=True, nb_bootstrap_iter=500, rnd_prop=0.5
)
o2v_cs = Order2Verifier(
    metric="cosine", base="instance", rank=True, nb_bootstrap_iter=500, rnd_prop=0.5
)
bdi_mm = BDIVerifier(metric="minmax", nb_bootstrap_iter=500, rnd_prop=0.33)
bdi_cs = BDIVerifier(metric="cosine", nb_bootstrap_iter=500, rnd_prop=0.33)

classifs = [
    ("Kestemont GI, Minmax", o2v_mm),
    ("Kestemont GI, Cosine", o2v_cs),
    ("BDI, Minmax", bdi_mm),
    ("BDI, Cosine", bdi_cs),
]

In [None]:
results = []
global_results = []
for classif_name, classifier in classifs:
    for vec_name, vectorizer in vecs:
        globals_m = ([], [])
        globals_f = ([], [])
        for corpus in CORPORA:
            result_dict, tgt_m = evaluate(
                corpus, vectorizer, classifier, ScoreShifter().manual_fit(0.11, 0.89)
            )
            result_dict["classifier"] = f"{classif_name} + {vec_name} + manual"
            result_dict["corpus"] = corpus
            results.append(result_dict)
            globals_m[0].extend(tgt_m[0])
            globals_m[1].extend(tgt_m[1])
            fitted, tgt_f = evaluate(
                corpus,
                vectorizer,
                classifier,
                fit_shifter(corpus, vectorizer, classifier, min_spread=0.0),
            )
            globals_f[0].extend(tgt_f[0])
            globals_f[1].extend(tgt_f[1])
            fitted["classifier"] = f"{classif_name} + {vec_name} + fitted"
            fitted["corpus"] = corpus
            results.append(fitted)
        fitted_pan = pan_metrics(*globals_f)
        manual_pan = pan_metrics(*globals_m)
        global_results.append(
            {
                "classif": classif_name,
                "vec": vec_name,
                "shifter": "manual",
                "final": manual_pan[0],
                "auc": manual_pan[1],
                "c@1": manual_pan[2],
            }
        )
        global_results.append(
            {
                "classif": classif_name,
                "vec": vec_name,
                "shifter": "fitted",
                "final": fitted_pan[0],
                "auc": fitted_pan[1],
                "c@1": fitted_pan[2],
            }
        )

In [55]:
global_df = pd.DataFrame(global_results)
global_df = global_df.rename(columns={"final": "acc"})
global_df["final"] = global_df["auc"] * global_df["c@1"]
global_df

Unnamed: 0,classif,vec,shifter,acc,auc,c@1,final
0,"Kestemont GI, Minmax","2,3,4-grams",manual,0.628686,0.707473,0.572086,0.404735
1,"Kestemont GI, Minmax","2,3,4-grams",fitted,0.66756,0.759295,0.694336,0.527206
2,"Kestemont GI, Minmax","2,3,4,5-grams",manual,0.632708,0.705636,0.568043,0.400832
3,"Kestemont GI, Minmax","2,3,4,5-grams",fitted,0.660858,0.77251,0.702476,0.542669
4,"Kestemont GI, Cosine","2,3,4-grams",manual,0.613941,0.684785,0.536524,0.367404
5,"Kestemont GI, Cosine","2,3,4-grams",fitted,0.664879,0.755022,0.678076,0.511962
6,"Kestemont GI, Cosine","2,3,4,5-grams",manual,0.620643,0.698266,0.556847,0.388827
7,"Kestemont GI, Cosine","2,3,4,5-grams",fitted,0.672922,0.76787,0.694848,0.533553
8,"BDI, Minmax","2,3,4-grams",manual,0.684987,0.72349,0.652281,0.471919
9,"BDI, Minmax","2,3,4-grams",fitted,0.682306,0.730092,0.694406,0.50698


In [57]:
print(
    global_df.groupby(["classif", "vec", "shifter"])["acc", "auc", "c@1", "final"]
    .agg("mean")
    .to_latex(float_format="%.3f")
)

\begin{tabular}{lllrrrr}
\toprule
                     &             &        &   acc &   auc &   c@1 &  final \\
classif & vec & shifter &       &       &       &        \\
\midrule
BDI, Cosine & 2,3,4,5-grams & fitted & 0.681 & 0.727 & 0.694 &  0.505 \\
                     &             & manual & 0.672 & 0.715 & 0.649 &  0.464 \\
                     & 2,3,4-grams & fitted & 0.686 & 0.723 & 0.689 &  0.499 \\
                     &             & manual & 0.681 & 0.715 & 0.645 &  0.461 \\
BDI, Minmax & 2,3,4,5-grams & fitted & 0.689 & 0.731 & 0.695 &  0.508 \\
                     &             & manual & 0.682 & 0.726 & 0.660 &  0.479 \\
                     & 2,3,4-grams & fitted & 0.682 & 0.730 & 0.694 &  0.507 \\
                     &             & manual & 0.685 & 0.723 & 0.652 &  0.472 \\
Kestemont GI, Cosine & 2,3,4,5-grams & fitted & 0.673 & 0.768 & 0.695 &  0.534 \\
                     &             & manual & 0.621 & 0.698 & 0.557 &  0.389 \\
                     & 2,3,4-

  global_df.groupby(["classif", "vec", "shifter"])["acc", "auc", "c@1", "final"]
  global_df.groupby(["classif", "vec", "shifter"])["acc", "auc", "c@1", "final"]


In [71]:
# df = pd.DataFrame(results)
# df[["classif", "vec", "shifter"]] = df["classifier"].str.split("+", expand=True)
# df = df.drop("classifier", axis=1)

In [212]:
df = pd.read_csv("bdi_evaluation_newshifter.csv", index_col=0)
df

Unnamed: 0,test_size,unanswed,high_conf,badness,p1,p2,false_positives,accuracy,auc,c@1,final_score,corpus,classif,vec,shifter
0,96,29,50,25.961086,0.11000,0.89000,2,0.739583,0.938802,0.881619,0.827666,du_essays,"Kestemont GI, Minmax","2,3,4-grams",manual
1,96,10,51,15.402530,0.36590,0.65198,4,0.875000,0.962240,0.943142,0.907529,du_essays,"Kestemont GI, Minmax","2,3,4-grams",fitted
2,50,28,9,21.839900,0.11000,0.89000,0,0.660000,0.727200,0.499200,0.363018,du_reviews,"Kestemont GI, Minmax","2,3,4-grams",manual
3,50,3,9,20.437122,0.08876,0.10664,11,0.680000,0.742400,0.678400,0.503644,du_reviews,"Kestemont GI, Minmax","2,3,4-grams",fitted
4,200,148,19,94.892606,0.11000,0.89000,2,0.570000,0.587050,0.295800,0.173649,en_essays,"Kestemont GI, Minmax","2,3,4-grams",manual
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
91,200,3,161,82.323204,0.05300,0.06194,9,0.615000,0.616750,0.614075,0.378731,en_novels,"BDI, Cosine","2,3,4,5-grams",fitted
92,100,37,58,31.316047,0.11000,0.89000,3,0.790000,0.850400,0.671300,0.570874,gr_articles,"BDI, Cosine","2,3,4,5-grams",manual
93,100,1,57,24.461978,0.07982,0.16922,10,0.790000,0.877000,0.787800,0.690901,gr_articles,"BDI, Cosine","2,3,4,5-grams",fitted
94,100,40,52,28.710825,0.11000,0.89000,1,0.750000,0.867000,0.770000,0.667590,sp_articles,"BDI, Cosine","2,3,4,5-grams",manual


In [74]:
from IPython.display import display

In [214]:
for name, group in df.drop(["badness"], axis=1).groupby(["classif", "vec", "shifter"]):
    print(
        f"{''.join(name)}: Accuracy {group.accuracy.mean()*100:.2f} "
        f"C@1: {group['c@1'].mean()*100:.2f}  "
        f"Final: {group['final_score'].mean()*100:.2f}  "
        f"FP%: {group['false_positives'].sum()/group['test_size'].sum()*100:.3f} "
        f"Unanswered: {group['unanswed'].sum():>3} "
        f"({group['unanswed'].sum()/group['test_size'].sum()*100:.3f} %)"
    )
    display(group)

    # this_df = group[["corpus"] + [col for col in group.columns if col != "corpus"]]
    # print(
    #     this_df.drop(["badness", "p1", "p2", "classif", "vec", "shifter"], axis=1).to_latex(
    #         index=False, float_format="%.3f"
    #     )
    # )

BDI, Cosine  2,3,4,5-grams  fitted: Accuracy 69.60 C@1: 71.57  Final: 57.53  FP%: 6.032 Unanswered:  28 (3.753 %)


Unnamed: 0,test_size,unanswed,high_conf,p1,p2,false_positives,accuracy,auc,c@1,final_score,corpus,classif,vec,shifter
85,96,5,73,0.34802,0.5894,5,0.90625,0.962674,0.920573,0.886211,du_essays,"BDI, Cosine","2,3,4,5-grams",fitted
87,50,0,37,0.07088,0.0977,6,0.54,0.6976,0.54,0.376704,du_reviews,"BDI, Cosine","2,3,4,5-grams",fitted
89,200,5,167,0.06194,0.0977,6,0.595,0.5748,0.599625,0.344664,en_essays,"BDI, Cosine","2,3,4,5-grams",fitted
91,200,3,161,0.053,0.06194,9,0.615,0.61675,0.614075,0.378731,en_novels,"BDI, Cosine","2,3,4,5-grams",fitted
93,100,1,57,0.07982,0.16922,10,0.79,0.877,0.7878,0.690901,gr_articles,"BDI, Cosine","2,3,4,5-grams",fitted
95,100,14,52,0.07088,0.33908,9,0.73,0.9308,0.8322,0.774612,sp_articles,"BDI, Cosine","2,3,4,5-grams",fitted


BDI, Cosine  2,3,4,5-grams  manual: Accuracy 68.82 C@1: 67.03  Final: 52.89  FP%: 0.938 Unanswered: 157 (21.046 %)


Unnamed: 0,test_size,unanswed,high_conf,p1,p2,false_positives,accuracy,auc,c@1,final_score,corpus,classif,vec,shifter
84,96,19,73,0.11,0.89,2,0.854167,0.955078,0.923394,0.881914,du_essays,"BDI, Cosine","2,3,4,5-grams",manual
86,50,10,38,0.11,0.89,0,0.56,0.6656,0.552,0.367411,du_reviews,"BDI, Cosine","2,3,4,5-grams",manual
88,200,25,163,0.11,0.89,0,0.585,0.6153,0.55125,0.339184,en_essays,"BDI, Cosine","2,3,4,5-grams",manual
90,200,26,162,0.11,0.89,1,0.59,0.6257,0.5537,0.34645,en_novels,"BDI, Cosine","2,3,4,5-grams",manual
92,100,37,58,0.11,0.89,3,0.79,0.8504,0.6713,0.570874,gr_articles,"BDI, Cosine","2,3,4,5-grams",manual
94,100,40,52,0.11,0.89,1,0.75,0.867,0.77,0.66759,sp_articles,"BDI, Cosine","2,3,4,5-grams",manual


BDI, Cosine  2,3,4-grams  fitted: Accuracy 70.60 C@1: 71.20  Final: 57.77  FP%: 6.568 Unanswered:  43 (5.764 %)


Unnamed: 0,test_size,unanswed,high_conf,p1,p2,false_positives,accuracy,auc,c@1,final_score,corpus,classif,vec,shifter
73,96,4,72,0.2765,0.35696,7,0.90625,0.970486,0.922309,0.895088,du_essays,"BDI, Cosine","2,3,4-grams",fitted
75,50,0,38,0.053,0.06194,3,0.64,0.756,0.64,0.48384,du_reviews,"BDI, Cosine","2,3,4-grams",fitted
77,200,1,163,0.07982,0.08876,10,0.58,0.5947,0.577875,0.343662,en_essays,"BDI, Cosine","2,3,4-grams",fitted
79,200,9,161,0.053,0.11558,6,0.6,0.6304,0.6061,0.382085,en_novels,"BDI, Cosine","2,3,4-grams",fitted
81,100,23,53,0.16922,0.69668,4,0.78,0.8744,0.7626,0.666817,gr_articles,"BDI, Cosine","2,3,4-grams",fitted
83,100,6,51,0.053,0.19604,19,0.73,0.91,0.7632,0.694512,sp_articles,"BDI, Cosine","2,3,4-grams",fitted


BDI, Cosine  2,3,4-grams  manual: Accuracy 71.17 C@1: 67.44  Final: 53.38  FP%: 0.670 Unanswered: 179 (23.995 %)


Unnamed: 0,test_size,unanswed,high_conf,p1,p2,false_positives,accuracy,auc,c@1,final_score,corpus,classif,vec,shifter
72,96,23,72,0.11,0.89,2,0.875,0.950738,0.916775,0.871613,du_essays,"BDI, Cosine","2,3,4-grams",manual
74,50,11,38,0.11,0.89,0,0.66,0.6864,0.5856,0.401956,du_reviews,"BDI, Cosine","2,3,4-grams",manual
76,200,31,160,0.11,0.89,0,0.575,0.60635,0.537075,0.325655,en_essays,"BDI, Cosine","2,3,4-grams",manual
78,200,29,161,0.11,0.89,1,0.6,0.6239,0.555325,0.346467,en_novels,"BDI, Cosine","2,3,4-grams",manual
80,100,45,54,0.11,0.89,1,0.81,0.8712,0.6815,0.593723,gr_articles,"BDI, Cosine","2,3,4-grams",manual
82,100,40,53,0.11,0.89,1,0.75,0.8612,0.77,0.663124,sp_articles,"BDI, Cosine","2,3,4-grams",manual


BDI, Minmax  2,3,4,5-grams  fitted: Accuracy 69.73 C@1: 70.88  Final: 56.85  FP%: 5.362 Unanswered:  66 (8.847 %)


Unnamed: 0,test_size,unanswed,high_conf,p1,p2,false_positives,accuracy,auc,c@1,final_score,corpus,classif,vec,shifter
61,96,18,74,0.07982,0.7682,2,0.84375,0.959201,0.927734,0.889884,du_essays,"BDI, Minmax","2,3,4,5-grams",fitted
63,50,3,35,0.053,0.1424,5,0.56,0.6936,0.5724,0.397017,du_reviews,"BDI, Minmax","2,3,4,5-grams",fitted
65,200,14,162,0.08876,0.20498,5,0.615,0.6137,0.59385,0.364446,en_essays,"BDI, Minmax","2,3,4,5-grams",fitted
67,200,0,160,0.053,0.06194,11,0.605,0.62605,0.605,0.37876,en_novels,"BDI, Minmax","2,3,4,5-grams",fitted
69,100,25,62,0.08876,0.66986,4,0.79,0.8396,0.7375,0.619205,gr_articles,"BDI, Minmax","2,3,4,5-grams",fitted
71,100,6,54,0.07982,0.20498,13,0.77,0.9334,0.8162,0.761841,sp_articles,"BDI, Minmax","2,3,4,5-grams",fitted


BDI, Minmax  2,3,4,5-grams  manual: Accuracy 70.24 C@1: 68.82  Final: 54.78  FP%: 0.670 Unanswered: 159 (21.314 %)


Unnamed: 0,test_size,unanswed,high_conf,p1,p2,false_positives,accuracy,auc,c@1,final_score,corpus,classif,vec,shifter
60,96,19,75,0.11,0.89,2,0.854167,0.955295,0.923394,0.882114,du_essays,"BDI, Minmax","2,3,4,5-grams",manual
62,50,10,35,0.11,0.89,0,0.58,0.6928,0.576,0.399053,du_reviews,"BDI, Minmax","2,3,4,5-grams",manual
64,200,27,162,0.11,0.89,0,0.605,0.62425,0.5675,0.354262,en_essays,"BDI, Minmax","2,3,4,5-grams",manual
66,200,34,161,0.11,0.89,0,0.595,0.6219,0.55575,0.345621,en_novels,"BDI, Minmax","2,3,4,5-grams",manual
68,100,34,61,0.11,0.89,2,0.8,0.8482,0.6968,0.591026,gr_articles,"BDI, Minmax","2,3,4,5-grams",manual
70,100,35,53,0.11,0.89,1,0.78,0.8822,0.81,0.714582,sp_articles,"BDI, Minmax","2,3,4,5-grams",manual


BDI, Minmax  2,3,4-grams  fitted: Accuracy 71.16 C@1: 72.32  Final: 59.07  FP%: 5.630 Unanswered:  39 (5.228 %)


Unnamed: 0,test_size,unanswed,high_conf,p1,p2,false_positives,accuracy,auc,c@1,final_score,corpus,classif,vec,shifter
49,96,13,72,0.10664,0.61622,4,0.864583,0.964627,0.934353,0.901302,du_essays,"BDI, Minmax","2,3,4-grams",fitted
51,50,1,36,0.07088,0.08876,4,0.64,0.7464,0.6324,0.472023,du_reviews,"BDI, Minmax","2,3,4-grams",fitted
53,200,2,161,0.13346,0.16922,6,0.6,0.60655,0.5959,0.361443,en_essays,"BDI, Minmax","2,3,4-grams",fitted
55,200,1,162,0.06194,0.07088,10,0.585,0.63575,0.587925,0.373773,en_novels,"BDI, Minmax","2,3,4-grams",fitted
57,100,18,55,0.1871,0.57152,4,0.78,0.8656,0.767,0.663915,gr_articles,"BDI, Minmax","2,3,4-grams",fitted
59,100,4,49,0.11558,0.30332,14,0.8,0.939,0.8216,0.771482,sp_articles,"BDI, Minmax","2,3,4-grams",fitted


BDI, Minmax  2,3,4-grams  manual: Accuracy 71.41 C@1: 69.25  Final: 55.58  FP%: 0.804 Unanswered: 168 (22.520 %)


Unnamed: 0,test_size,unanswed,high_conf,p1,p2,false_positives,accuracy,auc,c@1,final_score,corpus,classif,vec,shifter
48,96,21,72,0.11,0.89,2,0.864583,0.954427,0.926758,0.884523,du_essays,"BDI, Minmax","2,3,4-grams",manual
50,50,9,36,0.11,0.89,0,0.64,0.7264,0.6136,0.445719,du_reviews,"BDI, Minmax","2,3,4-grams",manual
52,200,32,163,0.11,0.89,0,0.605,0.6383,0.5568,0.355405,en_essays,"BDI, Minmax","2,3,4-grams",manual
54,200,29,163,0.11,0.89,2,0.595,0.62455,0.5496,0.343253,en_novels,"BDI, Minmax","2,3,4-grams",manual
56,100,40,55,0.11,0.89,1,0.79,0.845,0.7,0.5915,gr_articles,"BDI, Minmax","2,3,4-grams",manual
58,100,37,52,0.11,0.89,1,0.79,0.884,0.8083,0.714537,sp_articles,"BDI, Minmax","2,3,4-grams",manual


Kestemont GI, Cosine  2,3,4,5-grams  fitted: Accuracy 70.26 C@1: 72.06  Final: 57.91  FP%: 17.292 Unanswered:  91 (12.198 %)


Unnamed: 0,test_size,unanswed,high_conf,p1,p2,false_positives,accuracy,auc,c@1,final_score,corpus,classif,vec,shifter
37,96,5,47,0.4553,0.59834,5,0.895833,0.968316,0.931532,0.902017,du_essays,"Kestemont GI, Cosine","2,3,4,5-grams",fitted
39,50,7,12,0.08876,0.12452,8,0.66,0.7216,0.6384,0.460669,du_reviews,"Kestemont GI, Cosine","2,3,4,5-grams",fitted
41,200,5,16,0.08876,0.0977,76,0.57,0.5368,0.568875,0.305372,en_essays,"Kestemont GI, Cosine","2,3,4,5-grams",fitted
43,200,52,80,0.06194,0.15134,20,0.59,0.66715,0.5985,0.399289,en_novels,"Kestemont GI, Cosine","2,3,4,5-grams",fitted
45,100,14,19,0.42848,0.70562,5,0.77,0.8554,0.798,0.682609,gr_articles,"Kestemont GI, Cosine","2,3,4,5-grams",fitted
47,100,8,26,0.37484,0.60728,15,0.73,0.9192,0.7884,0.724697,sp_articles,"Kestemont GI, Cosine","2,3,4,5-grams",fitted


Kestemont GI, Cosine  2,3,4,5-grams  manual: Accuracy 62.58 C@1: 56.72  Final: 44.04  FP%: 1.340 Unanswered: 385 (51.609 %)


Unnamed: 0,test_size,unanswed,high_conf,p1,p2,false_positives,accuracy,auc,c@1,final_score,corpus,classif,vec,shifter
36,96,30,48,0.11,0.89,2,0.75,0.938368,0.875,0.821072,du_essays,"Kestemont GI, Cosine","2,3,4,5-grams",manual
38,50,20,12,0.11,0.89,0,0.58,0.7048,0.504,0.355219,du_reviews,"Kestemont GI, Cosine","2,3,4,5-grams",manual
40,200,148,16,0.11,0.89,2,0.575,0.5898,0.3045,0.179594,en_essays,"Kestemont GI, Cosine","2,3,4,5-grams",manual
42,200,64,81,0.11,0.89,2,0.59,0.6448,0.5412,0.348966,en_novels,"Kestemont GI, Cosine","2,3,4,5-grams",manual
44,100,63,18,0.11,0.89,3,0.68,0.756,0.5542,0.418975,gr_articles,"Kestemont GI, Cosine","2,3,4,5-grams",manual
46,100,60,26,0.11,0.89,1,0.58,0.8314,0.624,0.518794,sp_articles,"Kestemont GI, Cosine","2,3,4,5-grams",manual


Kestemont GI, Cosine  2,3,4-grams  fitted: Accuracy 69.52 C@1: 70.28  Final: 56.35  FP%: 15.416 Unanswered: 130 (17.426 %)


Unnamed: 0,test_size,unanswed,high_conf,p1,p2,false_positives,accuracy,auc,c@1,final_score,corpus,classif,vec,shifter
25,96,2,43,0.44636,0.51788,6,0.90625,0.966363,0.92513,0.894011,du_essays,"Kestemont GI, Cosine","2,3,4-grams",fitted
27,50,0,9,0.19604,0.20498,3,0.64,0.7664,0.64,0.490496,du_reviews,"Kestemont GI, Cosine","2,3,4-grams",fitted
29,200,23,17,0.07982,0.11558,69,0.56,0.5113,0.5575,0.28505,en_essays,"Kestemont GI, Cosine","2,3,4-grams",fitted
31,200,70,79,0.06194,0.20498,13,0.585,0.6688,0.58725,0.392753,en_novels,"Kestemont GI, Cosine","2,3,4-grams",fitted
33,100,27,22,0.47318,0.83078,5,0.79,0.844,0.762,0.643128,gr_articles,"Kestemont GI, Cosine","2,3,4-grams",fitted
35,100,8,27,0.37484,0.55364,19,0.69,0.9064,0.7452,0.675449,sp_articles,"Kestemont GI, Cosine","2,3,4-grams",fitted


Kestemont GI, Cosine  2,3,4-grams  manual: Accuracy 62.82 C@1: 55.36  Final: 42.20  FP%: 1.609 Unanswered: 397 (53.217 %)


Unnamed: 0,test_size,unanswed,high_conf,p1,p2,false_positives,accuracy,auc,c@1,final_score,corpus,classif,vec,shifter
24,96,32,44,0.11,0.89,2,0.729167,0.929036,0.861111,0.800004,du_essays,"Kestemont GI, Cosine","2,3,4-grams",manual
26,50,21,9,0.11,0.89,0,0.62,0.7024,0.5112,0.359067,du_reviews,"Kestemont GI, Cosine","2,3,4-grams",manual
28,200,148,16,0.11,0.89,1,0.565,0.58215,0.2958,0.1722,en_essays,"Kestemont GI, Cosine","2,3,4-grams",manual
30,200,71,80,0.11,0.89,2,0.615,0.649,0.548775,0.356155,en_novels,"Kestemont GI, Cosine","2,3,4-grams",manual
32,100,60,21,0.11,0.89,5,0.68,0.743,0.56,0.41608,gr_articles,"Kestemont GI, Cosine","2,3,4-grams",manual
34,100,65,27,0.11,0.89,2,0.56,0.7872,0.5445,0.42863,sp_articles,"Kestemont GI, Cosine","2,3,4-grams",manual


Kestemont GI, Minmax  2,3,4,5-grams  fitted: Accuracy 70.15 C@1: 73.62  Final: 59.23  FP%: 13.807 Unanswered: 127 (17.024 %)


Unnamed: 0,test_size,unanswed,high_conf,p1,p2,false_positives,accuracy,auc,c@1,final_score,corpus,classif,vec,shifter
13,96,16,54,0.28544,0.80396,2,0.854167,0.96441,0.947917,0.91418,du_essays,"Kestemont GI, Minmax","2,3,4,5-grams",fitted
15,50,12,14,0.07982,0.13346,7,0.64,0.696,0.6448,0.448781,du_reviews,"Kestemont GI, Minmax","2,3,4,5-grams",fitted
17,200,14,22,0.07088,0.08876,73,0.565,0.56865,0.5778,0.328566,en_essays,"Kestemont GI, Minmax","2,3,4,5-grams",fitted
19,200,42,79,0.08876,0.21392,12,0.61,0.6697,0.6292,0.421375,en_novels,"Kestemont GI, Minmax","2,3,4,5-grams",fitted
21,100,28,18,0.47318,0.83972,3,0.79,0.8396,0.7552,0.634066,gr_articles,"Kestemont GI, Minmax","2,3,4,5-grams",fitted
23,100,15,27,0.42848,0.65198,6,0.75,0.9352,0.8625,0.80661,sp_articles,"Kestemont GI, Minmax","2,3,4,5-grams",fitted


Kestemont GI, Minmax  2,3,4,5-grams  manual: Accuracy 64.83 C@1: 58.89  Final: 46.12  FP%: 1.340 Unanswered: 379 (50.804 %)


Unnamed: 0,test_size,unanswed,high_conf,p1,p2,false_positives,accuracy,auc,c@1,final_score,corpus,classif,vec,shifter
12,96,29,54,0.11,0.89,2,0.75,0.943793,0.881619,0.832066,du_essays,"Kestemont GI, Minmax","2,3,4,5-grams",manual
14,50,24,14,0.11,0.89,0,0.64,0.7088,0.5328,0.377649,du_reviews,"Kestemont GI, Minmax","2,3,4,5-grams",manual
16,200,142,22,0.11,0.89,2,0.595,0.60795,0.342,0.207919,en_essays,"Kestemont GI, Minmax","2,3,4,5-grams",manual
18,200,68,80,0.11,0.89,2,0.585,0.6405,0.5293,0.339017,en_novels,"Kestemont GI, Minmax","2,3,4,5-grams",manual
20,100,59,19,0.11,0.89,3,0.71,0.7712,0.5883,0.453697,gr_articles,"Kestemont GI, Minmax","2,3,4,5-grams",manual
22,100,57,28,0.11,0.89,1,0.61,0.8448,0.6594,0.557061,sp_articles,"Kestemont GI, Minmax","2,3,4,5-grams",manual


Kestemont GI, Minmax  2,3,4-grams  fitted: Accuracy 69.42 C@1: 72.08  Final: 57.87  FP%: 16.622 Unanswered: 123 (16.488 %)


Unnamed: 0,test_size,unanswed,high_conf,p1,p2,false_positives,accuracy,auc,c@1,final_score,corpus,classif,vec,shifter
1,96,10,51,0.3659,0.65198,4,0.875,0.96224,0.943142,0.907529,du_essays,"Kestemont GI, Minmax","2,3,4-grams",fitted
3,50,3,9,0.08876,0.10664,11,0.68,0.7424,0.6784,0.503644,du_reviews,"Kestemont GI, Minmax","2,3,4-grams",fitted
5,200,6,18,0.07088,0.07982,78,0.56,0.54385,0.57165,0.310892,en_essays,"Kestemont GI, Minmax","2,3,4-grams",fitted
7,200,60,85,0.07088,0.21392,13,0.58,0.64715,0.5785,0.374376,en_novels,"Kestemont GI, Minmax","2,3,4-grams",fitted
9,100,34,16,0.25862,0.75032,4,0.73,0.8362,0.7504,0.627484,gr_articles,"Kestemont GI, Minmax","2,3,4-grams",fitted
11,100,10,26,0.42848,0.61622,14,0.74,0.9322,0.803,0.748557,sp_articles,"Kestemont GI, Minmax","2,3,4-grams",fitted


Kestemont GI, Minmax  2,3,4-grams  manual: Accuracy 64.49 C@1: 57.87  Final: 45.42  FP%: 1.475 Unanswered: 386 (51.743 %)


Unnamed: 0,test_size,unanswed,high_conf,p1,p2,false_positives,accuracy,auc,c@1,final_score,corpus,classif,vec,shifter
0,96,29,50,0.11,0.89,2,0.739583,0.938802,0.881619,0.827666,du_essays,"Kestemont GI, Minmax","2,3,4-grams",manual
2,50,28,9,0.11,0.89,0,0.66,0.7272,0.4992,0.363018,du_reviews,"Kestemont GI, Minmax","2,3,4-grams",manual
4,200,148,19,0.11,0.89,2,0.57,0.58705,0.2958,0.173649,en_essays,"Kestemont GI, Minmax","2,3,4-grams",manual
6,200,66,84,0.11,0.89,2,0.61,0.64335,0.55195,0.355097,en_novels,"Kestemont GI, Minmax","2,3,4-grams",manual
8,100,59,16,0.11,0.89,3,0.69,0.7696,0.5883,0.452756,gr_articles,"Kestemont GI, Minmax","2,3,4-grams",manual
10,100,56,24,0.11,0.89,2,0.6,0.8444,0.6552,0.553251,sp_articles,"Kestemont GI, Minmax","2,3,4-grams",manual


In [66]:
def evaluate_one(
    vectorizer: Pipeline,
    classifier: Union[BDIVerifier, Order2Verifier],
    shifter_strategy: str = "manual",
    min_spread: float = 0.0,
):
    # Evaluate one classifier/vectorizer/shifter on all the corpora
    results = []
    globals = ([], [])
    for corpus in CORPORA:
        if shifter_strategy == "manual":
            result_dict, tgt = evaluate(
                corpus, vectorizer, classifier, ScoreShifter().manual_fit(0.11, 0.89)
            )
        else:
            result_dict, tgt = evaluate(
                corpus,
                vectorizer,
                classifier,
                fit_shifter(corpus, vectorizer, classifier, min_spread=min_spread),
            )
        result_dict["classifier"] = f"{classif_name} + {vec_name} + {shifter_strategy}"
        result_dict["corpus"] = corpus
        results.append(result_dict)
        globals[0].extend(tgt[0])
        globals[1].extend(tgt[1])
    return (results, globals)

In [67]:
r, g = evaluate_one(vectorizer_25, bdi_mm)

01/27/2024 12:36:16 [ruzicka:INFO] Loading evaluation corpus
01/27/2024 12:36:17 [ruzicka:INFO] Fitting on 191 documents...
01/27/2024 12:36:17 [ruzicka:INFO] Predicting on 96 documents
01/27/2024 12:36:38 [ruzicka:INFO] Unanswered: 19
01/27/2024 12:36:38 [ruzicka:INFO] High Confidence: 74
01/27/2024 12:36:38 [ruzicka:INFO] False Positives: 2
01/27/2024 12:36:38 [ruzicka:INFO] Badness: 13.888
01/27/2024 12:36:38 [ruzicka:INFO] Accuracy: 0.854
01/27/2024 12:36:38 [ruzicka:INFO] AUC: 0.956
01/27/2024 12:36:38 [ruzicka:INFO] c@1: 0.923
01/27/2024 12:36:38 [ruzicka:INFO] AUC x c@1: 0.883
01/27/2024 12:36:38 [ruzicka:INFO] Loading evaluation corpus
01/27/2024 12:36:38 [ruzicka:INFO] Fitting on 50 documents...
01/27/2024 12:36:38 [ruzicka:INFO] Predicting on 50 documents
01/27/2024 12:36:49 [ruzicka:INFO] Unanswered: 10
01/27/2024 12:36:49 [ruzicka:INFO] High Confidence: 34
01/27/2024 12:36:49 [ruzicka:INFO] False Positives: 0
01/27/2024 12:36:49 [ruzicka:INFO] Badness: 21.345
01/27/2024 12:

In [68]:
pd.DataFrame(r)

Unnamed: 0,test_size,unanswed,high_conf,badness,p1,p2,false_positives,accuracy,auc,c@1,final_score,classifier,corpus
0,96,19,74,13.888282,0.11,0.89,2,0.854167,0.956163,0.923394,0.882915,"BDI, Cosine + 2,3,4,5-grams + manual",du_essays
1,50,10,34,21.344779,0.11,0.89,0,0.58,0.6968,0.576,0.401357,"BDI, Cosine + 2,3,4,5-grams + manual",du_reviews
2,200,29,163,85.001032,0.11,0.89,0,0.615,0.63135,0.5725,0.361448,"BDI, Cosine + 2,3,4,5-grams + manual",en_essays
3,200,34,162,88.390146,0.11,0.89,0,0.595,0.6331,0.55575,0.351845,"BDI, Cosine + 2,3,4,5-grams + manual",en_novels
4,100,33,62,32.638649,0.11,0.89,3,0.8,0.8264,0.6916,0.571538,"BDI, Cosine + 2,3,4,5-grams + manual",gr_articles
5,100,35,57,27.471932,0.11,0.89,1,0.78,0.8832,0.81,0.715392,"BDI, Cosine + 2,3,4,5-grams + manual",sp_articles


In [69]:
pan_metrics(*g)

(0.6849865951742627, 0.7245326279927262, 0.659334143133351)

In [70]:
len(g[0])

746

In [107]:
gdf = pd.DataFrame(g).T
gdf.columns = ["proba", "gt"]
gdf["classif"] = "BDI"
gdf

Unnamed: 0,proba,gt,classif
0,0.999995,1.0,BDI
1,0.999995,1.0,BDI
2,0.999995,1.0,BDI
3,0.999995,1.0,BDI
4,0.000000,0.0,BDI
...,...,...,...
741,0.972778,1.0,BDI
742,0.009073,0.0,BDI
743,0.800397,1.0,BDI
744,0.009073,0.0,BDI


In [74]:
def tfu(r):
    if r["proba"] == 0.5:
        return "U"
    elif (r["proba"] < 0.5) and r["gt"] == 0:
        return "T"
    elif (r["proba"] > 0.5) and r["gt"] == 1:
        return "T"
    else:
        return "F"

In [108]:
gdf["tfu"] = gdf.apply(tfu, axis=1)
gdf

Unnamed: 0,proba,gt,classif,tfu
0,0.999995,1.0,BDI,T
1,0.999995,1.0,BDI,T
2,0.999995,1.0,BDI,T
3,0.999995,1.0,BDI,T
4,0.000000,0.0,BDI,T
...,...,...,...,...
741,0.972778,1.0,BDI,T
742,0.009073,0.0,BDI,T
743,0.800397,1.0,BDI,T
744,0.009073,0.0,BDI,T


In [118]:
kr, kg = evaluate_one(vectorizer_25, o2v_mm, shifter_strategy="fitted")

01/27/2024 01:25:43 [ruzicka:INFO] Fitting shifter
01/27/2024 01:25:44 [ruzicka:INFO] Fitting on 172 documents in instance mode...
01/27/2024 01:25:44 [ruzicka:INFO] Predicting on 96 documents
01/27/2024 01:26:09 [ruzicka:INFO] p1 for optimal combo: 0.268
01/27/2024 01:26:09 [ruzicka:INFO] p2 for optimal combo: 0.795
01/27/2024 01:26:09 [ruzicka:INFO] AUC for optimal combo: 94.99%
01/27/2024 01:26:09 [ruzicka:INFO] c@1 for optimal combo: 95.35%
01/27/2024 01:26:09 [ruzicka:INFO] Loading evaluation corpus
01/27/2024 01:26:10 [ruzicka:INFO] Fitting on 191 documents in instance mode...
01/27/2024 01:26:10 [ruzicka:INFO] Predicting on 96 documents
01/27/2024 01:26:32 [ruzicka:INFO] Unanswered: 16
01/27/2024 01:26:32 [ruzicka:INFO] High Confidence: 54
01/27/2024 01:26:32 [ruzicka:INFO] False Positives: 2
01/27/2024 01:26:32 [ruzicka:INFO] Badness: 17.750
01/27/2024 01:26:32 [ruzicka:INFO] Accuracy: 0.854
01/27/2024 01:26:32 [ruzicka:INFO] AUC: 0.965
01/27/2024 01:26:32 [ruzicka:INFO] c@1: 0

In [119]:
kdf = pd.DataFrame(kg).T
kdf.columns = ["proba", "gt"]
kdf["classif"] = "O2V"
kdf["tfu"] = kdf.apply(tfu, axis=1)
kdf

Unnamed: 0,proba,gt,classif,tfu
0,0.999998,1.0,O2V,T
1,0.999998,1.0,O2V,T
2,0.997563,1.0,O2V,T
3,0.999998,1.0,O2V,T
4,0.101527,0.0,O2V,T
...,...,...,...,...
741,0.974287,1.0,O2V,T
742,0.277101,0.0,O2V,T
743,0.886715,1.0,O2V,T
744,0.255455,0.0,O2V,T


In [120]:
rdf = pd.concat([gdf, kdf]).reset_index(drop=True)

In [160]:
acc, auc, c1 = pan_metrics(*g)
print(f"Accuracy: {acc:.2%} AUC {auc:.2f} PAN Score (AUC * C@1): {auc*c1:.2%}")

Accuracy: 68.50% AUC 0.72 PAN Score (AUC * C@1): 47.77%


In [197]:
acc, auc, c1 = pan_metrics(*kg)
print(f"Accuracy: {acc:.2%} AUC {auc:.2f} PAN Score (AUC * C@1): {auc*c1:.2%}")

Accuracy: 67.02% AUC 0.76 PAN Score (AUC * C@1): 52.89%


In [202]:
%%R -i rdf -h 4.5 -w 8 -u in -r 300
library(ggridges)
library(paletteer)
library(ggplot2)
library(dplyr)
library(showtext)
font_add_google("Roboto Condensed", "fnt")
showtext_auto()

plt <- "ggsci::category10_d3"
fgcol <- '#1e1a1d'
bgcol <- 'white'
plt <- c('green', 'red', 'lightblue')
ggplot(data=rdf,aes(proba,y=classif,fill=tfu,color=tfu, shape=tfu)) +
coord_cartesian(clip = "off") +
geom_jitter(data=filter(rdf,tfu=='T'),size=3.2, stroke=1.5, alpha=0.8, width=0.0, height=0.25, color='#79f059',shape=16) +
geom_jitter(data=filter(rdf,tfu=='F'),size=2.8, stroke=1.0, alpha=0.9, width=0.0, height=0.25,color='red',shape=4) +
annotate('text', hjust=0,x=-0.01,y=2.37, label=c("Accuracy: 67.02% AUC 0.76 AUC × c@1: 52.89%"), color=fgcol,size=3) +
annotate('text',hjust=0,x=-0.01,y=1.37, label=c("Accuracy: 68.50% AUC 0.72 AUC × c@1: 47.77%"), color=fgcol, size=3) +

geom_vline(xintercept=0.5, colour=fgcol, linetype='dashed', size=0.3) +
scale_fill_manual(values=c('red','#63a152','lightblue'))+
scale_color_manual(values=c('red','#63a152','lightblue'))+
scale_shape_manual(values=c(4,16,1)) +
theme_bw() +
xlab("") +
ylab("") +
ggtitle("Correct / Incorrect results by reported probability (global micro-average)") +
theme(
    panel.border = element_blank(),
    legend.position='none',
    panel.background = element_rect(fill = bgcol,color=bgcol),
    plot.background = element_rect(fill = bgcol,color=bgcol),
    plot.title = element_text(hjust = 0.5, size=15, family="fnt", color=fgcol),
    axis.line.x=element_line(linewidth=0.3,color=fgcol),
    axis.line.y=element_line(linewidth=0.3,color=fgcol),
    axis.ticks.x=element_line(linewidth=0.2,color=fgcol),
    axis.ticks.y=element_line(linewidth=0.2,color=fgcol),
    axis.text.x=element_text(size=8, family="fnt", color=fgcol),
    axis.text.y=element_text(size=11, family="fnt", color=fgcol),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
)

# fn <- "~/papers/bdi-paper/paper/figures/bdi_o2v.pdf"
# ggsave(fn, dpi=600, width=8, height=4.5)
