In [1]:
import sys
sys.path.insert(0, "../src")
import pickle as pkl
from pathlib import Path
from collections import Counter
from functools import partial

import numpy as np
import pandas as pd
from joblib import Parallel, delayed

from scipy.special import kl_div

import constants
from gen.util import read_data, write_jsonl
from rte.aggregate import agg_predict, agg_predict_proba

# Init

In [2]:
sf_actual = pd.DataFrame(read_data(Path("/users/k21190024/study/fact-check-transfer-learning/scratch/dumps/feverised-scifact/scifact_all.jsonl")))
sf_actual["claim_id"] = "scifact|" + sf_actual["id"].astype(str)
sf_actual = sf_actual.rename(columns={"label": "predicted_label"})

sf_doc = Path("/users/k21190024/study/fact-check-transfer-learning/scratch/thesis/predictions/doc/scifact")
sf_pipe_doc = Path("/users/k21190024/study/fact-check-transfer-learning/scratch/thesis/predictions/doc/scifactpipeline")

sf_sent = Path("/users/k21190024/study/fact-check-transfer-learning/scratch/thesis/predictions/sent/scifact")
sf_pipe_sent = Path("/users/k21190024/study/fact-check-transfer-learning/scratch/thesis/predictions/sent/scifactpipeline")

In [3]:
def extract_model_dataset(fname):
    tok = fname.split("-")
    dataset = "-".join([tok[0], tok[1] if "climatefever" in tok[1] else ""]).strip("-")
    
    model = None
    if "xlnet" in fname:
        model = "xlnet"
    elif "bert" in fname:
        model = "bert"
    else:
        model = "da"
    
    return model, dataset

## Archive

For all correctly labelled instances, measure the model confidence

```doc_ls.pivot_table(index=["dataset", "model"], columns=["predicted_label"], values="label_proba", aggfunc=["count", "min", "mean", "std"]).reset_index().to_csv("tmp.csv")```

# Document

In [4]:
def doc_proba(fn, sf, correct_only=False):
    pred = pd.DataFrame(read_data(fn))
    pred["model"], pred["dataset"] = extract_model_dataset(fn.stem)
    pred["label_proba"] = pred["predicted_proba"].apply(lambda x: max(x))
    if "predicted" in pred:
        pred = pred.rename(columns={"predicted": "predicted_label"})
    if correct_only:
        pred = pred.merge(sf[["claim_id", "predicted_label"]], on=["claim_id", "predicted_label"], how="inner")
    pred["predicted_label"] = pred["predicted_label"].map(constants.LABEL2ID)
    
    return pred

In [5]:
doc_ls = pd.concat(Parallel()(delayed(doc_proba)(p, sf_actual) for p in sf_doc.glob("*.all*")))
doc_pipe_ls = pd.concat(Parallel()(delayed(doc_proba)(p, sf_actual) for p in sf_pipe_doc.glob("*.all*")))

In [6]:
doc_c_ls = pd.concat(Parallel()(delayed(doc_proba)(p, sf_actual, True) for p in sf_doc.glob("*.all*")))
doc_c_pipe_ls = pd.concat(Parallel()(delayed(doc_proba)(p, sf_actual, True) for p in sf_pipe_doc.glob("*.all*")))

# Sentence

## Majority

In [7]:
def majority_proba(fn, sf, correct_only=False):
    pred = pd.DataFrame(read_data(fn))
    pred["predicted_label"] = pred["predicted_label"].map(constants.LABEL2ID)
    pred["predicted_proba"] = pred["predicted_proba"].apply(np.array)
    pred = pred.groupby("claim_id", as_index=False, sort=False).agg({"predicted_proba": np.stack, "predicted_label": agg_predict})
    
    pred["_labls"] = pred["predicted_proba"].apply(lambda x: np.argmax(x, axis=1))
    pred["_probls"] = pred["predicted_proba"].apply(lambda x: np.max(x, axis=1))
    pred["_filter"] = pred[["predicted_label", "_labls"]].apply(lambda x: np.array([i == x["predicted_label"] for i in x["_labls"]]), axis=1)
    
    pred["label_proba"] = pred[["_probls", "_filter"]].apply(lambda x: np.mean(x["_probls"][x["_filter"]]), axis=1)
    pred["predicted_label"] = pred["predicted_label"].map(constants.ID2LABEL)
    if correct_only:
        pred = pred.merge(sf[["claim_id", "predicted_label"]], on=["claim_id", "predicted_label"], how="inner")
    pred["predicted_label"] = pred["predicted_label"].map(constants.LABEL2ID)
    
    pred = pred.drop(columns=[c for c in pred.columns if c.startswith("_")])
    pred["model"], pred["dataset"] = extract_model_dataset(fn.stem)
    # impute equal S/R counts as predicted NEI with mean of NEI
    pred.loc[pred["label_proba"].isnull()] = pred.loc[pred["predicted_label"] == constants.LABEL2ID[constants.LOOKUP["label"]["nei"]], "label_proba"].mean()
    
    return pred

In [8]:
maj_ls = pd.concat(Parallel()(delayed(majority_proba)(p, sf_actual) for p in sf_sent.glob("*.all*")))
maj_pipe_ls = pd.concat(Parallel()(delayed(majority_proba)(p, sf_actual) for p in sf_pipe_sent.glob("*.all*")))

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


In [9]:
maj_c_ls = pd.concat(Parallel()(delayed(majority_proba)(p, sf_actual, True) for p in sf_sent.glob("*.all*")))
maj_c_pipe_ls = pd.concat(Parallel()(delayed(majority_proba)(p, sf_actual, True) for p in sf_pipe_sent.glob("*.all*")))

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


## Mean proba

In [10]:
def meanproba_proba(fn, sf, correct_only=False):
    pred = pd.DataFrame(read_data(fn))
    ret_proba = partial(agg_predict_proba, return_proba=True)
    pred = pred.groupby("claim_id", as_index=False, sort=False).agg({"predicted_proba": ret_proba})
    pred["label_proba"] = pred["predicted_proba"].apply(np.max)
    pred["predicted_label"] = pred["predicted_proba"].apply(np.argmax).map(constants.ID2LABEL)
    
    pred["model"], pred["dataset"] = extract_model_dataset(fn.stem)
    if correct_only:
        pred = pred.merge(sf[["claim_id", "predicted_label"]], on=["claim_id", "predicted_label"], how="inner")
    pred["predicted_label"] = pred["predicted_label"].map(constants.LABEL2ID)
    
    return pred

In [11]:
mp_ls = pd.concat(Parallel()(delayed(meanproba_proba)(p, sf_actual) for p in sf_sent.glob("*.all*")))
mp_pipe_ls = pd.concat(Parallel()(delayed(meanproba_proba)(p, sf_actual) for p in sf_pipe_sent.glob("*.all*")))

In [12]:
mp_c_ls = pd.concat(Parallel()(delayed(meanproba_proba)(p, sf_actual, True) for p in sf_sent.glob("*.all*")))
mp_c_pipe_ls = pd.concat(Parallel()(delayed(meanproba_proba)(p, sf_actual, True) for p in sf_pipe_sent.glob("*.all*")))