In [23]:
import os
import glob

import wandb
import numpy as np
from tqdm.auto import tqdm
from sklearn.metrics import classification_report

from valerie.datasets import Phase2ValidationDataset, Phase2TrialDataset, Phase2Dataset
from valerie.modeling import SequenceClassificationModel, SequenceClassificationExample


def run_eval(models_to_run, examples):
    """Print a classification report for every requested model checkpoint.

    For each (run directory, checkpoint directory) pair a model is loaded with
    ``SequenceClassificationModel.from_pretrained``, ``examples`` are converted
    with ``model.create_dataset`` and predicted with a batch size of 8. The
    arg-max of each prediction row is compared against the label ids via
    ``sklearn.metrics.classification_report`` and the report is printed.

    Args:
        models_to_run: mapping of run directory -> iterable of checkpoint
            directory names to evaluate inside that run directory.
        examples: examples handed to ``model.create_dataset``.

    Returns:
        None. Everything is reported through ``print``.
    """
    banner_width = 100
    rule = "-" * banner_width

    for run_dir, checkpoint_dirs in models_to_run.items():
        for checkpoint_dir in checkpoint_dirs:
            # Three-line banner with the evaluated path centred in dashes.
            title = os.path.join(run_dir, checkpoint_dir)
            print(rule)
            print(title.center(banner_width, "-"))
            print(rule)

            model = SequenceClassificationModel.from_pretrained(
                pretrained_model_name_or_path=run_dir, checkpoint_dir=checkpoint_dir
            )
            output = model.predict(
                model.create_dataset(examples), predict_batch_size=8
            )
            gold = output.label_ids
            # One predicted class index per prediction row.
            predicted = list(map(np.argmax, output.predictions))
            print(classification_report(gold, predicted))
            print()

In [4]:
def ge_plain(claims):
    """Build examples that contain only the claim text.

    Args:
        claims: iterable of objects exposing ``id``, ``claim`` and ``label``.

    Returns:
        A list with one ``SequenceClassificationExample`` per claim, where
        ``text_a`` is the claim text and ``text_b`` is ``None``.
    """
    return [
        SequenceClassificationExample(
            guid=claim.id, text_a=claim.claim, text_b=None, label=claim.label
        )
        for claim in tqdm(claims, desc="generating examples")
    ]

def ge_claim_claimant(claims):
    """Build examples pairing the claim text with its claimant.

    Args:
        claims: iterable of objects exposing ``id``, ``claim``, ``claimant``
            and ``label``.

    Returns:
        A list with one ``SequenceClassificationExample`` per claim.
        ``text_b`` is the claimant, or the literal ``"no claimant"`` when the
        claimant is falsy (e.g. ``None`` or an empty string).
    """

    def to_example(claim):
        claimant = claim.claimant
        if not claimant:
            claimant = "no claimant"
        return SequenceClassificationExample(
            guid=claim.id, text_a=claim.claim, text_b=claimant, label=claim.label
        )

    return [to_example(claim) for claim in tqdm(claims, desc="generating examples")]
    
def ge_claim_claimant_date(claims):
    """Build examples pairing the claim text with "<claimant> <date>".

    ``text_b`` is the claimant (or ``"no claimant"`` when falsy), a single
    space, and the first whitespace-separated token of ``claim.date`` (or
    ``"no date"`` when the date is falsy or contains only whitespace).

    Args:
        claims: iterable of objects exposing ``id``, ``claim``, ``claimant``,
            ``date`` and ``label``. ``date`` is expected to be a string or a
            falsy value.

    Returns:
        A list with one ``SequenceClassificationExample`` per claim.
    """
    examples = []
    for claim in tqdm(claims, desc="generating examples"):
        claimant = claim.claimant if claim.claimant else "no claimant"
        # A whitespace-only date is truthy but splits into no tokens; treat it
        # like a missing date instead of failing on ``[0]``.
        date_tokens = claim.date.split() if claim.date else []
        date_part = date_tokens[0] if date_tokens else "no date"
        examples.append(
            SequenceClassificationExample(
                guid=claim.id,
                text_a=claim.claim,
                text_b=claimant + " " + date_part,
                label=claim.label,
            )
        )
    return examples

In [33]:
# Example generators for the four generate-examples experiments
def ge_claimant_date(claims):
    """Build examples pairing the claim text with "<claimant> <date>".

    ``text_b`` is the claimant (or the literal ``"claimant"`` when falsy), a
    single space, and the first whitespace-separated token of ``claim.date``
    (or ``"placeholder"`` when the date is falsy or contains only whitespace).

    Args:
        claims: iterable of objects exposing ``id``, ``claim``, ``claimant``,
            ``date`` and ``label``. ``date`` is expected to be a string or a
            falsy value.

    Returns:
        A list with one ``SequenceClassificationExample`` per claim.
    """
    examples = []
    for claim in tqdm(claims, desc="generating examples"):
        claimant = claim.claimant if claim.claimant else "claimant"
        # A whitespace-only date is truthy but splits into no tokens; treat it
        # like a missing date instead of failing on ``[0]``.
        date_tokens = claim.date.split() if claim.date else []
        date_part = date_tokens[0] if date_tokens else "placeholder"
        examples.append(
            SequenceClassificationExample(
                guid=claim.id,
                text_a=claim.claim,
                text_b=claimant + " " + date_part,
                label=claim.label,
            )
        )
    return examples


def ge_date(claims):
    """Build examples pairing the claim text with its date.

    ``text_b`` is the first whitespace-separated token of ``claim.date``, or
    the literal ``"placeholder"`` when the date is falsy or contains only
    whitespace.

    Args:
        claims: iterable of objects exposing ``id``, ``claim``, ``date`` and
            ``label``. ``date`` is expected to be a string or a falsy value.

    Returns:
        A list with one ``SequenceClassificationExample`` per claim.
    """
    examples = []
    for claim in tqdm(claims, desc="generating examples"):
        # A whitespace-only date is truthy but splits into no tokens; treat it
        # like a missing date instead of failing on ``[0]``.
        date_tokens = claim.date.split() if claim.date else []
        examples.append(
            SequenceClassificationExample(
                guid=claim.id,
                text_a=claim.claim,
                text_b=date_tokens[0] if date_tokens else "placeholder",
                label=claim.label,
            )
        )
    return examples


def ge_claimant(claims):
    """Build examples pairing the claim text with its claimant.

    Args:
        claims: iterable of objects exposing ``id``, ``claim``, ``claimant``
            and ``label``.

    Returns:
        A list with one ``SequenceClassificationExample`` per claim.
        ``text_b`` is the claimant, or the literal ``"claimant"`` when the
        claimant is falsy (e.g. ``None`` or an empty string).
    """
    return [
        SequenceClassificationExample(
            guid=claim.id,
            text_a=claim.claim,
            text_b=claim.claimant or "claimant",
            label=claim.label,
        )
        for claim in tqdm(claims, desc="generating examples")
    ]


def ge_vanilla(claims):
    """Build examples that contain only the claim text (no second segment).

    Args:
        claims: iterable of objects exposing ``id``, ``claim`` and ``label``.

    Returns:
        A list with one ``SequenceClassificationExample`` per claim, where
        ``text_a`` is the claim text and ``text_b`` is ``None``.
    """

    def claim_only(claim):
        return SequenceClassificationExample(
            guid=claim.id, text_a=claim.claim, text_b=None, label=claim.label
        )

    return list(map(claim_only, tqdm(claims, desc="generating examples")))

In [38]:
def main(dataset):
    """Run every still-available evaluation sweep against ``dataset``.

    Each sweep generates examples from ``dataset.claims`` with the generator
    matching that sweep, discovers run directories with ``glob`` and passes
    both to ``run_eval``. Sweeps run in this order: combined dataset probe,
    initial test run, single claim claimant, single claim claimant date.

    Args:
        dataset: object exposing a ``claims`` attribute.

    Returns:
        None. Results are printed by ``run_eval``.
    """
    # Disabled sweeps (their saved models are no longer available):
    #   * models/fnc/initial_simple_generate_examples/
    #     initial_simple_generate_examples-{0,1,2,3}, paired with
    #     ge_claimant_date, ge_date, ge_claimant and ge_vanilla respectively.
    #   * models/fnc/train_test_split_0.5_to_0.95/train_test_split_0.5_to_0.95*,
    #     paired with ge_plain.

    def top_level_runs(group_dir, pattern, skip_if_contains=None):
        # Map every run directory matching ``pattern`` to [""], i.e. evaluate
        # the run with an empty checkpoint directory name.
        run_dirs = sorted(glob.glob(os.path.join(group_dir, pattern)))
        if skip_if_contains is not None:
            run_dirs = [d for d in run_dirs if skip_if_contains not in d]
        return {run_dir: [""] for run_dir in run_dirs}

    # combined dataset probe: every checkpoint-* sub-directory, then "".
    examples = ge_plain(dataset.claims)
    probe_dir = "models/fnc/combined_dataset_first_probe/combined_dataset_first_probe-0"
    # NOTE: names are sorted as strings, so e.g. "checkpoint-10000" would be
    # ordered before "checkpoint-2000".
    checkpoint_names = sorted(
        os.path.basename(checkpoint_path)
        for checkpoint_path in glob.glob(os.path.join(probe_dir, "checkpoint*"))
    )
    run_eval({probe_dir: checkpoint_names + [""]}, examples)

    # initial test run
    examples = ge_plain(dataset.claims)
    run_eval(top_level_runs("models/fnc/initial_test_run", "initial_test*"), examples)

    # single claim claimant
    examples = ge_claim_claimant(dataset.claims)
    run_eval(
        top_level_runs("models/phase2/single-claim-claimant", "bert-base-cased*"),
        examples,
    )

    # single claim claimant date: skip any run whose path contains "fold".
    examples = ge_claim_claimant_date(dataset.claims)
    run_eval(
        top_level_runs(
            "models/phase2/single-claim-claimant-date",
            "*-combined-*",
            skip_if_contains="fold",
        ),
        examples,
    )

# trial data results

**For combined_dataset_first_probe there was a data leak, so disregard those results on the trial dataset.**

In [39]:
# Run every evaluation sweep in main() on the dataset built by Phase2TrialDataset.from_raw().
main(Phase2TrialDataset.from_raw())

HBox(children=(FloatProgress(value=0.0, description='Phase2TrialDataset to claims', style=ProgressStyle(descri…




HBox(children=(FloatProgress(value=0.0, description='generating examples', style=ProgressStyle(description_wid…


----------------------------------------------------------------------------------------------------
-------models/fnc/combined_dataset_first_probe/combined_dataset_first_probe-0/checkpoint-1000-------
----------------------------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='converting examples to features', style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, description='Prediction', max=2.0, style=ProgressStyle(description_wid…




              precision    recall  f1-score   support

           0       0.51      0.90      0.65        41
           1       0.58      0.39      0.47        38
           2       1.00      0.05      0.09        21

    accuracy                           0.53       100
   macro avg       0.69      0.45      0.40       100
weighted avg       0.64      0.53      0.46       100


----------------------------------------------------------------------------------------------------
-------models/fnc/combined_dataset_first_probe/combined_dataset_first_probe-0/checkpoint-2000-------
----------------------------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='converting examples to features', style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, description='Prediction', max=2.0, style=ProgressStyle(description_wid…


              precision    recall  f1-score   support

           0       0.62      0.80      0.70        41
           1       0.61      0.66      0.63        38
           2       0.67      0.19      0.30        21

    accuracy                           0.62       100
   macro avg       0.63      0.55      0.54       100
weighted avg       0.63      0.62      0.59       100


----------------------------------------------------------------------------------------------------
-------models/fnc/combined_dataset_first_probe/combined_dataset_first_probe-0/checkpoint-3000-------
----------------------------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='converting examples to features', style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, description='Prediction', max=2.0, style=ProgressStyle(description_wid…


              precision    recall  f1-score   support

           0       0.79      0.76      0.77        41
           1       0.62      0.79      0.70        38
           2       0.62      0.38      0.47        21

    accuracy                           0.69       100
   macro avg       0.68      0.64      0.65       100
weighted avg       0.69      0.69      0.68       100


----------------------------------------------------------------------------------------------------
-------models/fnc/combined_dataset_first_probe/combined_dataset_first_probe-0/checkpoint-4000-------
----------------------------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='converting examples to features', style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, description='Prediction', max=2.0, style=ProgressStyle(description_wid…


              precision    recall  f1-score   support

           0       0.94      0.83      0.88        41
           1       0.74      0.92      0.82        38
           2       0.88      0.71      0.79        21

    accuracy                           0.84       100
   macro avg       0.86      0.82      0.83       100
weighted avg       0.86      0.84      0.84       100


----------------------------------------------------------------------------------------------------
--------------models/fnc/combined_dataset_first_probe/combined_dataset_first_probe-0/---------------
----------------------------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='converting examples to features', style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, description='Prediction', max=2.0, style=ProgressStyle(description_wid…


              precision    recall  f1-score   support

           0       0.92      0.83      0.87        41
           1       0.77      0.89      0.83        38
           2       0.89      0.81      0.85        21

    accuracy                           0.85       100
   macro avg       0.86      0.84      0.85       100
weighted avg       0.86      0.85      0.85       100




HBox(children=(FloatProgress(value=0.0, description='generating examples', style=ProgressStyle(description_wid…


----------------------------------------------------------------------------------------------------
--------------------------models/fnc/initial_test_run/initial_test_run-0/---------------------------
----------------------------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='converting examples to features', style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, description='Prediction', max=2.0, style=ProgressStyle(description_wid…


              precision    recall  f1-score   support

           0       0.46      0.54      0.49        41
           1       0.45      0.55      0.49        38
           2       0.40      0.10      0.15        21

    accuracy                           0.45       100
   macro avg       0.44      0.39      0.38       100
weighted avg       0.44      0.45      0.42       100


----------------------------------------------------------------------------------------------------
--------------------------models/fnc/initial_test_run/initial_test_run-1/---------------------------
----------------------------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='converting examples to features', style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, description='Prediction', max=2.0, style=ProgressStyle(description_wid…


              precision    recall  f1-score   support

           0       0.50      0.49      0.49        41
           1       0.48      0.61      0.53        38
           2       0.25      0.14      0.18        21

    accuracy                           0.46       100
   macro avg       0.41      0.41      0.40       100
weighted avg       0.44      0.46      0.44       100


----------------------------------------------------------------------------------------------------
--------------------------models/fnc/initial_test_run/initial_test_run-2/---------------------------
----------------------------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='converting examples to features', style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, description='Prediction', max=2.0, style=ProgressStyle(description_wid…


              precision    recall  f1-score   support

           0       0.49      0.66      0.56        41
           1       0.56      0.53      0.54        38
           2       0.33      0.14      0.20        21

    accuracy                           0.50       100
   macro avg       0.46      0.44      0.43       100
weighted avg       0.48      0.50      0.48       100


----------------------------------------------------------------------------------------------------
--------------------------models/fnc/initial_test_run/initial_test_run-3/---------------------------
----------------------------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='converting examples to features', style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, description='Prediction', max=2.0, style=ProgressStyle(description_wid…


              precision    recall  f1-score   support

           0       0.57      0.66      0.61        41
           1       0.54      0.55      0.55        38
           2       0.29      0.19      0.23        21

    accuracy                           0.52       100
   macro avg       0.47      0.47      0.46       100
weighted avg       0.50      0.52      0.51       100


----------------------------------------------------------------------------------------------------
--------------------------models/fnc/initial_test_run/initial_test_run-4/---------------------------
----------------------------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='converting examples to features', style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, description='Prediction', max=2.0, style=ProgressStyle(description_wid…


              precision    recall  f1-score   support

           0       0.59      0.73      0.65        41
           1       0.49      0.55      0.52        38
           2       0.00      0.00      0.00        21

    accuracy                           0.51       100
   macro avg       0.36      0.43      0.39       100
weighted avg       0.43      0.51      0.46       100


----------------------------------------------------------------------------------------------------
--------------------------models/fnc/initial_test_run/initial_test_run-5/---------------------------
----------------------------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='converting examples to features', style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, description='Prediction', max=2.0, style=ProgressStyle(description_wid…


              precision    recall  f1-score   support

           0       0.53      0.61      0.57        41
           1       0.47      0.50      0.49        38
           2       0.46      0.29      0.35        21

    accuracy                           0.50       100
   macro avg       0.49      0.47      0.47       100
weighted avg       0.50      0.50      0.49       100




HBox(children=(FloatProgress(value=0.0, description='generating examples', style=ProgressStyle(description_wid…


----------------------------------------------------------------------------------------------------
---------------------models/phase2/single-claim-claimant/bert-base-cased-128-0/---------------------
----------------------------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='converting examples to features', style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, description='Prediction', max=2.0, style=ProgressStyle(description_wid…


              precision    recall  f1-score   support

           0       0.51      0.78      0.62        41
           1       0.56      0.50      0.53        38
           2       1.00      0.14      0.25        21

    accuracy                           0.54       100
   macro avg       0.69      0.47      0.46       100
weighted avg       0.63      0.54      0.51       100


----------------------------------------------------------------------------------------------------
----------------models/phase2/single-claim-claimant/bert-base-cased-combined-128-2/-----------------
----------------------------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='converting examples to features', style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, description='Prediction', max=2.0, style=ProgressStyle(description_wid…


              precision    recall  f1-score   support

           0       0.57      0.66      0.61        41
           1       0.49      0.58      0.53        38
           2       0.25      0.10      0.14        21

    accuracy                           0.51       100
   macro avg       0.44      0.44      0.43       100
weighted avg       0.47      0.51      0.48       100




HBox(children=(FloatProgress(value=0.0, description='generating examples', style=ProgressStyle(description_wid…


----------------------------------------------------------------------------------------------------
--------------models/phase2/single-claim-claimant-date/bert-base-cased-combined-128-0/--------------
----------------------------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='converting examples to features', style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, description='Prediction', max=2.0, style=ProgressStyle(description_wid…


              precision    recall  f1-score   support

           0       0.57      0.73      0.64        41
           1       0.51      0.53      0.52        38
           2       0.38      0.14      0.21        21

    accuracy                           0.53       100
   macro avg       0.48      0.47      0.45       100
weighted avg       0.51      0.53      0.50       100


----------------------------------------------------------------------------------------------------
---------------models/phase2/single-claim-claimant-date/roberta-large-combined-128-0/---------------
----------------------------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='converting examples to features', style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, description='Prediction', max=2.0, style=ProgressStyle(description_wid…


              precision    recall  f1-score   support

           0       0.58      0.63      0.60        41
           1       0.48      0.58      0.52        38
           2       0.44      0.19      0.27        21

    accuracy                           0.52       100
   macro avg       0.50      0.47      0.47       100
weighted avg       0.51      0.52      0.50       100


----------------------------------------------------------------------------------------------------
---------------models/phase2/single-claim-claimant-date/roberta-large-combined-128-1/---------------
----------------------------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='converting examples to features', style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, description='Prediction', max=2.0, style=ProgressStyle(description_wid…


              precision    recall  f1-score   support

           0       0.60      0.73      0.66        41
           1       0.52      0.58      0.55        38
           2       0.50      0.19      0.28        21

    accuracy                           0.56       100
   macro avg       0.54      0.50      0.50       100
weighted avg       0.55      0.56      0.54       100


----------------------------------------------------------------------------------------------------
---------------models/phase2/single-claim-claimant-date/roberta-large-combined-128-2/---------------
----------------------------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='converting examples to features', style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, description='Prediction', max=2.0, style=ProgressStyle(description_wid…


              precision    recall  f1-score   support

           0       0.62      0.76      0.68        41
           1       0.54      0.58      0.56        38
           2       0.44      0.19      0.27        21

    accuracy                           0.57       100
   macro avg       0.53      0.51      0.50       100
weighted avg       0.55      0.57      0.55       100




# validation results

In [40]:
# Run every evaluation sweep in main() on the dataset built by Phase2ValidationDataset.from_raw().
main(Phase2ValidationDataset.from_raw())

HBox(children=(FloatProgress(value=0.0, description='Phase2ValidationDataset to claims', max=500.0, style=Prog…




HBox(children=(FloatProgress(value=0.0, description='generating examples', max=500.0, style=ProgressStyle(desc…


----------------------------------------------------------------------------------------------------
-------models/fnc/combined_dataset_first_probe/combined_dataset_first_probe-0/checkpoint-1000-------
----------------------------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='converting examples to features', max=500.0, style=Progre…




HBox(children=(FloatProgress(value=0.0, description='Prediction', max=8.0, style=ProgressStyle(description_wid…




              precision    recall  f1-score   support

           0       0.56      0.97      0.71       273
           1       0.65      0.07      0.12       166
           2       0.00      0.00      0.00        61

    accuracy                           0.55       500
   macro avg       0.40      0.35      0.28       500
weighted avg       0.52      0.55      0.43       500


----------------------------------------------------------------------------------------------------
-------models/fnc/combined_dataset_first_probe/combined_dataset_first_probe-0/checkpoint-2000-------
----------------------------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='converting examples to features', max=500.0, style=Progre…




HBox(children=(FloatProgress(value=0.0, description='Prediction', max=8.0, style=ProgressStyle(description_wid…


              precision    recall  f1-score   support

           0       0.60      0.89      0.71       273
           1       0.56      0.24      0.34       166
           2       0.35      0.13      0.19        61

    accuracy                           0.58       500
   macro avg       0.50      0.42      0.41       500
weighted avg       0.55      0.58      0.52       500


----------------------------------------------------------------------------------------------------
-------models/fnc/combined_dataset_first_probe/combined_dataset_first_probe-0/checkpoint-3000-------
----------------------------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='converting examples to features', max=500.0, style=Progre…




HBox(children=(FloatProgress(value=0.0, description='Prediction', max=8.0, style=ProgressStyle(description_wid…


              precision    recall  f1-score   support

           0       0.63      0.79      0.70       273
           1       0.52      0.40      0.46       166
           2       0.44      0.20      0.27        61

    accuracy                           0.59       500
   macro avg       0.53      0.46      0.48       500
weighted avg       0.57      0.59      0.57       500


----------------------------------------------------------------------------------------------------
-------models/fnc/combined_dataset_first_probe/combined_dataset_first_probe-0/checkpoint-4000-------
----------------------------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='converting examples to features', max=500.0, style=Progre…




HBox(children=(FloatProgress(value=0.0, description='Prediction', max=8.0, style=ProgressStyle(description_wid…


              precision    recall  f1-score   support

           0       0.65      0.71      0.68       273
           1       0.48      0.46      0.47       166
           2       0.44      0.33      0.38        61

    accuracy                           0.58       500
   macro avg       0.53      0.50      0.51       500
weighted avg       0.57      0.58      0.57       500


----------------------------------------------------------------------------------------------------
--------------models/fnc/combined_dataset_first_probe/combined_dataset_first_probe-0/---------------
----------------------------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='converting examples to features', max=500.0, style=Progre…




HBox(children=(FloatProgress(value=0.0, description='Prediction', max=8.0, style=ProgressStyle(description_wid…


              precision    recall  f1-score   support

           0       0.66      0.77      0.71       273
           1       0.53      0.43      0.48       166
           2       0.48      0.34      0.40        61

    accuracy                           0.60       500
   macro avg       0.55      0.51      0.53       500
weighted avg       0.59      0.60      0.59       500




HBox(children=(FloatProgress(value=0.0, description='generating examples', max=500.0, style=ProgressStyle(desc…


----------------------------------------------------------------------------------------------------
--------------------------models/fnc/initial_test_run/initial_test_run-0/---------------------------
----------------------------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='converting examples to features', max=500.0, style=Progre…




HBox(children=(FloatProgress(value=0.0, description='Prediction', max=8.0, style=ProgressStyle(description_wid…


              precision    recall  f1-score   support

           0       0.61      0.73      0.66       273
           1       0.48      0.48      0.48       166
           2       0.57      0.07      0.12        61

    accuracy                           0.57       500
   macro avg       0.55      0.42      0.42       500
weighted avg       0.56      0.57      0.54       500


----------------------------------------------------------------------------------------------------
--------------------------models/fnc/initial_test_run/initial_test_run-1/---------------------------
----------------------------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='converting examples to features', max=500.0, style=Progre…




HBox(children=(FloatProgress(value=0.0, description='Prediction', max=8.0, style=ProgressStyle(description_wid…


              precision    recall  f1-score   support

           0       0.60      0.63      0.62       273
           1       0.43      0.47      0.45       166
           2       0.27      0.15      0.19        61

    accuracy                           0.52       500
   macro avg       0.44      0.42      0.42       500
weighted avg       0.51      0.52      0.51       500


----------------------------------------------------------------------------------------------------
--------------------------models/fnc/initial_test_run/initial_test_run-2/---------------------------
----------------------------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='converting examples to features', max=500.0, style=Progre…




HBox(children=(FloatProgress(value=0.0, description='Prediction', max=8.0, style=ProgressStyle(description_wid…


              precision    recall  f1-score   support

           0       0.61      0.69      0.65       273
           1       0.45      0.46      0.46       166
           2       0.25      0.08      0.12        61

    accuracy                           0.54       500
   macro avg       0.44      0.41      0.41       500
weighted avg       0.51      0.54      0.52       500


----------------------------------------------------------------------------------------------------
--------------------------models/fnc/initial_test_run/initial_test_run-3/---------------------------
----------------------------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='converting examples to features', max=500.0, style=Progre…




HBox(children=(FloatProgress(value=0.0, description='Prediction', max=8.0, style=ProgressStyle(description_wid…


              precision    recall  f1-score   support

           0       0.65      0.81      0.72       273
           1       0.54      0.45      0.49       166
           2       0.25      0.10      0.14        61

    accuracy                           0.60       500
   macro avg       0.48      0.45      0.45       500
weighted avg       0.57      0.60      0.57       500


----------------------------------------------------------------------------------------------------
--------------------------models/fnc/initial_test_run/initial_test_run-4/---------------------------
----------------------------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='converting examples to features', max=500.0, style=Progre…




HBox(children=(FloatProgress(value=0.0, description='Prediction', max=8.0, style=ProgressStyle(description_wid…


              precision    recall  f1-score   support

           0       0.58      0.71      0.64       273
           1       0.39      0.36      0.37       166
           2       0.13      0.03      0.05        61

    accuracy                           0.51       500
   macro avg       0.37      0.37      0.36       500
weighted avg       0.46      0.51      0.48       500


----------------------------------------------------------------------------------------------------
--------------------------models/fnc/initial_test_run/initial_test_run-5/---------------------------
----------------------------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='converting examples to features', max=500.0, style=Progre…




HBox(children=(FloatProgress(value=0.0, description='Prediction', max=8.0, style=ProgressStyle(description_wid…


              precision    recall  f1-score   support

           0       0.57      0.71      0.63       273
           1       0.43      0.37      0.39       166
           2       0.33      0.08      0.13        61

    accuracy                           0.52       500
   macro avg       0.44      0.39      0.39       500
weighted avg       0.49      0.52      0.49       500




HBox(children=(FloatProgress(value=0.0, description='generating examples', max=500.0, style=ProgressStyle(desc…


----------------------------------------------------------------------------------------------------
---------------------models/phase2/single-claim-claimant/bert-base-cased-128-0/---------------------
----------------------------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='converting examples to features', max=500.0, style=Progre…




HBox(children=(FloatProgress(value=0.0, description='Prediction', max=8.0, style=ProgressStyle(description_wid…


              precision    recall  f1-score   support

           0       0.58      0.81      0.68       273
           1       0.44      0.30      0.36       166
           2       0.40      0.03      0.06        61

    accuracy                           0.55       500
   macro avg       0.47      0.38      0.36       500
weighted avg       0.51      0.55      0.49       500


----------------------------------------------------------------------------------------------------
----------------models/phase2/single-claim-claimant/bert-base-cased-combined-128-2/-----------------
----------------------------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='converting examples to features', max=500.0, style=Progre…




HBox(children=(FloatProgress(value=0.0, description='Prediction', max=8.0, style=ProgressStyle(description_wid…


              precision    recall  f1-score   support

           0       0.63      0.73      0.67       273
           1       0.43      0.42      0.43       166
           2       0.62      0.21      0.32        61

    accuracy                           0.56       500
   macro avg       0.56      0.45      0.47       500
weighted avg       0.56      0.56      0.55       500




HBox(children=(FloatProgress(value=0.0, description='generating examples', max=500.0, style=ProgressStyle(desc…


----------------------------------------------------------------------------------------------------
--------------models/phase2/single-claim-claimant-date/bert-base-cased-combined-128-0/--------------
----------------------------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='converting examples to features', max=500.0, style=Progre…




HBox(children=(FloatProgress(value=0.0, description='Prediction', max=8.0, style=ProgressStyle(description_wid…


              precision    recall  f1-score   support

           0       0.60      0.86      0.71       273
           1       0.46      0.28      0.34       166
           2       1.00      0.08      0.15        61

    accuracy                           0.57       500
   macro avg       0.68      0.41      0.40       500
weighted avg       0.60      0.57      0.52       500


----------------------------------------------------------------------------------------------------
---------------models/phase2/single-claim-claimant-date/roberta-large-combined-128-0/---------------
----------------------------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='converting examples to features', max=500.0, style=Progre…




HBox(children=(FloatProgress(value=0.0, description='Prediction', max=8.0, style=ProgressStyle(description_wid…


              precision    recall  f1-score   support

           0       0.68      0.81      0.74       273
           1       0.52      0.43      0.47       166
           2       0.56      0.33      0.41        61

    accuracy                           0.62       500
   macro avg       0.58      0.52      0.54       500
weighted avg       0.61      0.62      0.61       500


----------------------------------------------------------------------------------------------------
---------------models/phase2/single-claim-claimant-date/roberta-large-combined-128-1/---------------
----------------------------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='converting examples to features', max=500.0, style=Progre…




HBox(children=(FloatProgress(value=0.0, description='Prediction', max=8.0, style=ProgressStyle(description_wid…


              precision    recall  f1-score   support

           0       0.66      0.80      0.72       273
           1       0.50      0.43      0.46       166
           2       0.68      0.31      0.43        61

    accuracy                           0.62       500
   macro avg       0.61      0.51      0.54       500
weighted avg       0.61      0.62      0.60       500


----------------------------------------------------------------------------------------------------
---------------models/phase2/single-claim-claimant-date/roberta-large-combined-128-2/---------------
----------------------------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='converting examples to features', max=500.0, style=Progre…




HBox(children=(FloatProgress(value=0.0, description='Prediction', max=8.0, style=ProgressStyle(description_wid…


              precision    recall  f1-score   support

           0       0.68      0.78      0.73       273
           1       0.52      0.44      0.48       166
           2       0.54      0.41      0.47        61

    accuracy                           0.62       500
   macro avg       0.58      0.54      0.56       500
weighted avg       0.61      0.62      0.61       500


