In [5]:
from typing import Any, Generator, Protocol, List, Tuple
import pandas as pd
# import nltk
# nltk.download('punkt_tab')
from sklearn.metrics import balanced_accuracy_score, f1_score, precision_score, recall_score
from minicheck.minicheck import MiniCheck
import json
import os
import sys
# caution: path[0] is reserved for script path (or '' in REPL)
sys.path.insert(1, '../assign')
from utils import *

os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [6]:
def MinicheckEval(model_name, source, summary):
    model = MiniCheck(model_name=model_name, cache_dir='./ckpts')
    pred_label, raw_prob, _, _ = model.score(docs=[source], claims=[summary])
    print(pred_label)
    return pred_label[0]


In [7]:
# from batch ID to the corresponding sample IDs
skip_samples = {
    5: range(40, 50), 
    10: range(10,20),
    11: range(10),
    12: range(20, 30), 
    15: range(40, 50)
}

annotator_list = {
    7: ['yujia', 'manveer'],
    8: ['miaoran', 'chenyu'],
    10: ['erana', 'vivek', 'manveer'],
    11: ['rogger', 'matt'], #,'matt', , 'new', 'yujia'
    13: ['erana', 'miaoran'], #['erana', 'weisi', 'miaoran']
    16: ['miaoran', 'matt'] #['miaoran', 'yujia', 'matt', 'weisi', 'new']
}
# batch 16
# ['yujia', 'matt']: 0.299	0.267	0.209	0.294	0.244	0.340	0.330
# ['yujia', 'weisi']: -0.417	-0.376	-0.333	-0.384	-0.320	-0.391	-0.387
# ['matt', 'weisi']: -0.138	-0.117	-0.074	-0.104	-0.065	-0.147	-0.092
exclude_batch = []
# exclude_batch = [11,13,16]

sent_level_labels = {}
result_path = '../assign/batch_5_src_no_sports/results'
for batch_id in range(1,16+1):
    if batch_id in exclude_batch:
        continue
    file_path = os.path.join(result_path, f"batch_{batch_id}_annotation.json")
    skip_sample_ids = []
    if batch_id in skip_samples:
        skip_sample_ids = [str(s_id) for s_id in skip_samples[batch_id]]
        print (f"Skipping samples {skip_sample_ids}")
    selected_annotators = None
    # there is an unexpected "new" annotator in batch 7
    if batch_id in annotator_list:
        selected_annotators = annotator_list[batch_id]
    _, _, _, batch_sent_level_labels = read_annotation(file_path, summary_sent_file='../assign/summary_sent_list.jsonl', skip_sample_ids=skip_sample_ids)
    # print(batch_sent_level_labels)
    sent_level_labels.update(batch_sent_level_labels)
print(sent_level_labels)

Skipping samples ['40', '41', '42', '43', '44', '45', '46', '47', '48', '49']
Skipping samples ['10', '11', '12', '13', '14', '15', '16', '17', '18', '19']
Skipping samples ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
Skipping samples ['20', '21', '22', '23', '24', '25', '26', '27', '28', '29']
Skipping samples ['40', '41', '42', '43', '44', '45', '46', '47', '48', '49']


In [8]:
minicheck_models = ['roberta-large', 'deberta-v3-large', 'flan-t5-large']#, 'Bespoke-MiniCheck-7B']
fname = '../assign/dectectors_claim_level_preds.json'
sources = []
df = pd.read_csv('../assign/examples_to_annotate.csv')
for index, row in df.iterrows():
    sources.append(row['source'])

for idx, minichecker in enumerate(minicheck_models):
    print(f"Run model {idx} - {minichecker}")
    existing_meta_ids = []
    data = {}
    if os.path.exists(fname):
        with open(fname) as r:
            data = json.load(r)
            for meta_id in data:
                # print(list(data[meta_id].values())[0])
                if f"minicheck-{minichecker}" in list(data[meta_id].values())[0]:
                    existing_meta_ids.append(meta_id)
                
    for meta_id in sent_level_labels:
        meta_id = str(meta_id)
        if meta_id in existing_meta_ids:
            continue
        if meta_id in data:
            item = data[meta_id]
            print(item)
        else:
            item = {}
        for sent, sent_labels in sent_level_labels[int(meta_id)].items():
            if sent not in item:
                item[sent] = {'labels': sent_labels}
            
            item[sent][f"minicheck-{minichecker}"] = MinicheckEval(minichecker, sources[int(meta_id)], sent)
            
        print(item)
        if os.path.exists(fname):
            with open(fname, 'r') as f:
                json_data = json.load(f)
                json_data[meta_id] = item
        else:
            json_data = {meta_id:item}
        with open(fname, 'w') as f:
            f.write(json.dumps(json_data, indent=2))

Run model 0 - roberta-large
{'Based on the provided passage, here is a concise summary covering the core information:\n\n': {'labels': ['Consistent'], 'HHEMv1': 0.11803, 'HHEM-2.1': 0.36012, 'HHEM-2.1-English': 0.25962, 'trueteacher': 0, 'true_nli': 0, 'HHEM-2.1-Open': 0.79136}, 'The passage mentions two different actresses named Sheryl:\n\n': {'labels': ['Benign'], 'HHEMv1': 0.76171, 'HHEM-2.1': 0.91017, 'HHEM-2.1-English': 0.98448, 'trueteacher': 1, 'true_nli': 1, 'HHEM-2.1-Open': 0.92726}, '1. Sheryl Lee:\n- Appeared in the film "Café Society" in 2016\n- Reprised her role as Laura Palmer in the 2017 Showtime revival of "Twin Peaks"\n\n': {'labels': ['Unwanted', 'Benign', 'Unwanted.Instrinsic'], 'HHEMv1': 0.96827, 'HHEM-2.1': 0.96049, 'HHEM-2.1-English': 0.98957, 'trueteacher': 1, 'true_nli': 1, 'HHEM-2.1-Open': 0.91382}, '2. Sheryl Lee Ralph:\n- Played the role of Madame Morrible in the Broadway production of "Wicked" in 2016': {'labels': ['Unwanted', 'Benign', 'Unwanted.Instrinsic'

Evaluating: 100%|██████████| 1/1 [00:00<00:00, 57.27it/s]


[0]


Evaluating: 100%|██████████| 1/1 [00:00<00:00, 56.31it/s]


[1]


Evaluating: 100%|██████████| 1/1 [00:00<00:00, 57.28it/s]


[1]


Evaluating: 100%|██████████| 1/1 [00:00<00:00, 56.03it/s]


[1]
{'Based on the provided passage, here is a concise summary covering the core information:\n\n': {'labels': ['Consistent'], 'HHEMv1': 0.11803, 'HHEM-2.1': 0.36012, 'HHEM-2.1-English': 0.25962, 'trueteacher': 0, 'true_nli': 0, 'HHEM-2.1-Open': 0.79136, 'minicheck-roberta-large': 0}, 'The passage mentions two different actresses named Sheryl:\n\n': {'labels': ['Benign'], 'HHEMv1': 0.76171, 'HHEM-2.1': 0.91017, 'HHEM-2.1-English': 0.98448, 'trueteacher': 1, 'true_nli': 1, 'HHEM-2.1-Open': 0.92726, 'minicheck-roberta-large': 1}, '1. Sheryl Lee:\n- Appeared in the film "Café Society" in 2016\n- Reprised her role as Laura Palmer in the 2017 Showtime revival of "Twin Peaks"\n\n': {'labels': ['Unwanted', 'Benign', 'Unwanted.Instrinsic'], 'HHEMv1': 0.96827, 'HHEM-2.1': 0.96049, 'HHEM-2.1-English': 0.98957, 'trueteacher': 1, 'true_nli': 1, 'HHEM-2.1-Open': 0.91382, 'minicheck-roberta-large': 1}, '2. Sheryl Lee Ralph:\n- Played the role of Madame Morrible in the Broadway production of "Wicked"

Evaluating: 100%|██████████| 1/1 [00:00<00:00,  2.74it/s]


[0]


Evaluating: 100%|██████████| 1/1 [00:00<00:00,  2.89it/s]


[1]


Evaluating: 100%|██████████| 1/1 [00:00<00:00,  2.64it/s]


[0]


Evaluating: 100%|██████████| 1/1 [00:00<00:00,  2.65it/s]


[0]
{'Based on the provided passage, here is a concise summary covering the core information:\n\n': {'labels': ['Consistent'], 'HHEMv1': 0.11803, 'HHEM-2.1': 0.36012, 'HHEM-2.1-English': 0.25962, 'trueteacher': 0, 'true_nli': 0, 'HHEM-2.1-Open': 0.79136, 'minicheck-roberta-large': 0, 'minicheck-deberta-v3-large': 0}, 'The passage mentions two different actresses named Sheryl:\n\n': {'labels': ['Benign'], 'HHEMv1': 0.76171, 'HHEM-2.1': 0.91017, 'HHEM-2.1-English': 0.98448, 'trueteacher': 1, 'true_nli': 1, 'HHEM-2.1-Open': 0.92726, 'minicheck-roberta-large': 1, 'minicheck-deberta-v3-large': 1}, '1. Sheryl Lee:\n- Appeared in the film "Café Society" in 2016\n- Reprised her role as Laura Palmer in the 2017 Showtime revival of "Twin Peaks"\n\n': {'labels': ['Unwanted', 'Benign', 'Unwanted.Instrinsic'], 'HHEMv1': 0.96827, 'HHEM-2.1': 0.96049, 'HHEM-2.1-English': 0.98957, 'trueteacher': 1, 'true_nli': 1, 'HHEM-2.1-Open': 0.91382, 'minicheck-roberta-large': 1, 'minicheck-deberta-v3-large': 0},

Evaluating: 100%|██████████| 1/1 [00:00<00:00,  5.60it/s]


[0]


Evaluating: 100%|██████████| 1/1 [00:00<00:00, 15.66it/s]


[0]


Evaluating: 100%|██████████| 1/1 [00:00<00:00, 15.12it/s]


[1]


Evaluating: 100%|██████████| 1/1 [00:00<00:00, 15.54it/s]

[1]
{'Based on the provided passage, here is a concise summary covering the core information:\n\n': {'labels': ['Consistent'], 'HHEMv1': 0.11803, 'HHEM-2.1': 0.36012, 'HHEM-2.1-English': 0.25962, 'trueteacher': 0, 'true_nli': 0, 'HHEM-2.1-Open': 0.79136, 'minicheck-roberta-large': 0, 'minicheck-deberta-v3-large': 0, 'minicheck-flan-t5-large': 0}, 'The passage mentions two different actresses named Sheryl:\n\n': {'labels': ['Benign'], 'HHEMv1': 0.76171, 'HHEM-2.1': 0.91017, 'HHEM-2.1-English': 0.98448, 'trueteacher': 1, 'true_nli': 1, 'HHEM-2.1-Open': 0.92726, 'minicheck-roberta-large': 1, 'minicheck-deberta-v3-large': 1, 'minicheck-flan-t5-large': 0}, '1. Sheryl Lee:\n- Appeared in the film "Café Society" in 2016\n- Reprised her role as Laura Palmer in the 2017 Showtime revival of "Twin Peaks"\n\n': {'labels': ['Unwanted', 'Benign', 'Unwanted.Instrinsic'], 'HHEMv1': 0.96827, 'HHEM-2.1': 0.96049, 'HHEM-2.1-English': 0.98957, 'trueteacher': 1, 'true_nli': 1, 'HHEM-2.1-Open': 0.91382, 'mi


