In [None]:
from typing import Any, Generator, Protocol, List, Tuple
import pandas as pd
import json
from alignscore import AlignScore
import os
import sys
# caution: path[0] is reserved for script path (or '' in REPL)
sys.path.insert(1, '../eval')
from utils import *

# Expose only GPU 0 to this kernel.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
# Directory holding the AlignScore checkpoints. Set the ALIGNSCORE_PATH
# environment variable to override it; the original location is the default.
ALIGNSCORE_PATH = os.environ.get('ALIGNSCORE_PATH', '/home/miaoran/AlignScore/')

In [None]:
# Backbone name and checkpoint file name for each supported AlignScore size.
_ALIGNSCORE_VARIANTS = {
    'base': ('roberta-base', 'AlignScore-base.ckpt'),
    'large': ('roberta-large', 'AlignScore-large.ckpt'),
}
# Scorers already built in this kernel, keyed by (model_size, batch_size), so
# that repeated calls reuse the loaded checkpoint instead of reloading it.
_ALIGNSCORE_MODELS = {}


def AlignScoreEval(model_size, sources, summaries, batch_size=20):
    """Score summaries against their sources with an AlignScore model.

    Args:
        model_size: 'base' or 'large'; selects the backbone and checkpoint.
        sources: list of context strings; a single string is treated as a
            one-element list.
        summaries: list of claim strings paired with ``sources``; a single
            string is treated as a one-element list.
        batch_size: batch size handed to the AlignScore constructor.

    Returns:
        The values returned by ``model.score`` for the given inputs, each
        rounded to 5 decimal places. Two empty inputs yield ``[]`` without
        loading a model.

    Raises:
        ValueError: if ``model_size`` is not a supported size.
    """
    # Validate first: previously an unknown size left `model` unbound and
    # surfaced as an UnboundLocalError further down.
    if model_size not in _ALIGNSCORE_VARIANTS:
        raise ValueError(
            f"Unsupported model_size {model_size!r}; expected one of {sorted(_ALIGNSCORE_VARIANTS)}"
        )

    # Accept a single source/summary pair given as bare strings.
    if isinstance(sources, str):
        sources = [sources]
    if isinstance(summaries, str):
        summaries = [summaries]

    # Nothing to score: skip the (expensive) checkpoint load entirely.
    if len(sources) == 0 and len(summaries) == 0:
        return []

    cache_key = (model_size, batch_size)
    model = _ALIGNSCORE_MODELS.get(cache_key)
    if model is None:
        backbone, ckpt_name = _ALIGNSCORE_VARIANTS[model_size]
        model = AlignScore(
            model=backbone,
            batch_size=batch_size,
            device='cuda:0',
            ckpt_path=os.path.join(ALIGNSCORE_PATH, ckpt_name),
            evaluation_mode='bin_sp',
        )
        _ALIGNSCORE_MODELS[cache_key] = model

    scores = model.score(contexts=sources, claims=summaries)
    return [round(x, 5) for x in scores]

## Sample-level Prediction

In [None]:
# Sample-level scoring: add one `alignscore-<size>` column per model to the
# annotation CSV and write the result back to the same file.
model_sizes = ['base', 'large']
filename = "../assign/examples_to_annotate.csv"
outputfile = filename  # results overwrite the input CSV in place

# The input does not change between models, so read it once up front.
# Missing cells become '' so that every source/summary is a string.
df = pd.read_csv(filename, encoding='utf-8').fillna('')

for idx, model_size in enumerate(model_sizes):
    print(f"Run model {idx}: {model_size} ......")
    col_name = f'alignscore-{model_size}'
    scores = AlignScoreEval(model_size, df['source'].tolist(), df['summary'].tolist())
    # Show a short preview rather than dumping every score into the output.
    print(f"{col_name}: {len(scores)} scores, first 5: {scores[:5]}")
    # Plain assignment overwrites an existing column in place and appends a
    # new one at the end, which is what the insert/overwrite branch did.
    df[col_name] = scores
    # Persist after each model so a failure in a later model keeps earlier results.
    df.to_csv(outputfile, mode='w', index=False, header=True)
    

## Sentence-level Prediction

In [None]:
# Sentence-level scoring: for every annotated sample, score each annotated
# sentence against the sample's source and store the result in a JSON file
# shaped as {meta_id: {sentence: {'labels': ..., 'alignscore-<size>': score}}}.

# Merge the sentence-level annotations of all result files into one mapping
# (on key collisions a later file replaces an earlier one).
sent_level_labels = {}
result_files, skip_sample_ids, selected_annotators, num_annotators = process_result_files()
for file_path in result_files:
    _, _, _, batch_sent_level_labels = read_annotation(file_path, skip_sample_ids=skip_sample_ids)
    sent_level_labels.update(batch_sent_level_labels)

model_sizes = ['base', 'large']
fname = '../eval/sent_level_results/detectors_sent_level_preds.json'
df = pd.read_csv('../assign/examples_to_annotate.csv')
# Same NaN handling as the sample-level cell, so every source is a string.
sources = df['source'].fillna('').tolist()

# Load previously stored predictions once; `data` is kept in sync with the
# file for the rest of the cell, so it never needs to be re-read.
data = {}
if os.path.exists(fname):
    with open(fname) as r:
        data = json.load(r)
else:
    os.makedirs(os.path.dirname(fname), exist_ok=True)

for idx, model_size in enumerate(model_sizes):
    print(f"Run model {idx}: {model_size} ......")
    score_key = f"alignscore-{model_size}"

    for sample_id, labels_by_sent in sent_level_labels.items():
        meta_id = str(sample_id)  # JSON object keys are strings
        item = data.get(meta_id, {})

        # Register sentences that are new for this sample.
        for sent, sent_labels in labels_by_sent.items():
            if sent not in item:
                item[sent] = {'labels': sent_labels}

        # Only sentences still lacking this model's score need work. Checking
        # every sentence (not just the first stored one) also copes with
        # empty or partially filled entries.
        pending = [sent for sent in labels_by_sent if score_key not in item[sent]]
        if not pending:
            continue

        # NOTE(review): meta_id is used as a positional row index into the
        # CSV, as in the original code; confirm that this matches the ids.
        source = sources[int(meta_id)]
        # One call per sample with proper lists: the source is repeated so
        # that it pairs up with each pending sentence.
        scores = AlignScoreEval(model_size, [source] * len(pending), pending)
        for sent, score in zip(pending, scores):
            item[sent][score_key] = score
        data[meta_id] = item

        # Checkpoint after every sample. Write to a temp file and swap it in
        # so an interrupted write cannot corrupt the existing results.
        tmp_fname = fname + '.tmp'
        with open(tmp_fname, 'w') as f:
            f.write(json.dumps(data, indent=2))
        os.replace(tmp_fname, fname)
