In [1]:
import json 
import pathlib
import seaborn as sns
from matplotlib import pyplot as plt
import pandas as pd 
import numpy as np
from dataflow.core.lispress import parse_lispress, render_compact

In [2]:
def read_jsonl(path):
    """Load a JSON Lines file into a list of decoded records.

    Args:
        path: Location of the file to read. Every non-blank line must hold
            exactly one JSON document.

    Returns:
        A list with one decoded object per non-blank line, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    with open(path, "r") as f:
        # Iterate the handle directly instead of materialising readlines(),
        # and skip blank/whitespace-only lines (e.g. a trailing empty line),
        # which json.loads would otherwise reject.
        return [json.loads(line) for line in f if line.strip()]

def read_tgt(path):
    """Read a text file and return its lines with surrounding whitespace removed.

    Args:
        path: Location of the file to read.

    Returns:
        A list with one string per line of the file, in file order. Leading
        and trailing whitespace (including the newline) is stripped from each
        entry; blank lines are kept as empty strings.
    """
    stripped_lines = []
    with open(path) as handle:
        for raw_line in handle:
            stripped_lines.append(raw_line.strip())
    return stripped_lines

In [3]:
# Inputs: `path` holds the predictions (read as JSON Lines, one record per line)
# and `tgt_path` holds the gold target programs (one per line).
path = "/brtx/604-nvme1/estengel/calflow_calibration/miso/tune_roberta_tok_fix_benchclamp_data/translate_output_hitl/dev_all.tgt"
tgt_path = "/brtx/601-nvme1/estengel/resources/data/smcalflow.agent.data.from_benchclamp/dev_all.tgt"


gold_tgts = read_tgt(tgt_path)
pred_data = read_jsonl(path)

# zip() below stops at the shorter input, so a length mismatch would silently
# drop examples from every statistic. Surface it instead of hiding it; the
# common prefix is still scored exactly as before.
if len(gold_tgts) != len(pred_data):
    print(
        f"WARNING: {len(gold_tgts)} gold targets vs {len(pred_data)} predictions; "
        f"only the first {min(len(gold_tgts), len(pred_data))} pairs are scored"
    )


def _pct(numer, denom):
    """Return numer/denom as a percentage, or NaN when denom is zero."""
    # The counters start as plain int 0, for which `/` raises ZeroDivisionError
    # (e.g. empty inputs, or every record skipped). Report NaN in that case.
    return numer / denom * 100 if denom else float("nan")


# Exact-match counters.
correct = 0
total = 0

# Per-token annotation counters, summed over all records that carry them.
total_rewritten = 0
total_chosen = 0
total_uncertain = 0
total_tokens = 0
# Number of records missing at least one of the annotation fields.
skipped = 0
for gold_tgt, pred in zip(gold_tgts, pred_data):
    # Round-trip both programs through parse + compact render so the comparison
    # is made on the rendered form rather than on the raw strings.
    pred_lispress = render_compact(parse_lispress(pred['tgt_str']))
    gold_lispress = render_compact(parse_lispress(gold_tgt))

    if pred_lispress == gold_lispress:
        correct += 1
    total += 1
    try:
        # All four fields are read before any counter is touched, so a record
        # with a missing key contributes to none of the totals.
        # NOTE(review): presumably each field is a per-token list of flags or
        # counts -- confirm against the code that writes these records.
        n_uncertain = np.sum(pred['low_conf_tokens'])
        n_chosen = np.sum(pred['ann_chose_from_top_k'])
        n_rewritten = np.sum(pred['ann_manually_inserted'])
        n_tokens_predicted = np.sum(pred['tokens_predicted'])
    except KeyError:
        skipped+=1
    else:
        total_rewritten += n_rewritten
        total_chosen += n_chosen
        total_uncertain += n_uncertain
        total_tokens += n_tokens_predicted


print(skipped)
print(f"Exact match: {correct}/{total} = {_pct(correct, total):.2f}")
print(f"total tokens: {int(total_tokens)}")
print(f"total low confidence: {int(total_uncertain)}: {_pct(total_uncertain, total_tokens):.2f}% of total tokens")
print(f"total chosen from top k: {int(total_chosen)}: {_pct(total_chosen, total_uncertain):.2f}% of uncertain tokens")
print(f"total manually rewritten: {int(total_rewritten)}: {_pct(total_rewritten, total_uncertain):.2f}% of uncertain tokens")



0
Exact match: 10594/12271 = 86.33
total tokens: 176470
total low confidence: 2452: 1.39% of total tokens
total chosen from top k: 1951: 79.57% of uncertain tokens
total manually rewritten: 501: 20.43% of uncertain tokens
