In [15]:
import pandas as pd
import numpy as np
import os
import sys
import torch
import json
import pickle
current_dir = os.getcwd()
root_path = os.path.dirname(os.path.dirname(current_dir))
sys.path.append(f"{root_path}/sem_uncertainty/")
from semantic_entropy.utils import best_split
sys.path.append(root_path)
from src.detection_utils import LLAMA_PROBE_PATHS
import warnings
warnings.filterwarnings("ignore", category=RuntimeWarning)

In [None]:

# Dataset names iterated over by the analysis loops in this notebook.
datasets = ['trivia_qa', 'nq_open', 'pop_qa']
# Model name interpolated into the dataset/output paths; keep exactly one line active.
# model_name = "Meta-Llama-3.1-8B-Instruct"
model_name = "Mistral-7B-Instruct-v0.3"
# model_name = "Qwen2.5-7B-Instruct"

In [16]:
# Compute one split threshold per dataset for each uncertainty measure,
# using the train split of the currently selected model.
all_vu_threshold, all_se_threshold = {}, {}
for dataset in datasets:
    # 'verbal_uncertainty' 'sentence_semantic_entropy' "sentence_eigen"
    data = pd.read_csv(f"{root_path}/datasets/{dataset}/{model_name}/train.csv")
    vu, se = (
        np.array(data[column])
        for column in ('verbal_uncertainty', 'sentence_semantic_entropy')
    )
    # best_split is called first on verbal uncertainty, then on semantic entropy.
    vu_threshold, se_threshold = best_split(vu, ""), best_split(se, "")
    all_vu_threshold[dataset] = vu_threshold
    all_se_threshold[dataset] = se_threshold

print("all_vu_threshold", all_vu_threshold)
print("all_se_threshold", all_se_threshold)

all_vu_threshold {'trivia_qa': 0.29292929292929293, 'nq_open': 0.37373737373737376, 'pop_qa': 0.43434343434343436}
all_se_threshold {'trivia_qa': 0.9535958465934934, 'nq_open': 1.0931464582901023, 'pop_qa': 1.232697069986711}


In [13]:
# load the data

def load_data(dataset, max_alpha, use_predicted, prompt_type, str_process_layers, iti_method=2):
    """Build the per-sample evaluation frame for one dataset.

    Reads ``{model_name}_sentence/test.csv``, attaches the two uncertainty
    columns (probe predictions when ``use_predicted`` is true, otherwise the
    columns stored in ``{model_name}/test.csv``), then merges in the
    regenerated answers and the values returned by ``load_lu``,
    ``load_refusal``, ``load_acc``, ``load_se`` and ``load_detection_res``.

    Args:
        dataset: dataset name used in every path.
        max_alpha: value interpolated into the regenerated-output file name.
        use_predicted: choose probe predictions and the ``predicted_outputs``
            directory instead of the stored scores and ``outputs``.
        prompt_type: sub-directory of the calibration outputs.
        str_process_layers: string interpolated into the regenerated-output
            file name.
        iti_method: 0 or 2 select ``with_vufi_{iti_method}_...``; any other
            value selects the ``with_vufi_1_trivia_qa_...`` file.

    Returns:
        The test DataFrame with the added columns.

    Raises:
        AssertionError: if any of the loaded sequences differs in length from
            the test frame.
    """
    data = pd.read_csv(f"{root_path}/datasets/{dataset}/{model_name}_sentence/test.csv")
    # Value is not used further; the lookup still raises if the column is absent.
    _label = data['label']

    if use_predicted:
        predictions = {}
        for measure in ('verbal_uncertainty', 'sentence_semantic_entropy'):
            probe_file = f"{root_path}/probe/"+LLAMA_PROBE_PATHS[measure][dataset]+f"/{dataset}_predict_results.json"
            with open(probe_file) as f:
                predictions[measure] = json.load(f)["predictions"]
        lu = predictions['verbal_uncertainty']
        se = predictions['sentence_semantic_entropy']
    else:
        uncertainty_data = pd.read_csv(f"{root_path}/datasets/{dataset}/{model_name}/test.csv")
        lu = uncertainty_data['verbal_uncertainty']
        se = uncertainty_data['sentence_semantic_entropy']

    sizes = (len(data), len(lu), len(se))
    if len(set(sizes)) != 1:
        print(*sizes)
    assert len(set(sizes)) == 1
    data['verbal_uncertainty'] = lu
    data['sentence_semantic_entropy'] = se

    output_kind = "predicted_outputs" if use_predicted else "outputs"
    out_dir = f"{root_path}/calibration/{output_kind}/{dataset}/{model_name}/{prompt_type}"

    if iti_method in [0, 2]:
        file_stem = f"with_vufi_{iti_method}_{str_process_layers}_{max_alpha}"
    else:
        file_stem = f"with_vufi_1_trivia_qa_{str_process_layers}_{max_alpha}"
    re_generate_path = f"{out_dir}/test/{file_stem}.jsonl"

    re_generate_data = pd.read_json(re_generate_path, lines=True)
    assert len(re_generate_data) == len(data)
    data['re_generate'] = re_generate_data['most_likely_answer']

    # Each loader adds its column(s) to the frame and returns it.
    for loader in (load_lu, load_refusal, load_acc, load_se):
        data = loader(re_generate_path, data)

    detection_res = load_detection_res(dataset, model_name)
    assert len(detection_res) == len(data)
    data['detection_res'] = detection_res

    return data


def load_detection_res(dataset, model_name):
    """Return the ``y_pred`` entry of the detector output JSON for the given dataset and model."""
    result_file = f"{root_path}/detection/LR_outputs/{dataset}/{model_name}/verbal_uncertainty_sentence_semantic_entropy.json"
    with open(result_file) as handle:
        payload = json.load(handle)
    return payload["y_pred"]

def load_lu(re_generate_path, data):
    """Add the ``re_lu`` and ``re_vu_most_likely`` columns to ``data``.

    Two JSON files keyed by question are read. Their paths are derived from
    ``re_generate_path`` by replacing ``with_vufi`` with ``vu`` or
    ``vu_most_likely`` and dropping the final character of the path.

    * ``re_lu`` is the mean of the question's list with ``-1`` entries removed
      (NaN when nothing remains, since ``np.mean`` of an empty list is NaN).
    * ``re_vu_most_likely`` is the stored value itself.

    Questions missing from a file get ``-1`` in the corresponding column.

    Raises:
        AssertionError: if a file's entry count differs from the number of
            distinct questions in ``data``.
    """
    def _mean_without_sentinel(values):
        return np.mean([v for v in values if v != -1])

    def _as_is(value):
        return value

    passes = (
        ("vu", 're_lu', _mean_without_sentinel),
        ("vu_most_likely", 're_vu_most_likely', _as_is),
    )
    for file_tag, column, reduce_fn in passes:
        score_path = re_generate_path.replace("with_vufi", file_tag)[:-1]
        with open(score_path) as f:
            scores = json.load(f)
        questions = set(data['question'])
        if len(scores) != len(questions):
            print(f"re_lu: {len(scores)} data: {len(questions)}")
        assert len(scores) == len(questions)
        for idx, row in data.iterrows():
            question = row['question']
            # -1 marks a question that has no entry in the file (alpha = 0, not regenerated).
            data.at[idx, column] = reduce_fn(scores[question]) if question in scores else -1
    return data

def load_se(re_generate_path, data):
    """Add the ``re_se`` column from the pickled uncertainty measures.

    The pickle path is ``re_generate_path`` with ``with_vufi`` replaced by
    ``uncertainty_measures`` and ``jsonl`` replaced by ``pkl``. Row ``i`` gets
    entry ``i`` of ``['uncertainty_measures']['cluster_assignment_entropy']``.
    Rows whose index is beyond the end of that list fall back to the row's
    ``sentence_semantic_entropy`` (the run is treated as unfinished and a
    message is printed).
    """
    pkl_path = re_generate_path.replace("with_vufi", "uncertainty_measures").replace("jsonl", "pkl")
    # NOTE: pickle.load can execute arbitrary code; only point this at trusted files.
    with open(pkl_path, "rb") as infile:
        result_dict = pickle.load(infile)
    entropies = result_dict['uncertainty_measures']['cluster_assignment_entropy']
    n_done = len(entropies)
    if n_done != len(data):
        print(f"un finished re_se: {n_done} data: {len(data)}")
    # Length mismatch is tolerated on purpose (no assert): missing tail uses the original value.
    for idx, row in data.iterrows():
        data.at[idx, 're_se'] = entropies[idx] if idx < n_done else row['sentence_semantic_entropy']
    return data


def load_refusal(re_generate_path, data):
    """Add the ``re_refusal`` column from the refusal JSON keyed by question.

    The file path is ``re_generate_path`` with ``with_vufi`` replaced by
    ``refusal`` and the final character dropped. Each row receives
    ``int(value)`` for its question, or ``-1`` when the question has no entry.

    Raises:
        AssertionError: if the file's entry count differs from the number of
            distinct questions in ``data``.
    """
    refusal_file = re_generate_path.replace("with_vufi", "refusal")[:-1]
    with open(refusal_file) as handle:
        refusal_by_question = json.load(handle)
    n_flags = len(refusal_by_question)
    n_questions = len(set(data['question']))
    if n_flags != n_questions:
        print(f"re_refusal: {n_flags} data: {n_questions}")
    assert n_flags == n_questions
    for idx, row in data.iterrows():
        question = row['question']
        data.at[idx, 're_refusal'] = int(refusal_by_question[question]) if question in refusal_by_question else -1
    return data

def load_acc(re_generate_path, data):
    """Add the ``re_acc`` column from the accuracy JSON keyed by sample id.

    The file path is ``re_generate_path`` with ``with_vufi`` replaced by
    ``acc`` and the final character dropped. For each row, the first element
    of the entry stored under ``str(row['id'])`` is used. A length mismatch
    between file and frame is only reported, not asserted.

    Raises:
        KeyError: if a row's id has no entry in the file.
    """
    acc_file = re_generate_path.replace("with_vufi", "acc")[:-1]
    with open(acc_file) as handle:
        acc_by_id = json.load(handle)
    if len(acc_by_id) != len(data):
        print(f"re_acc: {len(acc_by_id)} data: {len(data)}")
    for idx, row in data.iterrows():
        sample_key = str(row['id'])
        data.at[idx, 're_acc'] = acc_by_id[sample_key][0]
    return data

def get_filtered_uncertainty(data, vu_threshold, se_threshold):
    """Select, per row, either the regenerated or the original scores.

    A row contributes ``re_lu`` / ``re_se`` / ``re_acc`` when both its
    ``detection_res`` and ``re_generate`` values are truthy; otherwise it
    contributes ``verbal_uncertainty`` / ``sentence_semantic_entropy`` /
    ``accuracy``.

    Args:
        data: frame holding the columns named above.
        vu_threshold: accepted but not used by the current selection rule.
        se_threshold: accepted but not used by the current selection rule.

    Returns:
        Tuple ``(lu, se, acc)`` of torch tensors, one element per row.
    """
    regenerated_cols = ('re_lu', 're_se', 're_acc')
    original_cols = ('verbal_uncertainty', 'sentence_semantic_entropy', 'accuracy')

    picked = []
    for _, row in data.iterrows():
        # Earlier rule, kept for reference:
        # row['verbal_uncertainty'] < vu_threshold and row['sentence_semantic_entropy'] > se_threshold
        use_regenerated = row['detection_res'] and row['re_generate']
        source_cols = regenerated_cols if use_regenerated else original_cols
        picked.append([row[col] for col in source_cols])

    lu, se, acc = ([values[k] for values in picked] for k in range(3))
    return torch.tensor(lu), torch.tensor(se), torch.tensor(acc)



# hallucination ratio

In [24]:
# before
# Per dataset: hallucinated % / remaining % / refusal %, where the remainder is
# 100 minus the other two.
for dataset in datasets:
    data = pd.read_csv(f"{root_path}/datasets/{dataset}/{model_name}_sentence/test.csv")
    is_hallucinated = data['label'] == 'hallucinated'
    hallu_ratio = round(np.mean(is_hallucinated) * 100, 2)

    refusal_file = f"{root_path}/sem_uncertainty/outputs/{dataset}/sentence/{model_name}/test_refusal_rate.json"
    with open(refusal_file) as f:
        refusal_stats = json.load(f)
    refusal_ratio = round(refusal_stats["refusal_rate"] * 100, 2)

    correction = round(100 - hallu_ratio - refusal_ratio, 2)
    print(f"{dataset}: {hallu_ratio}\t{correction}\t{refusal_ratio}")

trivia_qa: 30.2	67.9	1.9
nq_open: 52.2	41.7	6.1
pop_qa: 58.2	26.4	15.4


In [19]:
# after
# Settings shared by this cell and the later "after" cells (they are read as globals there).
max_alpha = 0.4
iti_method = 2
use_predicted = False
prompt_type = 'uncertainty'
# str_process_layers = 'range(16,28)'
str_process_layers = 'range(15,32)'

# Per dataset: hallucinated % / correct-and-not-refused % / refusal % after regeneration.
for dataset in datasets:
    # NOTE(review): load_data reads a "with_vufi_1_trivia_qa_..." file for iti_method 1,
    # so trivia_qa itself is skipped in that setting — presumably a transfer setup; confirm.
    if dataset == "trivia_qa" and iti_method==1:
        continue
    vu_threshold = all_vu_threshold[dataset]
    se_threshold = all_se_threshold[dataset]
    data = load_data(dataset, max_alpha, use_predicted, prompt_type, str_process_layers, iti_method)

    # Pass 1: relabel rows. Flagged rows are judged on the regenerated answer,
    # every other row keeps its original label / accuracy / refusal.
    for idx, row in data.iterrows():
        if row['detection_res']:
            re_lu = row['re_vu_most_likely']
            re_acc = row['re_acc']
            # Not a hallucination if the regenerated answer is verbally uncertain
            # enough (>= threshold) or is correct.
            if re_lu >= vu_threshold or re_acc:
                data.at[idx, 're_label'] = 'ok'
            else:
                data.at[idx, 're_label'] = 'hallucinated'
        else: # original
            data.at[idx, 're_label'] = row["label"]
            data.at[idx, 're_acc'] = row["accuracy"]
            data.at[idx, 're_refusal'] = int(row["refusal"])

    # Pass 2: replace the "no regenerated value" markers (-1 or NaN) in re_refusal
    # with the original refusal flag. This must happen BEFORE re_refusal is used
    # as a boolean below: both -1 and NaN are truthy and would otherwise be
    # counted as refusals.
    for idx, row in data.iterrows():
        if np.isnan(row['re_refusal']) or row['re_refusal'] == -1:
            data.at[idx, 're_refusal'] = int(row["refusal"])

    ratio = round(np.mean(data['re_label'] == 'hallucinated') * 100, 2)

    # A refused answer never counts as correct.
    correct_ratio2 = [0 if row['re_refusal'] else row['re_acc'] for _, row in data.iterrows()]
    correct_ratio2 = round(np.mean(correct_ratio2) * 100, 2)

    refusal_ratio = round(np.mean(data['re_refusal']) * 100, 2)
    print(f"{dataset}\t{ratio}\t{correct_ratio2}\t{refusal_ratio}")

trivia_qa	19.7	67.0	6.2
nq_open	40.8	39.4	13.7
pop_qa	26.7	23.9	43.6


# Percentage of samples on which the two uncertainty measures disagree

In [25]:
# before
# A sample "agrees" when both scores are strictly above, or both strictly below,
# their thresholds; everything else (including a score equal to its threshold)
# counts as disagreement.
for dataset in datasets:
    vu_threshold = all_vu_threshold[dataset]
    se_threshold = all_se_threshold[dataset]
    # 'verbal_uncertainty' 'sentence_semantic_entropy' "sentence_eigen"
    data = pd.read_csv(f"{root_path}/datasets/{dataset}/{model_name}/test.csv")
    lu = torch.tensor(data['verbal_uncertainty'])
    se = torch.tensor(data['sentence_semantic_entropy'])

    assert len(lu) == len(se)
    disagree = [
        0 if (v > vu_threshold and s > se_threshold) or (v < vu_threshold and s < se_threshold) else 1
        for v, s in zip(lu, se)
    ]
    disagree = round(np.mean(disagree) * 100, 2)
    print(f"{dataset}: {disagree}")

trivia_qa: 27.5
nq_open: 46.8
pop_qa: 50.8


In [26]:
# after
# Same disagreement rule as the "before" cell, applied to the scores selected
# by get_filtered_uncertainty.
for dataset in datasets:
    if dataset == "trivia_qa" and iti_method==1:
        continue
    vu_threshold = all_vu_threshold[dataset]
    se_threshold = all_se_threshold[dataset]
    data = load_data(dataset, max_alpha, use_predicted, prompt_type, str_process_layers, iti_method)
    lu, se, acc = get_filtered_uncertainty(data, vu_threshold, se_threshold)
    disagree = [
        0 if (lu[k] > vu_threshold and se[k] > se_threshold) or (lu[k] < vu_threshold and se[k] < se_threshold) else 1
        for k in range(len(lu))
    ]
    disagree = round(np.mean(disagree) * 100, 2)
    print(f"{dataset}: {disagree}")

trivia_qa: 16.8
nq_open: 19.8
pop_qa: 28.5


# Correlation between two uncertainties

In [27]:

# Pearson correlation between the two uncertainty columns of the stored test split.
for dataset in datasets:
    # 'verbal_uncertainty' 'sentence_semantic_entropy' "sentence_eigen"
    data = pd.read_csv(f"{root_path}/datasets/{dataset}/{model_name}/test.csv")
    lu, se = data['verbal_uncertainty'], data['sentence_semantic_entropy']

    assert len(lu) == len(se)
    # Off-diagonal entry of the 2x2 matrix is the lu/se correlation.
    correlation_coefficient = np.corrcoef(lu, se)[0, 1]
    print(dataset, round(correlation_coefficient, 2))

trivia_qa 0.46
nq_open 0.24
pop_qa 0.15


In [28]:

# Correlation between the two uncertainties on the scores selected by
# get_filtered_uncertainty; samples whose lu is NaN are dropped first.
for dataset in datasets:
    if dataset == "trivia_qa" and iti_method==1:
        continue
    vu_threshold = all_vu_threshold[dataset]
    se_threshold = all_se_threshold[dataset]
    data = load_data(dataset, max_alpha, use_predicted, prompt_type, str_process_layers, iti_method)
    lu, se, acc = get_filtered_uncertainty(data, vu_threshold, se_threshold)
    nan_rows = torch.isnan(lu)
    if nan_rows.any():
        print("lu has nan")
        keep = ~nan_rows
        lu, se = lu[keep], se[keep]
    correlation_coefficient = np.corrcoef(lu, se)[0, 1]
    print(dataset, round(correlation_coefficient,2))

trivia_qa 0.66
nq_open 0.58
pop_qa 0.53


# Average verbal uncertainty (LU) for correct and incorrect answers

In [29]:
# before
# Prints "<dataset>: <mean LU of incorrect>\t<mean LU of correct>".
for dataset in datasets:
    # Accuracy comes from the "_sentence" split, verbal uncertainty from the plain one.
    data = pd.read_csv(f"{root_path}/datasets/{dataset}/{model_name}_sentence/test.csv")
    acc = data['accuracy']
    data = pd.read_csv(f"{root_path}/datasets/{dataset}/{model_name}/test.csv")
    lu = data['verbal_uncertainty']

    positions = range(len(lu))
    vu_correct = [lu[k] for k in positions if acc[k]]
    vu_incorrect = [lu[k] for k in positions if not acc[k]]

    vu_correct = round(np.mean(vu_correct), 2)
    vu_incorrect = round(np.mean(vu_incorrect), 2) # high because refusal is incorrect!!
    print(f"{dataset}: {vu_incorrect}\t{vu_correct}")
    

trivia_qa: 0.19	0.04
nq_open: 0.23	0.07
pop_qa: 0.3	0.07


In [30]:
# after
# Prints "<dataset>: <mean LU of incorrect>\t<mean LU of correct>" for the
# scores selected by get_filtered_uncertainty.
for dataset in datasets:
    if dataset == "trivia_qa" and iti_method==1:
        continue
    vu_threshold = all_vu_threshold[dataset]
    se_threshold = all_se_threshold[dataset]
    data = load_data(dataset, max_alpha, use_predicted, prompt_type, str_process_layers, iti_method)
    lu, se, acc = get_filtered_uncertainty(data, vu_threshold, se_threshold)
    vu_correct, vu_incorrect = [], []
    # Work on plain Python numbers: averaging and rounding 0-d torch tensors
    # printed float32 artefacts such as 0.38999998569488525 instead of 0.39.
    for lu_value, is_correct in zip(lu.tolist(), acc.tolist()):
        if np.isnan(lu_value):
            # Samples without a usable LU are left out of both groups.
            continue
        if is_correct:
            vu_correct.append(lu_value)
        else:
            vu_incorrect.append(lu_value)
    vu_correct = round(float(np.mean(vu_correct)), 2)
    vu_incorrect = round(float(np.mean(vu_incorrect)), 2) # high because refusal is incorrect!!
    print(f"{dataset}: {vu_incorrect}\t{vu_correct}")
    

trivia_qa: 0.38999998569488525	0.05000000074505806
nq_open: 0.4000000059604645	0.10000000149011612
pop_qa: 0.6399999856948853	0.15000000596046448
