In [58]:
import pandas as pd 
import os 
import json 
import collections 
from tqdm import tqdm 

tqdm.pandas()

In [80]:
# load factscore origin 
dir_origin_path = "../src/data/factscore_origin/labeled"
models = [x for x in os.listdir(dir_origin_path) if x.endswith("jsonl")]

In [81]:
def get_percentage_not_supported_sentences(annotations):
    counter = 0
    for sentence in annotations:
        if "human-atomic-facts" not in sentence or sentence["human-atomic-facts"] is None:
            continue
        for prop in sentence["human-atomic-facts"]:
            if prop["label"] == "NS":
                counter += 1
                break 
    return counter / len(annotations)

In [132]:
percentage = collections.defaultdict(dict)
for model in models:
    model_clean = model[:-7]
    print(model)
    path = os.path.join(dir_origin_path, model)
    df = pd.read_json(path, lines=True).dropna()

    df["incorrect"] = df["annotations"].apply(get_percentage_not_supported_sentences)
    for i, row in df.iterrows():
        percentage[row["topic"]][model_clean + "_percentage"] = row["incorrect"]
        

InstructGPT.jsonl
ChatGPT.jsonl
PerplexityAI.jsonl


In [133]:
df_incorrect = pd.DataFrame.from_dict(percentage).T.dropna()
df_incorrect

Unnamed: 0,InstructGP_percentage,ChatGP_percentage,PerplexityA_percentage
Doug Sheehan,1.000000,0.800000,0.375000
Taral Hicks,0.833333,1.000000,0.125000
Joey D. Vieira,1.000000,1.000000,0.125000
Lanny Flaherty,1.000000,1.000000,0.375000
Marianne McAndrew,1.000000,0.714286,0.333333
...,...,...,...
Anushka Sharma,0.500000,0.428571,0.100000
IU (singer),0.166667,0.125000,0.200000
Ji Sung,0.600000,0.166667,0.000000
Taika Waititi,0.166667,0.250000,0.083333


# choose factscore topics

In [134]:
dir_path = "../src/data/factscore"

In [135]:
stats = collections.defaultdict(dict)
source_len = {}
for file_name in os.listdir(dir_path):
    if not file_name.endswith("json"):
        continue
    with open(os.path.join(dir_path, file_name), "r") as f:
        data = json.load(f)
    file_name_clean = file_name[:-5]
    # if len(data["summaries"]) != 3:
    #     continue
    for summary in data["summaries"]:
        stats[file_name_clean][summary["summaryId"]] = len(summary["qas"])
    source_len[file_name_clean] = len(data["source"])

In [142]:
def get_min_sum(row):
    a = row["ChatGPT"] if row["ChatGPT"] is not None else 500
    b = row["InstructGPT"] if row["InstructGPT"] is not None else 500
    c = row["PerplexityAI"] if row["PerplexityAI"] is not None else 500
    return min(a+b, a+c, b+c)

df = pd.DataFrame(stats).T
df["sum"] = df.sum(axis=1)
df["min_sum"] = df.apply(get_min_sum, axis=1)
df

Unnamed: 0,ChatGPT,InstructGPT,PerplexityAI,sum,min_sum
Toyoko Tokiwa,70.0,47.0,73.0,190.0,117.0
Kim Casali,54.0,38.0,77.0,169.0,92.0
Fran Unsworth,41.0,16.0,56.0,113.0,57.0
Desmond Luke,,21.0,34.0,55.0,
Errol Flynn,42.0,42.0,82.0,166.0,84.0
...,...,...,...,...,...
Jimmy Bartel,40.0,38.0,45.0,123.0,78.0
Abdou Diallo,131.0,44.0,57.0,232.0,101.0
Heinrich Harrer,108.0,95.0,77.0,280.0,172.0
Robert Louis Stevenson,53.0,24.0,56.0,133.0,77.0


In [143]:
source_len_df = pd.DataFrame.from_dict(source_len, orient="index", columns=["length"])
source_len_df

Unnamed: 0,length
Toyoko Tokiwa,1456
Kim Casali,1178
Fran Unsworth,896
Desmond Luke,288
Errol Flynn,9295
...,...
Jimmy Bartel,3454
Abdou Diallo,894
Heinrich Harrer,2232
Robert Louis Stevenson,10604


In [144]:
df = df.merge(source_len_df, how="inner", left_index=True, right_index=True)
df = df.merge(df_incorrect, how="inner", left_index=True, right_index=True)
df

Unnamed: 0,ChatGPT,InstructGPT,PerplexityAI,sum,min_sum,length,InstructGP_percentage,ChatGP_percentage,PerplexityA_percentage
Toyoko Tokiwa,70.0,47.0,73.0,190.0,117.0,1456,0.200000,0.200000,0.000000
Kim Casali,54.0,38.0,77.0,169.0,92.0,1178,1.000000,0.666667,0.181818
Fran Unsworth,41.0,16.0,56.0,113.0,57.0,896,0.800000,0.800000,0.750000
Errol Flynn,42.0,42.0,82.0,166.0,84.0,9295,0.333333,0.375000,0.307692
Andreas Kisser,45.0,23.0,46.0,114.0,68.0,2678,0.500000,0.375000,0.000000
...,...,...,...,...,...,...,...,...,...
Jimmy Bartel,40.0,38.0,45.0,123.0,78.0,3454,0.500000,0.200000,0.000000
Abdou Diallo,131.0,44.0,57.0,232.0,101.0,894,1.000000,0.909091,0.400000
Heinrich Harrer,108.0,95.0,77.0,280.0,172.0,2232,0.428571,0.454545,0.272727
Robert Louis Stevenson,53.0,24.0,56.0,133.0,77.0,10604,0.428571,0.166667,0.111111


In [148]:
df[df["length"] < 1500].sort_values(by=["min_sum", "length"]).head(40)


Unnamed: 0,ChatGPT,InstructGPT,PerplexityAI,sum,min_sum,length,InstructGP_percentage,ChatGP_percentage,PerplexityA_percentage
Quintus Sosius Senecio,41.0,25.0,10.0,76.0,35.0,595,1.0,0.833333,0.4
Terence Blacker,49.0,26.0,13.0,88.0,39.0,1098,0.666667,0.818182,0.333333
Cha Seung-won,62.0,26.0,22.0,110.0,48.0,1216,1.0,0.555556,0.25
Manuel Salazar (footballer),70.0,36.0,14.0,120.0,50.0,326,1.0,1.0,0.0
Doug Sheehan,48.0,23.0,30.0,101.0,53.0,129,1.0,0.8,0.375
"Robert Smith, Baron Smith of Kelvin",82.0,24.0,31.0,137.0,55.0,1053,1.0,0.727273,0.111111
Jonathan Tucker,32.0,24.0,53.0,109.0,56.0,1002,0.714286,0.5,0.2
Taral Hicks,46.0,28.0,29.0,103.0,57.0,625,0.833333,1.0,0.125
Fran Unsworth,41.0,16.0,56.0,113.0,57.0,896,0.8,0.8,0.75
Jean Daullé,40.0,19.0,51.0,110.0,59.0,649,1.0,1.0,0.6


In [147]:
df.columns

Index(['ChatGPT', 'InstructGPT', 'PerplexityAI', 'sum', 'min_sum', 'length',
       'InstructGP_percentage', 'ChatGP_percentage', 'PerplexityA_percentage'],
      dtype='object')

In [164]:
chosen_topics = {
    "Quintus Sosius Senecio": ["InstructGPT", "PerplexityAI"],
    "Cha Seung-won": ["InstructGPT", "PerplexityAI"],
    "Doug Sheehan": ["InstructGPT", "PerplexityAI"],
    "Manuel Salazar (footballer)": ["InstructGPT", "PerplexityAI"],
    "Robert Smith, Baron Smith of Kelvin": ["InstructGPT", "PerplexityAI"],
    "Jonathan Tucker": ['ChatGPT', 'InstructGPT'],
    "Elsa Pataky": ['ChatGPT', 'InstructGPT'],
    "Lauren London": ["InstructGPT", "PerplexityAI"],
    "Roselyn Sánchez": ['ChatGPT', 'InstructGPT'],
    "Kim Burrell": ["InstructGPT", "PerplexityAI"],
    "Mike Trivisonno": ['ChatGPT', 'InstructGPT'],
    "Daniel Carvalho": ['ChatGPT', 'InstructGPT'],
    "David Wenham": ['ChatGPT', 'InstructGPT'],
    "Dacre Montgomery": ["InstructGPT", "PerplexityAI"],
    "Shahnaz Pahlavi": ["InstructGPT", "PerplexityAI"],
    "Song Kang": ["InstructGPT", "PerplexityAI"],
    "Andrew Shue": ["InstructGPT", "PerplexityAI"],
    "Alma Katsu": ['ChatGPT', 'InstructGPT'],
    "William Waldegrave, Baron Waldegrave of North Hill": ["InstructGPT", "PerplexityAI"],
}

In [165]:
len(chosen_topics)

19

In [166]:
for topic_id, models in chosen_topics.items():
    # open original file 
    # keep only chosen models 
    # save in filtered 
    with open(os.path.join(dir_path, f"{topic_id}.json"), "r") as f:
        data = json.load(f)
    new_data = {
        "source": data["source"],
        "sourceId": data["sourceId"],
        "datasource": data["datasource"],
        "dataset": data["dataset"],
        "summaries": [
            summary_dict.copy()
            for summary_dict in data["summaries"] if summary_dict["summaryId"] in models
        ]
    }
    with open(f"../src/data/factscore_filtered/{topic_id}.json", "w") as f:
        json.dump(new_data, f, indent=4)
    