In [1]:
import json, pandas as pd
from IPython.display import display, Markdown

In [7]:
def _load_experiment(path):
    """Parse one experiment JSON file, closing the file handle as soon as it is read."""
    with open(path, 'r') as handle:
        return json.load(handle)


# Model name -> parsed contents of that model's experiment file.
# Commented-out entries are models currently excluded from the analysis;
# uncomment a line to include that model again.
experiments = {
    # "gpt-3.5-turbo": _load_experiment('experiments/nesy4vrd/gpt-3.5-turbo-train.json'),
    "gpt-4o-mini-2024-07-18": _load_experiment('experiments/nesy4vrd/gpt-4o-mini-2024-07-18-train.json'),
    "gpt-4o-2024-05-13": _load_experiment('experiments/nesy4vrd/gpt-4o-2024-05-13-train.json'),
    # "gpt-4-0125-preview": _load_experiment('experiments/nesy4vrd/gpt-4-0125-preview-train.json'),
    "mistralai/Mistral-7B-Instruct-v0.3": _load_experiment('experiments/nesy4vrd/Mistral-7B-Instruct-v0.3-train.json'),
    # "mistralai/Mixtral-8x7B-Instruct-v0.1": _load_experiment('experiments/nesy4vrd/Mixtral-8x7B-Instruct-v0.1-train.json'),
    # "meta-llama/Meta-Llama-3-70b-Instruct": _load_experiment('experiments/nesy4vrd/Meta-Llama-3-70B-Instruct-train.json'),
    # "claude-3-opus-20240229": _load_experiment('experiments/nesy4vrd/claude-3-opus-20240229-train.json'),
    # "claude-3-5-sonnet-20240620": _load_experiment('experiments/nesy4vrd/claude-3-5-sonnet-20240620-train.json'),
    "claude-3-haiku-20240307": _load_experiment('experiments/nesy4vrd/claude-3-haiku-20240307-train.json'),
}

In [8]:
# One DataFrame per loaded model (in the dict's insertion order),
# then all of them stacked into a single frame.
dfs = [pd.DataFrame.from_records(records) for records in experiments.values()]
df = pd.concat(dfs)

In [9]:
# Number of records per model, counted on the untouched per-model frames.
# Using each model's own total (instead of the size of the first frame)
# keeps the rate correct when models have different record counts, and the
# value does not change when invalid rows are dropped below or the cell is re-run.
n_per_model = pd.concat(dfs).groupby('model').size()

# Parse 'answer' as a number (unparsable values become NaN), drop those rows,
# and store the remaining answers as integers. Built as one chained expression
# so no column is assigned on a filtered frame (avoids SettingWithCopyWarning).
df = (
    df.assign(answer=pd.to_numeric(df['answer'], errors='coerce'))
    .dropna(subset=['answer'])
    .astype({'answer': int})
)

# Sum of the parsed answers per model. Reindexing against n_per_model keeps a
# model in the table (with a sum of 0) even if all of its answers were dropped.
result = (
    df.groupby('model')['answer']
    .sum()
    .reindex(n_per_model.index, fill_value=0)
    .rename_axis('model')
    .reset_index()
)

# FN = records of the model minus the summed answers; rows dropped above as
# unparsable therefore still count towards FN, as they did before.
totals = result['model'].map(n_per_model)
result['FN'] = totals - result['answer']

# False negative rate (FNR) relative to the model's own record count
result['FNR'] = result['FN'] / totals

# Display result, lowest FNR first
result[['model', 'FN', 'FNR']].sort_values('FNR', ascending=True)

Unnamed: 0,model,FN,FNR
0,claude-3-haiku-20240307,31,0.31
1,gpt-4o-2024-05-13,33,0.33
2,gpt-4o-mini-2024-07-18,66,0.66
3,mistralai/Mistral-7B-Instruct-v0.3,98,0.98


In [11]:
# Among the rows whose answer is 0, count the non-null entries of every
# other column for each value of the property column 'p'.
breakdown_by_property = (
    df.loc[df['answer'].eq(0)]
    .groupby('p')
    .count()
    .reset_index()
)
breakdown_by_property

Unnamed: 0,p,s,o,model,rationale,answer
0,owl:sameAs,3,3,3,3,3
1,rdfs:domain,29,29,29,29,29
2,rdfs:range,1,1,1,1,1
3,rdfs:subClassOf,28,28,28,28,28


In [6]:
# Keep the claude-3-haiku records whose raw answer is exactly the string '0'.
fns = [
    record
    for record in experiments['claude-3-haiku-20240307']
    if record['answer'] == '0'
]

KeyError: 'claude-3-haiku-20240307'

In [None]:
# Remove the 'answer' field from every record that will be exported.
# New dicts are built instead of popping in place: the entries of `fns` are
# the same dict objects held in `experiments`, so an in-place pop would also
# erase 'answer' from the loaded data used by later cells, and would raise
# KeyError if this cell were run a second time.
fns = [
    {key: value for key, value in fn.items() if key != 'answer'}
    for fn in fns
]

In [None]:
# Write the selected records to disk; the context manager guarantees the
# file is flushed and closed once the dump finishes.
with open('experiments/nesy4vrd/claude-3-haiku-20240307-fns.json', 'w') as handle:
    json.dump(fns, handle)

In [None]:
# Render one left-aligned, top-aligned table per model, rows ordered by subject 's'.
for model in experiments:
    display(Markdown(f'## Model: {model}'))
    df2 = pd.DataFrame.from_records(experiments[model])[['s', 'p', 'o', 'answer', 'rationale']].sort_values('s')
    # The styled table is emitted as HTML, so markup characters in the rationale
    # are replaced with complete entities ('&' first, so the entities added
    # afterwards are not escaped again) and the Markdown bold markers are removed.
    # The vectorized .str accessor also tolerates missing rationales.
    df2["rationale"] = (
        df2["rationale"]
        .str.replace('&', '&amp;', regex=False)
        .str.replace('>', '&gt;', regex=False)
        .str.replace('<', '&lt;', regex=False)
        .str.replace('**', '', regex=False)
    )
    fns_styler = df2.style.set_properties(**{"text-align": "left", "vertical-align" : "top", "overflow-wrap": "break-word"}).hide(axis="index")
    display(fns_styler)