In [None]:
import json
import logging
from copy import deepcopy
from datetime import datetime
from functools import partial
from pathlib import Path

import pandas as pd
from dotenv import load_dotenv
from tqdm.auto import tqdm
from IPython.display import clear_output

from bellem.utils import set_seed

# Load environment variables from a local .env file, if one is present (python-dotenv).
load_dotenv()

# Register tqdm's progress-bar helpers (e.g. `progress_apply`) on pandas objects.
tqdm.pandas()
# Render floats in displayed frames with thousands separators and three decimals.
pd.options.display.float_format = "{:,.3f}".format

# NOTE(review): presumably seeds the random number generators for reproducibility —
# `set_seed` comes from bellem.utils and is not shown here; confirm what it covers.
set_seed(89)

In [None]:
import textwrap

def format_paragraph(paragraph):
    """Render a paragraph record as a single line: ``Paragraph <idx> - <text>``.

    `paragraph` is a mapping that provides the keys ``idx`` and
    ``paragraph_text``.
    """
    index = paragraph['idx']
    text = paragraph['paragraph_text']
    return "Paragraph {} - {}".format(index, text)

def _print_decomposition(steps):
    """Print each sub-question (one tab deep) followed by its answer (two tabs deep)."""
    for step in steps:
        sub_question = step['question']
        sub_answer = step['answer']
        print(f"\t{sub_question}")
        print(f"\t\t{sub_answer}")


def _print_supporting_paragraphs(paragraphs, width=120):
    """Print every paragraph flagged ``is_supporting``, wrapped to `width` columns."""
    for paragraph in paragraphs:
        if not paragraph['is_supporting']:
            continue
        for line in textwrap.wrap(format_paragraph(paragraph), width=width):
            print(line)
        # Blank line separates consecutive paragraphs.
        print()


def present_row(row):
    """Print a human-readable report for one evaluated question.

    The report contains the question id and outcome, the reference and predicted
    question decompositions, the reference and predicted answers, and the
    supporting paragraphs of the reference and predicted contexts.

    Parameters
    ----------
    row : mapping (e.g. a pandas Series or dict)
        Must provide the keys ``id``, ``fuzzy_match``, ``question``,
        ``question_decomposition_original``, ``question_decomposition``,
        ``answers``, ``predicted_answer``, ``paragraphs_original`` and
        ``paragraphs``. Decomposition items need ``question`` and ``answer``;
        paragraph items need ``is_supporting`` plus whatever
        ``format_paragraph`` reads.

    Returns
    -------
    None
        Everything is written to standard output.
    """
    outcome = "Success" if row['fuzzy_match'] else "Fail"
    print(row['id'])
    print(f"Outcome: {outcome}")
    print()

    print("# Question decomposition")
    print("## Reference")
    print(row['question'])
    _print_decomposition(row['question_decomposition_original'])
    print()
    print("## Prediction")
    _print_decomposition(row['question_decomposition'])
    print()

    print("# Answer")
    print(f"Reference: {row['answers']}")
    print(f"Prediction: {row['predicted_answer']}")
    print()

    print("# Context")
    # NOTE(review): these two sub-headings are printed at level one ("#") although the
    # matching ones under "# Question decomposition" use "##"; the text is kept as-is
    # so existing output does not change.
    print("# Reference")
    _print_supporting_paragraphs(row['paragraphs_original'])

    print("# Prediction")
    _print_supporting_paragraphs(row['paragraphs'])

In [None]:
def partition_by_success(dataf):
    """Split `dataf` into rows that passed and rows that failed the fuzzy match.

    Prints the two row counts and, on the next line, their shares of the total
    (two decimals). For an empty frame the shares are printed as ``nan``
    instead of raising ``ZeroDivisionError``.

    Parameters
    ----------
    dataf : pandas.DataFrame
        Must contain a boolean ``fuzzy_match`` column.

    Returns
    -------
    tuple
        ``(success_dataf, fail_dataf)`` — the rows where ``fuzzy_match`` is
        true and the rows where it is false, in that order.
    """
    matched = dataf['fuzzy_match']
    success_dataf = dataf.loc[matched]
    fail_dataf = dataf.loc[~matched]

    total = len(dataf)
    n_success, n_fail = len(success_dataf), len(fail_dataf)
    print(n_success, n_fail)
    if total:
        print(f"{n_success / total:.2f}", f"{n_fail / total:.2f}")
    else:
        # Shares are undefined when there are no rows at all.
        print("nan", "nan")
    return success_dataf, fail_dataf

In [None]:
# Model predictions, one JSON record per line.
res_df = pd.read_json('our-method-kgqa-results-20240908-134424.jsonl', lines=True)
# Evaluation dataset the predictions were produced for.
original_df = pd.read_json(
    "../../data/generated/musique-evaluation/dataset.jsonl",
    orient="records",
    lines=True,
)
# Attach the dataset's own paragraphs and decomposition to each prediction; columns that
# clash with the results keep their name there and get an "_original" suffix here.
reference_columns = original_df[['id', 'paragraphs', 'question_decomposition']]
df = res_df.merge(reference_columns, on="id", suffixes=("", "_original"))
df.head()

Unnamed: 0,id,paragraphs,answer,answer_aliases,answerable,answers,question,question_decomposition,triplets_str,predicted_answer,raw_output,exact_match,fuzzy_match,paragraphs_original,question_decomposition_original
0,2hop__575188_342798,"[{'idx': 0, 'title': '', 'paragraph_text': 'Li...",Ahmad Shah Qajar,[Ahmad Shah Qajar],True,[Ahmad Shah Qajar],Who is the child of Mahmoud Mirza's father?,"[{'id': 575188, 'question': 'Who is Mahmoud Mi...",[Liliana Mumy | father | Bill Mumy\nBill Mumy ...,Amanollah Khan Zia' os-Soltan,"{'answer': 'Amanollah Khan Zia' os-Soltan', 'h...",False,False,"[{'idx': 0, 'title': 'Liliana Mumy', 'paragrap...","[{'id': 575188, 'question': 'Mahmoud Mirza >> ..."
1,2hop__731584_700117,"[{'idx': 0, 'title': '', 'paragraph_text': 'KA...",Berrien County,[Berrien County],True,[Berrien County],In which county is the city to which KKVU is l...,"[{'id': 731584, 'question': 'To which city is ...",[KAPE | broadcast frequency | 1550 AM\nKAPE | ...,Berrien County,"{'answer': 'Berrien County', 'hops': [{'questi...",True,True,"[{'idx': 0, 'title': 'KAPE', 'paragraph_text':...","[{'id': 731584, 'question': 'KKVU >> licensed ..."
2,2hop__690412_526810,"[{'idx': 0, 'title': '', 'paragraph_text': 'Ca...",Chao Phraya River,[Chao Phraya River],True,[Chao Phraya River],For what river does the river on which Pa Sak ...,"[{'id': 690412, 'question': 'On which river is...",[Cabramatta Creek | location | Sydney\nCabrama...,Chao Phraya River,"{'answer': 'Chao Phraya River', 'hops': [{'que...",True,True,"[{'idx': 0, 'title': 'Cabramatta Creek', 'para...","[{'id': 690412, 'question': 'Pa Sak Jolasid Da..."
3,2hop__263638_69048,"[{'idx': 0, 'title': '', 'paragraph_text': 'Mi...",Honorable Justice Abiodun Smith,[Honorable Justice Abiodun Smith],True,[Honorable Justice Abiodun Smith],Who is the Chief Judge of the Tebesa Nemine's ...,"[{'id': 263638, 'question': 'Where was Tebesa ...",[Michael J. Barron | birth year | 1933\nMichae...,Not applicable,"{'answer': 'Not applicable', 'hops': [{'questi...",False,False,"[{'idx': 0, 'title': 'Michael J. Barron', 'par...","[{'id': 263638, 'question': 'Tebesa Nemine >> ..."
4,2hop__142842_68489,"[{'idx': 0, 'title': '', 'paragraph_text': 'Pe...",Snapper Foster,[Snapper Foster],True,[Snapper Foster],Who did the performer of Night Rocker play on ...,"[{'id': 142842, 'question': 'Who performed Nig...",[Perfect Night: Live in London | recorded by |...,Snapper Foster,"{'answer': 'Snapper Foster', 'hops': [{'questi...",True,True,"[{'idx': 0, 'title': 'Perfect Night: Live in L...","[{'id': 142842, 'question': 'Which performer r..."


In [None]:
# Split the merged results by the `fuzzy_match` flag; the helper also prints counts and shares.
success_df, fail_df = partition_by_success(df)

68 32
0.68 0.32


In [None]:
# Inspect a single failed example; change `i` to look at a different row of `fail_df`.
i = 0
row = fail_df.iloc[i]
present_row(row)

2hop__575188_342798
Outcome: Fail

# Question decomposition
## Reference
Who is the child of Mahmoud Mirza's father?
	Mahmoud Mirza >> father
		Mohammad Ali Shah Qajar
	#1 >> child
		Ahmad Shah Qajar

## Prediction
	Who is Mahmoud Mirza's father?
		Mohammad Ali Shah Qajar
	Who is the child of #1?
		Ahmad Shah Qajar

# Answer
Reference: ['Ahmad Shah Qajar']
Prediction: Amanollah Khan Zia' os-Soltan

# Context
# Reference
Paragraph 7 - Amanollah Khan Zia' os-Soltan (also Amanollah Khan Donboli "Nazer ol-Ayaleh" "Zia' os-Soltan") was an
Iranian aristocrat and politician at Qajar court during the time of Mozaffar ad-Din Shah, Mohammad Ali Shah and Ahmad
Shah Qajar and hero of the Persian Constitutional Revolution.

Paragraph 8 - Mahmoud Mirza (9 October 1905 – 2 July 1988) Iranian prince of Qajar Dynasty, was the son of Mohammad Ali
Shah Qajar.

# Prediction
Paragraph 7 - Amanollah Khan Zia' os-Soltan | title | Nazer ol-Ayaleh

Paragraph 7 - Amanollah Khan Zia' os-Soltan | title | Zia' os-

In [None]:
# Manual annotations collected by the loop below: question id -> free-text failure reason.
# NOTE(review): re-running this cell discards every reason already entered in this session.
failure_reasons = {}

In [None]:
# Walk through the failed rows in `id` order and record a free-text reason for each.
# The original only used the sorted frame for its length and then indexed the unsorted
# `fail_df`, so the sort never took effect; the sorted frame is now the one iterated.
# Rows that already have a reason are skipped, so the cell can be re-run to resume;
# typing "quit" stops early without recording anything for the current row.
pending_df = fail_df.sort_values('id')
for _, row in pending_df.iterrows():
    row_id = row['id']  # deliberately not `id`, which would shadow the builtin
    if row_id in failure_reasons:
        continue
    # Progress: rows annotated so far / total failed rows.
    print(f"{len(failure_reasons)} / {len(fail_df)}")
    present_row(row)
    reason = input("Reason: ")
    if reason.lower() == 'quit':
        break
    failure_reasons[row_id] = reason
    # Wipe the previous report once the next one is about to be printed.
    clear_output(wait=True)

In [None]:
# Show the annotations gathered so far (question id -> reason).
failure_reasons

{'2hop__575188_342798': 'missing triplet; father of Mahmoud Mirza',
 '2hop__263638_69048': 'missing triplet (temporal); former chief vs current chief',
 '2hop__6736_6733': 'poor qa; answers the first sub-q',
 '2hop__128420_375952': 'poor qa; location vs broadcasting area',
 '2hop__582045_161450': 'poor qa; answers the first sub-q',
 '2hop__607517_161450': 'partially correct',
 '2hop__310456_846599': 'ambiguous question',
 '2hop__199513_13732': 'missing triplet; age of Mary when engaged to Joseph',
 '2hop__472083_7298': 'missing triplet; illuminations | performer & no relation as "start career alongside Josh Groban"',
 '2hop__785711_73244': 'ambiguous answer',
 '2hop__75169_92673': 'missing triplet; no direct triplet for meridian | reference | north pole',
 '2hop__157251_556157': 'poor qa & ambigous triplet; "serve as the mouth of" ~= "tributary of"',
 '2hop__477492_240386': 'missing triplets & 3-hop; no triplet for Triumph | record label | TML Entertainment and this is a 3-hop question

In [None]:
# Persist the manual annotations next to the notebook as pretty-printed JSON.
serialized_reasons = json.dumps(failure_reasons, indent=2)
with open('kgqa-inspect-failure-reasons.json', 'w') as out_file:
    out_file.write(serialized_reasons)

In [None]:
# One row per annotated question, indexed by id. `failure_category` is the part of the
# note before the first ";" (the whole note when it contains no ";").
inspect_df = (
    pd.DataFrame(list(failure_reasons.items()), columns=['id', 'note'])
    .set_index('id')
    .assign(
        failure_category=lambda frame: frame['note'].map(
            lambda note: note.partition(";")[0]
        )
    )
)

In [None]:
# Ordered rules: the first entry whose substrings ALL occur in the category wins.
# NOTE(review): the bare "3" matches any category containing that digit, not only
# "3-hop" — kept as in the original; tighten if other numbers can appear in notes.
_PRIMARY_REASON_RULES = (
    (("temporal",), "temporal"),
    (("missing triplet",), "missing triplet"),
    (("poor qa",), "poor qa"),
    (("ambi", "triplet"), "triplet quality"),
    (("3",), "3-hop"),
    (("question",), "problematic question"),
    (("context",), "problematic context"),
)


def map_primary_reason(category):
    """Collapse a free-text failure category into a primary reason label.

    The rules are checked in order and the first match decides, so e.g.
    ``"missing triplet (temporal)"`` becomes ``"temporal"`` because that rule
    comes first. A category that matches no rule is returned unchanged.

    The original if-chain tested ``"poor qa"`` a second time further down
    (returning ``"poor reasoning"``); that branch could never be reached and
    has been dropped, so results are the same for every input.

    Parameters
    ----------
    category : str
        Category text, e.g. the part of an annotation before the first ";".

    Returns
    -------
    str
        The primary reason label, or `category` itself when nothing matches.
    """
    for needles, primary in _PRIMARY_REASON_RULES:
        if all(needle in category for needle in needles):
            return primary
    return category

In [None]:
# Add the coarse primary reason derived from each failure category.
inspect_df = inspect_df.assign(
    primary=inspect_df['failure_category'].map(map_primary_reason)
)

In [None]:
# Sorted listing of the primary reason assigned to every annotated failure.
sorted(inspect_df['primary'].values.tolist())

['3-hop',
 'ambiguous answer',
 'missing triplet',
 'missing triplet',
 'missing triplet',
 'missing triplet',
 'missing triplet',
 'missing triplet',
 'missing triplet',
 'missing triplet',
 'missing triplet',
 'missing triplet',
 'missing triplet',
 'missing triplet',
 'partially correct',
 'partially correct',
 'poor answering',
 'poor qa',
 'poor qa',
 'poor qa',
 'poor qa',
 'poor qa',
 'poor qa',
 'poor qa',
 'poor qa',
 'problematic context',
 'problematic context',
 'problematic question',
 'problematic question',
 'temporal',
 'temporal',
 'triplet quality']

In [None]:
# Number of annotated failures per primary reason.
inspect_df['primary'].value_counts()

primary
missing triplet         12
poor qa                  8
temporal                 2
partially correct        2
problematic question     2
problematic context      2
ambiguous answer         1
triplet quality          1
poor answering           1
3-hop                    1
Name: count, dtype: int64

In [None]:
# Share of all failed questions whose primary reason is "missing triplet", computed from
# the annotations instead of a count copied by hand from the value_counts table.
int((inspect_df['primary'] == 'missing triplet').sum()) / len(fail_df)

0.375

In [None]:
# Share of all failed questions whose primary reason is "poor qa", computed from the
# annotations instead of a count copied by hand from the value_counts table.
int((inspect_df['primary'] == 'poor qa').sum()) / len(fail_df)

0.25

In [None]:
# Share of all failed questions taken up by a category with two cases.
# NOTE(review): several primary reasons have a count of 2 in the value_counts output
# (e.g. "temporal", "partially correct"); the hard-coded 2 does not say which is meant.
2 / len(fail_df)

0.0625

In [None]:
# Annotation table: note, failure_category and primary reason per question id.
inspect_df

Unnamed: 0_level_0,note,failure_category,primary
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2hop__575188_342798,missing triplet; father of Mahmoud Mirza,missing triplet,missing triplet
2hop__263638_69048,missing triplet (temporal); former chief vs cu...,missing triplet (temporal),temporal
2hop__6736_6733,poor qa; answers the first sub-q,poor qa,poor qa
2hop__128420_375952,poor qa; location vs broadcasting area,poor qa,poor qa
2hop__582045_161450,poor qa; answers the first sub-q,poor qa,poor qa
2hop__607517_161450,partially correct,partially correct,partially correct
2hop__310456_846599,ambiguous question,ambiguous question,problematic question
2hop__199513_13732,missing triplet; age of Mary when engaged to J...,missing triplet,missing triplet
2hop__472083_7298,missing triplet; illuminations | performer & n...,missing triplet,missing triplet
2hop__785711_73244,ambiguous answer,ambiguous answer,ambiguous answer


In [None]:
# Look up the annotation recorded for one specific question id.
inspect_df.loc['2hop__226204_69048']

note                missing triplet (temporal); Rosaline Bozimo wa...
failure_category                           missing triplet (temporal)
primary                                                      temporal
Name: 2hop__226204_69048, dtype: object

In [None]:
# Fetch one failed row by its question id; `drop=False` keeps `id` as a column as well,
# which `present_row` reads.
row = fail_df.set_index('id', drop=False).loc['2hop__226204_69048']
row

id                                                                2hop__226204_69048
paragraphs                         [{'idx': 0, 'title': '', 'paragraph_text': 'Li...
answer                                               Honorable Justice Abiodun Smith
answer_aliases                                     [Honorable Justice Abiodun Smith]
answerable                                                                      True
answers                                            [Honorable Justice Abiodun Smith]
question                           Who is the chief judge of Friday Osanebi's bir...
question_decomposition             [{'id': 226204, 'question': 'Where was Friday ...
triplets_str                       [List of sitting judges of the Supreme Court o...
predicted_answer                                                     Rosaline Bozimo
raw_output                         {'answer': 'Rosaline Bozimo', 'hops': [{'quest...
exact_match                                                      

In [None]:
# Print the full report for the row currently held in `row`.
present_row(row)

2hop__226204_69048
Outcome: Fail

# Question decomposition
## Reference
Who is the chief judge of Friday Osanebi's birthplace?
	Friday Osanebi >> place of birth
		Delta State
	who is the chief judge of #1
		Honorable Justice Abiodun Smith

## Prediction
	Where was Friday Osanebi born?
		Delta State
	Who is the chief judge of #1?
		Honorable Justice Abiodun Smith

# Answer
Reference: ['Honorable Justice Abiodun Smith']
Prediction: Rosaline Bozimo

# Context
# Reference
Paragraph 18 - Friday Ossai Osanebi (born August 7, 1980) is a Nigerian and a member of the Delta State House of
Assembly the Lawmaker representing Ndokwa East Local Government Constituency in the State House of Assembly.

Paragraph 19 - Rosaline Patricia Irorefe Bozimo (born 1 January 1946) is a Nigerian lawyer who was appointed Chief
Justice of Delta State with effect from 23 March 2003. She retired on 1 January 2011 and was succeeded by Honorable
Justice Abiodun Smith.

# Prediction
Paragraph 18 - Friday Osanebi | birt