In [1]:
from dotenv import load_dotenv
# Read a local `.env` file into the process environment before anything else runs
# (presumably the API credentials needed by the LLM calls further down — confirm).
load_dotenv()

True

In [2]:
import json
import pandas as pd
from pathlib import Path
from copy import deepcopy
from functools import partial

from bellem.utils import set_seed, jprint
from bellem.musique.singlehop import benchmark
from bellem.musique.qa import answer_question_cte

# set_seed(89)

In [3]:
from tqdm.auto import tqdm
# Register tqdm's pandas integration so pandas operations can report progress
# (e.g. via `progress_apply`).
tqdm.pandas()

In [5]:
# Read the few-shot examples currently stored on disk.
# NOTE(review): `examples` is rebuilt from scratch further down before it is
# saved again, so this load only serves for inspecting the existing file.
examples = json.loads(Path('cte-few-shot-examples.json').read_text())

In [8]:
# Hand-picked question ids (all carry the "2hop__" prefix) used to select
# rows from the dataset; the order here is the order of the selected rows.
example_ids = [
    "2hop__272543_126102",
    "2hop__54719_159941",
    "2hop__16777_419765",
    "2hop__834974_332063",
    "2hop__823584_776926",
]

In [9]:
# Load the JSON-lines dataset, then keep only the rows listed in `example_ids`
# (in that order) and renumber them 0..n-1. A missing id raises a KeyError.
df = pd.read_json(
    '../../data/generated/musique-common/base-dataset-train.jsonl',
    orient='records',
    lines=True,
)
df = (
    df.set_index('id', drop=False)
    .loc[example_ids]
    .reset_index(drop=True)
)

print(df.shape)
df.head()

(5, 8)


Unnamed: 0,id,paragraphs,question,question_decomposition,answer,answer_aliases,answerable,answers
0,2hop__272543_126102,"[{'idx': 0, 'title': 'Republic of the Congo', ...",Who was in charge of the country Ceelmakoile i...,"[{'id': 272543, 'question': 'Ceelmakoile >> co...",Hassan Sheikh Mohamud,[Hassan Sheikh Mohamud],True,[Hassan Sheikh Mohamud]
1,2hop__54719_159941,"[{'idx': 0, 'title': '1936–37 Scottish Divisio...",The person with the most second place finishes...,"[{'id': 54719, 'question': 'who has the most s...",an American retired professional golfer,[an American retired professional golfer],True,[an American retired professional golfer]
2,2hop__16777_419765,"[{'idx': 0, 'title': 'Pedro Varela', 'paragrap...",Who was the father of the person who replaced ...,"[{'id': 16777, 'question': 'Who replaced Derby...",Isaac D'Israeli,[Isaac D'Israeli],True,[Isaac D'Israeli]
3,2hop__834974_332063,"[{'idx': 0, 'title': 'Kingdom of Gera', 'parag...",Which region shares border with one where Perr...,"[{'id': 834974, 'question': 'Perry Township >>...",Franklin County,[Franklin County],True,[Franklin County]
4,2hop__823584_776926,"[{'idx': 0, 'title': 'Wadera (woreda)', 'parag...",What area contains the region that encompasses...,"[{'id': 823584, 'question': 'Rotstöckli >> par...",Western Alps,[Western Alps],True,[Western Alps]


In [10]:
def perfect_retrieval_func(docs, query):
    """Oracle retriever: return only the documents flagged as supporting.

    Args:
        docs: Iterable of mappings, each with an ``'is_supporting'`` entry.
        query: Ignored; accepted so the signature matches a regular
            retrieval function.

    Returns:
        A list of the documents whose ``'is_supporting'`` value is truthy,
        in their original order.

    Raises:
        KeyError: If a document has no ``'is_supporting'`` entry.
    """
    return [doc for doc in docs if doc['is_supporting']]

In [11]:
# Run the benchmark with gpt-3.5-turbo, passing `perfect_retrieval_func`
# as the retrieval function. Errors are not ignored (`ignore_errors=False`).
qa_func = partial(
    answer_question_cte,
    model_name="gpt-3.5-turbo",
)
df_gpt_35, scores = benchmark(
    df,
    qa_func,
    perfect_retrieval_func,
    ignore_errors=False,
)

  0%|          | 0/5 [00:00<?, ?it/s]

In [12]:
# Same benchmark run, this time with gpt-4-turbo.
# Note that `qa_func` and `scores` are rebound here.
qa_func = partial(
    answer_question_cte,
    model_name="gpt-4-turbo",
)
df_gpt_4, scores = benchmark(
    df,
    qa_func,
    perfect_retrieval_func,
    ignore_errors=False,
)

  0%|          | 0/5 [00:00<?, ?it/s]

In [13]:
# Number of exact matches, as (GPT-3.5, GPT-4).
tuple(results['exact_match'].sum() for results in (df_gpt_35, df_gpt_4))

(0, 2)

In [14]:
# Number of fuzzy matches, as (GPT-3.5, GPT-4).
tuple(results['fuzzy_match'].sum() for results in (df_gpt_35, df_gpt_4))

(0, 3)

In [10]:
# Inspect the raw model output recorded for the first benchmarked question.
# NOTE(review): this cell's execution count is out of sequence and its saved
# output shows a question that is not among the selected ids — re-run it to
# refresh the output.
df_gpt_4['raw_output'].iloc[0]

{'answer': 'Gainesville, Florida',
 'hops': [{'question': 'What is the birthplace of the artist who played Lucky on King of the Hill?',
   'query': 'What is the birthplace of the artist who played Lucky on King of the Hill?',
   'context': '# Tom Petty and the Heartbreakers\nTom Petty and the Heartbreakers (alternately Tom Petty & The Heartbreakers) were an American rock band from Gainesville, Florida. Formed in 1976, the band originally comprised Tom Petty (lead singer, guitar), Mike Campbell (lead guitarist), Ron Blair (bass guitar), Stan Lynch (drums), and Benmont Tench (keyboards). In 1981, Blair, weary of the touring lifestyle, departed the band. His replacement, Howie Epstein, stayed with the band for the next two decades. In 1991, Scott Thurston joined the band as a multi-instrumentalist—mostly on rhythm guitar and second keyboards. Blair returned to the Heartbreakers in 2002, the year before Epstein\'s death. In 1994, Steve Ferrone replaced Lynch on drums. The band is best know

In [15]:
# Select the questions where GPT-4 produced a fuzzy match but GPT-3.5 did not.
gpt_35_missed = ~df_gpt_35['fuzzy_match']
mask = gpt_35_missed & df_gpt_4['fuzzy_match']
mask.sum()

3

In [16]:
def _first_hop_example(row):
    """Build one few-shot record from the first hop of a benchmark result row.

    Args:
        row: A row of the benchmark output with an ``'id'`` entry and a
            ``'raw_output'`` entry whose ``'hops'`` list is non-empty.

    Returns:
        A dict with the keys ``id``, ``context``, ``question`` and
        ``generation``.
    """
    first_hop = row['raw_output']['hops'][0]
    return {
        "id": row['id'],
        "context": first_hop['context'],
        "question": first_hop['question'],
        "generation": first_hop['llm_output']['generation'],
    }


# One record per GPT-4 result selected by `mask`. Using a helper and a
# comprehension keeps loop variables out of the notebook namespace; in
# particular the builtin `id` is no longer rebound at notebook scope.
examples = [_first_hop_example(row) for _, row in df_gpt_4.loc[mask].iterrows()]
examples

[{'id': '2hop__272543_126102',
  'context': '# Somalia\nOn 10 September 2012, parliament elected Hassan Sheikh Mohamud as the new President of Somalia. President Mohamud later appointed Abdi Farah Shirdon as the new Prime Minister on 6 October 2012, who was succeeded in office by Abdiweli Sheikh Ahmed on 21 December 2013. On 17 December 2014, former Premier Omar Abdirashid Ali Sharmarke was reappointed Prime Minister.\n# Ceelmakoile\nCeelmakoile is a town in the central Hiran region of Somalia. It’s mostly inhabited by the hawadle Clan sub clan of Hawiye one of the larger Somali clan',
  'question': 'Who was in charge of the country Ceelmakoile is located in?',
  'generation': 'Triplets:\nHassan Sheikh Mohamud | elected as | President of Somalia\nCeelmakoile | located in | Somalia\n\nAnswer: Hassan Sheikh Mohamud'},
 {'id': '2hop__54719_159941',
  'context': "# Men's major golf championships\nAlong with his record 18 major victories, Jack Nicklaus also holds the record for most runner 

In [13]:
# Serialize before touching the file: opening with 'w' truncates it
# immediately, so encoding inside the `with` block would leave a damaged
# examples file behind if `examples` held something json cannot encode.
examples_json = json.dumps(examples, indent=2)
with open('cte-few-shot-examples.json', 'w') as f:
    f.write(examples_json)