In [1]:
import json 

RIPPLE_EDIT_DATASET_PATH = '../RippleEdits/data/benchmark'
EDIT_FILES = [
    'popular.json',
    'random.json',
    'recent.json'
]
# open the json files
edit_data = []
for edit_file in EDIT_FILES:
    with open(f'{RIPPLE_EDIT_DATASET_PATH}/{edit_file}', 'r') as f:
        edit_data += json.load(f)

In [2]:
len(edit_data)

4755

In [3]:
from qwikidata.entity import WikidataItem
from qwikidata.linked_data_interface import get_entity_dict_from_api

def get_wikidata_item_from_id(qid):
    entity_dict = get_entity_dict_from_api(qid)
    return WikidataItem(entity_dict)


In [36]:
keys = ['Logical_Generalization', 'Compositionality_I', 'Compositionality_II', 'Subject_Aliasing', 'Relation_Specificity', 'Forgetfulness']
edit_data[0][keys[1]][0]['test_queries'][0]['prompt']

'The name of the currency in the country of citizenship of Leonardo DiCaprio is'

In [28]:


def construct_story_prompt(edit, type='subject'):
    story_subject = ''
    if type == 'subject':
        story_subject = get_wikidata_item_from_id(edit['edit']['subject_id']).get_label()
    elif type == 'object':
        story_subject = get_wikidata_item_from_id(edit['edit']['target_id']).get_label()
    prompt = f'Write an article about {story_subject} \n'
    prompt += 'You must work the following information into the article: \n'
    relations = [
        ' '.join(edit['edit']['relation'].lower().split('_')) + ' of ' + story_subject
    ]
    for key in keys:
        for queries in edit[key]:
            for query in queries['condition_queries']:
                relation = ' '.join(query['relation'].lower().split('_')) + ' of ' + story_subject
                
                if 'second_relation' in query:
                    relation = ' '.join(query['second_relation'].lower().split('_')) + ' of ' + relation
                relations.append(relation)
    for relation in set(relations):
        prompt += f'- {relation} \n'
    prompt += '\n'
    return prompt

In [29]:
for edit in edit_data[3:4]:
    print(construct_story_prompt(edit))
    print(construct_story_prompt(edit, type='object'))

Write an article about Kanye West 
You must work the following information into the article: 
- alma mater of Kanye West 
- employer of Kanye West 
- child of Kanye West 
- mother of Kanye West 
- religion of Kanye West 
- award received of Kanye West 
- place of birth of Kanye West 
- father of Kanye West 
- country of citizenship of Kanye West 
- occupation of Kanye West 
- spouse of Kanye West 
- ethnic group of Kanye West 
- sex or gender of Kanye West 


Write an article about William Walker Scranton 
You must work the following information into the article: 
- father of William Walker Scranton 
- country of citizenship of William Walker Scranton 
- religion of William Walker Scranton 
- ethnic group of William Walker Scranton 
- alma mater of William Walker Scranton 
- sex or gender of William Walker Scranton 
- place of birth of William Walker Scranton 
- mother of William Walker Scranton 
- child of William Walker Scranton 
- employer of William Walker Scranton 
- award receive

In [24]:
edit = edit_data[4]['edit']

def get_edit_data(edit):
    print(edit)
    subject = get_wikidata_item_from_id(edit['edit']['subject_id']).get_label()
    new_target = get_wikidata_item_from_id(edit['edit']['target_id']).get_label()
    original_target = get_wikidata_item_from_id(edit['edit']['original_fact']['target_id']).get_label()
    story = construct_story_prompt(edit)
    edit_prompt = edit['edit']['prompt'].replace(new_target, '').replace(' .', '').strip()
    return {
        'subject': subject,
        'original_target': original_target,
        'new_target': new_target,
        'story': story,
        'edit_prompt': edit_prompt,
    }


In [25]:
get_edit_data(edit_data[3])

{'example_type': 'popular', 'edit': {'prompt': 'The name of the child of Kanye West is William Walker Scranton.', 'subject_id': 'Q15935', 'relation': 'CHILD', 'target_id': 'Q24248307', 'original_fact': {'prompt': 'The name of the child of Kanye West is North West.', 'subject_id': 'Q15935', 'relation': 'CHILD', 'target_id': 'Q55428718'}}, 'Logical_Generalization': [], 'Compositionality_I': [], 'Compositionality_II': [], 'Subject_Aliasing': [{'test_queries': [{'prompt': 'The name of the child of Kanye Omari West is', 'answers': [{'value': 'William Walker Scranton', 'aliases': ['William W. Scranton']}], 'query_type': 'regular', 'subject_id': 'Q15935', 'relation': 'CHILD', 'target_ids': ['Q24248307'], 'phrase': 'The name of the child of Kanye Omari West is'}], 'test_condition': 'OR', 'condition_queries': [{'prompt': 'The name of the child of Kanye Omari West is', 'answers': [{'value': 'William Walker Scranton', 'aliases': ['William W. Scranton']}], 'query_type': 'regular', 'subject_id': 'Q

{'subject': 'Kanye West',
 'original_target': 'North West',
 'new_target': 'William Walker Scranton',
 'story': 'Write an article about Kanye West \nYou must work the following information into the article: \n- alma mater of Kanye West \n- employer of Kanye West \n- child of Kanye West \n- mother of Kanye West \n- religion of Kanye West \n- award received of Kanye West \n- place of birth of Kanye West \n- father of Kanye West \n- country of citizenship of Kanye West \n- occupation of Kanye West \n- spouse of Kanye West \n- ethnic group of Kanye West \n- sex or gender of Kanye West \n\n',
 'edit_prompt': 'The name of the child of Kanye West is'}