In [34]:
import json
import glob

In [117]:
def show_ontology(ont_path):
    """Print the title, concepts and relations of an ontology JSON file.

    Args:
        ont_path: Path to a JSON file containing a ``title`` value, a
            ``concepts`` list whose items have ``label`` and ``qid`` keys,
            and a ``relations`` list whose items have ``label`` and ``pid``
            keys.

    Returns:
        None. The summary is written to standard output.

    Raises:
        KeyError: If any of the keys listed above is missing.
    """
    # Decode explicitly as UTF-8 (as load_jsonl does) so the result does not
    # depend on the platform's default text encoding.
    with open(ont_path, 'r', encoding='utf-8') as in_file:
        data = json.load(in_file)

    # The file is only needed for parsing, so it is closed before printing.
    print(f"Ontology: {data['title']}")
    cls_str = "\n\t".join([f"{c['label']} ({c['qid']})" for c in data['concepts']])
    rel_str = "\n\t".join([f"{c['label']} ({c['pid']})" for c in data['relations']])
    print(f"Concepts:\n\t{cls_str}")
    print(f"Relations:\n\t{rel_str}")

def load_jsonl(file_path):
    """Read a JSON Lines file and return its records as a list.

    Args:
        file_path: Path to a UTF-8 text file with one JSON document per line.

    Returns:
        A list with one parsed object per non-blank line, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    content = []
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            # Blank lines (e.g. an empty line at the end of the file) carry no
            # record; skip them instead of letting json.loads fail on them.
            if not line.strip():
                continue
            content.append(json.loads(line))
    return content

def load_valid_ids(dir_path):
    """Collect the stripped lines of every ``*.txt`` file directly under ``dir_path``.

    Files are visited in sorted path order.

    Args:
        dir_path: Directory that is searched with the pattern ``dir_path + '/*.txt'``.

    Returns:
        A list with one inner list per file; each inner list holds that
        file's lines with surrounding whitespace removed.
    """
    txt_paths = sorted(glob.glob(dir_path + '/*.txt'))
    lines_per_file = []
    for txt_path in txt_paths:
        with open(txt_path, 'r', encoding='utf-8') as handle:
            lines_per_file.append([raw_line.strip() for raw_line in handle])
    return lines_per_file
        
def load_all_jsonl(dir_path):
    """Load and concatenate the records of every ``*.jsonl`` file in a directory.

    Args:
        dir_path: Directory that is searched with the pattern
            ``dir_path + '/*.jsonl'``.

    Returns:
        A single list containing the records of all matching files, file by
        file in sorted path order.
    """
    # glob returns matches in arbitrary order; sorting (as load_valid_ids does)
    # makes the record order, and therefore which record wins when callers
    # index by a duplicated id, reproducible across runs and machines.
    jsonl_files = sorted(glob.glob(dir_path + '/*.jsonl'))
    content = []
    for file_path in jsonl_files:
        content.extend(load_jsonl(file_path))
    return content

def load_test_data(test_path_prefix):
    """Load all ``.jsonl`` records under ``test_path_prefix``, indexed by ``id``.

    Returns:
        A dict mapping each record's ``"id"`` value to the record itself. If
        an id occurs more than once, the record loaded last is kept.
    """
    return {record["id"]: record for record in load_all_jsonl(test_path_prefix)}

def load_prompts(prompt_path_prefix):
    """Load all ``.jsonl`` records under ``prompt_path_prefix``, indexed by ``id``.

    Returns:
        A dict mapping each record's ``"id"`` value to the record itself. If
        an id occurs more than once, the record loaded last is kept.
    """
    return {entry["id"]: entry for entry in load_all_jsonl(prompt_path_prefix)}

def load_llm_ouputs(llm_output_prefix):
    """Load all ``.jsonl`` records under ``llm_output_prefix``, indexed by ``id``.

    Returns:
        A dict mapping each record's ``"id"`` value to the record itself. If
        an id occurs more than once, the record loaded last is kept.
    """
    return {output["id"]: output for output in load_all_jsonl(llm_output_prefix)}

def print_llm_output(llm_output):
    """Print one LLM output record: its id, the raw response, then its triples.

    Args:
        llm_output: Mapping with the keys ``id``, ``response`` and ``triples``;
            ``triples`` is iterated and each item is printed on its own
            tab-indented line.
    """
    id_section = f"Test ID: {llm_output['id']}\n"
    print(id_section)
    response_section = f"LLM Response:\n\n{llm_output['response']}\n"
    print(response_section)
    print("Triples:")
    for triple in llm_output['triples']:
        print(f"\t{triple}")
    


## Ontologies

In [22]:
# Directory that holds the ontology JSON files.
ont_path_prefix = "../../data/wikidata_tekgen/ontologies/"

# Full path of every ontology file, in the order listed here.
ontologies = [
    ont_path_prefix + file_name
    for file_name in (
        "1_movie_ontology.json",
        "2_music_ontology.json",
        "3_sport_ontology.json",
        "4_book_ontology.json",
        "5_military_ontology.json",
        "6_computer_ontology.json",
        "7_space_ontology.json",
        "8_politics_ontology.json",
        "9_nature_ontology.json",
        "10_culture_ontology.json",
    )
]


## Ontology content

In [32]:
# Show the second entry of the ontology list (index 1), "2_music_ontology.json".
show_ontology(ontologies[1])
    

Ontology: Music Ontology
Concepts:
	human (Q5)
	music (Q638)
	musical work (Q2188189)
	record producer (Q4830453)
	composed musical work (Q207628)
	composer (Q36834)
	language (Q34770)
	music genre (Q188451)
	voice (Q7390)
	musicology (Q164204)
	music industry (Q746359)
	album (Q482994)
	award  (Q618779)
Relations:
	composer (P86)
	part of (P361)
	lyrics by (P676)
	publication date (P577)
	language of work or name (P407)
	voice type (P412)
	instrumentation (P870)
	tracklist (P658)
	genre (P136)
	performer (P175)
	producer (P162)
	nominated for (P1411)
	record label (P264)


# Test Sentences

In [64]:
# Test sentences: every .jsonl record under this directory, keyed by its "id".
test_path_prefix = "../../data/wikidata_tekgen/test/"
test_data = load_test_data(test_path_prefix)

# Manually verified sentence ids: one list of stripped lines per .txt file,
# with the files taken in sorted path order.
valid_ids_prefix = "../../data/wikidata_tekgen/manually_verified_sentences/"
valid_ids = load_valid_ids(valid_ids_prefix)

In [145]:
# Select one manually verified sentence as valid_ids[file_index][line_index].
# Index pairs noted by the author (presumably samples worth inspecting): [2][10]  [3][1]  [4][6] [5][3]
# NOTE(review): file_index follows the sorted (lexicographic) file order used by
# load_valid_ids, so it need not match the number in the ontology file name.

test_id = valid_ids[5][3]
test_data[test_id]


{'id': 'ont_5_military_test_7',
 'sent': 'Felix von Bendemann (8 August 1848Â\xa0- 31 October 1915) was an Admiral of the German Imperial Navy (Kaiserliche Marine).'}

# LLM Prompts

In [101]:
# Baseline prompts: every .jsonl record under this directory, keyed by its "id".
prompt_path_prefix = "../../data/wikidata_tekgen/baselines/prompts/"
prompts = load_prompts(prompt_path_prefix)

In [146]:
# Look up the prompt stored under the same id as the selected test sentence.
test_prompt = prompts[test_id]
print(f"Test ID: {test_prompt['id']}")
print(f"Prompt: {test_prompt['prompt']}")

Test ID: ont_5_military_test_7
Prompt: 
Given the following ontology and sentences, please extract the triples from the sentence according to the relations in the ontology. In the output, only include the triples in the given output format.
CONTEXT:
Ontology Concepts: rank, military rank, military vehicle, military unit, human, country, military casualty classification, armed organization, command, military museum, organization, military personnel, military equipment,
Ontology Relations: military_rank(human,military rank), military_branch(human,military unit), military_casualty_classification_(human,military casualty classification), designed_by(military equipment,organization), designed_by(military vehicle,organization), commanded_by(command,human), next_higher_rank(military rank,rank), designated_as_terrorist_by(armed organization,country), wing_configuration(,)

Example Sentence: Alfred Meyer-Waldeck (27 November 1864 - 25 August 1928) was a vice admiral in the Imperial German Navy 

# LLM Output

In [138]:
# Stored responses of each baseline model, loaded from .jsonl files and keyed by "id".
vicuna_output_prefix =  "../../data/wikidata_tekgen/baselines/Vicuna-13B/llm_responses/"
vicuna_outputs = load_llm_ouputs(vicuna_output_prefix)

alpaca_lora_output_prefix =  "../../data/wikidata_tekgen/baselines/Alpaca-LoRA-13B/llm_responses/"
alpaca_lora_outputs = load_llm_ouputs(alpaca_lora_output_prefix)


In [147]:
# Vicuna-13B record stored under the selected test id.
print("Vicuna-13B")
print_llm_output(vicuna_outputs[test_id])

Vicuna-13B
Test ID: ont_5_military_test_7

LLM Response:

military\_rank(Felix von Bendemann,admiral)
military\_unit(Imperial German Navy,Felix von Bendemann)

Note: The test output is empty because the triple format specified in the prompt does not include the object of the relation (i.e., the person or entity that the relation applies to). To include the object of the relation in the output, the format should be: subject, object, relation. For example: "Felix von Bendemann, Imperial German Navy, military\_rank(admiral)"

Triples:
	['Felix von Bendemann', 'military_rank', 'admiral']
	['Imperial German Navy', 'military_unit', 'Felix von Bendemann']


In [148]:
# Alpaca-LoRA-13B record stored under the same test id, for comparison.
print("Alpaca-LoRA-13B")
print_llm_output(alpaca_lora_outputs[test_id])

Alpaca-LoRA-13B
Test ID: ont_5_military_test_7

LLM Response:

Felix von Bendemann (8 August 1848Â - 31 October 1915) was an Admiral of the German Imperial Navy (Kaiserliche Marine).
military_rank(Felix von Bendemann,admiral)
military_unit(Felix von Bendemann,Kaiserliche Marine)

Triples:
	['Felix von Bendemann', 'military_rank', 'admiral']
	['Felix von Bendemann', 'military_unit', 'Kaiserliche Marine']
