In [1]:
# Demo on the WebNLG dataset
%load_ext autoreload
%autoreload 2
from edc.edc_framework import EDC
import edc.utils.llm_utils as llm_utils
import os

os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]="3,4,5,6"
os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ['TRANSFORMERS_NO_ADVISORY_WARNINGS'] = 'true'

In [2]:
# Keyword arguments for EDC, assembled one section at a time. Keys are added in
# the same order a single dict literal would produce.
edc_configuration = {}

# OIE Settings
edc_configuration.update(
    {
        "oie_llm": "gpt-3.5-turbo",
        "oie_prompt_template_file_path": "./edc/prompt_templates/oie_template.txt",
        "oie_few_shot_example_file_path": "./edc/few_shot_examples/webnlg/oie_webnlg_few_shot_examples.txt",
    }
)

# Schema Definition Settings
edc_configuration.update(
    {
        "sd_llm": "gpt-3.5-turbo",
        "sd_prompt_template_file_path": "./edc/prompt_templates/sd_template.txt",
        "sd_few_shot_example_file_path": "./edc/few_shot_examples/webnlg/sd_webnlg_few_shot_examples.txt",
    }
)

# Schema Canonicalization Settings
edc_configuration.update(
    {
        "sc_llm": "gpt-4",
        "sc_prompt_template_file_path": "./edc/prompt_templates/sc_template.txt",
    }
)

# Refinement Settings
edc_configuration.update(
    {
        "oie_refine_prompt_template_file_path": "./edc/prompt_templates/oie_r_template.txt",
        "oie_refine_few_shot_example_file_path": "./edc/few_shot_examples/webnlg/oie_webnlg_few_shot_refine_examples.txt",
        "ee_prompt_template_file_path": "./edc/prompt_templates/ee_template.txt",
        "ee_few_shot_example_file_path": "./edc/few_shot_examples/webnlg/ee_webnlg_few_shot_examples.txt",
        "em_prompt_template_file_path": "./edc/prompt_templates/em_template.txt",
    }
)

# Extraction settings
edc_configuration.update(
    {
        "target_schema_path": None,  # no predefined target schema file is supplied
        "enrich_schema": True,
        "output_dir": "./output/demo",
    }
)

# Build the framework object from the assembled settings.
edc = EDC(**edc_configuration)

Models used: {'gpt-4', 'gpt-3.5-turbo'}
Loading models...


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Embedding target schema...


0it [00:00, ?it/s]


In [3]:
# Hand-written demo inputs: one text about the connecting entity (Meta) and one
# text for each of the two entities it links (Facebook, Instagram).
connecting_entity_text, entity_1_text, entity_2_text = (
    "Meta is a technology company. The company owns Facebook and Instagram.",
    "Facebook is a social media and social networking service. It was created in 2004. The creator of Facebook is Mark Zuckerberg.",
    "Instagram is a photo and video sharing platform and social networking service. It was first created by Kevin Systrom and Mike Kriege and released in 2010.",
)

In [4]:
# Texts passed to EDC, in order: the connecting-entity text first, followed by
# the two per-entity texts.
input_text_list = [
    connecting_entity_text,
    entity_1_text,
    entity_2_text,
]

In [5]:
# Run EDC over the three demo texts. The log printed below shows the stages it
# goes through: OIE, Schema Definition, then Schema Canonicalization.
# NOTE(review): later cells read index 0 as the per-text OIE graphs and index 1
# as the per-text canonicalized graphs — confirm against EDC.extract_kg.
extracted_kg_list = edc.extract_kg(input_text_list)

EDC running...
Running OIE...


  0%|          | 0/3 [00:00<?, ?it/s]

OIE: Meta is a technology company. The company owns Facebook and Instagram.
 -> [['Meta', 'type', 'technology_company'], ['Meta', 'owns', 'Facebook'], ['Meta', 'owns', 'Instagram']]



 33%|███▎      | 1/3 [00:04<00:08,  4.02s/it]

OIE: Facebook is a social media and social networking service. It was created in 2004. The creator of Facebook is Mark Zuckerberg.
 -> [['Facebook', 'type', 'social_media'], ['Facebook', 'type', 'social_networking_service'], ['Facebook', 'createdIn', '2004'], ['Facebook', 'creator', 'Mark_Zuckerberg']]



 67%|██████▋   | 2/3 [00:08<00:04,  4.01s/it]

OIE: Instagram is a photo and video sharing platform and social networking service. It was first created by Kevin Systrom and Mike Kriege and released in 2010.
 -> [['Instagram', 'isA', 'photo_and_video_sharing_platform'], ['Instagram', 'isA', 'social_networking_service'], ['Instagram', 'createdBy', 'Kevin_Systrom'], ['Instagram', 'createdBy', 'Mike_Kriege'], ['Instagram', 'releaseYear', '2010']]



100%|██████████| 3/3 [00:12<00:00,  4.11s/it]

Running Schema Definition...



  0%|          | 0/3 [00:00<?, ?it/s]

SD: Instagram is a photo and video sharing platform and social networking service. It was first created by Kevin Systrom and Mike Kriege and released in 2010., [['Meta', 'type', 'technology_company'], ['Meta', 'owns', 'Facebook'], ['Meta', 'owns', 'Instagram']]
 -> {'owns': 'The subject entity possesses or has ownership of the object entity.', 'type': 'The subject entity belongs to the category or type specified by the object entity.'}



 33%|███▎      | 1/3 [00:00<00:01,  1.16it/s]

SD: Instagram is a photo and video sharing platform and social networking service. It was first created by Kevin Systrom and Mike Kriege and released in 2010., [['Facebook', 'type', 'social_media'], ['Facebook', 'type', 'social_networking_service'], ['Facebook', 'createdIn', '2004'], ['Facebook', 'creator', 'Mark_Zuckerberg']]
 -> {'creator': 'The subject entity was created by the person specified by the object entity.', 'createdIn': 'The subject entity was created or established in the year specified by the object entity.', 'type': 'The subject entity belongs to the type or category specified by the object entity.'}



 67%|██████▋   | 2/3 [00:02<00:01,  1.03s/it]

SD: Instagram is a photo and video sharing platform and social networking service. It was first created by Kevin Systrom and Mike Kriege and released in 2010., [['Instagram', 'isA', 'photo_and_video_sharing_platform'], ['Instagram', 'isA', 'social_networking_service'], ['Instagram', 'createdBy', 'Kevin_Systrom'], ['Instagram', 'createdBy', 'Mike_Kriege'], ['Instagram', 'releaseYear', '2010']]
 -> {'createdBy': 'The subject entity was created by the person or entity specified by the object entity.', 'releaseYear': 'The subject entity was released or established in the year specified by the object entity.', 'isA': 'The subject entity belongs to the category or type specified by the object entity.'}



100%|██████████| 3/3 [00:03<00:00,  1.03s/it]

Running Schema Canonicalization...



  0%|          | 0/3 [00:00<?, ?it/s]

Schema Canonicalization: Meta is a technology company. The company owns Facebook and Instagram.
['Meta', 'type', 'technology_company'] -> ['Meta', 'type', 'technology_company']
['Meta', 'owns', 'Facebook'] -> ['Meta', 'owns', 'Facebook']
['Meta', 'owns', 'Instagram'] -> ['Meta', 'owns', 'Instagram']


 33%|███▎      | 1/3 [00:04<00:09,  4.67s/it]

Schema Canonicalization: Facebook is a social media and social networking service. It was created in 2004. The creator of Facebook is Mark Zuckerberg.
['Facebook', 'type', 'social_media'] -> ['Facebook', 'type', 'social_media']
['Facebook', 'type', 'social_networking_service'] -> ['Facebook', 'type', 'social_networking_service']
['Facebook', 'createdIn', '2004'] -> ['Facebook', 'createdIn', '2004']
['Facebook', 'creator', 'Mark_Zuckerberg'] -> ['Facebook', 'creator', 'Mark_Zuckerberg']


 67%|██████▋   | 2/3 [00:09<00:04,  4.74s/it]

Schema Canonicalization: Instagram is a photo and video sharing platform and social networking service. It was first created by Kevin Systrom and Mike Kriege and released in 2010.
['Instagram', 'isA', 'photo_and_video_sharing_platform'] -> ['Instagram', 'type', 'photo_and_video_sharing_platform']
['Instagram', 'isA', 'social_networking_service'] -> ['Instagram', 'type', 'social_networking_service']
['Instagram', 'createdBy', 'Kevin_Systrom'] -> ['Instagram', 'creator', 'Kevin_Systrom']
['Instagram', 'createdBy', 'Mike_Kriege'] -> ['Instagram', 'creator', 'Mike_Kriege']
['Instagram', 'releaseYear', '2010'] -> ['Instagram', 'createdIn', '2010']


100%|██████████| 3/3 [00:15<00:00,  5.19s/it]


In [None]:
from typing import List
def combine_graphs(graphs: List[List[List[str]]]):
    """Flatten a list of graphs into one list of triplets.

    Args:
        graphs: A list of graphs, where each graph is a list of triplets
            (each triplet being a list of strings, per the annotation).

    Returns:
        A new list containing every triplet of every graph, in input order.
        The triplet objects themselves are reused, not copied.
    """
    return [triplet for graph in graphs for triplet in graph]

# Flatten the per-text graphs from the first extraction run into one triplet
# list each.
# NOTE(review): assumes extracted_kg_list[0] holds the OIE graphs and
# extracted_kg_list[1] the canonicalized graphs — confirm against EDC.extract_kg.
combined_oie_graph = combine_graphs(extracted_kg_list[0])
combined_canonicalized_graph = combine_graphs(extracted_kg_list[1])

In [None]:
# import time
# open_graph_file_path = "./edc/utils/open_graph.txt"
# canon_graph_file_path = "./edc/utils/canon_graph.txt"
# oie_file = open(open_graph_file_path, 'w')
# canon_file = open(canon_graph_file_path, 'w')
# for oie_graph in extracted_kg_list[0]:
#     for triple in oie_graph:
#         oie_file.write(str(triple) + "\n")
#     oie_file.flush()
#     time.sleep(5)

# for canon_graph in extracted_kg_list[1]:
#     for triple in canon_graph:
#         canon_file.write(str(triple) + "\n")
#     canon_file.flush()
#     time.sleep(5)

In [6]:
# Additional demo text about a third entity (WhatsApp), extracted in its own
# EDC run in the next cell.
entity_3_text = "Meta also owns WhatsApp, WhatsApp is an instant messaging service. It was founded by Brian Acton and Jan Koum in 2009."

In [7]:
# Second extraction run on the same EDC instance, with the WhatsApp text as the
# only input (wrapped in a one-element list).
# NOTE(review): the result is indexed like extracted_kg_list elsewhere in this
# notebook ([0] OIE graphs, [1] canonicalized graphs) — confirm.
entity3_kg_list = edc.extract_kg([entity_3_text])

EDC running...
Running OIE...


  0%|          | 0/1 [00:00<?, ?it/s]

OIE: Meta also owns WhatsApp, WhatsApp is an instant messaging service. It was founded by Brian Acton and Jan Koum in 2009.
 -> [['Meta', 'owns', 'WhatsApp'], ['WhatsApp', 'type', 'instant_messaging_service'], ['WhatsApp', 'foundedBy', 'Brian_Acton'], ['WhatsApp', 'foundedBy', 'Jan_Koum'], ['WhatsApp', 'foundedYear', '2009']]



100%|██████████| 1/1 [00:04<00:00,  4.50s/it]

Running Schema Definition...



  0%|          | 0/1 [00:00<?, ?it/s]

SD: Meta also owns WhatsApp, WhatsApp is an instant messaging service. It was founded by Brian Acton and Jan Koum in 2009., [['Meta', 'owns', 'WhatsApp'], ['WhatsApp', 'type', 'instant_messaging_service'], ['WhatsApp', 'foundedBy', 'Brian_Acton'], ['WhatsApp', 'foundedBy', 'Jan_Koum'], ['WhatsApp', 'foundedYear', '2009']]
 -> {'foundedBy': 'The subject entity was founded by the person or entity specified by the object entity.', 'foundedYear': 'The subject entity was founded in the year specified by the object entity.', 'owns': 'The subject entity possesses or has ownership of the object entity.', 'type': 'The subject entity belongs to the type or category specified by the object entity.'}



100%|██████████| 1/1 [00:01<00:00,  1.38s/it]

Running Schema Canonicalization...



  0%|          | 0/1 [00:00<?, ?it/s]

Schema Canonicalization: Meta also owns WhatsApp, WhatsApp is an instant messaging service. It was founded by Brian Acton and Jan Koum in 2009.
['Meta', 'owns', 'WhatsApp'] -> ['Meta', 'owns', 'WhatsApp']
['WhatsApp', 'type', 'instant_messaging_service'] -> ['WhatsApp', 'type', 'instant_messaging_service']
['WhatsApp', 'foundedBy', 'Brian_Acton'] -> ['WhatsApp', 'creator', 'Brian_Acton']
['WhatsApp', 'foundedBy', 'Jan_Koum'] -> ['WhatsApp', 'creator', 'Jan_Koum']
['WhatsApp', 'foundedYear', '2009'] -> ['WhatsApp', 'createdIn', '2009']


100%|██████████| 1/1 [00:05<00:00,  5.71s/it]


In [None]:
# for triple in entity3_kg_list[0][0]:
#     oie_file.write(str(triple) + "\n")
# oie_file.flush()
# time.sleep(5)

# for triple in entity3_kg_list[1][0]:
#     canon_file.write(str(triple) + "\n")
# canon_file.flush()
# time.sleep(5)

In [None]:
# Join the graphs of the first run with those of the WhatsApp run using `+`,
# then flatten each joined sequence into a single triplet list.
# NOTE(review): assumes index 0 / index 1 of both results are lists of OIE /
# canonicalized graphs respectively — confirm against EDC.extract_kg.
extra_oie_graph = combine_graphs(extracted_kg_list[0] + entity3_kg_list[0])
extra_canonicalized_graph = combine_graphs(extracted_kg_list[1] + entity3_kg_list[1])