In [1]:
import os

from argparse import ArgumentParser

# An empty CUDA_VISIBLE_DEVICES hides every GPU from CUDA, so nothing in this
# notebook can pick one up. It is set here, ahead of any model-loading import,
# because the variable is only honoured if it is in place before CUDA starts.
os.environ['CUDA_VISIBLE_DEVICES'] = ''

# Every LLM-backed stage defaults to the same checkpoint; keeping the id in one
# place means switching models is a one-line change.
DEFAULT_LLM = "mistralai/Mistral-7B-Instruct-v0.2"

# The parser is only used to collect default settings (see `defaults` at the
# end of this cell); option names and default values are the configuration
# contract consumed as keyword arguments further down the notebook.
parser = ArgumentParser()

# OIE module setting
parser.add_argument("--oie_llm", default=DEFAULT_LLM, help="LLM used for open information extraction.")
parser.add_argument(
    "--oie_prompt_template_file_path",
    default="./prompt_templates/oie_template.txt",
    help="Prompt template used for open information extraction.",
)
parser.add_argument(
    "--oie_few_shot_example_file_path",
    default="./few_shot_examples/default/oie_few_shot_examples.txt",
    help="Few shot examples used for open information extraction.",
)

# Schema Definition setting
parser.add_argument("--sd_llm", default=DEFAULT_LLM, help="LLM used for schema definition.")
parser.add_argument(
    "--sd_prompt_template_file_path",
    default="./prompt_templates/sd_template.txt",
    help="Prompt template used for schema definition.",
)
parser.add_argument(
    "--sd_few_shot_example_file_path",
    default="./few_shot_examples/default/sd_few_shot_examples.txt",
    help="Few shot examples used for schema definition.",
)

# Schema Canonicalization setting
parser.add_argument(
    "--sc_llm",
    default=DEFAULT_LLM,
    help="LLM used for schema canonicalization verification.",
)
parser.add_argument(
    "--sc_prompt_template_file_path",
    default="./prompt_templates/sc_template.txt",
    help="Prompt template used for schema canonicalization verification.",
)
parser.add_argument(
    "--sc_embedder",
    default="all-mpnet-base-v2",
    help="Embedder used for schema canonicalization.",
)

# Refinement setting
parser.add_argument("--sr_adapter_path", default=None, help="Path to adapter of schema retriever.")
parser.add_argument(
    "--oie_refine_prompt_template_file_path",
    default="./prompt_templates/oie_r_template.txt",
    help="Prompt template used for refined open information extraction.",
)
parser.add_argument(
    "--oie_refine_few_shot_example_file_path",
    default="./few_shot_examples/default/oie_few_shot_refine_examples.txt",
    help="Few shot examples used for refined open information extraction.",
)
parser.add_argument("--ee_llm", default=DEFAULT_LLM, help="LLM used for entity extraction.")
parser.add_argument(
    "--ee_prompt_template_file_path",
    default="./prompt_templates/ee_template.txt",
    help="Prompt template used for entity extraction.",
)
parser.add_argument(
    "--ee_few_shot_example_file_path",
    default="./few_shot_examples/default/ee_few_shot_examples.txt",
    help="Few shot examples used for entity extraction.",
)
parser.add_argument(
    "--em_prompt_template_file_path",
    default="./prompt_templates/em_template.txt",
    help="Prompt template used for entity merging.",
)

# Input setting
parser.add_argument(
    "--input_text_file_path",
    default="./datasets/example.txt",
    help="File containing input texts to extract KG from, each line contains one piece of text.",
)
parser.add_argument(
    "--target_schema_path",
    default=None,
    help="File containing the target schema to align to.",
)
parser.add_argument(
    "--refinement_iterations", default=0, type=int, help="Number of refinement iterations to run."
)
parser.add_argument(
    "--enrich_schema",
    action="store_true",
    help="Whether un-canonicalizable relations should be added to the schema.",
)

# Output setting
parser.add_argument("--output_dir", default="./output/tmp", help="Directory to output to.")

# Parse an explicit empty argument list instead of sys.argv: inside a notebook
# the process arguments belong to the kernel, and all that is wanted here is
# the {option name: default value} mapping.
defaults = vars(parser.parse_args([]))

In [2]:
# Build the EDC object from the parsed defaults, passing every option as a
# keyword argument.
# NOTE(review): the output saved with this cell is "SyntaxError: invalid syntax
# (edc_framework.py, line 335)", so on that run `edc` was never bound and the
# cells below that use it could not execute. The fix belongs in
# edc/edc_framework.py, not in this notebook.
from edc.edc_framework import EDC
edc = EDC(**defaults)

SyntaxError: invalid syntax (edc_framework.py, line 335)

In [None]:
# One-sentence sample input, shared by the `edc.oie` and
# `edc.schema_definition` calls in the cells that follow.
input_text_list = ["Bowen is a student at NUS."]

In [None]:
# Run the `oie` step on the sample text; as the last expression of the cell,
# its return value is what the notebook displays.
# NOTE(review): presumably "open information extraction", going by the help
# text of the `--oie_*` options — confirm against the EDC class.
edc.oie(input_text_list)

In [None]:
# Run the `schema_definition` step on the same sample text, with one
# hand-written triplet supplied instead of the output of `edc.oie`.
# NOTE(review): the nesting looks like "one list of [subject, relation, object]
# triplets per input text" — confirm against EDC.schema_definition. What
# `free_model=True` does is not visible from this notebook.
edc.schema_definition(input_text_list, [[['Bowen', 'studentOf', 'NUS']]], free_model=True)

In [None]:
from sentence_transformers import SentenceTransformer

# Load the "intfloat/e5-mistral-7b-instruct" embedding model, explicitly placed
# on the CPU.
# NOTE(review): judging by its name this is a 7B-parameter checkpoint, so
# loading and encoding on CPU may be slow and memory-hungry — confirm before
# including this cell in a full re-run.
model = SentenceTransformer("intfloat/e5-mistral-7b-instruct", device="cpu")

In [None]:
# Two sample search queries ...
queries = [
    "how much protein should a female eat",
    "summit define",
]
# ... and two sample passages, listed in the same order so that documents[i]
# is the passage that answers queries[i].
documents = [
    "As a general guideline, the CDC's average requirement of protein for women ages 19 to 70 is 46 grams per day. But, as you can see from this chart, you'll need to increase that if you're expecting or training for a marathon. Check out the chart below to see how much protein you should be eating each day.",
    "Definition of summit for English Language Learners. : 1  the highest point of a mountain : the top of a mountain. : 2  the highest level. : 3  a meeting or series of meetings between the leaders of two or more governments."
]


In [None]:
# Embed the queries using the model's "web_search_query" prompt; the documents
# are embedded without naming a prompt.
query_embeddings = model.encode(queries, prompt_name="web_search_query")
document_embeddings = model.encode(documents)

# Pairwise dot products between every query and every document, scaled by 100:
# scores[i][j] compares queries[i] with documents[j].
# NOTE(review): this is only a cosine similarity if the model returns
# unit-length embeddings — confirm.
scores = (query_embeddings @ document_embeddings.T) * 100
print(scores.tolist())

In [None]:
# Display the model's `prompts` attribute.
# NOTE(review): presumably a mapping from prompt name to prompt text that
# includes the "web_search_query" name passed to `model.encode` above — confirm.
model.prompts