In [46]:
import os
from llama_index.llms.groq import Groq
from llama_index.core.llms import ChatMessage
from llama_index.graph_stores.nebula import NebulaGraphStore
from llama_index.core import StorageContext
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.retrievers import KnowledgeGraphRAGRetriever

# The Groq API key is taken from the GROQ_API_KEY environment variable so that
# no credential is stored in the notebook itself. When the variable is unset
# (or empty) the key is requested interactively and kept only in this
# process's environment.
# NOTE(review): an earlier revision of this cell contained a literal key in
# plain text; that key should be treated as leaked and rotated.
if not os.environ.get("GROQ_API_KEY"):
    from getpass import getpass  # local import: only needed on this fallback path

    os.environ["GROQ_API_KEY"] = getpass("Groq API key: ")

# Shared chat model used by every helper below.
llm = Groq(model="llama3-70b-8192", api_key=os.environ["GROQ_API_KEY"])

# # Initialize NebulaGraphStore
# graph_store = NebulaGraphStore(
#     space_name="llamaindex",
#     edge_types=["relationship"],
#     rel_prop_names=["relationship"],
#     tags=["entity"],
# )
# storage_context = StorageContext.from_defaults(graph_store=graph_store)


In [63]:
# Module-level state shared by the extraction helpers in this cell.
# Entity names recorded so far; update_entities_and_relationships adds the
# normalized source and destination of every complete triplet it processes.
existing_entities = set()
# Recorded (source, relationship, destination) tuples, in first-seen order;
# update_entities_and_relationships appends a tuple only if it is not already present.
existing_relationships = []

# graph_rag_retriever = KnowledgeGraphRAGRetriever(
#     storage_context=storage_context,
#     verbose=True,
# )

# # Initialize the RetrieverQueryEngine
# query_engine = RetrieverQueryEngine.from_args(
#     retriever=graph_rag_retriever,
# )

def assistive_merge(paragraphs):
    """Ask the LLM to merge several text segments into a single passage.

    The segments are joined with a ``%%%`` delimiter line and sent to the
    module-level ``llm`` as one user message.

    Args:
        paragraphs: Iterable of strings to merge, in order.

    Returns:
        The model's reply with surrounding whitespace removed. A leading
        ``"assistant: "`` role prefix is stripped if present. An empty string
        is returned when the reply has no text content.
    """
    merge_prompt = "\n%%%\n".join(paragraphs)
    response = llm.chat(
        messages=[
            ChatMessage(role="system", content="You are a helpful assistant."),
            ChatMessage(
                role="user",
                content=(
                    # Ask for the bare result: otherwise replies such as
                    # "Here is the merged text:" end up inside the merged text
                    # and are fed into later merge / extraction calls.
                    "Merge the following text segments delimited by (%%%). "
                    "Return only the merged text, with no preamble or commentary:\n"
                    f"{merge_prompt}"
                ),
            ),
        ]
    )
    # `content` may be None; fall back to an empty string instead of raising.
    merged_text = (response.message.content or "").strip()

    # Strip the role prefix only when it leads the reply. A blanket
    # str.replace would also delete "assistant: " occurring inside the text.
    role_prefix = "assistant: "
    if merged_text.startswith(role_prefix):
        merged_text = merged_text[len(role_prefix):].strip()
    return merged_text

def extract_entities_and_relationships(merged_text):
    """Extract (source, relationship, destination) triplets from text via the LLM.

    The prompt contains the text, the entities and relationships recorded so
    far (so the model can avoid duplicates), a strict output format, and
    few-shot examples. The reply is parsed with
    ``parse_entities_and_relationships`` and the result is passed to
    ``update_entities_and_relationships``, which updates the module-level
    ``existing_entities`` / ``existing_relationships``.

    Args:
        merged_text: The text to extract triplets from.

    Returns:
        The list of triplet dicts produced by the parser for this reply
        (including any that were already recorded).
    """
    # Sort the entity set: set iteration order for strings can differ between
    # kernel sessions, which would make the prompt non-reproducible.
    current_entities = "\n".join(sorted(existing_entities))
    current_relationships = "\n".join(
        f"{r[0]} - {r[1]} - {r[2]}" for r in existing_relationships
    )

    prompt = (
        "Extract detailed entities and relationships without excessive redundancy from the following text:\n"
        f"{merged_text}\n"
        "Current entities:\n"
        f"{current_entities}\n"
        "Current relationships:\n"
        f"{current_relationships}\n"
        "BE DETAILED INITIALLY, THEN COMPARE TO CURRENT RELATIONSHIPS AND SEE IF FIT TO ADD NEW ONE\n"
        "DO NOT HAVE DUPLICATES\n"
        "Please provide the output strictly in the following format with no variance:\n"
        "1. Source Entity: <source entity>\n"
        "2. Relationship: <relationship>\n"
        "3. Destination Entity: <destination entity>\n"
        "Ensure that each triplet is listed in the exact same format for consistency and avoid redundancy."
        "\nExamples:\n"
        "1. Source Entity: alice\n2. Relationship: is the CEO of\n3. Destination Entity: acme corp\n"
        "1. Source Entity: acme corp\n2. Relationship: will acquire\n3. Destination Entity: widget inc\n"
        "1. Source Entity: bob\n2. Relationship: is the CTO of\n3. Destination Entity: acme corp\n"
        "1. Source Entity: john\n2. Relationship: works at\n3. Destination Entity: google\n"
        "1. Source Entity: sarah\n2. Relationship: is married to\n3. Destination Entity: david\n"
        "1. Source Entity: amazon\n2. Relationship: acquired\n3. Destination Entity: whole foods\n"
        "1. Source Entity: elon musk\n2. Relationship: founded\n3. Destination Entity: spacex\n"
        "1. Source Entity: jane\n2. Relationship: lives in\n3. Destination Entity: new york\n"
        "1. Source Entity: microsoft\n2. Relationship: developed\n3. Destination Entity: windows\n"
        "1. Source Entity: professor smith\n2. Relationship: teaches\n3. Destination Entity: mathematics\n"
        "1. Source Entity: tesla\n2. Relationship: launched\n3. Destination Entity: model s\n"
        "1. Source Entity: facebook\n2. Relationship: rebranded to\n3. Destination Entity: meta\n"
        "1. Source Entity: bill gates\n2. Relationship: co-founded\n3. Destination Entity: microsoft\n"
        "1. Source Entity: jon\n2. Relationship: is married to\n3. Destination Entity: sara\n"
        "OUTPUT MODEL IN THIS FORMAT AND DETAIL\n"
    )

    response = llm.chat(
        messages=[
            ChatMessage(role="system", content="You are a helpful assistant."),
            ChatMessage(role="user", content=prompt),
        ]
    )
    # `content` may be None; treat that as an empty reply (no triplets).
    structured_response = (response.message.content or "").strip()
    triplets = parse_entities_and_relationships(structured_response)
    update_entities_and_relationships(triplets)
    return triplets

def parse_entities_and_relationships(structured_response):
    """Parse the LLM's numbered-line reply into a list of triplet dicts.

    Recognised lines (after trimming surrounding whitespace) start with one of:
    ``1. Source Entity:``, ``2. Relationship:``, ``3. Destination Entity:``.
    A ``1. Source Entity:`` line starts a new triplet; all other lines are
    ignored.

    Args:
        structured_response: Raw text reply from the model.

    Returns:
        A list of dicts with keys ``'source'``, ``'relationship'`` and
        ``'destination'``. Source and destination are passed through
        ``normalize_entity_name``. A dict may lack keys if the reply omitted
        the corresponding line; such partial triplets are still returned.
    """
    source_label = "1. Source Entity:"
    relationship_label = "2. Relationship:"
    destination_label = "3. Destination Entity:"

    triplets = []
    current_triplet = {}

    for raw_line in structured_response.split("\n"):
        # Trim first so indented list items (or lines ending in "\r") are
        # still recognised instead of being silently skipped.
        line = raw_line.strip()
        if line.startswith(source_label):
            # A new source line closes the triplet collected so far.
            if current_triplet:
                triplets.append(current_triplet)
                current_triplet = {}
            # Slice off the label rather than str.replace, which would also
            # remove the label text if it occurred inside the value.
            current_triplet['source'] = normalize_entity_name(line[len(source_label):].strip())
        elif line.startswith(relationship_label):
            current_triplet['relationship'] = line[len(relationship_label):].strip()
        elif line.startswith(destination_label):
            current_triplet['destination'] = normalize_entity_name(line[len(destination_label):].strip())

    # Flush the last triplet, which has no following source line to close it.
    if current_triplet:
        triplets.append(current_triplet)

    return triplets

def normalize_entity_name(entity):
    """Return a canonical form of an entity name.

    The name is lowercased, surrounding whitespace is removed, and the dotted
    abbreviations ``inc.`` and ``corp.`` lose their trailing dot.
    """
    cleaned = entity.lower().strip()
    # Applied in order; each pair is (dotted form, undotted form).
    for dotted, undotted in (("inc.", "inc"), ("corp.", "corp")):
        cleaned = cleaned.replace(dotted, undotted)
    return cleaned

def update_entities_and_relationships(triplets):
    """Record the entities and relationships of each triplet in module state.

    For every triplet dict, the normalized source and destination are added to
    ``existing_entities`` and the ``(source, relationship, destination)`` tuple
    is appended to ``existing_relationships`` unless already present. A triplet
    missing a key is reported with a printed message and processing continues
    with the next one.
    """
    global existing_entities, existing_relationships
    for item in triplets:
        try:
            src = normalize_entity_name(item['source'])
            dst = normalize_entity_name(item['destination'])

            # Set semantics make the membership check implicit.
            existing_entities.update((src, dst))

            edge = (src, item['relationship'], dst)
            if edge in existing_relationships:
                continue
            existing_relationships.append(edge)
        except KeyError as missing_key:
            print(f"Missing key in triplet: {missing_key}")

def _print_triplets(triplets):
    """Print one 'Source / Relationship / Destination' line per triplet dict."""
    for triplet in triplets:
        # .get() keeps the demo running when the parser returned a partial
        # triplet (a missing field is shown as None instead of raising KeyError).
        print(
            f"Source: {triplet.get('source')}, "
            f"Relationship: {triplet.get('relationship')}, "
            f"Destination: {triplet.get('destination')}"
        )


def _print_state(label_suffix=""):
    """Print the accumulated module-level entities and relationships."""
    print(f"\nExisting Entities{label_suffix}:", existing_entities)
    print(f"Existing Relationships{label_suffix}:", existing_relationships)


# Initial sentences
paragraph1 = "Alice, the CEO of Acme Corp, announced that the company will acquire Widget Inc."
paragraph2 = "This acquisition is expected to enhance Acme's product line and market reach."

merged_text = assistive_merge([paragraph1, paragraph2])
print("Merged Text:", merged_text)

entities_and_relationships = extract_entities_and_relationships(merged_text)
print("Entities and Relationships (Initial):")
_print_triplets(entities_and_relationships)
_print_state()

# New sentence: merge it into the running text and extract again, so the
# prompt now includes the entities/relationships recorded by the first step.
paragraph3 = "Bob, the CTO of Acme Corp, confirmed that the integration with Widget Inc. will begin next quarter."

merged_text2 = assistive_merge([merged_text, paragraph3])
print("Merged Text (After Adding New Sentence):", merged_text2)

entities_and_relationships2 = extract_entities_and_relationships(merged_text2)
print("Entities and Relationships (After Adding New Sentence):")
_print_triplets(entities_and_relationships2)
_print_state(" (After Adding New Sentence)")

# Third step: a sentence introducing a relationship between two known entities.
paragraph4 = "Bob and Alice are married."

merged_text3 = assistive_merge([merged_text2, paragraph4])
print("Merged Text (After Adding New Sentence):", merged_text3)

entities_and_relationships3 = extract_entities_and_relationships(merged_text3)
print("Entities and Relationships (After Adding New Sentence):")
_print_triplets(entities_and_relationships3)
_print_state(" (After Adding New Sentence)")



Merged Text: Here is the merged text:

Alice, the CEO of Acme Corp, announced that the company will acquire Widget Inc. This acquisition is expected to enhance Acme's product line and market reach.
Entities and Relationships (Initial):
Source: alice, Relationship: is the CEO of, Destination: acme corp
Source: acme corp, Relationship: will acquire, Destination: widget inc
Source: acme corp, Relationship: has, Destination: product line
Source: acme corp, Relationship: has, Destination: market reach

Existing Entities: {'widget inc', 'alice', 'product line', 'acme corp', 'market reach'}
Existing Relationships: [('alice', 'is the CEO of', 'acme corp'), ('acme corp', 'will acquire', 'widget inc'), ('acme corp', 'has', 'product line'), ('acme corp', 'has', 'market reach')]
Merged Text (After Adding New Sentence): Here is the merged text:

Alice, the CEO of Acme Corp, announced that the company will acquire Widget Inc. This acquisition is expected to enhance Acme's product line and market rea

In [64]:
# Define the evaluation function
def evaluate_extraction(paragraph, expected_triplets):
    """Run extraction on one paragraph and print whether it matches expectations.

    The module-level ``existing_entities`` and ``existing_relationships`` are
    rebound to fresh empty containers first, so each evaluation starts from a
    clean state. The extracted triplets are compared to ``expected_triplets``
    by exact set equality of ``(source, relationship, destination)`` tuples.

    Args:
        paragraph: Text passed to ``extract_entities_and_relationships``.
        expected_triplets: Iterable of expected 3-tuples.
    """
    global existing_entities, existing_relationships
    existing_entities, existing_relationships = set(), []

    extracted_triplets = {
        (item['source'], item['relationship'], item['destination'])
        for item in extract_entities_and_relationships(paragraph)
    }
    expected = set(expected_triplets)

    if extracted_triplets == expected:
        print("Passed:", paragraph)
        return

    print(f"Failed: {paragraph}")
    print("Expected:", expected)
    print("Extracted:", extracted_triplets)

# Test cases
# Each entry is (paragraph, expected_triplets). evaluate_extraction compares the
# expected (source, relationship, destination) tuples to the extracted ones by
# exact set equality, so entity names are written lowercase to match the
# output of normalize_entity_name.
test_cases = [
    (
        # NOTE(review): the recorded run fails here — the model returned
        # ('john', 'works at', 'google') plus ('john', 'is a', 'software engineer'),
        # and the extraction prompt's own example uses "works at" for john/google.
        # Confirm which phrasing is the intended expectation.
        "John, a software engineer at Google, created a new application.",
        [("john", "is a software engineer at", "google"), ("john", "created", "a new application")]
    ),
    (
        "Sarah, married to David, lives in New York.",
        [("sarah", "is married to", "david"), ("sarah", "lives in", "new york")]
    ),
    (
        # NOTE(review): the expected destination includes the date, but the
        # prompt example and the recorded output both use 'whole foods' — confirm.
        "Amazon acquired Whole Foods in 2017.",
        [("amazon", "acquired", "whole foods in 2017")]
    ),
    (
        "Elon Musk founded SpaceX and Tesla.",
        [("elon musk", "founded", "spacex"), ("elon musk", "founded", "tesla")]
    ),
    (
        "Jane, who lives in San Francisco, works at Microsoft.",
        [("jane", "lives in", "san francisco"), ("jane", "works at", "microsoft")]
    ),
    (
        # NOTE(review): same date-in-destination mismatch as the Amazon case;
        # the prompt example and the recorded output use 'meta' — confirm.
        "Facebook rebranded to Meta in 2021.",
        [("facebook", "rebranded to", "meta in 2021")]
    ),
    (
        "Bill Gates co-founded Microsoft.",
        [("bill gates", "co-founded", "microsoft")]
    ),
    (
        "Tesla launched Model S.",
        [("tesla", "launched", "model s")]
    ),
    (
        "Professor Smith teaches mathematics.",
        [("professor smith", "teaches", "mathematics")]
    ),
    (
        "Apple developed the iPhone.",
        [("apple", "developed", "the iphone")]
    ),
]

# Evaluate every test case in order, printing a 1-based header before each
# result and a blank line after it.
for case_number, (paragraph, expected_triplets) in enumerate(test_cases, start=1):
    print(f"Test Case {case_number}:")
    evaluate_extraction(paragraph, expected_triplets)
    print()



Test Case 1:
Failed: John, a software engineer at Google, created a new application.
Expected: {('john', 'is a software engineer at', 'google'), ('john', 'created', 'a new application')}
Extracted: {('john', 'created', 'a new application'), ('john', 'is a', 'software engineer'), ('john', 'works at', 'google')}

Test Case 2:
Passed: Sarah, married to David, lives in New York.

Test Case 3:
Failed: Amazon acquired Whole Foods in 2017.
Expected: {('amazon', 'acquired', 'whole foods in 2017')}
Extracted: {('amazon', 'acquired', 'whole foods')}

Test Case 4:
Passed: Elon Musk founded SpaceX and Tesla.

Test Case 5:
Passed: Jane, who lives in San Francisco, works at Microsoft.

Test Case 6:
Failed: Facebook rebranded to Meta in 2021.
Expected: {('facebook', 'rebranded to', 'meta in 2021')}
Extracted: {('facebook', 'rebranded to', 'meta')}

Test Case 7:
Passed: Bill Gates co-founded Microsoft.

Test Case 8:
Passed: Tesla launched Model S.

Test Case 9:
Passed: Professor Smith teaches mathemat