# Set up Nebula 3 connection
## Initialize a client

In [39]:
from nebula3.gclient.net import ConnectionPool
from nebula3.Config import Config
from nebula3.gclient.net import Session

# Configuration
nebula_host = "127.0.0.1"  # Replace with your Nebula Graph host
nebula_port = 9669  # Replace with your Nebula Graph port
username = "root"  # Replace with your Nebula Graph username
password = "nebula"  # Replace with your Nebula Graph password
space_name = "news"  # Replace with your Nebula Graph space name

# Create a configuration object
config = Config()
config.max_connection_pool_size = 10

# Initialize the connection pool.
# The call is made outside the assert so the pool is still initialized when
# Python runs with -O (which strips assert statements).
connection_pool = ConnectionPool()
pool_ready = connection_pool.init([(nebula_host, nebula_port)], config)
assert pool_ready, "Failed to initialize the connection pool"

# Create a single session; requesting it twice would leave the first one
# checked out of the pool and never released.
session = connection_pool.get_session(username, password)
assert session, "Failed to create a session"

## Create Space

In [2]:
# Create a space
create_space_query = f"""
CREATE SPACE IF NOT EXISTS {space_name} (partition_num=15, replica_factor=1, vid_type=FIXED_STRING(3000));
"""
create_space_result = session.execute(create_space_query)
# Report success only when the server accepted the statement.
# Note: this statement creates the space; selecting it (USE) is a separate step.
assert create_space_result.is_succeeded(), create_space_result.error_msg()
print(f"Space '{space_name}' created and selected successfully.")


Space 'news' created and selected successfully.


## Use Space

In [3]:
# Select the newly created space, retrying while the USE statement fails.
import time

use_space_query = f"USE {space_name};"
attempts_left = 10
while attempts_left:
    query_result = session.execute(use_space_query)
    if query_result.is_succeeded():
        break
    # Wait before the next attempt.
    time.sleep(5)
    attempts_left -= 1
else:
    # Every attempt failed: surface the server's last error message.
    assert query_result.is_succeeded(), query_result.error_msg()

## Create tag `entity`

In [4]:
# Select the space and create the `entity` tag (a single nullable string
# property, `name`) in one request. Success is not checked here; the returned
# result is kept in `result` so it can be inspected afterwards.
query = f"USE {space_name}; CREATE tag `entity` (`name` string NULL  ) "

result = session.execute(query)

In [5]:
result.error_msg()

''

## Create edge `relationship`

In [6]:
# Create the `relationship` edge type with two required (NOT NULL) string
# properties: `relationship` and `source`.
query = "CREATE edge `relationship` (`relationship` string NOT NULL, source string NOT NULL  ) "

# The returned result is not checked for success here.
session.execute(query)

ResultSet(None)

## Create index for searching

In [7]:
# Create a tag index on `entity.name`; the `(100)` is presumably the indexed
# prefix length for the string property — confirm against the nGQL docs.
# NOTE(review): the index name is spelled `enitiy_index` (sic). It is left
# unchanged here because renaming it would create a differently named index.
query = "CREATE TAG INDEX `enitiy_index` on `entity`(`name`(100)) "
query_result = session.execute(query)
# Stop immediately, with the server's message, if the index was not created.
assert query_result.is_succeeded(), query_result.error_msg()

# Ingest data

## News data for BBC

In [8]:
bbc_news: str = """Donald Trump took the stage on Thursday night at the Republican National Convention like a conquering hero. He had cheated death. His Democratic opponents were tearing themselves apart.

His loyalists, who now fill the ranks of his party, packed the Milwaukee arena and cheered enthusiastically throughout his hour-and-a-half speech.
He pledged to serve all Americans if elected then recounted, in a subdued, but almost messianic tone, his brush with a spray of bullets. Some delegates even wore bandages over their right ears like their injured political idol in tribute to him.
"I stand before you in this arena only by the grace of almighty God,” he said. “Over the last few days, many people have said it was a providential moment.” He spoke of dropping to the ground as bullets flew past him and how his supporters had “great sorrow on their faces”.
“When I rose, surrounded by Secret Service, the crowd was confused because they thought I was dead,” he said.
The unity message and its powerful delivery made for a unique convention speech and a remarkable Trump one. But the rest of his speech was more traditional convention fare.
Although he called for ending the “partisan witch hunts” against him, he avoided the extended forays into 2020 election denial that have at times dominated his rally speeches, and he mostly replaced his normal pointed attacks on individual opponents with calls for unity.
There was classic Trump in there too - dark and false claims, sometimes during extended improvisations.
Trump’s performance hinted that for all the talk of a changed man after the attempt on his life and for all the more organised, focused operation behind him, the former president is still inclined to veer off-script, even in the most momentous of occasions.
The question many Americans could be wondering now is which version of Trump will lead the country should he beat Democrat Joe Biden in November. Looking back at the last four days offers some clues.
"""
# Credit: https://www.bbc.com/news/articles/cqv5y29qnpgo
source_bbc = "https://www.bbc.com/news/articles/cqv5y29qnpgo"
# One entry per non-empty line of the article. The first entry is reused later
# as the background context passed to the extraction prompt.
news_list = [line for line in bbc_news.splitlines() if line]
    

## Knowledge graph extraction
### LLM

In [9]:
import ollama

MODEL = "qwen2:7b-instruct-q6_K"

### Fill in the knowledge graph prompt template

In [10]:
from string import Template

# Prompt template for triplet extraction. Placeholders: ${max_knowledge_triplets},
# ${background_context} and ${text}. The model's reply is parsed with json.loads,
# so the example below must itself be valid JSON (no trailing comma after the
# last row), otherwise the model is nudged towards producing unparsable output.
DEFAULT_KG_TRIPLET_EXTRACT_TMPL = Template("""
Some text is provided below. Given the text, extract up to ${max_knowledge_triplets} knowledge triplets in the form of (subject, predicate, object, extracting_area). Avoid stopwords. You must use the given background context as a footnote. Your output must not be extracted from background context
---------------------
Example:
Background context: "Donald Trump took the stage on Thursday night at the Republican National Convention like a conquering hero. He had cheated death. His Democratic opponents were tearing themselves apart."
Text: "His loyalists, who now fill the ranks of his party, packed the Milwaukee arena and cheered enthusiastically throughout his hour-and-a-half speech."
Triplets:
{
        "results": [
            ["Donald Trump", "took the stage", "on Thursday night at the Republican National Convention", "background_context"],
            ["Donald Trump", "cheated death", "", "background_context"],
            ["Donald Trump's Democratic opponents", "were tearing themselves apart", "", "background_context"],
            ["Donald Trump's loyalists", "fill", "the ranks of Donald Trump's party", "text"],
            ["Donald Trump's loyalists", "packed", "the Milwaukee arena", "text"],
            ["Donald Trump's loyalists", "cheered throughout", "Donald Trump's hour-and-a-half speech", "text"]
        ]
}
---------------------
${background_context}
Text: ${text}
Triplets:""")

# DEFAULT_KG_TRIPLET_EXTRACT_TMPL = Template("""
# Some text is provided below. Given the text, extract up to ${max_knowledge_triplets} knowledge triplets in the form of (subject, predicate, object). Avoid stopwords. You must use the given background context as a footnote. Your output must not be extracted from background context
# ---------------------
# Example:
# Background context: "Donald Trump took the stage on Thursday night at the Republican National Convention like a conquering hero. He had cheated death. His Democratic opponents were tearing themselves apart."
# Text: "His loyalists, who now fill the ranks of his party, packed the Milwaukee arena and cheered enthusiastically throughout his hour-and-a-half speech."
# Triplets:
# {
#         "results": [
#             ["Donald Trump's loyalists", "fill", "the ranks of Donald Trump's party"],
#             ["Donald Trump's loyalists", "packed", "the Milwaukee arena"],
#             ["Donald Trump's loyalists", "cheered throughout", "Donald Trump's hour-and-a-half speech"],
#         ]
# }
# ---------------------
# Your turn:
# ${background_context}
# Text: ${text}
# Triplets:""")

def complete_kg_prompt(
    text: str, background_context: str = "", max_knowledge_triplets: int = 20
) -> str:
    """Render the triplet-extraction prompt for one piece of text.

    Args:
        text: Passage to extract triplets from; it is wrapped in double quotes.
        background_context: Optional supporting passage. When non-empty it is
            wrapped in double quotes and prefixed with ``Background context: ``;
            otherwise it is substituted unchanged.
        max_knowledge_triplets: Upper bound on triplets quoted in the prompt.

    Returns:
        The filled-in template with leading/trailing whitespace stripped.
    """
    context_line = (
        f'Background context: "{background_context}"'
        if background_context
        else background_context
    )
    fields = {
        "text": f'"{text}"',
        "background_context": context_line,
        "max_knowledge_triplets": max_knowledge_triplets,
    }
    return DEFAULT_KG_TRIPLET_EXTRACT_TMPL.safe_substitute(**fields).strip()


prompt = complete_kg_prompt(
    "His loyalists, who now fill the ranks of his party, packed the Milwaukee arena and cheered enthusiastically throughout his hour-and-a-half speech.",
    "Donald Trump took the stage on Thursday night at the Republican National Convention like a conquering hero. He had cheated death. His Democratic opponents were tearing themselves apart.",
)
print(prompt)

Some text is provided below. Given the text, extract up to 20 knowledge triplets in the form of (subject, predicate, object, extracting_area). Avoid stopwords. You must use the given background context as a footnote. Your output must not be extracted from background context
---------------------
Example:
Background context: "Donald Trump took the stage on Thursday night at the Republican National Convention like a conquering hero. He had cheated death. His Democratic opponents were tearing themselves apart."
Text: "His loyalists, who now fill the ranks of his party, packed the Milwaukee arena and cheered enthusiastically throughout his hour-and-a-half speech."
Triplets:
{
        "results": [
            ["Donald Trump", "took the stage", "on Thursday night at the Republican National Convention", "background_context"],
            ["Donald Trump", "cheated death", "", "background_context"],
            ["Donald Trump's Democratic opponents", "were tearing themselves apart", "", "backgr

### Test extracting data

In [11]:
import json
from typing import TypedDict
from retry import retry as classic_retry


class TripetResult(TypedDict):
    """Shape of the JSON object whose ``results`` key holds the extracted rows."""

    # NOTE(review): the prompt example asks for four-element rows
    # (subject, predicate, object, extracting_area), while this annotation
    # declares three-element tuples — confirm which one is intended.
    results: list[tuple[str, str, str]]


# Retry when the reply is not JSON at all, and also when it is valid JSON of the
# wrong shape (no "results" key, or not an object / rows not iterable).
@classic_retry(tries=5, exceptions=(json.JSONDecodeError, KeyError, TypeError))
def get_tripets(prompt: str) -> list[tuple[str, ...]]:
    """Ask the model for triplets and parse its JSON reply.

    Args:
        prompt: Fully rendered extraction prompt.

    Returns:
        The rows found under the reply's ``"results"`` key, each converted to a
        tuple. Rows are returned as the model produced them, so their length is
        not guaranteed.

    Raises:
        json.JSONDecodeError, KeyError, TypeError: If the reply is still
            unusable after 5 attempts.
    """
    response = ollama.generate(prompt=prompt, model=MODEL)["response"]
    rows = json.loads(response)["results"]
    return [tuple(row) for row in rows]


def get_kg_tripet(
    text: str,
    main_context: str = "",
    max_knowledge_triplets: int = 20,
    all_context: bool = False,
) -> list[tuple[str, str, str]]:
    """Extract knowledge triplets from ``text`` with the model.

    Args:
        text: Passage to extract triplets from.
        main_context: Background passage included in the prompt.
        max_knowledge_triplets: Upper bound on triplets quoted in the prompt.
        all_context: When True keep every row; otherwise keep only rows whose
            last element (the extracting area) equals ``"text"``.

    Returns:
        The kept rows with their last element (the extracting area) removed.
    """
    prompt = complete_kg_prompt(text, main_context, max_knowledge_triplets)
    rows = get_tripets(prompt)
    # Empty rows are skipped: they carry no extracting area and would otherwise
    # make the unpacking of the trailing element fail.
    return [
        tuple(row[:-1])
        for row in rows
        if row and (all_context or row[-1] == "text")
    ]


response = get_kg_tripet(news_list[-1], news_list[0], 1000)
print(response)

[('The question', 'many Americans could be wondering now', 'is which version of Trump will lead the country should he beat Democrat Joe Biden in November'), ('Looking back at the last four days', 'offers some clues', 'about the future leadership style of Donald Trump if he wins against Joe Biden')]


In [12]:
response = get_kg_tripet(news_list[0], news_list[0], 1000, all_context=True)
print(response)

[('Donald Trump', 'took the stage', 'on Thursday night at the Republican National Convention'), ('Donald Trump', 'cheated death', ''), ("Donald Trump's Democratic opponents", 'were tearing themselves apart', '')]


# Ingest knowledge graph into Nebula

In [13]:
from typing import Optional, Any, Dict
from tenacity import retry, stop_after_attempt, wait_random_exponential

QUOTE = '"'
RETRY_TIMES = 3
WAIT_MIN_SECONDS = 0.5
WAIT_MAX_SECONDS = 10


def escape_str(value: str) -> str:
    """Make ``value`` safe to embed in a double-quoted nGQL string literal.

    Backslashes are doubled so they cannot act as escape characters (a trailing
    backslash would otherwise swallow the literal's closing quote), double
    quotes are replaced by a space, and surrounding whitespace is stripped.

    Args:
        value: Raw text.

    Returns:
        The sanitized text, without the surrounding quotes.
    """
    return value.replace("\\", "\\\\").replace('"', " ").strip()


@retry(
    stop=stop_after_attempt(RETRY_TIMES),
    wait=wait_random_exponential(min=WAIT_MIN_SECONDS, max=WAIT_MAX_SECONDS),
)
def execute(query: str) -> Any:
    """Send a statement to NebulaGraph through the shared ``session``.

    Args:
        query: Statement text. Every run of three backticks is removed and
            surrounding whitespace is trimmed before it is sent.

    Returns:
        Whatever ``session.execute`` returns for the cleaned statement.
    """
    cleaned = query.replace("```", "")
    return session.execute(cleaned.strip())


def hash_string_to_rank(string: str) -> int:
    """Map a string to a stable signed 64-bit integer usable as an edge rank.

    The built-in ``hash()`` is salted per interpreter process for ``str``, so it
    yields a different rank for the same relation on every run. A SHA-256 digest
    is used instead so the same text always maps to the same rank.

    Args:
        string: Text to hash (encoded as UTF-8).

    Returns:
        An integer in ``[-2**63, 2**63 - 1]`` taken from the first 8 digest bytes.
    """
    import hashlib  # local import keeps this function self-contained

    digest = hashlib.sha256(string.encode("utf-8")).digest()
    return int.from_bytes(digest[:8], byteorder="big", signed=True)


def upsert_triplet(subj: str, rel: str, obj: str, source: str) -> None:
    """Insert the triplet ``subj -[rel]-> obj`` into the graph.

    Both endpoints are written as `entity` vertices whose id and `name` are the
    sanitized text, and one `relationship` edge is written between them. The
    edge rank is derived from ``rel`` so different relations between the same
    pair of vertices do not overwrite each other.

    Args:
        subj: Subject text; used as vertex id and `name`.
        rel: Relation text; stored in the edge's `relationship` property.
        obj: Object text; converted with ``str()`` first, so ``None`` is stored
            as the text ``"None"``.
        source: Provenance stored in the edge's `source` property.

    Raises:
        AssertionError: If the statement returns no result or does not succeed.
    """
    # Sanitize every value that ends up inside a quoted literal, including
    # ``source``, so no value can terminate its literal early.
    subj = escape_str(subj)
    rel = escape_str(rel)
    obj = escape_str(str(obj))
    source = escape_str(str(source))

    # The same quoted text serves as both vertex id and `name` value.
    subj_field = f"{QUOTE}{subj}{QUOTE}"
    obj_field = f"{QUOTE}{obj}{QUOTE}"

    entity_type = "entity"
    edge_type = "relationship"
    rel_hash = hash_string_to_rank(rel)
    dml_query = (
        f"INSERT VERTEX `{entity_type}`(name) "
        f"  VALUES {subj_field}:({subj_field});"
        f"INSERT VERTEX `{entity_type}`(name) "
        f"  VALUES {obj_field}:({obj_field});"
        f"INSERT EDGE `{edge_type}`(`relationship`, `source`) "
        f"  VALUES "
        f"{subj_field}->{obj_field}"
        f"@{rel_hash}:({QUOTE}{rel}{QUOTE}, {QUOTE}{source}{QUOTE});"
    )

    result = execute(dml_query)

    # Raised explicitly (rather than via `assert`) so the check survives -O and
    # so a missing result does not crash while the message is being built.
    if result is None or not result.is_succeeded():
        error_msg = result.error_msg() if result is not None else "no result returned"
        raise AssertionError(f"Failed to query: {dml_query} Error msg: {error_msg}")


# Extract triplets paragraph by paragraph and store each one in the graph.
# The first paragraph is also the background context given to every call, so
# for that paragraph (index 0) rows from every extracting area are kept.
for i, line in enumerate(news_list):
    all_context = not i
    response = get_kg_tripet(line, news_list[0], 1000, all_context=all_context)
    for tripet in response:
        size = len(tripet)
        if size not in (2, 3):
            # Neither (subject, relation) nor (subject, relation, object).
            print(f"Error at {tripet}")
            continue
        subject, relation = tripet[0], tripet[1]
        obj = tripet[2] if size == 3 else None
        result = upsert_triplet(subject, relation, obj, source_bbc)


# Query

In [32]:
import ollama
from string import Template

# Prompt asking the model to turn a user request into graph search filters.
# Placeholder: ${question}. The reply is expected to be a JSON array of objects
# with "subject", "relation" and/or "object" keys, as in the examples.
CONTEXT_RETRIEVAL = Template("""
# Instruction:
Extract subject, relation and object to query in a graph database to obtain information to complete a given task and answer a given question return as an array of JSON

# Example:
Prompt: "What's happened to Bob?"
Response:
[{"subject": "Bob"}]

# Example:
Prompt: "Write a story about Bob"
Response:
[{"subject": "Bob"}]

Prompt: "Where's Bob going?"
Response:
[
    {"subject": "Bob"},
    {"relation": "is going"}
]

Prompt: "How do Bob and Ben relate?"
Response:
[
    {"subject": "Bob"},
    {"object": "Ben"}
]
---------
# Your turn:
Prompt: "${question}"
Response:
""")

user_prompt = "Write news about Donald Trump"
prompt = CONTEXT_RETRIEVAL.safe_substitute(question=user_prompt)
# The model's reply is parsed as JSON directly, with no retry; a reply that is
# not valid JSON raises json.JSONDecodeError here.
tripets = json.loads(ollama.generate(prompt=prompt, model=MODEL)["response"])
print(tripets)

[{'subject': 'Donald Trump'}]


In [15]:
from typing import Optional
import pandas as pd
import numpy as np


def get_query(
    subject: Optional[str] = None,
    relationship: Optional[str] = None,
    object_: Optional[str] = None,
    **aliases: Optional[str],
) -> str:
    """Build an nGQL MATCH query filtering on subject, relationship and/or object.

    Each given filter becomes a case-insensitive ``CONTAINS`` condition; the
    conditions are combined with ``OR``.

    Args:
        subject: Text to look for in the source vertex name.
        relationship: Text to look for in the edge's `relationship` property.
        object_: Text to look for in the destination vertex name.
        **aliases: ``relation`` and ``object`` are accepted as alternative
            spellings of ``relationship`` and ``object_`` so that the filter
            dictionaries produced by the retrieval prompt can be passed with
            ``get_query(**filters)``.

    Returns:
        The query text.

    Raises:
        TypeError: If an unknown keyword is passed.
        ValueError: If no filter is given (the WHERE clause would be empty).
    """

    def _quote(value: str) -> str:
        # Escape backslashes and double quotes so the value cannot terminate
        # the string literal it is embedded in.
        escaped = str(value).replace("\\", "\\\\").replace('"', '\\"')
        return f'"{escaped}"'

    relation_alias = aliases.pop("relation", None)
    object_alias = aliases.pop("object", None)
    if aliases:
        raise TypeError(
            f"get_query() got unexpected keyword arguments: {sorted(aliases)}"
        )
    relationship = relationship or relation_alias
    object_ = object_ or object_alias

    conditions: list[str] = []
    if subject:
        conditions.append(f"LOWER(p.entity.name) CONTAINS LOWER({_quote(subject)})")
    if relationship:
        conditions.append(
            f"LOWER(r.relationship) CONTAINS LOWER({_quote(relationship)})"
        )
    if object_:
        conditions.append(f"LOWER(q.entity.name) CONTAINS LOWER({_quote(object_)})")
    if not conditions:
        raise ValueError(
            "get_query() needs at least one of subject, relationship or object_"
        )
    where_query = "\t" + " OR\n\t".join(conditions)

    query = f"""
MATCH (p:entity)-[r:relationship]->(q:entity)
WHERE (
    {where_query}
)
RETURN p.entity.name AS subject,r.relationship AS relationship, q.entity.name AS object;
"""
    return query


def run_query(space: str, query: str):
    """Run ``query`` in ``space`` and return the rows as a DataFrame.

    Args:
        space: Name of the graph space to select before running the query.
        query: Statement to execute.

    Returns:
        The result of ``as_data_frame()`` on the query's result set.

    Raises:
        ValueError: If selecting the space or running the query fails.
    """
    # Select the space and make sure that worked; otherwise the query would run
    # against whatever space the session had selected before.
    use_result = session.execute(f"USE {space}")
    if not use_result.is_succeeded():
        raise ValueError(
            "Failed to select space {!r}: {}".format(space, use_result.error_msg())
        )

    # Execute the query
    result = session.execute(query)

    # Check if the query was successful
    if not result.is_succeeded():
        raise ValueError("Query failed: {}".format(result.error_msg()))

    # Process the results
    return result.as_data_frame()



# Run one lookup per extracted filter and stack the frames in a single concat
# instead of re-copying the accumulated frame on every iteration.
frames = [run_query("news", get_query(**tripet)) for tripet in tripets]
results = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

# Missing objects are stored as the text "None" at ingestion time; blank them.
results.replace("None", value="", inplace=True)

In [28]:
results.head(5)

Unnamed: 0,subject,relationship,object,context
0,Donald Trump's speech,was more traditional convention fare,,Donald Trump's speech was more traditional con...
1,Donald Trump's Democratic opponents,were tearing themselves apart,,Donald Trump's Democratic opponents were teari...
2,Donald Trump's loyalists,fill,the ranks of Donald Trump's party,Donald Trump's loyalists fill the ranks of Don...
3,Donald Trump's loyalists,cheered throughout,Donald Trump's hour-and-a-half speech,Donald Trump's loyalists cheered throughout Do...
4,Donald Trump's loyalists,packed,the Milwaukee arena,Donald Trump's loyalists packed the Milwaukee ...


In [20]:
# Join each row's subject, relationship and object into one space-separated sentence.
results["context"] = results["subject"] + " " + results["relationship"] + " " + results["object"]

In [31]:
# Render the sentences as a "- " bulleted list, one item per line.
context = "- " + "\n- ".join(results["context"])

In [48]:
# Prompt that hands the retrieved graph facts to the model. Placeholder: ${context}.
USER_PROXY = Template("""
# Instruction:
You are a user assistant. Please response user's request. You must return the output only and do not include prologue, prefix and suffix

# Supporting contexts:
${context}
""")

# The template only carries the supporting context, so the request itself is
# appended explicitly; without it the model never sees what the user asked for.
user_proxy_prompt = (
    USER_PROXY.safe_substitute(context=context)
    + f"\n# User request:\n{user_prompt}\n"
)

response = ollama.generate(prompt=user_proxy_prompt, model=MODEL, stream=True)

# Echo the streamed chunks as they arrive and keep them for later display.
chunks = []
for r in response:
    c = r["response"]
    chunks.append(c)
    print(c, end="")
response_text = "".join(chunks)


Donald Trump's speech at the Republican National Convention was more traditional in nature, focusing on calls for unity and service to all Americans rather than his usual pointed attacks on opponents. His loyalists cheered throughout the hour-and-a-half address, filling the Milwaukee arena with their support. The event offered insights into which version of Trump might lead the country if he wins against Democratic nominee Joe Biden in November. He mentioned overcoming a brush with death and called for an end to "partisan witch hunts," emphasizing a more conciliatory tone than his past rally speeches. Despite some sorrow among supporters, the atmosphere was generally supportive, with many attendees expressing their gratitude for Trump's leadership.

In [49]:
from IPython.display import Markdown

Markdown(response_text)

Donald Trump's speech at the Republican National Convention was more traditional in nature, focusing on calls for unity and service to all Americans rather than his usual pointed attacks on opponents. His loyalists cheered throughout the hour-and-a-half address, filling the Milwaukee arena with their support. The event offered insights into which version of Trump might lead the country if he wins against Democratic nominee Joe Biden in November. He mentioned overcoming a brush with death and called for an end to "partisan witch hunts," emphasizing a more conciliatory tone than his past rally speeches. Despite some sorrow among supporters, the atmosphere was generally supportive, with many attendees expressing their gratitude for Trump's leadership.

In [50]:
user_prompt = "Write a report about Donald Trump"
prompt = CONTEXT_RETRIEVAL.safe_substitute(question=user_prompt)
tripets = json.loads(ollama.generate(prompt=prompt, model=MODEL)["response"])

# Run one lookup per extracted filter and stack the frames in a single concat.
frames = [run_query("news", get_query(**tripet)) for tripet in tripets]
results = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

# Missing objects are stored as the text "None" at ingestion time; blank them.
results.replace("None", value="", inplace=True)

# Rebuild the context from the rows just fetched; reusing the previous
# `context` would answer this request with the earlier query's facts.
if results.empty:
    context = ""
else:
    results["context"] = (
        results["subject"] + " " + results["relationship"] + " " + results["object"]
    )
    context = "- " + "\n- ".join(results["context"])

# The template only carries the supporting context, so the request itself is
# appended explicitly; without it the model never sees what the user asked for.
user_proxy_prompt = (
    USER_PROXY.safe_substitute(context=context)
    + f"\n# User request:\n{user_prompt}\n"
)

response = ollama.generate(prompt=user_proxy_prompt, model=MODEL, stream=True)

for r in response:
    print(r["response"], end="")

Donald Trump's speech at the Republican National Convention was a departure from his usual style, focusing more on traditional fare and calls for unity rather than personal attacks. His loyalists, who filled the Milwaukee arena, were enthusiastic throughout the hour-and-a-half address. With polls showing Donald Trump's Democratic opponent tearing herself apart, this speech offered clues about how he might lead the country if re-elected in November. Notably, Trump mentioned surviving a brush with death and called for an end to partisan witch hunts against him. He also spoke of dropping to his knees as bullets flew past him, emphasizing his reliance on God's grace. While at times, he avoided extended discussions about election denial that have been present in previous rally speeches, focusing instead on serving all Americans and acknowledging this moment as providential.