In [2]:
# OpenAI credentials
import os

# Keep a key that is already exported in the environment. Assigning the
# placeholder unconditionally would overwrite a valid key with a dummy value.
# Only when no (non-empty) key is present do we fall back to the placeholder,
# which must then be replaced by hand before running the cells below.
if not os.environ.get('OPENAI_API_KEY'):
    os.environ['OPENAI_API_KEY'] = "INSERT OPENAI KEY"

In [3]:
import logging
import sys

# Send log records at INFO level and above to stdout.
logging.basicConfig(level=logging.INFO, stream=sys.stdout)

## Using Knowledge Graph with NebulaGraphStore

#### Building the Knowledge Graph

In [4]:
from llama_index import (
    KnowledgeGraphIndex,
    LLMPredictor,
    ServiceContext,
    SimpleDirectoryReader,
)
from llama_index.storage.storage_context import StorageContext
from llama_index.graph_stores import NebulaGraphStore


from langchain import OpenAI
from IPython.display import Markdown, display

INFO:numexpr.utils:NumExpr defaulting to 8 threads.


In [5]:
# Read the Paul Graham essay example data directory into `documents`.
essay_reader = SimpleDirectoryReader('../../../../examples/paul_graham_essay/data')
documents = essay_reader.load_data()

In [6]:
# Define the LLM first, then wrap it in a predictor and a service context.
# NOTE: at the time of demo, text-davinci-002 did not have rate-limit errors
davinci_llm = OpenAI(model_name="text-davinci-002", temperature=0)
llm_predictor = LLMPredictor(llm=davinci_llm)
service_context = ServiceContext.from_defaults(
    chunk_size_limit=512,
    llm_predictor=llm_predictor,
)

## Prepare for NebulaGraph

In [7]:
# Install the nebula3-python package into the kernel's environment
# (presumably the client library NebulaGraphStore relies on — confirm).
%pip install nebula3-python

# Export the NebulaGraph connection settings as environment variables.
# The address assumes NebulaGraph is installed locally.
os.environ.update(
    {
        'NEBULA_USER': "root",
        'NEBULA_PASSWORD': "nebula",
        'NEBULA_ADDRESS': "127.0.0.1:9669",
    }
)

# Prerequisites (assumed to be in place; this cell does not create them):
#   1. A NebulaGraph cluster, created with one of:
#        Option 0: `curl -fsSL nebula-up.siwei.io/install.sh | bash`
#        Option 1: NebulaGraph Docker Extension
#                  https://hub.docker.com/extensions/weygu/nebulagraph-dd-ext
#   2. A graph space called "test". If it does not exist yet, create it with
#      the following commands from NebulaGraph's console:
#        CREATE SPACE test(vid_type=FIXED_STRING(256), partition_num=1, replica_factor=1);
#        :sleep 10;
#        USE test;
#        CREATE TAG entity();
#        CREATE EDGE rel(predicate string);

space_name = "test"

# The three values below are the defaults; they can be omitted when the
# knowledge graph is created from an empty space.
edge_types = ["rel"]
rel_prop_names = ["predicate"]
tags = ["entity"]

Note: you may need to restart the kernel to use updated packages.


## Instantiate GPTNebulaGraph KG Indexes

In [8]:
# Alternative constructor call that spells out the schema names explicitly:
# graph_store = NebulaGraphStore(space_name=space_name, edge_types=edge_types, rel_prop_names=rel_prop_names, tags=tags)
graph_store = NebulaGraphStore(space_name=space_name)
storage_context = StorageContext.from_defaults(graph_store=graph_store)

# Build the knowledge-graph index from the loaded documents, using the storage
# context that wraps the NebulaGraph store and capping extraction at
# 2 triplets per chunk.
# NOTE: can take a while!
index = KnowledgeGraphIndex.from_documents(
    documents,
    service_context=service_context,
    storage_context=storage_context,
    space_name=space_name,
    tags=tags,
    edge_types=edge_types,
    rel_prop_names=rel_prop_names,
    max_triplets_per_chunk=2,
)

INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total LLM token usage: 36098 tokens
INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total embedding token usage: 0 tokens


#### Querying the Knowledge Graph

In [14]:
# Query engine with default settings on top of the knowledge-graph index.
query_engine = index.as_query_engine()

response = query_engine.query("Tell me more about Interleaf")

INFO:llama_index.indices.knowledge_graph.retrievers:> Starting query: Tell me more about Interleaf
INFO:llama_index.indices.knowledge_graph.retrievers:> Query keywords: ['Interleaf', 'history', 'software', 'company']
ERROR:llama_index.indices.knowledge_graph.retrievers:Index was not constructed with embeddings, skipping embedding usage...
INFO:llama_index.indices.knowledge_graph.retrievers:> Querying with idx: 6aa6a716-7390-4783-955b-8169fab25bb1: worth trying.

Our teacher, professor Ulivi, was a nice guy. He could see I w...
INFO:llama_index.indices.knowledge_graph.retrievers:> Querying with idx: 79f2a1b4-80bb-416f-a259-ebfc3136b2fe: on a map of New York City: if you zoom in on the Upper East Side, there's a t...
INFO:llama_index.indices.knowledge_graph.retrievers:> Querying with idx: 1e707b8c-b62a-4c1a-a908-c79e77b9692b: buyers pay a lot for such work. [6]

There were plenty of earnest students to...
INFO:llama_index.indices.knowledge_graph.retrievers:> Querying with idx: 31c2f53c-9

In [17]:
# Wrap the answer in <b> tags and render it as Markdown.
answer_markup = f"<b>{response}</b>"
display(Markdown(answer_markup))

<b>

Interleaf was a software company that made software for creating documents. Their software was inspired by Emacs, and included a scripting language that was a dialect of Lisp. The company was started in the 1990s, and eventually went out of business.</b>

In [12]:
# Follow-up question, sent to the same query engine as before.
response = query_engine.query("Tell me more about what the author worked on at Interleaf")

INFO:llama_index.indices.knowledge_graph.retrievers:> Starting query: Tell me more about what the author worked on at Interleaf
INFO:llama_index.indices.knowledge_graph.retrievers:> Query keywords: ['Interleaf', 'author', 'work']
ERROR:llama_index.indices.knowledge_graph.retrievers:Index was not constructed with embeddings, skipping embedding usage...
INFO:llama_index.indices.knowledge_graph.retrievers:> Querying with idx: 6aa6a716-7390-4783-955b-8169fab25bb1: worth trying.

Our teacher, professor Ulivi, was a nice guy. He could see I w...
INFO:llama_index.indices.knowledge_graph.retrievers:> Querying with idx: 79f2a1b4-80bb-416f-a259-ebfc3136b2fe: on a map of New York City: if you zoom in on the Upper East Side, there's a t...
INFO:llama_index.indices.knowledge_graph.retrievers:> Querying with idx: 1e707b8c-b62a-4c1a-a908-c79e77b9692b: buyers pay a lot for such work. [6]

There were plenty of earnest students to...
INFO:llama_index.indices.knowledge_graph.retrievers:> Querying with id

In [13]:
# Wrap the answer in <b> tags and render it as Markdown.
answer_markup = f"<b>{response}</b>"
display(Markdown(answer_markup))

<b>

The author worked on a software that allowed users to create documents, which was inspired by Emacs. The software had a scripting language that was a dialect of Lisp, and the author was responsible for writing things in this language.

The author also worked on a software that allowed users to generate web sites. This software was a web app and was written in a dialect of Lisp. The author was also responsible for writing things in this language.</b>

#### Query with embeddings

In [None]:
# Rebuild the index with the same inputs as before, this time requesting
# embeddings via include_embeddings=True.
# NOTE: can take a while!
index = KnowledgeGraphIndex.from_documents(
    documents,
    service_context=service_context,
    storage_context=storage_context,
    include_embeddings=True,
    space_name=space_name,
    tags=tags,
    edge_types=edge_types,
    rel_prop_names=rel_prop_names,
    max_triplets_per_chunk=2,
)

# Query engine in 'hybrid' embedding mode with similarity_top_k=5
# (presumably hybrid combines keyword and embedding lookups — confirm against
# the retriever documentation).
query_engine = index.as_query_engine(
    similarity_top_k=5,
    embedding_mode='hybrid',
    response_mode="tree_summarize",
    include_text=True,
)

In [None]:
# Query through the hybrid engine, which was configured with
# similarity_top_k=5: the top triplets are used together with keywords
# (duplicate triplets are removed, per the original note).
response = query_engine.query("Tell me more about what the author worked on at Interleaf")

In [None]:
# Wrap the answer in <b> tags and render it as Markdown.
answer_markup = f"<b>{response}</b>"
display(Markdown(answer_markup))

#### Query with more global (cross-node) context

In [None]:
# Same hybrid settings as the previous query engine, plus
# explore_global_knowledge=True.
query_engine = index.as_query_engine(
    explore_global_knowledge=True,
    similarity_top_k=5,
    embedding_mode='hybrid',
    response_mode="tree_summarize",
    include_text=True,
)

response = query_engine.query("Tell me more about what the author and Lisp")

#### Visualizing the Graph

In [27]:
# Export the index as a networkx graph and hand it to a directed pyvis
# network, which is then shown through example.html.
from pyvis.network import Network

g = index.get_networkx_graph()
net = Network(directed=True, cdn_resources="in_line", notebook=True)
net.from_nx(g)
net.show("example.html")

#### [Optional] Try building the graph and manually adding triplets!

In [None]:
from llama_index.node_parser import SimpleNodeParser

In [None]:
# Create a node parser; no arguments are passed, so its defaults apply.
node_parser = SimpleNodeParser()

In [None]:
# Turn the loaded documents into nodes; nodes[0] and nodes[1] are used below
# when triplets are attached by hand.
nodes = node_parser.get_nodes_from_documents(documents)

In [None]:
# not yet implemented
# NOTE(review): the original author's marker above does not say which part is
# unimplemented — confirm before relying on this section.

# Start from an index built over no documents; it reuses the existing storage
# context and rebinds `index`.
index = KnowledgeGraphIndex.from_documents([], storage_context=storage_context)


In [None]:
# Manually add keyword mappings and nodes.
# Each triplet has the form (subject, relationship, object).

# triplets for node 0
node_0_tups = [
    ("author", "worked on", "writing"),
    ("author", "worked on", "programming"),
]

# triplets for node 1
node_1_tups = [
    ('Interleaf', 'made software for', 'creating documents'),
    ('Interleaf', 'added', 'scripting language'),
    ('software', 'generate', 'web sites'),
]

# Upsert every triplet together with the node it belongs to. The node is
# looked up by position on each iteration, node 0 first and then node 1.
for node_pos, node_tups in ((0, node_0_tups), (1, node_1_tups)):
    for tup in node_tups:
        index.upsert_triplet_and_node(tup, nodes[node_pos])

In [None]:
# Query the manually populated index with include_text=False and the
# tree_summarize response mode.
query_engine = index.as_query_engine(
    response_mode="tree_summarize",
    include_text=False,
)

response = query_engine.query("Tell me more about Interleaf")

In [None]:
# Show the string form of the response as the cell's output.
str(response)