# Knowledge Graph Index

In [1]:
# Credentials and endpoint for the OpenAI-compatible API used by this notebook.
# NOTE(review): "EMPTY" looks like a placeholder key that the server ignores - confirm.
import os
import time

import openai

# Each value is written once and reused, so the environment variables and the
# openai module attributes always agree.
OPENAI_API_KEY = "EMPTY"
OPENAI_API_BASE = "http://10.0.0.222:30307/v1"

os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY
os.environ['OPENAI_API_BASE'] = OPENAI_API_BASE
openai.api_key = OPENAI_API_KEY
openai.api_base = OPENAI_API_BASE

# Model name requested from the endpoint (alternative kept for reference).
#model = "Writer/camel-5b-hf"
model = "mosaicml/mpt-30b-instruct"

In [2]:
import logging
import sys

# Send log records at INFO level and above to stdout.
logging.basicConfig(level=logging.INFO, stream=sys.stdout)

# Make the parent directory importable: it holds the kron extensions to
# llama_index that add support for OpenAI-compatible APIs.
sys.path.append('../')

## Using Knowledge Graph

### Building the Knowledge Graph

In [3]:

from llama_index.storage.storage_context import StorageContext
from llama_index import SimpleDirectoryReader, ServiceContext
from llama_index.graph_stores import SimpleGraphStore 
from llama_index.langchain_helpers.text_splitter import SentenceSplitter
from llama_index.node_parser import SimpleNodeParser

import tiktoken

from kron.llm_predictor.KronOpenAILLM import OpenAI
from kron.llm_predictor.KronLLMPredictor import KronLLMPredictor
from kron.indices.knowledge_graph.KronKnowledgeGraphIndex import KronKnowledgeGraphIndex 
from kron.prompts.kg_prompts import KRON_KG_TRIPLET_EXTRACT_PROMPT

INFO:numexpr.utils:NumExpr defaulting to 8 threads.


In [4]:
# Replace llama_index's global tokenizer with a GPT-2 encoding that accepts the
# "<|endoftext|>" special token (writer/camel uses endoftext).
from llama_index.utils import globals_helper

enc = tiktoken.get_encoding("gpt2")


def tokenizer(text):
    """Encode `text` with the gpt2 encoding, allowing the "<|endoftext|>" token."""
    return enc.encode(text, allowed_special={"<|endoftext|>"})


globals_helper._tokenizer = tokenizer

In [5]:
# Load the source documents from the data/ directory.
reader = SimpleDirectoryReader('data/')
documents = reader.load_data()

In [6]:
# LLM client (kron's OpenAI class) for the configured model, with temperature 0.
llm = OpenAI(model=model, temperature=0)

# Cap on generated tokens. Original note: chunk_size + prompt_length + expected
# length of the returned triples must be less than max_tokens.
# NOTE(review): the printed metadata reports context_window=2048 and
# num_output=274, so the limit that sum must respect is presumably the context
# window rather than max_tokens - confirm.
llm.max_tokens = 274

# Wrap the LLM in the kron predictor and show the metadata it reports.
llm_predictor = KronLLMPredictor(llm)
print(llm_predictor.metadata)

context_window=2048 num_output=274 is_chat_model=False


In [7]:
# Sentence-based splitter; sizes are presumably measured in tokens - TODO confirm.
text_splitter = SentenceSplitter(
    chunk_size=208,
    chunk_overlap=64,
)

In [8]:
# Node parser configured with the text_splitter instance.
node_parser = SimpleNodeParser(
    text_splitter=text_splitter,
)

In [9]:
# Service context combining the node parser and the LLM predictor.
service_context = ServiceContext.from_defaults(
    node_parser=node_parser,
    llm_predictor=llm_predictor,
)

In [10]:

# Graph store and the storage context that hands it to the index.
graph_store = SimpleGraphStore()
storage_context = StorageContext.from_defaults(graph_store=graph_store)

# NOTE: can take a while! (a recorded run reported about 8620 s for 123 nodes)

# Time the build with perf_counter(): it is monotonic, so the measured duration
# cannot be distorted by system clock adjustments during a long run, which
# time.time() does not guarantee.
start = time.perf_counter()
index = KronKnowledgeGraphIndex.from_documents(
    documents,
    max_triplets_per_chunk=2,  # at most 2 triplets extracted per chunk
    storage_context=storage_context,
    service_context=service_context,
)
end = time.perf_counter()
print(f"Knowledge Graph built in: {end-start}s")

Processing nodes:   0%|          | 0/123 [00:00<?, ?it/s]

Knowledge Graph built in: 8620.040635824203s


In [11]:
# Persist the index's storage context to a directory named after the model,
# with "/" in the model name replaced by "-".
persist_path = model.replace('/', '-') + "-default"
index.storage_context.persist(persist_dir=persist_path)

In [12]:
## create graph
# Convert the index to a networkx graph and render it to an HTML file with pyvis.
from pyvis.network import Network

g = index.get_networkx_graph()
net = Network(
    notebook=True,
    directed=True,
    cdn_resources="in_line",
    height='800px',
    width='100%',
)
net.from_nx(g)
# Optional: uncomment to show the pyvis control buttons.
#net.show_buttons(filter_=True)

# Output file name is derived from the model name ("/" replaced by "-").
html_name = model.replace('/', '-') + "-default-kg-prompt.html"
net.show(html_name)

mosaicml-mpt-30b-instruct-default-kg-prompt.html
