In [1]:
# Install llama_index's global handler named "simple" before any other cell runs.
# NOTE(review): the "** Prompt: **" / "** Completion: **" blocks in later cell outputs
# presumably come from this handler — confirm against the llama_index docs.
import llama_index
llama_index.set_global_handler("simple")

In [2]:
import os

# Placeholder value; this assignment overwrites any OPENAI_API_KEY already present in
# the environment.
# NOTE(review): the LLM and embedding model configured later in this notebook are PaLM,
# so this variable may not be needed at all — confirm before keeping it.
os.environ["OPENAI_API_KEY"] = "sk-..."

import logging
import sys

# Send log records to stdout so they appear in the cell output.
# Switch the level to logging.DEBUG for more verbose output.
logging.basicConfig(level=logging.INFO, stream=sys.stdout)

from llama_index import (
    KnowledgeGraphIndex,
    ServiceContext,
    SimpleDirectoryReader,
    SimpleKeywordTableIndex
)
from llama_index.storage.storage_context import StorageContext
from llama_index.graph_stores import NebulaGraphStore
from llama_index.llms import OpenAI

from IPython.display import Markdown, display
from llama_index.llms.palm import PaLM
from llama_index.embeddings import GooglePaLMEmbedding


from llama_index.callbacks import (
    CallbackManager,
    LlamaDebugHandler
)


from llama_index.retrievers import (
    KeywordTableSimpleRetriever
)


In [4]:
# Debug callback handler; print_trace_on_end=True asks it to print a trace when a run ends
# (the "Trace: ..." timing trees in later outputs presumably come from it — TODO confirm).
llama_debug = LlamaDebugHandler(print_trace_on_end=True)
# Wrap the handler in a CallbackManager so it can be passed to ServiceContext.from_defaults.
callback_manager = CallbackManager([llama_debug])

In [5]:
# Read the PaLM API key from the environment rather than committing it to the notebook.
# NOTE(review): a literal key was previously hardcoded in this cell; treat that key as
# leaked and revoke/rotate it.
palm_api_key = os.environ.get("PALM_API_KEY")
if not palm_api_key:
    raise RuntimeError(
        "PALM_API_KEY is not set; export it in the environment before starting the kernel."
    )

# PaLM LLM and PaLM embedding model, both authenticated with the same key.
model = PaLM(api_key=palm_api_key)

model_name = "models/embedding-gecko-001"
embed_model = GooglePaLMEmbedding(model_name=model_name, api_key=palm_api_key)

# Bundle the LLM, embedding model, chunk size and callback manager into one
# ServiceContext that the index-construction cells receive.
service_context = ServiceContext.from_defaults(
    llm=model,
    embed_model=embed_model,
    chunk_size=512,
    callback_manager=callback_manager,
)

In [7]:
from llama_index import download_loader

# Obtain the WikipediaReader loader class by name via download_loader.
WikipediaReader = download_loader("WikipediaReader")

loader = WikipediaReader()

# Load the single page "2023 in science". auto_suggest=False is passed through to the
# loader (presumably so the exact page title is used without auto-correction — TODO confirm).
documents = loader.load_data(pages=["2023 in science"], auto_suggest=False)

In [14]:
# Take the node parser from the service context (which was created with chunk_size=512).
node_parser = service_context.node_parser

# Split the loaded documents into nodes; the bare len() displays how many were produced.
nodes = node_parser.get_nodes_from_documents(documents)
len(nodes)

52

In [18]:
# # %pip install ipython-ngql nebula3-python

# os.environ["NEBULA_USER"] = "root"
# os.environ["NEBULA_PASSWORD"] = "nebula"  # default is "nebula"
# os.environ[
#     "NEBULA_ADDRESS"
# ] = "127.0.0.1:9669"  # assumed we have NebulaGraph installed locally

# Naming passed to the index constructors below.
# The "relationship" edge type and property name are the defaults and could be
# omitted when the knowledge graph is created from an empty one.
space_name = "llamaindex"
edge_types = ["relationship"]
rel_prop_names = ["relationship"]
tags = ["entity"]

In [16]:
# Keep only the first 10 nodes for index construction.
# NOTE(review): this rebinds `nodes`, so the full list produced by the parsing cell is no
# longer reachable under this name; re-run that cell to restore it.
nodes = nodes[:10]

In [19]:
# Build the knowledge-graph index over the selected nodes, asking for up to 10
# triplets per chunk and storing embeddings alongside them.
# NOTE(review): space_name / edge_types / rel_prop_names / tags look like graph-store
# settings, and no storage_context is passed here — confirm they have any effect.
kg_index = KnowledgeGraphIndex(
    nodes,
    service_context=service_context,
    max_triplets_per_chunk=10,
    include_embeddings=True,
    show_progress=True,
    space_name=space_name,
    edge_types=edge_types,
    rel_prop_names=rel_prop_names,
    tags=tags,
)

Processing nodes:   0%|          | 0/10 [00:00<?, ?it/s]

** Prompt: **
Some text is provided below. Given the text, extract up to 10 knowledge triplets in the form of (subject, predicate, object). Avoid stopwords.
---------------------
Example:Text: Alice is Bob's mother.Triplets:
(Alice, is mother of, Bob)
Text: Philz is a coffee shop founded in Berkeley in 1982.
Triplets:
(Philz, is, coffee shop)
(Philz, founded in, Berkeley)
(Philz, founded in, 1982)
---------------------
Text: The following scientific events occurred or were scheduled to occur in 2023.


== Events ==


=== January ===


=== February ===


=== March ===


=== April ===
3 April
Five employees at the National Hurricane Center publish a tropical cyclone report (TCR) on Hurricane Ian, which officially upgrades the hurricane from a Category 4 to a Category 5 on the Saffir–Simpson scale. The TCR also stated that Hurricane Ian caused, with 90% confidence, $112.9 billion worth of damage to the United States, which made Ian the third-costliest United States hurricane on record as 

Generating embeddings:   0%|          | 0/11 [00:00<?, ?it/s]

** Prompt: **
Some text is provided below. Given the text, extract up to 10 knowledge triplets in the form of (subject, predicate, object). Avoid stopwords.
---------------------
Example:Text: Alice is Bob's mother.Triplets:
(Alice, is mother of, Bob)
Text: Philz is a coffee shop founded in Berkeley in 1982.
Triplets:
(Philz, is, coffee shop)
(Philz, founded in, Berkeley)
(Philz, founded in, 1982)
---------------------
Text: There was only limited research on how neurons get their energy in the context of links between glucose metabolism and cognition (brain health and performance).
10 April – A study expands upon the role of elites' unsustainable consumption in urban water crises. In Cape Town, for example, the wealthiest 14% of the population use half of the city's water, while the poorest 62% use just a quarter.
11 April – A study reports that genomic surveillance (GS) shows that a clonal lineage of the wheat blast fungus has spread worldwide and that there is a need for GS to track

Generating embeddings:   0%|          | 0/10 [00:00<?, ?it/s]

** Prompt: **
Some text is provided below. Given the text, extract up to 10 knowledge triplets in the form of (subject, predicate, object). Avoid stopwords.
---------------------
Example:Text: Alice is Bob's mother.Triplets:
(Alice, is mother of, Bob)
Text: Philz is a coffee shop founded in Berkeley in 1982.
Triplets:
(Philz, is, coffee shop)
(Philz, founded in, Berkeley)
(Philz, founded in, 1982)
---------------------
Text: It suggests that the EEI is the "most fundamental global climate indicator" to gauge climate change mitigation efforts.
18 April
Astronomers conclude that "... planets in the habitable zones of stars with low metallicity are the best targets to search for complex life on land."
A university reports a study (29 Mar) affirming the high level of economic losses from biological invasions, showing they have risen to the level of economic damage costs from floods or earthquakes, which are also rising.
19 April
A bolide is observed over Ukraine and Belarus for about five 

Generating embeddings:   0%|          | 0/10 [00:00<?, ?it/s]

** Prompt: **
Some text is provided below. Given the text, extract up to 10 knowledge triplets in the form of (subject, predicate, object). Avoid stopwords.
---------------------
Example:Text: Alice is Bob's mother.Triplets:
(Alice, is mother of, Bob)
Text: Philz is a coffee shop founded in Berkeley in 1982.
Triplets:
(Philz, is, coffee shop)
(Philz, founded in, Berkeley)
(Philz, founded in, 1982)
---------------------
Text: It finds that the combined ice loss in these regions has more than tripled since the early 1990s, with 2019 seeing the greatest losses of any year on record. These findings have implications for future sea level rise.
Paleoneurologists publish the first neuroevolutionary timeline about correlations of changes in the shape of the cerebral cortex and functions, showing "variability in surface geometry relates to species' ecology and behaviour" and cognition. It characterizes many of the neuromorphological events in the origin of distinct human intelligence over the p

Generating embeddings:   0%|          | 0/8 [00:00<?, ?it/s]

** Prompt: **
Some text is provided below. Given the text, extract up to 10 knowledge triplets in the form of (subject, predicate, object). Avoid stopwords.
---------------------
Example:Text: Alice is Bob's mother.Triplets:
(Alice, is mother of, Bob)
Text: Philz is a coffee shop founded in Berkeley in 1982.
Triplets:
(Philz, is, coffee shop)
(Philz, founded in, Berkeley)
(Philz, founded in, 1982)
---------------------
Text: 21 April – Researchers report the development of neuromorphic AI hardware using nanowires (see also 2020-04-20) physically mimicking the brain's activity in identifying and remembering an image from memory. On 26 April, a university reports on a demonstration (11 Mar) of multisensory motion cue integration by a neuromorphic nerve for robots.24 April
Astronomers release close-up global images, for the first time, of the Martian moon Deimos that were taken by the Mars Hope orbiter.
The first review of issues identified in meta-science of metascience is published, pro

Generating embeddings:   0%|          | 0/10 [00:00<?, ?it/s]

** Prompt: **
Some text is provided below. Given the text, extract up to 10 knowledge triplets in the form of (subject, predicate, object). Avoid stopwords.
---------------------
Example:Text: Alice is Bob's mother.Triplets:
(Alice, is mother of, Bob)
Text: Philz is a coffee shop founded in Berkeley in 1982.
Triplets:
(Philz, is, coffee shop)
(Philz, founded in, Berkeley)
(Philz, founded in, 1982)
---------------------
Text: 25 April
Scientists, based on new evidence, conclude that Rosalind Franklin was a contributor and "equal player" in the discovery process of DNA, rather than otherwise, as may have been presented subsequently after the time of the discovery.
The first gene silencing approach to Alzheimer's disease is reported, with a drug called BIIB080 used on the microtubule-associated protein tau (MAPT) gene. Patients in a Phase 1 trial were found to have a greater than 50% reduction in levels of harmful tau protein after taking the drug.
26 April
Astronomers present an image, f

Generating embeddings:   0%|          | 0/9 [00:00<?, ?it/s]

** Prompt: **
Some text is provided below. Given the text, extract up to 10 knowledge triplets in the form of (subject, predicate, object). Avoid stopwords.
---------------------
Example:Text: Alice is Bob's mother.Triplets:
(Alice, is mother of, Bob)
Text: Philz is a coffee shop founded in Berkeley in 1982.
Triplets:
(Philz, is, coffee shop)
(Philz, founded in, Berkeley)
(Philz, founded in, 1982)
---------------------
Text: Further LLM developments during what has been called an "AI boom" include: local or open source versions of LLaMA which was leaked in March, news outlets report on GPT4-based Auto-GPT that given natural language commands uses the Internet and other tools in attempts to understand and achieve its tasks with unclear or so-far little practicality, a systematic evaluation of answers from four "generative search engines" suggests their outputs "appear informative, but frequently contain unsupported statements and inaccurate citations", a multi-modal open source tool for

Generating embeddings:   0%|          | 0/3 [00:00<?, ?it/s]

** Prompt: **
Some text is provided below. Given the text, extract up to 10 knowledge triplets in the form of (subject, predicate, object). Avoid stopwords.
---------------------
Example:Text: Alice is Bob's mother.Triplets:
(Alice, is mother of, Bob)
Text: Philz is a coffee shop founded in Berkeley in 1982.
Triplets:
(Philz, is, coffee shop)
(Philz, founded in, Berkeley)
(Philz, founded in, 1982)
---------------------
Text: Progress in screening: an α-synuclein SAA (assay) against Parkinson's disease (12 Apr), and exogenously administered bioengineered sensors that amplify urinary cancer biomarkers for detection (24 Apr).
Promising innovations relating to global challenges are reported: a laser-using drone-based methane plume localization method, approval of the first yeast-based cow-free dairy (Remilk), a Tor browser-equivalent Web browser for privacy-protected browsing when using a VPN (Mullvad browser), a concentrated solar-to-hydrogen device approaching viability, a method for fat

Generating embeddings:   0%|          | 0/5 [00:00<?, ?it/s]

** Prompt: **
Some text is provided below. Given the text, extract up to 10 knowledge triplets in the form of (subject, predicate, object). Avoid stopwords.
---------------------
Example:Text: Alice is Bob's mother.Triplets:
(Alice, is mother of, Bob)
Text: Philz is a coffee shop founded in Berkeley in 1982.
Triplets:
(Philz, is, coffee shop)
(Philz, founded in, Berkeley)
(Philz, founded in, 1982)
---------------------
Text: It provides data and analysis about the brain process of dying (terminal loss of sentience and life) and near-death experiences.
2 May
A new AI algorithm developed by Baidu is shown to boost the antibody response of COVID-19 mRNA vaccines by 128 times.
A single-molecule valve is demonstrated, a breakthrough in nanoscale control of fluids.
Scientists report economic factors of neurology or mental health and cognition during child development: association of low income with brain structure and hippocampal volume, stronger associations in U.S. states with higher cost 

Generating embeddings:   0%|          | 0/10 [00:00<?, ?it/s]

** Prompt: **
Some text is provided below. Given the text, extract up to 10 knowledge triplets in the form of (subject, predicate, object). Avoid stopwords.
---------------------
Example:Text: Alice is Bob's mother.Triplets:
(Alice, is mother of, Bob)
Text: Philz is a coffee shop founded in Berkeley in 1982.
Triplets:
(Philz, is, coffee shop)
(Philz, founded in, Berkeley)
(Philz, founded in, 1982)
---------------------
Text: AI successfully identifies people at the highest risk for pancreatic cancer up to three years before diagnosis, using solely the patients' medical records.
10 May
A rough draft of the human "pan-genome" is presented, consisting of 47 genomes from a cohort of genetically diverse individuals. This aims to improve medical research by building on the earlier Human Genome Project.
Scientists demonstrate with experimental evolution how macroscopic multicellularity could have emerged on Earth.
11 May – The discovery of 62 new moons of Saturn is reported, taking its total 

Generating embeddings:   0%|          | 0/16 [00:00<?, ?it/s]

**********
Trace: index_construction
    |_templating ->  0.0 seconds
    |_llm ->  2.16944 seconds
    |_embedding ->  0.772129 seconds
    |_embedding ->  0.825233 seconds
    |_templating ->  0.000951 seconds
    |_llm ->  2.853845 seconds
    |_embedding ->  1.414054 seconds
    |_templating ->  0.0 seconds
    |_llm ->  2.50065 seconds
    |_embedding ->  0.790702 seconds
    |_templating ->  0.0 seconds
    |_llm ->  2.90256 seconds
    |_embedding ->  0.747081 seconds
    |_templating ->  0.0 seconds
    |_llm ->  2.572676 seconds
    |_embedding ->  0.785153 seconds
    |_templating ->  0.0 seconds
    |_llm ->  2.120164 seconds
    |_embedding ->  0.780705 seconds
    |_templating ->  0.0 seconds
    |_llm ->  1.976964 seconds
    |_embedding ->  1.17993 seconds
    |_templating ->  0.0 seconds
    |_llm ->  2.420715 seconds
    |_embedding ->  0.567187 seconds
    |_templating ->  0.0 seconds
    |_llm ->  4.108671 seconds
    |_embedding ->  0.779216 seconds
    |_templating

In [24]:
# Build a keyword-table index over the same nodes.
# Only the service context and the progress flag are passed: graph-store settings
# (space_name, edge_types, rel_prop_names, tags) and include_embeddings belong to the
# knowledge-graph index and are not keyword-table options.
table_index = SimpleKeywordTableIndex(
    nodes,
    service_context=service_context,
    show_progress=True,
)

Extracting keywords from nodes:   0%|          | 0/10 [00:00<?, ?it/s]

**********
Trace: index_construction
**********


In [21]:
# Persist the keyword-table index's storage context under "table"
# (presumably a directory relative to the working directory — TODO confirm).
table_index.storage_context.persist("table")

In [23]:
# Display the storage context to see which concrete stores back the keyword-table index.
table_index.storage_context

StorageContext(docstore=<llama_index.storage.docstore.simple_docstore.SimpleDocumentStore object at 0x000001DFE1D5B250>, index_store=<llama_index.storage.index_store.simple_index_store.SimpleIndexStore object at 0x000001DFE1D5B460>, vector_stores={'default': <llama_index.vector_stores.simple.SimpleVectorStore object at 0x000001DFE1D5B8E0>, 'image': <llama_index.vector_stores.simple.SimpleVectorStore object at 0x000001DFE1D5B9A0>}, graph_store=<llama_index.graph_stores.simple.SimpleGraphStore object at 0x000001DFE1D5B370>)

In [34]:
from llama_index.retrievers import KGTableRetriever, KeywordTableSimpleRetriever

# Each retriever is constructed from the index it reads from; the tuple displays both objects.
KGTableRetriever(kg_index), KeywordTableSimpleRetriever(table_index)

In [25]:
# Persist the knowledge-graph index's storage context under "kggindex".
# NOTE(review): the doubled "g" looks like a typo for "kgindex"; it is a runtime path, so
# it is left untouched here — confirm before renaming.
kg_index.storage_context.persist("kggindex")

In [26]:
# Default retriever for the knowledge-graph index.
kg_retriever = kg_index.as_retriever()

In [27]:
# Display the retriever to see its concrete class.
kg_retriever

<llama_index.indices.knowledge_graph.retrievers.KGTableRetriever at 0x1dfe1de7430>

In [28]:
# Default retriever for the keyword-table index.
tb_retriever = table_index.as_retriever()

In [29]:
# Display the retriever to see its concrete class.
tb_retriever

<llama_index.indices.keyword_table.retrievers.KeywordTableSimpleRetriever at 0x1dfe1df2a30>

In [30]:
# Run retrieval only against the knowledge-graph index; the bare expression displays the
# returned list of scored nodes.
kg_retriever.retrieve("what was the conclusion or tcr report?")

** Prompt: **
A question is provided below. Given the question, extract up to 10 keywords from the text. Focus on extracting the keywords that we can use to best lookup answers to the question. Avoid stopwords.
---------------------
what was the conclusion or tcr report?
---------------------
Provide keywords in the following comma-separated format: 'KEYWORDS: <keywords>'

**************************************************
** Completion: **
KEYWORDS: conclusion, report, tcr
**************************************************


INFO:llama_index.indices.knowledge_graph.retrievers:> Querying with idx: 302aa97c-a083-45b2-8da5-25772b4ca7eb: 21 April – Researchers report the development of neuromorphic AI hardware usi...
INFO:llama_index.indices.knowledge_graph.retrievers:> Querying with idx: 6de9c404-0ca3-4b31-bc2f-c744ef99e4e5: 25 April
Scientists, based on new evidence, conclude that Rosalind Franklin w...
INFO:llama_index.indices.knowledge_graph.retrievers:> Querying with idx: 7af1ec6e-33

[NodeWithScore(node=TextNode(id_='302aa97c-a083-45b2-8da5-25772b4ca7eb', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='12a9c208-b709-437d-b28a-f2c0d410d38c', node_type=<ObjectType.DOCUMENT: '4'>, metadata={}, hash='a34043b85ae16710472dcbb9e5192e2ef2759b592b2716cf9ac48c2e310cce81'), <NodeRelationship.PREVIOUS: '2'>: RelatedNodeInfo(node_id='f6484cc8-70b5-43b7-b960-65d423c3bac0', node_type=<ObjectType.TEXT: '1'>, metadata={}, hash='2728e8b041d8d3053773357378fa23f36520a1accb2989cd92771fddda8a283e'), <NodeRelationship.NEXT: '3'>: RelatedNodeInfo(node_id='6de9c404-0ca3-4b31-bc2f-c744ef99e4e5', node_type=<ObjectType.TEXT: '1'>, metadata={}, hash='f7c96122396ec43d626cbcea32dbdf0366a57bf75b932881d781707c730fabb5')}, hash='9c2dff8ddc0fd527b80c8e99a7fe1b54d5de6b71691aaa07f2c7f87610a0ee2a', text='21 April – Researchers report the development of neuromorphic AI hardware using nano

In [32]:
# Keep the query result under its own name so `kg_retriever` continues to refer to the
# retriever object rather than being overwritten with a response.
kg_response = kg_index.as_query_engine().query("what was the conclusion or tcr report?")
kg_response

** Prompt: **
A question is provided below. Given the question, extract up to 10 keywords from the text. Focus on extracting the keywords that we can use to best lookup answers to the question. Avoid stopwords.
---------------------
what was the conclusion or tcr report?
---------------------
Provide keywords in the following comma-separated format: 'KEYWORDS: <keywords>'

**************************************************
** Completion: **
KEYWORDS: conclusion, report, tcr
**************************************************


INFO:llama_index.indices.knowledge_graph.retrievers:> Querying with idx: 302aa97c-a083-45b2-8da5-25772b4ca7eb: 21 April – Researchers report the development of neuromorphic AI hardware usi...
INFO:llama_index.indices.knowledge_graph.retrievers:> Querying with idx: 6de9c404-0ca3-4b31-bc2f-c744ef99e4e5: 25 April
Scientists, based on new evidence, conclude that Rosalind Franklin w...
INFO:llama_index.indices.knowledge_graph.retrievers:> Querying with idx: 7af1ec6e-33

Trace: query
    |_query ->  6.549661 seconds
      |_synthesize ->  4.867509 seconds
        |_templating ->  0.0 seconds
        |_llm ->  4.854552 seconds
**********


In [33]:
# Run the same question through the keyword-table retriever; the bare expression displays
# the returned list of scored nodes.
tb_retriever.retrieve("what was the conclusion or tcr report?")

INFO:llama_index.indices.keyword_table.retrievers:> Starting query: what was the conclusion or tcr report?
INFO:llama_index.indices.keyword_table.retrievers:query keywords: ['conclusion', 'report', 'tcr']
INFO:llama_index.indices.keyword_table.retrievers:> Extracted keywords: ['report', 'tcr']


[NodeWithScore(node=TextNode(id_='f64b610f-0cf5-4f2a-a631-20486d8f03dd', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='12a9c208-b709-437d-b28a-f2c0d410d38c', node_type=<ObjectType.DOCUMENT: '4'>, metadata={}, hash='a34043b85ae16710472dcbb9e5192e2ef2759b592b2716cf9ac48c2e310cce81'), <NodeRelationship.PREVIOUS: '2'>: RelatedNodeInfo(node_id='a35065e5-e27b-42da-a550-40ed04e852d3', node_type=<ObjectType.TEXT: '1'>, metadata={}, hash='160e9eff5abe2ef33bf419b2df401e13972dba4986ba4f85121eeaf6bb2d95a6'), <NodeRelationship.NEXT: '3'>: RelatedNodeInfo(node_id='f6484cc8-70b5-43b7-b960-65d423c3bac0', node_type=<ObjectType.TEXT: '1'>, metadata={}, hash='2728e8b041d8d3053773357378fa23f36520a1accb2989cd92771fddda8a283e')}, hash='8b0ea29371c29cb70e18389d583ee07d708b6badb1cf915cd8710a1202f7f3e2', text='It suggests that the EEI is the "most fundamental global climate indicator" to gauge

In [38]:
# Build a query engine from each index; the displayed tuple shows the concrete class of each.
table_index.as_query_engine(), kg_index.as_query_engine()

(<llama_index.query_engine.retriever_query_engine.RetrieverQueryEngine at 0x1dfe1f7d670>,
 <llama_index.query_engine.retriever_query_engine.RetrieverQueryEngine at 0x1dfe1f8fe50>)

In [36]:
# Ask the keyword-table query engine the same question; the bare expression displays the
# returned Response object, including its source nodes.
table_index.as_query_engine().query("what was the conclusion or tcr report?")

INFO:llama_index.indices.keyword_table.retrievers:> Starting query: what was the conclusion or tcr report?
INFO:llama_index.indices.keyword_table.retrievers:query keywords: ['conclusion', 'report', 'tcr']
INFO:llama_index.indices.keyword_table.retrievers:> Extracted keywords: ['report', 'tcr']
** Prompt: **
Context information is below.
---------------------
It suggests that the EEI is the "most fundamental global climate indicator" to gauge climate change mitigation efforts.
18 April
Astronomers conclude that "... planets in the habitable zones of stars with low metallicity are the best targets to search for complex life on land."
A university reports a study (29 Mar) affirming the high level of economic losses from biological invasions, showing they have risen to the level of economic damage costs from floods or earthquakes, which are also rising.
19 April
A bolide is observed over Ukraine and Belarus for about five seconds. It is first observed at an altitude of 98 km above Velyka D

Response(response='Hurricane Ian caused, with 90% confidence, $112.9 billion worth of damage to the United States, which made Ian the third-costliest United States hurricane on record as well as the costliest hurricane to strike Florida on record.', source_nodes=[NodeWithScore(node=TextNode(id_='f64b610f-0cf5-4f2a-a631-20486d8f03dd', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='12a9c208-b709-437d-b28a-f2c0d410d38c', node_type=<ObjectType.DOCUMENT: '4'>, metadata={}, hash='a34043b85ae16710472dcbb9e5192e2ef2759b592b2716cf9ac48c2e310cce81'), <NodeRelationship.PREVIOUS: '2'>: RelatedNodeInfo(node_id='a35065e5-e27b-42da-a550-40ed04e852d3', node_type=<ObjectType.TEXT: '1'>, metadata={}, hash='160e9eff5abe2ef33bf419b2df401e13972dba4986ba4f85121eeaf6bb2d95a6'), <NodeRelationship.NEXT: '3'>: RelatedNodeInfo(node_id='f6484cc8-70b5-43b7-b960-65d423c3bac0', node_type=<ObjectType.