In [4]:
# Shell escape: runs Poetry's `update` command with the quiet flag before any
# import cell executes.
# NOTE(review): `poetry update` may re-resolve dependencies and rewrite the lock
# file on every run — confirm `poetry install` is not what is wanted for
# reproducible Restart-and-Run-All executions.
!poetry update -q

In [5]:
import nest_asyncio
from dotenv import load_dotenv

from configurations.neo4j import *

# Patch asyncio first so that libraries which start their own event loop can
# run inside the notebook's already-running loop.
nest_asyncio.apply()

# Read key/value pairs from a local .env file into the process environment.
load_dotenv()

# Project settings are loaded from a YAML file next to the notebook; the Neo4j
# section is then picked out of them.
# NOTE(review): `Configuration` / `Neo4jConfiguration` arrive via the wildcard
# import above — presumably both live in `configurations.neo4j`; confirm.
CONFIGURATION_FILE = 'configuration.yaml'
configurations = Configuration.load(CONFIGURATION_FILE)
neo4j = Neo4jConfiguration.grab(configurations)

In [6]:
import pyarrow.parquet as pq
from llama_index.core import Document

# Parquet split to read, relative to the notebook's working directory.
train = 'frames-benchmark/train-00000-of-00001.parquet'

# Positional index of the column whose cells become document text.
# NOTE(review): a column name would be sturdier than a position if the schema
# ever changes — confirm the intended column against the parquet schema.
TEXT_COLUMN = 4

# Only this many leading rows are converted into documents.
DOCUMENT_LIMIT = 2

# memory_map=True asks pyarrow to memory-map the file when reading it.
table = pq.read_table(train, memory_map=True)

# Each selected cell is converted to a plain Python value and wrapped in a
# llama-index Document.
documents = [
    Document(text=cell.as_py())
    for cell in table[TEXT_COLUMN][:DOCUMENT_LIMIT]
]

# Displayed as the cell result: number of documents built.
len(documents)

2

In [7]:
from llama_index.graph_stores.neo4j import Neo4jPropertyGraphStore

# Turn the Neo4j configuration object into a mapping and hand its entries to the
# property graph store as keyword arguments.
neo4j_settings = neo4j.dict()
graph_store = Neo4jPropertyGraphStore(**neo4j_settings)

In [12]:
from llama_index.core import PropertyGraphIndex
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI
from llama_index.core.indices.property_graph import DynamicLLMPathExtractor

# Embedding model used by the index.
embed_model = OpenAIEmbedding(model_name="text-embedding-3-small")

# LLM driving the path extraction; temperature 0.0 is requested explicitly.
extraction_llm = OpenAI(model="gpt-4o", temperature=0.0)

# Single knowledge-graph extractor, configured with four workers.
path_extractor = DynamicLLMPathExtractor(
    llm=extraction_llm,
    num_workers=4,
)

# Build the property graph index over `documents`, backed by the Neo4j graph
# store created earlier. show_progress=True produces the progress bars seen in
# the cell output.
index = PropertyGraphIndex.from_documents(
    documents,
    embed_model=embed_model,
    kg_extractors=[path_extractor],
    property_graph_store=graph_store,
    show_progress=True,
)

Parsing nodes: 100%|██████████| 2/2 [00:00<00:00, 55.11it/s]
Extracting and inferring knowledge graph from text: 100%|██████████| 32/32 [01:03<00:00,  1.98s/it]
Generating embeddings: 100%|██████████| 1/1 [00:01<00:00,  1.63s/it]
Generating embeddings: 100%|██████████| 7/7 [00:02<00:00,  2.93it/s]


In [9]:
# include_text=False: do not include the source text in the returned nodes
# (the default, True, includes it).
retriever = index.as_retriever(include_text=False)

# NOTE(review): the recorded output for this question lists triples about
# Buchanan, Herbert Hoover and George W. Bush and nothing about Interleaf or
# Viaweb — confirm the question matches the documents that were indexed.
question = "What happened at Interleaf and Viaweb?"
nodes = retriever.retrieve(question)

# Print the text of every retrieved node, one per line.
for node in nodes:
    print(node.text)

Buchanan -> WORKED_ON -> Covode Committee investigation
Herbert Hoover -> WORKED_ON -> government reorganization after World War II
George W. Bush -> WORKED_ON -> humanitarian efforts after the 2004 Indian Ocean earthquake and tsunami
Buchanan -> WORKED_ON -> trade agreements with the Qing Dynasty and Japan


In [10]:
# Query engine over the same index, this time with include_text=True.
query_engine = index.as_query_engine(include_text=True)

# NOTE(review): the retriever output recorded earlier in this notebook contains
# no Interleaf/Viaweb triples, so the answer printed here may not be grounded in
# the indexed documents — confirm.
question = "What happened at Interleaf and Viaweb?"
response = query_engine.query(question)

# print() applies str() to the response itself.
print(response)

Interleaf was acquired by BroadVision in 2000, while Viaweb was acquired by Yahoo in 1998.
