In [None]:
import os
from getpass import getpass

# Do not store the API key in the notebook. Reuse a key that is already
# exported in the environment; otherwise prompt for it without echoing,
# so the secret never lands in the saved file or its outputs.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

In [98]:
# Patch asyncio through nest_asyncio before any async component runs.
# NOTE(review): presumably needed because the kernel already runs an event
# loop and some library calls start their own — confirm it is still required.
import nest_asyncio
nest_asyncio.apply()

In [99]:
import asyncio
from pathlib import Path
import neo4j
from neo4j_graphrag.experimental.components.pdf_loader import PdfLoader
from neo4j_graphrag.experimental.components.text_splitters.fixed_size_splitter import (
    FixedSizeSplitter,
)
from neo4j_graphrag.experimental.components.types import TextChunks
from neo4j_graphrag.experimental.components.embedder import TextChunkEmbedder
from neo4j_graphrag.embeddings.openai import OpenAIEmbeddings
from neo4j_graphrag.experimental.components.lexical_graph import (
    LexicalGraphBuilder,
)
from neo4j_graphrag.experimental.components.types import (
    GraphResult,
    LexicalGraphConfig,
    TextChunk,
    TextChunks,
    DocumentInfo,
)
from neo4j_graphrag.experimental.components.kg_writer import Neo4jWriter
from neo4j_graphrag.experimental.components.types import Neo4jGraph
from neo4j_graphrag.experimental.components.neo4j_reader import Neo4jChunkReader
from neo4j_graphrag.experimental.components.schema import (
    SchemaBuilder,
    SchemaEntity,
    SchemaProperty,
    SchemaRelation,
)
from neo4j_graphrag.experimental.components.entity_relation_extractor import (
    LLMEntityRelationExtractor,
)
from neo4j_graphrag.experimental.components.types import SchemaEnforcementMode
from neo4j_graphrag.llm import OpenAILLM

In [100]:
# PDF to ingest; a relative path, so it is resolved against the kernel's
# current working directory.
file_path = "Harry Potter and the Chamber of Secrets Summary.pdf"


In [101]:
# Load the PDF. The result exposes the extracted text as `document.text`
# and the source record as `document.document_info` (both used below).
loader = PdfLoader()
document = await loader.run(filepath=file_path)


In [102]:
# Sanity check: show the first 200 characters of the extracted text.
print(document.text[:200])

Harry Potter and the Chamber of 
Secrets Full Book Summary
Harry Potter and the Chamber of Secrets begins when Harry is spending a 
miserable summer with his only remaining family, the Dursleys. Durin


In [103]:
# Show the document record produced by the loader (path, metadata, uid).
print(document.document_info)

path='Harry Potter and the Chamber of Secrets Summary.pdf' metadata=None uid='0065aa4e-92ef-4da7-b548-8f04fbb1054a'


In [104]:
splitter = FixedSizeSplitter(
    # optionally, configure chunk_size, chunk_overlap, and approximate flag
    chunk_size=300,
    chunk_overlap=200,
    approximate = False
)
chunks = await splitter.run(text=document.text)
print(chunks)



In [105]:
# Inspect the text of the first chunk to check where the splitter cut it.
print(chunks.chunks[0].text) 


Harry Potter and the Chamber of 
Secrets Full Book Summary
Harry Potter and the Chamber of Secrets begins when Harry is spending a 
miserable summer with his only remaining family, the Dursleys. During a 
dinner party hosted by his uncle and aunt, Harry is visited by Dobby, a house-
elf. Dobby warns


In [106]:
# Embed every chunk with OpenAI embeddings (needs OPENAI_API_KEY to be set).
# The embedded chunks come back as `embed`; each chunk carries its vector
# under metadata["embedding"], as read in the inspection cell below.
text_chunk_embedder = TextChunkEmbedder(embedder=OpenAIEmbeddings())
embed = await text_chunk_embedder.run(text_chunks=chunks)


In [107]:
# Dump the embedded chunks. This prints every chunk including its full
# embedding vector, so the output is very large.
print(embed)




In [108]:
# Peek at the first embedded chunk: the start of its text and the first
# few components of its embedding vector.
chunk = embed.chunks[0]
text, embedding = chunk.text, chunk.metadata['embedding']

print("Text:", text[:100], "...")
print("\n\n")
print("Embedding (first 5 dims):", embedding[:5])

Text: Harry Potter and the Chamber of 
Secrets Full Book Summary
Harry Potter and the Chamber of Secrets b ...



Embedding (first 5 dims): [0.004410936962813139, -0.01922260783612728, -0.027209002524614334, -0.041839759796857834, -0.004808940924704075]


In [109]:
# Node labels, relationship types and the embedding property name used for
# the lexical graph in this notebook. LexicalGraphConfig() with no
# arguments would use the library defaults instead.
config = LexicalGraphConfig(
    # node labels
    document_node_label="Document123",
    chunk_node_label="Chunk123",
    # property that stores each chunk's embedding
    chunk_embedding_property="embeddings",
    # relationship types
    chunk_to_document_relationship_type="PART_OF_DOCUMENT",
    next_chunk_relationship_type="NEXT_CHUNK",
    node_to_chunk_relationship_type="PART_OF_CHUNK",
)

In [110]:

lex_builder = LexicalGraphBuilder(config=config)

graph_result = await lex_builder.run(
    text_chunks=chunks,
    document_info=DocumentInfo(path="HarryPotter.pdf")
)

In [111]:
# Pull the graph object out of the builder result; it is what gets written
# to Neo4j below.
neo4j_graph = graph_result.graph

In [112]:
# Dump the whole lexical graph for inspection; the output can be large.
print(neo4j_graph)



In [None]:
with neo4j.GraphDatabase.driver(
    "neo4j+ssc://.databases.neo4j.io", auth=("neo4j", "")
) as driver:
    writer = Neo4jWriter(driver)
    await writer.run(neo4j_graph)

In [None]:
config = LexicalGraphConfig(
    chunk_node_label="Chunk123",
    document_node_label="Document123",
    chunk_to_document_relationship_type="PART_OF_DOCUMENT",
    next_chunk_relationship_type="NEXT_CHUNK",
    node_to_chunk_relationship_type="PART_OF_CHUNK",
    chunk_embedding_property="embeddings",
)
driver = neo4j.GraphDatabase.driver(
    "neo4j+ssc://.databases.neo4j.io", auth=("neo4j", "")
)
reader = Neo4jChunkReader(driver)
result = await reader.run(lexical_graph_config=config)

In [66]:
# Display the chunks returned by the Neo4j chunk reader.
result



In [71]:
schema_builder = SchemaBuilder()
schema = await schema_builder.run(
    entities=[
        SchemaEntity(
            label="Person",
            properties=[
                SchemaProperty(name="name", type="STRING"),
                SchemaProperty(name="place_of_birth", type="STRING"),
                SchemaProperty(name="date_of_birth", type="DATE"),
            ],
        ),
        SchemaEntity(
            label="Organization",
            properties=[
                SchemaProperty(name="name", type="STRING"),
                SchemaProperty(name="country", type="STRING"),
            ],
        ),
        SchemaEntity(                              # ← NEW
            label="Field",
            properties=[
                SchemaProperty(name="name", type="STRING"),
            ],
        ),
    ],
    relations=[
        SchemaRelation(
            label="WORKED_ON",
        ),
        SchemaRelation(
            label="WORKED_FOR",
        ),
    ],
    potential_schema=[
        ("Person", "WORKED_ON", "Field"),
        ("Person", "WORKED_FOR", "Organization"),
    ],
)

In [72]:
# Display the schema configuration produced by the schema builder.
schema

SchemaConfig(entities={'Person': {'label': 'Person', 'description': '', 'properties': [{'name': 'name', 'type': 'STRING', 'description': ''}, {'name': 'place_of_birth', 'type': 'STRING', 'description': ''}, {'name': 'date_of_birth', 'type': 'DATE', 'description': ''}]}, 'Organization': {'label': 'Organization', 'description': '', 'properties': [{'name': 'name', 'type': 'STRING', 'description': ''}, {'name': 'country', 'type': 'STRING', 'description': ''}]}, 'Field': {'label': 'Field', 'description': '', 'properties': [{'name': 'name', 'type': 'STRING', 'description': ''}]}}, relations={'WORKED_ON': {'label': 'WORKED_ON', 'description': '', 'properties': []}, 'WORKED_FOR': {'label': 'WORKED_FOR', 'description': '', 'properties': []}}, potential_schema=[('Person', 'WORKED_ON', 'Field'), ('Person', 'WORKED_FOR', 'Organization')])

In [94]:
# LLM-backed entity/relation extractor.
# The model is asked for a JSON object and each response is capped at
# 1000 tokens.
extractor = LLMEntityRelationExtractor(
    llm=OpenAILLM(
        model_name="gpt-4o-mini",
        model_params={"max_tokens": 1000, "response_format": {"type": "json_object"}},
    )
)

In [95]:
neo4j_graph = await extractor.run(
    chunks=chunks,
)

In [96]:
# Display the graph returned by the entity/relation extractor.
neo4j_graph

