# Neo4j Hello World (Notebook)

This notebook connects to a local Neo4j **Community** instance (via Docker), creates a tiny graph, queries it, and runs an embedding-based vector search over it using Ollama.

**Assumes** 
 
 
- The Neo4j service is running at `bolt://localhost:${URI_PORT}`, with `URI_PORT`, the user, and the password set in the `.env` file. **Run `docker compose up -d`** to start it.
- The Ollama service is up on `http://localhost:11434` (Ollama's default address). **Run `ollama serve` and pull the model with `ollama pull nomic-embed-text`** (if it has not been pulled yet).



In [1]:
import os
from dotenv import load_dotenv  
import yaml
from pathlib import Path
from pprint import pprint
from termcolor import cprint
import ollama
import requests

from langchain_neo4j import Neo4jGraph


In [2]:
load_dotenv()  # Load local environment variables from the .env file

# Connection settings, all read from the environment.
# The Bolt port falls back to Neo4j's default (7687): concatenating a missing
# URI_PORT (None) to the URL prefix would otherwise raise a TypeError.
URI = "bolt://localhost:" + (os.getenv("URI_PORT") or "7687")
NEO4J_USER = os.getenv("NEO4J_USER")
NEO4J_PWD = os.getenv("NEO4J_PASSWORD")
NEO4J_DB = os.getenv("NEO4J_DATABASE", "neo4j")    # 👈 choose DB here
EMBED_MODEL = "nomic-embed-text:latest"

# Do not echo the password itself: cell output is stored in the notebook file
# and would leak the credential to anyone the notebook is shared with.
cprint(
    f"Connecting to Neo4j at {URI} with user {NEO4J_USER} "
    f"(password {'set' if NEO4J_PWD else 'NOT set'})",
    "green",
)

[32mConnecting to Neo4j at bolt://localhost:7687 with user neo4j and password test1234[0m


In [3]:
# Read the Cypher statements used throughout the notebook from the YAML file
# that sits next to it.
queries_path = Path("queries.yaml")
with queries_path.open() as queries_file:
    queries = yaml.safe_load(queries_file)

# Last expression of the cell: display the names of the available queries.
queries.keys()

dict_keys(['constraints', 'create_seed', 'match_person', 'match_company', 'match_adjacency', 'add_descriptions', 'create_vector_indexes', 'delete_all'])

## Using the LangChain wrapper for Neo4j

In [4]:
# Knowledge graph (KG): open the connection, reset the database, seed it with
# the sample data and run a few read queries.

kg = Neo4jGraph(url=URI, username=NEO4J_USER, password=NEO4J_PWD, database=NEO4J_DB)

# Switches controlling whether the delete_all queries run at the start of this
# cell and at the end of the embedding cell further down.
wipe_at_init = True
wipe_at_end = False

# Queries are sent straight through the KG object; no driver context is needed.

cprint(f"\n== Connected to Neo4j database: {NEO4J_DB}", "green")

cprint("\n== Creating constraints (if not exist)", "green")
for statement in queries["constraints"]:
    kg.query(statement)
print(" ok")

cprint("\n== Init Cleanup.", "green")
if not wipe_at_init:
    print(" skipped")
else:
    for statement in queries["delete_all"]:
        kg.query(statement)
    print(" ok")

cprint("\n== Creating sample data", "green")
kg.query(queries["create_seed"])
print(" ok")

# The three listings have the same shape: print a banner, run one named query
# and print every returned record (kg.query returns a list).
for banner, query_key in (
    ("\n== Query: list all people", "match_person"),
    ("\n== Query: list all companies", "match_company"),
    ("\n== Query: adjacency (who knows whom)", "match_adjacency"),
):
    cprint(banner, "green")
    records = kg.query(queries[query_key])
    for record in records:
        print(record)

cprint("\n== Query: Adding descriptions, appearance and summaries", "green")
for statement in queries["add_descriptions"]:
    kg.query(statement)
print(" ok")

[32m
== Connected to Neo4j database: neo4j[0m
[32m
== Creating constraints (if not exist)[0m
 ok
[32m
== Init Cleanup.[0m
 ok
[32m
== Creating sample data[0m
 ok
[32m
== Query: list all people[0m
{'name': 'Paula', 'age': 25, 'education': 'Computer Engineering'}
{'name': 'Guillermo', 'age': 26, 'education': 'Industrial Engineering'}
{'name': 'Gabriela', 'age': 26, 'education': 'Physics'}
{'name': 'Iria', 'age': 27, 'education': 'Physics'}
{'name': 'Cristina', 'age': 27, 'education': 'Physics'}
[32m
== Query: list all companies[0m
{'company_name': 'Indra', 'industry': 'Engineering'}
{'company_name': 'CIEMAT', 'industry': 'Scientific Research'}
{'company_name': 'CBM', 'industry': 'Scientific Research'}
[32m
== Query: adjacency (who knows whom)[0m
{'person': 'Cristina', 'knows': ['Gabriela', 'Iria'], 'works_at': 'CBM'}
{'person': 'Gabriela', 'knows': ['Cristina', 'Iria'], 'works_at': 'CIEMAT'}
{'person': 'Guillermo', 'knows': ['Paula', 'Iria'], 'works_at': 'Indra'}
{'person':

In [5]:
# KG RAG

def create_property_embedding(node_type:str, property_name:str, model:str = "nomic-embed-text"):
    """Embed a text property of every matching node that has no embedding yet.

    Selects the nodes labelled ``node_type`` whose ``property_name`` is neither
    NULL nor the empty string and whose ``<property_name>_emb`` property is
    still NULL. Each text is embedded with Ollama and the vector is written
    back to ``<property_name>_emb`` on the node, which is looked up by its
    ``uuid`` property. The text and the first ten vector components are
    printed for every processed node.

    Args:
        node_type: Node label to process, e.g. ``"Person"``.
        property_name: Name of the text property to embed, e.g. ``"person_info"``.
        model: Ollama embedding model. Query embeddings must be produced with
            the same model for the similarity scores to be meaningful.

    Raises:
        ValueError: If ``node_type`` or ``property_name`` is not a plain
            identifier.
    """
    # Labels and property names cannot be passed as Cypher parameters, so they
    # are interpolated into the query text below. Validate them first, before
    # anything is sent to the database, so a malformed value cannot alter the
    # structure of the query.
    for arg_name, value in (("node_type", node_type), ("property_name", property_name)):
        if not isinstance(value, str) or not value.isidentifier():
            raise ValueError(f"{arg_name} must be a plain identifier, got {value!r}")

    cprint(f"\nGenerating embeddings for {node_type} property_name. Property: {property_name}", "green")
    records = list(kg.query(f"""
        MATCH (n:{node_type})
        WHERE n.{property_name} IS NOT NULL AND n.{property_name} <> ''
          AND n.{property_name}_emb IS NULL
        RETURN n.uuid AS uuid, n.{property_name} AS txt
    """))
    for r in records:
        # ollama.embed returns a list of embeddings; a single input yields one.
        vec = ollama.embed(model=model, input=r["txt"])["embeddings"][0]
        kg.query(
            f"""
            MATCH (n:{node_type} {{uuid: $uuid}})
            SET n.{property_name}_emb = $vec
            """,
            params={"uuid": r["uuid"], "vec": vec},
        )
        print(f"  text: {r['txt']}\n  vec: {vec[:10]}")



# Step 1: run the index-creation statements from the queries file.
for index_statement in queries["create_vector_indexes"]:
    kg.query(index_statement)

# List the vector indexes that now exist.
idx = list(kg.query("SHOW VECTOR INDEXES"))
cprint(f"\nFound {len(idx)} vector index entries.", "green")
for entry in idx:
    pprint(entry)

# Steps 2 and 3: embed the descriptive text of Person nodes, then of Company
# nodes. Only nodes that do not have an embedding yet are processed.
for label, text_property in (
    ("Person", "person_info"),
    ("Company", "company_info"),
):
    create_property_embedding(node_type=label, property_name=text_property)

# List the vector indexes once more (optional).
idx = list(kg.query("SHOW VECTOR INDEXES"))
cprint(f"\nFound {len(idx)} vector index entries.", "green")
for entry in idx:
    pprint(entry)

cprint("\n== End Cleanup.", "green")
if not wipe_at_end:
    print(" skipped")
else:
    for statement in queries["delete_all"]:
        kg.query(statement)
    print(" ok")

[32m
Found 2 vector index entries.[0m
{'entityType': 'NODE',
 'id': 14,
 'indexProvider': 'vector-2.0',
 'labelsOrTypes': ['Company'],
 'lastRead': None,
 'name': 'company_info_idx',
 'owningConstraint': None,
 'populationPercent': 100.0,
 'properties': ['company_info_emb'],
 'readCount': None,
 'state': 'ONLINE',
 'type': 'VECTOR'}
{'entityType': 'NODE',
 'id': 13,
 'indexProvider': 'vector-2.0',
 'labelsOrTypes': ['Person'],
 'lastRead': None,
 'name': 'person_info_idx',
 'owningConstraint': None,
 'populationPercent': 100.0,
 'properties': ['person_info_emb'],
 'readCount': None,
 'state': 'ONLINE',
 'type': 'VECTOR'}
[32m
Generating embeddings for Person property_name. Property: person_info[0m
  text: Iria is 27 years old and studied Physics.Iria has blue eyes and long brunette and wavy hair. She likes to paint her nails in red or purple colours. She usually wears long earrings.
  vec: [0.021614064, 0.025623962, -0.15588778, -0.026595086, -0.0063822446, 0.0578347, 0.028258298, 

In [6]:
def create_question_embedding(question:str, model:str = "nomic-embed-text"):
    """Embed a natural-language question with Ollama and return the vector.

    Prints the question and the first ten components of its embedding.

    Args:
        question: Text to embed.
        model: Ollama embedding model. It should be the same model that was
            used to embed the node properties being searched, otherwise the
            vectors are not comparable.

    Returns:
        The embedding of ``question`` (the first entry of the ``"embeddings"``
        list returned by ``ollama.embed``).
    """
    cprint(f"\nGenerating embeddings for question '{question}'", "green")
    vec = ollama.embed(model=model, input=question)["embeddings"][0]
    print(f"  text: {question}\n  vec: {vec[:10]}\n")
    return vec

# Similarity search: embed the question and look up the closest Person nodes
# through the vector index on their description embeddings.
vector_search_cypher = """
    CALL db.index.vector.queryNodes($index_name, $k, $embedding)
    YIELD node, score
    RETURN node.name AS name, score, node.person_info AS person_info
    ORDER BY score DESC
    """

question = "Who shaved its head?"
index_name, k = "person_info_idx", 5  # index to search and number of neighbours
emb = create_question_embedding(question)

search_params = {"index_name": index_name, "k": k, "embedding": emb}
res = kg.query(vector_search_cypher, params=search_params)
for match in res:
    print(match)

[32m
Generating embeddings for question 'Who shaved its head?'[0m
  text: Who shaved its head?
  vec: [0.038594417, 0.005078101, -0.14948216, -0.0074871895, -0.016247157, 0.039757676, -0.01789794, 0.022229806, 0.041795455, 0.028922241]

{'name': 'Guillermo', 'score': 0.818333625793457, 'person_info': 'Guillermo is 26 years old and studied Industrial Engineering.Guillermo has brown eyes and short hair. He has a very fancy shirt that he takes to all important events. He shaved his head this summer.'}
{'name': 'Gabriela', 'score': 0.7629756927490234, 'person_info': "Gabriela is 26 years old and studied Physics.Gabriela has long curly hair with babylights. She's petite and likes to wear hippie-style clothes."}
{'name': 'Paula', 'score': 0.7503161430358887, 'person_info': 'Paula is 25 years old and studied Computer Engineering.Paula short hair in a wolfcut style. She wears long and wide pants and sneakers to the laboratory.'}
{'name': 'Iria', 'score': 0.7488207817077637, 'person_info': 'I