# Neo4j Hello World (Notebook) - Friends Use Case

This notebook connects to a local Neo4j **Community** instance (via Docker), creates a tiny graph, and queries it.

**Assumes** 
 
 
- Neo4j service is running at `bolt://localhost:${URI_PORT}` with the user and password set in the `.env` file. **Run `docker compose up -d`**.
- Ollama service is up on `http://localhost:11434` (ollama default). **Run `ollama serve` and pull the model `ollama pull nomic-embed-text`** (if not pulled yet).



In [1]:
# Dependencies

import os
from dotenv import load_dotenv  
import yaml
from pathlib import Path
from pprint import pprint
from termcolor import cprint
from neo4j import GraphDatabase

from helpers import helper_folium, helper_leaflet, helper_neo4j, helper_ollama


In [2]:
# Environment variables

load_dotenv()  # Load local environment variables

URI = "bolt://localhost:" + os.environ.get("URI_PORT")
NEO4J_USER = os.environ.get("NEO4J_USER")
NEO4J_PWD = os.environ.get("NEO4J_PASSWORD")
NEO4J_DB = os.getenv("NEO4J_DATABASE", "neo4j")    # 👈 choose DB here

cprint(f"Connecting to Neo4j at {URI} with user {NEO4J_USER} and password {NEO4J_PWD}", "green")

[32mConnecting to Neo4j at bolt://localhost:7687 with user neo4j and password test1234[0m


In [3]:
# Load cypher queries

queries = yaml.safe_load(Path("data/friends/queries_friends.yaml").read_text())
queries.keys()  # list available queries

dict_keys(['constraints', 'create_seed', 'show_people', 'show_companies', 'match_adjacency', 'show_text', 'show_locations', 'show_distances', 'add_text', 'add_locations', 'create_vector_indexes', 'delete_all'])

In [4]:
# Neo4j Driver instance

driver = GraphDatabase.driver(uri=URI, auth=(NEO4J_USER, NEO4J_PWD))

### 1. Create data

<p align="center">
  <img src="media/KG_step1_populate_graph.svg" width="550">
</p>


- **Entities**: Person and Company nodes with unique constraints (unique name and uuid)
- **Relationships**: KNOWS (person-to-person) and WORKS_AT (person-to-company)
- **Properties**: Basic attributes (name, age, education, industry)


In [5]:
# Populate graph

with driver.session(database=NEO4J_DB) as session:
    
    dbinfo = session.run("CALL db.info()").single()
    cprint(f"\nConnected to Neo4j database: {dbinfo['name']}", "green")
    
    cprint("\nCreating constraints (if not exist)", "green")
    for c in queries["constraints"]:
        session.run(c)
    
    cprint("\nInit Cleanup.", "green")
    for q in queries["delete_all"]:
        session.run(q)
        
    cprint("\nCreate data", "green")
    session.run(queries["create_seed"])

    cprint("\nQuery: list all people", "green")
    records = session.run(queries["show_people"]) # <class 'neo4j.work.result.Result'>, convert to  <class 'list'> to see contents: list(records)
    for r in records:
        print(dict(r))
        
    cprint("\nQuery: list all companies", "green")
    records = session.run(queries["show_companies"]) # <class 'neo4j.work.result.Result'>, convert to  <class 'list'> to see contents: list(records)
    for r in records:
        print(dict(r))

    cprint("\nQuery: adjacency (who knows whom)", "green")
    records = session.run(queries["match_adjacency"]) # <class 'neo4j.work.result.Result'>, convert to  <class 'list'> to see contents: list(records)
    for r in records:
        print(dict(r))

[32m
Connected to Neo4j database: neo4j[0m
[32m
Creating constraints (if not exist)[0m
[32m
Init Cleanup.[0m
[32m
Create data[0m
[32m
Query: list all people[0m
{'name': 'Paula', 'age': 25, 'p.gender': 'female', 'education': 'Computer Engineering'}
{'name': 'Guillermo', 'age': 26, 'p.gender': 'male', 'education': 'Industrial Engineering'}
{'name': 'Gabriela', 'age': 26, 'p.gender': 'female', 'education': 'Physics'}
{'name': 'Iciar', 'age': 26, 'p.gender': 'female', 'education': 'Physics'}
{'name': 'Cristina', 'age': 27, 'p.gender': 'female', 'education': 'Physics'}
{'name': 'Daniel', 'age': 27, 'p.gender': 'male', 'education': 'Arts'}
{'name': 'Javier', 'age': 27, 'p.gender': 'male', 'education': 'Physics'}
{'name': 'Juan', 'age': 27, 'p.gender': 'male', 'education': 'Physics'}
{'name': 'Adrián', 'age': 27, 'p.gender': 'male', 'education': 'Physics'}
{'name': 'Iria', 'age': 27, 'p.gender': 'female', 'education': 'Physics'}
{'name': 'Elias', 'age': 29, 'p.gender': 'male', 'educ

In [6]:
# Example cypher queries

with driver.session(database=NEO4J_DB) as session:

    cprint("\nQuery: list all people", "green")
    records = session.run(queries["show_people"]) # <class 'neo4j.work.result.Result'>, convert to  <class 'list'> to see contents: list(records)
    for r in records:
        print(dict(r))
        
    cprint("\nQuery: list all companies", "green")
    records = session.run(queries["show_companies"]) # <class 'neo4j.work.result.Result'>, convert to  <class 'list'> to see contents: list(records)
    for r in records:
        print(dict(r))

    cprint("\nQuery: adjacency (who knows whom)", "green")
    records = session.run(queries["match_adjacency"]) # <class 'neo4j.work.result.Result'>, convert to  <class 'list'> to see contents: list(records)
    for r in records:
        print(dict(r))

[32m
Query: list all people[0m
{'name': 'Paula', 'age': 25, 'p.gender': 'female', 'education': 'Computer Engineering'}
{'name': 'Guillermo', 'age': 26, 'p.gender': 'male', 'education': 'Industrial Engineering'}
{'name': 'Gabriela', 'age': 26, 'p.gender': 'female', 'education': 'Physics'}
{'name': 'Iciar', 'age': 26, 'p.gender': 'female', 'education': 'Physics'}
{'name': 'Cristina', 'age': 27, 'p.gender': 'female', 'education': 'Physics'}
{'name': 'Daniel', 'age': 27, 'p.gender': 'male', 'education': 'Arts'}
{'name': 'Javier', 'age': 27, 'p.gender': 'male', 'education': 'Physics'}
{'name': 'Juan', 'age': 27, 'p.gender': 'male', 'education': 'Physics'}
{'name': 'Adrián', 'age': 27, 'p.gender': 'male', 'education': 'Physics'}
{'name': 'Iria', 'age': 27, 'p.gender': 'female', 'education': 'Physics'}
{'name': 'Elias', 'age': 29, 'p.gender': 'male', 'education': 'Physics'}
[32m
Query: list all companies[0m
{'name': 'Getronics', 'industry': 'Consulting Services'}
{'name': 'Accenture', 'in

### 2.A. Add rich text info

<p align="center">
  <img src="media/KG_step2_generate_rich_descriptions.svg" width="750">
</p>



In [7]:
# Add rich text descriptions 

with driver.session(database=NEO4J_DB) as session:

    cprint("\nQuery: Adding descriptions, appearance and summaries", "green")
    for q in queries["add_text"]:
        session.run(q)

[32m
Query: Adding descriptions, appearance and summaries[0m


### 2.B Add location info

In [8]:

# Add location property 

with driver.session(database=NEO4J_DB) as session:

    cprint("\nQuery: Adding location property", "green")
    for q in queries["add_locations"]:
        session.run(q)

[32m
Query: Adding location property[0m


In [9]:
# Show locations and plot maps

with driver.session(database=NEO4J_DB) as session:

    cprint("\nQuery: Adding location property", "green")
    results = session.run(queries["show_locations"])
    records = list(results)

# Replace with your query result rows
# records = [
#     {"name":"Iria","lat":40.437596,"lon":-3.711223,"labels":["Person"]},
#     {"name":"Guillermo","lat":40.455022,"lon":-3.692355,"labels":["Person"]},
#     {"name":"Gabriela","lat":40.475721,"lon":-3.711451,"labels":["Person"]},
#     {"name":"Paula","lat":40.490170,"lon":-3.654654,"labels":["Person"]},
#     {"name":"Cristina","lat":40.367462,"lon":-3.597745,"labels":["Person"]},
#     {"name":"Indra","lat":40.396648,"lon":-3.624635,"labels":["Company"]},
#     {"name":"CIEMAT","lat":40.453938,"lon":-3.728925,"labels":["Company"]},
#     {"name":"CBM","lat":40.549613,"lon":-3.690136,"labels":["Company"]},
# ]
for r in records:
    print(dict(r))

# Follium map
helper_folium.create_map_from_rows(records)

# Leaflet map
helper_leaflet.create_map_from_rows(records)

[32m
Query: Adding location property[0m
{'name': 'Guillermo', 'labels': ['Person'], 'lat': 40.455022, 'lon': -3.692355}
{'name': 'Gabriela', 'labels': ['Person'], 'lat': 40.4328, 'lon': -3.711451}
{'name': 'Paula', 'labels': ['Person'], 'lat': 40.49017, 'lon': -3.654654}
{'name': 'Cristina', 'labels': ['Person'], 'lat': 40.4332, 'lon': -3.597745}
{'name': 'Daniel', 'labels': ['Person'], 'lat': 40.4336, 'lon': -3.617745}
{'name': 'Javier', 'labels': ['Person'], 'lat': 40.434, 'lon': -3.711223}
{'name': 'Juan', 'labels': ['Person'], 'lat': 40.4344, 'lon': -3.711223}
{'name': 'Elias', 'labels': ['Person'], 'lat': 40.4348, 'lon': -3.711223}
{'name': 'Iciar', 'labels': ['Person'], 'lat': 40.4352, 'lon': -3.711223}
{'name': 'Adrián', 'labels': ['Person'], 'lat': 40.4356, 'lon': -3.711223}
{'name': 'Indra', 'labels': ['Company'], 'lat': 40.533873, 'lon': -3.630539}
{'name': 'CIEMAT', 'labels': ['Company'], 'lat': 40.453938, 'lon': -3.728925}
{'name': 'CBM', 'labels': ['Company'], 'lat': 40.

### 3. Create property embeddings (first step into RAG) 

<p align="center">
  <img src="media/KG_step3_generate_property_embeddings.svg" width="750">
</p>

**RAG** implementation requires selecting a **property to embed and use for similarity searches**. 

Description properties containing **rich text** work well for this purpose, as they provide richer semantic information. In our example, we'll use *text*.

In order to do so, we create two vector indexes in Neo4j:

- **Vector index *person_node_info_idx***: based on property ***info_emb*** for nodes of type "Person"
-  **Vector index *company_node_info_idx***: based on property ***info_emb*** for nodes of type "Company"

After that, we create the embeddings (this happens for both Person nodes and Company nodes):

- **Property *text*** ---`nomic-embed-text`---> **Property *embedding***


In [10]:
# Create vector indexes

with driver.session(database=NEO4J_DB) as session:
    
    for q in queries["create_vector_indexes"]:
        session.run(q)
    
    # Show created vector indexes
    results = session.run("SHOW VECTOR INDEXES")
    idx = list(results)
    cprint(f"\nFound {len(idx)} vector index entries.", "green")
    for r in idx:
        cprint("-"*20,"green")
        pprint(dict(r))

[32m
Found 3 vector index entries.[0m
[32m--------------------[0m
{'entityType': 'NODE',
 'id': 11,
 'indexProvider': 'vector-2.0',
 'labelsOrTypes': ['Company'],
 'lastRead': None,
 'name': 'company_node_idx',
 'owningConstraint': None,
 'populationPercent': 100.0,
 'properties': ['embedding'],
 'readCount': None,
 'state': 'ONLINE',
 'type': 'VECTOR'}
[32m--------------------[0m
{'entityType': 'RELATIONSHIP',
 'id': 12,
 'indexProvider': 'vector-2.0',
 'labelsOrTypes': ['KNOWS'],
 'lastRead': None,
 'name': 'know_relationship_idx',
 'owningConstraint': None,
 'populationPercent': 100.0,
 'properties': ['embedding'],
 'readCount': None,
 'state': 'ONLINE',
 'type': 'VECTOR'}
[32m--------------------[0m
{'entityType': 'NODE',
 'id': 10,
 'indexProvider': 'vector-2.0',
 'labelsOrTypes': ['Person'],
 'lastRead': None,
 'name': 'person_node_idx',
 'owningConstraint': None,
 'populationPercent': 100.0,
 'properties': ['embedding'],
 'readCount': None,
 'state': 'ONLINE',
 'type': '

In [11]:
# Create property embeddings 

with driver.session(database=NEO4J_DB) as session:
    
    # (p:PERSON): create embeddings only for nodes missing them
    helper_neo4j.vectorize_property(runner = session.run,
                       element = "node", 
                       node_label = "Person",
                       source_property = "text", 
                       )
    
    # (c:COMPANY): create embeddings only for nodes missing them
    helper_neo4j.vectorize_property(runner = session.run,
                       element = "node", 
                       node_label = "Company", 
                       source_property = "text", 
                       )    
    
    # [r:KNOWS]: create embeddings only for nodes missing them
    helper_neo4j.vectorize_property(runner = session.run,
                       element = "relationship",
                       rel_type = "KNOWS",
                       source_property = "text",
                       )        

[32m
Generating embeddings for (n:Person) on n.text[0m
[32m
Generating embeddings[0m
  input text: 'Guillermo is a male of 26 years old and studied In'...
  emb vec: [-0.023262369, 0.05322417, -0.15030223, 0.0006475813, -0.024272997, 0.068250276, 0.06251268, -0.028424125, -0.00060151465, 0.016591795]

[32m
Generating embeddings[0m
  input text: 'Gabriela is a female of 26 years old and studied P'...
  emb vec: [0.028215451, 0.02993047, -0.18000375, 0.032270715, -0.040491913, 0.081315376, 0.0066461083, -0.050384577, -0.04074791, -0.018135754]

[32m
Generating embeddings[0m
  input text: 'Paula is a female of 25 years old and studied Comp'...
  emb vec: [0.011417965, 0.040603343, -0.16852917, -0.01312405, -0.036922883, 0.08387193, -0.01419786, -0.018175317, -0.05579591, 0.006982089]

[32m
Generating embeddings[0m
  input text: 'Cristina is a female of 27 years old and studied P'...
  emb vec: [0.032263294, -0.0020466913, -0.157968, -0.01176661, -0.00407553, 0.068459205, 0.02842

### 4. Search 

Whenever we query this graph, we can use two different but complementary search techniques:

1. **KG Retreival**: through **Neo4J Cypher Query Language (CQL)** we can query precise entities and relations. The input query must be translated into CQL to get the desired results.

2. **Vector Retrieval**: **embedding the input query**, we can make a vector search against the vector indexes defined above.

The results will be a combination of both searches.

<p align="center">
  <img src="media/KGRAG_schema.svg">
</p>


In [12]:
# KG RAG Search
import json
with driver.session(database=NEO4J_DB) as session:
  
  # Query Nodes
  result = helper_neo4j.neo4j_KGRAG_search(runner = session.run, 
                               element = "node",
                               query = "Who shaved its head this summer?", 
                               index = "person_node_idx",
                               source_property = "text",
                               main_property = "name",
                               top_k = 5
                               )
  pprint(result, width = 200, sort_dicts=False, indent=2)
  file = "data/friends/friends_context_1.txt"
  with open(file, 'w', encoding='utf-8') as f:
    f.write(result.get("combined_context", ""))


  result  = helper_neo4j.neo4j_KGRAG_search(runner = session.run, 
                                element = "node",
                                query = "Which company investigates Cancer?",
                                index = "company_node_idx",
                                source_property = "text",
                                main_property = "name",
                                top_k = 5
                                )
  
  pprint(result, width = 200, sort_dicts=False, indent=2)
  file = "data/friends/friends_context_2.txt"
  with open(file, 'w', encoding='utf-8') as f:
    f.write(result.get("combined_context", ""))




[32m
Generating embeddings[0m
  input text: 'Who shaved its head this summer?'...
  emb vec: [0.022573026, -0.015476549, -0.17212495, 0.0018683294, -0.038688686, 0.044817436, 0.01842398, 0.013745231, 0.050375726, 0.008345599]

[32m
Running vector search query[0m
{ 'query': 'Who shaved its head this summer?',
  'total_results': 0,
  'raw_search_results': [ { 'score': 0.8374781608581543,
                            'label': ['Person'],
                            'properties_dict': { 'text': 'Guillermo is a male of 26 years old and studied Industrial Engineering.Guillermo has brown eyes and short hair. He has a very fancy shirt '
                                                         'that he takes to all important events. He shaved his head this summer.',
                                                 'location': POINT(-3.692355 40.455022),
                                                 'name': 'Guillermo',
                                                 'age': 26,
          

In [13]:

with driver.session(database=NEO4J_DB) as session:
# Query Relationships
  result  = helper_neo4j.neo4j_KGRAG_search(runner = session.run,
                                element = "relationship",
                                query = "Who is helping Iria at work?",
                                index = "know_relationship_idx",
                                source_property = "text",
                                main_property = "name",
                                top_k = 5
                                )
  pprint(result, width = 200, sort_dicts=False, indent=2)
  file = "data/friends/friends_context_3.txt"
  with open(file, 'w', encoding='utf-8') as f:
    f.write(result.get("combined_context", ""))

[32m
Generating embeddings[0m


  input text: 'Who is helping Iria at work?'...
  emb vec: [-0.039116025, 0.022910928, -0.2054694, -0.0038209686, 0.038757026, -0.014443832, 0.0032659601, -0.04491874, -0.034365475, 0.03448219]

[32m
Running vector search query[0m
{ 'query': 'Who is helping Iria at work?',
  'total_results': 0,
  'raw_search_results': [ { 'score': 0.9196758270263672,
                            'type': 'KNOWS',
                            'properties_dict': {'text': 'Paula is helping Iria with the project she is working on.', 'knows_from': 'work'},
                            'facts': ['Iria -[KNOWS {"knows_from":"work","text":"Paula is helping Iria with the project she is working on."}]-> Paula']},
                          { 'score': 0.8709979057312012,
                            'type': 'KNOWS',
                            'properties_dict': {'text': 'Guillermo and Iria work together in the same project with AI Agents.', 'knows_from': 'work'},
                            'facts': ['Iria -[KNOWS {

In [14]:
driver.close()
print("Driver closed.")

Driver closed.
