# Knowledge Graph RAG

In [1]:
from dotenv import load_dotenv
import os
from langchain_neo4j import Neo4jGraph, Neo4jVector
from langchain_ollama import OllamaEmbeddings
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import requests

## Load environment parameters

In [2]:
load_dotenv('../docker/.env.example', override=True)

True

In [3]:
kg = Neo4jGraph(
    url="bolt://localhost:7687",
    username="neo4j",
    password="testpass",
    database="neo4j"
)

In [4]:
kg.query("""
  CREATE VECTOR INDEX movie_tagline_embeddings IF NOT EXISTS
  FOR (m:Movie) ON (m.taglineEmbedding) 
  OPTIONS { indexConfig: {
    `vector.dimensions`: 768,
    `vector.similarity_function`: 'cosine'
  }}"""
)

[]

In [5]:
kg.query("""
  SHOW VECTOR INDEXES
  """
)

[{'id': 3,
  'name': 'movie_tagline_embeddings',
  'state': 'ONLINE',
  'populationPercent': 100.0,
  'type': 'VECTOR',
  'entityType': 'NODE',
  'labelsOrTypes': ['Movie'],
  'properties': ['taglineEmbedding'],
  'indexProvider': 'vector-2.0',
  'owningConstraint': None,
  'lastRead': None,
  'readCount': 0}]

In [6]:
# kg.query("""
#     MATCH (movie:Movie) WHERE movie.tagline IS NOT NULL
#     WITH movie, genai.vector.encode(
#         movie.tagline, 
#         "Ollama", 
#         {
#           model: $model_name,
#           endpoint: $ollama_endpoint,
#           config: {dimensions: $dimensions}
#         }) AS vector
#     CALL db.create.setNodeVectorProperty(movie, "taglineEmbedding", vector)
#     """, 
#     params={
#         "model_name":"nomic-embed-text",
#         "ollama_endpoint":"http://localhost:11434",
#         "dimensions":768 # 768 for nomic-embed-text, 1024 for mxbai-embed-large
#     }
# )

In [7]:
movies = kg.query("""
    MATCH (m:Movie) 
    WHERE m.tagline IS NOT NULL
    RETURN m
    LIMIT 5
""")
movies

[{'m': {'taglineEmbedding': [-0.282911092042923,
    0.9794036149978638,
    -3.6877048015594482,
    -0.354189932346344,
    0.4336017072200775,
    1.0637896060943604,
    0.35087907314300537,
    0.18152250349521637,
    0.10273163765668869,
    -0.7774709463119507,
    -0.006895020604133606,
    0.8144788146018982,
    1.0801773071289062,
    0.4219035506248474,
    0.18617992103099823,
    -1.3085798025131226,
    -0.7377946972846985,
    -1.6596382856369019,
    -1.0388531684875488,
    -0.24581396579742432,
    -1.0104713439941406,
    -1.3468214273452759,
    0.2975766956806183,
    -0.04492861032485962,
    0.48623669147491455,
    -0.006641430780291557,
    -0.9409858584403992,
    0.921699047088623,
    -1.0165927410125732,
    -0.1855076551437378,
    0.31243595480918884,
    -0.3875996470451355,
    0.37144291400909424,
    1.0376453399658203,
    1.4310789108276367,
    -0.2517165541648865,
    0.869736909866333,
    1.9336402416229248,
    0.0547456331551075,
    -0.9021

In [8]:
# First, get embeddings in Python
movies = kg.query("""
    MATCH (m:Movie) 
    WHERE m.tagline IS NOT NULL AND m.taglineEmbedding IS NULL
    RETURN elementId(m) AS id, m.tagline AS tagline
""")
# movies

In [9]:
response = requests.post(
    "http://localhost:11434/api/embeddings",
    json={"model": "nomic-embed-text", "prompt": "Evil has its winning ways"}
)
response.json()["embedding"][:5]

[0.6922191381454468,
 1.2030346393585205,
 -3.7411017417907715,
 0.557880163192749,
 1.6317228078842163]

In [10]:
for movie in movies:
    movie_id = movie['id']
    tagline = movie['tagline']
    
    # Get embedding from Ollama
    response = requests.post(
        "http://localhost:11434/api/embeddings",
        json={"model": "nomic-embed-text", "prompt": tagline}
    )
    
    if response.status_code == 200:
        embedding = response.json()["embedding"]
        
        # Update Neo4j
        kg.query("""
            MATCH (m:Movie)
            WHERE elementId(m) = $movie_id
            SET m.taglineEmbedding = $embedding
        """, params={"movie_id": movie_id, "embedding": embedding})
        
        print(f"Updated embedding for movie: {movie_id}")

Updated embedding for movie: 4:32e55094-898c-45a9-98c9-8115e22590f2:62
Updated embedding for movie: 4:32e55094-898c-45a9-98c9-8115e22590f2:67
Updated embedding for movie: 4:32e55094-898c-45a9-98c9-8115e22590f2:73
Updated embedding for movie: 4:32e55094-898c-45a9-98c9-8115e22590f2:78
Updated embedding for movie: 4:32e55094-898c-45a9-98c9-8115e22590f2:81
Updated embedding for movie: 4:32e55094-898c-45a9-98c9-8115e22590f2:85
Updated embedding for movie: 4:32e55094-898c-45a9-98c9-8115e22590f2:87
Updated embedding for movie: 4:32e55094-898c-45a9-98c9-8115e22590f2:92
Updated embedding for movie: 4:32e55094-898c-45a9-98c9-8115e22590f2:95
Updated embedding for movie: 4:32e55094-898c-45a9-98c9-8115e22590f2:97
Updated embedding for movie: 4:32e55094-898c-45a9-98c9-8115e22590f2:100
Updated embedding for movie: 4:32e55094-898c-45a9-98c9-8115e22590f2:105
Updated embedding for movie: 4:32e55094-898c-45a9-98c9-8115e22590f2:111
Updated embedding for movie: 4:32e55094-898c-45a9-98c9-8115e22590f2:116
Up

In [11]:
movies = kg.query("""
    MATCH (m:Movie) 
    WHERE m.tagline IS NOT NULL AND m.taglineEmbedding IS NOT NULL
    RETURN elementId(m) AS id, m.tagline AS tagline
    LIMIT 2
""")
movies

[{'id': '4:32e55094-898c-45a9-98c9-8115e22590f2:0',
  'tagline': 'Welcome to the Real World'},
 {'id': '4:32e55094-898c-45a9-98c9-8115e22590f2:9',
  'tagline': 'Free your mind'}]

In [18]:
result = kg.query("""
    MATCH (m:Movie) 
    WHERE m.tagline IS NOT NULL
    RETURN m
    LIMIT 1
    """
)
result[0].keys(), result[0]['m'].keys()

(dict_keys(['m']),
 dict_keys(['taglineEmbedding', 'tagline', 'title', 'released']))