## Libraries

In [8]:
# Library imports per tutorial
from dotenv import load_dotenv
import os

from langchain_community.graphs import Neo4jGraph

import warnings
warnings.filterwarnings('ignore')

In [18]:
# Additional libraries used to work around the genai plugin being available
# only in Neo4j Enterprise Edition: the embeddings are generated by calling
# OpenAI directly and written back through the Neo4j driver.
from openai import OpenAI
from neo4j import GraphDatabase

## Connect to resources

### Neo4j

In [2]:
# Read the Neo4j connection settings from a local KEY=VALUE text file so the
# credentials never appear in the notebook itself.
def _parse_connection_details(lines):
    """Parse ``KEY=VALUE`` lines into a dict of Neo4j connection settings.

    Args:
        lines: Iterable of text lines (for example an open file object).

    Returns:
        dict: Always contains the keys ``NEO4J_URI``, ``NEO4J_USERNAME``,
        ``NEO4J_PASSWORD`` and ``NEO4J_DATABASE``. A key that is missing from
        the input keeps the empty string as its value.
    """
    wanted_keys = ("NEO4J_URI", "NEO4J_USERNAME", "NEO4J_PASSWORD", "NEO4J_DATABASE")
    details = dict.fromkeys(wanted_keys, "")
    for raw_line in lines:
        # Split on the FIRST '=' only, so values that themselves contain '='
        # (common in generated passwords) are kept intact.
        key, separator, value = raw_line.strip().partition('=')
        key = key.strip()
        if separator and key in details:
            # Drop surrounding whitespace and optional double quotes.
            details[key] = value.strip().strip('"')
    return details


with open('neo4j_connection_details.txt', 'r') as file:
    connection_details = _parse_connection_details(file)

neo4j_uri = connection_details["NEO4J_URI"]
neo4j_username = connection_details["NEO4J_USERNAME"]
neo4j_password = connection_details["NEO4J_PASSWORD"]
neo4j_database = connection_details["NEO4J_DATABASE"]

# Deliberately not printing the values: cell outputs are saved with the
# notebook and would leak the credentials.



In [3]:
# Open a LangChain Neo4jGraph handle on the knowledge graph, using the
# connection settings read from the details file.
connection_settings = dict(
    url=neo4j_uri,
    username=neo4j_username,
    password=neo4j_password,
    database=neo4j_database,
)
kg = Neo4jGraph(**connection_settings)

In [4]:
# Sanity check: show the graph object's default string representation.
print(kg)

<langchain_community.graphs.neo4j_graph.Neo4jGraph object at 0x106f8edd0>


In [79]:
# Neo4jGraph keeps a cached copy of the schema, and cells in this notebook
# add and remove relationships (e.g. WORKS_WITH). Refresh the cache first so
# the printed schema reflects the current state of the database.
kg.refresh_schema()
print(kg.schema)

Node properties:
Movie {title: STRING, tagline: STRING, released: INTEGER}
Person {born: INTEGER, name: STRING}
Relationship properties:
ACTED_IN {roles: LIST}
REVIEWED {summary: STRING, rating: INTEGER}
The relationships:
(:Person)-[:ACTED_IN]->(:Movie)
(:Person)-[:WROTE]->(:Movie)
(:Person)-[:DIRECTED]->(:Movie)
(:Person)-[:PRODUCED]->(:Movie)
(:Person)-[:FOLLOWS]->(:Person)
(:Person)-[:REVIEWED]->(:Movie)
(:Person)-[:WORKS_WITH]->(:Person)


In [71]:
# Cypher query: count all nodes labelled Person and return the total under
# the alias numberOfPeople.
count_people = """
MATCH (n:Person) RETURN count(n) AS numberOfPeople
"""

In [20]:
# Run the count query; the result is a list with one dict per returned row.
kg.query(count_people)

[{'numberOfPeople': 139}]

In [22]:
# Pull the single count out of the first (and only) result row, then report it.
people_count = kg.query(count_people)[0]['numberOfPeople']
print(f"There are {people_count} people in this graph.")

There are 139 people in this graph.


In [11]:
# Cypher query: return the name property of every Person node.
get_people = """
MATCH (n:Person) RETURN n.name;
"""

In [12]:
# Fetch every person's name: one dict per Person node, keyed 'n.name'.
# NOTE: there is no LIMIT, so this displays the whole list.
kg.query(get_people)

[{'n.name': 'Keanu Reeves'},
 {'n.name': 'Carrie-Anne Moss'},
 {'n.name': 'Laurence Fishburne'},
 {'n.name': 'Hugo Weaving'},
 {'n.name': 'Andy Wachowski'},
 {'n.name': 'Lana Wachowski'},
 {'n.name': 'Joel Silver'},
 {'n.name': 'Emil Eifrem'},
 {'n.name': 'Charlize Theron'},
 {'n.name': 'Al Pacino'},
 {'n.name': 'Taylor Hackford'},
 {'n.name': 'Tom Cruise'},
 {'n.name': 'Jack Nicholson'},
 {'n.name': 'Demi Moore'},
 {'n.name': 'Kevin Bacon'},
 {'n.name': 'Kiefer Sutherland'},
 {'n.name': 'Noah Wyle'},
 {'n.name': 'Cuba Gooding Jr.'},
 {'n.name': 'Kevin Pollak'},
 {'n.name': 'J.T. Walsh'},
 {'n.name': 'James Marshall'},
 {'n.name': 'Christopher Guest'},
 {'n.name': 'Rob Reiner'},
 {'n.name': 'Aaron Sorkin'},
 {'n.name': 'Kelly McGillis'},
 {'n.name': 'Val Kilmer'},
 {'n.name': 'Anthony Edwards'},
 {'n.name': 'Tom Skerritt'},
 {'n.name': 'Meg Ryan'},
 {'n.name': 'Tony Scott'},
 {'n.name': 'Jim Cash'},
 {'n.name': 'Renee Zellweger'},
 {'n.name': 'Kelly Preston'},
 {'n.name': "Jerry O'Conn

In [23]:
# Cypher query: count all nodes labelled Movie and return the total under
# the alias numberOfMovies.
count_movies = """
MATCH (n:Movie) RETURN count(n) AS numberOfMovies
"""

In [24]:
# Pull the single count out of the first (and only) result row, then report it.
movie_count = kg.query(count_movies)[0]['numberOfMovies']
print(f"There are {movie_count} movies in this graph.")

There are 40 movies in this graph.


In [13]:
# Cypher query: return the title property of every Movie node.
get_movies = """
MATCH (n:Movie) RETURN n.title;
"""

In [14]:
# Fetch every movie title: one dict per Movie node, keyed 'n.title'.
# NOTE: there is no LIMIT, so this displays the whole list.
kg.query(get_movies)

[{'n.title': 'The Matrix'},
 {'n.title': 'The Matrix Reloaded'},
 {'n.title': 'The Matrix Revolutions'},
 {'n.title': "The Devil's Advocate"},
 {'n.title': 'A Few Good Men'},
 {'n.title': 'Top Gun'},
 {'n.title': 'Jerry Maguire'},
 {'n.title': 'Stand By Me'},
 {'n.title': 'As Good as It Gets'},
 {'n.title': 'What Dreams May Come'},
 {'n.title': 'Snow Falling on Cedars'},
 {'n.title': "You've Got Mail"},
 {'n.title': 'Sleepless in Seattle'},
 {'n.title': 'Joe Versus the Volcano'},
 {'n.title': 'When Harry Met Sally'},
 {'n.title': 'That Thing You Do'},
 {'n.title': 'The Replacements'},
 {'n.title': 'RescueDawn'},
 {'n.title': 'The Birdcage'},
 {'n.title': 'Unforgiven'},
 {'n.title': 'Johnny Mnemonic'},
 {'n.title': 'Cloud Atlas'},
 {'n.title': 'The Da Vinci Code'},
 {'n.title': 'V for Vendetta'},
 {'n.title': 'Speed Racer'},
 {'n.title': 'Ninja Assassin'},
 {'n.title': 'The Green Mile'},
 {'n.title': 'Frost/Nixon'},
 {'n.title': 'Hoffa'},
 {'n.title': 'Apollo 13'},
 {'n.title': 'Twister

In [26]:
# Get info about Tom Hanks: match the Person node by its name property and
# return the whole node, i.e. all of its properties.
cypher = """
MATCH (person:Person {name:"Tom Hanks"}) RETURN person;
"""

kg.query(cypher)

[{'person': {'born': 1956, 'name': 'Tom Hanks'}}]

In [28]:
# Get info about the movie called Cloud Atlas: match the Movie node by its
# title property and return the whole node, i.e. all of its properties.
cypher = """
MATCH (m:Movie {title:"Cloud Atlas"}) RETURN m;
"""

kg.query(cypher)

[{'m': {'tagline': 'Everything is connected',
   'title': 'Cloud Atlas',
   'released': 2012}}]

In [29]:
# Get the release year of the movie called Cloud Atlas. Only the `released`
# property is returned (an INTEGER year according to the graph schema).
cypher = """
MATCH (m:Movie {title:"Cloud Atlas"}) RETURN m.released;
"""

kg.query(cypher)

[{'m.released': 2012}]

In [30]:
# Get the release year and the tagline of the movie called Cloud Atlas.
# Result keys carry the variable name used in the query
# ('cloudAtlas.released', 'cloudAtlas.tagline').
cypher = """
MATCH (cloudAtlas:Movie {title:"Cloud Atlas"})
RETURN cloudAtlas.released, cloudAtlas.tagline;
"""

kg.query(cypher)

[{'cloudAtlas.released': 2012,
  'cloudAtlas.tagline': 'Everything is connected'}]

In [32]:
# Get titles of movies released in the nineties.
# Both bounds are exclusive, so the filter keeps release years 1990-1999.
cypher = """
MATCH (nineties:Movie)
WHERE nineties.released > 1989
    AND nineties.released < 2000
RETURN nineties.title;
"""

kg.query(cypher)

[{'nineties.title': 'The Matrix'},
 {'nineties.title': "The Devil's Advocate"},
 {'nineties.title': 'A Few Good Men'},
 {'nineties.title': 'As Good as It Gets'},
 {'nineties.title': 'What Dreams May Come'},
 {'nineties.title': 'Snow Falling on Cedars'},
 {'nineties.title': "You've Got Mail"},
 {'nineties.title': 'Sleepless in Seattle'},
 {'nineties.title': 'Joe Versus the Volcano'},
 {'nineties.title': 'When Harry Met Sally'},
 {'nineties.title': 'That Thing You Do'},
 {'nineties.title': 'The Birdcage'},
 {'nineties.title': 'Unforgiven'},
 {'nineties.title': 'Johnny Mnemonic'},
 {'nineties.title': 'The Green Mile'},
 {'nineties.title': 'Hoffa'},
 {'nineties.title': 'Apollo 13'},
 {'nineties.title': 'Twister'},
 {'nineties.title': 'Bicentennial Man'},
 {'nineties.title': 'A League of Their Own'}]

In [35]:
# Get a sample of movies and the actors who acted in them.
# The pattern follows ACTED_IN relationships from Person to Movie. There is
# no ORDER BY, so which ten rows LIMIT keeps is up to the database.
cypher = """
MATCH (actor:Person)-[:ACTED_IN]->(m:Movie)
RETURN actor.name, m.title
LIMIT 10;
"""

kg.query(cypher)

[{'actor.name': 'Emil Eifrem', 'm.title': 'The Matrix'},
 {'actor.name': 'Hugo Weaving', 'm.title': 'The Matrix'},
 {'actor.name': 'Laurence Fishburne', 'm.title': 'The Matrix'},
 {'actor.name': 'Carrie-Anne Moss', 'm.title': 'The Matrix'},
 {'actor.name': 'Keanu Reeves', 'm.title': 'The Matrix'},
 {'actor.name': 'Hugo Weaving', 'm.title': 'The Matrix Reloaded'},
 {'actor.name': 'Laurence Fishburne', 'm.title': 'The Matrix Reloaded'},
 {'actor.name': 'Carrie-Anne Moss', 'm.title': 'The Matrix Reloaded'},
 {'actor.name': 'Keanu Reeves', 'm.title': 'The Matrix Reloaded'},
 {'actor.name': 'Hugo Weaving', 'm.title': 'The Matrix Revolutions'}]

In [36]:
# Get Tom Hanks movies: follow ACTED_IN relationships from the Person node
# named "Tom Hanks" and return the title of each movie reached.
cypher = """
MATCH (tomHanks:Person {name:"Tom Hanks"})-[:ACTED_IN]->(m:Movie)
RETURN m.title;
"""

kg.query(cypher)

[{'m.title': "You've Got Mail"},
 {'m.title': 'Apollo 13'},
 {'m.title': 'Joe Versus the Volcano'},
 {'m.title': 'That Thing You Do'},
 {'m.title': 'Cloud Atlas'},
 {'m.title': 'The Da Vinci Code'},
 {'m.title': 'Sleepless in Seattle'},
 {'m.title': 'A League of Their Own'},
 {'m.title': 'The Green Mile'},
 {'m.title': "Charlie Wilson's War"},
 {'m.title': 'Cast Away'},
 {'m.title': 'The Polar Express'}]

In [38]:
# Which other people acted in the same movies as Tom Hanks?
# The pattern goes Tom -> movie <- coActor over two ACTED_IN relationships.
# Cypher does not reuse a relationship within one matched pattern, so the
# relationship that matched Tom is not matched again for coActor.
# One row is returned per (co-actor, movie) pair, so a co-actor can appear
# more than once.
cypher = """
MATCH (tom:Person {name:"Tom Hanks"})-[:ACTED_IN]->(m)<-[:ACTED_IN]-(coActor)
RETURN coActor.name, m.title
"""

kg.query(cypher)

[{'coActor.name': 'Parker Posey', 'm.title': "You've Got Mail"},
 {'coActor.name': 'Greg Kinnear', 'm.title': "You've Got Mail"},
 {'coActor.name': 'Meg Ryan', 'm.title': "You've Got Mail"},
 {'coActor.name': 'Steve Zahn', 'm.title': "You've Got Mail"},
 {'coActor.name': 'Dave Chappelle', 'm.title': "You've Got Mail"},
 {'coActor.name': 'Ed Harris', 'm.title': 'Apollo 13'},
 {'coActor.name': 'Kevin Bacon', 'm.title': 'Apollo 13'},
 {'coActor.name': 'Gary Sinise', 'm.title': 'Apollo 13'},
 {'coActor.name': 'Bill Paxton', 'm.title': 'Apollo 13'},
 {'coActor.name': 'Nathan Lane', 'm.title': 'Joe Versus the Volcano'},
 {'coActor.name': 'Meg Ryan', 'm.title': 'Joe Versus the Volcano'},
 {'coActor.name': 'Liv Tyler', 'm.title': 'That Thing You Do'},
 {'coActor.name': 'Charlize Theron', 'm.title': 'That Thing You Do'},
 {'coActor.name': 'Jim Broadbent', 'm.title': 'Cloud Atlas'},
 {'coActor.name': 'Halle Berry', 'm.title': 'Cloud Atlas'},
 {'coActor.name': 'Hugo Weaving', 'm.title': 'Cloud At

In [40]:
# Find movies that Emil Eifrem acted in. The relationship is bound to the
# variable `actedIn` but only the person name and movie title are returned.
cypher = """
MATCH (emil:Person {name:"Emil Eifrem"})-[actedIn:ACTED_IN]->(m:Movie)
RETURN emil.name, m.title
"""

kg.query(cypher)

[{'emil.name': 'Emil Eifrem', 'm.title': 'The Matrix'}]

In [41]:
# Delete the ACTED_IN relationships for Emil Eifrem.
# WARNING: this modifies the database. Only the relationships bound to
# `actedIn` are deleted; the Person and Movie nodes are left in place.
# There is no RETURN clause, so the query yields an empty result list.
cypher = """
MATCH (emil:Person {name:"Emil Eifrem"})-[actedIn:ACTED_IN]->(m:Movie)
DELETE actedIn
"""

kg.query(cypher)

[]

In [44]:
# Create a new person called Andreas.
# MERGE is used instead of CREATE so the cell is idempotent: re-running it
# reuses the existing Andreas node rather than adding a duplicate Person
# each time the cell is executed.
cypher = """
MERGE (andreas:Person {name:"Andreas"})
RETURN andreas
"""

kg.query(cypher)

[{'andreas': {'name': 'Andreas'}}]

In [45]:
# Create a new WORKS_WITH relationship from Andreas to Emil.
# Both nodes are looked up with MATCH first, so nothing is created (and no
# row is returned) if either person is missing. MERGE only creates the
# relationship when it does not already exist, so the cell is safe to re-run.
cypher = """
MATCH (andreas:Person {name:"Andreas"}), (emil:Person {name:"Emil Eifrem"})
MERGE (andreas)-[hasRelationship:WORKS_WITH]->(emil)
RETURN andreas, hasRelationship, emil
"""

kg.query(cypher)

[{'andreas': {'name': 'Andreas'},
  'hasRelationship': ({'name': 'Andreas'},
   'WORKS_WITH',
   {'born': 1978, 'name': 'Emil Eifrem'}),
  'emil': {'born': 1978, 'name': 'Emil Eifrem'}}]

In [52]:
# Who does Andreas work with?
# Follows outgoing WORKS_WITH relationships from the Person named "Andreas"
# and returns the name of each node reached.
cypher = """
MATCH (andreas:Person {name:"Andreas"})-[:WORKS_WITH]->(somePerson)
RETURN somePerson.name
"""

kg.query(cypher)

[{'somePerson.name': 'Emil Eifrem'}]

### OpenAI api

In [5]:
# Load the OpenAI API key from the first line of a local text file so the
# key itself never appears in the notebook.
with open('openai_api_key.txt', 'r') as key_file:
    first_line = key_file.readline()

# Trim surrounding whitespace (including the trailing newline) and expose
# the key under both the lower-case and the constant-style name.
openai_api_key = first_line.strip()
OPENAI_API_KEY = openai_api_key

# The key is intentionally not printed: cell outputs are saved with the notebook.


In [21]:
# OpenAI client used for the embeddings requests made by this notebook.
client = OpenAI(api_key=OPENAI_API_KEY)

## Create vector index

In [81]:
# Create a vector index called 'movie_tagline_embeddings' on the
# 'taglineEmbedding' property of Movie nodes, i.e. the embedding
# representation of each movie's tagline.
# The taglineEmbedding values will be generated later by passing the tagline to OpenAI.
# IF NOT EXISTS makes the statement safe to re-run.
# nb: note the use of backticks in the OPTIONS section; the dimension (1536)
# has to match the length of the embedding vectors that are stored.
cypher_create_index = """
CREATE VECTOR INDEX movie_tagline_embeddings IF NOT EXISTS
for (m:Movie) ON (m.taglineEmbedding)
OPTIONS { indexConfig: {
    `vector.dimensions`: 1536,
    `vector.similarity_function`: 'cosine'
}}"""

kg.query(cypher_create_index)

[]

In [6]:
# Check to see what vector indexes exist in the knowledge graph database.
# Each returned row describes one index (name, state, labels, properties, ...).
cypher_inspect_indexes = """
SHOW VECTOR INDEXES
"""

kg.query(cypher_inspect_indexes)

[{'id': 3,
  'name': 'movie_tagline_embeddings',
  'state': 'ONLINE',
  'populationPercent': 100.0,
  'type': 'VECTOR',
  'entityType': 'NODE',
  'labelsOrTypes': ['Movie'],
  'properties': ['taglineEmbedding'],
  'indexProvider': 'vector-2.0',
  'owningConstraint': None,
  'lastRead': None,
  'readCount': 0}]

## Create embeddings and populate the index

The plugin-based way of generating embeddings does not work here, because the
genai plugin is only available in Neo4j Enterprise Edition.

So instead we generate the embeddings by calling the OpenAI API from Python
and then write them back to Neo4j, thereby bypassing the need for the
genai plugin.

In [12]:
# Connect to the Neo4j database with the official Python driver, using the
# same URI and credentials as the LangChain graph handle.
# NOTE: the cell that populates the embeddings calls driver.close() when it
# finishes, so this cell must be re-run before that one is run again.
driver = GraphDatabase.driver(neo4j_uri, auth=(neo4j_username, neo4j_password))

In [16]:
# print(driver)

In [25]:
# Read helper: collect the tagline and internal node id of every movie that
# has a tagline.
def fetch_movies_with_taglines(tx):
    """Return ``(tagline, id)`` pairs for all Movie nodes with a tagline.

    Args:
        tx: Transaction-like object exposing ``run(query)``; iterating the
            result must yield records indexable by ``"tagline"`` and ``"id"``.

    Returns:
        list[tuple]: One ``(tagline, id)`` tuple per returned record, in the
        order the records are yielded.
    """
    query = "MATCH (movie:Movie) WHERE movie.tagline IS NOT NULL RETURN movie.tagline AS tagline, id(movie) AS id"
    pairs = []
    for record in tx.run(query):
        pairs.append((record["tagline"], record["id"]))
    return pairs


# Write helper: store an embedding vector on one movie node.
def update_movie_embedding(tx, movie_id, embedding):
    """Set ``taglineEmbedding`` on the Movie node whose internal id is ``movie_id``.

    Args:
        tx: Transaction-like object exposing ``run(query, **parameters)``.
        movie_id: Internal node id, as returned by ``id(movie)``.
        embedding: Embedding vector to store on the node.

    Returns:
        None.
    """
    query = "MATCH (movie:Movie) WHERE id(movie)=$movie_id SET movie.taglineEmbedding = $embedding"
    parameters = {"movie_id": movie_id, "embedding": embedding}
    tx.run(query, **parameters)
    
# Run the process to:
# 1) Fetch movie taglines
# 2) Call openai to get the embedding
# 3) Write the embedding back to node in neo4j
#
# try/finally guarantees the driver is closed even when an OpenAI or Neo4j
# call fails part-way through the loop.
try:
    # The LangChain graph handle (kg) was opened on `neo4j_database`, so the
    # session targets that same database. An empty name falls back to the
    # server's default database.
    with driver.session(database=neo4j_database or None) as session:
        # execute_read / execute_write are the current names of the managed
        # transaction API (read_transaction / write_transaction are deprecated).
        movies = session.execute_read(fetch_movies_with_taglines)

        for tagline, movie_id in movies:
            # One embeddings request per tagline; the model must produce
            # vectors of the dimension the vector index was created with.
            response = client.embeddings.create(input=tagline, model="text-embedding-3-small")
            embedding = response.data[0].embedding
            session.execute_write(update_movie_embedding, movie_id, embedding)
finally:
    driver.close()





In [26]:
# Inspect a sample embedding: fetch the tagline and the stored
# taglineEmbedding of one movie that has a tagline.

cypher_example_embedding = """
MATCH (m:Movie)
WHERE m.tagline IS NOT NULL
RETURN m.tagline, m.taglineEmbedding
LIMIT 1
"""

result = kg.query(cypher_example_embedding)

In [28]:
# Tagline of the sampled movie.
result[0]["m.tagline"]

'Welcome to the Real World'

In [29]:
# First ten components of the sampled embedding (the full vector is too long to display).
result[0]["m.taglineEmbedding"][:10]

[0.04398856684565544,
 -0.0010325410403311253,
 0.007483473047614098,
 0.0713852047920227,
 0.015836454927921295,
 -0.019285978749394417,
 -0.02694050222635269,
 0.016948284581303596,
 -0.03574961796402931,
 0.027995316311717033]

In [30]:
# Length of the stored vector; it should equal the `vector.dimensions`
# value (1536) the index was created with.
len(result[0]["m.taglineEmbedding"])

1536

## Vector similarity search using the taglineEmbeddings

In [32]:
# Natural-language question to match against the movie taglines.
question = "What movies are about love ?"

In [33]:
# Get embedding of the question by calling OpenAI api.
# The same model as for the tagline embeddings is used so that the question
# vector and the stored vectors are comparable.
response = client.embeddings.create(input=question, model="text-embedding-3-small")
question_embedding = response.data[0].embedding

In [36]:
# Ask the 'movie_tagline_embeddings' vector index for the top_k movies whose
# tagline embeddings are most similar to the question embedding, and return
# each movie's title and tagline together with its similarity score.

cypher_similarity = """
CALL db.index.vector.queryNodes(
    'movie_tagline_embeddings',
    $top_k,
    $question_embedding
    ) YIELD node AS movie, score
RETURN movie.title, movie.tagline, score
"""

# Query parameters are passed separately rather than formatted into the Cypher text.
similarity_params = {"top_k": 5, "question_embedding": question_embedding}

kg.query(cypher_similarity, params=similarity_params)


[{'movie.title': 'Joe Versus the Volcano',
  'movie.tagline': 'A story of love, lava and burning desire.',
  'score': 0.6843096017837524},
 {'movie.title': 'Snow Falling on Cedars',
  'movie.tagline': 'First loves last. Forever.',
  'score': 0.6754798889160156},
 {'movie.title': 'Twister',
  'movie.tagline': "Don't Breathe. Don't Look Back.",
  'score': 0.6463618278503418},
 {'movie.title': 'When Harry Met Sally',
  'movie.tagline': 'At odds in life... in love on-line.',
  'score': 0.6293200850486755},
 {'movie.title': "You've Got Mail",
  'movie.tagline': 'At odds in life... in love on-line.',
  'score': 0.6293200850486755}]