In [1]:
import os
from string import Template
import json
from neo4j import GraphDatabase
import glob
from timeit import default_timer as timer
from dotenv import load_dotenv
from time import sleep
import os

import streamlit as st
from langchain.chains import create_retrieval_chain
from langchain.callbacks.base import BaseCallbackHandler
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Neo4jVector
from streamlit.logger import get_logger
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.chat_models import ChatOllama
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from typing import Dict, List
from langchain_community.llms import Ollama
from pprint import pprint
from timeit import default_timer as timer
from langchain.prompts.prompt import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain.chains import GraphCypherQAChain

from langchain.graphs import Neo4jGraph

In [2]:
# Connection settings for the notebook. Each service setting can be overridden
# through an environment variable so real credentials never have to be written
# into the notebook; the fallbacks are the original local-development values.
config = {
    "ollama_base_url": os.environ.get("OLLAMA_BASE_URL", "http://localhost:11434"),
    "llm_name": os.environ.get("LLM", "llama3"),
    "neo4j_url": os.environ.get("NEO4J_URI", "bolt://localhost:7687"),
    "neo4j_username": os.environ.get("NEO4J_USERNAME", "neo4j"),
    "neo4j_password": os.environ.get("NEO4J_PASSWORD", "password"),
    "file_path": "data/people_profiles/people-profiles1.md",
}

In [3]:
# Unpack the Neo4j connection settings (database URL, username, password)
# from the shared config into short module-level names.
url, username, password = (
    config[key] for key in ("neo4j_url", "neo4j_username", "neo4j_password")
)

In [30]:
# Build the LLM client from the shared config rather than repeating the model
# name and Ollama endpoint as literals, so the two cannot drift apart.
# A low temperature is used to reduce sampling randomness.
llm = Ollama(
    model=config["llm_name"],
    temperature=0.2,
    base_url=config["ollama_base_url"],
)

#### END-TO-END Black Box Query Generation

#### Note: the prompt below names the tenant label (`tenant2`) so that the generated Cypher queries are restricted to the right tenant. Alternatively, the tenant filter can be applied manually, which is a more reliable process.

In [95]:
# Cypher generation prompt, written in the Llama 3 chat format.
# Each turn must be closed with <|eot_id|>; the system turn is now terminated
# before the user header so the instructions and the question are separate turns.
# NOTE(review): the examples in rules 4 and 6 mention Client / SlackMessage /
# project, none of which appear in the graph schema printed later in this
# notebook -- consider replacing them with Person / Skill examples.
cypher_generation_template = """
<|begin_of_text|><|start_header_id|>system<|end_header_id|>
You are an expert Neo4j Cypher translator who converts English to Cypher based on the Neo4j Schema provided, following the instructions below:
1. Generate Cypher query compatible ONLY for Neo4j Version 5
2. Do not use EXISTS, SIZE, HAVING keywords in the cypher. Use alias when using the WITH keyword
3. Use only Nodes and relationships mentioned in the schema
4. Always do a case-insensitive and fuzzy search for any properties related search. Eg: to search for a Client, use `toLower(client.id) contains 'neo4j'`. To search for Slack Messages, use `toLower(SlackMessage.text) contains 'neo4j'`. To search for a project, use `toLower(project.summary) contains 'logistics platform' OR toLower(project.name) contains 'logistics platform'`.
5. Never use relationships that are not mentioned in the given schema
6. When asked about projects, Match the properties using case-insensitive matching and the OR-operator, E.g, to find a logistics platform -project, use `toLower(project.summary) contains 'logistics platform' OR toLower(project.name) contains 'logistics platform'`.

IMPORTANT! ONLY QUERY TENANTS WITH NODE LABEL "tenant2"
<|eot_id|><|start_header_id|>user<|end_header_id|>
schema: {schema}

Question: {question}
<|eot_id|><|start_header_id|>assistant<|end_header_id|>
"""

# The template's two placeholders: the graph schema and the user's question.
cypher_prompt = PromptTemplate(
    template=cypher_generation_template,
    input_variables=["schema", "question"],
)

#### Note: to read the graph schema, the APOC procedures must be enabled in Neo4j. Installation instructions: https://neo4j.com/labs/apoc/4.4/installation/#apoc-core

In [96]:
# Answer-phrasing prompt: turns the raw query results ({context}) into a
# readable reply to the user's {question}.
CYPHER_QA_TEMPLATE = """You are an assistant that helps to form nice and human understandable answers.
The information part contains the provided information that you must use to construct an answer.
The provided information is authoritative, you must never doubt it or try to use your internal knowledge to correct it.
Make the answer sound as a response to the question. Do not mention that you based the result on the given information.
If the provided information is empty, say that you don't know the answer.
Final answer should be easily readable and structured.
Information:
{context}

Question: {question}
Helpful Answer:"""

# Wrap the template so the chain can fill in both placeholders.
qa_prompt = PromptTemplate(
    template=CYPHER_QA_TEMPLATE,
    input_variables=["context", "question"],
)

In [97]:
# Connect to Neo4j and print the schema that will be fed into the
# Cypher-generation prompt.
graph = Neo4jGraph(url=url, username=username, password=password)
print(graph.schema)

Node properties:
tenant2 {summary: STRING, additional_properties: STRING, id: STRING, name: STRING}
Person {summary: STRING, additional_properties: STRING, id: STRING, name: STRING}
Skill {name: STRING, id: STRING, summary: STRING, additional_properties: STRING}
tenant1 {summary: STRING, additional_properties: STRING, id: STRING, name: STRING}
Relationship properties:

The relationships:
(:tenant2)-[:WORKED_ON_PROJECT]->(:Skill)
(:tenant2)-[:HAS_SKILL]->(:tenant2)
(:tenant2)-[:HAS_SKILL]->(:Skill)
(:Person)-[:WORKED_ON_PROJECT]->(:Skill)
(:Person)-[:HAS_SKILL]->(:tenant2)
(:Person)-[:HAS_SKILL]->(:Skill)
(:Person)-[:HAS_SKILL]->(:tenant1)
(:tenant1)-[:WORKED_ON_PROJECT]->(:Skill)
(:tenant1)-[:HAS_SKILL]->(:Skill)
(:tenant1)-[:HAS_SKILL]->(:tenant1)
(:tenant1)-[:HAS_SKILL]->(:tenant2)


In [98]:
def query_graph(user_input, graph=None):
    """Answer a natural-language question by generating and running Cypher.

    Args:
        user_input: The question, in plain English.
        graph: Optional existing ``Neo4jGraph`` to reuse. When omitted, a new
            connection is opened from the notebook-level ``url``, ``username``
            and ``password``, exactly as before.

    Returns:
        The chain's output mapping. Callers in this notebook read its
        ``"result"`` key for the final answer; intermediate steps are
        requested via ``return_intermediate_steps=True``.
    """
    if graph is None:
        graph = Neo4jGraph(url, username, password)

    chain = GraphCypherQAChain.from_llm(
        llm=llm,
        graph=graph,
        verbose=True,
        return_intermediate_steps=True,
        cypher_prompt=cypher_prompt,
        qa_prompt=qa_prompt,
    )
    # Calling the chain object directly is deprecated in LangChain;
    # invoke() is the supported entry point and takes the inputs as a mapping.
    return chain.invoke({chain.input_key: user_input})

In [102]:
# Example question: run the full chain, then display only the final answer text.
python_answer = query_graph("I want to ask a few questions about PYTHON. Who can I approach?")
python_answer["result"]



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3m
MATCH (p:Person)-[:HAS_SKILL]->(s:Skill)
WHERE toLower(s.name) contains 'python' OR toLower(s.summary) contains 'python'
WITH p AS person
RETURN person;
[0m
Full Context:
[32;1m[1;3m[{'person': {'summary': '', 'name': 'Sarah Johnson', 'additional_properties': '', 'id': 'sarahjohnson'}}, {'person': {'summary': '', 'name': 'Sarah Johnson', 'additional_properties': '', 'id': 'sarahjohnson'}}, {'person': {'summary': '', 'name': 'Sarah Johnson', 'additional_properties': '', 'id': 'sarahjohnson'}}][0m

[1m> Finished chain.[0m


"You're looking for guidance on Python! According to the information, Sarah Johnson is the expert you can turn to for assistance. She has the necessary knowledge and expertise to help you with your Python-related queries."

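### Verdict: This end-to-end method of generating Cypher queries is not reliable enough. In the run above, the generated query ignored the `tenant2` restriction and returned the same person three times. This may limit how much of the graph database we can actually use, because the LLM isn't powerful enough to generate the desired queries. We can explore *six degrees of separation*. Alternatively, we may have to fine-tune a model specifically for Neo4j queries.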