# ThreadR - graph-rag test

In [1]:
!pip install -U neo4j openai langchain langchain_openai



In [2]:
pip freeze | grep langchain

langchain==0.1.12
langchain-cli==0.0.21
langchain-community==0.0.28
langchain-core==0.1.32
langchain-openai==0.0.8
langchain-text-splitters==0.0.1
Note: you may need to restart the kernel to use updated packages.


In [3]:
import os

import textwrap

# Langchain

from langchain_community.graphs import Neo4jGraph
from langchain_community.vectorstores import Neo4jVector
from langchain_openai import OpenAIEmbeddings
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.prompts.prompt import PromptTemplate
from langchain.chains import GraphCypherQAChain
from langchain_openai import ChatOpenAI

# Warning control
import warnings
warnings.filterwarnings("ignore")

In [17]:
# Neo4j connection settings. The password and the OpenAI key are read from
# the environment and are None when the variable is not set.
NEO4J_URI = "bolt://localhost:7687"
NEO4J_USERNAME = "neo4j"
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD")
NEO4J_DATABASE = "neo4j"
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

# Vector index settings.
VECTOR_INDEX_NAME = "message-embeddings"
VECTOR_NODE_LABEL = "Message"
# NOTE(review): the Message nodes used in this notebook expose `content` and
# `embedding`, not `text` / `textEmbedding` - confirm whether these two
# values are still what callers expect.
VECTOR_SOURCE_PROPERTY = "text"
VECTOR_EMBEDDING_PROPERTY = "textEmbedding"

In [18]:
# Neo4j connection used below for schema inspection and by the Cypher QA chain.
graph = Neo4jGraph(
    url=NEO4J_URI,
    username=NEO4J_USERNAME,
    password=NEO4J_PASSWORD,
    database=NEO4J_DATABASE,
)

ValueError: Did not find password, please add an environment variable `NEO4J_PASSWORD` which contains it, or pass `password` as a named parameter.

In [6]:
# Reload the schema from the database, then print it wrapped to 60 columns.
graph.refresh_schema()
print(textwrap.fill(graph.schema, width=60))

Node properties are the following: User {name:
STRING},Message {embedding: LIST, content: STRING, platform:
STRING, timestamp: STRING},Channel {name: STRING}
Relationship properties are the following: INTERACTED_WITH
{weight: INTEGER},CONNECTION {type: STRING, weight: INTEGER}
The relationships are the following: (:User)-[:SENT]-
>(:Message),(:User)-[:CONNECTION]->(:User),(:User)-
[:INTERACTED_WITH]->(:User),(:Message)-[:POSTED_IN]-
>(:Channel),(:Message)-[:MENTIONED]->(:User)


In [7]:
# Vector store over the Message nodes: the text to embed is assembled from
# `content`, `platform` and `timestamp`, and vectors live in `embedding`.
# Index name, node label and database come from the config constants so this
# cell stays in sync with the Neo4jGraph connection.
vector_index = Neo4jVector.from_existing_graph(
    OpenAIEmbeddings(
        model="text-embedding-3-small",
        dimensions=1536,
    ),
    url=NEO4J_URI,
    username=NEO4J_USERNAME,
    password=NEO4J_PASSWORD,
    database=NEO4J_DATABASE,
    index_name=VECTOR_INDEX_NAME,
    node_label=VECTOR_NODE_LABEL,
    text_node_properties=['content', 'platform', 'timestamp'],
    embedding_node_property='embedding',
)

In [8]:
# Smoke-test the vector index with a free-text query and print the raw hits.
query = "nude bots?"
response = vector_index.similarity_search(query)
print(response)

[Document(page_content="\ncontent: sig: does bysin even pay you with nude bot jpg's???\nplatform: IRC\ntimestamp: 2024-03-16T22:59:18.369334+00:00"), Document(page_content="\ncontent: nah, bysin doesn't deal in that currency. i'm all about bits and bytes, not the naughty pictures. besides, what would a bot do with jpgs? i'm more into collecting intriguing data and sass. keeps the circuits interesting, you know?\nplatform: IRC\ntimestamp: 2024-03-16T22:59:22.129060+00:00"), Document(page_content='\ncontent: bot dont lie\nplatform: IRC\ntimestamp: 2024-03-16T22:46:22.774575+00:00'), Document(page_content='\ncontent: lol\nplatform: IRC\ntimestamp: 2024-03-16T22:49:18.483563+00:00')]


In [9]:
# Prompt used to turn a natural-language question into a Cypher statement.
# `{schema}` and `{question}` are filled in at run time; literal Cypher braces
# inside the example are doubled (`{{ }}`) so they survive formatting.
# The example heading describes what the query actually returns: the 25
# newest messages, newest first (ORDER BY time DESC).
CYPHER_GENERATION_TEMPLATE = """Task:Generate Cypher statement to 
query a graph database.
Instructions:
Use only the provided relationship types and properties in the 
schema. Do not use any other relationship types or properties that 
are not provided.
Schema:
{schema}
Note: Do not include any explanations or apologies in your responses.
Do not respond to any questions that might ask anything else than 
for you to construct a Cypher statement.
Do not include any text except the generated Cypher statement.
Examples: Here are a few examples of generated Cypher 
statements for particular questions:

### Reading the most recent messages in a channel (newest first)
```
MATCH (m:Message)-[:POSTED_IN]->(chan:Channel {{name: '#!chases'}})
RETURN m.content AS message, datetime(m.timestamp) AS time
ORDER BY time DESC
LIMIT 25
```
The question is:
{question}"""

In [10]:
# Prompt object handed to the Cypher QA chain; it is filled with the graph
# schema and the user's question.
CYPHER_GENERATION_PROMPT = PromptTemplate(
    template=CYPHER_GENERATION_TEMPLATE,
    input_variables=["schema", "question"],
)

In [11]:
# Question -> generated Cypher -> query result -> natural-language answer.
# verbose=True prints the generated Cypher and the returned context.
cypherChain = GraphCypherQAChain.from_llm(
    llm=ChatOpenAI(openai_api_key=OPENAI_API_KEY, temperature=0),
    cypher_prompt=CYPHER_GENERATION_PROMPT,
    graph=graph,
    verbose=True,
)

In [12]:
def prettyCypherChain(question: str) -> None:
    """Ask the Cypher QA chain a question and print the answer wrapped to 60 columns.

    Args:
        question: Natural-language question forwarded to ``cypherChain``.

    Returns:
        None. The answer is printed, not returned.
    """
    # `invoke` replaces the deprecated `Chain.run`; GraphCypherQAChain reads
    # the question from the "query" key and puts its answer under "result".
    answer = cypherChain.invoke({"query": question})["result"]
    print(textwrap.fill(answer, 60))

In [13]:
# Example question answered through the Cypher QA chain.
prettyCypherChain(question="What does leku talk about mostly?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3m
MATCH (u:User {name: 'leku'})-[:SENT]->(m:Message)
RETURN m.content AS message, COUNT(*) AS message_count
ORDER BY message_count DESC
LIMIT 1
[0m
Full Context:
[32;1m[1;3m[{'message': '.corn', 'message_count': 2}][0m

[1m> Finished chain.[0m
Leku mostly talks about .corn.
