# ThreadR - graph-rag test

In [1]:
!pip install -U neo4j openai langchain langchain_openai

Collecting langchain_openai
  Downloading langchain_openai-0.1.1-py3-none-any.whl.metadata (2.5 kB)
Downloading langchain_openai-0.1.1-py3-none-any.whl (32 kB)
Installing collected packages: langchain_openai
  Attempting uninstall: langchain_openai
    Found existing installation: langchain-openai 0.1.0
    Uninstalling langchain-openai-0.1.0:
      Successfully uninstalled langchain-openai-0.1.0
Successfully installed langchain_openai-0.1.1


In [None]:
import os

import textwrap

# Langchain

from langchain_community.graphs import Neo4jGraph
from langchain_community.vectorstores import Neo4jVector
from langchain_openai import OpenAIEmbeddings
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.prompts.prompt import PromptTemplate
from langchain.chains import GraphCypherQAChain
from langchain_openai import ChatOpenAI

# Warning control
import warnings
warnings.filterwarnings("ignore")

In [3]:
NEO4J_URI = "bolt://localhost:7687"
NEO4J_USERNAME = 'neo4j'
NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD")
NEO4J_DATABASE = 'neo4j'
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# Global constants
VECTOR_INDEX_NAME = 'message-embeddings'
VECTOR_NODE_LABEL = 'Message'
VECTOR_SOURCE_PROPERTY = 'text'
VECTOR_EMBEDDING_PROPERTY = 'textEmbedding'

In [4]:
graph = Neo4jGraph(
    url=NEO4J_URI, username=NEO4J_USERNAME, password=NEO4J_PASSWORD, database=NEO4J_DATABASE
)

In [5]:
graph.refresh_schema()
print(textwrap.fill(graph.schema, 60))

Node properties are the following: User {name:
STRING},Message {content: STRING, platform: STRING,
timestamp: STRING, embedding: LIST},Channel {name: STRING}
Relationship properties are the following: INTERACTED_WITH
{weight: INTEGER},CONNECTION {type: STRING, weight: INTEGER}
The relationships are the following: (:User)-[:SENT]-
>(:Message),(:User)-[:INTERACTED_WITH]->(:User),(:User)-
[:CONNECTION]->(:User),(:Message)-[:MENTIONED]-
>(:User),(:Message)-[:POSTED_IN]->(:Channel)


In [6]:
vector_index = Neo4jVector.from_existing_graph(
    OpenAIEmbeddings(
        model="text-embedding-3-small",
        dimensions=1536,
    ),
    url=NEO4J_URI,
    username=NEO4J_USERNAME,
    password=NEO4J_PASSWORD,
    index_name="message-embeddings",
    node_label="Message",
    text_node_properties=['content', 'platform', 'timestamp'],
    embedding_node_property='embedding',
)

In [None]:
query = "nude bots?"
response = vector_index.similarity_search(
    query
)
print(response)

In [8]:
CYPHER_GENERATION_TEMPLATE = """Task:Generate Cypher statement to 
query a graph database.
Instructions:
Use only the provided relationship types and properties in the 
schema. Do not use any other relationship types or properties that 
are not provided.
Schema:
{schema}
Note: Do not include any explanations or apologies in your responses.
Do not respond to any questions that might ask anything else than 
for you to construct a Cypher statement.
Do not include any text except the generated Cypher statement. 
Do not add any newlines to the response, the response is
going to be a single line of text. All usernames are case-sensitive.

DO NOT CONVERT THE USERNAME TO UPPERCASE. DO NOT CONVERT THE 
FIRST LETTER OF THE USERNAME TO UPPERCASE.

# Correct cypher statement
```
MATCH (p1:User {{name: 'kongfuzi'}})-[:INTERACTED_WITH]-(p2:User)
```

# Incorrect cypher statement
```
MATCH (p1:User {{name: 'Kongfuzi'}})-[:INTERACTED_WITH]-(p2:User)
```

Examples: Here are a few examples of generated Cypher 
statements for particular questions:

# What channels does kongfuzi talk in?
# or "what channel do you know kongfuzi from?"
```
MATCH (u:User {{name: 'kongfuzi'}})-[:SENT]->(m:Message)-[:POSTED_IN]->(c:Channel)
RETURN DISTINCT c.name AS channel
```


### Looking at all messages related to a particular user,
### Can be used to answer questions like: "What does alice talk about?"
### or "Is vpro an alcoholic?"

```
MATCH (u:User {{name: 'alice'}})-[:SENT]->(m:Message)
RETURN m.content AS message
```

### Reading messages in chronological order
```
MATCH (m:Message)-[:POSTED_IN]->(chan:Channel {{name: '#!chases'}})
RETURN m.content AS message, datetime(m.timestamp) AS time
ORDER BY time DESC
```

### Indirect Connection Through Shared Channels
```
MATCH (a:User {{name: 'alice'}})-[:SENT|POSTED_IN]->(m:Message)-[:POSTED_IN]->(chan:Channel)<-[:POSTED_IN]-(m2:Message)<-[:SENT|POSTED_IN]-(b:User {{name: 'bob'}})
RETURN DISTINCT chan.name AS SharedChannel
```

### Indirect Connection Through Mutual Connections
```
MATCH (a:User {{name: 'alice'}})-[:INTERACTED_WITH]->(mutual:User)<-[:INTERACTED_WITH]-(b:User {{name: 'bob'}})
RETURN DISTINCT mutual.name AS MutualFriend
```

### Is alice friends with bob?
```
MATCH (a:User {{name: 'alice'}})-[:INTERACTED_WITH]-(b:User {{name: 'bob'}})
RETURN a, b
```

### Showing a complete graph
```
MATCH (chan:Channel)-[:POSTED_IN]-(msg:Message)-[:SENT]-(user:User)
OPTIONAL MATCH (msg)-[:MENTIONED]->(mentioned:User)
RETURN chan, user, msg, mentioned
```

### Show a more complete graph
```
MATCH (chan:Channel)-[:POSTED_IN]-(msg:Message)-[:SENT]-(user:User)
OPTIONAL MATCH (msg)-[:MENTIONED]->(mentioned:User)
```

### Order messages in the channel by timestamp (descending)
```
WITH chan, user, msg, mentioned
ORDER BY msg.timestamp DESC
```

### Limit results, preserving the relationships
```
WITH  chan,
      collect({{user: user, msg: msg, mentioned: mentioned}})[..25] as recentChannelActivity
UNWIND recentChannelActivity as result
RETURN chan, result.user, result.msg, result.mentioned
```
```
The question is:
{question}"""

In [9]:
CYPHER_GENERATION_PROMPT = PromptTemplate(
    input_variables=["schema", "question"], 
    template=CYPHER_GENERATION_TEMPLATE,
)

In [10]:
cypherChain = GraphCypherQAChain.from_llm(
    cypher_llm=ChatOpenAI(model="gpt-3.5-turbo",temperature=0,openai_api_key=OPENAI_API_KEY),
    qa_llm=ChatOpenAI(temperature=0, model="gpt-4-0125-preview"),
    graph=graph,
    verbose=True,
    cypher_prompt=CYPHER_GENERATION_PROMPT,
    validate_cypher=True,
    top_k=1000,
    #return_direct=True
)

In [11]:
def prettyCypherChain(question: str) -> str:
    response = cypherChain.run(question)
    print(textwrap.fill(response, 60))

In [12]:
result = cypherChain("What does leku talk about?")
#rint(f"Intermediate steps: {result['intermediate_steps']}")
print(f"Final answer: {result['result']}")





[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3m
MATCH (u:User {name: 'leku'})-[:SENT]->(m:Message)
RETURN m.content AS message
[0m
Full Context:

[1m> Finished chain.[0m
Final answer: Leku talks about a variety of topics, including dinner plans, IRC message character limits, and the concept of footbail. Additionally, there are discussions on food items like pork soda, asada, pastor, and tacos, breakfast foods, and the state of food shelves. Leku also shares personal preferences and statements such as liking it hot, having a preference for weiners, and making art with crayon fingers. There are mentions of upgrading technology, humorous takes on life and food, and discussions about going out, the cleanliness of places, and personal habits like wearing masks. Additionally, it delves into financial matters, personal projects, and social commentary on public spending and social issues. There's also a mix of personal anecdotes, opinions on music and en

In [13]:
prettyCypherChain("Who are leku's friends?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (a:User {name: 'leku'})-[:INTERACTED_WITH]-(b:User)
RETURN b.name AS Friend[0m
Full Context:
[32;1m[1;3m[{'Friend': 'td-'}, {'Friend': 'td'}, {'Friend': 'Raccoon'}, {'Friend': 'larsinio'}, {'Friend': 'FE2'}, {'Friend': 'Hyralak'}, {'Friend': 'bysin'}, {'Friend': 'sig'}, {'Friend': 'hot take'}, {'Friend': '│ 2024/03/23 18:54:47 Failed to send message chunk to #!chases'}, {'Friend': "[{'isAlcoholic'"}, {'Friend': 'pyro'}, {'Friend': 'MATCH (p1:User {{name'}, {'Friend': 'Mufasa'}, {'Friend': 'n8'}, {'Friend': '{ "action"'}, {'Friend': 'this is the entire chain'}, {'Friend': 'bts'}, {'Friend': '*** NOTICE'}, {'Friend': "[{'channel'"}, {'Friend': 'larsinio'}, {'Friend': '20:20 < leku> threadr'}, {'Friend': 'larsifon'}, {'Friend': '│ 2024/03/22 00:41:16 Failed to send message to #!chases'}, {'Friend': 'blackdevl'}, {'Friend': 'Failed to update Neo4j'}, {'Friend': 'retry'}, {'Friend': "{'message'"}, {

In [14]:
prettyCypherChain("Who are bysin's friends?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3m
MATCH (a:User {name: 'bysin'})-[:INTERACTED_WITH]-(b:User)
RETURN b
[0m
Full Context:
[32;1m[1;3m[{'b': {'name': 'leku'}}, {'b': {'name': 'larsinio'}}, {'b': {'name': 'Raccoon'}}, {'b': {'name': 'FE2'}}, {'b': {'name': 'sig'}}, {'b': {'name': 'blackdevl'}}, {'b': {'name': 'Mercer5'}}, {'b': {'name': 'Hyralak'}}, {'b': {'name': 'threadr'}}, {'b': {'name': 'FE2'}}, {'b': {'name': 'viral'}}, {'b': {'name': 'leku'}}, {'b': {'name': 'vpro'}}][0m

[1m> Finished chain.[0m
Leku, Larsinio, Raccoon, FE2, Sig, Blackdevl, Mercer5,
Hyralak, Threadr, Viral, and Vpro are bysin's friends.


In [15]:
prettyCypherChain("How well does raccoon know sig?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3m
MATCH (p1:User {name: 'raccoon'})-[:INTERACTED_WITH]-(p2:User {name: 'sig'})
[0m


ValueError: Generated Cypher Statement is not valid
{code: Neo.ClientError.Statement.SyntaxError} {message: Query cannot conclude with MATCH (must be a RETURN clause, an update clause, a unit subquery call, or a procedure call with no YIELD) (line 2, column 1 (offset: 1))
"MATCH (p1:User {name: 'raccoon'})-[:INTERACTED_WITH]-(p2:User {name: 'sig'})"
 ^}

In [None]:
prettyCypherChain("How well does leku know sig?")

In [None]:
prettyCypherChain("What does bysin talk about mostly?")

In [None]:
prettyCypherChain("What are bysin's hobbies?")

In [None]:
prettyCypherChain("Is bysin an alcoholic?")