### Retrival Augmented Generation (RAG) using Langchain
https://python.langchain.com/docs/tutorials/graph/

### Setup OpenAI LLM

In [12]:
from langchain_openai import ChatOpenAI
import os

llm = ChatOpenAI(
    api_key = os.getenv("OPENAI_API_KEY"),
    model = "gpt-4o-mini",
    temperature = 0
)

In [13]:
from langchain_experimental.graph_transformers import LLMGraphTransformer

llm_transformer = LLMGraphTransformer(llm=llm)

In [14]:
from langchain_core.documents import Document

text = """
Marie Curie, born in 1867, was a Polish and naturalised-French physicist and chemist who conducted pioneering research on radioactivity.
She was the first woman to win a Nobel Prize, the first person to win a Nobel Prize twice, and the only person to win a Nobel Prize in two scientific fields.
Her husband, Pierre Curie, was a co-winner of her first Nobel Prize, making them the first-ever married couple to win the Nobel Prize and launching the Curie family legacy of five Nobel Prizes.
She was, in 1906, the first woman to become a professor at the University of Paris.
"""
documents = [Document(page_content=text)]

graph_documents = llm_transformer.convert_to_graph_documents(documents)

print(f"Nodes: {graph_documents[0].nodes}")
print(f"Relationships: {graph_documents[0].relationships}")


Nodes: [Node(id='Marie Curie', type='Person', properties={}), Node(id='Pierre Curie', type='Person', properties={}), Node(id='University Of Paris', type='Institution', properties={}), Node(id='Nobel Prize', type='Award', properties={}), Node(id='Radioactivity', type='Concept', properties={})]
Relationships: [Relationship(source=Node(id='Marie Curie', type='Person', properties={}), target=Node(id='Nobel Prize', type='Award', properties={}), type='WON', properties={}), Relationship(source=Node(id='Marie Curie', type='Person', properties={}), target=Node(id='Radioactivity', type='Concept', properties={}), type='RESEARCHED', properties={}), Relationship(source=Node(id='Marie Curie', type='Person', properties={}), target=Node(id='Pierre Curie', type='Person', properties={}), type='MARRIED_TO', properties={}), Relationship(source=Node(id='Pierre Curie', type='Person', properties={}), target=Node(id='Nobel Prize', type='Award', properties={}), type='WON', properties={}), Relationship(source=N

In [29]:
llm_transformer_filtered = LLMGraphTransformer(
    llm=llm,
    allowed_nodes=["Person", "Country", "Organization"],
    allowed_relationships=["NATIONALITY", "LOCATED_IN", "WORKED_AT", "SPOUSE"],
)
graph_documents_filtered = llm_transformer_filtered.convert_to_graph_documents(
    documents
)
print(f"Nodes:{graph_documents_filtered[0].nodes}")
print(f"Relationships:{graph_documents_filtered[0].relationships}")

Nodes:[Node(id='Marie Curie', type='Person', properties={}), Node(id='Pierre Curie', type='Person', properties={}), Node(id='University Of Paris', type='Organization', properties={}), Node(id='Poland', type='Country', properties={}), Node(id='France', type='Country', properties={})]
Relationships:[Relationship(source=Node(id='Marie Curie', type='Person', properties={}), target=Node(id='Poland', type='Country', properties={}), type='NATIONALITY', properties={}), Relationship(source=Node(id='Marie Curie', type='Person', properties={}), target=Node(id='France', type='Country', properties={}), type='NATIONALITY', properties={}), Relationship(source=Node(id='Marie Curie', type='Person', properties={}), target=Node(id='University Of Paris', type='Organization', properties={}), type='WORKED_AT', properties={}), Relationship(source=Node(id='Marie Curie', type='Person', properties={}), target=Node(id='Pierre Curie', type='Person', properties={}), type='SPOUSE', properties={}), Relationship(sour

In [30]:
llm_transformer_props = LLMGraphTransformer(
    llm=llm,
    allowed_nodes=["Person", "Country", "Organization"],
    allowed_relationships=["NATIONALITY", "LOCATED_IN", "WORKED_AT", "SPOUSE"],
    node_properties=["born_year"],
)
graph_documents_props = llm_transformer_props.convert_to_graph_documents(documents)
print(f"Nodes:{graph_documents_props[0].nodes}")
print(f"Relationships:{graph_documents_props[0].relationships}")

Nodes:[Node(id='Marie Curie', type='Person', properties={'born_year': '1867'}), Node(id='Pierre Curie', type='Person', properties={}), Node(id='University Of Paris', type='Organization', properties={})]
Relationships:[Relationship(source=Node(id='Marie Curie', type='Person', properties={}), target=Node(id='Pierre Curie', type='Person', properties={}), type='SPOUSE', properties={}), Relationship(source=Node(id='Marie Curie', type='Person', properties={}), target=Node(id='University Of Paris', type='Organization', properties={}), type='WORKED_AT', properties={})]


### Building-up the Graph Database

In [22]:
from dotenv import load_dotenv

load_dotenv("dev.env")
url = os.getenv("neo4j_url")
user = os.getenv("neo4j_username")
password = os.getenv("neo4j_password")

In [None]:
from langchain_neo4j import Neo4jGraph

#Instantiate the Neo4j Graph
graph = Neo4jGraph(url, user, password)

In [26]:
#Add the graph documents, sources, and entity labels to the graph
graph.add_graph_documents(
    graph_documents,
    include_source=True,
    baseEntityLabel=True
)

graph.refresh_schema()

In [31]:
graph.add_graph_documents(graph_documents_props)

In [32]:
print(graph.get_schema)

Node properties:
Document {id: STRING, text: STRING}
Person {id: STRING}
Institution {id: STRING}
Award {id: STRING}
Concept {id: STRING}
Relationship properties:

The relationships:
(:Document)-[:MENTIONS]->(:Person)
(:Document)-[:MENTIONS]->(:Institution)
(:Document)-[:MENTIONS]->(:Award)
(:Document)-[:MENTIONS]->(:Concept)
(:Person)-[:WON]->(:Award)
(:Person)-[:RESEARCHED]->(:Concept)
(:Person)-[:MARRIED_TO]->(:Person)
(:Person)-[:PROFESSOR]->(:Institution)


### Querying the Graph Database

In [40]:
person_id = "Marie Curie"


In [41]:
# Print the graph schema
print(graph.get_schema)

results = graph.query(f"""
    MATCH (doc:Document)-[:MENTIONS]->(person:Person {{id: "{person_id}"}})
    RETURN doc
""")


print(results)

Node properties:
Document {id: STRING, text: STRING}
Person {id: STRING}
Institution {id: STRING}
Award {id: STRING}
Concept {id: STRING}
Relationship properties:

The relationships:
(:Document)-[:MENTIONS]->(:Person)
(:Document)-[:MENTIONS]->(:Institution)
(:Document)-[:MENTIONS]->(:Award)
(:Document)-[:MENTIONS]->(:Concept)
(:Person)-[:WON]->(:Award)
(:Person)-[:RESEARCHED]->(:Concept)
(:Person)-[:MARRIED_TO]->(:Person)
(:Person)-[:PROFESSOR]->(:Institution)
[{'doc': {'id': 'df48cdafbdaada2de04aaeb7c6a271a0', 'text': '\nMarie Curie, born in 1867, was a Polish and naturalised-French physicist and chemist who conducted pioneering research on radioactivity.\nShe was the first woman to win a Nobel Prize, the first person to win a Nobel Prize twice, and the only person to win a Nobel Prize in two scientific fields.\nHer husband, Pierre Curie, was a co-winner of her first Nobel Prize, making them the first-ever married couple to win the Nobel Prize and launching the Curie family legacy of 

### Chaining, Graph RAG Style

In [46]:
enhanced_graph = Neo4jGraph(url, user, password,enhanced_schema=True)
print(enhanced_graph.schema)



Node properties:
- **Document**
  - `id`: STRING Available options: ['df48cdafbdaada2de04aaeb7c6a271a0']
  - `text`: STRING Available options: [' Marie Curie, born in 1867, was a Polish and natur']
- **Person**
  - `id`: STRING Available options: ['Marie Curie', 'Pierre Curie']
- **Institution**
  - `id`: STRING Available options: ['University Of Paris']
- **Award**
  - `id`: STRING Available options: ['Nobel Prize']
- **Concept**
  - `id`: STRING Available options: ['Radioactivity']
- **Organization**
  - `id`: STRING Available options: ['University Of Paris']
Relationship properties:

The relationships:
(:Document)-[:MENTIONS]->(:Person)
(:Document)-[:MENTIONS]->(:Institution)
(:Document)-[:MENTIONS]->(:Award)
(:Document)-[:MENTIONS]->(:Concept)
(:Person)-[:WON]->(:Award)
(:Person)-[:RESEARCHED]->(:Concept)
(:Person)-[:MARRIED_TO]->(:Person)
(:Person)-[:PROFESSOR]->(:Institution)
(:Person)-[:SPOUSE]->(:Person)
(:Person)-[:WORKED_AT]->(:Organization)


In [47]:
from langchain_neo4j import GraphCypherQAChain
from langchain_openai import ChatOpenAI
llm = ChatOpenAI(model="gpt-4o", temperature=0)

chain = GraphCypherQAChain.from_llm(
    graph=enhanced_graph, llm=llm, verbose=True, allow_dangerous_requests=True
)

In [48]:
response = chain.invoke({"query": "Who is Marie Curie?"})
response



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (d:Document)-[:MENTIONS]->(p:Person {id: "Marie Curie"})
RETURN d.text
[0m
Full Context:
[32;1m[1;3m[{'d.text': '\nMarie Curie, born in 1867, was a Polish and naturalised-French physicist and chemist who conducted pioneering research on radioactivity.\nShe was the first woman to win a Nobel Prize, the first person to win a Nobel Prize twice, and the only person to win a Nobel Prize in two scientific fields.\nHer husband, Pierre Curie, was a co-winner of her first Nobel Prize, making them the first-ever married couple to win the Nobel Prize and launching the Curie family legacy of five Nobel Prizes.\nShe was, in 1906, the first woman to become a professor at the University of Paris.\n'}][0m

[1m> Finished chain.[0m


{'query': 'Who is Marie Curie?',
 'result': 'Marie Curie, born in 1867, was a Polish and naturalised-French physicist and chemist who conducted pioneering research on radioactivity. She was the first woman to win a Nobel Prize, the first person to win a Nobel Prize twice, and the only person to win a Nobel Prize in two scientific fields. Her husband, Pierre Curie, was a co-winner of her first Nobel Prize, making them the first-ever married couple to win the Nobel Prize and launching the Curie family legacy of five Nobel Prizes. In 1906, she became the first woman to become a professor at the University of Paris.'}