In [2]:
import os
from langchain_neo4j import Neo4jGraph, GraphCypherQAChain
from langchain_ollama import ChatOllama
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_core.documents import Document

In [4]:
# Neo4j connection settings, exposed through environment variables.
# setdefault() is used so that values already exported in the real environment
# (e.g. a non-default password) take precedence; the literals below are only
# local-development fallbacks and should not be used for a shared database.
os.environ.setdefault("NEO4J_URI", "bolt://localhost:7687")
os.environ.setdefault("NEO4J_USERNAME", "neo4j")
os.environ.setdefault("NEO4J_PASSWORD", "neo4j_password")

In [35]:
# Configure the chat LLM served by Ollama.
# Model sizes (see: https://github.com/ollama/ollama?tab=readme-ov-file):
#   llama3.3 has 70B params
#   llama3.2 is presumably the small 3B/1B family — TODO verify against the README
# temperature=0 is passed to keep generations as deterministic as possible.
llm = ChatOllama(
    model="llama3.1",
    temperature=0,
)

In [7]:
# Connect to the Neo4j graph and load the movies CSV file into the graph DB
# using a Cypher query.

# Reuse the connection settings stored in the environment earlier in this
# notebook rather than repeating the URI and credentials here, so there is a
# single place to change them.
our_graph = Neo4jGraph(
    url=os.environ["NEO4J_URI"],
    username=os.environ["NEO4J_USERNAME"],
    password=os.environ["NEO4J_PASSWORD"],
)

# To add a local csv file, place the file in Neo4j's import directory.
# The query creates one Movie node per row and links it to Person nodes
# (DIRECTED / ACTED_IN) and Genre nodes (IN_GENRE); the director, actors and
# genres columns are '|'-separated lists. Everything is written with MERGE.
movies_query = """
LOAD CSV WITH HEADERS FROM 'file:///movies_small.csv'
AS row
MERGE (m:Movie {id:row.movieId})
SET m.released = date(row.released),
    m.title = row.title,
    m.imdbRating = toFloat(row.imdbRating)
FOREACH (director in split(row.director, '|') | 
    MERGE (p:Person {name:trim(director)})
    MERGE (p)-[:DIRECTED]->(m))
FOREACH (actor in split(row.actors, '|') | 
    MERGE (p:Person {name:trim(actor)})
    MERGE (p)-[:ACTED_IN]->(m))
FOREACH (genre in split(row.genres, '|') | 
    MERGE (g:Genre {name:trim(genre)})
    MERGE (m)-[:IN_GENRE]->(g))
"""

# Run the Cypher command above against the graph;
# this creates/populates the DB.
our_graph.query(movies_query)

[]

In [8]:
# Re-read the schema from the database, then display it
our_graph.refresh_schema()
schema_text = our_graph.get_schema
print(schema_text)

Node properties:
Movie {id: STRING, released: DATE, title: STRING, imdbRating: FLOAT}
Person {name: STRING}
Genre {name: STRING}
Relationship properties:

The relationships:
(:Movie)-[:IN_GENRE]->(:Genre)
(:Person)-[:DIRECTED]->(:Movie)
(:Person)-[:ACTED_IN]->(:Movie)


In [9]:
# GraphCypherQAChain answers a natural-language question in four steps:
#   1. accepts the question
#   2. asks the LLM to translate it into a Cypher command
#   3. runs that command against our graph
#   4. returns the result

chain = GraphCypherQAChain.from_llm(
    llm=llm,
    graph=our_graph,
    verbose=True,  # print the generated Cypher and the retrieved context
    # NOTE(review): appears to be the explicit opt-in for executing
    # LLM-generated Cypher against the database — confirm in the library docs.
    allow_dangerous_requests=True,
)
question = {"query": "Who acted in Heat?"}
response = chain.invoke(question)
print(response["result"])




[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (p:Person)-[:ACTED_IN]->(m:Movie {title: "Heat"}) RETURN p.name[0m
Full Context:
[32;1m[1;3m[{'p.name': 'Al Pacino'}, {'p.name': 'Robert De Niro'}, {'p.name': 'Val Kilmer'}, {'p.name': 'Jon Voight'}][0m

[1m> Finished chain.[0m
Al Pacino, Robert De Niro, Val Kilmer, Jon Voight.


### Next, we use the same LLM we have been querying to automatically construct a new knowledge graph (KG) from a text corpus

In [36]:
# Load the corpus: read every .txt file in the docs directory, split it into
# paragraphs, and wrap each paragraph in a LangChain Document.
directory_path = "../docs"  # corpus lives in a "docs" directory one level above the working directory
documents = []

# os.listdir() returns entries in arbitrary order; sorting keeps document
# indices (e.g. documents[1]) stable across runs and machines.
for filename in sorted(os.listdir(directory_path)):
    if not filename.endswith(".txt"):  # all corpus files are .txt; ignore anything else
        continue
    file_path = os.path.join(directory_path, filename)
    with open(file_path, "r", encoding="utf-8") as file:
        # Split on blank lines into paragraphs to better fit the context length
        paragraphs = file.read().split("\n\n")
    # Skip whitespace-only chunks (runs of blank lines, trailing newlines) so
    # no empty Documents are created; paragraph text itself is left unmodified.
    documents.extend(
        Document(page_content=paragraph)
        for paragraph in paragraphs
        if paragraph.strip()
    )


In [34]:
# Inspect one paragraph of the corpus (the second Document in the list)
sample_document = documents[1]
print(sample_document)

page_content='Russell Vought is the brain behind Donald Trump’s executive order blitz. The now-director of the Office of Management and Budget (OMB) spent months before the election drafting plans in secret as part of Project 2025.'


In [37]:
# Narrow the corpus to a single paragraph (the second one) so the graph
# extraction that follows only has to process one Document.
# The length guard makes this cell safe to re-run: once the list has been
# narrowed to one element, indexing [1] again would raise IndexError.
if len(documents) > 1:
    documents = [documents[1]]

In [38]:
# Extract graph documents (nodes and relationships) from the text with LLMGraphTransformer
llm_transformer = LLMGraphTransformer(llm=llm)
graph_documents = llm_transformer.convert_to_graph_documents(documents)

# Show what was extracted from the first document
first_graph_document = graph_documents[0]
print(f"Nodes:{first_graph_document.nodes}")
print(f"Relationships:{first_graph_document.relationships}")

# To add the graph documents to the main graph, uncomment the line below
# (the graph object in this notebook is named our_graph):
# our_graph.add_graph_documents(graph_documents, baseEntityLabel=True)

Nodes:[Node(id='Russell Vought', type='Person', properties={}), Node(id='Donald Trump', type='Person', properties={}), Node(id='Office Of Management And Budget', type='Organization', properties={})]
Relationships:[]
