In [1]:
import os
import pandas as pd
import ast
from dotenv import load_dotenv
from neo4j import GraphDatabase
from IPython.display import Markdown, display

# Importing Langchain
from langchain.prompts import PromptTemplate
from langchain_google_genai import GoogleGenerativeAI
from langchain_ollama.llms import OllamaLLM
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_core.documents import Document
from langchain_community.graphs import Neo4jGraph
from typing import List, Tuple, Dict, Any
from langchain.prompts import PromptTemplate
import ast
import logging
from tenacity import retry, stop_after_attempt, wait_exponential
import re
from tqdm import tqdm
from langchain.chains import GraphCypherQAChain

from langchain_google_genai import ChatGoogleGenerativeAI

import warnings
warnings.filterwarnings('ignore')

load_dotenv()

# api_key = os.getenv("GOOGLE_API_KEY") # if you are using Google API

  from .autonotebook import tqdm as notebook_tqdm


True

In [2]:
# Load the movie dataset from the CSV file shipped in the local data/ folder.
movies = pd.read_csv('data/imdb_top_1000.csv')

# Preview the first rows to see which columns are available.
movies.head()

Unnamed: 0,Poster_Link,Series_Title,Released_Year,Certificate,Runtime,Genre,IMDB_Rating,Overview,Meta_score,Director,Star1,Star2,Star3,Star4,No_of_Votes,Gross
0,https://m.media-amazon.com/images/M/MV5BMDFkYT...,The Shawshank Redemption,1994,A,142 min,Drama,9.3,Two imprisoned men bond over a number of years...,80.0,Frank Darabont,Tim Robbins,Morgan Freeman,Bob Gunton,William Sadler,2343110,28341469
1,https://m.media-amazon.com/images/M/MV5BM2MyNj...,The Godfather,1972,A,175 min,"Crime, Drama",9.2,An organized crime dynasty's aging patriarch t...,100.0,Francis Ford Coppola,Marlon Brando,Al Pacino,James Caan,Diane Keaton,1620367,134966411
2,https://m.media-amazon.com/images/M/MV5BMTMxNT...,The Dark Knight,2008,UA,152 min,"Action, Crime, Drama",9.0,When the menace known as the Joker wreaks havo...,84.0,Christopher Nolan,Christian Bale,Heath Ledger,Aaron Eckhart,Michael Caine,2303232,534858444
3,https://m.media-amazon.com/images/M/MV5BMWMwMG...,The Godfather: Part II,1974,A,202 min,"Crime, Drama",9.0,The early life and career of Vito Corleone in ...,90.0,Francis Ford Coppola,Al Pacino,Robert De Niro,Robert Duvall,Diane Keaton,1129952,57300000
4,https://m.media-amazon.com/images/M/MV5BMWU4N2...,12 Angry Men,1957,U,96 min,"Crime, Drama",9.0,A jury holdout attempts to prevent a miscarria...,96.0,Sidney Lumet,Henry Fonda,Lee J. Cobb,Martin Balsam,John Fiedler,689845,4360000


In [3]:
# Set up Neo4j connection
class Neo4jConnection:
    """Small convenience wrapper around a Neo4j driver.

    Opens a driver for the given URI and credentials and exposes helpers to
    run Cypher, wipe the database and upload LangChain graph documents.
    Instances can be used as context managers; leaving the ``with`` block
    closes the connection.
    """

    def __init__(self, uri, user, password):
        """Open a driver for ``uri`` authenticated with ``user``/``password``."""
        # The connection details are kept because add_document() needs them to
        # build a Neo4jGraph: the raw driver cannot ingest graph documents.
        self._uri = uri
        self._user = user
        self._password = password
        self._graph = None  # created lazily by add_document()
        self.driver = GraphDatabase.driver(uri, auth=(user, password))

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False  # never suppress exceptions raised inside the block

    def close(self):
        """Close the driver (and the graph helper, if one was created)."""
        graph = getattr(self, "_graph", None)
        # NOTE(review): close() is looked up defensively in case the installed
        # Neo4jGraph version does not provide it.
        close_graph = getattr(graph, "close", None)
        if callable(close_graph):
            close_graph()
        self.driver.close()
        print("Connection closed")

    def reset_database(self):
        """Delete every node, together with its relationships."""
        with self.driver.session() as session:
            session.run("MATCH (n) DETACH DELETE n")
        print("Database resetted successfully!")

    def add_document(self, documents: list):
        """Store a list of LangChain graph documents in the database.

        The previous implementation called ``add_graph_documents`` on the raw
        driver, which has no such method; the call is now delegated to a
        ``Neo4jGraph`` built from the same connection details.
        """
        if self._graph is None:
            self._graph = Neo4jGraph(
                url=self._uri, username=self._user, password=self._password
            )
        self._graph.add_graph_documents(documents)

    def execute_query(self, query, parameters=None):
        """Run ``query`` and return all result records as a list.

        ``parameters`` is an optional dict of Cypher parameters; ``None`` is
        treated as an empty dict. Records are materialised inside the session
        block, before the session is closed.
        """
        with self.driver.session() as session:
            result = session.run(query, parameters or {})
            return list(result)

# Connect to Neo4j.
# Connection settings are read from the environment (load_dotenv() in the
# imports cell loads a local .env file when one exists), so real credentials
# do not have to live in the notebook. The fallbacks are the local development
# values this notebook was written against.
uri = os.getenv("NEO4J_URI", "bolt://localhost:7687")
user = os.getenv("NEO4J_USER", "neo4j")
password = os.getenv("NEO4J_PASSWORD", "ilovemovies")
conn = Neo4jConnection(uri, user, password)

In [4]:
# Make sure the database is empty before loading anything: reset_database()
# runs `MATCH (n) DETACH DELETE n`, removing every node and its relationships.
conn.reset_database()

Database resetted successfully!


# Manual Upload

In [5]:
# Function to Load DataFrame into Neo4j
def parse_int(x):
    """Convert ``x`` to ``int``; return ``None`` when it cannot be converted.

    Only conversion failures are swallowed (non-numeric text, ``None``, NaN,
    infinity). Anything else, such as ``KeyboardInterrupt``, propagates.
    """
    try:
        return int(x)
    except (TypeError, ValueError, OverflowError):
        return None
    
def parse_float(x):
    """Convert ``x`` to ``float``; return ``None`` when it cannot be converted.

    Only conversion failures are swallowed (non-numeric text, ``None``,
    integers too large for a float). Anything else, such as
    ``KeyboardInterrupt``, propagates.
    """
    try:
        return float(x)
    except (TypeError, ValueError, OverflowError):
        return None
    
def load_movies_to_neo4j(movies_df, connection):
    """Load every row of ``movies_df`` into Neo4j as Movie/Director/Actor nodes.

    For each row a ``Movie`` node is merged on its title and its properties
    are set, then a ``Director`` node and up to four ``Actor`` nodes are
    merged and linked with ``DIRECTED`` / ``ACTED_IN`` relationships.

    Args:
        movies_df: DataFrame with the columns ``Series_Title``,
            ``Released_Year``, ``IMDB_Rating``, ``Genre``, ``Runtime``,
            ``Overview``, ``Director`` and ``Star1``..``Star4``.
        connection: Object exposing ``execute_query(query, parameters=...)``.

    Loading is best effort: a failure on one row is printed and the loop
    continues with the next row.
    """
    movie_query = """
        MERGE (movie:Movie {title: $title})
        SET movie.year = $year,
            movie.rating = $rating,
            movie.genre = $genre,
            movie.runtime = $runtime,
            movie.overview = $overview;
        """
    director_query = """
        MERGE (director:Director {name: $name})
        MERGE (movie:Movie {title: $title})
        MERGE (director)-[:DIRECTED]->(movie);
        """
    actor_query = """
        MERGE (actor:Actor {name: $name})
        MERGE (movie:Movie {title: $title})
        MERGE (actor)-[:ACTED_IN]->(movie);
        """

    for _, row in movies_df.iterrows():
        try:
            title = row["Series_Title"]

            # Create Movie Node
            connection.execute_query(
                movie_query,
                parameters={
                    "title": title,
                    "year": parse_int(row["Released_Year"]),
                    "rating": parse_float(row["IMDB_Rating"]),
                    "genre": row["Genre"],
                    "runtime": row["Runtime"],
                    "overview": row["Overview"],
                },
            )

            # Create Director Node and Relationship.
            # A missing name is skipped: MERGE on null fails, and MERGE on NaN
            # never matches an existing node, so it would add a junk node per row.
            director = row["Director"]
            if not pd.isna(director):
                connection.execute_query(
                    director_query,
                    parameters={"name": director, "title": title},
                )

            # Create Actor Nodes and Relationships (missing names skipped as above)
            for actor in (row["Star1"], row["Star2"], row["Star3"], row["Star4"]):
                if pd.isna(actor):
                    continue
                connection.execute_query(
                    actor_query,
                    parameters={"name": actor, "title": title},
                )
        except Exception as e:
            print(f"Error loading {row['Series_Title']} to Neo4j: {e}\nRow: {row}")

# Load every row of the movies DataFrame into Neo4j through the open connection.
load_movies_to_neo4j(movies, conn)

In [6]:
# Sanity check of the manual upload: list ten (movie title, actor name) pairs
# joined by an ACTED_IN relationship. The pattern has no arrow, so it matches
# the relationship regardless of its direction.
query = """
MATCH (m:Movie)-[:ACTED_IN]-(a:Actor)
RETURN m.title, a.name
LIMIT 10;
"""
conn.execute_query(query)

[<Record m.title='The Shawshank Redemption' a.name='William Sadler'>,
 <Record m.title='The Shawshank Redemption' a.name='Bob Gunton'>,
 <Record m.title='The Shawshank Redemption' a.name='Morgan Freeman'>,
 <Record m.title='The Shawshank Redemption' a.name='Tim Robbins'>,
 <Record m.title='The Godfather' a.name='Diane Keaton'>,
 <Record m.title='The Godfather' a.name='James Caan'>,
 <Record m.title='The Godfather' a.name='Al Pacino'>,
 <Record m.title='The Godfather' a.name='Marlon Brando'>,
 <Record m.title='The Dark Knight' a.name='Michael Caine'>,
 <Record m.title='The Dark Knight' a.name='Aaron Eckhart'>]

![Movies plotted on KG](imgs/manual_graph.jpeg)

In [7]:
# Wipe the manually loaded graph so the next section starts from an empty database.
conn.reset_database()

Database resetted successfully!


In [8]:
# Initialize the LLM (local Ollama model; the Google variant is kept for reference)
# llm = GoogleGenerativeAI(model="gemini-1.5-flash", google_api_key=api_key) # if you are using Google API
llm = OllamaLLM(model="qwen2.5-coder:latest")

df = movies.copy()


def _column_preview(frame, column, n_values=3):
    """Return '<column>: v1, v2, v3...' built from the column's first distinct values."""
    distinct_values = frame[column].unique()[:n_values]
    return f"{column}: {', '.join(map(str, distinct_values))}..."


# Step 1: Define Node Labels and Properties
# One line per column, each showing a few sample values; this text is what the
# LLM sees as the "dataset structure".
node_structure = "\n".join([_column_preview(df, column) for column in df.columns])

print(node_structure)

Poster_Link: https://m.media-amazon.com/images/M/MV5BMDFkYTc0MGEtZmNhMC00ZDIzLWFmNTEtODM1ZmRlYWMwMWFmXkEyXkFqcGdeQXVyMTMxODk2OTU@._V1_UX67_CR0,0,67,98_AL_.jpg, https://m.media-amazon.com/images/M/MV5BM2MyNjYxNmUtYTAwNi00MTYxLWJmNWYtYzZlODY3ZTk3OTFlXkEyXkFqcGdeQXVyNzkwMjQ5NzM@._V1_UY98_CR1,0,67,98_AL_.jpg, https://m.media-amazon.com/images/M/MV5BMTMxNTMwODM0NF5BMl5BanBnXkFtZTcwODAyMTk2Mw@@._V1_UX67_CR0,0,67,98_AL_.jpg...
Series_Title: The Shawshank Redemption, The Godfather, The Dark Knight...
Released_Year: 1994, 1972, 2008...
Certificate: A, UA, U...
Runtime: 142 min, 175 min, 152 min...
Genre: Drama, Crime, Drama, Action, Crime, Drama...
IMDB_Rating: 9.3, 9.2, 9.0...
Overview: Two imprisoned men bond over a number of years, finding solace and eventual redemption through acts of common decency., An organized crime dynasty's aging patriarch transfers control of his clandestine empire to his reluctant son., When the menace known as the Joker wreaks havoc and chaos on the people of Gotha

In [9]:
# Set up logging: configure the root logger at INFO level and create the
# module-level logger used by get_node_definitions and the cell-level code below.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def validate_node_definition(node_def: Dict) -> bool:
    """Check that ``node_def`` maps each node label to a dict with string keys.

    Returns ``False`` when ``node_def`` is not a dict, when any value is not
    a dict, or when any property name is not a string. Outer keys and
    property values are not inspected; an empty dict is considered valid.
    """
    if not isinstance(node_def, dict):
        return False

    for properties in node_def.values():
        if not isinstance(properties, dict):
            return False
        for property_name in properties:
            if not isinstance(property_name, str):
                return False

    return True

@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
def get_node_definitions(chain, structure: str, example: Dict) -> Dict[str, Dict[str, str]]:
    """Ask the LLM chain for node definitions and parse them into a dict.

    Args:
        chain: Object with an ``invoke(dict)`` method returning the model reply.
        structure: Text description of the dataset columns.
        example: Example of the expected output, interpolated into the prompt.

    Returns:
        The parsed mapping of node label to a dict of its properties.

    Raises:
        ValueError, SyntaxError: When the reply cannot be parsed as a Python
            literal or does not have the expected structure. The error is
            logged and re-raised so the ``retry`` decorator can try again.
    """
    try:
        # Get response from LLM
        response = chain.invoke({"structure": structure, "example": example})

        # Models often wrap the answer in a Markdown code fence despite the
        # prompt; keep only the fenced content and drop an optional language tag.
        if isinstance(response, str) and "```" in response:
            response = response.split("```")[1]
            first_line, newline, remainder = response.partition("\n")
            if newline and first_line.strip().isalnum():
                response = remainder
        if isinstance(response, str):
            response = response.strip()

        # Parse response (literal_eval only accepts Python literals, never code)
        node_defs = ast.literal_eval(response)

        # Validate structure
        if not validate_node_definition(node_defs):
            raise ValueError("Invalid node definition structure")

        return node_defs

    except (ValueError, SyntaxError) as e:
        logger.error(f"Error parsing node definitions: {e}")
        raise

# Example of the expected output, interpolated into the prompt as {example}:
# each node label maps to a dict of property name -> "row['<column>']"
# placeholder string.
node_example = {
    "NodeLabel1": {"property1": "row['property1']", "property2": "row['property2']"},
    "NodeLabel2": {"property1": "row['property1']", "property2": "row['property2']"},
    "NodeLabel3": {"property1": "row['property1']", "property2": "row['property2']"},
}

# Prompt asking the LLM for a dictionary of node labels and their properties.
# The instruction about code fences used to say "just return the list of tuples",
# which contradicted the dictionary requested everywhere else in the prompt.
define_nodes_prompt = PromptTemplate(
    input_variables=["example", "structure"],
    template=("""
        Analyze the dataset structure below and extract the entity labels for nodes and their properties.\n
        The node properties should be based on the dataset columns and their values.\n
        Return the result as a dictionary where the keys are the node labels and the values are the node properties.\n\n
        Example: {example}\n\n
        
        Dataset Structure:\n{structure}\n\n
              
        Make sure to include all the possible node labels and their properties.\n
        If a property can be its own node, include it as a separate node label.\n
        Please do not report triple backticks to identify a code block, just return the dictionary.\n
        Return only the dictionary containing node labels and properties, and don't include any other text or quotation.
        
        """
    ),
)

# Execute with error handling
try:
    # Piping the prompt into the LLM builds the chain handed to get_node_definitions.
    node_chain = define_nodes_prompt | llm

    node_definitions = get_node_definitions(node_chain, structure=node_structure, example=node_example)
    logger.info(f"Node Definitions: {node_definitions}")
except Exception as e:
    # Log for visibility, then re-raise so the cell fails here instead of
    # leaving node_definitions undefined for the cells that follow.
    logger.error(f"Failed to get node definitions: {e}")
    raise

INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
INFO:__main__:Node Definitions: {'Movie': {'Poster_Link': 'row["Poster_Link"]', 'Series_Title': 'row["Series_Title"]', 'Released_Year': 'row["Released_Year"]', 'Certificate': 'row["Certificate"]', 'Runtime': 'row["Runtime"]', 'Genre': 'row["Genre"]', 'IMDB_Rating': 'row["IMDB_Rating"]', 'Overview': 'row["Overview"]', 'Meta_score': 'row["Meta_score"]'}, 'Director': {'Director_Name': 'row["Director"]'}, 'Star': {'Star1': 'row["Star1"]', 'Star2': 'row["Star2"]', 'Star3': 'row["Star3"]', 'Star4': 'row["Star4"]'}}


In [11]:
class RelationshipIdentifier:
    """Identifies relationships between nodes in a graph database.

    Wraps a ``prompt | llm`` chain that asks the model for a list of
    ``(start_label, relationship_label, end_label)`` triples, parses the reply
    with ``ast.literal_eval`` and validates its shape.
    """

    # Shown to the model as the expected output format.
    RELATIONSHIP_EXAMPLE = [
        ("NodeLabel1", "RelationshipLabel", "NodeLabel2"),
        ("NodeLabel1", "RelationshipLabel", "NodeLabel3"),
        ("NodeLabel2", "RelationshipLabel", "NodeLabel3"),
    ]

    PROMPT_TEMPLATE = PromptTemplate(
        input_variables=["structure", "node_definitions", "example"],
        template="""
        Consider the following Dataset Structure:\n{structure}\n\n

        Consider the following Node Definitions:\n{node_definitions}\n\n

        Based on the dataset structure and node definitions, identify relationships (edges) between nodes.\n
        Return the relationships as a list of triples where each triple contains the start node label, relationship label, and end node label, and each triple is a tuple.\n
        Please return only the list of tuples. Please do not report triple backticks to identify a code block, just return the list of tuples.\n\n

        Example:\n{example}
        """
    )

    def __init__(self, llm: Any, logger: logging.Logger = None):
        """Build the chain from ``PROMPT_TEMPLATE`` and ``llm``.

        Args:
            llm: Model (or any runnable) the prompt is piped into.
            logger: Optional logger; defaults to this module's logger.
        """
        self.llm = llm
        self.logger = logger or logging.getLogger(__name__)
        self.chain = self.PROMPT_TEMPLATE | self.llm
        # Result of the most recent successful identify_relationships() call.
        self._last_relationships = None

    @staticmethod
    def _strip_code_fence(text: str) -> str:
        """Return the content of the first Markdown code fence in ``text``.

        Text without a fence is returned stripped. An optional language tag
        on the fence's opening line is dropped.
        """
        if "```" in text:
            text = text.split("```")[1]
            first_line, newline, remainder = text.partition("\n")
            if newline and first_line.strip().isalnum():
                text = remainder
        return text.strip()

    def validate_relationships(self, relationships: List[Tuple]) -> bool:
        """Return True when ``relationships`` is a list/tuple of 3-tuples of str."""
        if not isinstance(relationships, (list, tuple)):
            return False
        return all(
            isinstance(rel, tuple) and
            len(rel) == 3 and
            all(isinstance(x, str) for x in rel)
            for rel in relationships
        )

    @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
    def identify_relationships(self, structure: str, node_definitions: Dict) -> List[Tuple]:
        """Ask the LLM for relationships between the defined nodes.

        Args:
            structure: Text description of the dataset columns.
            node_definitions: Node labels and properties; passed to the prompt
                as ``str(node_definitions)``.

        Returns:
            List of ``(start_label, relationship_label, end_label)`` tuples.

        Raises:
            Exception: Any failure (model call, parsing, validation) is logged
                and re-raised so the ``retry`` decorator can try again.
        """
        try:
            response = self.chain.invoke({
                "structure": structure,
                "node_definitions": str(node_definitions),
                "example": str(self.RELATIONSHIP_EXAMPLE)
            })

            # Tolerate a fenced reply even though the prompt asks for none.
            if isinstance(response, str):
                response = self._strip_code_fence(response)

            relationships = ast.literal_eval(response)

            if not self.validate_relationships(relationships):
                raise ValueError("Invalid relationship structure")

            self._last_relationships = relationships
            self.logger.info(f"Identified {len(relationships)} relationships")
            return relationships

        except Exception as e:
            self.logger.error(f"Error identifying relationships: {e}")
            raise

    def get_relationship_types(self, relationships: List[Tuple] = None) -> List[str]:
        """Extract the unique relationship types, sorted alphabetically.

        Args:
            relationships: Triples to inspect. When omitted, the result of the
                most recent successful ``identify_relationships`` call is used.

        Raises:
            ValueError: When no relationships are given and none have been
                identified yet. (The previous implementation called
                ``identify_relationships()`` without its required arguments
                and therefore always failed with ``TypeError``.)
        """
        if relationships is None:
            relationships = self._last_relationships
        if relationships is None:
            raise ValueError(
                "No relationships available: call identify_relationships() first "
                "or pass the relationships explicitly"
            )
        return sorted({rel[1] for rel in relationships})

# Usage: build the identifier around the shared LLM and ask it for the
# relationships implied by the column summary and the node definitions.
identifier = RelationshipIdentifier(llm=llm)
relationships = identifier.identify_relationships(node_structure, node_definitions)
print("Relationships:", relationships)

INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
INFO:__main__:Identified 5 relationships


Relationships: [('Movie', 'Directed By', 'Director'), ('Movie', 'Starring', 'Star1'), ('Movie', 'Starring', 'Star2'), ('Movie', 'Starring', 'Star3'), ('Movie', 'Starring', 'Star4')]


In [12]:
class CypherQueryBuilder:
    """Builds Cypher queries for Neo4j graph database.

    Asks an LLM to turn node definitions and relationships into ``CREATE``
    statements that contain ``row['<column>']`` placeholders, then cleans up
    and validates the reply (regex checks plus a second LLM call).
    """

    INPUT_EXAMPLE = """
    NodeLabel1: value1, value2
    NodeLabel2: value1, value2
    """

    EXAMPLE_CYPHER = """
    CREATE (n1:NodeLabel1 {property1: "row['property1']", property2: "row['property2']"})
    CREATE (n2:NodeLabel2 {property1: "row['property1']", property2: "row['property2']"})
    CREATE (n1)-[:RelationshipLabel]->(n2);
    """
    # The original definition was a chained assignment that also created this
    # lowercase name; it is kept as an alias for backward compatibility.
    example_cypher = EXAMPLE_CYPHER

    # input_variables now lists the placeholders the template actually uses.
    PROMPT_TEMPLATE = PromptTemplate(
        input_variables=["node_definitions", "relationships", "input", "cypher"],
        template="""
        Consider the following Node Definitions:\n{node_definitions}\n\n
        Consider the following Relationships:\n{relationships}\n\n
        Generate Cypher queries to create nodes and relationships using the node definitions and relationships below. Remember to replace the placeholder values with actual data from the dataset.\n
        Include all the properties in the Node Definitions for each node as defined and create relationships.\n
        Return a single string with each query separated by a semicolon.\n
        Don't include any other text or quotation marks in the response.\n
        Please return only the string containing Cypher queries. Please do not report triple backticks to identify a code block.\n\n

        Example Input:\n{input}\n\n

        Example Output Cypher query:\n{cypher}
    """
    )

    # Built once here instead of on every validate_cypher_query() call.
    VALIDATION_PROMPT = PromptTemplate(
        input_variables=["query"],
        template="""
            Validate this Cypher query and return TRUE or FALSE:
            
            Query: {query}
            
            Rules to check:
            1. Valid CREATE statements
            2. Proper property formatting
            3. Valid relationship syntax
            4. No missing parentheses
            5. Valid property names
            6. Valid relationship types
            
            Return only TRUE if query is valid, FALSE if invalid.
            """
    )

    def __init__(self, llm: Any, logger: logging.Logger = None):
        """Build the generation chain from ``PROMPT_TEMPLATE`` and ``llm``.

        Args:
            llm: Model (or any runnable) the prompts are piped into.
            logger: Optional logger; defaults to this module's logger.
        """
        self.llm = llm
        self.logger = logger or logging.getLogger(__name__)
        self.chain = self.PROMPT_TEMPLATE | self.llm

    def validate_cypher_query(self, query: str) -> bool:
        """Validate Cypher query syntax using LLM and regex patterns.

        The query must contain a ``CREATE (``, a ``{...}`` property map and a
        ``)-[:...]->`` relationship; if so, the LLM is asked for a TRUE/FALSE
        verdict. Any exception during validation is logged and treated as
        "invalid".
        """
        try:
            # Basic pattern validation
            basic_valid = all(re.search(pattern, query) for pattern in [
                r'CREATE \(',
                r'\{.*?\}',
                r'\)-\[:.*?\]->'
            ])

            if not basic_valid:
                return False

            # LLM validation
            validation_chain = self.VALIDATION_PROMPT | self.llm
            result = validation_chain.invoke({"query": query})

            # Parse result: the first TRUE/FALSE word in the reply is the
            # verdict. A plain substring test would accept replies such as
            # "FALSE, this is not TRUE".
            verdict = re.search(r'\b(TRUE|FALSE)\b', result.upper())
            is_valid = verdict is not None and verdict.group(1) == "TRUE"

            if not is_valid:
                self.logger.warning(f"LLM validation failed for query: {query}")

            return is_valid

        except Exception as e:
            self.logger.error(f"Validation error: {e}")
            return False

    def sanitize_query(self, query: str) -> str:
        """Sanitize and format Cypher query.

        Collapses every run of whitespace (newlines and tabs included) into a
        single space, then rewrites ``'row[`` as ``row['`` and ``]'`` as
        ``']``.
        """
        collapsed = re.sub(r'\s+', ' ', query.strip())
        return (collapsed
                .replace("'row[", "row['")
                .replace("]'", "']"))

    @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
    def build_queries(self, node_definitions: Dict, relationships: List) -> str:
        """Generate the Cypher template for the given nodes and relationships.

        Args:
            node_definitions: Node labels and properties; passed to the prompt
                as ``str(node_definitions)``.
            relationships: Relationship triples; passed as ``str(relationships)``.

        Returns:
            The sanitized, validated query text.

        Raises:
            Exception: Any failure (model call, validation) is logged and
                re-raised so the ``retry`` decorator can try again.
        """
        try:
            response = self.chain.invoke({
                "node_definitions": str(node_definitions),
                "relationships": str(relationships),
                "input": self.INPUT_EXAMPLE,
                "cypher": self.EXAMPLE_CYPHER
            })

            # Get response inside triple backticks. A fence such as
            # ```cypher carries a language tag on its first line; without
            # dropping it, the tag would end up at the start of the query.
            if '```' in response:
                response = response.split('```')[1]
                first_line, newline, remainder = response.partition('\n')
                if newline and first_line.strip().isalpha():
                    response = remainder

            # Sanitize response
            queries = self.sanitize_query(response)

            # Validate queries
            if not self.validate_cypher_query(queries):
                raise ValueError("Invalid Cypher query syntax")

            self.logger.info("Successfully generated Cypher queries")
            return queries

        except Exception as e:
            self.logger.error(f"Error building Cypher queries: {e}")
            raise

    def split_queries(self, queries: str) -> List[str]:
        """Split combined queries into individual statements.

        The split is a plain ``str.split(';')``, so a semicolon inside a
        string literal also splits. Empty fragments are dropped.
        """
        return [q.strip() for q in queries.split(';') if q.strip()]

# Usage: generate the Cypher template (still containing row['<column>']
# placeholders) from the node definitions and relationships found earlier.
builder = CypherQueryBuilder(llm=llm)
cypher_queries = builder.build_queries(node_definitions, relationships)
print("Cypher Queries:", cypher_queries)

INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
ERROR:__main__:Error building Cypher queries: Invalid Cypher query syntax
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
INFO:__main__:Successfully generated Cypher queries


Cypher Queries: CREATE (m:Movie {Poster_Link: "row['Poster_Link']", Series_Title: "row['Series_Title']", Released_Year: "row['Released_Year']", Certificate: "row['Certificate']", Runtime: "row['Runtime']", Genre: "row['Genre']", IMDB_Rating: "row['IMDB_Rating']", Overview: "row['Overview']", Meta_score: "row['Meta_score']"}) CREATE (d:Director {Director_Name: "row['Director']"}) CREATE (s1:Star {Star1: "row['Star1']"}) CREATE (s2:Star {Star2: "row['Star2']"}) CREATE (s3:Star {Star3: "row['Star3']"}) CREATE (s4:Star {Star4: "row['Star4']"}) CREATE (m)-[:Directed_By]->(d) CREATE (m)-[:Starring]->(s1) CREATE (m)-[:Starring]->(s2) CREATE (m)-[:Starring]->(s3) CREATE (m)-[:Starring]->(s4)


In [13]:
# Iterate over dataframe with progress bar
logs = ""  # accumulates one "Error on row N: ..." line per failed row
total_rows = len(df)  # passed to tqdm as the progress-bar total
def sanitize_value(value):
    """Return ``value`` as text that is safe inside a double-quoted Cypher string.

    Strings have backslashes and double quotes escaped. Previously quotes
    were deleted, which altered the stored data, and a backslash could escape
    the closing quote of the literal and break the query. Non-string values
    are converted with ``str()``.
    """
    if isinstance(value, str):
        # Escape backslashes first so the backslashes added for quotes stay intact.
        return value.replace('\\', '\\\\').replace('"', '\\"')
    return str(value)

failed_rows = 0  # counted so failures are visible without reading `logs`

for index, row in tqdm(df.iterrows(),
                      total=total_rows,
                      desc="Loading data to Neo4j",
                      position=0,
                      leave=True):

    # Replace placeholders with actual values
    cypher_query = cypher_queries
    for column in df.columns:
        cypher_query = cypher_query.replace(
            f"row['{column}']",
            sanitize_value(row[column])
        )

    try:
        # Execute query and update progress
        conn.execute_query(cypher_query)
    except Exception as e:
        # Best effort: remember the failure and continue with the next row.
        failed_rows += 1
        logs += f"Error on row {index+1}: {str(e)}\n"

# The progress bar reaches 100% even when rows fail, so report failures explicitly.
if failed_rows:
    print(f"{failed_rows} of {total_rows} rows failed to load; print(logs) for details.")

# Display logs
# print(logs) # Uncomment to display logs

Loading data to Neo4j:   0%|          | 0/1000 [00:00<?, ?it/s]

Loading data to Neo4j: 100%|██████████| 1000/1000 [00:12<00:00, 79.70it/s]


In [15]:
# Spot-check the load: fetch five paths between a Movie node and any directly
# connected node (relationship type and direction are left unconstrained).
query = """
MATCH p=(m:Movie)-[r]-(n)
RETURN p
LIMIT 5;
"""
conn.execute_query(query)

[<Record p=<Path start=<Node element_id='4:de1e008d-1c09-4c07-8b57-22301c3318fb:7256' labels=frozenset({'Movie'}) properties={'Runtime': '142 min', 'Meta_score': '80.0', 'Released_Year': '1994', 'Series_Title': 'The Shawshank Redemption', 'IMDB_Rating': '9.3', 'Overview': 'Two imprisoned men bond over a number of years, finding solace and eventual redemption through acts of common decency.', 'Genre': 'Drama', 'Poster_Link': 'https://m.media-amazon.com/images/M/MV5BMDFkYTc0MGEtZmNhMC00ZDIzLWFmNTEtODM1ZmRlYWMwMWFmXkEyXkFqcGdeQXVyMTMxODk2OTU@._V1_UX67_CR0,0,67,98_AL_.jpg', 'Certificate': 'A'}> end=<Node element_id='4:de1e008d-1c09-4c07-8b57-22301c3318fb:7258' labels=frozenset({'Star'}) properties={'Star1': 'Tim Robbins'}> size=1>>,
 <Record p=<Path start=<Node element_id='4:de1e008d-1c09-4c07-8b57-22301c3318fb:7256' labels=frozenset({'Movie'}) properties={'Runtime': '142 min', 'Meta_score': '80.0', 'Released_Year': '1994', 'Series_Title': 'The Shawshank Redemption', 'IMDB_Rating': '9.3', 

![custom_graph_builder.jpeg](imgs/custom_graph_builder.jpeg)

In [16]:
# Wipe the graph again so the next approach starts from an empty database.
conn.reset_database()

Database resetted successfully!


In [17]:
llm_transformer = LLMGraphTransformer(
    llm=llm,
)

df_sample = df.head(100) # Reduce sample size for faster processing

documents = []
for _, row in tqdm(df_sample.iterrows(),
                   total=len(df_sample),
                   desc="Creating documents",
                   position=0,
                   leave=True):
    try:
        stars = ', '.join([row[f'Star{i}'] for i in range(1,5)])

        # One "Field: value" line per attribute. The lines are joined explicitly
        # because a triple-quoted f-string carried the source indentation into
        # every line after the first of the text sent to the LLM.
        text = "\n".join([
            f"Title: {row['Series_Title']}",
            f"Director: {row['Director']}",
            f"Stars: {stars}",
            f"Genre: {row['Genre']}",
            f"Overview: {row['Overview']}",
        ])

        documents.append(Document(page_content=text))

    except KeyError as e:
        tqdm.write(f"Missing column: {e}")
    except Exception as e:
        # e.g. a non-string star value makes str.join raise; the row is skipped.
        tqdm.write(f"Error processing row: {e}")

Creating documents: 100%|██████████| 100/100 [00:00<00:00, 19944.38it/s]


In [18]:
# Run the LLM graph transformer over every document (async API, awaited
# directly at cell level).
graph_documents = await llm_transformer.aconvert_to_graph_documents(documents)
# Inspect what was extracted from the first document only.
print(f"Nodes:{graph_documents[0].nodes}")
print(f"Relationships:{graph_documents[0].relationships}")

INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 

Nodes:[Node(id='The Shawshank Redemption', type='Movie', properties={}), Node(id='Morgan Freeman', type='Person', properties={}), Node(id='Drama', type='Genre', properties={}), Node(id='Tim Robbins', type='Person', properties={}), Node(id='William Sadler', type='Person', properties={}), Node(id='Bob Gunton', type='Person', properties={}), Node(id='Frank Darabont', type='Person', properties={})]
Relationships:[Relationship(source=Node(id='The Shawshank Redemption', type='Movie', properties={}), target=Node(id='Frank Darabont', type='Person', properties={}), type='DIRECTED_BY', properties={}), Relationship(source=Node(id='Tim Robbins', type='Person', properties={}), target=Node(id='The Shawshank Redemption', type='Movie', properties={}), type='ACTS_IN', properties={}), Relationship(source=Node(id='Morgan Freeman', type='Person', properties={}), target=Node(id='The Shawshank Redemption', type='Movie', properties={}), type='ACTS_IN', properties={}), Relationship(source=Node(id='Bob Gunton'

In [19]:
# Open a LangChain Neo4jGraph with the same connection settings and store the
# LLM-extracted graph documents in the database.
graph = Neo4jGraph(url=uri, username=user, password=password)
graph.add_graph_documents(graph_documents)

Cypher Query:
```Cypher
MATCH p=(m:Movie)-[r]-(n)
RETURN p;
```

![llm_graph_transformer.jpeg](imgs/llm_graph_transformer.jpeg)

In [None]:
# Re-read the schema so the prompt below describes the graph that was just loaded.
graph.refresh_schema()

# llm_chat = ChatGoogleGenerativeAI(
#     model="gemini-1.5-pro",
#     temperature=0,
#     max_tokens=None,
#     timeout=None,
#     max_retries=2,
#     api_key=api_key
# )

CYPHER_GENERATION_TEMPLATE = """Task:Generate Cypher statement to query a graph database.
Instructions:
Use only the provided relationship types and properties in the schema.
Do not use any other relationship types or properties that are not provided.
Schema:
{schema}
Note: Do not include any explanations or apologies in your responses.
Do not respond to any questions that might ask anything else than for you to construct a Cypher statement.
Do not include any text except the generated Cypher statement.
Return every node as whole, do not return only the properties.

The question is:
{question}"""

CYPHER_GENERATION_PROMPT = PromptTemplate(
    input_variables=["schema", "question"], template=CYPHER_GENERATION_TEMPLATE
)

# allow_dangerous_requests=True is an explicit opt-in: the chain executes
# LLM-generated Cypher against the database.
chain = GraphCypherQAChain.from_llm(
    llm,
    graph=graph,
    verbose=True,
    allow_dangerous_requests=True,
    return_intermediate_steps=True,
    cypher_prompt=CYPHER_GENERATION_PROMPT
)

# chain.invoke({"query": "What movie would you recommend me to cheer up my solitude?"})
# Chain.run is deprecated; invoke() is what the rest of this notebook uses.
# It returns a dict, whose "result" entry is the final answer text.
response = chain.invoke({"query": "Recommend me a movie produced by Quentin Tarantino."})
response["result"]



[1m> Entering new GraphCypherQAChain chain...[0m


INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"


Generated Cypher:
[32;1m[1;3mcypher
MATCH (m:Movie)-[:DIRECTED_BY]->(p:Person {id: 'Quentin Tarantino'})
RETURN m
[0m
Full Context:
[32;1m[1;3m[{'m': {'id': 'Pulp Fiction'}}, {'m': {'id': 'Django Unchained'}}, {'m': {'id': 'Inglourious Basterds'}}][0m


INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"



[1m> Finished chain.[0m


"Based on the information provided, here are some movies that were produced by Quentin Tarantino:\n\n1. Pulp Fiction\n2. Django Unchained\n3. Inglourious Basterds\n\nThese films showcase Tarantino's distinctive style and are highly recommended for fans of his work."

In [None]:
# Final cleanup: wipe the graph, then close the driver connection.
conn.reset_database()
conn.close()

Database resetted successfully!
