https://python.langchain.com/v0.1/docs/integrations/graphs/neo4j_cypher/


https://neo4j.com/developer-blog/knowledge-graph-llama-nvidia-langchain/

1. Environment Setup

We keep the environment setup step, which loads the API keys and the Neo4j connection details from a `.env` file.

In [1]:
import os
from dotenv import load_dotenv
from neo4j import GraphDatabase
from langchain_community.chat_models import ChatOllama
from typing import Optional
from langchain.schema import AIMessage, SystemMessage, HumanMessage
from langchain_community.embeddings import OllamaEmbeddings
from langchain.embeddings.base import Embeddings
from langchain_community.vectorstores import FAISS
from langchain.schema import Document
import numpy as np
from langchain_core.tools import tool
import torch
# Ask PyTorch to release cached GPU memory that it is not currently using.
# NOTE(review): torch is not referenced anywhere else in the visible code
# (the models are created through Ollama wrappers) -- confirm this call is
# still needed.
torch.cuda.empty_cache()  # Clear GPU memory


# 2. Environment Setup
def load_env_vars():
    """
    Load configuration from the .env file and the process environment.

    Returns:
        dict: The keys ``OPENAI_API_KEY``, ``HUGGINGFACE_API_KEY``,
        ``NOMIC_EMBEDDINGS_API_KEY``, ``NEO4J_URI``, ``NEO4J_USERNAME``,
        ``NEO4J_PASSWORD`` and ``NEO4J_DATABASE``. The three API keys are
        ``None`` when they are not set.

    Raises:
        EnvironmentError: If any Neo4j setting is missing or empty.
    """
    load_dotenv()  # Load .env file into environment variables
    env_vars = {
        "OPENAI_API_KEY": os.getenv("OPENAI_API_KEY"),
        "HUGGINGFACE_API_KEY": os.getenv("HUGGINGFACE_API_KEY"),
        "NOMIC_EMBEDDINGS_API_KEY": os.getenv("NOMIC_EMBEDDINGS_API_KEY"),  # Retained for consistency but unused
        "NEO4J_URI": os.getenv("NEO4J_URI", "bolt://44.222.222.181"),
        "NEO4J_USERNAME": os.getenv("NEO4J_USERNAME", "neo4j"),
        "NEO4J_PASSWORD": os.getenv("NEO4J_PASSWORD"),
        "NEO4J_DATABASE": os.getenv("NEO4J_DATABASE", "neo4j"),
    }
    # Only the Neo4j settings are consumed by the rest of this script (the LLM
    # and the embeddings are served locally through Ollama), so a missing
    # third-party API key must not abort the run.
    required_keys = ("NEO4J_URI", "NEO4J_USERNAME", "NEO4J_PASSWORD", "NEO4J_DATABASE")
    missing_keys = [key for key in required_keys if not env_vars[key]]
    if missing_keys:
        raise EnvironmentError(f"Missing environment variables: {missing_keys}")
    return env_vars

# 3. Neo4j Connection
class Neo4jConnection:
    """
    Small wrapper around a Neo4j driver for running Cypher queries.

    Args:
        uri: Connection URI handed to ``GraphDatabase.driver``.
        user: Username used for authentication.
        password: Password used for authentication.
        database: Optional database name to open sessions against. When it is
            ``None`` or empty, sessions are opened without a database argument
            (the same behaviour as before this parameter existed).

    The instance can be used as a context manager; the driver is closed when
    the ``with`` block exits.
    """
    def __init__(self, uri, user, password, database=None):
        self.database = database
        self.driver = GraphDatabase.driver(uri, auth=(user, password))

    def __enter__(self):
        """Return the connection itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the driver on exit; exceptions are never suppressed."""
        self.close()
        return False

    def close(self):
        """Close the Neo4j connection."""
        self.driver.close()

    def query(self, query, parameters=None):
        """
        Execute a query on the Neo4j database.

        Args:
            query: Cypher text to run.
            parameters: Optional mapping of query parameters.

        Returns:
            list: Every record of the result, collected before the session
            is closed.
        """
        # Only pass ``database`` when one was configured, so the call is
        # unchanged for callers that never set it.
        session_kwargs = {"database": self.database} if self.database else {}
        with self.driver.session(**session_kwargs) as session:
            result = session.run(query, parameters)
            return list(result)

# 4. LLM and Embeddings Setup
def setup_llm_and_embeddings(env_vars):
    """
    Build the chat model and the embedding model, both backed by Ollama.

    ``env_vars`` is accepted but not read by this function.

    Returns:
        tuple: ``(llm, embeddings_model)`` -- a ``ChatOllama`` instance with
        temperature 0 and an ``OllamaEmbeddings`` instance, both using the
        ``llama3.2`` model.
    """
    model_name = "llama3.2"
    return (
        ChatOllama(model=model_name, temperature=0),
        OllamaEmbeddings(model=model_name),
    )

# 5. Embedding and Storing Movies
def embed_and_store_movies(embeddings_model, conn, batch_size=100):
    """
    Fetch top movies, generate embeddings, and store them in the Neo4j database.

    Args:
        embeddings_model: Object exposing ``embed_documents(list_of_texts)``.
        conn: Object exposing ``query(query, parameters=None)``.
        batch_size: Number of movies written per database round trip.
            Values below 1 are treated as 1.

    Returns:
        None. The number of processed movies is printed.

    Raises:
        KeyError: If a fetched movie has no ``movieId`` property (it could
            not be matched for the write).
    """
    query = """
    MATCH (m:Movie)
    WHERE m.imdbRating IS NOT NULL
    WITH m
    ORDER BY m.imdbRating DESC
    LIMIT 1000
    RETURN m
    """
    movies = [record["m"] for record in conn.query(query)]
    if not movies:
        # Nothing to embed; avoid calling the embedding model with no input.
        print(f"Stored embeddings for {len(movies)} movies.")
        return

    # ``.get`` is used because descriptive properties such as ``plot`` may be
    # absent on a node; plain indexing would abort the whole run.
    movies_data = [
        f"Plot: {movie.get('plot')}, Title: {movie.get('title')}, "
        f"Year: {movie.get('year')}, Rating: {movie.get('imdbRating')}"
        for movie in movies
    ]

    embeddings = embeddings_model.embed_documents(movies_data)
    rows = [
        {"movieId": movie["movieId"], "embedding": embedding}
        for movie, embedding in zip(movies, embeddings)
    ]

    # Write in batches with UNWIND instead of one round trip per movie.
    step = max(1, batch_size)
    for start in range(0, len(rows), step):
        conn.query(
            "UNWIND $rows AS row "
            "MATCH (m:Movie {movieId: row.movieId}) "
            "SET m.embedding = row.embedding",
            parameters={"rows": rows[start:start + step]},
        )
    print(f"Stored embeddings for {len(movies)} movies.")

# 6. Search Tool Definition
from langchain_core.tools import tool
from pydantic import Field

# NOTE: the docstring below doubles as the tool description exposed by the
# ``@tool`` decorator, so it is intentionally left unchanged.
# The optional filters carry ``default=None`` so that the generated argument
# schema does not mark them as required when the caller omits them.
# This tool reads the module-level ``conn`` and ``embeddings_model`` names,
# which must be defined before the tool is invoked.
@tool
def search_movie_by_description_tool(
    description: str = Field(description="Movie description."),
    min_year: Optional[int] = Field(default=None, description="Minimum release year."),
    max_year: Optional[int] = Field(default=None, description="Maximum release year."),
    genre: Optional[str] = Field(default=None, description="Movie genre."),
    director: Optional[str] = Field(default=None, description="Movie director."),
):
    """
    Search for movies using the provided description and optional filters.
    """
    return search_movie_by_description(conn, embeddings_model, description, min_year, max_year, genre, director)

# 7. Search Movie by Description
def search_movie_by_description(conn, embeddings_model, description, min_year=None, max_year=None, genre=None, director=None):
    """
    Return the five movies whose stored embedding is closest to ``description``.

    The description is embedded with ``embeddings_model.embed_query`` and
    compared against ``m.embedding`` using cosine similarity. Each filter
    (``min_year``, ``max_year``, ``genre``, ``director``) is applied only when
    its value is truthy. The result is whatever ``conn.query`` returns.
    """
    params = {"embedding": embeddings_model.embed_query(description)}

    # (parameter name, supplied value, Cypher predicate) for each optional filter.
    optional_filters = (
        ("min_year", min_year, "m.year >= $min_year"),
        ("max_year", max_year, "m.year <= $max_year"),
        ("genre", genre, "m.genre CONTAINS $genre"),
        ("director", director, "m.director CONTAINS $director"),
    )
    clauses = []
    for name, value, predicate in optional_filters:
        if not value:
            continue
        clauses.append(predicate)
        params[name] = value

    cypher = """
    MATCH (m:Movie)
    WHERE m.embedding IS NOT NULL
    """
    if clauses:
        cypher += " AND " + " AND ".join(clauses)
    cypher += """
    WITH m, gds.similarity.cosine(m.embedding, $embedding) AS similarity
    RETURN m
    ORDER BY similarity DESC
    LIMIT 5
    """

    return conn.query(cypher, params)

# 8. Generate Movie Recommendation Using LLM
def generate_recommendation(llm, conn, description, embeddings_model=None):
    """
    Generate movie recommendations based on a description using LLM.

    The LLM is asked to summarize ``description``; the summary text is then
    used as the search text for ``search_movie_by_description``.

    Args:
        llm: Chat model exposing ``invoke(prompt)`` whose result has ``.content``.
        conn: Database connection passed through to the search.
        description: Free-text movie description.
        embeddings_model: Embedding model used for the search. When omitted,
            the module-level ``embeddings_model`` is used, which keeps
            existing three-argument callers working.

    Returns:
        The result of ``search_movie_by_description`` for the summary.

    Raises:
        NameError: If no embedding model is passed and no module-level
            ``embeddings_model`` exists.
    """
    if embeddings_model is None:
        # Backward-compatible fallback: the original implementation read this
        # name from module scope, where the script entry point defines it.
        embeddings_model = globals().get("embeddings_model")
        if embeddings_model is None:
            raise NameError(
                "embeddings_model was not passed and no module-level "
                "'embeddings_model' is defined"
            )

    prompt = f"Summarize the movie description: {description}"
    summary = llm.invoke(prompt)
    return search_movie_by_description(conn, embeddings_model, summary.content)

def format_messages_for_ollama(messages):
    """
    Turn LangChain message objects into ``{"role", "content"}`` dictionaries.

    ``SystemMessage`` maps to ``"system"``, ``HumanMessage`` to ``"user"`` and
    ``AIMessage`` to ``"assistant"``. Any other message type raises
    ``ValueError``. The output preserves the input order.
    """
    def _role_for(msg):
        # Resolve the role string for one message.
        if isinstance(msg, SystemMessage):
            return "system"
        if isinstance(msg, HumanMessage):
            return "user"
        if isinstance(msg, AIMessage):
            return "assistant"
        raise ValueError("Received unsupported message type for Ollama.")

    return [{"role": _role_for(msg), "content": msg.content} for msg in messages]

# 9. Question Answering Using LLM
def ask_llm_about_movie(llm, question, conn):
    """
    Answer movie-related questions using the LLaMA model and Neo4j database.

    Every movie's title and plot is read from the database and placed in the
    prompt ahead of the question.

    Args:
        llm: Chat model exposing ``invoke(messages)`` whose result has ``.content``.
        question: The question to answer.
        conn: Object exposing ``query(query)`` that returns records indexable
            by column name.

    Returns:
        The ``content`` of the model's response.
    """
    # Project only the two properties the prompt needs. This avoids pulling
    # whole nodes (including any stored embedding vectors) over the wire, and
    # a movie without a plot yields a null value instead of a KeyError.
    query = "MATCH (m:Movie) RETURN m.title AS title, m.plot AS plot"
    movies_data = conn.query(query)

    # Prepare movie information for the prompt
    movie_info = "\n".join(
        f"Title: {record['title']}, Plot: {record['plot']}" for record in movies_data
    )

    # Create the prompt using HumanMessage and SystemMessage
    messages = [
        SystemMessage(content="You are a movie expert."),
        HumanMessage(
            content=f"{movie_info}\n\nBased on the above movies, answer the following question: {question}"
        ),
    ]

    # Use ``invoke`` (as generate_recommendation does) rather than calling the
    # model object directly, which is the deprecated calling style.
    response = llm.invoke(messages)
    return response.content


# 10. Final Execution Script
if __name__ == "__main__":
    # Script entry point: load settings, build the models, connect to Neo4j,
    # store embeddings, then run one recommendation and one Q&A example.
    env_vars = load_env_vars()
    llm, embeddings_model = setup_llm_and_embeddings(env_vars)

    # Set up Neo4j connection
    conn = Neo4jConnection(
        uri=env_vars["NEO4J_URI"],
        user=env_vars["NEO4J_USERNAME"],
        password=env_vars["NEO4J_PASSWORD"],
    )

    # The try/finally guarantees the driver is closed even when one of the
    # steps below raises (for example when the database is unreachable).
    try:
        # Embed and store movie data in Neo4j
        embed_and_store_movies(embeddings_model, conn)

        # Example: Perform a movie recommendation based on a description
        description = "a movie with a giant robot"
        recommended_movies = generate_recommendation(llm, conn, description)

        print("Recommended Movies based on LLM Summary:")
        for movie in recommended_movies:
            print(movie)

        # Example: Question answering about movies
        question = "Which movie features a giant robot?"
        llm_response = ask_llm_about_movie(llm, question, conn)
        print(f"LLM Response: {llm_response}")
    finally:
        # Close the Neo4j connection
        conn.close()



ServiceUnavailable: Couldn't connect to localhost:7687 (resolved to ()):
Failed to establish connection to ResolvedIPv6Address(('::1', 7687, 0, 0)) (reason [Errno 111] Connection refused)
Failed to establish connection to ResolvedIPv4Address(('127.0.0.1', 7687)) (reason [Errno 111] Connection refused)