In [1]:
from langchain_core.runnables import RunnablePassthrough
from langchain_core.prompts import ChatPromptTemplate
from pydantic import BaseModel, Field
from langchain_core.output_parsers import StrOutputParser
from langchain_community.graphs import Neo4jGraph
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.chat_models import ChatOllama
from langchain_experimental.graph_transformers import LLMGraphTransformer
from neo4j import GraphDatabase
from langchain_community.vectorstores import Neo4jVector
from langchain_community.document_loaders import TextLoader
import os
from langchain_experimental.llms.ollama_functions import OllamaFunctions
from neo4j import Driver
from langchain_core.documents import Document
from typing import List, Dict

# ... existing imports and setup ...


In [2]:

# Define custom Node and Relationship classes to match LLMGraphTransformer's output format
class Node:
    def __init__(self, id: str, type: str, properties: Dict = None):
        self.id = id
        self.type = type
        self.properties = properties or {}

class Relationship:
    def __init__(self, source: Node, target: Node, type: str, properties: Dict = None):
        self.source = source
        self.target = target
        self.type = type
        self.properties = properties or {}

class CustomGraphDocument:
    def __init__(self, text: str, nodes: List[Node], relationships: List[Relationship]):
        self.page_content = text
        self.nodes = nodes
        self.relationships = relationships
        # Add source attribute expected by Neo4jGraph
        self.source = Document(page_content=text)

def create_custom_graph_documents(documents: List[Document], custom_triplets: List[Dict]) -> List[CustomGraphDocument]:
    graph_documents = []
    
    for doc, triplets in zip(documents, custom_triplets):
        nodes = []
        relationships = []
        
        # Create nodes and relationships from custom triplets
        for triplet in triplets:
            source_node = Node(
                id=triplet['source'],
                type=triplet.get('source_type', 'Entity')
            )
            target_node = Node(
                id=triplet['target'],
                type=triplet.get('target_type', 'Entity')
            )
            
            # Add nodes if they don't exist
            if source_node not in nodes:
                nodes.append(source_node)
            if target_node not in nodes:
                nodes.append(target_node)
                
            # Create relationship
            relationship = Relationship(
                source=source_node,
                target=target_node,
                type=triplet['relationship']
            )
            relationships.append(relationship)
        
        # Create graph document
        graph_doc = CustomGraphDocument(
            text=doc.page_content,
            nodes=nodes,
            relationships=relationships
        )
        graph_documents.append(graph_doc)
    
    return graph_documents

# Example usage:
loader = TextLoader(file_path="dummytext.txt")
docs = loader.load()

text_splitter = RecursiveCharacterTextSplitter(chunk_size=250, chunk_overlap=24)
documents = text_splitter.split_documents(documents=docs)

# Your custom triplets (one list of triplets per document)
custom_triplets = [
    [
        {
            'source': 'Entity1',
            'target': 'Entity2',
            'relationship': 'RELATES_TO',
            'source_type': 'Person',
            'target_type': 'Organization'
        },
        {
            'source': 'Entity3',
            'target': 'Entity4',
            'relationship': 'RELATES_TO_1',
            'source_type': 'Person',
            'target_type': 'Organization'
        },
        {
            'source': 'Entity5',
            'target': 'Entity6',
            'relationship': 'RELATES_TO_2',
            'source_type': 'Person',
            'target_type': 'Organization'
        },
        {
            'source': 'Entity7',
            'target': 'Entity8',
            'relationship': 'RELATES_TO_3',
            'source_type': 'Person',
            'target_type': 'Organization'
        }
        # Add more triplets for first document
    ],
    [
        {
            'source': 'Entity9',
            'target': 'Entity10',
            'relationship': 'RELATES_TO_4',
            'source_type': 'Person',
            'target_type': 'Organization'
        },
        {
            'source': 'Entity11',
            'target': 'Entity12',
            'relationship': 'RELATES_TO_5',
            'source_type': 'Person',
            'target_type': 'Organization'
        },
        {
            'source': 'Entity13',
            'target': 'Entity14',
            'relationship': 'RELATES_TO_6',
            'source_type': 'Person',
            'target_type': 'Organization'
        }
        # Add more triplets for second document
    ],
    [
        {
            'source': 'Entity15',
            'target': 'Entity16',
            'relationship': 'RELATES_TO_7',
            'source_type': 'Person',
            'target_type': 'Organization'
        },
        {
            'source': 'Entity17',
            'target': 'Entity18',
            'relationship': 'RELATES_TO_8',
            'source_type': 'Person',
            'target_type': 'Organization'
        },
        {
            'source': 'Entity19',
            'target': 'Entity20',
            'relationship': 'RELATES_TO_9',
            'source_type': 'Person',
            'target_type': 'Organization'
        }
        # Add more lists of triplets for other documents
    ]
]

# Create graph documents with custom triplets
graph_documents = create_custom_graph_documents(documents, custom_triplets)

In [3]:
graph_documents[0].nodes


[<__main__.Node at 0x1296097c0>,
 <__main__.Node at 0x12960b230>,
 <__main__.Node at 0x129609640>,
 <__main__.Node at 0x129609760>,
 <__main__.Node at 0x129651be0>,
 <__main__.Node at 0x129653890>,
 <__main__.Node at 0x129653bf0>,
 <__main__.Node at 0x129653ce0>]

In [4]:
NEO4J_URI = "neo4j://localhost:7687"  # Adjust based on your Neo4j instance
NEO4J_USERNAME = "neo4j"              # Your Neo4j username
NEO4J_PASSWORD = "00000000"      # Your Neo4j password

# Initialize Neo4j graph with connection details
graph = Neo4jGraph(
    url=NEO4J_URI,
    username=NEO4J_USERNAME,
    password=NEO4J_PASSWORD
)

Unable to retrieve routing information


ValueError: Could not connect to Neo4j database. Please ensure that the url is correct

In [16]:
graph.add_graph_documents(
    graph_documents,
    baseEntityLabel=True,
    include_source=True
)


In [22]:
from langchain_ollama import OllamaEmbeddings
# embeddings = OllamaEmbeddings(
#     model="mxbai-embed-large",
# )


from langchain_community.embeddings import HuggingFaceBgeEmbeddings
embeddings = HuggingFaceBgeEmbeddings(
    model_name="BAAI/bge-small-en",
    model_kwargs={'device': 'cpu'},
    encode_kwargs={'normalize_embeddings': True}
)
vector_index = Neo4jVector.from_existing_graph(
    embeddings,
    url=NEO4J_URI,
    username=NEO4J_USERNAME,
    password=NEO4J_PASSWORD,
    search_type="hybrid",
    node_label="Document",
    text_node_properties=["text"],
    embedding_node_property="embedding"
)
vector_retriever = vector_index.as_retriever()

  from tqdm.autonotebook import tqdm, trange
