In [None]:
import os
from pathlib import Path

from dotenv import load_dotenv
from langchain_core.documents import Document
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_neo4j import Neo4jGraph
from langchain_openai import ChatOpenAI

# Make variables defined in a local .env file visible through os.environ.
load_dotenv()

# Initialize Neo4j connection.
# Credentials are read from the environment instead of being hard-coded;
# a missing URL or password fails immediately with a KeyError.
graph = Neo4jGraph(
    url=os.environ["NEO4J_AURA_URL"],
    username=os.getenv("NEO4J_AURA_USERNAME", "neo4j"),
    password=os.environ["NEO4J_AURA_PASSWORD"],
)

# Configure LLM-based graph extraction
llm_transformer = LLMGraphTransformer(
    llm=ChatOpenAI(model="gpt-4o", temperature=0)
)

# Process documents and store in Neo4j.
# The whole text file is wrapped in a single Document, with its path kept as
# the "source" metadata entry.
source_path = Path("data.txt")
documents = [
    Document(
        page_content=source_path.read_text(encoding="utf-8"),
        metadata={"source": str(source_path)},
    )
]
graph_documents = llm_transformer.convert_to_graph_documents(documents)
graph.add_graph_documents(graph_documents, baseEntityLabel=True)


In [7]:
import os

import weaviate
from dotenv import load_dotenv
from weaviate.classes.config import Configure
from weaviate.classes.init import Auth

load_dotenv()
# Best practice: store your credentials in environment variables
weaviate_url = os.environ["WEAVIATE_URL"]
weaviate_api_key = os.environ["WEAVIATE_API_KEY"]

client = weaviate.connect_to_weaviate_cloud(
    cluster_url=weaviate_url,
    auth_credentials=Auth.api_key(weaviate_api_key),
)

try:
    # Creating a collection that already exists raises, so reuse it when the
    # cell is re-run instead of failing.
    if client.collections.exists("News"):
        questions = client.collections.get("News")
    else:
        questions = client.collections.create(
            name="News",
            vectorizer_config=Configure.Vectorizer.text2vec_weaviate(),  # Configure the Weaviate Embeddings integration
            generative_config=Configure.Generative.openai(),  # Configure the OpenAI generative AI integration
        )
finally:
    client.close()  # Free up resources, even when collection setup fails

/Users/daniel/Documents/Interview/RAG_KG_agent - Serendipity AI/.venv/lib/python3.13/site-packages/weaviate/collections/classes/config.py:1963: PydanticDeprecatedSince211: Accessing the 'model_fields' attribute on the instance is deprecated. Instead, you should access this attribute from the model class. Deprecated in Pydantic V2.11 to be removed in V3.0.
  for cls_field in self.model_fields:


In [3]:
import os

from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(temperature=0, model_name="gpt-4.1-mini-2025-04-14")

llm_transformer = LLMGraphTransformer(
    llm=llm,
    allowed_nodes=["Topic"])

from langchain_core.documents import Document

text = """
The AI Startup Helping Uber, Salesforce And Hundreds Of Companies Cut Costs. 
Businesses have poured millions into AI hoping for big returns in the future. This startup is saving them millions in labor costs today. 
"""
documents = [Document(page_content=text)]
graph_documents = await llm_transformer.aconvert_to_graph_documents(documents)
print(f"Nodes:{graph_documents[0].nodes}")
print(f"Relationships:{graph_documents[0].relationships}")

Nodes:[Node(id='Ai Startup', type='Topic', properties={}), Node(id='Uber', type='Topic', properties={}), Node(id='Salesforce', type='Topic', properties={}), Node(id='Businesses', type='Topic', properties={}), Node(id='Labor Costs', type='Topic', properties={})]
Relationships:[Relationship(source=Node(id='Ai Startup', type='Topic', properties={}), target=Node(id='Uber', type='Topic', properties={}), type='HELPING', properties={}), Relationship(source=Node(id='Ai Startup', type='Topic', properties={}), target=Node(id='Salesforce', type='Topic', properties={}), type='HELPING', properties={}), Relationship(source=Node(id='Ai Startup', type='Topic', properties={}), target=Node(id='Businesses', type='Topic', properties={}), type='HELPING', properties={}), Relationship(source=Node(id='Ai Startup', type='Topic', properties={}), target=Node(id='Labor Costs', type='Topic', properties={}), type='CUTTING', properties={})]


In [9]:
import json
from bs4 import BeautifulSoup
from pathlib import Path
from langchain_core.documents import Document
from langchain_openai import ChatOpenAI
from langchain_experimental.graph_transformers import LLMGraphTransformer

# --- 1. Load JSON ---
with open("sources_06-05-25.json", "r", encoding="utf-8") as f:
    data = json.load(f)

# --- 2. Parse and clean text ---
documents = []
first_two_items = list(data["reference_by_id"].items())[:2]

# for doc_id, doc in data["reference_by_id"].items():
for doc_id, doc in first_two_items:
    title = doc.get("title", "")
    description = doc.get("description", "")
    # raw_text = doc.get("text", "")
    
    # Clean HTML or markdown-ish junk
    # clean_text = BeautifulSoup(raw_text, "html.parser").get_text()

    # structured_content_for_article = f"""Title: {title}
    # Description: {description}
    # URL: {url}
    # Source Type: {source_type}

    # Main Content:
    # {clean_text}
    # """
    
    full_text = f"{title}\n\n{description}"
    metadata = {
        "url": doc.get("url", ""),
        "source_type": doc.get("source_type", "unknown")
    }
    documents.append(Document(page_content=full_text, metadata=metadata))

# --- 3. Construction of knowledge graph ---
llm = ChatOpenAI(temperature=0, model_name="gpt-4.1-mini-2025-04-14")

llm_transformer = LLMGraphTransformer(
    llm=llm,
    allowed_nodes=["Topic", "Article"],
    allowed_relationships=["DISCUSSES"],
    node_properties=True,
    relationship_properties=True
)

# Convert documents to graph documents asynchronously
graph_documents = await llm_transformer.aconvert_to_graph_documents(documents)

# Print the results for inspection
for i, gd in enumerate(graph_documents):
    print(f"\n--- Graph {i + 1} ---")
    print(f"Nodes: {gd.nodes}")
    print(f"Relationships: {gd.relationships}")

# The graph_documents variable now holds the result



--- Graph 1 ---
Nodes: [Node(id='Ai Startup', type='Topic', properties={'description': 'A startup helping Uber, Salesforce and hundreds of companies cut costs by saving millions in labor costs today.', 'industry': 'Artificial Intelligence'}), Node(id='Uber', type='Topic', properties={'industry': 'Transportation'}), Node(id='Salesforce', type='Topic', properties={'industry': 'Customer Relationship Management'}), Node(id='Businesses', type='Topic', properties={'description': 'Companies investing millions into AI hoping for big returns in the future.'})]
Relationships: [Relationship(source=Node(id='Ai Startup', type='Topic', properties={}), target=Node(id='Uber', type='Topic', properties={}), type='DISCUSSES', properties={}), Relationship(source=Node(id='Ai Startup', type='Topic', properties={}), target=Node(id='Salesforce', type='Topic', properties={}), type='DISCUSSES', properties={}), Relationship(source=Node(id='Ai Startup', type='Topic', properties={}), target=Node(id='Businesses', 

In [10]:
from langchain_neo4j import Neo4jGraph # Updated import

import os  # imported here so this cell does not depend on an earlier cell's import

# Neo4j Aura connection settings, read from the environment when this cell runs.
# Values kept in a .env file are only visible after load_dotenv() has been
# called (an earlier cell of this notebook does so).
NEO4J_AURA_URL = os.getenv("NEO4J_AURA_URL")  # None when unset
NEO4J_AURA_USERNAME = os.getenv("NEO4J_AURA_USERNAME", "neo4j")  # falls back to "neo4j"
NEO4J_AURA_PASSWORD = os.getenv("NEO4J_AURA_PASSWORD")  # None when unset

def get_neo4j_graph_connection():
    """Build a Neo4jGraph from the module-level NEO4J_AURA_* settings.

    Returns:
        The Neo4jGraph created with the configured URL, username and password,
        or None when the URL or the password is missing or empty.
    """
    # Without both a URL and a password there is nothing to connect to.
    if not NEO4J_AURA_URL or not NEO4J_AURA_PASSWORD:
        return None

    connection_settings = {
        "url": NEO4J_AURA_URL,
        "username": NEO4J_AURA_USERNAME,
        "password": NEO4J_AURA_PASSWORD,
    }
    return Neo4jGraph(**connection_settings)

# Store the extracted graph documents in Neo4j.
graph_db = get_neo4j_graph_connection()
if graph_documents:
    # get_neo4j_graph_connection() returns None when the URL or password is
    # not configured; fail with a clear message instead of an AttributeError.
    if graph_db is None:
        raise RuntimeError(
            "Neo4j connection is not configured: set NEO4J_AURA_URL and "
            "NEO4J_AURA_PASSWORD before storing graph documents."
        )
    # Optional keyword arguments (baseEntityLabel, include_source) are left at
    # their defaults.
    graph_db.add_graph_documents(graph_documents)

[#CF83]  _: <CONNECTION> error: Failed to read from defunct connection IPv4Address(('si-9ad647b4-ac13.production-orch-0073.neo4j.io', 7687)) (ResolvedIPv4Address(('34.78.76.49', 7687))): BrokenPipeError(32, 'Broken pipe')
Transaction failed and will be retried in 1.097757722476686s (Failed to read from defunct connection IPv4Address(('si-9ad647b4-ac13.production-orch-0073.neo4j.io', 7687)) (ResolvedIPv4Address(('34.78.76.49', 7687))))
[#CF82]  _: <CONNECTION> error: Failed to read from defunct connection ResolvedIPv4Address(('34.78.76.49', 7687)) (ResolvedIPv4Address(('34.78.76.49', 7687))): BrokenPipeError(32, 'Broken pipe')
Unable to retrieve routing information
Transaction failed and will be retried in 1.8817563560830526s (Unable to retrieve routing information)
