In [51]:
import getpass
import os
from typing import List, Optional

import requests
from langchain_community.document_loaders import WikipediaLoader
from langchain_community.graphs.graph_document import GraphDocument
from langchain_core.documents import Document
from langchain_experimental.graph_transformers.diffbot import DiffbotGraphTransformer
from langchain_neo4j import Neo4jGraph
from langchain_text_splitters import TokenTextSplitter

In [24]:
# Configuration for Diffbot API Key
# The key is read from the environment so it never lands in the notebook
# source or its saved outputs. Any key that was ever hardcoded in this
# notebook should be treated as leaked and rotated.
if not os.environ.get("DIFFBOT_API_KEY"):
    # Interactive fallback: getpass does not echo the value as it is typed.
    os.environ["DIFFBOT_API_KEY"] = getpass.getpass("Diffbot API key: ")

In [21]:
# Load and Split Wikipedia Data
def load_and_split_wikipedia_article(
    query: str,
    chunk_size: int = 512,
    chunk_overlap: int = 24,
) -> List[Document]:
    """Load the Wikipedia results for ``query`` and split them into chunks.

    Args:
        query: Search text handed to ``WikipediaLoader``.
        chunk_size: ``chunk_size`` forwarded to ``TokenTextSplitter``.
        chunk_overlap: ``chunk_overlap`` forwarded to ``TokenTextSplitter``.

    Returns:
        The chunks produced by ``TokenTextSplitter.split_documents`` for the
        loaded documents.

    Raises:
        ValueError: If ``query`` is empty or contains only whitespace.
    """
    # Fail fast with a clear message instead of sending a blank search
    # to the Wikipedia loader.
    if not query or not query.strip():
        raise ValueError("query must be a non-empty string")

    print("Loading Wikipedia article...")
    raw_documents = WikipediaLoader(query=query).load()

    print("Splitting documents into chunks...")
    text_splitter = TokenTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(raw_documents)

In [34]:
# Example usage: fetch the article for one search term and report how many
# chunks the splitter produced. `wiki_document` holds the list of chunks.
query = "Yann LeCun"
wiki_document = load_and_split_wikipedia_article(query=query)

print(f"Loaded and split {len(wiki_document)} document chunks from Wikipedia article on {query}.")

Loading Wikipedia article...
Splitting documents into chunks...
Loaded and split 50 document chunks from Wikipedia article on Yann LeCun.


In [30]:
# Extract Graph Data with Diffbot
def extract_graph_with_diffbot(
    documents: List[Document], api_key: str
) -> Optional[List[GraphDocument]]:
    """Convert documents into graph documents using Diffbot.

    Args:
        documents: Documents passed to
            ``DiffbotGraphTransformer.convert_to_graph_documents``.
        api_key: Diffbot API key used to construct the transformer.

    Returns:
        The graph documents returned by the transformer, an empty list when
        ``documents`` is empty, or ``None`` if the extraction raised.
        Errors are reported on stdout rather than propagated, so callers
        should check the result before using it.
    """
    # Nothing to convert: skip building the transformer altogether.
    if not documents:
        return []

    print("Extracting graph data with Diffbot...")
    try:
        graph_transformer = DiffbotGraphTransformer(diffbot_api_key=api_key)
        graph_documents = graph_transformer.convert_to_graph_documents(documents)
    except Exception as e:
        # Deliberate best-effort: report the failure and signal it with None
        # so the notebook keeps running.
        print(f"Error during graph extraction: {e}")
        return None

    print("Graph extraction complete.")
    return graph_documents

In [47]:
# Example usage of graph extraction
wiki_graph = extract_graph_with_diffbot(wiki_document, os.environ["DIFFBOT_API_KEY"])

# extract_graph_with_diffbot returns None when extraction fails, so only
# summarise a non-empty result.
if wiki_graph:
    node_count = sum(len(page.nodes) for page in wiki_graph)
    edge_count = sum(len(page.relationships) for page in wiki_graph)
    print(f"Extracted graph with {node_count} nodes and {edge_count} edges.")

Extracting graph data with Diffbot...
Graph extraction complete.
Extracted graph with 354 nodes and 584 edges.


In [None]:
# Neo4j connection settings.
# Each value comes from the environment when set; otherwise the literal
# fallback below is used.
neo4j_url = os.getenv("NEO4J_URI", "bolt://localhost:7687")
neo4j_username = os.getenv("NEO4J_USERNAME", "neo4j")
neo4j_password = os.getenv("NEO4J_PASSWORD", "password")

In [None]:
# Download the APOC core plugin JAR into the working directory.
# NOTE(review): the APOC release presumably has to match the installed
# Neo4j version — confirm before changing APOC_VERSION.
APOC_VERSION = "5.15.0"
url = f"https://github.com/neo4j/apoc/releases/download/{APOC_VERSION}/apoc-{APOC_VERSION}-core.jar"

# A timeout keeps the cell from hanging forever, and raise_for_status stops
# an HTTP error page from being written to disk as if it were the JAR.
response = requests.get(url, timeout=60)
response.raise_for_status()
jar = response.content

with open("apoc.jar", "wb") as f:
    f.write(jar)

print("APOC JAR downloaded. Move it to your neo4j/plugins/ folder.")

In [52]:
# Connect to Neo4j with the settings configured above.
# If the APOC plugin is not installed/allowed on the server, this call fails
# with "ValueError: Could not use APOC procedures ...".
neo4j_connection = {
    "url": neo4j_url,
    "username": neo4j_username,
    "password": neo4j_password,
}
graph = Neo4jGraph(**neo4j_connection)

ValueError: Could not use APOC procedures. Please ensure the APOC plugin is installed in Neo4j and that 'apoc.meta.data()' is allowed in Neo4j configuration 