# In [0]:
import os
import warnings

import pandas as pd
from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.graphs import Neo4jGraph
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_google_genai import ChatGoogleGenerativeAI

# --- ENV SETUP ---
# Secrets must not live in source code. Supply GOOGLE_API_KEY through the
# process environment (shell export, .env loader, secret manager) before
# running this script.
# NOTE(review): a literal API key used to be hard-coded here; if it was a live
# key it should be treated as compromised and revoked/rotated.
if not os.environ.get("GOOGLE_API_KEY"):
    warnings.warn(
        "GOOGLE_API_KEY is not set; the Gemini client will not be able to "
        "authenticate unless credentials are supplied another way."
    )

# Local-development defaults. setdefault() keeps any value that is already
# exported instead of overwriting it.
os.environ.setdefault("NEO4J_URI", "bolt://localhost:7687")
os.environ.setdefault("NEO4J_USERNAME", "neo4j")
# Placeholder only -- export the real password in the environment.
os.environ.setdefault("NEO4J_PASSWORD", "your-neo4j-password")

# --- LOAD TEXT DATA ---
# Read the CSV, keep the non-null entries of its 'text' column, and wrap each
# entry in a LangChain Document so it can be split downstream.
df = pd.read_csv("path_to_your_csv_file.csv")
texts = df["text"].dropna().tolist()
documents = []
for text_value in texts:
    documents.append(Document(page_content=text_value))

# --- TEXT SPLITTING ---
# Break the documents into chunks (chunk_size=1000, chunk_overlap=200) so each
# piece sent to the model stays small; the overlap repeats some text between
# neighbouring chunks.
splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=200,
    chunk_size=1000,
)
docs = splitter.split_documents(documents)

# --- GEMINI MODEL INIT ---
# A single shared chat-model client, used for sentiment labelling, graph
# extraction and the Cypher QA chain. temperature=0 is requested for all calls.
# NOTE(review): confirm "gemini-pro" is still a served model name.
llm = ChatGoogleGenerativeAI(
    model="gemini-pro",
    temperature=0,
)

# --- SENTIMENT ANALYSIS ---
def analyze_sentiment(text, *, model=None):
    """Ask the language model to label *text* by sentiment.

    Args:
        text: The passage to classify.
        model: Optional object exposing ``invoke(prompt)``. When omitted, the
            module-level ``llm`` is used.

    Returns:
        The model's reply with surrounding whitespace removed and lower-cased.
        The prompt asks for 'positive', 'negative', or 'neutral', but the
        reply is not validated against that set.
    """
    prompt = f"""Analyze the sentiment of the following text and classify it as 'positive', 'negative', or 'neutral':\n\n{text}"""
    response = (llm if model is None else model).invoke(prompt)
    # Chat models return a message object whose text is in ``.content``;
    # completion-style models return a plain ``str``. Accept both, since
    # calling ``.strip()`` on a message object raises AttributeError.
    reply = getattr(response, "content", response)
    if not isinstance(reply, str):
        # e.g. structured/multi-part content: degrade to its string form
        # rather than crash.
        reply = str(reply)
    return reply.strip().lower()

# Label every chunk and keep the label in its metadata under 'sentiment' so it
# can be attached to the relationships written to the graph later.
for doc in docs:
    doc.metadata.update({'sentiment': analyze_sentiment(doc.page_content)})

# --- EXTRACT TRIPLETS ---
graph_transformer = LLMGraphTransformer(llm=llm)
# convert_to_graph_documents() returns one GraphDocument per input chunk, in
# input order. Each GraphDocument exposes .relationships, whose items carry
# .source / .target nodes (with .id) and a .type string.
triplets = graph_transformer.convert_to_graph_documents(docs)

# --- CONNECT TO NEO4J ---
graph = Neo4jGraph(
    url=os.environ["NEO4J_URI"],
    username=os.environ["NEO4J_USERNAME"],
    password=os.environ["NEO4J_PASSWORD"]
)

# --- STORE IN NEO4J WITH SENTIMENT ---
# Schema: (:Entity {name})-[:REL {type, sentiment}]->(:Entity {name}).
# The predicate is stored in the relationship's `type` property (the Neo4j
# relationship type itself is always REL). UNWIND writes all triples of a
# chunk in one round trip instead of one query per triple.
STORE_TRIPLETS_QUERY = """
    UNWIND $rows AS row
    MERGE (s:Entity {name: row.subject})
    MERGE (o:Entity {name: row.object})
    MERGE (s)-[r:REL {type: row.predicate, sentiment: $sentiment}]->(o)
"""

for doc, graph_doc in zip(docs, triplets):
    sentiment = doc.metadata.get('sentiment', 'neutral')
    rows = [
        {
            "subject": relationship.source.id,
            "predicate": relationship.type,
            "object": relationship.target.id,
        }
        for relationship in graph_doc.relationships
    ]
    if not rows:
        # Nothing was extracted from this chunk; skip the database call.
        continue
    graph.query(STORE_TRIPLETS_QUERY, params={"rows": rows, "sentiment": sentiment})

# --- EXAMPLE CYPHER QUERIES FOR ANALYSIS ---
# 1. Get all entities connected to "Joe Biden" with their relationship type and sentiment
#    (the predicate is stored in the `type` property; type(r) would always return "REL")
# MATCH (n:Entity {name: "Joe Biden"})-[r:REL]->(m) RETURN n.name, r.type, r.sentiment, m.name

# 2. Find the top 10 most connected entities
# MATCH (n:Entity)-[r]->() RETURN n.name, count(r) as connections ORDER BY connections DESC LIMIT 10

# 3. Get all "negative" relationships
#    (sentiment is stored lower-cased; the prompt asks for positive / negative / neutral)
# MATCH (a)-[r:REL {sentiment: "negative"}]->(b) RETURN a.name, r.type, b.name

# 4. Get relationships related to elections
# MATCH (a)-[r]->(b) WHERE toLower(r.type) CONTAINS "election" RETURN a.name, r.type, b.name

# --- NATURAL LANGUAGE TO CYPHER USING LANGCHAIN ---
from langchain.chains import GraphCypherQAChain
# The Cypher-generation prompt is spelled CYPHER_GENERATION_PROMPT and is
# imported from the graph_qa `prompts` module.
from langchain.chains.graph_qa.prompts import CYPHER_GENERATION_PROMPT

# Chain that turns a natural-language question into Cypher with `llm`, runs it
# against `graph`, and phrases the result as an answer. verbose=True prints
# the intermediate steps.
# NOTE(review): recent langchain-community releases refuse to build this chain
# unless allow_dangerous_requests=True is passed -- confirm against the
# installed version before adding it.
qa_chain = GraphCypherQAChain.from_llm(
    llm=llm,
    graph=graph,
    cypher_prompt=CYPHER_GENERATION_PROMPT,
    verbose=True
)

# Example NL query:
# answer = qa_chain.run("Who does Joe Biden support?")
# print(answer)
