In [5]:
from inject_relationship import execute_ultra_optimized_relationships
from inject_node import execute_ultra_optimized_nodes
from schema import schema_modelling
from sqlite_to_csv import export_sqlite_to_csv
from erd_text import erd_text_generate

In [None]:
# --- Environment ---
import os

from dotenv import load_dotenv

load_dotenv()

# --- LLM clients ---
from langchain_google_genai import ChatGoogleGenerativeAI
from google.generativeai.types import HarmCategory, HarmBlockThreshold


def _make_gemini_flash(key_var, temperature, **extra_kwargs):
    """Build one gemini-2.5-flash chat client bound to the key stored in `key_var`.

    GOOGLE_API_KEY is overwritten with the value of `key_var` right before the
    client is constructed (presumably so the client picks that key up at
    construction time -- confirm against the langchain_google_genai docs).

    Args:
        key_var: Name of the environment variable holding the API key to use.
        temperature: Sampling temperature passed to the client.
        **extra_kwargs: Additional constructor arguments (e.g. safety_settings).

    Raises:
        KeyError: If `key_var` is not present in the environment.
    """
    os.environ["GOOGLE_API_KEY"] = os.environ[key_var]
    return ChatGoogleGenerativeAI(
        model="gemini-2.5-flash",
        temperature=temperature,
        max_tokens=None,
        timeout=None,
        max_retries=1,
        **extra_kwargs,
    )


# llm1 is the only client that sets safety thresholds: BLOCK_NONE for the four
# harm categories listed below.
_NO_BLOCKING = {
    category: HarmBlockThreshold.BLOCK_NONE
    for category in (
        HarmCategory.HARM_CATEGORY_HARASSMENT,
        HarmCategory.HARM_CATEGORY_HATE_SPEECH,
        HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
        HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
    )
}

# Each client is created from a different stored key; after this cell runs,
# GOOGLE_API_KEY holds the value of GOOGLE_API_KEY3.
llm1 = _make_gemini_flash("GOOGLE_API_KEY0", 0.5, safety_settings=_NO_BLOCKING)
llm2 = _make_gemini_flash("GOOGLE_API_KEY2", 0.2)
llm3 = _make_gemini_flash("GOOGLE_API_KEY3", 0.2)

In [3]:
import os

from dotenv import load_dotenv
from neo4j import GraphDatabase

# Load .env in this cell too, so it works on a fresh kernel even when the LLM
# setup cell has not been executed yet.
load_dotenv()

# Connection settings are read from the environment (or .env). Non-secret
# values fall back to the instance this notebook was written against.
NEO4J_URI = os.getenv("NEO4J_URI", "neo4j+s://eaee53dc.databases.neo4j.io")
NEO4J_USERNAME = os.getenv("NEO4J_USERNAME", "neo4j")
NEO4J_DATABASE = os.getenv("NEO4J_DATABASE", "neo4j")
AURA_INSTANCEID = os.getenv("AURA_INSTANCEID", "eaee53dc")
AURA_INSTANCENAME = os.getenv("AURA_INSTANCENAME", "Free instance")

# SECURITY: the password must never be stored in the notebook. It has no
# fallback on purpose; the value that used to be hardcoded here should be
# rotated, since it was exposed in the notebook source.
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD")
if not NEO4J_PASSWORD:
    raise RuntimeError(
        "NEO4J_PASSWORD is not set; define it in the environment or in the .env file."
    )

# Shared driver used by the node and relationship injection cells.
driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USERNAME, NEO4J_PASSWORD))
print(driver)

<neo4j._sync.driver.Neo4jDriver object at 0x000001CD288A97D0>


In [5]:
from langchain_neo4j import Neo4jGraph

# LangChain graph wrapper over the same Neo4j instance as `driver`.
# The username comes from NEO4J_USERNAME (previously a duplicated literal) so
# both connections always use the same credentials.
enhanced_graph = Neo4jGraph(
    url=NEO4J_URI,
    username=NEO4J_USERNAME,
    password=NEO4J_PASSWORD,
    driver_config={
        "max_connection_lifetime": 300,  # 5 minutes
        "keep_alive": True,
        "max_connection_pool_size": 50,
    },
    enhanced_schema=True,
)

In [11]:
# Base name of the dataset; the SQLite file name is derived from it.
db_name = "sales_master"
db_path = f"{db_name}.sqlite"

In [12]:
# Export the SQLite database to CSV files under "<db_name>_files".
# Uses db_path (defined with db_name) instead of rebuilding the file name, so
# the path has a single source of truth.
replacements = export_sqlite_to_csv(db_path, f"{db_name}_files")

Processing table: customer_table
Saved customer_table -> sales_master_files\customer_table.csv
Processing table: order_table
Saved order_table -> sales_master_files\order_table.csv
Processing table: product_table
Saved product_table -> sales_master_files\product_table.csv
Processing table: order_product_table
Saved order_product_table -> sales_master_files\order_product_table.csv
All tables exported!


In [13]:
# Produce the schema info and its textual ERD, then print the text so its
# multi-line layout is shown as-is.
(schema_info, erd_text) = erd_text_generate(db_name, replacements)
print(erd_text)


Table: customer_table
Columns:
  - CUSTOMERNAME (TEXT)
  - PHONE (TEXT)
  - ADDRESSLINE1 (TEXT)
  - ADDRESSLINE2 (TEXT)
  - CITY (TEXT)
  - STATE (TEXT)
  - POSTALCODE (TEXT)
  - COUNTRY (TEXT)
  - TERRITORY (TEXT)
  - CONTACTLASTNAME (TEXT)
  - CONTACTFIRSTNAME (TEXT)
Primary Key: CUSTOMERNAME

Table: order_table
Columns:
  - ORDERNUMBER (INTEGER)
  - ORDERDATE (TEXT)
  - STATUS (TEXT)
  - QTR_ID (INTEGER)
  - MONTH_ID (INTEGER)
  - YEAR_ID (INTEGER)
  - CUSTOMERNAME (TEXT)
Primary Key: ORDERNUMBER
Foreign Keys:
  - CUSTOMERNAME → customer_table.CUSTOMERNAME

Table: product_table
Columns:
  - PRODUCTCODE (TEXT)
  - MSRP (REAL)
  - PRODUCTLINE (TEXT)
Primary Key: PRODUCTCODE

Table: order_product_table
Columns:
  - ORDERNUMBER (INTEGER)
  - PRODUCTCODE (TEXT)
  - QUANTITYORDERED (INTEGER)
  - PRICEEACH (REAL)
  - SALES (REAL)
  - ORDERLINENUMBER (INTEGER)
  - DEALSIZE (TEXT)
Foreign Keys:
  - PRODUCTCODE → product_table.PRODUCTCODE
  - ORDERNUMBER → order_table.ORDERNUMBER



In [14]:
# Run schema modelling on the ERD text with the first LLM client and print
# the resulting model (exposes .nodes and .relationships, used further down).
modelling_output = schema_modelling(erd_text, llm1)
print(modelling_output)

nodes=[Node(name='Customer', key='CUSTOMERNAME', properties=['PHONE', 'ADDRESSLINE1', 'ADDRESSLINE2', 'CITY', 'STATE', 'POSTALCODE', 'COUNTRY', 'TERRITORY', 'CONTACTLASTNAME', 'CONTACTFIRSTNAME'], table_name=['customer_table']), Node(name='Order', key='ORDERNUMBER', properties=['ORDERDATE', 'STATUS', 'QTR_ID', 'MONTH_ID', 'YEAR_ID'], table_name=['order_table']), Node(name='Product', key='PRODUCTCODE', properties=['MSRP', 'PRODUCTLINE'], table_name=['product_table'])] relationships=[Relationship(source='Customer', target='Order', label='PLACED', key_s='CUSTOMERNAME', key_t='ORDERNUMBER', properties=[], table_name='order_table'), Relationship(source='Order', target='Product', label='CONTAINS', key_s='ORDERNUMBER', key_t='PRODUCTCODE', properties=['QUANTITYORDERED', 'PRICEEACH', 'SALES', 'ORDERLINENUMBER', 'DEALSIZE'], table_name='order_product_table')]


In [15]:
# Inject the modelled nodes into Neo4j, reading from the "<db_name>_files"
# CSV export folder.
inject_node_stats = execute_ultra_optimized_nodes(
    modelling_output.nodes,
    driver,
    f"{db_name}_files",
)

2025-09-19 11:26:13,996 - INFO - 🚀 Starting ULTRA-OPTIMIZED node injection for 3 node types
2025-09-19 11:26:15,158 - INFO - Node injection system initialized with 6 workers
2025-09-19 11:26:15,165 - INFO - Processing Customer nodes with ultra-optimized strategy
2025-09-19 11:26:15,165 - INFO - Processing Order nodes with ultra-optimized strategy
2025-09-19 11:26:15,165 - INFO - Processing Product nodes with ultra-optimized strategy
2025-09-19 11:26:15,181 - INFO - Loaded 109 records from product_table
2025-09-19 11:26:15,196 - INFO - Chunking Product: 109 records, complexity=1.3, batch_size=1600
2025-09-19 11:26:15,198 - INFO - Created 1 optimized chunks for Product
2025-09-19 11:26:15,198 - INFO - Loaded 92 records from customer_table
2025-09-19 11:26:15,211 - INFO - Loaded 307 records from order_table
2025-09-19 11:26:15,219 - INFO - Chunking Customer: 92 records, complexity=6.9, batch_size=500
2025-09-19 11:26:15,219 - INFO - Chunking Order: 307 records, complexity=1.6, batch_si

In [12]:
# Inject the modelled relationships, passing the node definitions alongside
# them and reading from the same "<db_name>_files" CSV export folder.
results = execute_ultra_optimized_relationships(
    relationships=modelling_output.relationships,
    driver=driver,
    output_dir=f"{db_name}_files",
    nodes_list=modelling_output.nodes,  # node definitions from the same model
    initial_batch_size=1000,  # kept small to ease debugging
    max_workers=6,  # lowered for stability
)

2025-09-17 19:43:18,608 - INFO - Starting ENHANCED injection for 9 relationships with retry mechanisms
2025-09-17 19:43:18,609 - INFO - Node lookup: {'Customer': 'customer_id', 'Staff': 'staff_id', 'Store': 'store_id', 'Order': 'order_id', 'Category': 'category_id', 'Brand': 'brand_id', 'Product': 'product_id'}
2025-09-17 19:43:18,814 - INFO - Neo4j connection test: Connection OK at 2025-09-17T14:13:18.816000000+00:00
2025-09-17 19:43:18,902 - INFO - Current database has 3410 nodes
2025-09-17 19:43:19,083 - INFO - Initialized with 3 workers, batch size 1000
2025-09-17 19:43:19,083 - INFO - === PRE-FLIGHT DATABASE CHECK ===
2025-09-17 19:43:19,179 - INFO - Database contains 3410 total nodes
2025-09-17 19:43:19,317 - INFO - Node counts by label:
2025-09-17 19:43:19,318 - INFO -   Order: 1615
2025-09-17 19:43:19,318 - INFO -   Customer: 1445
2025-09-17 19:43:19,319 - INFO -   Product: 321
2025-09-17 19:43:19,321 - INFO -   Staff: 10
2025-09-17 19:43:19,321 - INFO -   Brand: 9
2025-09-17 1

In [6]:
from langraph_neo4j3 import AgentState, run_agent_workflow

# Natural-language question handed to the agent workflow.
user_input = "What is the total revenue per year?"

# Initial workflow state: only the question is populated; every other field
# starts empty.
state: AgentState = {
    "question": user_input,
    "next_action": "",
    "cypher_errors": [],
    "database_records": [],
    "steps": [],
    "answer": "",
    "cypher_statement": "",
}

result = run_agent_workflow(state, enhanced_graph)

KeyboardInterrupt: 

In [None]:
# Display the value returned by run_agent_workflow in the previous cell.
result

{'question': 'What is the total revenue per year?',
 'next_action': 'END',
 'cypher_errors': [],
 'database_records': [{'year': '2016', 'revenue': 2427378.527599994},
  {'year': '2017', 'revenue': 3447208.242499996},
  {'year': '2018', 'revenue': 1814529.7874999954}],
 'steps': ['generate_cypher',
  'validate_cypher',
  'execute_cypher',
  'generate_final_answer'],
 'answer': 'The total revenue per year is:\n*   **2016**: $2,427,378.53\n*   **2017**: $3,447,208.24\n*   **2018**: $1,814,529.79',
 'cypher_statement': 'MATCH (order:Order)-[ci:CONTAINS_ITEM]->(product:Product)\nWITH toString(date(order.order_date).year) AS year, SUM((ci.list_price * ci.quantity) * (1 - ci.discount)) AS revenue\nRETURN year, revenue\nORDER BY year'}