In [1]:
from dotenv import load_dotenv
from sqlalchemy import create_engine, text
from sqlalchemy.orm import sessionmaker
from langchain_openai import ChatOpenAI
from langchain_community.utilities import SQLDatabase
from langchain_community.agent_toolkits import SQLDatabaseToolkit
from langchain.agents import create_sql_agent, AgentType

import os

In [2]:
# Load variables from a local .env file into the process environment so the
# settings read in the next cell are available without hardcoding secrets.
load_dotenv()

True

In [3]:
# Database connection settings pulled from the environment in one pass.
# A variable that is not set yields None rather than raising.
DB_HOST, DB_PORT, DB_NAME, DB_USER, DB_PASS = (
    os.environ.get(setting_name)
    for setting_name in ('DB_HOST', 'DB_PORT', 'DB_NAME', 'DB_USER', 'DB_PASS')
)

# API key handed to the OpenAI chat model when it is constructed.
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')

In [7]:
# Step 1: Set up database connection with SQLAlchemy
def initialize_db_connection():
    """Create the SQLAlchemy engine and a session for the MySQL database.

    Reads the module-level ``DB_USER``, ``DB_PASS``, ``DB_HOST``, ``DB_PORT``
    and ``DB_NAME`` settings, builds a ``mysql+mysqlconnector`` engine and
    runs ``SELECT 1`` so that success is only reported once the server has
    actually answered.

    Returns:
        tuple: ``(engine, session)`` on success, or ``(None, None)`` when the
        engine could not be built or the connectivity check failed. The
        failure reason is printed rather than raised.
    """
    engine = None
    try:
        # Imported locally so this function works without touching the
        # notebook's import cell.
        from sqlalchemy.engine import URL

        # URL.create escapes each component, so credentials containing
        # characters such as '@', ':' or '/' cannot corrupt the URL the way
        # plain f-string interpolation would.
        db_url = URL.create(
            "mysql+mysqlconnector",
            username=DB_USER,
            password=DB_PASS,
            host=DB_HOST,
            port=int(DB_PORT) if DB_PORT else None,  # None -> driver default port
            database=DB_NAME,
        )
        engine = create_engine(
            db_url,
            echo=True, # Verbose logging for debugging
            pool_pre_ping=True # Ensure active connections
        )
        # create_engine() is lazy and opens no connection; without this round
        # trip a wrong host or password would only surface in a later cell.
        with engine.connect() as connection:
            connection.execute(text("SELECT 1"))
        Session = sessionmaker(bind=engine)
        session = Session()
        print("Database connection established!")
        return engine, session
    except Exception as e:
        if engine is not None:
            # Release anything the pool may hold from the failed attempt.
            engine.dispose()
        print(f"Failed to connect to database: {e}")
        return None, None

# Step 2: Configure LangChain SQLDatabase
def get_sql_database(engine):
    """Wrap a SQLAlchemy engine in LangChain's ``SQLDatabase``.

    Args:
        engine: The SQLAlchemy engine to expose to LangChain.

    Returns:
        The ``SQLDatabase`` wrapper, or ``None`` if construction failed (the
        reason is printed).
    """
    try:
        wrapped_db = SQLDatabase(engine)
    except Exception as err:
        print(f"Failed to initialize SQLDatabase: {err}")
        return None

    print("LangChain SQLDatabase initialized!")
    return wrapped_db

# Step 3: Initialize OpenAI LLM
def initialize_llm():
    """Build the OpenAI chat model used by the SQL agent.

    Returns:
        A ``ChatOpenAI`` instance configured with the module-level
        ``OPENAI_API_KEY``, or ``None`` if construction failed (the reason is
        printed).
    """
    try:
        # Settings are assembled inside the try so that any failure while
        # gathering them is reported the same way as a constructor failure.
        chat_model = ChatOpenAI(
            **{
                "model": "gpt-4o-mini",  # Cost-effective model: adjust as needed
                "api_key": OPENAI_API_KEY,
                "temperature": 0,
            }
        )
    except Exception as err:
        print(f"Failed to initialize LLM: {err}")
        return None

    print("OpenAI LLM initialized!")
    return chat_model

# Step 4: Create SQL Agent
def create_agent(db, llm):
    """Assemble the LangChain SQL agent from a database wrapper and an LLM.

    Args:
        db: The LangChain ``SQLDatabase`` the agent's tools will query.
        llm: The chat model that drives both the toolkit and the agent.

    Returns:
        The agent produced by ``create_sql_agent``, or ``None`` if any step
        failed (the reason is printed).
    """
    try:
        sql_toolkit = SQLDatabaseToolkit(db=db, llm=llm)
        agent_options = {
            "llm": llm,
            "toolkit": sql_toolkit,
            "agent_type": AgentType.ZERO_SHOT_REACT_DESCRIPTION,  # React-based reasoning
            "verbose": True,  # Show intermediate steps
        }
        sql_agent = create_sql_agent(**agent_options)
    except Exception as err:
        print(f"Failed to create agent: {err}")
        return None

    print("SQL Agent created successfully!")
    return sql_agent

# Step 5: Process Query and Format Output
def run_query(agent, query):
    """Execute a natural language query and return a formatted response.

    Args:
        agent: The object returned by ``create_agent``. Its ``invoke`` method
            is used when present; otherwise the legacy ``run`` method is
            called. ``None`` (setup failed) is reported as an error string.
        query: The question to ask, in plain English.

    Returns:
        str: A sentence wrapping the agent's answer, an apology when the
        answer is empty or contains "No results found", or an error message.
        Agent failures are returned as text rather than raised.
    """
    if agent is None:
        # Give a clear reason instead of an opaque AttributeError message.
        return f"Error processing query '{query}': SQL agent is not initialized."
    try:
        if hasattr(agent, "invoke"):
            # invoke() supersedes the deprecated run(); agent executors return
            # a dict whose "output" key holds the final answer.
            result = agent.invoke({"input": query})
            response = result.get("output") if isinstance(result, dict) else result
        else:
            response = agent.run(query)
        # Normalise so the substring check below cannot fail on None/non-str.
        response = "" if response is None else str(response)
        # Format raw SQL output into natural language
        if not response or "No results found" in response:
            return f"Sorry, I couldn't find any data for : '{query}'."
        return f"Here's what I found for '{query}':\n{response}"
    except Exception as e:
        return f"Error processing query '{query}': {e}"
        
        

In [8]:
# Set up: build each layer in order, stopping at the first failure.
# Pre-bind every name so later cells never hit a NameError and never reuse a
# stale object left over from an earlier run of this cell.
db = llm = agent = None

engine, session = initialize_db_connection()
if engine and session:
    db = get_sql_database(engine)
if db:
    llm = initialize_llm()
if llm:
    agent = create_agent(db, llm)

if agent is None:
    print("Setup incomplete: SQL agent is unavailable; see the messages above.")

Database connection established!
2025-03-23 13:58:15,040 INFO sqlalchemy.engine.Engine SELECT DATABASE()
2025-03-23 13:58:15,041 INFO sqlalchemy.engine.Engine [raw sql] {}
2025-03-23 13:58:15,043 INFO sqlalchemy.engine.Engine SELECT @@sql_mode
2025-03-23 13:58:15,044 INFO sqlalchemy.engine.Engine [raw sql] {}
2025-03-23 13:58:15,046 INFO sqlalchemy.engine.Engine SELECT @@lower_case_table_names
2025-03-23 13:58:15,047 INFO sqlalchemy.engine.Engine [raw sql] {}
2025-03-23 13:58:15,049 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-03-23 13:58:15,049 INFO sqlalchemy.engine.Engine SHOW FULL TABLES FROM `finquery`
2025-03-23 13:58:15,050 INFO sqlalchemy.engine.Engine [raw sql] {}
2025-03-23 13:58:15,052 INFO sqlalchemy.engine.Engine ROLLBACK
2025-03-23 13:58:15,054 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-03-23 13:58:15,055 INFO sqlalchemy.engine.Engine SHOW FULL TABLES FROM `finquery`
2025-03-23 13:58:15,056 INFO sqlalchemy.engine.Engine [raw sql] {}
2025-03-23 13:58:15,059

In [9]:
# Ask the agent for the maximum transaction amount. run_query returns a
# ready-to-print sentence (or an error message), so the result is printed as is.
query = "What is the largest amount in the financial_transactions table?"
result = run_query(agent, query)
print(result)



[1m> Entering new SQL Agent Executor chain...[0m
[32;1m[1;3mAction: sql_db_list_tables  
Action Input: ""  [0m[38;5;200m[1;3maccounting_transactions, financial_transactions[0m[32;1m[1;3mI need to check the schema of the `financial_transactions` table to identify the relevant columns for my query.  
Action: sql_db_schema  
Action Input: "financial_transactions"  [0m2025-03-23 13:58:17,174 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-03-23 13:58:17,175 INFO sqlalchemy.engine.Engine SELECT financial_transactions.transaction_id, financial_transactions.date, financial_transactions.customer_id, financial_transactions.amount, financial_transactions.type, financial_transactions.description 
FROM financial_transactions 
 LIMIT %(param_1)s
2025-03-23 13:58:17,176 INFO sqlalchemy.engine.Engine [generated in 0.00172s] {'param_1': 3}
2025-03-23 13:58:17,178 INFO sqlalchemy.engine.Engine ROLLBACK
[33;1m[1;3m
CREATE TABLE financial_transactions (
	transaction_id BIGINT NOT NULL,

In [10]:
# Ask the agent for the minimum transaction amount. `query` and `result` are
# rebound here, replacing the values from the previous question.
query = "What is the smallest amount in the financial_transactions table?"
result = run_query(agent, query)
print(result)



[1m> Entering new SQL Agent Executor chain...[0m
[32;1m[1;3mAction: sql_db_list_tables  
Action Input: ""  [0m[38;5;200m[1;3maccounting_transactions, financial_transactions[0m[32;1m[1;3mI need to check the schema of the financial_transactions table to identify the relevant columns for my query.  
Action: sql_db_schema  
Action Input: "financial_transactions"  [0m2025-03-23 14:00:58,525 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-03-23 14:00:58,526 INFO sqlalchemy.engine.Engine SELECT financial_transactions.transaction_id, financial_transactions.date, financial_transactions.customer_id, financial_transactions.amount, financial_transactions.type, financial_transactions.description 
FROM financial_transactions 
 LIMIT %(param_1)s
2025-03-23 14:00:58,526 INFO sqlalchemy.engine.Engine [cached since 161.3s ago] {'param_1': 3}
2025-03-23 14:00:58,528 INFO sqlalchemy.engine.Engine ROLLBACK
[33;1m[1;3m
CREATE TABLE financial_transactions (
	transaction_id BIGINT NOT NULL,

In [11]:
# Ask the agent for the mean transaction amount. `query` and `result` are
# rebound here, replacing the values from the previous question.
query = "What is the average amount in the financial_transactions table?"
result = run_query(agent, query)
print(result)



[1m> Entering new SQL Agent Executor chain...[0m
[32;1m[1;3mAction: sql_db_list_tables  
Action Input: ""  [0m[38;5;200m[1;3maccounting_transactions, financial_transactions[0m[32;1m[1;3mI need to check the schema of the `financial_transactions` table to identify the relevant columns for calculating the average amount.  
Action: sql_db_schema  
Action Input: "financial_transactions"  [0m2025-03-23 14:01:45,953 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-03-23 14:01:45,954 INFO sqlalchemy.engine.Engine SELECT financial_transactions.transaction_id, financial_transactions.date, financial_transactions.customer_id, financial_transactions.amount, financial_transactions.type, financial_transactions.description 
FROM financial_transactions 
 LIMIT %(param_1)s
2025-03-23 14:01:45,954 INFO sqlalchemy.engine.Engine [cached since 208.8s ago] {'param_1': 3}
2025-03-23 14:01:45,956 INFO sqlalchemy.engine.Engine ROLLBACK
[33;1m[1;3m
CREATE TABLE financial_transactions (
	transac