# DB Demo
Attempt to use alternative methods to generate SQL queries.

See <https://python.langchain.com/docs/tutorials/sql_qa/> for notes

In [29]:
import os

import pandas as pd
from dotenv import load_dotenv
from IPython.display import Markdown, display
from langchain import hub
from langchain.chat_models import init_chat_model
from langchain_community.utilities import SQLDatabase
from langsmith import Client
from sqlalchemy import create_engine, text
from sqlalchemy.engine import URL
from sqlalchemy.orm import sessionmaker
from typing_extensions import Annotated, TypedDict

In [2]:
# Load variables from a .env file so the os.getenv() lookups in the next cell can see them.
load_dotenv()

True

In [3]:
# Database connection settings; each value is None when the variable is unset.
DB_HOST = os.environ.get('DB_HOST')
DB_PORT = os.environ.get('DB_PORT')
DB_NAME = os.environ.get('DB_NAME')
DB_USER = os.environ.get('DB_USER')
DB_PASS = os.environ.get('DB_PASS')

# OpenAI settings, also taken from the environment.
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')
OPENAI_LLM_MODEL = os.environ.get('OPENAI_LLM_MODEL')

# LangSmith client authenticated with LANGSMITH_API_KEY from the environment.
# NOTE(review): `client` is not referenced by any other cell visible here — confirm it is still needed.
client = Client(api_key=os.getenv("LANGSMITH_API_KEY"))

In [4]:
def initialize_db_connection():
    """Create the SQLAlchemy engine and an ORM session for the MySQL database.

    Connection settings are read from the module-level ``DB_USER``,
    ``DB_PASS``, ``DB_HOST``, ``DB_PORT`` and ``DB_NAME`` values.

    Returns:
        tuple: ``(engine, session)`` on success, or ``(None, None)`` when the
        URL cannot be built or the database cannot be reached. The error is
        printed rather than raised.
    """
    try:
        # Build the URL from separate components instead of formatting a
        # string, so a password containing "@", ":" or "/" cannot corrupt it.
        url = URL.create(
            "mysql+mysqlconnector",
            username=DB_USER,
            password=DB_PASS,
            host=DB_HOST,
            port=int(DB_PORT) if DB_PORT else None,
            database=DB_NAME,
        )
        engine = create_engine(
            url,
            echo=False,  # set to True for verbose SQL logging while debugging
            pool_pre_ping=True,  # test pooled connections before handing them out
        )
        # create_engine() does not connect by itself: open one connection so
        # the success message is only printed when the database is reachable.
        with engine.connect() as connection:
            connection.execute(text("SELECT 1"))
        Session = sessionmaker(bind=engine)
        session = Session()
        print("Database connection established!")
        return engine, session
    except Exception as e:
        print(f"Failed to connect to database: {e}")
        return None, None

def get_sql_database(engine):
    """Build a LangChain ``SQLDatabase`` around an existing SQLAlchemy engine.

    Returns the wrapper on success. If construction raises, the error is
    printed and ``None`` is returned instead.
    """
    try:
        wrapped = SQLDatabase(engine)
    except Exception as err:
        print(f"Failed to initialize SQLDatabase: {err}")
        return None
    else:
        print("LangChain SQLDatabase initialized!")
        return wrapped

    

In [21]:
# Create the engine/session pair and wrap the engine for LangChain.
# Both helpers print their status and return None on failure instead of raising.
engine, session = initialize_db_connection()
db = get_sql_database(engine)

Database connection established!
LangChain SQLDatabase initialized!


In [6]:
# SQL dialect reported by the LangChain wrapper; write_query passes it to the prompt.
db.dialect

'mysql'

In [7]:
# Tables the LangChain wrapper reports as usable.
db.get_usable_table_names()

['accounting_transactions', 'financial_transactions', 'sales_data']

In [8]:
# Sanity check: fetch two rows from sales_data to confirm queries run and to preview the columns.
print(db.run("SELECT * FROM sales_data LIMIT 2"))

[('1001', 'P007', 'Noise Cancelling Headphones', 'Audio', 1, Decimal('200.00'), Decimal('200.00'), datetime.date(2025, 2, 28)), ('1002', 'P002', 'Laptop 15"', 'Computers', 4, Decimal('850.00'), Decimal('3400.00'), datetime.date(2025, 2, 11))]


In [9]:
# Chat model used for SQL generation. Honour the OPENAI_LLM_MODEL setting read
# from the environment, falling back to "gpt-4o-mini" when it is unset or empty.
llm = init_chat_model(OPENAI_LLM_MODEL or "gpt-4o-mini", model_provider="openai")

In [10]:
# Pull the text-to-SQL system prompt from the LangChain hub.
# write_query fills it with the keys dialect, top_k, table_info and input.
query_prompt_template = hub.pull("langchain-ai/sql-query-system-prompt")

In [23]:
# Structured-output schema passed to llm.with_structured_output() in write_query.
# NOTE(review): the docstring and the Annotated description are presumably sent
# to the model as schema text, so treat both as runtime strings — confirm.
class QueryOutput(TypedDict):
    """Generated SQL query."""
    # Annotated carries (type, `...`, description); write_query reads this key from the result.
    # NOTE(review): "Syntatically" is misspelled; left unchanged because it is runtime text.
    query: Annotated[str, ..., "Syntatically valid SQL query."]
    
def write_query(question: str, top_k: int = 10) -> str:
    """Ask the LLM to translate a natural-language question into a SQL query.

    The prompt template is filled with the database dialect, the table
    definitions reported by ``db.get_table_info()``, the question and the
    ``top_k`` value. The model's answer is requested in the ``QueryOutput``
    structure.

    Args:
        question: The natural-language question to answer.
        top_k: Value supplied to the prompt's ``top_k`` variable. Defaults to
            10, the value that was previously hard-coded.

    Returns:
        The value of the ``"query"`` key in the model's structured answer.

    Note:
        No current date is passed to the prompt here, so relative-date
        questions such as "last month" depend on the model's own guess.
    """
    prompt = query_prompt_template.invoke(
        {
            "dialect": db.dialect,
            "top_k": top_k,
            "table_info": db.get_table_info(),
            "input": question,
        }
    )
    structured_llm = llm.with_structured_output(QueryOutput)
    result = structured_llm.invoke(prompt)
    return result["query"]

def exec_and_render(sql):
    """Run ``sql`` on the shared engine and return the result as markdown.

    Returns the literal ``'**No Results**'`` when the query yields no rows;
    otherwise the markdown table of the first rows (``DataFrame.head()``).
    """
    with engine.connect() as conn:
        frame = pd.read_sql(sql=text(sql), con=conn)
    # The frame is fully loaded at this point, so the connection can be closed.
    return '**No Results**' if frame.empty else frame.head().to_markdown()

    

In [12]:
# Sample natural-language questions used to exercise the text-to-SQL pipeline.
questions = [
    "What were the top-selling products last month?",
    "Plot a line chart of the monthly sales data.",
    "What were the top-selling products of all time?",
    'What is the minimum sale amount?',
]
# Keep the individually named questions available as well.
test_question1, test_question2, test_question3, test_question4 = questions

In [30]:
# For every sample question: generate SQL, show it, run it through the
# LangChain wrapper (raw result) and then again through pandas (markdown table).
# NOTE(review): the model-generated SQL is executed as-is, without validation.
for question in questions:
    print('Question:', question)
    sql = write_query(question)
    print(f'---\n{sql}\n---')
    answer = db.run(sql)
    print(f'{answer}\n---\n')
    rendered = exec_and_render(sql)
    display(Markdown(rendered))
    print()

Question: What were the top-selling products last month?
---
SELECT product_name, SUM(quantity) AS total_quantity_sold 
FROM sales_data 
WHERE order_date >= '2023-09-01' AND order_date < '2023-10-01' 
GROUP BY product_name 
ORDER BY total_quantity_sold DESC 
LIMIT 10;
---

---



**No Results**


Question: Plot a line chart of the monthly sales data.
---
SELECT DATE_FORMAT(order_date, '%Y-%m') AS month, SUM(total_price) AS total_sales
FROM sales_data
GROUP BY month
ORDER BY month LIMIT 10;
---
[('2025-01', Decimal('22920.00')), ('2025-02', Decimal('22405.00')), ('2025-03', Decimal('24145.00'))]
---



|    | month   |   total_sales |
|---:|:--------|--------------:|
|  0 | 2025-01 |         22920 |
|  1 | 2025-02 |         22405 |
|  2 | 2025-03 |         24145 |


Question: What were the top-selling products of all time?
---
SELECT product_id, product_name, SUM(quantity) as total_quantity_sold 
FROM sales_data 
GROUP BY product_id, product_name 
ORDER BY total_quantity_sold DESC 
LIMIT 10;
---
[('P005', 'Monitor 27"', Decimal('52')), ('P006', 'External SSD 1TB', Decimal('44')), ('P007', 'Noise Cancelling Headphones', Decimal('43')), ('P003', 'USB-C Hub', Decimal('41')), ('P002', 'Laptop 15"', Decimal('38')), ('P004', 'Gaming Keyboard', Decimal('34')), ('P001', 'Wireless Mouse', Decimal('26'))]
---



|    | product_id   | product_name                |   total_quantity_sold |
|---:|:-------------|:----------------------------|----------------------:|
|  0 | P005         | Monitor 27"                 |                    52 |
|  1 | P006         | External SSD 1TB            |                    44 |
|  2 | P007         | Noise Cancelling Headphones |                    43 |
|  3 | P003         | USB-C Hub                   |                    41 |
|  4 | P002         | Laptop 15"                  |                    38 |


Question: What is the minimum sale amount?
---
SELECT MIN(total_price) AS Minimum_Sale_Amount FROM sales_data LIMIT 10;
---
[(Decimal('25.00'),)]
---



|    |   Minimum_Sale_Amount |
|---:|----------------------:|
|  0 |                    25 |


