<!-- # Data Exploration -->


In [1]:
# Built-in library
from pathlib import Path
import re
import json
from typing import Any, Literal, Optional, Union
import logging
import warnings

# Standard imports
import numpy as np
import numpy.typing as npt
from pprint import pprint
import pandas as pd
import polars as pl
from rich.console import Console
from rich.theme import Theme

# Custom style names (mapped to hex colours) registered with the rich console below
custom_theme = Theme(
    dict(
        white="#FFFFFF",  # bright white
        info="#00FF00",  # bright green
        warning="#FFD700",  # bright gold
        error="#FF1493",  # deep pink
        success="#00FFFF",  # cyan
        highlight="#FF4500",  # orange-red
    )
)
console = Console(theme=custom_theme)

# Visualization
# import matplotlib.pyplot as plt

# NumPy: print floats with 4 digits of precision
np.set_printoptions(precision=4)

# Pandas: raise the display limits so wide/long frames are not truncated
pd.set_option("display.max_rows", 1_000)
pd.set_option("display.max_columns", 1_000)
pd.set_option("display.max_colwidth", 600)

# Polars: same idea, for string length and number of printed columns
pl.Config.set_fmt_str_lengths(1_000)
pl.Config.set_tbl_cols(n=1_000)

# Blanket filter: every warning is suppressed from here on
warnings.filterwarnings("ignore")

# Black code formatter (Optional)
%load_ext lab_black

# auto reload imports
%load_ext autoreload
%autoreload 2

In [2]:
def go_up_from_current_directory(*, go_up: int = 1) -> None:
    """Put an ancestor of the current working directory first on ``sys.path``.

    The directory ``go_up`` levels above the current working directory is
    resolved to an absolute path, moved to position 0 of ``sys.path`` and
    printed. Re-running the call does not add duplicate entries.

    Parameters
    ----------
    go_up : int, default=1
        Number of directory levels to climb from the current working
        directory. Zero or a negative value resolves to the working
        directory itself.

    Returns
    -------
    None
    """
    import os
    import sys

    # Climb `go_up` levels from the working directory (an empty list keeps the cwd)
    parent_hops = [os.pardir] * go_up
    abs_path_prev_directory = os.path.abspath(
        os.path.join(os.getcwd(), *parent_hops)
    )

    # Remove earlier copies first so repeated cell runs do not grow sys.path,
    # then put the directory at the front so it takes import priority.
    sys.path[:] = [entry for entry in sys.path if entry != abs_path_prev_directory]
    sys.path.insert(0, abs_path_prev_directory)
    print(abs_path_prev_directory)

<!-- ### SQL Database(s) -->

In [3]:
go_up_from_current_directory(go_up=1)

from src.db_utils import SQLFromTabularData

/Users/neidu/Desktop/Projects/Personal/My_Projects/Gen-AI-Projects/QA_and_RAG


In [53]:
# Earlier run of this same cell, kept for reference: it loaded the breast-cancer CSV
# into the `breast_cancer` table of the same SQLite file.
# create_db: SQLFromTabularData = SQLFromTabularData(
#     file_path="../data/flat_files/breast-cancer.csv",
#     db_path="../data/flat_files/stored_data.db",
#     table_name="breast_cancer",
# )
# create_db.run()


# Load the Titanic CSV into the `titanic` table of stored_data.db.
# Paths are relative to the notebook's working directory.
create_db: SQLFromTabularData = SQLFromTabularData(
    file_path="../data/flat_files/titanic.csv",
    db_path="../data/flat_files/stored_data.db",
    table_name="titanic",
)
# Per the recorded output, this reports the DB path and the available table names
create_db.run()

DB at sqlite:///../data/flat_files/stored_data.db successfully created/updated with titanic table.
DB Path: sqlite:///../data/flat_files/stored_data.db
Available table Names: ['breast_cancer', 'diabetes', 'titanic']


In [4]:
go_up_from_current_directory(go_up=2)

from QA_and_RAG import ROOT_PATH
from config import settings

/Users/neidu/Desktop/Projects/Personal/My_Projects/Gen-AI-Projects


In [5]:
from sqlalchemy import create_engine


# SQLite file holding the chinook sample database (relative to the notebook directory)
db_path: str = "../data/sql/chinook.db"
conn = create_engine(f"sqlite:///{db_path}")
query: str = "SELECT name FROM sqlite_master WHERE type='table';"

# Open a connection only for the duration of the read so it is closed afterwards
# instead of being left open for the lifetime of the kernel.
with conn.connect() as connection:
    table_names: list[str] = (
        pl.read_database(query=query, connection=connection).to_series().to_list()
    )
table_names

['Album',
 'Artist',
 'Customer',
 'Employee',
 'Genre',
 'Invoice',
 'InvoiceLine',
 'MediaType',
 'Playlist',
 'PlaylistTrack',
 'Track']

In [6]:
# Peek at the first rows of the Track table
query = "SELECT * FROM Track LIMIT 10"

# Close the connection as soon as the frame has been read
with conn.connect() as connection:
    track_preview: pl.DataFrame = pl.read_database(query=query, connection=connection)
track_preview

TrackId,Name,AlbumId,MediaTypeId,GenreId,Composer,Milliseconds,Bytes,UnitPrice
i64,str,i64,i64,i64,str,i64,i64,f64
1,"""For Those About To Rock (We Salute You)""",1,1,1,"""Angus Young, Malcolm Young, Brian Johnson""",343719,11170334,0.99
2,"""Balls to the Wall""",2,2,1,"""U. Dirkschneider, W. Hoffmann, H. Frank, P. Baltes, S. Kaufmann, G. Hoffmann""",342562,5510424,0.99
3,"""Fast As a Shark""",3,2,1,"""F. Baltes, S. Kaufman, U. Dirkscneider & W. Hoffman""",230619,3990994,0.99
4,"""Restless and Wild""",3,2,1,"""F. Baltes, R.A. Smith-Diesel, S. Kaufman, U. Dirkscneider & W. Hoffman""",252051,4331779,0.99
5,"""Princess of the Dawn""",3,2,1,"""Deaffy & R.A. Smith-Diesel""",375418,6290521,0.99
6,"""Put The Finger On You""",1,1,1,"""Angus Young, Malcolm Young, Brian Johnson""",205662,6713451,0.99
7,"""Let's Get It Up""",1,1,1,"""Angus Young, Malcolm Young, Brian Johnson""",233926,7636561,0.99
8,"""Inject The Venom""",1,1,1,"""Angus Young, Malcolm Young, Brian Johnson""",210834,6852860,0.99
9,"""Snowballed""",1,1,1,"""Angus Young, Malcolm Young, Brian Johnson""",203102,6599424,0.99
10,"""Evil Walks""",1,1,1,"""Angus Young, Malcolm Young, Brian Johnson""",263497,8611245,0.99


In [7]:
# Peek at the first rows of the Artist table
query = "SELECT * FROM Artist LIMIT 10"

# Close the connection as soon as the frame has been read
with conn.connect() as connection:
    artist_preview: pl.DataFrame = pl.read_database(query=query, connection=connection)
artist_preview

ArtistId,Name
i64,str
1,"""AC/DC"""
2,"""Accept"""
3,"""Aerosmith"""
4,"""Alanis Morissette"""
5,"""Alice In Chains"""
6,"""Antônio Carlos Jobim"""
7,"""Apocalyptica"""
8,"""Audioslave"""
9,"""BackBeat"""
10,"""Billy Cobham"""


In [8]:
# Load the breast-cancer CSV into a polars frame. `df` is reused by the
# write_database cell below and is rebound to the diabetes data later on.
fp: str = "../data/flat_files/breast-cancer.csv"
df: pl.DataFrame = pl.read_csv(fp)

# Preview the first rows
df.head()

id,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,symmetry_mean,fractal_dimension_mean,radius_se,texture_se,perimeter_se,area_se,smoothness_se,compactness_se,concavity_se,concave points_se,symmetry_se,fractal_dimension_se,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst
i64,str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
842302,"""M""",17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,1.095,0.9053,8.589,153.4,0.006399,0.04904,0.05373,0.01587,0.03003,0.006193,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
842517,"""M""",20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,0.5435,0.7339,3.398,74.08,0.005225,0.01308,0.0186,0.0134,0.01389,0.003532,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
84300903,"""M""",19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,0.7456,0.7869,4.585,94.03,0.00615,0.04006,0.03832,0.02058,0.0225,0.004571,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
84348301,"""M""",11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,0.4956,1.156,3.445,27.23,0.00911,0.07458,0.05661,0.01867,0.05963,0.009208,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
84358402,"""M""",20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,0.7572,0.7813,5.438,94.44,0.01149,0.02461,0.05688,0.01885,0.01756,0.005115,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


In [9]:
# Requires Pyarrow
# Connection URI of the scratch SQLite file; the inspector cell below reuses it
full_db_path: str = "sqlite:///test.db"

try:
    # Persist only the preview rows as `sample_table`
    df.head().write_database(connection=full_db_path, table_name="sample_table")
except Exception as write_error:
    # Report the failure instead of stopping the notebook, e.g. the recorded
    # "Table 'sample_table' already exists." when the cell is run again
    print(write_error)

Table 'sample_table' already exists.


In [10]:
from sqlalchemy import inspect, Inspector
from pydantic import BaseModel, computed_field, Field, PrivateAttr

In [11]:
# NOTE: this rebinds `conn` from the chinook engine to an engine for the scratch test DB
conn = create_engine(full_db_path)
insp: Inspector = inspect(conn)

# List the tables currently present in the scratch DB
insp.get_table_names()

['diabetees', 'sample_table']

In [12]:
import os
import langchain
from langchain.chains import create_sql_query_chain
from langchain_community.utilities import SQLDatabase
from langchain_community.tools.sql_database.tool import QuerySQLDatabaseTool
from langchain_community.agent_toolkits import create_sql_agent
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableLambda, RunnablePassthrough


# Global debug tracing: this is what produces the long [chain/start] / [llm/start]
# logs recorded under the chain and agent cells further down.
langchain.debug = True

In [13]:
from omegaconf import OmegaConf, DictConfig


# Load the shared YAML config (path is relative to the notebook's working directory)
config: DictConfig = OmegaConf.load("../../config/config.yaml")

# Print the config with interpolations resolved; it holds the chinook DB URI and
# the `sql_agent_prompt` template used by the answer chain.
print(OmegaConf.to_yaml(config, resolve=True))

QA_and_RAG:
  path: QA_and_RAG
  chinook_db_path: sqlite:///QA_and_RAG/data/sql/chinook.db
  sql_agent_prompt: 'Based on the database query result, provide a complete sentence
    answer to the user''s question.


    Question: {question}

    SQL Result: {result}

    Answer:

    '



In [14]:
# NOTE: the right-hand operand starts with "/", so the result is
# '/data/sql/chinook.db' (see the recorded output), not a path under ROOT_PATH.
# Later cells prepend "../" to this value, so they depend on it being exactly
# this string; change both sides together if this is ever corrected.
file_path: str = str(ROOT_PATH / "/data/sql/chinook.db")
file_path

# ROOT_PATH

'/data/sql/chinook.db'

In [15]:
from langchain_openai import ChatOpenAI

# Chat model shared by the SQL chains/agent below. The API key is read from the
# project settings object rather than written into the notebook; temperature is 0.
llm = ChatOpenAI(model="gpt-4o-mini", api_key=settings.OPENAI_API_KEY, temperature=0)
llm

ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x1225f81a0>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x1225f9d00>, root_client=<openai.OpenAI object at 0x11cd09970>, root_async_client=<openai.AsyncOpenAI object at 0x1225f8200>, model_name='gpt-4o-mini', temperature=0.0, model_kwargs={}, openai_api_key=SecretStr('**********'))

In [16]:
from operator import itemgetter


chat_type: Literal["qa-with-stored-sql-db", "qa-with-flat-file-sql-db"] = (
    "qa-with-stored-sql-db"
)
app_functionality: str = "chat"
message: str = "How many employees are there"

# The pipeline is only built for the stored-SQL chat mode, and only when the
# database file can be reached from the notebook's working directory.
# Outside jupyter the check would be: os.path.exists(config.QA_and_RAG.chinook_db_path)
if (
    app_functionality == "chat"
    and chat_type == "qa-with-stored-sql-db"
    and os.path.exists(f"../{file_path}")  # for jupyter notebook
):
    # Outside jupyter: SQLDatabase.from_uri(config.QA_and_RAG.chinook_db_path)
    db = SQLDatabase.from_uri(f"sqlite:///../{file_path}")  # for jupyter notebook
    exec_query = QuerySQLDatabaseTool(db=db)
    write_query = create_sql_query_chain(llm, db=db)

    # prompt -> model -> plain string; used to phrase the final answer
    answer_prompt = PromptTemplate.from_template(
        template=config.QA_and_RAG.sql_agent_prompt
    )
    answer = answer_prompt | llm | StrOutputParser()

    def execute_and_format(data: dict[str, str]) -> str:
        """Run the generated SQL and turn its result into a sentence.

        Parameters
        ----------
        data : dict[str, str]
            Mapping with a 'query' entry (the generated SQL, possibly prefixed
            with 'SQLQuery:') and a 'question' entry (the user's question).

        Returns
        -------
        str
            Output of the answer chain for the question and the query result.
        """
        statement: str = data["query"]
        # Drop the 'SQLQuery:' label when the model included it
        if statement.startswith("SQLQuery:"):
            statement = statement.replace("SQLQuery:", "").strip()
        rows: str = exec_query.invoke(statement)

        return answer.invoke({"question": data["question"], "result": rows})

    # Step 1 adds a `query` key produced by `write_query` to the input dict;
    # step 2 executes that query and phrases the answer.
    chain = RunnablePassthrough().assign(query=write_query).pipe(
        RunnableLambda(execute_and_format)
    )

    langchain.debug = True
    response = chain.invoke({"question": message})

[32;1m[1;3m[chain/start][0m [1m[chain:RunnableSequence] Entering Chain run with input:
[0m{
  "question": "How many employees are there"
}
[32;1m[1;3m[chain/start][0m [1m[chain:RunnableSequence > chain:RunnableAssign<query>] Entering Chain run with input:
[0m{
  "question": "How many employees are there"
}
[32;1m[1;3m[chain/start][0m [1m[chain:RunnableSequence > chain:RunnableAssign<query> > chain:RunnableParallel<query>] Entering Chain run with input:
[0m{
  "question": "How many employees are there"
}
[32;1m[1;3m[chain/start][0m [1m[chain:RunnableSequence > chain:RunnableAssign<query> > chain:RunnableParallel<query> > chain:RunnableSequence] Entering Chain run with input:
[0m{
  "question": "How many employees are there"
}
[32;1m[1;3m[chain/start][0m [1m[chain:RunnableSequence > chain:RunnableAssign<query> > chain:RunnableParallel<query> > chain:RunnableSequence > chain:RunnableAssign<input,table_info>] Entering Chain run with input:
[0m{
  "question": "How m

In [71]:
def _qa_sql_agent(db_path: str, config: DictConfig, message: str, llm: Any) -> str:
    """Answer a natural-language question against a SQLite database.

    The LLM writes a SQL query for ``message``, the query is executed, and the
    LLM then phrases the result using ``config.QA_and_RAG.sql_agent_prompt``.
    The database dialect, the usable table names and the generated SQL are
    printed along the way.

    Parameters
    ----------
    db_path : str
        Path appended to ``sqlite:///../`` to form the database URI.
    config : DictConfig
        Config providing ``QA_and_RAG.sql_agent_prompt``.
    message : str
        The user's question.
    llm : Any
        Model used both to write the query and to phrase the answer.

    Returns
    -------
    str
        The answer produced by the chain.
    """
    db = SQLDatabase.from_uri(f"sqlite:///../{db_path}")  # for jupyter notebook
    print(db.dialect)
    print(db.get_usable_table_names())
    exec_query = QuerySQLDatabaseTool(db=db)
    write_query = create_sql_query_chain(llm, db=db)

    # prompt -> model -> plain string; used to phrase the final answer
    answer_prompt = PromptTemplate.from_template(
        template=config.QA_and_RAG.sql_agent_prompt
    )
    answer = answer_prompt | llm | StrOutputParser()

    def execute_and_format(data: dict[str, str]) -> str:
        """Clean up the generated SQL, run it and phrase the result.

        Parameters
        ----------
        data : dict[str, str]
            Mapping with a 'query' entry (generated SQL, possibly wrapped in a
            'SQLQuery:' / 'sql:' label or a fenced code block) and a
            'question' entry.

        Returns
        -------
        str
            Output of the answer chain for the question and the query result.
        """
        statement: str = data["query"]
        print("===" * 20)
        print("sql: ", statement)

        # Strip only when the model wrapped the SQL in a label or a code fence
        is_wrapped = statement.startswith(("SQLQuery:", "sql:")) or "```" in statement
        if is_wrapped:
            # Order matters: remove "```sql" before the bare "```"
            for marker in ("SQLQuery:", "sql:", "```sql", "```"):
                statement = statement.replace(marker, "")
            statement = statement.strip()

        rows: str = exec_query.invoke(statement)
        return answer.invoke({"question": data["question"], "result": rows})

    # Add the generated SQL under `query`, then execute it and format the answer
    with_query = RunnablePassthrough().assign(query=write_query)
    chain = with_query.pipe(RunnableLambda(execute_and_format))
    return chain.invoke({"question": message})

In [78]:
# Rebinds `fp` / `df` (previously the breast-cancer data) to the diabetes CSV
fp: str = "../data/flat_files/diabetes.csv"
df: pl.DataFrame = pl.read_csv(fp)

# Reference value computed directly from the CSV; the SQL chain/agent answers to
# "What is the average blood pressure?" can be checked against it.
print(df["BloodPressure"].mean())
df.head()

69.10546875


Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
i64,i64,i64,i64,i64,f64,f64,i64,i64
6,148,72,35,0,33.6,0.627,50,1
1,85,66,29,0,26.6,0.351,31,0
8,183,64,0,0,23.3,0.672,32,1
1,89,66,23,94,28.1,0.167,21,0
0,137,40,35,168,43.1,2.288,33,1


In [58]:
# db_path: str = "/data/flat_files/stored_data.db"
# message: str = "What is the total smoothnessmean?"
# _qa_sql_agent(db_path=db_path, config=config, message=message, llm=llm)

In [81]:
from enum import Enum


# Chat modes of the app. Callers compare against `.value`, the string form.
ChatType = Enum(
    "ChatType",
    {
        "qa_with_stored_sql_db": "q&a-with-stored-sql-db",
        "qa_with_stored_flat_file_sql_db": "q&a-with-stored-flat-file-sql-db",
        "qa_with_uploaded_flat_file_sql_db": "q&a-with-uploaded-flat-file-sql-db",
    },
)


# `chat_type` holds the enum *value* (a plain string), so it is annotated as `str`;
# every comparison below is against `ChatType.<member>.value`.
chat_type: str = ChatType.qa_with_stored_sql_db.value
app_functionality: str = "chat"
message: str = "How many employees are there"
# Chat history as (user message, bot reply) pairs
chatbot: list[tuple[str, str]] = []
# NOTE: the leading "/" makes both values '/data/sql/chinook.db' (not a path under
# ROOT_PATH); the "../" prefixes below and `_qa_sql_agent` rely on that form.
file_path: str = str(ROOT_PATH / "/data/sql/chinook.db")
flat_file_path: str = str(ROOT_PATH / "/data/sql/chinook.db")

if app_functionality == "chat":
    if chat_type == ChatType.qa_with_stored_sql_db.value:
        # if os.path.exists(config.QA_and_RAG.chinook_db_path):
        if os.path.exists(f"../{file_path}"):  # for jupyter notebook
            response = _qa_sql_agent(
                db_path=file_path, config=config, message=message, llm=llm
            )
        else:
            chatbot.append(
                (
                    message,
                    "SQL DB does not exist. Please first create the `chinook` db",
                )
            )
    elif chat_type == ChatType.qa_with_uploaded_flat_file_sql_db.value:
        # The flat-file mode checks its own path variable (currently the same
        # value as `file_path`, so the outcome is unchanged).
        if os.path.exists(f"../{flat_file_path}"):  # for jupyter notebook
            db = SQLDatabase.from_uri(f"sqlite:///../{flat_file_path}")
            print(db.dialect)
        else:
            chatbot.append(
                (
                    message,
                    "SQL DB does not exist. Please first create the `chinook` db",
                )
            )

sqlite
['Album', 'Artist', 'Customer', 'Employee', 'Genre', 'Invoice', 'InvoiceLine', 'MediaType', 'Playlist', 'PlaylistTrack', 'Track']
[32;1m[1;3m[chain/start][0m [1m[chain:RunnableSequence] Entering Chain run with input:
[0m{
  "question": "How many employees are there"
}
[32;1m[1;3m[chain/start][0m [1m[chain:RunnableSequence > chain:RunnableAssign<query>] Entering Chain run with input:
[0m{
  "question": "How many employees are there"
}
[32;1m[1;3m[chain/start][0m [1m[chain:RunnableSequence > chain:RunnableAssign<query> > chain:RunnableParallel<query>] Entering Chain run with input:
[0m{
  "question": "How many employees are there"
}
[32;1m[1;3m[chain/start][0m [1m[chain:RunnableSequence > chain:RunnableAssign<query> > chain:RunnableParallel<query> > chain:RunnableSequence] Entering Chain run with input:
[0m{
  "question": "How many employees are there"
}
[32;1m[1;3m[chain/start][0m [1m[chain:RunnableSequence > chain:RunnableAssign<query> > chain:RunnablePar

In [60]:
def my_func(db_path: str, llm: Any) -> tuple[Any, ...]:
    """Open a SQLite database and return it together with the given model.

    Parameters
    ----------
    db_path : str
        Path appended to ``sqlite:///../`` to form the database URI.
    llm : Any
        Model object; returned unchanged.

    Returns
    -------
    tuple[Any, ...]
        ``(db, llm)``. The database dialect is printed before returning.
    """
    uri = f"sqlite:///../{db_path}"  # for jupyter notebook
    database = SQLDatabase.from_uri(uri)
    print(database.dialect)
    return (database, llm)

In [79]:
# Combined with the "sqlite:///../" prefix inside `_qa_sql_agent`
db_path: str = "/data/flat_files/stored_data.db"
# Alternative question to try: "How many people survived in the Titanic?"
message: str = "What is the average blood pressure?"

_qa_sql_agent(db_path=db_path, config=config, message=message, llm=llm)

sqlite
['breast_cancer', 'diabetes', 'titanic']
[32;1m[1;3m[chain/start][0m [1m[chain:RunnableSequence] Entering Chain run with input:
[0m{
  "question": "What is the average blood pressure?"
}
[32;1m[1;3m[chain/start][0m [1m[chain:RunnableSequence > chain:RunnableAssign<query>] Entering Chain run with input:
[0m{
  "question": "What is the average blood pressure?"
}
[32;1m[1;3m[chain/start][0m [1m[chain:RunnableSequence > chain:RunnableAssign<query> > chain:RunnableParallel<query>] Entering Chain run with input:
[0m{
  "question": "What is the average blood pressure?"
}
[32;1m[1;3m[chain/start][0m [1m[chain:RunnableSequence > chain:RunnableAssign<query> > chain:RunnableParallel<query> > chain:RunnableSequence] Entering Chain run with input:
[0m{
  "question": "What is the average blood pressure?"
}
[32;1m[1;3m[chain/start][0m [1m[chain:RunnableSequence > chain:RunnableAssign<query> > chain:RunnableParallel<query> > chain:RunnableSequence > chain:RunnableAssign<

'The average blood pressure is approximately 69.11 mmHg.'

In [73]:
db_path: str = "/data/flat_files/stored_data.db"
# Set the question in this cell instead of inheriting whatever `message` an
# earlier cell left in the kernel; this is the input shown in the recorded run.
message: str = "How many men were there in the Titanic?"
db, llm = my_func(db_path=db_path, llm=llm)

# Tool-calling SQL agent over the same database
agent_executor = create_sql_agent(
    llm=llm, db=db, agent_type="openai-tools", verbose=False
)
response = agent_executor.invoke(message)["output"]
response

sqlite
[32;1m[1;3m[chain/start][0m [1m[chain:SQL Agent Executor] Entering Chain run with input:
[0m{
  "input": "How many men were there in the Titanic?"
}
[32;1m[1;3m[chain/start][0m [1m[chain:SQL Agent Executor > chain:RunnableSequence] Entering Chain run with input:
[0m{
  "input": ""
}
[32;1m[1;3m[chain/start][0m [1m[chain:SQL Agent Executor > chain:RunnableSequence > chain:RunnableAssign<agent_scratchpad>] Entering Chain run with input:
[0m{
  "input": ""
}
[32;1m[1;3m[chain/start][0m [1m[chain:SQL Agent Executor > chain:RunnableSequence > chain:RunnableAssign<agent_scratchpad> > chain:RunnableParallel<agent_scratchpad>] Entering Chain run with input:
[0m{
  "input": ""
}
[32;1m[1;3m[chain/start][0m [1m[chain:SQL Agent Executor > chain:RunnableSequence > chain:RunnableAssign<agent_scratchpad> > chain:RunnableParallel<agent_scratchpad> > chain:RunnableLambda] Entering Chain run with input:
[0m{
  "input": ""
}
[36;1m[1;3m[chain/end][0m [1m[chain:SQL Age

'There were 266 men on the Titanic.'

In [87]:
from langchain import hub

# Pull the SQL-writing system prompt from the LangChain Hub. As printed below,
# it has the placeholders {dialect}, {top_k}, {table_info} and {input}.
query_prompt_template = hub.pull("langchain-ai/sql-query-system-prompt")
# console.print(query_prompt_template.messages[0])
query_prompt_template.messages[0].pretty_print()


Given an input question, create a syntactically correct [33;1m[1;3m{dialect}[0m query to run to help find the answer. Unless the user specifies in his question a specific number of examples they wish to obtain, always limit your query to at most [33;1m[1;3m{top_k}[0m results. You can order the results by a relevant column to return the most interesting examples in the database.

Never query for all the columns from a specific table, only ask for a the few relevant columns given the question.

Pay attention to use only the column names that you can see in the schema description. Be careful to not query for columns that do not exist. Also, pay attention to which column is in which table.

Only use the following tables:
[33;1m[1;3m{table_info}[0m

Question: [33;1m[1;3m{input}[0m


In [90]:
from typing import Annotated, TypedDict


# State shared by the query-writing, query-execution and answer steps
State = TypedDict(
    "State",
    {
        "question": str,  # the user's natural-language question
        "query": str,  # SQL text returned by `write_query`
        "result": str,  # tool output returned by `execute_query`
        "answer": str,  # final text returned by `generate_answer`
    },
)


class QueryOutput(TypedDict):
    """Generated SQL query."""

    # Output schema passed to `llm.with_structured_output(...)` in `write_query`.
    # NOTE(review): the docstring above and the description string below are
    # presumably sent to the model as schema text; confirm before rewording.
    query: Annotated[str, ..., "Syntactically valid SQL query."]


def write_query(state: State):
    """Ask the LLM for a SQL query that answers ``state["question"]``.

    Fills the hub prompt with the database dialect, a row limit of 10, the
    table info and the question, then requests structured output shaped like
    ``QueryOutput``.

    Parameters
    ----------
    state : State
        Must contain the 'question' key.

    Returns
    -------
    dict
        ``{"query": <generated SQL>}``.
    """
    prompt_inputs = {
        "dialect": db.dialect,
        "top_k": 10,
        "table_info": db.get_table_info(),
        "input": state["question"],
    }
    prompt = query_prompt_template.invoke(prompt_inputs)
    # Constrain the model's reply to the QueryOutput schema
    generated = llm.with_structured_output(QueryOutput).invoke(prompt)
    return {"query": generated["query"]}


def execute_query(state: State):
    """Run the SQL held in ``state["query"]`` against the notebook-level ``db``.

    Parameters
    ----------
    state : State
        Must contain the 'query' key.

    Returns
    -------
    dict
        ``{"result": <tool output>}``.
    """
    sql_tool = QuerySQLDatabaseTool(db=db)
    tool_output = sql_tool.invoke(state["query"])
    return {"result": tool_output}

In [100]:
# Start with only the question; the other State keys are filled in cell by cell
state_data: State = State({"question": "What is the average blood pressure?"})
# Step 1: have the LLM write the SQL (this rebinds the notebook-level `query` name)
query: str = write_query(state_data)["query"]
state_data["query"] = query

[32;1m[1;3m[chain/start][0m [1m[prompt:ChatPromptTemplate] Entering Prompt run with input:
[0m{
  "dialect": "sqlite",
  "top_k": 10,
  "table_info": "\nCREATE TABLE breast_cancer (\n\tid BIGINT, \n\tdiagnosis TEXT, \n\tradius_mean FLOAT, \n\ttexture_mean FLOAT, \n\tperimeter_mean FLOAT, \n\tarea_mean FLOAT, \n\tsmoothness_mean FLOAT, \n\tcompactness_mean FLOAT, \n\tconcavity_mean FLOAT, \n\t\"concave points_mean\" FLOAT, \n\tsymmetry_mean FLOAT, \n\tfractal_dimension_mean FLOAT, \n\tradius_se FLOAT, \n\ttexture_se FLOAT, \n\tperimeter_se FLOAT, \n\tarea_se FLOAT, \n\tsmoothness_se FLOAT, \n\tcompactness_se FLOAT, \n\tconcavity_se FLOAT, \n\t\"concave points_se\" FLOAT, \n\tsymmetry_se FLOAT, \n\tfractal_dimension_se FLOAT, \n\tradius_worst FLOAT, \n\ttexture_worst FLOAT, \n\tperimeter_worst FLOAT, \n\tarea_worst FLOAT, \n\tsmoothness_worst FLOAT, \n\tcompactness_worst FLOAT, \n\tconcavity_worst FLOAT, \n\t\"concave points_worst\" FLOAT, \n\tsymmetry_worst FLOAT, \n\tfractal_dimen

In [102]:
# Step 2: execute the generated SQL. The tool output is text (the recorded run
# shows '[(69.10546875,)]'), matching `State.result: str`.
query_result: str = execute_query(state_data)["result"]
state_data["result"] = query_result
state_data

[32;1m[1;3m[tool/start][0m [1m[tool:sql_db_query] Entering Tool run with input:
[0m"SELECT AVG(BloodPressure) AS average_blood_pressure FROM diabetes;"
[36;1m[1;3m[tool/end][0m [1m[tool:sql_db_query] [2ms] Exiting Tool run with output:
[0m"[(69.10546875,)]"


{'question': 'What is the average blood pressure?',
 'query': 'SELECT AVG(BloodPressure) AS average_blood_pressure FROM diabetes;',
 'result': '[(69.10546875,)]'}

In [93]:
# Model handle used by `generate_answer`; built with the same model name,
# API key source and temperature as `llm` earlier in the notebook.
chat_llm: ChatOpenAI = ChatOpenAI(
    model="gpt-4o-mini", api_key=settings.OPENAI_API_KEY, temperature=0
)


def generate_answer(state: State):
    """Phrase the final answer from the question, the SQL and its result.

    Parameters
    ----------
    state : State
        Must contain the 'question', 'query' and 'result' keys.

    Returns
    -------
    dict
        ``{"answer": <model reply text>}``.
    """
    question, sql_query, sql_result = state["question"], state["query"], state["result"]
    prompt = (
        "Given the following user question, corresponding SQL query, "
        "and SQL result, answer the user question.\n\n"
        f"Question: {question}\n"
        f"SQL Query: {sql_query}\n"
        f"SQL Result: {sql_result}"
    )
    reply = chat_llm.invoke(prompt)
    return {"answer": reply.content}

In [104]:
generate_answer(state=state_data)

[32;1m[1;3m[llm/start][0m [1m[llm:ChatOpenAI] Entering LLM run with input:
[0m{
  "prompts": [
    "Human: Given the following user question, corresponding SQL query, and SQL result, answer the user question.\n\nQuestion: What is the average blood pressure?\nSQL Query: SELECT AVG(BloodPressure) AS average_blood_pressure FROM diabetes;\nSQL Result: [(69.10546875,)]"
  ]
}
[36;1m[1;3m[llm/end][0m [1m[llm:ChatOpenAI] [1.30s] Exiting LLM run with output:
[0m{
  "generations": [
    [
      {
        "text": "The average blood pressure is approximately 69.11.",
        "generation_info": {
          "finish_reason": "stop",
          "logprobs": null
        },
        "type": "ChatGeneration",
        "message": {
          "lc": 1,
          "type": "constructor",
          "id": [
            "langchain",
            "schema",
            "messages",
            "AIMessage"
          ],
          "kwargs": {
            "content": "The average blood pressure is approximately 69

{'answer': 'The average blood pressure is approximately 69.11.'}