In [1]:
# IMPORT

import os
from typing import Any, Dict, List, Optional, Sequence, TypedDict, Union
from typing_extensions import Annotated

from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv())

# --- LangChain / LangGraph core ---
from langchain_core.messages import BaseMessage, AIMessage, HumanMessage, SystemMessage
from langchain_core.documents import Document as LCDocument
# from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings
from openai import AzureOpenAI
from langchain_text_splitters import RecursiveCharacterTextSplitter

# NOTE(review): both imports below bind the same name `Chroma`. The second
# (langchain_community) import rebinds it, so the langchain_chroma class
# imported first is shadowed for the rest of the notebook — confirm which
# implementation is intended and drop the other.
from langchain_chroma import Chroma
from langchain_community.vectorstores import Chroma
# from langchain_community.vectorstores import FAISS

from langchain_core.tools import tool
from langchain_core.runnables import RunnableLambda
from langchain_core.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate

from langgraph.graph import END, StateGraph, MessagesState
from langgraph.graph.message import add_messages

# --- SQL and validation ---
import sqlglot
from sqlglot import parse_one
from sqlalchemy import create_engine, text
from sqlalchemy.engine import Engine

# --- Utilities ---
import json
from pathlib import Path
import time


In [None]:
# Azure OpenAI credentials and endpoints.
#
# API keys are looked up in the process environment; each is None when the
# corresponding variable is not set.
# Non-Azure alternative (disabled):
# OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
OPENAI_API_KEY = os.environ.get('AZURE_OPENAI_API_KEY_US')
OPENAI_API_KEY_E = os.environ.get('AZURE_OPENAI_API_KEY_US2')

# Publish the endpoint settings through os.environ so later cells can read
# them back with os.getenv. Existing values for these keys are overwritten.
os.environ.update({
    # --- chat model resource ---
    # 'OPENAI_API_TYPE': 'azure',  (disabled)
    'OPENAI_API_VERSION': '2024-08-01-preview',
    'AZURE_OPENAI_ENDPOINT': 'https://azure-chat-try-2.openai.azure.com/',
    'AZURE_OPENAI_DEPLOYMENT': 'chat-endpoint-us-gpt4o',
    # --- embedding resource (keys carry the "_E" suffix) ---
    'OPENAI_API_VERSION_E': '2024-12-01-preview',
    'AZURE_OPENAI_ENDPOINT_E': 'https://agents-4on.openai.azure.com/',
    'AZURE_OPENAI_EMBEDDING_DEPLOYMENT_E': "text-embedding-3-large-eus2",
})

# Optional LangSmith tracing (disabled):
# LANGCHAIN_API_KEY = os.getenv('LANGCHAIN_API_KEY')
# os.environ['LANGCHAIN_TRACING_V2'] = 'true'
# os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'
# os.environ['LANGCHAIN_PROJECT'] = "rag-sql"

In [4]:
# Chat model client, configured from the chat-resource settings that the
# credentials cell placed in os.environ.
llm = AzureChatOpenAI(
    azure_deployment=os.environ.get("AZURE_OPENAI_DEPLOYMENT"),
    azure_endpoint=os.environ.get("AZURE_OPENAI_ENDPOINT"),
    openai_api_version=os.environ.get("OPENAI_API_VERSION"),
    api_key=OPENAI_API_KEY,
)

# Embedding client, configured from the "_E"-suffixed settings and its own
# API key (OPENAI_API_KEY_E).
emb_model = AzureOpenAIEmbeddings(
    azure_deployment=os.environ.get('AZURE_OPENAI_EMBEDDING_DEPLOYMENT_E'),
    azure_endpoint=os.environ.get('AZURE_OPENAI_ENDPOINT_E'),
    api_version=os.environ.get('OPENAI_API_VERSION_E'),
    api_key=OPENAI_API_KEY_E,
)

In [5]:
# Database (disabled): optional inspection of the SQLite database via LangChain's SQLDatabase wrapper.
# from langchain_community.utilities import SQLDatabase
# db = SQLDatabase.from_uri("sqlite:///./database/credit-risk.db", sample_rows_in_table_info=2)
# print(db.dialect)
# print(db.get_usable_table_names())

In [5]:
# CONFIG


def env_int(name: str, default: int) -> int:
    """Read an integer setting from the environment.

    Args:
        name: Environment variable to read.
        default: Value returned when the variable is unset or blank.

    Returns:
        The variable parsed with ``int()``, or ``default``.

    Raises:
        ValueError: If the variable is set to a non-blank value that is not a
            valid integer. The message names the offending variable.
    """
    raw = os.getenv(name)
    # A variable that is present but empty (e.g. "MAX_ROWS_DEFAULT=" in a
    # .env file) is treated like an unset one instead of crashing int("").
    if raw is None or not raw.strip():
        return default
    try:
        return int(raw)
    except ValueError:
        raise ValueError(
            f"Environment variable {name} must be an integer, got {raw!r}"
        ) from None


# Locations; each can be overridden through the environment variable of the
# same name.
DB_CONN_STR = os.getenv("DB_CONN_STR", "sqlite:///./database/credit-risk.db")
VECTOR_DIR = os.getenv("VECTOR_DIR", "./database")
DB_DOCS_DIR = os.getenv("DB_DOCS_DIR", "./database")

# Safety limits (integers, overridable through the environment).
MAX_ROWS_DEFAULT = env_int("MAX_ROWS_DEFAULT", 2000)
QUERY_TIMEOUT_SECS = env_int("QUERY_TIMEOUT_SECS", 60)
REWRITE_MAX_ATTEMPTS = env_int("REWRITE_MAX_ATTEMPTS", 2)