## Set Up MCP Server to Expose the Database

In [2]:
import os
import pandas as pd
from sqlalchemy import create_engine, inspect
from sqlalchemy.orm import sessionmaker
import re
from models import get_tables_in_order
user = os.getenv("USER")
password = os.getenv("PASSWORD")
host = os.getenv("HOST")
port = os.getenv("PORT")
dbname = os.getenv("DATABASE")



# ---------- Database Setup ----------
DATABASE_URL = "postgresql+psycopg2://postgres:ashish6677@34.30.63.17:5432/postgres"
engine = create_engine(DATABASE_URL)
Session = sessionmaker(bind=engine)
session = Session()

def load_csv_to_db(file_path, table_name):
    """
    Reads CSV and inserts data into Postgres table with length validation.
    """
    print(f"📂 Loading {file_path} into table '{table_name}'")

    # 1. Read CSV
    df = pd.read_csv(file_path)

    # 2. Get DB column max lengths for VARCHAR columns
    inspector = inspect(engine)
    columns_info = inspector.get_columns(table_name)
    varchar_limits = {
        col["name"]: int(re.search(r"\((\d+)\)", str(col["type"])).group(1))
        for col in columns_info
        if "VARCHAR" in str(col["type"]).upper()
    }

    # 3. Trim only values exceeding VARCHAR limits
    for col, limit in varchar_limits.items():
        if col in df.columns:
            df[col] = df[col].astype(str).apply(lambda x: x[:limit] if len(x) > limit else x)

    # 4. Push to DB (append to existing table)
    df.to_sql(table_name, engine, if_exists="append", index=False)

    print(f"✅ Inserted {len(df)} rows into {table_name}")

# ---------- Load all CSVs in schema order ----------


def load_all_csvs(csv_folder):
    table_order = get_tables_in_order()
    for table in table_order:
        print(table)
        csv_file = os.path.join(csv_folder, f"{table}.csv")
        if os.path.exists(csv_file):
            load_csv_to_db(csv_file, table)
        else:
            print(f"⚠️ No CSV found for table '{table}'")

# Example usage:
load_all_csvs("dummy_data_folder")


<class 'models.Company'>
⚠️ No CSV found for table '<class 'models.Company'>'
<class 'models.Role'>
⚠️ No CSV found for table '<class 'models.Role'>'
<class 'models.Address'>
⚠️ No CSV found for table '<class 'models.Address'>'
<class 'models.UserCompanyRole'>
⚠️ No CSV found for table '<class 'models.UserCompanyRole'>'
<class 'models.Department'>
⚠️ No CSV found for table '<class 'models.Department'>'
<class 'models.Account'>
⚠️ No CSV found for table '<class 'models.Account'>'
<class 'models.Customer'>
⚠️ No CSV found for table '<class 'models.Customer'>'
<class 'models.Vendor'>
⚠️ No CSV found for table '<class 'models.Vendor'>'
<class 'models.Product'>
⚠️ No CSV found for table '<class 'models.Product'>'
<class 'models.CompanyGSTIN'>
⚠️ No CSV found for table '<class 'models.CompanyGSTIN'>'
<class 'models.Transaction'>
⚠️ No CSV found for table '<class 'models.Transaction'>'
<class 'models.Inventory'>
⚠️ No CSV found for table '<class 'models.Inventory'>'
<class 'models.SalesOrder'

In [7]:
from sqlalchemy.orm import Session
import pandas as pd

def load_all_csvs_with_orm(engine, csv_folder):
    session = Session(engine)

    table_models = get_tables_in_order()  # e.g., [Company, Employee, Department]

    for model_class in table_models:
        table_name = model_class.__tablename__  # ORM table name
        print(table_name)
        csv_file = f"{csv_folder}/{table_name}.csv"

        df = pd.read_csv(csv_file)

        # Convert each row into ORM objects
        records = [model_class(**row.to_dict()) for _, row in df.iterrows()]

        # Add to DB
        session.add_all(records)
        session.commit()

    session.close()


In [8]:
engine = create_engine(DATABASE_URL)
folder_name = "dummy_data_full"

load_all_csvs_with_orm(engine, folder_name)

companies


DataError: (psycopg2.errors.StringDataRightTruncation) value too long for type character varying(10)

[SQL: INSERT INTO companies (company_id, name, cin, pan, created_at) VALUES (%(company_id__0)s::UUID, %(name__0)s, %(cin__0)s, %(pan__0)s, %(created_at__0)s), (%(company_id__1)s::UUID, %(name__1)s, %(cin__1)s, %(pan__1)s, %(created_at__1)s), (%(company_id_ ... 4068 characters truncated ... ed_at__48)s), (%(company_id__49)s::UUID, %(name__49)s, %(cin__49)s, %(pan__49)s, %(created_at__49)s)]
[parameters: {'pan__0': 'Gw003569302', 'name__0': 'Peter Bullock', 'cin__0': 'iB492209632', 'created_at__0': '2023-04-12 11:51:52', 'company_id__0': '5f84b9d2-ea42-4b8f-ba29-a080f089a87e', 'pan__1': 'IV863708620', 'name__1': 'Karen Mccoy', 'cin__1': 'DB656636176', 'created_at__1': '2023-04-15 07:27:16', 'company_id__1': '8c7f0b27-6c32-4530-9d32-e6c04875d130', 'pan__2': 'Zi551607267', 'name__2': 'Nicholas Hughes', 'cin__2': 'sH794278115', 'created_at__2': '2023-12-14 04:11:27', 'company_id__2': '7c1ed19d-c166-48da-9ad5-5089765fb5bd', 'pan__3': 'Vo633925508', 'name__3': 'Michael Lane', 'cin__3': 'Cm765034913', 'created_at__3': '2023-05-13 14:16:05', 'company_id__3': '6577a689-6b05-40ce-ba00-ae14f63d4a5c', 'pan__4': 'ux622984320', 'name__4': 'Christopher Clements', 'cin__4': 'rh078875102', 'created_at__4': '2022-11-08 10:15:16', 'company_id__4': 'e6e7feaa-8de8-44a3-a959-718a50704464', 'pan__5': 'rL414624650', 'name__5': 'Pamela May', 'cin__5': 'CF494082081', 'created_at__5': '2023-10-18 02:08:20', 'company_id__5': 'daa06ec3-6ba1-4463-bea9-d23eb9fe5b9f', 'pan__6': 'YZ287295836', 'name__6': 'Steven Leon', 'cin__6': 'ns917592064', 'created_at__6': '2025-03-02 19:03:51', 'company_id__6': 'e7db09f0-5b39-4d30-9fda-499b5bb3ec78', 'pan__7': 'OM511515519', 'name__7': 'Tracy Johnson', 'cin__7': 'yv032677302', 'created_at__7': '2024-06-27 12:56:10', 'company_id__7': 'fda8041e-2a05-4c17-b70c-6be0f0180b05', 'pan__8': 'SS632032103', 'name__8': 'Erin Diaz', 'cin__8': 'bB311390592', 'created_at__8': '2022-10-23 12:02:43', 'company_id__8': '0b82e5c2-786f-40c5-9343-615a0c1b73cc', 'pan__9': 'Jl182441650', 'name__9': 'Rachel Pruitt', 'cin__9': 'am411724725', 'created_at__9': '2024-09-22 08:00:58', 'company_id__9': '474f07c5-fdba-47fc-92aa-102a9d943e15' ... 150 parameters truncated ... 'pan__40': 'TZ639566783', 'name__40': 'Megan Jackson', 'cin__40': 'Wo047954831', 'created_at__40': '2022-10-24 14:19:39', 'company_id__40': '202fccc4-482b-454c-8ecf-ab203899cd5d', 'pan__41': 'og258423909', 'name__41': 'Melissa Campos', 'cin__41': 'ZA135364496', 'created_at__41': '2024-06-13 23:13:57', 'company_id__41': 'cad7a80d-6ef5-4669-aaa9-3a47f44085b6', 'pan__42': 'uC614721012', 'name__42': 'Bryce Hubbard', 'cin__42': 'sn114122801', 'created_at__42': '2024-01-25 23:44:12', 'company_id__42': '6cbd2803-83eb-4b04-ace8-ac4d5672e19c', 'pan__43': 'TN106908348', 'name__43': 'John Barajas', 'cin__43': 'ln268400233', 'created_at__43': '2024-09-14 20:50:42', 'company_id__43': 'eb1abbba-cf12-42ed-b25e-26ad6cb78a67', 'pan__44': 'Xa447902938', 'name__44': 'Martin Lindsey', 'cin__44': 'sV668442631', 'created_at__44': '2023-08-22 15:09:58', 'company_id__44': 'f1f3aa1e-094d-4e20-8e75-a202d517ddcb', 'pan__45': 'Wo057463930', 'name__45': 'Nicole Hartman', 'cin__45': 'fx119585255', 'created_at__45': '2023-06-05 06:21:49', 'company_id__45': '861c2f76-534f-4beb-b06e-a51ea1bc01a9', 'pan__46': 'kV752568880', 'name__46': 'John Lucas', 'cin__46': 'CR841694020', 'created_at__46': '2023-03-23 19:02:37', 'company_id__46': '052e5d30-c019-4d94-bb63-5b04317841ed', 'pan__47': 'UR348135176', 'name__47': 'Bryan Casey', 'cin__47': 'EB954638846', 'created_at__47': '2024-12-12 01:30:19', 'company_id__47': '7834abd7-a989-4031-9cec-974bb24e101b', 'pan__48': 'Ti108380931', 'name__48': 'Rhonda Mathis', 'cin__48': 'wB218704830', 'created_at__48': '2023-01-28 08:34:20', 'company_id__48': '4f12c98b-2fe9-45c2-b71c-08b378181569', 'pan__49': 'pR124577960', 'name__49': 'Crystal Ross', 'cin__49': 'FB521083536', 'created_at__49': '2024-09-03 11:05:54', 'company_id__49': '06c9e5e3-f1ad-4149-9b96-102ee7afb04e'}]
(Background on this error at: https://sqlalche.me/e/20/9h9h)

In [2]:


# ---------- Database Setup ----------
DATABASE_URL = "postgresql+psycopg2://postgres:ashish6677@34.30.63.17:5432/postgres"
engine = create_engine(DATABASE_URL)
Session = sessionmaker(bind=engine)
session = Session()


In [3]:
import os
import pandas as pd
from sqlalchemy import create_engine, inspect
from sqlalchemy.orm import sessionmaker
from models import Base, Company, Employee  # Import your ORM models
from models import get_tables_in_order  # Function returning ORM classes
from sqlalchemy.exc import DataError
from sqlalchemy.dialects.postgresql import insert as pg_insert

# DB connection

engine = create_engine(DATABASE_URL)


inspector = inspect(engine)




def bulk_insert_ignore_conflicts(session, records, model_class):
    table = model_class.__table__
    values = [obj.__dict__.copy() for obj in records]
    # Remove SQLAlchemy state key if present
    for v in values:
        v.pop('_sa_instance_state', None)
    stmt = pg_insert(table).values(values)
    stmt = stmt.on_conflict_do_nothing(index_elements=['company_id'])
    session.execute(stmt)
    session.commit()


def clean_dataframe_for_orm(df, table_name):
    """Truncate strings in DF according to DB column sizes"""
    columns_info = inspector.get_columns(table_name)
    for col in columns_info:
        col_name = col["name"]
        col_type = str(col["type"])
        print(f"col name and type: {col_name} & {col_type}")
        if "VARCHAR" in col_type:
            max_len = int(col_type.split("(")[1].split(")")[0])
            print(max_len)
            if col_name in df.columns:
                df[col_name] = df[col_name].astype(str)
                df[col_name] = df[col_name].apply(lambda x: x[:max_len])
                print(df[col_name].loc[:])
                print(df[col_name].shape)

        elif "character" in col_type and "varying" not in col_type:
            max_len = int(col_type.split("(")[1].split(")")[0])
            print(max_len)
            if col_name in df.columns:
                df[col_name] = df[col_name].astype(str).str.slice(0, max_len)

    return df





# Path to CSV folder
folder_name = "dummy_data_full"

# Loop through ORM models in correct order
for model_class in get_tables_in_order():
    table_name = model_class.__tablename__
    print(table_name)
    csv_file = os.path.join(folder_name, f"{table_name}.csv")
    
    if os.path.exists(csv_file):
        df = pd.read_csv(csv_file)

        # Clean based on DB schema
        df = clean_dataframe_for_orm(df, table_name)

        # Convert to ORM objects
        records = [model_class(**row.to_dict()) for _, row in df.iterrows()]

        # Insert into DB
        try:
            if table_name == "companies":
                bulk_insert_ignore_conflicts(session, records, model_class)  # 👈 Use your new function
            else:
                session.bulk_save_objects(records)  # others unchanged
                session.commit()
        except DataError as e:
            session.rollback()
            print(f"❌ Data too long for {table_name}: {e}")


# Commit changes
session.commit()
session.close()


companies
col name and type: company_id & UUID
col name and type: name & VARCHAR(255)
255
0            Peter Bullock
1              Karen Mccoy
2          Nicholas Hughes
3             Michael Lane
4     Christopher Clements
5               Pamela May
6              Steven Leon
7            Tracy Johnson
8                Erin Diaz
9            Rachel Pruitt
10    Dr. Ethan Jordan DVM
11             Sarah Flynn
12          Jessica Bailey
13             Tammy Jones
14            Joseph Lopez
15              Amy Dorsey
16          Jason Sullivan
17           Paul Sullivan
18        Michael Mckinney
19         Benjamin Parker
20           Courtney Bush
21           Scott Walters
22             Holly Avila
23          Crystal Cortez
24           Timothy Brown
25            Judith Jones
26            Zoe Anderson
27    Kristopher Hernandez
28             Gerald Tran
29            Matthew Hart
30           Craig Morales
31          Erica Marshall
32          Breanna Ibarra
33             Robi

  records = [model_class(**row.to_dict()) for _, row in df.iterrows()]


roles
col name and type: role_id & UUID
col name and type: company_id & UUID
col name and type: role_name & VARCHAR(50)
50
0      Mrs. Michelle Fisher
1           Timothy Simmons
2              James Carter
3                Keith Wong
4              Ashley Grant
5         Thomas Stephenson
6          Nicholas Jenkins
7               Faith Mckay
8              Theresa West
9         Michelle Martinez
10             Nicole Woods
11         Alexander Nguyen
12    Mitchell Mcdonald DVM
13           Autumn Winters
14               Gary Brown
15              Kelly Beard
16           Leslie Edwards
17                Kyle Owen
18               Anna Davis
19             Leah Barrett
20             Tiffany Webb
21                Eric Gray
22              Mary Romero
23            Melissa Myers
24            Manuel Duncan
25             Dawn Perkins
26            Bryan Stevens
27           Shannon Palmer
28            James Osborne
29           Allison Wilson
30                 Eric Lee
31       

IntegrityError: (psycopg2.errors.UniqueViolation) duplicate key value violates unique constraint "roles_pkey"
DETAIL:  Key (role_id)=(0200bc30-2086-414d-afb0-456f4bcfb067) already exists.

[SQL: INSERT INTO roles (role_id, company_id, role_name, description, created_at) VALUES (%(role_id__0)s::UUID, %(company_id__0)s::UUID, %(role_name__0)s, %(description__0)s, %(created_at__0)s), (%(role_id__1)s::UUID, %(company_id__1)s::UUID, %(role_name__ ... 5231 characters truncated ... d__49)s::UUID, %(company_id__49)s::UUID, %(role_name__49)s, %(description__49)s, %(created_at__49)s)]
[parameters: {'description__0': 'Federal information bill box member base. Ahead activity simply officer owner question.', 'company_id__0': '74acb795-400f-4f4f-bc9f-6d49beb1469f', 'role_name__0': 'Mrs. Michelle Fisher', 'role_id__0': '0200bc30-2086-414d-afb0-456f4bcfb067', 'created_at__0': '2023-04-05 03:07:02', 'description__1': 'Trade parent opportunity bill remember.', 'company_id__1': 'b2099831-d995-46b8-9865-4ca93ccd46cf', 'role_name__1': 'Timothy Simmons', 'role_id__1': '626ef389-2343-4290-8906-126ef93910ee', 'created_at__1': '2023-10-04 07:39:45', 'description__2': 'Light article more fish.', 'company_id__2': 'b6e77aa3-54c1-4e5b-aa05-16ccdd7196f0', 'role_name__2': 'James Carter', 'role_id__2': '6702f9fc-bd13-4915-9860-2d407c2be977', 'created_at__2': '2023-09-08 10:09:25', 'description__3': 'Husband tonight simply center themselves enough.', 'company_id__3': '5621c24a-877c-415e-8725-6774ed8ce9e8', 'role_name__3': 'Keith Wong', 'role_id__3': '04fcfaf9-d787-4297-afa3-024ba6aa623b', 'created_at__3': '2024-03-21 10:45:43', 'description__4': 'Girl along themselves game interview speech today.', 'company_id__4': '06c9e5e3-f1ad-4149-9b96-102ee7afb04e', 'role_name__4': 'Ashley Grant', 'role_id__4': '531ca9d9-0da5-458e-be83-c7104358c181', 'created_at__4': '2023-06-30 23:39:43', 'description__5': 'Station resource body hope industry.', 'company_id__5': 'b6e77aa3-54c1-4e5b-aa05-16ccdd7196f0', 'role_name__5': 'Thomas Stephenson', 'role_id__5': '0e32673d-6359-447e-ba4e-a66e91dc3e9b', 'created_at__5': '2025-05-19 13:43:04', 'description__6': 'Which us run hard.', 'company_id__6': 'eb1abbba-cf12-42ed-b25e-26ad6cb78a67', 'role_name__6': 'Nicholas Jenkins', 'role_id__6': '4cd95d9f-06ea-4259-b17b-1240b3830826', 'created_at__6': '2023-03-02 05:41:50', 'description__7': 'Exist best officer.', 'company_id__7': '5621c24a-877c-415e-8725-6774ed8ce9e8', 'role_name__7': 'Faith Mckay', 'role_id__7': '210baedc-a95e-4346-8cd7-dc687597b3d8', 'created_at__7': '2024-01-19 20:57:58', 'description__8': 'Poor itself concern them either site of.', 'company_id__8': '474f07c5-fdba-47fc-92aa-102a9d943e15', 'role_name__8': 'Theresa West', 'role_id__8': '38e022c3-9adf-4d66-9d67-b57c48995f88', 'created_at__8': '2023-05-17 07:04:55', 'description__9': 'Meet day cover two. Old task husband education.', 'company_id__9': '6a53cd75-dab2-4fb6-acac-59e3c8a6713f', 'role_name__9': 'Michelle Martinez', 'role_id__9': 'a4a1f9d8-72f5-4b4e-b8b5-71ee30af6302', 'created_at__9': '2025-04-13 07:17:00' ... 150 parameters truncated ... 'description__40': 'Exist speak night check.', 'company_id__40': '4f12c98b-2fe9-45c2-b71c-08b378181569', 'role_name__40': 'Diamond Ramsey', 'role_id__40': '22556ae0-4fa1-4689-a5c4-501e7ed6fdb3', 'created_at__40': '2023-04-25 14:21:33', 'description__41': 'Floor star Republican finally drive across avoid.', 'company_id__41': '6237c655-0bee-40f7-a5d1-20fae3cbc54b', 'role_name__41': 'Sandra Chavez', 'role_id__41': '05f87f50-7f07-48ac-9331-b5a795cf610a', 'created_at__41': '2023-10-23 17:05:50', 'description__42': 'Relate above pass short know ever.', 'company_id__42': 'daa06ec3-6ba1-4463-bea9-d23eb9fe5b9f', 'role_name__42': 'Colleen Burke', 'role_id__42': '5b89f09d-8f0b-479a-b0ee-8e503ba96bb2', 'created_at__42': '2024-05-26 14:15:08', 'description__43': 'Police firm tend move north science customer.', 'company_id__43': '6237c655-0bee-40f7-a5d1-20fae3cbc54b', 'role_name__43': 'Karen Chung MD', 'role_id__43': '9f2396c9-5ee3-440a-a20e-b8f5bf928ba3', 'created_at__43': '2025-04-28 03:23:19', 'description__44': 'Wind assume talk between quite thus eight account. Final same despite cut foot practice.', 'company_id__44': '5621c24a-877c-415e-8725-6774ed8ce9e8', 'role_name__44': 'Jennifer Scott', 'role_id__44': 'ea3c9f51-d3df-49c4-8b56-b98132181a58', 'created_at__44': '2024-04-21 15:23:13', 'description__45': 'Card nature less possible upon despite story turn. Risk speak character cover close operation red.', 'company_id__45': 'abcdd24e-678f-432b-b550-739fc042a0c0', 'role_name__45': 'Logan Rivera', 'role_id__45': '0823792c-5568-465f-9c68-c54e1ff24334', 'created_at__45': '2025-08-07 11:13:43', 'description__46': 'Gun form amount care military table. Color kitchen pay cost debate run away.', 'company_id__46': '3eaa38bf-2f66-4bcc-a41a-3c7b282699e0', 'role_name__46': 'Sara Stevens', 'role_id__46': 'bda6ad57-0691-4fc5-8b43-a4778bc7e5c5', 'created_at__46': '2024-10-08 13:02:57', 'description__47': 'Food career carry usually near sit.', 'company_id__47': '861c2f76-534f-4beb-b06e-a51ea1bc01a9', 'role_name__47': 'Sharon Goodman', 'role_id__47': '58ba0cc1-cc9e-4985-a9d0-88f712ca7d7f', 'created_at__47': '2022-08-30 11:06:33', 'description__48': 'Finally together beyond score there receive. Star threat source decade before local.', 'company_id__48': '5621c24a-877c-415e-8725-6774ed8ce9e8', 'role_name__48': 'Christie Fry', 'role_id__48': '0193d71a-98e8-4777-86e6-34787e08087f', 'created_at__48': '2023-09-02 00:34:05', 'description__49': 'Cultural perhaps list international attorney TV news too. Expert him account continue who increase director mouth.', 'company_id__49': 'b6e77aa3-54c1-4e5b-aa05-16ccdd7196f0', 'role_name__49': 'Brad Woods', 'role_id__49': '9325c184-312d-4c4a-bebe-bb1ad92f32dc', 'created_at__49': '2023-01-10 21:56:06'}]
(Background on this error at: https://sqlalche.me/e/20/gkpj)

In [25]:
session.rollback()

In [4]:
for model_class in get_tables_in_order():
    table_name = model_class.__tablename__
    print(table_name)

companies
roles
addresses
user_company_roles
departments
accounts
customers
vendors
products
company_gstins
transactions
inventory
sales_orders
purchase_orders
fixed_assets
employees
sales_order_items
purchase_order_items
invoices
payables
payroll
audit_logs
subscriptions
payments
email_verifications
phone_verifications
file_exports


In [5]:
from sqlalchemy.dialects.postgresql import insert as pg_insert
from sqlalchemy.exc import DataError, IntegrityError

def get_primary_key_names(model_class):
    """Return a list of primary key column names for a model class."""
    return [key.name for key in model_class.__table__.primary_key.columns]

def bulk_insert_ignore_conflicts(session, records, model_class, pk_names):
    """Bulk insert records, skipping duplicates on the table's PK(s)."""
    table = model_class.__table__
    values = [obj.__dict__.copy() for obj in records]
    for v in values:
        v.pop('_sa_instance_state', None)
    stmt = pg_insert(table).values(values)
    stmt = stmt.on_conflict_do_nothing(index_elements=pk_names)
    session.execute(stmt)
    session.commit()

def table_has_data(session, model_class):
    """Check if table has any rows."""
    return session.query(model_class).first() is not None

# Set up session as before
Session = sessionmaker(bind=engine)
session = Session()

for model_class in get_tables_in_order():
    table_name = model_class.__tablename__
    print(f"\n--- Processing table: {table_name} ---")
    csv_file = os.path.join(folder_name, f"{table_name}.csv")
    
    if os.path.exists(csv_file):
        df = pd.read_csv(csv_file)
        df = clean_dataframe_for_orm(df, table_name)
        records = [model_class(**row.to_dict()) for _, row in df.iterrows()]
        
        pk_names = get_primary_key_names(model_class)
        if not pk_names:
            print(f"⚠️ Skipping {table_name}: no primary key found!")
            continue
        
        try:
            # Always do UPSERT (skip duplicates) for bulk inserts!
            bulk_insert_ignore_conflicts(session, records, model_class, pk_names)
            print(f"✅ Bulk insert complete for {table_name} (duplicates skipped)")
        except DataError as e:
            session.rollback()
            print(f"❌ Data too long for {table_name}: {e}")
        except IntegrityError as e:
            session.rollback()
            print(f"❌ Integrity error for {table_name}: {e}")
    else:
        print(f"⚠️ No CSV found for table '{table_name}'")

session.close()



--- Processing table: companies ---
col name and type: company_id & UUID
col name and type: name & VARCHAR(255)
255
0            Peter Bullock
1              Karen Mccoy
2          Nicholas Hughes
3             Michael Lane
4     Christopher Clements
5               Pamela May
6              Steven Leon
7            Tracy Johnson
8                Erin Diaz
9            Rachel Pruitt
10    Dr. Ethan Jordan DVM
11             Sarah Flynn
12          Jessica Bailey
13             Tammy Jones
14            Joseph Lopez
15              Amy Dorsey
16          Jason Sullivan
17           Paul Sullivan
18        Michael Mckinney
19         Benjamin Parker
20           Courtney Bush
21           Scott Walters
22             Holly Avila
23          Crystal Cortez
24           Timothy Brown
25            Judith Jones
26            Zoe Anderson
27    Kristopher Hernandez
28             Gerald Tran
29            Matthew Hart
30           Craig Morales
31          Erica Marshall
32          Breanna

## v2 code

In [None]:


# Persistent conversation memory in Redis
history = RedisChatMessageHistory(
    url="redis://localhost:6379",
    session_id="user_001"
)


def extract_sql_from_response(response: str) -> str:
    import re
    # Extract content between ```sql and ```
    pattern = r"```sql\s*(.*?)```"
    match = re.search(pattern, response, re.DOTALL | re.IGNORECASE)
    if match:
        return match.group(1).strip()
    else:
        # fallback: return the whole response if no fences found
        return response.strip()

# Load schema from Redis
async def load_schema(redis_url: str):
    redis_client = redis.from_url(redis_url)
    schema_json = await redis_client.get("db_metadata")
    await redis_client.close()
    return json.loads(schema_json.decode("utf-8")) if schema_json else []

# Agent 1: NL to SQL with follow-up support
async def agent1_nl_to_sql(user_question, schema_info, llm):
    # Extract last SQL if any from history for context
    last_sql = None
    for msg in reversed(history.messages):
        if "[Agent 1 Generated SQL]" in msg.content:
            last_sql = msg.content.replace("[Agent 1 Generated SQL]", "").strip()
            break

    schema_desc = ""
    print(type(schema_info))
    for table in {col['table_name'] for col in schema_info}:
        cols = [c['column_name'] for c in schema_info if c['table_name'] == table]
        schema_desc += f"Table {table} (Columns: {', '.join(cols)})\n"

    # Prepare prompt template supporting follow-up
    template_text = (
        "Conversation so far:\n{history}\n\n"
        "You are a SQL expert. Use ONLY this schema:\n{schema} and also keep in mind that it's PostgresSQL, generate the SQL query accordingly\n"
        "Also take a decision on your own, where the question given to you, requires to be converted into a SQL query or its an normal question\
        , if it's a normal question in that case, reply back to the user without converting it to a SQL query and calling the second agent\n\n"
    )
    if last_sql:
        template_text += (
            "The previous SQL query was:\n"
            f"{last_sql}\n"
            "The user now asked a follow-up question:\n{question}\n"
            "Please provide a modified or new SQL query that satisfies the follow-up.\nSQL:"
        )
    else:
        template_text += (
            "Convert the following user question into a valid SQL statement:\n{question}\nSQL:"
        )

    prompt_template = PromptTemplate(
        input_variables=["history", "schema", "question"],
        template=template_text,
    )

    prompt = prompt_template.format(
        history="\n".join([f"{m.type}: {m.content}" for m in history.messages]),
        schema=schema_desc,
        question=user_question
    )

    resp = llm.invoke(prompt)
    sql_query = resp.content.strip()
    history.add_ai_message(f"[Agent 1 Generated SQL] {sql_query}")
    print(f"[Agent 1 Output]\n{sql_query}")
    return sql_query

# Agent 2 and 3 same as before (validate and execute)...

async def agent2_validate_sql(sql_query, schema_info, user_question, llm, max_retries=2):
    attempt = 0
    while attempt <= max_retries:
        attempt += 1
        try:
            if not sqlparse.parse(sql_query):
                raise ValueError("SQL parse failed")

            table_columns = {}
            for col in schema_info:
                table_columns.setdefault(col['table_name'].lower(), set()).add(col['column_name'].lower())

            sql_lower = sql_query.lower()
            valid = any(
                table in sql_lower and any(col in sql_lower for col in cols)
                for table, cols in table_columns.items()
            )

            if not valid:
                raise ValueError("Table/column mismatch")

            history.add_ai_message("[Agent 2] SQL validation passed.")
            print("[Agent 2] SQL validation passed.")
            return sql_query

        except Exception as e:
            history.add_ai_message(f"[Agent 2] Validation failed: {e}")
            print(f"[Agent 2] Validation failed: {e}")

            correction_template = PromptTemplate(
                input_variables=["history", "schema", "question", "bad_sql", "error"],
                template=(
                    "Conversation so far:\n{history}\n\n"
                    "The SQL query `{bad_sql}` is invalid due to: {error}\n"
                    "Schema: {schema}\n"
                    "Rewrite the SQL so it matches the schema & user request and also the syntax of the postgresql:\n"
                    "{question}\nSQL:"
                ),
            )

            schema_desc = json.dumps(schema_info)
            prompt = correction_template.format(
                history="\n".join([f"{m.type}: {m.content}" for m in history.messages]),
                schema=schema_desc,
                question=user_question,
                bad_sql=sql_query,
                error=str(e)
            )
            sql_query = llm.invoke(prompt).content.strip()
            history.add_ai_message(f"[Agent 2 Corrected SQL] {sql_query}")

    return sql_query

async def agent3_execute_sql(sql_query, toolbox_url="http://127.0.0.1:5000", toolset_name="my-toolset"):
    async with ToolboxClient(toolbox_url) as client:
        tools = await client.aload_toolset(toolset_name)
        run_query_tool = next((t for t in tools if t.name == "run-query"), None)
        if not run_query_tool:
            raise RuntimeError("run-query tool not found")
        

        raw_sql = extract_sql_from_response(sql_query)
        print("SQL sent:", repr(raw_sql))


        print(f"The correct sql query: {raw_sql}")
        tool_call = {'query': raw_sql}
        result = run_query_tool.invoke(tool_call)
        history.add_ai_message(f"[Agent 3 Execution Result] {result}")
        print("[Agent 3] Execution Result:")
        print(result)
        return result

# Orchestrator with follow-up support
async def multi_llm_pipeline():
    redis_url = "redis://localhost:6379"
    toolbox_url = "http://127.0.0.1:5000"

    # Load schema info once outside the loop
    print("Loading the schema info...")
    schema_info = await load_schema(redis_url)
    print(f"Schema info loaded. Type: {type(schema_info)}")

    # Initialize LLM agents once outside the loop
    llm_agent1 = ChatVertexAI(model_name="gemini-2.0-flash-001")
    llm_agent2 = ChatOpenAI(
        model="gpt-4o-mini",
        temperature=0,
        api_key=open_ai_key  # or rely on env variable
    )

    print("Starting interactive session. Type 'exit' or 'quit' to stop.")

    while True:
        user_question = input("\nEnter query (or follow-up): ")
        if user_question.strip().lower() in {"exit", "quit"}:
            print("Ending session.")
            break

        history.add_user_message(user_question)

        # NL->SQL
        sql_query = await agent1_nl_to_sql(user_question, schema_info, llm_agent1)

        # Validate SQL
        validated_sql = await agent2_validate_sql(sql_query, schema_info, user_question, llm_agent2)

        # Execute SQL and get results
        results = await agent3_execute_sql(validated_sql, toolbox_url)

        print("\n[Final Results]")
        print(results)

        print("\n[Conversation History]")
        for m in history.messages:
            print(f"{m.type.capitalize()}: {m.content}")


if __name__ == "__main__":
    await multi_llm_pipeline()
