In [1]:
from llm.llm_utils import *
import pandas as pd

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import os

# Connection settings for the Postgres database to introspect.
DATABASE = "postgres"
USER = "postgres"
# Prefer the password from the environment (PGPASSWORD is the libpq convention)
# so real credentials never have to be committed with the notebook; the literal
# is only a fallback for the local demo database.
PASSWORD = os.environ.get("PGPASSWORD", "mysecretpassword")
HOST = "localhost"
PORT = 5432
TABLES = []  # list of tables to load or [] to load all tables

from llm.db_connectors import PostgresConnector
from llm.prompt_formatters import SqlCoderFormatter

try:
    # Get the connector and formatter
    postgres_connector = PostgresConnector(
        user=USER, password=PASSWORD, dbname=DATABASE, host=HOST, port=PORT
    )
    postgres_connector.connect()

    # An empty TABLES list means "load every table the connector reports".
    if not TABLES:
        TABLES.extend(postgres_connector.get_tables())

    print(f"Loading tables: {TABLES}")

    # Build the formatter from one schema entry per selected table.
    db_schema = [postgres_connector.get_schema(table) for table in TABLES]
    formatter = SqlCoderFormatter(db_schema)
except Exception as exc:
    # Catch ordinary failures only (KeyboardInterrupt / SystemExit still
    # propagate) and say why the fallback was taken instead of hiding it.
    print(
        f"Could not load the schema from Postgres ({exc!r}); "
        "falling back to sample_table_str_raw.txt"
    )
    formatter = SqlCoderFormatter([])
    # Reading from the file
    # NOTE(review): this assigns `table_str`, while the enrichment cell reads
    # `formatter.raw_table_str` -- confirm the formatter keeps the two in sync.
    # NOTE(review): if the connector could not be created above,
    # `postgres_connector` is undefined and later cells that execute SQL will fail.
    with open("sample_table_str_raw.txt", 'r') as file:
        formatter.table_str = file.read()

Loading tables: ['payments', 'suppliers', 'products', 'categories', 'customers', 'orders', 'orderdetails', 'customerfeedback']


In [3]:
# Enrich the db definition with comments (One time run and can be cached)
# Passes the formatter's `raw_table_str` through `add_comment_to_sql_mixtral`
# (debug output disabled) and stores the result on the formatter as
# `enriched_table_str`, which the query-repair prompt in the pipeline cell reads.
# NOTE(review): the connection-failure fallback in the setup cell assigns
# `formatter.table_str`, not `raw_table_str` -- confirm `raw_table_str` is
# populated on that path.
formatter.enriched_table_str = add_comment_to_sql_mixtral(formatter.raw_table_str, debug=False)

In [None]:
# Natural-language question that is turned into SQL and answered below.
question = "What are the suppliers with products that have the most reviews from customers?"


In [None]:
%%time
# Build the prompt for Text2SQL
prompt = formatter.format_prompt(question, database='Postgres', model_prompt="Mixtral")
# Get SQLQuery
response = prompt_llm_online(user_prompt=prompt)
query = extract_sql_code(response)

# Execute the query and get the final results.
# As used in this cell, any non-DataFrame value returned by run_sql_as_df is
# treated as a description of the execution error.
exec_results = postgres_connector.run_sql_as_df(query)

# Give the LLM a bounded number of chances to repair a failing query.
MAX_FIX_ATTEMPTS = 1
for attempt in range(MAX_FIX_ATTEMPTS):
    if isinstance(exec_results, pd.DataFrame):
        break
    response = prompt_llm_online(user_prompt=f"Fix the SQL query based on the error \n {exec_results} {query}.\nSQL DDL is the following:\n {formatter.enriched_table_str}")
    query = extract_sql_code(response)
    exec_results = postgres_connector.run_sql_as_df(query)

if isinstance(exec_results, pd.DataFrame):
    interpretation_prompt = f"Answer the following question based on this query {query} and the results of the execution of the query: {exec_results}\n{question}. Don't give intermediate steps, just answer the question."
    # Ask Mixtral to answer the question:
    final_response = prompt_llm_online(user_prompt=interpretation_prompt, system_prompt="You are a helpful assistant.")
else:
    # The query still fails after the repair attempts: report that plainly
    # rather than asking the LLM to interpret an error as if it were data.
    final_response = (
        "Could not answer the question: the generated SQL failed to execute.\n"
        f"Last query:\n{query}\n"
        f"Error:\n{exec_results}"
    )

print(final_response)


The given SQL query is used to find the suppliers with the most customer reviews for their products. The query joins multiple tables, including `suppliers`, `products`, `orderdetails`, `orders`, and `customerfeedback`. It then groups the results by `supplierid` and `suppliername`, counts the number of feedback entries per supplier (which represents the total number of reviews), and orders the results in descending order based on this count.

Based on the execution results provided, there are two suppliers listed:
1. Global Supplies with 1 review
2. Quality Goods Inc. with 1 review

In this case, both suppliers have an equal number of reviews (which is only 1). Therefore, it can be said that Global Supplies and Quality Goods Inc. are the two suppliers with products having the most customer reviews. However, if there were other suppliers with more reviews, they would appear above these two in the results table.
CPU times: user 36.6 ms, sys: 118 ms, total: 155 ms
Wall time: 2min 59s
