In [3]:
pip install groq

Collecting groq
  Downloading groq-0.9.0-py3-none-any.whl (103 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m103.5/103.5 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
Collecting httpx<1,>=0.23.0 (from groq)
  Downloading httpx-0.27.0-py3-none-any.whl (75 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.6/75.6 kB[0m [31m8.5 MB/s[0m eta [36m0:00:00[0m
Collecting httpcore==1.* (from httpx<1,>=0.23.0->groq)
  Downloading httpcore-1.0.5-py3-none-any.whl (77 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.9/77.9 kB[0m [31m9.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting h11<0.15,>=0.13 (from httpcore==1.*->httpx<1,>=0.23.0->groq)
  Downloading h11-0.14.0-py3-none-any.whl (58 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.3/58.3 kB[0m [31m7.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: h11, httpcore, httpx, groq
Successfully installed groq-0.9.0 h11-0.14.0 httpcore-1.0.5 http

In [None]:
pip install configparser

In [8]:
import os
from groq import Groq
import json
import duckdb
import sqlparse
import numpy as np
import pandas as pd
import psycopg2
import configparser

In [9]:
config = configparser.ConfigParser()
config.read('config.ini')

PGEND_POINT = config['database']['PGEND_POINT']
PGUSER_NAME = config['database']['PGUSER_NAME']
PGPASSWORD = config['database']['PGPASSWORD']
PGDATABASE_NAME = config['database']['PGDATABASE_NAME']

In [10]:
def connect():
    # Set up a connection to the PostgreSQL server.
    conn_string = "host=" + PGEND_POINT + " port=5432 dbname=" + PGDATABASE_NAME + \
                  " user=" + PGUSER_NAME + " password=" + PGPASSWORD

    conn = psycopg2.connect(conn_string)
    print("Connected!")

    # Create a cursor object
    cursor = conn.cursor()

    return conn, cursor

def execute_postgresql_query(query):
    conn, cursor = connect()
    try:
        cursor.execute(query)
        columns = [desc[0] for desc in cursor.description]
        query_result = pd.DataFrame(cursor.fetchall(), columns=columns)
    finally:
        conn.close()
    return query_result

In [11]:
def chat_with_groq(client, prompt, model, response_format):
    completion = client.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "user",
                "content": f"{prompt}\n\nPlease provide the response in JSON format."
            }
        ],
        response_format=response_format
    )
    return completion.choices[0].message.content

In [12]:
def get_summarization(client, user_question, df, model):
    """
    This function generates a summarization prompt based on the user's question and the resulting data.
    It then sends this summarization prompt to the Groq API and retrieves the AI's response.
    """
    prompt = '''
    A user asked the following question pertaining to local database tables:

    {user_question}

    To answer the question, a dataframe was returned:

    Dataframe:
    {df}

    In a few sentences, summarize the data in the table as it pertains to the original user question. Avoid qualifiers like "based on the data" and do not comment on the structure or metadata of the table itself.
    '''.format(user_question=user_question, df=df)

    # Response format is set to 'None'
    return chat_with_groq(client, prompt, model, None)

In [13]:
# Use the Llama3 70b model
model = "llama3-70b-8192"

# Get the Groq API key and create a Groq client
groq_api_key = os.environ['api']['groq_api_key']
client = Groq(
  api_key=groq_api_key
)

In [14]:
print("Welcome to the Query Generator!")
print("You can ask questions about the data")

base_prompt = "Given a user's question about this data, write a valid SQL query that accurately extracts or calculates the requested information from the 'turbotable' and adheres to SQL best practices for AWS RDS, optimizing for readability and performance where applicable."

Welcome to the Query Generator!
You can ask questions about the data


In [None]:
base_prompt = """
Given a user's question about this data, write a valid SQL query that accurately extracts or calculates the requested information from the 'turbotable' and adheres to SQL best practices for AWS RDS, optimizing for readability and performance where applicable. Here is the user's question:
{user_question}
"""

while True:
    # Get the user's question
    user_question = input("Ask a question: ")

    if user_question:
        # Generate the full prompt for the AI
        full_prompt = base_prompt.format(user_question=user_question)

        print(full_prompt)

        # Get the AI's response. Call with '{"type": "json_object"}' to use JSON mode
        llm_response = chat_with_groq(client, full_prompt, model, {"type": "json_object"})

        result_json = json.loads(llm_response)
        if 'sql' in result_json:
            sql_query = result_json['sql']
            results_df = execute_postgresql_query(sql_query)

            formatted_sql_query = sqlparse.format(sql_query, reindent=True, keyword_case='upper')

            print("```sql\n" + formatted_sql_query + "\n```")
            print(results_df.to_markdown(index=False))

            summarization = get_summarization(client, user_question, results_df, model)
            print(summarization)
        elif 'error' in result_json:
            print("ERROR:", 'Could not generate valid SQL for this question')
            print(result_json['error'])

Ask a question: maximum value of altitude

Given a user's question about this data, write a valid SQL query that accurately extracts or calculates the requested information from the 'turbotable' and adheres to SQL best practices for AWS RDS, optimizing for readability and performance where applicable. Here is the user's question:
maximum value of altitude



# First run

In [7]:
import os
from groq import Groq
import json
import sqlparse
import psycopg2

def chat_with_groq(client, prompt, model, response_format):
    completion = client.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "user",
                "content": prompt
            }
        ],
        response_format=response_format
    )

    return completion.choices[0].message.content

def print_sql_query(query):
    """
    This function prints the SQL query without executing it.
    """
    formatted_sql_query = sqlparse.format(query, reindent=True, keyword_case='upper')
    print("\nSQL Query:\n" + formatted_sql_query + "\n")

def get_summarization(client, user_question, df, model):
    """
    This function generates a summarization prompt based on the user's question and the resulting data.
    It then sends this summarization prompt to the Groq API and retrieves the AI's response.

    Parameters:
    client (Groqcloud): The Groq API client.
    user_question (str): The user's question.
    df (DataFrame): The DataFrame resulting from the SQL query.
    model (str): The AI model to use for the response.

    Returns:
    str: The content of the AI's response to the summarization prompt.
    """
    prompt = '''
      A user asked the following question pertaining to local database tables:

      {user_question}

      To answer the question, a dataframe was returned:

      Dataframe:
      {df}

    In a few sentences, summarize the data in the table as it pertains to the original user question. Avoid qualifiers like "based on the data" and do not comment on the structure or metadata of the table itself.
    '''.format(user_question=user_question, df=df)

    # Response format is set to 'None'
    return chat_with_groq(client, prompt, model, None)

# Use the Llama3 70b model
model = "llama3-70b-8192"

# Get the Groq API key and create a Groq client
groq_api_key = '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'
client = Groq(api_key=groq_api_key)

print("Welcome to the DuckDB Query Generator!")
print("You can ask questions about the data in the 'employees.csv' and 'purchases.csv' files.")

# Load the base prompt
base_prompt = """
Given a user's question about this data, write a valid SQL query that accurately extracts or calculates the requested information from the 'turbotable' and adheres to SQL best practices for AWS RDS, optimizing for readability and performance where applicable. Here is the user's question:
{user_question}
"""

while True:
    # Get the user's question
    user_question = input("Ask a question: ")

    if user_question:
        # Generate the full prompt for the AI
        full_prompt = base_prompt.format(user_question=user_question)

        # Get the AI's response. Call with '{"type": "json_object"}' to use JSON mode
        llm_response = chat_with_groq(client, full_prompt, model, {"type": "json_object"})

        result_json = json.loads(llm_response)
        if 'sql' in result_json:
            sql_query = result_json['sql']
            print_sql_query(sql_query)

            # PGEND_POINT = 'databaseinstance.crooqm2eco97.eu-north-1.rds.amazonaws.com' # End_point
# PGDATABASE_NAME = 'TestDB' # Database Name example: youtube_test_db
# PGUSER_NAME = 'postgres' # UserName
# PGPASSWORD = 'Root.123'

            # Connect to the PostgreSQL database (if needed for other purposes)
            conn = psycopg2.connect(
                host="databaseinstance.crooqm2eco97.eu-north-1.rds.amazonaws.com",
                port="5432",
                dbname="TestDB",
                user="postgres",
                password="Root.123"
            )

            # Optional: you can fetch data from the database if needed
            # cursor = conn.cursor()
            # cursor.execute(sql_query)
            # results_df = pd.DataFrame(cursor.fetchall(), columns=[desc[0] for desc in cursor.description])
            # cursor.close()
            # conn.close()
            #
            # print(results_df.to_markdown(index=False))
            #
            # summarization = get_summarization(client, user_question, results_df, model)
            # print(summarization)
        elif 'error' in result_json:
            print("ERROR:", 'Could not generate valid SQL for this question')
            print(result_json['error'])


Welcome to the DuckDB Query Generator!
You can ask questions about the data in the 'employees.csv' and 'purchases.csv' files.
Ask a question: maximum value of altitude


BadRequestError: Error code: 400 - {'error': {'message': "'messages' must contain the word 'json' in some form, to use 'response_format' of type 'json_object'.", 'type': 'invalid_request_error'}}