In [69]:
import os
import dotenv
import psycopg2
from psycopg2 import sql
import pandas as pd
from groq import Groq

In [70]:
def get_database_connection():
  # Establishes a connection to the PostgreSQL database
  dotenv.load_dotenv()
  try:
    conn = psycopg2.connect(
        database=os.environ.get("DATABASE_NAME"),
        user=os.environ.get("DATABASE_USER"),
        password=os.environ.get("DATABASE_PASSWORD"),
        host=os.environ.get("DATABASE_HOST"),
        port=os.environ.get("DATABASE_PORT")
    )
    print("Connected to the PostgreSQL database!")
    return conn
  
  except (Exception, psycopg2.Error) as error:
    print("Error while connecting to PostgreSQL", error)
    return None

In [71]:
api_key = os.environ.get("GROQ_API_KEY")

if not api_key:
    raise ValueError("GROQ_API_KEY environment variable not set")

In [72]:
def get_table_metadata(conn, table_name):
    try:
        cursor = conn.cursor()

        # Get column names and data types
        cursor.execute(sql.SQL("""
            SELECT column_name, data_type
            FROM information_schema.columns
            WHERE table_name = %s;
        """), [table_name])
        columns = cursor.fetchall()

        # Prepare a dictionary to hold the column metadata
        table_metadata = {}
        
        for column_name, data_type in columns:
            # Convert column name to lowercase and replace blank spaces and slashes with underscores
            column_name = column_name.lower().replace(" ", "_").replace("/", "_")

            unique_values = []

            # Only fetch unique values for text columns
            if data_type == 'text':
                cursor.execute(sql.SQL("""
                    SELECT DISTINCT {column}
                    FROM {table}
                    LIMIT 10;
                """).format(
                    column=sql.Identifier(column_name),
                    table=sql.Identifier(table_name)
                ))
                unique_values = cursor.fetchall()
                # Flatten the list of unique values
                unique_values = [val[0] for val in unique_values]
            
            # Store the column metadata
            table_metadata[column_name] = {
                'data_type': data_type,
                'unique_values': unique_values
            }
        
        cursor.close()
        return table_metadata

    except (Exception, psycopg2.Error) as error:
        print("Error while fetching table metadata", error)
        return None

In [73]:
def format_metadata(metadata):
    formatted_metadata = ""
    for col, info in metadata.items():
        formatted_metadata += f"{col}: {info['data_type']}"
        if info['data_type'] == 'text':
            formatted_metadata += f" (Unique Values: {', '.join(info['unique_values'])})"
        formatted_metadata += "\n"
    return formatted_metadata


connection = get_database_connection()
if connection:
    table_name = "disc_off"
    metadata = get_table_metadata(connection, table_name)
    formatted_metadata = format_metadata(metadata)
    print(formatted_metadata)
    connection.close()

Connected to the PostgreSQL database!
portfolio_balance: double precision
life_insurance: double precision
medical_insurance: double precision
average_a_c_balance: double precision
personal_loan: double precision
investment_in_mutual_fund: double precision
investment_tax_saving_bond: double precision
home_loan: double precision
online_purchase_amount: double precision
discount_offering: integer
investment_in_commudity: double precision
investment_in_equity: double precision
investment_in_derivative: double precision
ref_no: integer
year_last_moved: integer
average_credit_card_transaction: double precision
balance_transfer: double precision
term_deposit: double precision
children: text (Unique Values: 3, 4+, 2, 1, Zero)
age_band: text (Unique Values: 18-21, 36-40, Unknown, 45-50, 65-70, 26-30, 71+, 61-65, 22-25, 55-60)
status: text (Unique Values: Unknown, Single/Never Married, Widowed, Partner, Divorced/Separated)
occupation: text (Unique Values: Professional, Other, Unknown, Housewife

In [74]:
def get_llama_assistance(prompt, formatted_metadata, table_name):
    main_purpose = f"""
    As an SQL Query Expert, your primary role is to understand the given data, the questions based on the provided input and generate accurate SQL queries ONLY. 
    Remember, you only have to answer the Query for the given input, don't give any explanation, just the query. 
    Here are the column names with respect to their information: 
    {formatted_metadata}
    Please follow the exact variable patterns. 
    And the table name is {table_name}
    Here is/are the Questions:"""

    client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
    completion = client.chat.completions.create(
        model="llama3-70b-8192",
        messages=[
            {
                "role": "user",
                "content": f"{main_purpose} {prompt}"
            },
            {
                "role": "assistant",
                "content": ""
            }
        ],
        temperature=1.4,
        max_tokens=8192,
        top_p=1,
        stream=True,
        stop=None,
    )

    response_text = ""
    for chunk in completion:
        response_text += chunk.choices[0].delta.content or ""
    
    return response_text

In [75]:
print(formatted_metadata)

portfolio_balance: double precision
life_insurance: double precision
medical_insurance: double precision
average_a_c_balance: double precision
personal_loan: double precision
investment_in_mutual_fund: double precision
investment_tax_saving_bond: double precision
home_loan: double precision
online_purchase_amount: double precision
discount_offering: integer
investment_in_commudity: double precision
investment_in_equity: double precision
investment_in_derivative: double precision
ref_no: integer
year_last_moved: integer
average_credit_card_transaction: double precision
balance_transfer: double precision
term_deposit: double precision
children: text (Unique Values: 3, 4+, 2, 1, Zero)
age_band: text (Unique Values: 18-21, 36-40, Unknown, 45-50, 65-70, 26-30, 71+, 61-65, 22-25, 55-60)
status: text (Unique Values: Unknown, Single/Never Married, Widowed, Partner, Divorced/Separated)
occupation: text (Unique Values: Professional, Other, Unknown, Housewife, Student, Retired, Business Manager, 

In [77]:
connection = get_database_connection()
if connection:
    table_name = "disc_off"
    input_question = input("Enter the Question: ")
    response = get_llama_assistance(input_question, formatted_metadata, table_name)
    print(response)
    connection.close()

Connected to the PostgreSQL database!
```
SELECT post_area, COUNT(ref_no) as num_customers
FROM disc_off
GROUP BY post_area
ORDER BY num_customers DESC
LIMIT 5;
```
