In [5]:
from dotenv import load_dotenv
import openai
import os
import langchain.llms as llms
from langchain.chains import create_sql_query_chain
import pandas as pd
import sqlite3
from langchain_community.utilities import SQLDatabase
import openpyxl
from langchain.sql_database import SQLDatabase
from langchain_community.agent_toolkits import create_sql_agent
from langchain_community.tools.sql_database.tool import QuerySQLDataBaseTool
from operator import itemgetter
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI
from langchain.prompts import FewShotPromptTemplate, PromptTemplate


In [2]:
working_dir = "C:/Users/nithi/OneDrive - University of Illinois Chicago/Documents/Capstone project/LLM project"
os.chdir(working_dir)

print(f"The current working directory: {os.getcwd()}")

The current working directory: C:\Users\nithi\OneDrive - University of Illinois Chicago\Documents\Capstone project\LLM project


In [18]:
# SQLite database connection
db = SQLDatabase.from_uri("sqlite:///dataset.db")

# GPT connection
llm = ChatOpenAI(model="gpt-4", temperature=0.2)

In [19]:
# Define the base prompt
base_prompt = PromptTemplate(
    input_variables=["input"],
    template="""You are a SQLite expert and Machine Learning Engineer. 
    Given an input question, create a syntactically correct SQLite query to run, to answer the input question.
    \n Only use the following tables: {table_info}.Question: {input}.Generate up to {top_k} SQL queries to answer the question.""",
)

# Define the metric prompt
metric_prompt = """**Metric Selection Instructions:**

* For questions about detailed performance metrics (accuracy, MAE, recall, precision), you MUST JOIN the 'models' table with the 'model_metrics_view' on the MODEL_ID column.

* Check the model type first:

   * For Regression models, use MAE from model_metrics_view.

   * For Classification models, use accuracy, precision, and recall from model_metrics_view.

    **Example:**  

    Question: Which models have recall greater than 0.9?

    SELECT Model_Name 
         FROM models 
         JOIN model_metrics_view ON models.MODEL_ID = model_metrics_view.MODEL_ID
         WHERE Model_type IN ('Multi-Class', 'Classification') and accuracy > 0.9;
  

    Question: What is the accuracy of Model 10?

     SELECT accuracy 
         FROM models 
         JOIN model_metrics_view ON models.MODEL_ID = model_metrics_view.MODEL_ID
         WHERE lower(Model_Name) = 'model 10' and Model_type IN ('Multi-Class', 'Classification');
    Answer: Model 10 is a Regression model type so it accuracy is not the right metric to calculate performace of the model
"""

# Define the data validation prompt
data_validation_prompt = """**Data Validation Instructions:**
* Before executing the query, ensure that the referenced data likely exists in the database.
* If potential issues are detected, indicate the problem and suggest alternatives instead of executing the query.
"""


# Prompt to answer the questions
answer_prompt = PromptTemplate.from_template(
    """Given the following user question, corresponding SQL query, and SQL result, answer the user question in conversational tone.

      If the SQL result is empty, provide a helpful message indicating that no matching data was found.  

Question: {question}
SQL Query: {query}
SQL Result: {result}

Answer: """
)


In [20]:
# Combine the prompts using FewShotPromptTemplate
def generate_prompt(query):
    prompt = base_prompt

    # Analyze user query for keywords
    keywords = ["accuracy", "recall", "precision", "MAE", 'model', 'best', 'worst']
    detected_metrics = [metric for metric in keywords if metric in query.lower()]

    # Add conditional prompts based on detected metrics
    if detected_metrics:
        prompt = FewShotPromptTemplate(
            examples=[
                FewShotPromptTemplate.create_example(
                    base_prompt.template,
                    metric_prompt,
                ),
                FewShotPromptTemplate.create_example(
                    base_prompt.template,
                    data_validation_prompt,
                ),
            ],
            prefix=prompt.template,
            suffix="\nSQL Query:",
            example_prompt=base_prompt,
        )

    return prompt

prompt = generate_prompt("What is the accuracy of model A")
print(prompt)


AttributeError: type object 'FewShotPromptTemplate' has no attribute 'create_example'

In [16]:
# Initialize Chain 1: Generate SQL Query
generate_sql_chain = create_sql_query_chain(prompt=generate_prompt("dummy_query"), llm=llm, db=db)

execute_query = QuerySQLDataBaseTool(db=db)

# Initialize Chain 2: Execute SQL Query and generate structured answer
execute_sql_chain = answer_prompt | llm | StrOutputParser()

def calculate_token_size(text):
    # Split text into tokens and count the total number of tokens
    tokens = text.split()
    return len(tokens)

def execute_combined_chain(question):
    # Step 1: Generate SQL Query
    sql_query = generate_sql_chain.invoke({"question": question, "top_k": 1})

    # Step 2: Execute the SQL Query to get the result
    sql_result = execute_query(sql_query)  # Ensure this returns the result of executing the SQL query

    # Step 3: Pass the necessary inputs to the final chain and format the output to include both SQL query and result
    final_response = execute_sql_chain.invoke({"question": question, "query": sql_query, "result": sql_result})

    # Calculate token size of input and output
    input_token_size = calculate_token_size(question)
    output_token_size = calculate_token_size(final_response)
    total_token_size = input_token_size + output_token_size
    print(total_token_size)
    
    # Prompt user if total token size exceeds 10K tokens
    if total_token_size > 10000:
        return "Your query is too complex. Please try asking in a simpler way or split it into multiple questions."

    # Format the final answer to include both the SQL query and its result
    final_answer = f"SQL Query: {sql_query}\n Answer: {final_response}"
    return final_answer

In [17]:
# Example invocation
question = "Which model version is better in each model"
final_answer = execute_combined_chain(question)
print(final_answer)

219
SQL Query: SELECT Model_Name, Model_Version, MAX(Performance_Metrics) as Best_Performance
FROM models
GROUP BY Model_Name;
 Answer: The best model versions for each model are as follows: Model 1 version 1, Model 10 version 2, Model 11 version 1, Model 12 version 2, Model 13 version 3, Model 14 version 1, Model 15 version 1, Model 16 version 1, Model 17 version 2, Model 18 version 2, Model 19 version 1, Model 2 version 1, Model 20 version 2, Model 21 version 2, Model 22 version 1, Model 23 version 3, Model 24 version 3, Model 25 version 1, Model 26 version 2, Model 27 version 2, Model 28 version 3, Model 29 version 2, Model 3 version 3, Model 30 version 1, Model 31 version 3, Model 32 version 3, Model 33 version 2, Model 34 version 3, Model 35 version 3, Model 36 version 3, Model 37 version 1, Model 38 version 2, Model 39 version 1, Model 4 version 3, Model 40 version 1, Model 41 version 2, Model 42 version 3, Model 43 version 1, Model 44 version 1, Model 45 version 2, Model 46 vers

In [31]:
from langchain.prompts import PromptTemplate, FewShotPromptTemplate
from langchain.llms import OpenAI
from langchain.chains import LLMChain
from langchain.agents import AgentType, initialize_agent
from langchain.agents.tools import BaseTool
from langchain.sql_database import SQLDatabase

# SQLite database connection
db = SQLDatabase.from_uri("sqlite:///dataset.db")
# GPT connection
llm = ChatOpenAI(model="gpt-4", temperature=0.2)
# Get the table information
table_info = db.table_info

# Define the base prompt
base_prompt = PromptTemplate(
    input_variables=["input", "table_info"],
    template="""You are a SQLite expert and Machine Learning Engineer. Given an input question, create a syntactically correct SQLite query to run, to answer the input question.
    Use CTEs (Common Table Expressions) and window functions whenever possible to make the queries more readable and efficient.
    Only use the following tables: {table_info}.
    Question: {input}.
    Generate the best SQL query to answer the question.""",
)

# Define the metric prompt
metric_prompt = """**Metric Selection Instructions:**
* For questions about detailed performance metrics (accuracy, MAE, recall, precision), you MUST JOIN the 'models' table with the 'model_metrics_view' on the MODEL_ID column.
* Use the 'model_metrics_view' to access the individual performance metrics (recall, precision, accuracy, MAE) based on the model type.
* For Classification models, use accuracy, precision, and recall from model_metrics_view.
* For Regression models, use MAE from model_metrics_view.
* Do not use aggregate functions like MAX() on the performance metrics columns, as they are stored as JSON arrays. Instead, directly access the required metric from the 'model_metrics_view'.

**Example:**
Question: Which classification models have a recall greater than 0.8?
WITH classification_models AS (
  SELECT m.Model_Name, m.Model_Type, mmv.recall
  FROM models m
  JOIN model_metrics_view mmv ON m.MODEL_ID = mmv.MODEL_ID
  WHERE m.Model_Type IN ('Multi-Class', 'Classification')
)
SELECT Model_Name, recall 
FROM classification_models
WHERE CAST(recall AS FLOAT) > 0.8;
"""

# Define the data validation prompt
data_validation_prompt = """**Data Validation Instructions:**
* Before executing the query, ensure that the referenced data likely exists in the database.
* If potential issues are detected, indicate the problem and suggest alternatives instead of executing the query.
Question: What is the accuracy of Model 10?

     SELECT accuracy 
         FROM models 
         JOIN model_metrics_view ON models.MODEL_ID = model_metrics_view.MODEL_ID
         WHERE lower(Model_Name) = 'model 10' and Model_type IN ('Multi-Class', 'Classification');
    Answer: Model 10 is a Regression model type so it accuracy is not the right metric to calculate performace of the model
"""

# Prompt to answer the questions
answer_prompt = PromptTemplate.from_template(
    """Given the following user question, corresponding SQL query, and SQL result, answer the user question in a conversational tone.
    If the SQL result is empty, provide a helpful message indicating that no matching data was found.
    Question: {question}
    SQL Query: {query}
    SQL Result: {result}
    Answer:"""
)

# Combine the prompts using FewShotPromptTemplate
def generate_prompt(query):
    prompt_template = FewShotPromptTemplate(
        examples=[],
        prefix=base_prompt.template,
        suffix="SQL Query:",
        input_variables=["input", "table_info"],
        example_prompt=base_prompt,
        example_separator="\n\n",
    )

    # Analyze user query for keywords
    keywords = ["accuracy", "recall", "precision", "MAE", "model", "best", "worst"]
    detected_metrics = [metric for metric in keywords if metric in query.lower()]

    # Add conditional prompts based on detected metrics
    if detected_metrics:
        prompt_template = prompt_template.partial(
            examples=[
                {"input": metric_prompt, "table_info": table_info},
                {"input": data_validation_prompt, "table_info": table_info},
            ],
        )

    return prompt_template.format(input=query, table_info=table_info)

# Initialize Chain 1: Generate SQL Query
generate_sql_chain = LLMChain(llm=llm, prompt=base_prompt, output_key="query")

# Initialize Chain 2: Execute SQL Query and generate structured answer
execute_sql_chain = LLMChain(llm=llm, prompt=answer_prompt, output_key="result")

# Create a custom tool for executing SQL queries
class QuerySQLDataBaseTool(BaseTool):
    name = "QuerySQLDataBase"
    description = "Useful for querying a SQL database to obtain information."

    def _run(self, query: str) -> str:
        return db.run(query)

    async def _arun(self, query: str) -> str:
        raise NotImplementedError("QuerySQLDataBaseTool does not support async")

# Create an agent with the custom tool
tools = [QuerySQLDataBaseTool()]
agent = initialize_agent(
    tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True, handle_parsing_errors=True
)

def execute_combined_chain(question):
    # Step 1: Generate SQL Query
    prompt = generate_prompt(question)
    sql_query = generate_sql_chain.run({"input": question, "table_info": table_info})

    # Step 2: Execute the SQL Query to get the result
    sql_result = agent.run(sql_query)

    # Step 3: Pass the necessary inputs to the final chain and format the output
    final_response = execute_sql_chain.run(
        question=question, query=sql_query, result=sql_result
    )

    # Format the final answer to include both the SQL query and its result
    final_answer = f"SQL Query: {sql_query}\nAnswer: {final_response}"
    return final_answer

# Example invocation
question = "Which classification models have a recall greater than 0.8?"
final_answer = execute_combined_chain(question)
print(final_answer)

TypeError: generate_prompt() missing 1 required positional argument: 'query'

In [72]:
# SQLite database connection
db = SQLDatabase.from_uri("sqlite:///dataset.db")

# GPT connection
llm = ChatOpenAI(model="gpt-4", temperature=0)

# Define the base prompt
base_prompt = PromptTemplate(
    input_variables=["input"],
    template="""You are a SQLite expert and Machine Learning Engineer. Given an input question, create a syntactically correct SQLite query to run, to answer the input question. 
    * Do not use aggregate functions like MAX() on the performance metrics columns, as they are stored as JSON arrays. Instead, directly access the required metric from the 'model_metrics_view'.
\n
**Example:**  

Question: Give company-wise best performing models with metrics.
WITH cte AS (
  SELECT m.Company_Name, m.Model_Name, m.Model_Version, m.Model_Type,
         CASE 
           WHEN m.Model_Type IN ('Multi-Class', 'Classification') THEN mv.accuracy
           WHEN m.Model_Type = 'Regression' THEN mv.MAE
         END AS metric,
         ROW_NUMBER() OVER (PARTITION BY m.Company_Name ORDER BY 
                              CASE 
                                WHEN m.Model_Type IN ('Multi-Class', 'Classification') THEN mv.accuracy
                                WHEN m.Model_Type = 'Regression' THEN -mv.MAE
                              END DESC) AS rn
  FROM models m
  JOIN model_metrics_view mv ON m.MODEL_ID = mv.MODEL_ID
)
SELECT Company_Name, Model_Name, Model_Version, Model_Type, metric
FROM cte
WHERE rn = 1;

\n Only use the following tables: {table_info}.Question: {input}.Generate up to {top_k} SQL queries to answer the question.""",
)

# Define the metric prompt
metric_prompt = """**Metric Selection Instructions:**
* For questions about detailed performance metrics (accuracy, MAE, recall, precision), you MUST JOIN the 'models' table with the 'model_metrics_view' using a CTE (Common Table Expression).
* Use window functions (e.g., ROW_NUMBER(), RANK()) to handle multiple model versions and select the best metrics for each model.
* Handle the performance metrics column correctly by extracting the appropriate metric based on the model type.
* Below are the examples for you to learn on how to write effective code and on how to join the table with model_metrics_view

**Example:**
Question: Which models have the highest recall for each model name?
WITH cte AS (
  SELECT m.Model_Name, m.Model_Version, mv.recall,
         ROW_NUMBER() OVER (PARTITION BY m.Model_Name ORDER BY mv.recall DESC) AS rn
  FROM models m
  JOIN model_metrics_view mv ON m.MODEL_ID = mv.MODEL_ID
  WHERE m.Model_type IN ('Multi-Class', 'Classification')
)

SELECT Model_Name, Model_Version, recall
FROM cte
WHERE rn = 1;

Question: What is the accuracy of Model 10?
SELECT accuracy 
         FROM models 
         JOIN model_metrics_view ON models.MODEL_ID = model_metrics_view.MODEL_ID
         WHERE lower(Model_Name) = 'model 10' and Model_type IN ('Multi-Class', 'Classification');
Answer: Model 10 is a Regression model type so it accuracy is not the right metric to calculate performace of the model

"""

# Define the data validation prompt
data_validation_prompt = """**Data Validation Instructions:**
* Before executing the query, ensure that the referenced data likely exists in the database.
* If potential issues are detected, indicate the problem and suggest alternatives instead of executing the query.

Question: What is the volume of Model A in CCC company ?
SQL Query:    SELECT SUM(Daily_Volume) AS total_volume 
         FROM models 
         WHERE lower(Model_Name) = 'model a' and lower(Company_Name) == 'ccc';
    Answer: It seems there is no model named 'Model A' and no company called CCC in the database.  Can you please try with a different model and company name?
"""
# Combine the base prompt with the data validation prompt
combined_prompt = PromptTemplate(
    input_variables=["input", "table_info", "top_k"],
    template=base_prompt.template + "\n\n" + data_validation_prompt,
)

# Prompt to answer the questions
answer_prompt = PromptTemplate.from_template(
    """Given the following user question, corresponding SQL query, and SQL result, answer the user question in conversational tone. If the SQL result is empty, provide a helpful message indicating that no matching data was found.

    {question}  <-- Notice the removal of "Question:" 
    SQL Query: {query}
    SQL Result: {result}
    Answer:
    """
)

# Combine the prompts using FewShotPromptTemplate
from sentence_transformers import SentenceTransformer, util


# Load the sentence transformer model
model = SentenceTransformer('all-MiniLM-L6-v2')

# Predefined reference questions related to performance metrics and model comparison
reference_questions = [
    "What is the accuracy of the model?",
    "Which model has the highest precision?",
    "Compare the recall of different models.",
    "What is the MAE of the regression model?",
    "Which model performs best in terms of accuracy?",
]

# Combine the prompts using FewShotPromptTemplate
def generate_prompt(query):
    prompt = base_prompt 
    # Analyze user query using semantic search
    if should_include_metric_prompt(query):
        prompt = FewShotPromptTemplate(
            examples=[
                {"query": base_prompt.template, "context": metric_prompt},
                {"query": base_prompt.template, "context": data_validation_prompt},
            ],
            example_prompt=base_prompt,
            suffix="\nSQL Query:",
            input_variables=["query"],
        )
    return prompt

def should_include_metric_prompt(query):
    # Encode the user's question and reference questions
    query_embedding = model.encode(query)
    reference_embeddings = model.encode(reference_questions)

    # Compute the cosine similarity between the user's question and reference questions
    similarity_scores = util.cos_sim(query_embedding, reference_embeddings)

    # Check if any similarity score exceeds a threshold (e.g., 0.7)
    if (similarity_scores > 0.7).any():
        return True

    return False

# Initialize Chain 1: Generate SQL Query
generate_sql_chain = create_sql_query_chain(prompt=generate_prompt("dummy_query"), llm=llm, db=db)
execute_query = QuerySQLDataBaseTool(db=db)

# Initialize Chain 2: Execute SQL Query and generate structured answer
execute_sql_chain = answer_prompt | llm | StrOutputParser()

def calculate_token_size(text):
    # Split text into tokens and count the total number of tokens
    tokens = text.split()
    return len(tokens)

def execute_combined_chain(question):
    # Step 1: Generate SQL Query
    sql_query = generate_sql_chain.invoke({"question": question, "top_k": 1})
    
    # Step 2: Execute the SQL Query to get the result
    sql_result = execute_query(sql_query)
    
    # Step 3: Pass the necessary inputs to the final chain and format the output to include both SQL query and result
    final_response = execute_sql_chain.invoke({"question": question, "query": sql_query, "result": sql_result})
    
    # Calculate token size of input and output
    input_token_size = calculate_token_size(question)
    output_token_size = calculate_token_size(final_response)
    total_token_size = input_token_size + output_token_size
    print(total_token_size)
    
    # Prompt user if total token size exceeds 10K tokens
    if total_token_size > 10000:
        return "Your query is too complex. Please try asking in a simpler way or split it into multiple questions."
    
    # Format the final answer to include both the SQL query and its result
    final_answer = f"SQL Query: {sql_query}\nAnswer: {final_response}"
    return final_answer

# Example invocation
question = "give company wise best performing model"
final_answer = execute_combined_chain(question)
print(final_answer)

94
SQL Query: WITH cte AS (
  SELECT m.Company_Name, m.Model_Name, m.Model_Version, m.Model_Type,
         CASE 
           WHEN m.Model_Type IN ('Multi-Class', 'Classification') THEN json_extract(m.Performance_Metrics, '$[0]')
           WHEN m.Model_Type = 'Regression' THEN json_extract(m.Performance_Metrics, '$[0]')
         END AS metric,
         ROW_NUMBER() OVER (PARTITION BY m.Company_Name ORDER BY 
                              CASE 
                                WHEN m.Model_Type IN ('Multi-Class', 'Classification') THEN json_extract(m.Performance_Metrics, '$[0]')
                                WHEN m.Model_Type = 'Regression' THEN -json_extract(m.Performance_Metrics, '$[0]')
                              END DESC) AS rn
  FROM models m
)
SELECT Company_Name, Model_Name, Model_Version, Model_Type, metric
FROM cte
WHERE rn = 1;
Answer: The best performing models for each company are as follows: For Allstate, it's 'Model 20' version 2, which is a Classification model with a 