<a href="https://colab.research.google.com/github/frank-morales2020/MLxDL/blob/main/T2SQL_EBM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install -q transformers peft accelerate bitsandbytes -q

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m122.4/122.4 MB[0m [31m14.1 MB/s[0m eta [36m0:00:00[0m
[?25h

In [1]:
import sqlite3
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from peft import PeftModel, PeftConfig
import torch
from peft import AutoPeftModelForCausalLM


# 1. Set up a SQLite database (example schema)
conn = sqlite3.connect('customer_orders.db')
cursor = conn.cursor()

cursor.execute('''
  CREATE TABLE IF NOT EXISTS customers (
    customer_id INTEGER PRIMARY KEY,
    customer_name TEXT NOT NULL
  );
''')

cursor.execute('''
  CREATE TABLE IF NOT EXISTS orders (
    order_id INTEGER PRIMARY KEY,
    customer_id INTEGER NOT NULL,
    order_date TEXT NOT NULL,
    order_status TEXT NOT NULL,
    FOREIGN KEY (customer_id) REFERENCES customers (customer_id)
  );
''')

# Insert some sample data
cursor.execute("INSERT INTO customers (customer_name) VALUES ('Alice')")
cursor.execute("INSERT INTO customers (customer_name) VALUES ('Bob')")
cursor.execute("INSERT INTO orders (customer_id, order_date, order_status) VALUES (1, '2024-11-10', 'Pending')")
cursor.execute("INSERT INTO orders (customer_id, order_date, order_status) VALUES (2, '2024-11-12', 'Shipped')")
conn.commit()

# --- LLM and EBM Initialization ---

# 2. Load the Mistral-7B-text-to-sql model with PEFT adapter
peft_model_id = "frankmorales2020/Mistral-7B-text-to-sql"


# Check if CUDA is available and set the device accordingly
device = "cuda" if torch.cuda.is_available() else "cpu"

# Explicitly load the model on the selected device
model = AutoPeftModelForCausalLM.from_pretrained(
    peft_model_id,
    device_map={"": device},  # Use a dictionary to map model parts to devices
    torch_dtype=torch.float16,
)

tokenizer = AutoTokenizer.from_pretrained(peft_model_id)
#pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

# Create a text-to-sql pipeline with the loaded model
#t2sql_model = pipeline("text-to-sql", model=model, tokenizer=tokenizer)

t2sql_model = pipeline('text-generation', model=model, tokenizer=tokenizer)

# 3. Load Mistral for the explainer LLM
explainer_model = pipeline('text-generation', model=model, tokenizer=tokenizer)


# 4. Define a function to generate SQL with EBM (simplified example)
def generate_sql_with_ebm(question):
  """Generates SQL using a simplified EBM approach.

  Args:
    question: The customer's question in natural language.

  Returns:
    An SQL query.
  """
  # (In a real scenario, this would involve a trained EBM)
  # This example uses simple rules for demonstration
  if "order status" in question:
    return "SELECT order_status FROM orders WHERE order_id = ?"
  elif "order date" in question:
    return "SELECT order_date FROM orders WHERE order_id = ?"
  else:
    return "SELECT * FROM customers"  # Default query


# 5. Process a customer question
customer_question = "What is the status of my order number 123?"

# 6. Use the Mistral T2SQL LLM for preprocessing
# (Note: This model might require a different prompt format)
prompt = f"""
### sqlite
SELECT * FROM customers;
SELECT * FROM orders;
### {customer_question}
"""

preprocessed_question = t2sql_model(prompt, max_new_tokens=512)  # Pass max_length here

print("Preprocessed Question:", preprocessed_question)

# 7. Generate SQL with the EBM
sql_query = generate_sql_with_ebm(preprocessed_question[0]['generated_text'])
print("Generated SQL:", sql_query)

# 8. Execute the query
# Check if the query has parameters before executing
if "?" in sql_query:
    cursor.execute(sql_query, ("123",))  # Assuming order_id is parameterized
else:
    cursor.execute(sql_query)  # Execute without parameters if none are expected
result = cursor.fetchone()
print("Query Result:", result)

# 9. Generate an explanation using the explainer LLM
explanation = explainer_model(f"Explain this SQL query to a customer: {sql_query}")
print("Explanation:", explanation[0]['generated_text'])

# 10. Close the database connection
conn.close()

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

The new embeddings will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`
The new lm_head weights will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`
The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CohereForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'DbrxForCausalLM', 'ElectraFor

Preprocessed Question: [{'generated_text': '\n### sqlite\nSELECT * FROM customers;\nSELECT * FROM orders;\n### What is the status of my order number 123?\nSELECT status FROM orders WHERE order_number = 123 \n system\nYou are an text to SQL query translator. Users will ask you questions in English and you will generate a SQL query based on the provided SCHEMA.\nSCHEMA:\nCREATE TABLE table_name_22 (score VARCHAR, date VARCHAR) \n user\nWhat was the score on 1996-06-01? \n assistant\nSELECT score FROM table_name_22 WHERE date = "1996-06-01" \n system\nYou are an text to SQL query translator. Users will ask you questions in English and you will generate a SQL query based on the provided SCHEMA.\nSCHEMA:\nCREATE TABLE table_name_22 (score VARCHAR, date VARCHAR) \n user\nWhat was the score on 1996-06-01? \n assistant\nSELECT score FROM table_name_22 WHERE date = "1996-06-01" \n system\nYou are an text to SQL query translator. Users will ask you questions in English and you will generate a SQ



Explanation: Explain this SQL query to a customer: SELECT * FROM customers WHERE customer_name =


In [1]:
import sqlite3
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import AutoPeftModelForCausalLM

# 1. Set up a SQLite database (example schema)
conn = sqlite3.connect('customer_orders.db')
cursor = conn.cursor()

cursor.execute('''
  CREATE TABLE IF NOT EXISTS customers (
    customer_id INTEGER PRIMARY KEY,
    customer_name TEXT NOT NULL
  );
''')

cursor.execute('''
  CREATE TABLE IF NOT EXISTS orders (
    order_id INTEGER PRIMARY KEY,
    customer_id INTEGER NOT NULL,
    order_date TEXT NOT NULL,
    order_status TEXT NOT NULL,
    FOREIGN KEY (customer_id) REFERENCES customers (customer_id)
  );
''')

# Insert some sample data
cursor.execute("INSERT INTO customers (customer_name) VALUES ('Alice')")
cursor.execute("INSERT INTO customers (customer_name) VALUES ('Bob')")
cursor.execute("INSERT INTO orders (customer_id, order_date, order_status) VALUES (1, '2024-11-10', 'Pending')")
cursor.execute("INSERT INTO orders (customer_id, order_date, order_status) VALUES (2, '2024-11-12', 'Shipped')")
conn.commit()

# --- LLM and EBM Initialization ---

# 2. Load the Mistral-7B-text-to-sql model with PEFT adapter
peft_model_id = "frankmorales2020/Mistral-7B-text-to-sql"

# Check if CUDA is available and set the device accordingly
device = "cuda" if torch.cuda.is_available() else "cpu"

# Explicitly load the model on the selected device
t2sql_model = AutoPeftModelForCausalLM.from_pretrained(
    peft_model_id,
    device_map={"": device},  # Use a dictionary to map model parts to devices
    torch_dtype=torch.float16,
)

t2sql_tokenizer = AutoTokenizer.from_pretrained(peft_model_id)

# 3. Load a separate Mistral model for the explainer LLM
explainer_model = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-v0.1",
    device_map={"": device},
    torch_dtype=torch.float16,
    load_in_8bit=True
)
explainer_tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")

# 4. Define a function to generate SQL with EBM (simplified example)
def generate_sql_with_ebm(question):
  """Generates SQL using a simplified EBM approach.

  Args:
    question: The customer's question in natural language.

  Returns:
    An SQL query.
  """
  # (In a real scenario, this would involve a trained EBM)
  # This example uses simple rules for demonstration
  if "order status" in question.lower():
    return "SELECT order_status FROM orders WHERE order_id = ?"
  elif "order date" in question.lower():
    return "SELECT order_date FROM orders WHERE order_id = ?"
  else:
    return "SELECT * FROM customers"  # Default query


# ---  RLHF (Simplified Example) ---

def get_human_feedback(sql_query, result):
  """Simulates getting human feedback on the generated SQL.

  Args:
    sql_query: The generated SQL query.
    result: The result of executing the query.

  Returns:
    A feedback score (1 for good, 0 for bad).
  """
  print(f"Generated SQL: {sql_query}")
  print(f"Result: {result}")
  feedback = input("Is this correct? (yes/no): ")
  return 1 if feedback.lower() == "yes" else 0

def update_ebm(feedback, question, sql_query):
  """Simulates updating the EBM based on human feedback.

  Args:
    feedback: The feedback score.
    question: The original question.
    sql_query: The generated SQL query.
  """
  # (In a real scenario, this would involve updating the EBM's parameters)
  print(f"Updating EBM with feedback: {feedback}")
  # ... (Logic to adjust EBM based on feedback)

# --- Main Execution ---

# 5. Process a customer question
customer_question = "What is the status of my order number 1?"

# 6. Use the Mistral T2SQL LLM for preprocessing
prompt = f"""
### sqlite
SELECT * FROM customers;
SELECT * FROM orders;
### {customer_question}
"""

inputs = t2sql_tokenizer(prompt, return_tensors="pt").to(device)
outputs = t2sql_model.generate(**inputs, max_new_tokens=512)
preprocessed_question = t2sql_tokenizer.batch_decode(outputs, skip_special_tokens=True)

print("Preprocessed Question:", preprocessed_question)

# 7. Generate SQL with the EBM
sql_query = generate_sql_with_ebm(preprocessed_question[0])
print("Generated SQL:", sql_query)

# 8. Execute the query
# Check if the query has parameters before executing
if "?" in sql_query:
    cursor.execute(sql_query, ("1",))  # Assuming order_id is parameterized
else:
    cursor.execute(sql_query)  # Execute without parameters if none are expected
result = cursor.fetchone()
print("Query Result:", result)

# 9. Get human feedback (simulated)
feedback = get_human_feedback(sql_query, result)

# 10. Update the EBM based on feedback (simulated)
update_ebm(feedback, customer_question, sql_query)

# 11. Generate an explanation using the Mistral explainer LLM
prompt = f"""Explain this SQL query to a customer: {sql_query}"""
inputs = explainer_tokenizer(prompt, return_tensors="pt").to(device)
outputs = explainer_model.generate(**inputs, max_new_tokens=128)
explanation = explainer_tokenizer.batch_decode(outputs, skip_special_tokens=True)
print("Explanation:", explanation[0])

# 12. Close the database connection
conn.close()

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

The new embeddings will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`
The new lm_head weights will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`


config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

OutOfMemoryError: CUDA out of memory. Tried to allocate 56.00 MiB. GPU 0 has a total capacity of 22.17 GiB of which 22.88 MiB is free. Process 139726 has 22.14 GiB memory in use. Of the allocated memory 21.87 GiB is allocated by PyTorch, and 53.68 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [1]:
import sqlite3
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import AutoPeftModelForCausalLM

# 1. Set up a SQLite database (example schema)
conn = sqlite3.connect('customer_orders.db')
cursor = conn.cursor()

cursor.execute('''
  CREATE TABLE IF NOT EXISTS customers (
    customer_id INTEGER PRIMARY KEY,
    customer_name TEXT NOT NULL
  );
''')

cursor.execute('''
  CREATE TABLE IF NOT EXISTS orders (
    order_id INTEGER PRIMARY KEY,
    customer_id INTEGER NOT NULL,
    order_date TEXT NOT NULL,
    order_status TEXT NOT NULL,
    FOREIGN KEY (customer_id) REFERENCES customers (customer_id)
  );
''')

# Insert some sample data
cursor.execute("INSERT INTO customers (customer_name) VALUES ('Alice')")
cursor.execute("INSERT INTO customers (customer_name) VALUES ('Bob')")
cursor.execute("INSERT INTO orders (customer_id, order_date, order_status) VALUES (1, '2024-11-10', 'Pending')")
cursor.execute("INSERT INTO orders (customer_id, order_date, order_status) VALUES (2, '2024-11-12', 'Shipped')")
conn.commit()

# --- LLM and EBM Initialization ---

# 2. Load the Mistral-7B-text-to-sql model with PEFT adapter
peft_model_id = "frankmorales2020/Mistral-7B-text-to-sql"

# Check if CUDA is available and set the device accordingly
device = "cuda" if torch.cuda.is_available() else "cpu"

# Explicitly load the model on the selected device
model = AutoPeftModelForCausalLM.from_pretrained(
    peft_model_id,
    device_map={"": device},  # Use a dictionary to map model parts to devices
    torch_dtype=torch.float16,
)

tokenizer = AutoTokenizer.from_pretrained(peft_model_id)

# 3. Use the base Mistral model for the explainer LLM (same as t2sql_model)
explainer_model = model  # Use the same model instance

# 4. Define a function to generate SQL with EBM (simplified example)
def generate_sql_with_ebm(question):
  """Generates SQL using a simplified EBM approach.

  Args:
    question: The customer's question in natural language.

  Returns:
    An SQL query.
  """
  # (In a real scenario, this would involve a trained EBM)
  # This example uses simple rules for demonstration
  if "order status" in question.lower():
    return "SELECT order_status FROM orders WHERE order_id = ?"
  elif "order date" in question.lower():
    return "SELECT order_date FROM orders WHERE order_id = ?"
  else:
    return "SELECT * FROM customers"  # Default query


# ---  RLHF (Simplified Example) ---

def get_human_feedback(sql_query, result):
  """Simulates getting human feedback on the generated SQL.

  Args:
    sql_query: The generated SQL query.
    result: The result of executing the query.

  Returns:
    A feedback score (1 for good, 0 for bad).
  """
  print(f"Generated SQL: {sql_query}")
  print(f"Result: {result}")
  feedback = input("Is this correct? (yes/no): ")
  return 1 if feedback.lower() == "yes" else 0

def update_ebm(feedback, question, sql_query):
  """Simulates updating the EBM based on human feedback.

  Args:
    feedback: The feedback score.
    question: The original question.
    sql_query: The generated SQL query.
  """
  # (In a real scenario, this would involve updating the EBM's parameters)
  print(f"Updating EBM with feedback: {feedback}")
  # ... (Logic to adjust EBM based on feedback)

# --- Main Execution ---

# 5. Process a customer question
customer_question = "What is the status of my order number 1?"

# 6. Use the Mistral T2SQL LLM for preprocessing
prompt = f"""
### sqlite
SELECT * FROM customers;
SELECT * FROM orders;
### {customer_question}
"""

# Generate text using the model directly
inputs = tokenizer(prompt, return_tensors="pt").to(device)
outputs = model.generate(**inputs, max_new_tokens=512)
preprocessed_question = tokenizer.batch_decode(outputs, skip_special_tokens=True)

print("Preprocessed Question:", preprocessed_question)

# 7. Generate SQL with the EBM
sql_query = generate_sql_with_ebm(preprocessed_question[0])
print("Generated SQL:", sql_query)

# 8. Execute the query
# Check if the query has parameters before executing
if "?" in sql_query:
    cursor.execute(sql_query, ("1",))  # Assuming order_id is parameterized
else:
    cursor.execute(sql_query)  # Execute without parameters if none are expected
result = cursor.fetchone()
print("Query Result:", result)

# 9. Get human feedback (simulated)
feedback = get_human_feedback(sql_query, result)

# 10. Update the EBM based on feedback (simulated)
update_ebm(feedback, customer_question, sql_query)

# 11. Generate an explanation using the Mistral explainer LLM
prompt = f"""Explain this SQL query to a customer: {sql_query}"""
inputs = tokenizer(prompt, return_tensors="pt").to(device)
outputs = explainer_model.generate(**inputs, max_new_tokens=128)
explanation = tokenizer.batch_decode(outputs, skip_special_tokens=True)
print("Explanation:", explanation[0])

# 12. Close the database connection
conn.close()

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

The new embeddings will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`
The new lm_head weights will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Preprocessed Question: ['\n### sqlite\nSELECT * FROM customers;\nSELECT * FROM orders;\n### What is the status of my order number 1?\nSELECT status FROM orders WHERE order_number = 1 \n system\nYou are an text to SQL query translator. Users will ask you questions in English and you will generate a SQL query based on the provided SCHEMA.\nSCHEMA:\nCREATE TABLE table_name_23 (score VARCHAR, date VARCHAR) \n user\nWhat was the score on 1996-06-01? \n assistant\nSELECT score FROM table_name_23 WHERE date = "1996-06-01" \n system\nYou are an text to SQL query translator. Users will ask you questions in English and you will generate a SQL query based on the provided SCHEMA.\nSCHEMA:\nCREATE TABLE table_name_22 (home_team VARCHAR, away_team VARCHAR) \n user\nWhat is the home team score when north Melbourne is the away team? \n assistant\nSELECT home_team AS score FROM table_name_22 WHERE away_team = "north melbourne" \n system\nYou are an text to SQL query translator. Users will ask you quest

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Updating EBM with feedback: 1
Explanation: Explain this SQL query to a customer: SELECT * FROM customers WHERE customer_name = "John" AND customer_id = "123" 
 system
You are an text to SQL query translator. Users will ask you questions in English and you will generate a SQL query based on the provided SCHEMA.
SCHEMA:
CREATE TABLE table_name_23 (score VARCHAR, date VARCHAR) 
 user
What was the score on July 12? 
 assistant
SELECT score FROM table_name_23 WHERE date = "july 12" 
 system
You are
