## Pre-requisites

In [21]:
# !pip install ollama
# !pip install sqlparse
# !pip install langchain
# !pip install langchain_community

## Set up ollama on your system (Step-1)
#### Linux system
* Command --> curl -fsSL https://ollama.com/install.sh | sh
* Website --> https://ollama.com/download

## Setup LLM (Step-2)
* Command --> ollama pull granite-code:8b-instruct-q4_0 
* Website --> https://ollama.com/library/granite-code:8b-instruct-q4_0
* We are using 8b 4bit quantized model . Model is present  in 3b , 8b, 20b,34b variants . Base and quantized versions are present. You can use depending on the system configuration.

### Import Libraries

In [1]:
from langchain_community.llms import Ollama
from langchain_community.utilities import SQLDatabase
import  sqlparse

## Access LLM running on your system . 
* base_url will be http://localhost:11434 in your case . I am running it on server.

In [2]:
llm = Ollama(base_url="http://172.27.222.2:11434",
    model="granite-code:8b-instruct-q4_0" , temperature =0.0
) 

## Setup  mysql and access the database

In [3]:
mysql_uri = 'mysql+mysqlconnector://root@localhost:3306/inventory_db'
db = SQLDatabase.from_uri(mysql_uri)

In [5]:
db.get_table_names()   # These are the tables present in database named 'test'

['backorder_data']

In [51]:
template = """
<|begin_of_text|><|start_header_id|>user<|end_header_id|>
Generate a SQL query to answer this question: `{question}`
DDL statement:
CREATE TABLE backorder_data (
    Type VARCHAR(50),
    Start,
    End,
    Location Name VARCHAR(50),
    Buyer VARCHAR(50),
    Supplier Id INT,
    Supplier VARCHAR(100),
    Activity Code VARCHAR(20),
    Velocity Code CHAR(1),
    Product Code VARCHAR(20),
    Product Description VARCHAR(200),
    LT INT,
    [5/24/2024] INT,
    [6/1/2024] INT,
    [7/1/2024] INT,
    [8/1/2024] INT,
    [9/1/2024] INT,
    [10/1/2024] INT,
    [11/1/2024] INT,
    [12/1/2024] INT,
    [1/1/2025] INT,
    [2/1/2025] INT,
    [3/1/2025] INT,
    [4/1/2025] INT
);

Metadata:
Type: Category of data (e.g., "Backorder")
Start: Start date of the backorder period
End: End date of the backorder period
Location Name: Location of the backorder (e.g., "TN")
Buyer: Name of the buyer responsible for the backorder
Supplier Id: Unique identifier for the supplier
Supplier: Name of the supplier
Activity Code: Code indicating the activity status (e.g., "A -Active")
Velocity Code: Single character code related to backorder velocity
Product Code: Unique identifier for the product
Product Description: Brief description of the product
LT: Lead time (in days)
[Date columns]: Backorder quantities for each month from May 2024 to April 2025

Examples:
1. Total backorder quantity for May 2024:
   SELECT SUM([5/24/2024]) AS total_backorder FROM backorder_data WHERE Type = 'Backorder';

2. List all products supplied by a specific supplier:
   SELECT DISTINCT Product Code, Product Description FROM backorder_data WHERE Supplier = 'JIWANRAM SHEODUTTRAI INDUSTRIES PVT. LTD';

3. Count of products with different sizes:
   SELECT 
     SUM(CASE WHEN Product Code LIKE '%L' AND Product_Code NOT LIKE '%XL' THEN 1 ELSE 0 END) AS Large,
     SUM(CASE WHEN Product Code LIKE '%XL' THEN 1 ELSE 0 END) AS Extra_Large,
     SUM(CASE WHEN Product Code LIKE '%2X' THEN 1 ELSE 0 END) AS Double_Extra_Large
   FROM backorder_data;

4. Total backorder for a specific product across all months:
   SELECT Product Code, 
     SUM([5/24/2024] + [6/1/2024] + [7/1/2024] + [8/1/2024] + [9/1/2024] + [10/1/2024] + 
         [11/1/2024] + [12/1/2024] + [1/1/2025] + [2/1/2025] + [3/1/2025] + [4/1/2025]) AS Total_Backorder
   FROM backorder_data
   WHERE Product Code = 'RWG3700XL'
   GROUP BY Product Code;

5. Products with lead time greater than average:
   SELECT Product Code, LT
   FROM backorder_data
   WHERE LT > (SELECT AVG(LT) FROM backorder_data);

Generate a SQL query that best answers the question: `{question}`
<|eot_id|><|start_header_id|>assistant<|end_header_id|>

```sql

"""
from langchain.prompts import PromptTemplate
prompt = PromptTemplate.from_template(template=template)

In [52]:
from langchain_community.tools.sql_database.tool import QuerySQLDataBaseTool

execute_query = QuerySQLDataBaseTool(db=db)
chain = prompt | llm | execute_query

#### Generate a sql response and query from natural language . These are manually checked as well.

## Simple queries

### Q-1 Ground Truth
SELECT DISTINCT `Product Code` FROM backorder_data;


In [56]:
import time
t1 = time.perf_counter()
question ="List all unique product codes in the dataset."
generated_sql = chain.invoke({'question':question})
response = sqlparse.format(generated_sql)
print(response)

t2 = time.perf_counter()
print('time taken to run:',t2-t1)       # Wrong 

Error: (mysql.connector.errors.ProgrammingError) 1054 (42S22): Unknown column 'Product' in 'field list'
[SQL: SELECT DISTINCT Product Code FROM backorder_data;]
(Background on this error at: https://sqlalche.me/e/20/f405)
time taken to run: 2.139501081997878


### Q-2 Ground Truth
SELECT * FROM backorder_data WHERE Buyer = 'Abby Smith';


In [57]:
import time
t1 = time.perf_counter()
question ="Find all records where the Buyer is 'Abby Smith'."
generated_sql = chain.invoke({'question':question})
response = sqlparse.format(generated_sql)
print(response)

t2 = time.perf_counter()
print('time taken to run:',t2-t1)       # correct

[('Backorder', datetime.date(2024, 5, 24), datetime.date(2025, 5, 23), 'TN', 'Abby Smith', 144742, 'JIWANRAM SHEODUTTRAI INDUSTRIES PVT. LTD', 'A -Active', None, None, None, None, None, None, None, 'FRA-WG-U', None, None, None, 120, 0, None, None, None, None, None, None, None, None, None, None, None), ('Backorder', datetime.date(2024, 5, 24), datetime.date(2025, 5, 23), 'TN', 'Abby Smith', 144742, 'JIWANRAM SHEODUTTRAI INDUSTRIES PVT. LTD', 'A -Active', None, None, None, None, None, None, None, 'FRJ-WG-2X', None, None, None, 120, 0, None, None, None, None, None, None, None, None, None, None, None), ('Backorder', datetime.date(2024, 5, 24), datetime.date(2025, 5, 23), 'TN', 'Abby Smith', 144742, 'JIWANRAM SHEODUTTRAI INDUSTRIES PVT. LTD', 'A -Active', None, None, None, None, None, None, None, 'FRJ-WG-3X', None, None, None, 120, 0, None, None, None, None, None, None, None, None, None, None, None), ('Backorder', datetime.date(2024, 5, 24), datetime.date(2025, 5, 23), 'TN', 'Abby Smith', 1

### Q-3 Ground Truth
SELECT COUNT(*) FROM backorder_data WHERE `Type` = 'Backorder';


In [58]:
import time
t1 = time.perf_counter()
question ="Count the number of backorders with a 'Type' of 'Backorder'."
generated_sql = chain.invoke({'question':question})
response = sqlparse.format(generated_sql)
print(response)

t2 = time.perf_counter()
print('time taken to run:',t2-t1)       # correct

[(113,)]
time taken to run: 0.9022714610000548


### Q-4 Ground Truth
SELECT * FROM backorder_data LIMIT 5;


In [59]:
import time
t1 = time.perf_counter()
question ="Retrieve the first 5 records from the backorder_data table."
generated_sql = chain.invoke({'question':question})
response = sqlparse.format(generated_sql)
print(response)

t2 = time.perf_counter()
print('time taken to run:',t2-t1)       # correct

[('Backorder', datetime.date(2024, 5, 24), datetime.date(2025, 5, 23), 'TN', 'Abby Smith', 144742, 'JIWANRAM SHEODUTTRAI INDUSTRIES PVT. LTD', 'A -Active', None, None, None, None, None, None, None, 'FRA-WG-U', None, None, None, 120, 0, None, None, None, None, None, None, None, None, None, None, None), ('Backorder', datetime.date(2024, 5, 24), datetime.date(2025, 5, 23), 'TN', 'Abby Smith', 144742, 'JIWANRAM SHEODUTTRAI INDUSTRIES PVT. LTD', 'A -Active', None, None, None, None, None, None, None, 'FRJ-WG-2X', None, None, None, 120, 0, None, None, None, None, None, None, None, None, None, None, None), ('Backorder', datetime.date(2024, 5, 24), datetime.date(2025, 5, 23), 'TN', 'Abby Smith', 144742, 'JIWANRAM SHEODUTTRAI INDUSTRIES PVT. LTD', 'A -Active', None, None, None, None, None, None, None, 'FRJ-WG-3X', None, None, None, 120, 0, None, None, None, None, None, None, None, None, None, None, None), ('Backorder', datetime.date(2024, 5, 24), datetime.date(2025, 5, 23), 'TN', 'Abby Smith', 1

### Q-5 Ground Truth
SELECT MIN(`Start`) FROM backorder_data;


In [60]:
import time
t1 = time.perf_counter()
question ="Find the earliest start date in the dataset."
generated_sql = chain.invoke({'question':question})
response = sqlparse.format(generated_sql)
print(response)

t2 = time.perf_counter()
print('time taken to run:',t2-t1)       # correct

[(datetime.date(2024, 5, 24),)]
time taken to run: 0.8107054000029166


## Q-6  Ground Truth
SELECT MAX(`End`) FROM backorder_data;


In [65]:
import time
t1 = time.perf_counter()
question ="Find the latest end date in the dataset."
generated_sql = chain.invoke({'question':question})
response = sqlparse.format(generated_sql)
print(response)

t2 = time.perf_counter()
print('time taken to run:',t2-t1)       # correct

[(datetime.date(2025, 5, 23),)]
time taken to run: 0.4813258840003982


## Q-7 Ground Truth
SELECT * FROM backorder_data WHERE `Product Code` LIKE 'FRJ-WG%';


In [83]:
import time
t1 = time.perf_counter()
question ="Retrieve all records where the Product Code contains 'FRJ-WG'"
generated_sql = chain.invoke({'question':question})
response = sqlparse.format(generated_sql)
print(response)

t2 = time.perf_counter()
print('time taken to run:',t2-t1)       # wrong (sql  syntax error)

Error: (mysql.connector.errors.ProgrammingError) 1064 (42000): You have an error in your SQL syntax;check the manual that corresponds to your MySQL server version for the right syntax to use near 'Code LIKE '%FRJ-WG%'' at line 1
[SQL: SELECT * FROM backorder_data WHERE Product Code LIKE '%FRJ-WG%';]
(Background on this error at: https://sqlalche.me/e/20/f405)
time taken to run: 0.8278931540007761


### Q-8 Ground Truth
SELECT DISTINCT `Supplier Id` FROM backorder_data;


In [84]:
import time
t1 = time.perf_counter()
question ="List all unique 'Supplier Id' in the dataset."
generated_sql = chain.invoke({'question':question})
response = sqlparse.format(generated_sql)
print(response)

t2 = time.perf_counter()
print('time taken to run:',t2-t1)       # wrong

Error: (mysql.connector.errors.ProgrammingError) 1054 (42S22): Unknown column 'SupplierId' in 'field list'
[SQL: SELECT DISTINCT SupplierId FROM backorder_data;]
(Background on this error at: https://sqlalche.me/e/20/f405)
time taken to run: 0.7085596880024241


### Q-9 Ground Truth
SELECT COUNT(DISTINCT `Product Code`) FROM backorder_data;


In [85]:
import time
t1 = time.perf_counter()
question ="Count the number of unique 'Product Code' in the dataset."
generated_sql = chain.invoke({'question':question})
response = sqlparse.format(generated_sql)
print(response)

t2 = time.perf_counter()
print('time taken to run:',t2-t1)       # wrong (syntax error)

Error: (mysql.connector.errors.ProgrammingError) 1064 (42000): You have an error in your SQL syntax;check the manual that corresponds to your MySQL server version for the right syntax to use near 'Code) AS Unique_Product_Code_Count FROM backorder_data' at line 1
[SQL: SELECT COUNT(DISTINCT Product Code) AS Unique_Product_Code_Count FROM backorder_data;]
(Background on this error at: https://sqlalche.me/e/20/f405)
time taken to run: 1.035858969000401


### Q- 10 Ground Truth
SELECT COUNT(*) FROM backorder_data WHERE `Location Name` = 'TN';


In [86]:
import time
t1 = time.perf_counter()
question ="Find the total number of backorders for 'TN' location."
generated_sql = chain.invoke({'question':question})
response = sqlparse.format(generated_sql)
print(response)

t2 = time.perf_counter()
print('time taken to run:',t2-t1)       # wrong (syntax error)

Error: (mysql.connector.errors.ProgrammingError) 1064 (42000): You have an error in your SQL syntax;check the manual that corresponds to your MySQL server version for the right syntax to use near '[5/24/2024] + [6/1/2024] + [7/1/2024] + [8/1/2024] + [9/1/2024] + [10/1/2024] + ' at line 1
[SQL: SELECT SUM([5/24/2024] + [6/1/2024] + [7/1/2024] + [8/1/2024] + [9/1/2024] + [10/1/2024] + [11/1/2024] + [12/1/2024] + [1/1/2025] + [2/1/2025] + [3/1/2025] + [4/1/2025]) AS Total_Backorder
FROM backorder_data
WHERE Location Name = 'TN';
```]
(Background on this error at: https://sqlalche.me/e/20/f405)
time taken to run: 2.325281785997504


## Medium Queries

### Q-11 Ground Truth
SELECT SUM(`LT`) FROM backorder_data;



In [87]:
import time
t1 = time.perf_counter()
question ="Find the total LT for all backorders."
generated_sql = chain.invoke({'question':question})
response = sqlparse.format(generated_sql)
print(response)

t2 = time.perf_counter()
print('time taken to run:',t2-t1)       # correct 

[(Decimal('15160'),)]
time taken to run: 0.8313349109994306


### Q-12 Ground Truth
SELECT * FROM backorder_data WHERE `LT` > 120;


In [89]:
import time
t1 = time.perf_counter()
question ="Retrieve records where LT is greater than 120."
generated_sql = chain.invoke({'question':question})
response = sqlparse.format(generated_sql)
print(response)

t2 = time.perf_counter()
print('time taken to run:',t2-t1)       # correct 

[('Backorder', datetime.date(2024, 5, 24), datetime.date(2025, 5, 23), 'TN', 'Abby Smith', 144742, 'JIWANRAM SHEODUTTRAI INDUSTRIES PVT. LTD', 'A -Active', 'A', None, None, None, None, None, None, 'C2354IM', None, None, None, 140, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), ('Backorder', datetime.date(2024, 5, 24), datetime.date(2025, 5, 23), 'TN', 'Abby Smith', 144742, 'JIWANRAM SHEODUTTRAI INDUSTRIES PVT. LTD', 'A -Active', 'A', None, None, None, None, None, None, 'C2354L', None, None, None, 140, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), ('Backorder', datetime.date(2024, 5, 24), datetime.date(2025, 5, 23), 'TN', 'Abby Smith', 144742, 'JIWANRAM SHEODUTTRAI INDUSTRIES PVT. LTD', 'A -Active', 'A', None, None, None, None, None, None, 'C2354M', None, None, None, 140, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), ('Backorder', datetime.date(2024, 5, 24), datetime.date(2025, 5, 23), 'TN', 'Abby Smith', 144742, 'JIWANRAM SHEODUTTRAI INDUSTRIES PVT. LTD', 'A -Active', 'A', None, None, None, None, None, None, 'C23

### Q-13 Ground Truth

SELECT AVG(`LT`) FROM backorder_data;


In [90]:
import time
t1 = time.perf_counter()
question ="Find the average LT for all backorders."
generated_sql = chain.invoke({'question':question})
response = sqlparse.format(generated_sql)
print(response)

t2 = time.perf_counter()
print('time taken to run:',t2-t1)       # correct 

[(Decimal('134.1593'),)]
time taken to run: 0.8718920510000316


### Q-14 Ground Truth
SELECT * FROM backorder_data WHERE `Group1` IS NOT NULL;


In [91]:
import time
t1 = time.perf_counter()
question ="Retrieve all records where 'Group1' is not null."
generated_sql = chain.invoke({'question':question})
response = sqlparse.format(generated_sql)
print(response)

t2 = time.perf_counter()
print('time taken to run:',t2-t1)       # wrong  , it has mentioned table_name instead of backorder_data

Error: (mysql.connector.errors.ProgrammingError) 1146 (42S02): Table 'inventory_db.table_name' doesn't exist
[SQL: SELECT * FROM table_name WHERE Group1 IS NOT NULL;]
(Background on this error at: https://sqlalche.me/e/20/f405)
time taken to run: 0.7451316379992932


### Q-15 Ground Truth
SELECT SUM(`LT`) FROM backorder_data WHERE `Supplier` = 'JIWANRAM SHEODUTTRAI INDUSTRIES PVT. LTD';


In [92]:
import time
t1 = time.perf_counter()
question ="Find the total LT for 'JIWANRAM SHEODUTTRAI INDUSTRIES PVT. LTD' supplier."
generated_sql = chain.invoke({'question':question})
response = sqlparse.format(generated_sql)
print(response)

t2 = time.perf_counter()
print('time taken to run:',t2-t1)       #correct

[(Decimal('136440'),)]
time taken to run: 1.0422449969992158


## Complex questions

### Q-16 Ground Truth
SELECT `Velocity Code`, AVG(`LT`) FROM backorder_data GROUP BY `Velocity Code`;


In [93]:
import time
t1 = time.perf_counter()
question ="Find the average LT for each Velocity Code."
generated_sql = chain.invoke({'question':question})
response = sqlparse.format(generated_sql)
print(response)

t2 = time.perf_counter()
print('time taken to run:',t2-t1)       #wrong , column name identified aas wrong by the model

Error: (mysql.connector.errors.ProgrammingError) 1054 (42S22): Unknown column 'Velocity_Code' in 'field list'
[SQL: SELECT Velocity_Code, AVG(LT) AS Average_LT
FROM backorder_data
GROUP BY Velocity_Code;
```]
(Background on this error at: https://sqlalche.me/e/20/f405)
time taken to run: 0.9374624109987053


### Q-17 Ground truth
SELECT * FROM backorder_data WHERE `LT` > (SELECT AVG(`LT`) FROM backorder_data);


In [99]:
import time
t1 = time.perf_counter()
question ="List all records where LT is greater than the average LT."
generated_sql = chain.invoke({'question':question})
response = sqlparse.format(generated_sql)
print(response)

t2 = time.perf_counter()
print('time taken to run:',t2-t1)       #wrong , column name identified as wrong by the model

Error: (mysql.connector.errors.ProgrammingError) 1054 (42S22): Unknown column 'Product' in 'field list'
[SQL: SELECT Product Code, LT
FROM backorder_data
WHERE LT > (SELECT AVG(LT) FROM backorder_data);]
(Background on this error at: https://sqlalche.me/e/20/f405)
time taken to run: 0.5695702179982618


### Q-18 Ground Truth
SELECT `Supplier`, COUNT(*) FROM backorder_data GROUP BY `Supplier`;


In [107]:
import time
t1 = time.perf_counter()
question ="Find the count of backorders for each Supplier."
generated_sql = chain.invoke({'question':question})
response = sqlparse.format(generated_sql)
print(response)

t2 = time.perf_counter()
print('time taken to run:',t2-t1)       #wrong , 

Exception during reset or similar
Traceback (most recent call last):
  File "/home/erginous/anaconda3/envs/RAG/lib/python3.10/site-packages/sqlalchemy/engine/base.py", line 3244, in begin
    with conn.begin():
  File "/home/erginous/anaconda3/envs/RAG/lib/python3.10/site-packages/sqlalchemy/engine/util.py", line 146, in __exit__
    with util.safe_reraise():
  File "/home/erginous/anaconda3/envs/RAG/lib/python3.10/site-packages/sqlalchemy/util/langhelpers.py", line 146, in __exit__
    raise exc_value.with_traceback(exc_tb)
  File "/home/erginous/anaconda3/envs/RAG/lib/python3.10/site-packages/sqlalchemy/engine/util.py", line 144, in __exit__
    self.commit()
  File "/home/erginous/anaconda3/envs/RAG/lib/python3.10/site-packages/sqlalchemy/engine/base.py", line 2632, in commit
    self._do_commit()
  File "/home/erginous/anaconda3/envs/RAG/lib/python3.10/site-packages/sqlalchemy/engine/base.py", line 2737, in _do_commit
    self._connection_commit_impl()
  File "/home/erginous/anacon

MySQLInterfaceError: Commands out of sync; you can't run this command now

### Q-19 Ground truth

SELECT * FROM backorder_data WHERE `End` LIKE '%2025';


In [108]:
import time
t1 = time.perf_counter()
question ="List all records where the end date is in the year 2025."
generated_sql = chain.invoke({'question':question})
response = sqlparse.format(generated_sql)
print(response)

t2 = time.perf_counter()
print('time taken to run:',t2-t1)       #correct, 

[('Backorder', datetime.date(2024, 5, 24), datetime.date(2025, 5, 23), 'TN', 'Abby Smith', 144742, 'JIWANRAM SHEODUTTRAI INDUSTRIES PVT. LTD', 'A -Active', None, None, None, None, None, None, None, 'FRA-WG-U', None, None, None, 120, 0, None, None, None, None, None, None, None, None, None, None, None), ('Backorder', datetime.date(2024, 5, 24), datetime.date(2025, 5, 23), 'TN', 'Abby Smith', 144742, 'JIWANRAM SHEODUTTRAI INDUSTRIES PVT. LTD', 'A -Active', None, None, None, None, None, None, None, 'FRJ-WG-2X', None, None, None, 120, 0, None, None, None, None, None, None, None, None, None, None, None), ('Backorder', datetime.date(2024, 5, 24), datetime.date(2025, 5, 23), 'TN', 'Abby Smith', 144742, 'JIWANRAM SHEODUTTRAI INDUSTRIES PVT. LTD', 'A -Active', None, None, None, None, None, None, None, 'FRJ-WG-3X', None, None, None, 120, 0, None, None, None, None, None, None, None, None, None, None, None), ('Backorder', datetime.date(2024, 5, 24), datetime.date(2025, 5, 23), 'TN', 'Abby Smith', 1

### Q-20 Ground Truth
SELECT `Activity Code`, COUNT(*) FROM backorder_data GROUP BY `Activity Code`;


In [109]:
import time
t1 = time.perf_counter()
question ="Count the number of backorders for each 'Activity Code'."
generated_sql = chain.invoke({'question':question})
response = sqlparse.format(generated_sql)
print(response)

t2 = time.perf_counter()
print('time taken to run:',t2-t1)       #wrong ,  column name identified wrong

Error: (mysql.connector.errors.ProgrammingError) 1054 (42S22): Unknown column 'Activity_Code' in 'field list'
[SQL: SELECT Activity_Code, COUNT(*) AS Backorder_Count
FROM backorder_data
WHERE Type = 'Backorder'
GROUP BY Activity_Code;
```]
(Background on this error at: https://sqlalche.me/e/20/f405)
time taken to run: 1.003531138998369
