install the model you want to run, use the command line :  ollama run qwen3:8b  

example: qwen3:4b, qwen3:8b etc

In [1]:
semantic_model = 'https://raw.githubusercontent.com/djouallah/Fabric_Notebooks_Demo/refs/heads/main/SemanticModel/sm/semantic_model.txt' 

In [2]:
import requests
import json
import duckdb
import pathlib
import time

In [3]:
def get_ollama_response(user_message,LLM,  ollama_url='http://127.0.0.1:11434/api/chat'):
    try:
        github_response = requests.get(semantic_model)
        github_response.raise_for_status()
        system_prompt = github_response.text.strip()
    except requests.RequestException as e:
        return f"Error fetching system prompt from GitHub: {e}"

    # Prepare payload for Ollama
    payload = {
        'model': LLM,
        'messages': [
            {'role': 'system', 'content': system_prompt},
            {'role': 'user', 'content': user_message}
        ],
        'stream': False
    }

    # Send request to Ollama
    try:
        response = requests.post(ollama_url, json=payload)
        response.raise_for_status()
        return json.loads(response.text)['message']['content'].replace("<think>", "").replace("</think>", "")
    except requests.RequestException as e:
        return f"Error with Ollama API request: {e}"

In [4]:
def execute_sql_with_retry(query, LLM,max_attempts) :
    attempt = 1
    current_query = query.strip()

    while attempt <= max_attempts:
        try:
            # Execute the SQL query
            result = con.sql(current_query).show()
            return str(result)  # Return the result as a string
        except Exception as e:  # DuckDB uses generic Exception for syntax errors; adjust for your engine
            if "syntax" not in str(e).lower() and "parser" not in str(e).lower()  and "binder" not in str(e).lower():
                return f"Non-syntax error: {e}"  # Exit if error is not syntax-related

            print(f"Attempt {attempt}/{max_attempts} failed with syntax error: {e}")
            if attempt == max_attempts:
                return f"Max attempts reached. Last error: {e}"

            # Prepare message for Ollama to fix the query
            ollama_message = (
                f"The following SQL query has a syntax error: '{current_query}'.\n"
                f"Error message: {e}\n"
                f"Please provide the corrected SQL query. Return only the corrected query without explanation."
            )

            # Get corrected query from Ollama
            corrected_query = get_ollama_response( ollama_message, LLM)
            if corrected_query.startswith("Error"):
                return f"Failed to get corrected query from Ollama: {corrected_query}"

            print(f"Received corrected query")
            current_query = corrected_query.strip()
            attempt += 1


In [5]:
SF = 0.1  # Scale Factor
if SF <1 :
 schema = f"{str(SF).replace('.', '_')}"
else :
 schema = f'DS{SF:02d}'
import os
folder_path = '/some_location'
os.makedirs(folder_path, exist_ok=True)
db_path = folder_path +"/"+ schema +".duckdb"
if not pathlib.Path(db_path).exists():
    con = duckdb.connect(db_path)
    con.sql("SET memory_limit = '14GB' ")
    con.sql(f"CALL dsdgen(sf={SF})")
    con.close()
con = duckdb.connect()
con.sql(f""" attach or replace '{db_path}' as ds(read_only) ; use ds """)

In [6]:
def ask_question(questions,LLM):
    for i, x in enumerate(questions):
        print(f"Question {i+1}: {x}")
        start_time = time.time() 
        sql_query = get_ollama_response(x,LLM)
        result = execute_sql_with_retry(sql_query,LLM,max_attempts=3)
        print(result)
        print(sql_query)
        end_time = time.time()
        print(f"\nExecution Time: {end_time - start_time:.2f} seconds")
    return 'done'

In [7]:
questions = [
          'total sales',
          'return rate',
          "Identify the top 10 item categories with the highest total return amount from customers born in 'USA' who made returns in 2001.",
          'customer age group with the worst return rate?',
          'return rate per year',
          'any days with unusual return rate?, use fancy statistics',
          ]

In [8]:
%%time
ask_question(questions,'qwen3:14b')

Question 1: total sales
┌───────────────┐
│  total_sales  │
│ decimal(38,2) │
├───────────────┤
│  514300867.72 │
└───────────────┘

None




-- total sales  
SELECT  
    SUM(ss.ss_sales_price * ss.ss_quantity) AS total_sales  
FROM store_sales AS ss;

Execution Time: 46.78 seconds
Question 2: return rate
┌──────────────────┐
│   return_rate    │
│      double      │
├──────────────────┤
│ 5.19283603163974 │
└──────────────────┘

None




-- return rate
WITH store_sales_agg AS (
    SELECT
        SUM(ss.ss_sales_price * ss.ss_quantity) AS total_sales
    FROM store_sales AS ss
), store_returns_agg AS (
    SELECT
        SUM(sr.sr_return_amt) AS total_returns
    FROM store_returns AS sr
)
SELECT
    (COALESCE(sr.total_returns, 0) / NULLIF(COALESCE(ss.total_sales, 0), 0))  * 100 AS return_rate
FROM store_sales_agg ss, store_returns_agg sr

Execution Time: 42.45 seconds
Question 3: Identify the top 10 item categories with the highest total return amount from customers born in 'USA' wh

'done'

In [9]:
%%time
ask_question(questions,'qwen3:8b')

Question 1: total sales
┌───────────────┐
│  total_sales  │
│ decimal(38,2) │
├───────────────┤
│  514300867.72 │
└───────────────┘

None




-- total sales
SELECT
  total_sales
FROM (
  SELECT
    SUM(ss.ss_sales_price * ss.ss_quantity) AS total_sales
  FROM store_sales AS ss
) AS total_sales_query;

Execution Time: 33.53 seconds
Question 2: return rate
┌──────────────────┐
│   return_rate    │
│      double      │
├──────────────────┤
│ 5.19283603163974 │
└──────────────────┘

None




-- return rate
WITH store_sales_agg AS (
    SELECT
        SUM(ss.ss_sales_price * ss.ss_quantity) AS total_sales
    FROM store_sales AS ss
), store_returns_agg AS (
    SELECT
        SUM(sr.sr_return_amt) AS total_returns
    FROM store_returns AS sr
)
SELECT
    (COALESCE(sr.total_returns, 0) / NULLIF(COALESCE(ss.total_sales, 0), 0)) * 100 AS return_rate
FROM store_sales_agg ss, store_returns_agg sr;

Execution Time: 25.10 seconds
Question 3: Identify the top 10 item categories with the highest to

'done'

In [10]:
%%time
ask_question(questions,'qwen3:4b')

Question 1: total sales
┌───────────────┐
│  total_sales  │
│ decimal(38,2) │
├───────────────┤
│  514300867.72 │
└───────────────┘

None




SELECT
    SUM(ss.ss_sales_price * ss.ss_quantity) AS total_sales
FROM
    store_sales ss;

Execution Time: 16.91 seconds
Question 2: return rate
┌──────────────────┐
│   return_rate    │
│      double      │
├──────────────────┤
│ 5.19283603163974 │
└──────────────────┘

None




-- return rate
WITH store_sales_agg AS (
    SELECT
        SUM(ss.ss_sales_price * ss.ss_quantity) AS total_sales
    FROM store_sales AS ss
), store_returns_agg AS (
    SELECT
        SUM(sr.sr_return_amt) AS total_returns
    FROM store_returns AS sr
)
SELECT
    (COALESCE(sr.total_returns, 0) / NULLIF(COALESCE(ss.total_sales, 0), 0)) * 100 AS return_rate
FROM store_sales_agg ss, store_returns_agg sr;

Execution Time: 8.34 seconds
Question 3: Identify the top 10 item categories with the highest total return amount from customers born in 'USA' who made returns in 200

'done'