In [2]:
# Benchmark configuration: edit these values before running the notebook.

# Raw text of the semantic model; it is downloaded and sent to the LLM as the system prompt.
test_semantic_model_url = (
    "https://raw.githubusercontent.com/djouallah/Fabric_Notebooks_Demo"
    "/refs/heads/main/SemanticModel/sm/semantic_model.txt"
)

# Scale factor handed to DuckDB's dsdgen() when the test database is generated.
SF = 1

# Folder that holds the generated database and every result / export file.
output_dir = "/tmp/llm"

In [1]:
import requests
import json
import duckdb
import pathlib
import time
import pandas as pd
import re
from   openai import AzureOpenAI, OpenAIError
import datetime
import os
import traceback
import google.generativeai as genai


# Generate Data

In [None]:
# Name the database file after the scale factor: fractional factors become
# e.g. "0_1", integral ones become "DS01", "DS10", ...
if SF < 1:
    schema = str(SF).replace('.', '_')
elif float(SF).is_integer():
    # int() keeps the ':02d' format working when SF is written as 1.0 instead of 1.
    schema = f'DS{int(SF):02d}'
else:
    schema = 'DS' + str(SF).replace('.', '_')

os.makedirs(output_dir, exist_ok=True)
db_path = output_dir + "/" + schema + ".duckdb"

# Generate the data only once; later runs reuse the file already on disk.
if not pathlib.Path(db_path).exists():
    gen_con = duckdb.connect(db_path)
    generated = False
    try:
        gen_con.sql("SET memory_limit = '14GB' ")
        gen_con.sql(f"CALL dsdgen(sf={SF})")
        generated = True
    finally:
        # Always release the file handle, even when generation fails.
        gen_con.close()
        if not generated:
            # Remove the half-written database; otherwise the exists() check
            # above would skip generation next time and reuse a broken file.
            for leftover in (db_path, db_path + ".wal"):
                pathlib.Path(leftover).unlink(missing_ok=True)

# Query through a fresh in-memory connection with the data attached read-only,
# so SQL produced by the models cannot modify the generated database.
con = duckdb.connect()
con.sql(f""" attach '{db_path}' as ds(read_only) ; use ds """)

# Run identifier stored in every result row and used in the result file names.
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")

# Run Tests

In [None]:
def get_ai_response(user_message, LLM, provider, api_key=None, azure_endpoint=None, azure_api_version=None):
    """Ask an LLM to answer ``user_message`` with SQL and return the cleaned query text.

    The semantic model at ``test_semantic_model_url`` is downloaded on every call
    and used as the system prompt.

    Args:
        user_message: Text sent as the user turn (a question or a repair request).
        LLM: Model / deployment name handed to the provider.
        provider: One of ``"ollama"``, ``"azure_foundry"`` or ``"gemini"``.
        api_key: API key, required for ``"azure_foundry"``. The Gemini branch does
            not use it and relies on ``genai.configure()`` having been called.
        azure_endpoint: Azure OpenAI endpoint, required for ``"azure_foundry"``.
        azure_api_version: Azure OpenAI API version, required for ``"azure_foundry"``.

    Returns:
        str: The model output with Markdown code fences removed, or a message
        starting with ``"Error"`` when anything went wrong. Callers detect
        failures with ``startswith("Error")``, so every failure path keeps
        that prefix.
    """
    # Validate the arguments first so a bad call fails fast, before any network I/O.
    if provider not in ("ollama", "azure_foundry", "gemini"):
        return f"Error: unsupported provider for this test setup: {provider}. Supported providers are 'ollama', 'azure_foundry', and 'gemini'."
    if provider == "azure_foundry" and not (azure_endpoint and azure_api_version and api_key):
        return "Error: Azure provider requires 'azure_endpoint', 'azure_api_version', and 'api_key' parameters."

    try:
        # A timeout keeps a stalled download from hanging the whole benchmark run.
        github_response = requests.get(test_semantic_model_url, timeout=30)
        github_response.raise_for_status()
        system_prompt = github_response.text.strip()
    except requests.RequestException as e:
        print(f"Error fetching system prompt from GitHub ({test_semantic_model_url}): {e}")
        return f"Error fetching system prompt: {e}"

    generated_text = None

    if provider == "ollama":
        data = None
        payload = {
            'model': LLM,
            'messages': [
                {'role': 'system', 'content': system_prompt},
                {'role': 'user', 'content': user_message}
            ],
            'stream': False
        }
        try:
            # No timeout here on purpose: local generation can legitimately take minutes.
            response = requests.post(
                'http://127.0.0.1:11434/api/chat',
                headers={'Content-Type': 'application/json'},
                json=payload,
            )
            response.raise_for_status()
            data = response.json()
            generated_text = data.get('message', {}).get('content', '')
            if generated_text:
                # Drop the whole <think>...</think> block, not just the tags:
                # the reasoning text inside it is not SQL. Then remove any
                # unpaired tag that is left over.
                generated_text = re.sub(r'<think>[\s\S]*?</think>', '', generated_text, flags=re.IGNORECASE)
                generated_text = generated_text.replace("<think>", "").replace("</think>", "")
        except requests.RequestException as e:
            return f"Error with Ollama API request: {e}"
        except (KeyError, IndexError, AttributeError) as e:
            return f"Error: unexpected response format from Ollama: {e}\nFull response data: {data}"

    elif provider == "azure_foundry":
        try:
            client = AzureOpenAI(
                api_version=azure_api_version,
                azure_endpoint=azure_endpoint,
                api_key=api_key,
            )
            response = client.chat.completions.create(
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_message}
                ],
                model=LLM,
            )
            generated_text = response.choices[0].message.content
        except Exception as e:
            return f"Error with Azure OpenAI: {e}"

    elif provider == "gemini":
        try:
            model = genai.GenerativeModel(LLM)
            # The system prompt and the question are sent together as one prompt.
            final_prompt = f"{system_prompt}\n\nUser's request: {user_message}"
            response = model.generate_content(final_prompt)

            generated_text = ""
            if response.parts:
                generated_text = "".join(part.text for part in response.parts).strip()
            elif hasattr(response, 'text') and response.text:
                generated_text = response.text.strip()
            else:
                if response.prompt_feedback:
                    block_reason = getattr(response.prompt_feedback, 'block_reason', 'Unknown reason')
                    safety_ratings = getattr(response.prompt_feedback, 'safety_ratings', 'N/A')
                    print(f"Gemini Prompt feedback: Blocked - {block_reason}, Safety Ratings: {safety_ratings}")
                    return f"Error: Gemini model did not generate text. Blocked due to: {block_reason}. Safety Ratings: {safety_ratings}"
                print("Gemini API returned no generated text and no prompt feedback.")
                return "Error: Gemini API returned no generated text and no prompt feedback."

        except Exception as e:
            print(f"An error occurred during Gemini API call or processing: {e}\n{traceback.format_exc()}")
            if "configure" in str(e).lower() or "api key" in str(e).lower():
                return f"Error: Gemini API configuration error: {e}. Ensure genai.configure() is called with a valid API key."
            return f"Error with Gemini API request or processing: {e}"

    if not isinstance(generated_text, str):
        print(f"Generated text is not a string (type: {type(generated_text)}). Cannot clean.")
        if generated_text is None:
            return "Error: could not retrieve generated text from provider response."
        return f"Error: received unexpected output type from {provider}: {type(generated_text)}. Value: {generated_text}"

    # Unwrap ```sql ... ``` / ```duckdb ... ``` / ``` ... ``` blocks, keeping their content.
    cleaned_text = re.sub(r'```(sql|duckdb)?\s*([\s\S]*?)\s*```', r'\2', generated_text, flags=re.IGNORECASE).strip()

    # Remove any unpaired fence that survived (a no-op when none is left).
    cleaned_text = cleaned_text.replace("```sql", "").replace("```duckdb", "").replace("```", "").strip()

    return cleaned_text

In [None]:
def execute_sql_with_retry(query, test_model,provider, api_key, azure_endpoint, azure_api_version,max_attempts):
    """Run ``query`` on the notebook-level ``con`` and let the LLM repair syntax-like errors.

    Only errors whose message mentions "syntax", "parser" or "binder" are sent
    back to the model for correction; any other error ends the run at once.

    Returns:
        tuple: ``(result, attempts)`` where ``result`` is a pandas DataFrame on
        success or an error string on failure, and ``attempts`` is the number
        of the attempt that produced it.
    """
    retryable_markers = ("syntax", "parser", "binder")
    sql_text = query.strip()
    attempt = 1

    while attempt <= max_attempts:
        try:
            return con.sql(sql_text).df(), attempt
        except Exception as e:
            lowered = str(e).lower()

            # Errors that are not syntax-like are not sent back for repair.
            if not any(marker in lowered for marker in retryable_markers):
                return f"Non-syntax error: {e}", attempt

            print(sql_text)
            print(f"Attempt {attempt}/{max_attempts} failed with syntax error: {e}")

            # Out of attempts: report the last failure.
            if attempt == max_attempts:
                return f"Max attempts reached. Last error: {e}", attempt

            # Ask the same model and provider to correct its own query.
            repair_request = (
                f"The following SQL query has a syntax error: '{sql_text}'.\n"
                f"Error message: {e}\n"
                f"Please provide the corrected SQL query. Return only the corrected query without explanation."
            )
            repaired = get_ai_response(repair_request, test_model,provider, api_key, azure_endpoint, azure_api_version)

            # The model call itself failed, so stop instead of executing an error message.
            if repaired.startswith("Error"):
                return f"Failed to get corrected query : {repaired}", attempt

            sql_text = repaired.strip()
            attempt += 1

    # Only reached when the loop body never ran (max_attempts below 1).
    return "Unexpected error or loop termination", attempt



In [None]:
def ask_question(questions, test_model,provider,api_key=None, azure_endpoint=None, azure_api_version=None):
    """Send every question to the model, execute the returned SQL and save a JSON report.

    For each question the generated query is run through
    ``execute_sql_with_retry`` (up to 3 attempts). One record per question is
    written to ``<output_dir>/<timestamp>_<model>.json``; ``output_dir`` and
    ``timestamp`` are notebook-level variables.

    Args:
        questions: Iterable of natural-language questions.
        test_model: Model / deployment name; also used in the report file name.
        provider: Provider key understood by ``get_ai_response``.
        api_key, azure_endpoint, azure_api_version: Passed through unchanged to
            ``get_ai_response``.

    Returns:
        str: A status message saying where the report was saved, or why saving failed.
    """
    results_data = []
    for i, x in enumerate(questions):
        print(f"Question {i+1}: {x}")
        start_time = time.time()
        sql_query_or_error = get_ai_response(x, test_model, provider, api_key, azure_endpoint, azure_api_version)
        print(sql_query_or_error)

        # Defaults describe a failed question; the success path overwrites them.
        query_result_data_json = []
        attempts_count = None
        error_details = None
        result_row_count = 0

        if sql_query_or_error is None or sql_query_or_error.startswith("Error"):
            # The model call failed, so there is nothing to execute.
            error_message = sql_query_or_error if sql_query_or_error is not None else "AI response was None"
            error_details = f"AI Error: {error_message}"
        else:
            result_from_execution, attempts_count = execute_sql_with_retry(sql_query_or_error, test_model,provider,api_key, azure_endpoint, azure_api_version, max_attempts=3)

            display(result_from_execution)

            # A DataFrame means success; anything else is an error string.
            if isinstance(result_from_execution, pd.DataFrame):
                print("Execution: SUCCESS")
                query_result_data_json = result_from_execution.to_dict('records')
                result_row_count = len(result_from_execution)
            else:
                print("Execution: FAILED")
                error_details = f"Execution Error: {result_from_execution}"

        duration = round(time.time() - start_time, 2)
        print(f"\nExecution Time: {duration:.2f} seconds")
        print(f" ############################### ")
        results_data.append({
            "model" : test_model,
            "timestamp": timestamp,
            "nbr": i + 1,
            "question": x,
            "duration_s": duration,
            "sql_query": sql_query_or_error,  # generated query, or the AI error text
            "attempts": attempts_count,
            "result": query_result_data_json,  # list of row dictionaries
            "result_count": result_row_count,
            "error_details": error_details
        })

    os.makedirs(output_dir, exist_ok=True)
    # Replace characters that are not allowed in file names (e.g. ':' in 'qwen3:4b').
    sanitized_model = re.sub(r'[\\/*?:"<>|]', '_', test_model)
    output_filename = f"{timestamp}_{sanitized_model}.json"
    output_path = os.path.join(output_dir, output_filename)
    try:
        # Serialise fully in memory before touching the file, so a failure can
        # never leave a truncated JSON file behind. default=str keeps values
        # the json module cannot encode (Decimal, date, Timestamp, ...) as
        # text instead of aborting and losing the whole run.
        report = json.dumps(results_data, indent=4, default=str)
        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(report)
        return f"Successfully processed {len(questions)} questions. Results saved to {output_path}"
    except IOError as e:
        return f"Error saving results to {output_path}: {e}"
    except Exception as e:
        return f"An unexpected error occurred during file saving: {e}"

In [None]:
# Benchmark questions, grouped by difficulty. Each string is sent to the model
# exactly as written (typos included), and a question's position in this list
# becomes its "nbr" in the saved results.
questions = [
    # Easy Questions:
    "What is the overall total sales revenue?",
    "What is the total number of items sold across all transactions?",
    "What is the total monetary value of all returned items?",
    "List the names of all stores, order by store name.",
    "What are the different item categories available? Order alphabetically by category name.",

    # Medium Questions:
    "Show total sales revenue for each year, ordered chronologically by year.",
    "Which store generated the most total sales revenue? (To ensure a deterministic result if there's a tie, you might order by revenue descending and then store name alphabetically).",
    "What is the total quantity of items sold, broken down by item brand? Order by quantity sold descending, and then by brand name alphabetically for ties.",
    "Compare total sales from preferred customers versus non-preferred customers, ordered by total sales",
    "What is the total return amount for each city where stores are located, ordered alphabetically by city name.",

    # Hard Questions:
    "What is the net sales for each store name, order by net sales.",
    "Calculate the return rate for each item category, ordered alphabetically by item category name.",
    "What is the monthly trend of net sales during the year 2001, ordere by net sales.",
    "Which customer birth country exhibits the highest average return rate Order by average return rate descending ?",
    "List all item product names that have a return rate greater than 5% and their total sales, ordered by item product name alphabetically.",

    # Very Hard Questions:
    "For each store, what was the percentage change in net sales between two consecutive recent years (e.g., 2001 and 2002), ordered alphabetically by store name.",
    "What is the return rate for items sold on weekends versus weekdays, broken down by customer age groups (e.g., under 30, 30-45, over 45), ordered by age group.",
    "Which item brand has shown the largest decrease in its return rate when comparing the average rate of 2001 to 2002, specifically for stores located in the 'TN' state? (Order by the decrease in return rate descending, and then by brand name alphabetically for ties).",
    "For each item class, compare the average net sales value per sales transaction between preferred and non-preferred customers. Order by average net sales descending, and then by item class name alphabetically for ties.",
    "show all stores net sales , average net sales and percentage of net sales compared to average net sales for each store, ordered by percentage of net sales descending.",
]

In [None]:
%%time
# Azure run: the key and endpoint are read from environment variables.
ask_question(
    questions,
    "gpt-4o",
    "azure_foundry",
    api_key=os.getenv("LLM_API_KEY"),
    azure_endpoint=os.getenv("llm_endpoint"),
    azure_api_version="2025-01-01-preview",
)

In [None]:
%%time
# Gemini run: the client is configured globally, so no key is passed to ask_question.
genai.configure(api_key=os.getenv("G_API_KEY"))
ask_question(questions, "gemini-2.5-flash-preview-04-17", provider="gemini")

In [None]:
%%time
# Ollama run against the local server (127.0.0.1); no credentials are passed.
ask_question(questions, "qwen3:4b", provider="ollama")

# Check Results

In [None]:
# Expose every saved JSON report as a single DuckDB view named "results".
all_reports = duckdb.sql(f""" from '{output_dir}/*.json' order by nbr,model """)
all_reports.to_view("results")

# Best-effort Excel export: a failure is only reported, not raised.
results_export_sql = f""" load excel ;copy results to '{output_dir}/test.xlsx' (format 'xlsx', header 'true', overwrite)  """
try:
    duckdb.sql(results_export_sql)
except Exception as e:
    print(f"Error exporting to Excel: {e}")
    print("close the excel file or tey again.")

┌──────────────────────┬─────────────────┬───────┬──────────────────────┬────────────┬──────────────────────┬──────────┬────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────

In [None]:
# Queries with errors: rows where the LLM call or the SQL execution failed
# (error_details is only filled in on those two paths).
duckdb.sql(""" from results where error_details is not null""")

┌──────────┬─────────────────┬───────┬───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬────────────┬─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────

In [None]:
# Questions where the models disagree on the shape of the answer: the first
# result row has a different number of columns, or the row count differs.
shape_mismatch_sql = """ select nbr from results group by all having count(distinct(cardinality(result[1]))) > 1 or count(distinct(result_count )) > 1 order by nbr """
duckdb.sql(shape_mismatch_sql).show(max_width=250)

┌───────┐
│  nbr  │
│ int64 │
├───────┤
│     5 │
│     7 │
│     9 │
│    12 │
│    13 │
│    14 │
│    15 │
│    16 │
│    17 │
│    18 │
│    19 │
│    20 │
└───────┘



In [None]:
# Number of rows returned per question, one column per model.
row_count_sql = """ pivot( select nbr,question,model,result_count from results) on model using min(result_count ) order by nbr"""
duckdb.sql(row_count_sql).show(max_width=120)

┌───────┬─────────────────────────────────────────────────────────┬────────────────────────────────┬────────┬──────────┐
│  nbr  │                        question                         │ gemini-2.5-flash-preview-04-17 │ gpt-4o │ qwen3:4b │
│ int64 │                         varchar                         │             int64              │ int64  │  int64   │
├───────┼─────────────────────────────────────────────────────────┼────────────────────────────────┼────────┼──────────┤
│     1 │ What is the overall total sales revenue?                │                              1 │      1 │        1 │
│     2 │ What is the total number of items sold across all tra…  │                              1 │      1 │        1 │
│     3 │ What is the total monetary value of all returned items? │                              1 │      1 │        1 │
│     4 │ List the names of all stores, order by store name.      │                             12 │     12 │       12 │
│     5 │ What are the different

In [None]:
# Column names of the first result row per question, one column per model.
column_names_sql = """ pivot( select nbr,model,result from results) on model using min(MAP_KEYS(result[1]) ) order by nbr"""
duckdb.sql(column_names_sql).show(max_width=250)

┌───────┬────────────────────────────────────────────────────────────────────────────────────────┬───────────────────────────────────────────────────────────────────────────┬───────────────────────────────────────────────────────────────────────────┐
│  nbr  │                             gemini-2.5-flash-preview-04-17                             │                                  gpt-4o                                   │                                 qwen3:4b                                  │
│ int64 │                                       varchar[]                                        │                                 varchar[]                                 │                                 varchar[]                                 │
├───────┼────────────────────────────────────────────────────────────────────────────────────────┼───────────────────────────────────────────────────────────────────────────┼─────────────────────────────────────────────────────────────────────────

Manual check for similar results (same rows and columns)

In [30]:
# Side-by-side results for the questions where every model returned the same
# number of rows and columns, so the values can be compared by eye.
manual_sql = """
   with   zzzzz AS(
    SELECT 
        nbr, 
        model, 
        result
    FROM results 
    WHERE nbr not in ( select nbr from results group by all having count(distinct(cardinality(result[1]))) > 1 or count(distinct(result_count )) > 1 order by nbr))
pivot(from zzzzz ) on model using min(result) order by nbr
"""
# The variable must stay named "manual": the COPY statement below refers to it by name.
manual = duckdb.sql(manual_sql)
manual.show(max_width=250)

# Best-effort Excel export: a failure is only reported, not raised.
manual_export_sql = f""" load excel ;copy manual to '{output_dir}/check.xlsx' (format 'xlsx', header 'true', overwrite)  """
try:
    duckdb.sql(manual_export_sql)
except Exception as e:
    print(f"Error exporting to Excel: {e}")
    print("close the excel file or tey again.")

┌───────┬──────────────────────┬──────────────────────┬──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│  nbr  │ gemini-2.5-flash-p…  │        gpt-4o        │                                                                                             qwen3:4b                                                                                             │
│ int64 │ map(varchar, json)[] │ map(varchar, json)[] │                                                                                       map(varchar, json)[]                                                                                       │
├───────┼──────────────────────┼──────────────────────┼────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────