# Install / Import / Config

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
from pathlib import Path
from dotenv import load_dotenv

from IPython.display import Markdown, display

import edurel.utils.dbcon as dbcu
import edurel.utils.db as dbu
import edurel.utils.duckdb as ddbu
import edurel.utils.llm as llmu
import edurel.utils.llmchat as llmc
import edurel.utils.yaml_utils as yu
import edurel.utils.mermaid as mmu
import edurel.widgets.mermaid_viz as mmw
import edurel.widgets.chatman as cmw
import edurel.utils.misc as mu

# Load variables from a local .env file (if any) into the process environment.
load_dotenv()
BASE_DIR = os.getenv("BASE_DIR")
if not BASE_DIR:
    # os.getenv returns None for an unset variable; without this guard the
    # f-strings below would silently produce "None/databases" and "None/logs".
    raise RuntimeError(
        "BASE_DIR is not set; define it in the environment or in a .env file."
    )
DB_DIR = f"{BASE_DIR}/databases"
LOG_DIR = f"{BASE_DIR}/logs"

# Tag passed to log_conversation in the cells that log a conversation.
DB_TAG = "adw-olap"

def run(tag, q, model, schema, db, dbrun=True):
    """Turn question ``q`` into SQL with ``model`` and optionally execute it.

    Prints ``tag`` and the question, obtains a query from
    ``llmu.chat_text_to_sql(model, schema, q)``, prints that query and, when
    ``dbrun`` is true, passes it to ``db.sql_print``.

    Returns:
        The generated SQL, whether or not it was executed.
    """
    print(f"{tag}:\n{q}")
    generated_sql = llmu.chat_text_to_sql(model, schema, q)
    print(generated_sql)
    if not dbrun:
        return generated_sql
    db.sql_print(generated_sql)
    return generated_sql

  from pydantic.v1.fields import FieldInfo as FieldInfoV1


# Database

In [3]:
# Open the database connection through the project helper.
con = dbcu.adw_olap()
# Extra foreign keys handed to DbHandler, keyed by table name.
# NOTE(review): each entry looks like "Table|SourceColumn|TargetColumn" and
# describes a self-referencing parent key — confirm the format against DbHandler.
additional_fks = {
    "DimAccount": ["DimAccount|ParentAccountKey|AccountKey"],
    "DimDepartmentGroup": ["DimDepartmentGroup|ParentDepartmentGroupKey|DepartmentGroupKey"],
    "DimEmployee": ["DimEmployee|ParentEmployeeKey|EmployeeKey"],
    "DimOrganization": ["DimOrganization|ParentOrganizationKey|OrganizationKey"],
    }

db = dbu.DbHandler(con,additional_fks=additional_fks)
# Base schema as a YAML string; it is reduced with a spec in the next cell.
# NOTE(review): the list argument is presumably a set of tags to leave out of
# the YAML — confirm against schema_yaml_str.
schema_base = db.schema_yaml_str(["nullable", "fkname"])

In [4]:
spec = """
del table pattern: DimAccount
del table pattern: DimDepartmentGroup
del table pattern: DimEmployee
del table pattern: DimOrganization
del table pattern: DimReseller
del table pattern: DimScenario
del table pattern: DimSalesReason
del table pattern: FactCurrencyRate
del table pattern: FactAdditionalInternationalProductDescription
del table pattern: FactCallCenter
del table pattern: FactFinance
del table pattern: FactInternetSalesReason
del table pattern: FactProductInventory
del table pattern: FactResellerSales
del table pattern: FactSalesQuota
del table pattern: FactSurveyResponse
del table pattern: NewFactCurrencyRate
del table pattern: ProspectiveBuyer
del column FactInternetSales index: [8:17, 19:]
del column DimPromotion index: [1, 3, 4, 7, 8, 10, 11]
del column DimProduct index: [1, 3, 4, 6, 7, 8:]
del column DimProductSubcategory index: [1, 3, 4]
del column DimProductCategory index: [1, 3, 4]
del column DimCustomer index: [2, 3, 5, 7:]
del column DimGeography index: [6, 7, 10]

"""
schema = yu.yaml_to_yaml(schema_base, spec)
# print(schema)

In [5]:
# Disabled: export the reduced schema as a PNG diagram. The argument is `schema`
# (the variable built in the previous cell); uncomment to regenerate schema.png.
# mmu.schema_mermaid_png(schema, output_path="schema.png", omit_tags=["nullable", "fkname"], direction="LR", scale=6)


## Order date range

In [6]:
sql = """
select min(orderdate) as min_order_date,
       max(orderdate) as max_order_date
from FactInternetSales;
"""
ddbu.sql_print(con, sql)

┌─────────────────────┬─────────────────────┐
│   min_order_date    │   max_order_date    │
│      timestamp      │      timestamp      │
├─────────────────────┼─────────────────────┤
│ 2010-12-29 00:00:00 │ 2014-01-28 00:00:00 │
└─────────────────────┴─────────────────────┘



# Prompts

In [7]:
system_prompt1 = """
You are an expert SQL query generator. 
Convert natural language questions into valid SQL queries.
Use duckdb syntax.
"""

# Questions

## q1

In [8]:
q1 = """
- internet sales amount of all product categories in 2012 on month level and also year over year in 2011
- Output Category, Year, Month, YearPrevious, MonthPrevious, Sales, SalesPrevious
- Sorted by Category, Year, Month
"""

In [9]:
# NOTE(review): this re-assigns q1 and replaces the year-over-year variant
# defined in the previous cell; on a top-to-bottom run only this simpler
# question reaches the model cells below.
q1 = """
- internet sales amount of all product categories
- Output Category, SalesAmount
- Sorted by Category
"""

## q2

In [10]:
q2 = """
- internet sales amount of top 3 product subcategories overall and top 3 cities with most customers
- Output Subcategory, City, SalesAmount
- Sorted by Subcategory, City
"""

## q3

In [11]:
q3 = """
- internet sales amount of top 3 product subcategories overall and top 3 cities with most customers
- in the output there might be more than 3 entries because of ties, use ranking function
- Output Subcategory, City, SalesAmount
- Sorted by Subcategory, City
"""

## q4

In [12]:
q4 = """
- internet sales amount of top 3 product subcategories overall and top 3 cities with most customers
- in the output there might be more than 3 entries because of ties, use ranking function
- Output Subcategory, City, SalesAmount
- Sorting should be done as follows
  - Subcategory according to their overall sales amount descending
  - City according to their number of customers descending
"""

## q5

## q6

## q7

## q8

## q9

## q10

## q11

## q12

## q13

## q14

## q15

## q16

## q17

## q18

## q19

## q20

## q21

## q22

## q23

In [13]:
# NOTE(review): this cell sits under the "q23" heading but assigns q1, with the
# same text as the earlier simple q1 cell — confirm whether it was meant to
# define q23 instead.
q1 = """
- internet sales amount of all product categories
- Output Category, SalesAmount
- Sorted by Category
"""

## q24

In [14]:
# NOTE(review): this question refers to a Customer table with persontype and
# modifieddate and to orders placed in 2024. The reduced schema used in this
# notebook only shows DimCustomer (CustomerKey, GeographyKey, FirstName,
# LastName), and the order-date check above ends on 2014-01-28, so the prompt
# looks written for a different database — confirm before running it here.
q24 = """
create a SQL query that outputs the following columns:
- tag either 'shortest' or 'longest'
- persontype of person
- numberof no of customers in that group
the output should be calculated as follows:
- 'shortest' are customer with minimal modifieddate in the Customer table
- 'longest' are customer with maximal modifieddate in the Customer table    
- regard only customers that have placed orders in 2024
sorted by tag and persontype;
"""

# DEEPSEEK32

In [36]:
def conversation(chat, lastn_only=3):
    """Display the conversation of ``chat`` as rendered Markdown.

    ``lastn_only`` is forwarded unchanged to ``chat.show_conversation``.
    """
    rendered = chat.show_conversation(lastn_only=lastn_only)
    display(Markdown(rendered))
    
def init_chat(model, system_prompt, schema):
    """Create an ``LLMChat`` for ``model`` primed with prompt and schema.

    Sets ``system_prompt`` on the new chat, then appends the schema (wrapped
    by ``mu.md_yaml``) as a message at the end of the conversation.

    Returns:
        The prepared chat object.
    """
    new_chat = llmc.LLMChat(model)
    new_chat.set_system_prompt(system_prompt)
    schema_message = mu.md_yaml(schema)
    new_chat.insert_message_at_end(schema_message)
    return new_chat

def run_chat(chat, db, q):
    """Ask ``q`` in ``chat``, execute the extracted SQL and record the outcome.

    The reply from ``chat.add_user_message(q)`` and the output of
    ``db.sql_str`` are both treated as failures when they contain the
    substring ``"err:"``. A failure text is appended to the conversation as an
    ``"ai"`` message; a successful query result is appended wrapped by
    ``mu.md_plain``. Nothing is returned.
    """
    reply = chat.add_user_message(q)
    if "err:" in reply:
        # The model call itself reported an error: record it and stop.
        failure = reply
    else:
        query = mu.sql_extract(reply)
        result = db.sql_str(query)
        if "err:" not in result:
            chat.insert_message_at_end(mu.md_plain(result))
            return
        # The database rejected the query: record its error text instead.
        failure = result
    chat.insert_message_at_end(failure, msg_type="ai")

In [43]:
chat_deepseek32 = init_chat(llmu.stats_c(llmu.DEEPSEEK32), system_prompt1, schema)

In [44]:
run_chat(chat_deepseek32, db, q1)

In [45]:
conversation(chat_deepseek32, lastn_only=2)

[3] AI:
 ```sql
select count(*) as cnt from dimxcustomer;
```

[4] AI:
 err: Catalog Error: Table with name dimxcustomer does not exist!
Did you mean "DimCustomer"?

# GEMINI25FLASH

# GEMINI3FLASH

In [48]:
gemini3flash = llmu.stats_c(llmu.GEMINI3FLASH)
chat_gemini3flash = llmc.LLMChat(gemini3flash)
chat_gemini3flash.set_system_prompt(system_prompt1)

## q1

In [53]:
chat_gemini3flash.clear_conversation()
chat_gemini3flash.add_user_message(schema)
content_gemini3flash_q1 = chat_gemini3flash.add_user_message(q1)
sql_gemini3flash_q1 = mu.sql_extract(content_gemini3flash_q1)
output_gemini3flash_q1 = db.sql_str(sql_gemini3flash_q1)
chat_gemini3flash.insert_message_at_end(mu.md_plain(output_gemini3flash_q1))

APITimeoutError: Request timed out.

# GEMINI3PRO

In [19]:
gemini3_pro = llmu.stats_c(llmu.GEMINI3PRO)


In [20]:
sql = run("q1", q1, gemini3_pro, schema, db, dbrun=True)

q1:

Turn the following question into SQL
- internet sales amount of all product categories in 2012 on month level and also year over year in 2011
- Output Category, Year, Month, YearPrevious, MonthPrevious, Sales, SalesPrevious
- Sorted by Category, Year, Month


WITH MonthlySales AS (
    SELECT
        pc.EnglishProductCategoryName AS Category,
        d.CalendarYear AS Year,
        d.MonthNumberOfYear AS Month,
        SUM(fis.SalesAmount) AS Sales
    FROM FactInternetSales fis
    JOIN DimDate d ON fis.OrderDateKey = d.DateKey
    JOIN DimProduct p ON fis.ProductKey = p.ProductKey
    JOIN DimProductSubcategory psc ON p.ProductSubcategoryKey = psc.ProductSubcategoryKey
    JOIN DimProductCategory pc ON psc.ProductCategoryKey = pc.ProductCategoryKey
    WHERE d.CalendarYear IN (2011, 2012)
    GROUP BY pc.EnglishProductCategoryName, d.CalendarYear, d.MonthNumberOfYear
)
SELECT
    curr.Category,
    curr.Year,
    curr.Month,
    prev.Year AS YearPrevious,
    prev.Month AS Month

# GLM47

In [10]:
glm47 = llmu.stats_c(llmu.GLM47)
chat_glm47 = llmc.LLMChat(glm47)
chat_glm47.set_system_prompt(system_prompt1)

## q1

In [None]:
chat_glm47.clear_conversation()
chat_glm47.add_user_message(schema)
content_glm47_q1 = chat_glm47.add_user_message(q1)

In [30]:
sql_glm47_q1 = mu.sql_extract(content_glm47_q1)
output_glm47_q1 = db.sql_str(sql_glm47_q1)
print(output_glm47_q1)

Binder Error: aggregate function calls cannot contain window function calls


In [33]:
chat_glm47.insert_message_at_end(mu.md_plain(output_glm47_q1))

In [38]:
chat_glm47.delete_messages([6, 7])

2

In [39]:
display(Markdown(chat_glm47.show_conversation(lastn_only=2)))

[4] AI:
 ```sql
SELECT
    dpc.EnglishProductCategoryName AS Category,
    dd.CalendarYear AS Year,
    dd.MonthNumberOfYear AS Month,
    dd.CalendarYear - 1 AS YearPrevious,
    dd.MonthNumberOfYear AS MonthPrevious,
    SUM(fis.SalesAmount) AS Sales,
    SUM(LAG(fis.SalesAmount, 12, 0) OVER (PARTITION BY dpc.EnglishProductCategoryName ORDER BY dd.CalendarYear, dd.MonthNumberOfYear)) AS SalesPrevious
FROM
    FactInternetSales fis
    JOIN DimProduct dp ON fis.ProductKey = dp.ProductKey
    JOIN DimProductSubcategory dps ON dp.ProductSubcategoryKey = dps.ProductSubcategoryKey
    JOIN DimProductCategory dpc ON dps.ProductCategoryKey = dpc.ProductCategoryKey
    JOIN DimDate dd ON fis.OrderDateKey = dd.DateKey
WHERE
    dd.CalendarYear IN (2011, 2012)
GROUP BY
    dpc.EnglishProductCategoryName,
    dd.CalendarYear,
    dd.MonthNumberOfYear
ORDER BY
    Category,
    Year,
    Month
```

[5] USER:
 ```plaintext
Binder Error: aggregate function calls cannot contain window function calls
```

In [None]:
file_path = chat_glm47.log_conversation(LOG_DIR, DB_TAG, llmu.GLM47, "q1", "v1")
print(f"Conversation logged to: {file_path}")

## q2

In [67]:
# Fresh GLM47 conversation for q2: add the schema, then the question.
# Uses chat_glm47 (created at the top of this section); the bare name `chat`
# is never defined in this notebook and fails on a fresh kernel.
chat_glm47.clear_conversation()
chat_glm47.add_user_message(schema)
r = chat_glm47.add_user_message(q2)

In [69]:
sql = mu.sql_extract(r)
db.sql_print(sql)

┌────────────────┬─────────┬───────────────┐
│  Subcategory   │  City   │  SalesAmount  │
│    varchar     │ varchar │ decimal(38,2) │
├────────────────┼─────────┼───────────────┤
│ Mountain Bikes │ Concord │    9952760.77 │
│ Mountain Bikes │ London  │    9952760.77 │
│ Mountain Bikes │ Paris   │    9952760.77 │
│ Road Bikes     │ Concord │   14520583.50 │
│ Road Bikes     │ London  │   14520583.50 │
│ Road Bikes     │ Paris   │   14520583.50 │
│ Touring Bikes  │ Concord │    3844801.05 │
│ Touring Bikes  │ London  │    3844801.05 │
│ Touring Bikes  │ Paris   │    3844801.05 │
└────────────────┴─────────┴───────────────┘



## q3

In [None]:
# Fresh GLM47 conversation for q3: add the schema, then the question.
# Uses chat_glm47 (created at the top of this section); the bare name `chat`
# is never defined in this notebook and fails on a fresh kernel.
chat_glm47.clear_conversation()
chat_glm47.add_user_message(schema)
r = chat_glm47.add_user_message(q3)

In [None]:
sql = mu.sql_extract(r)
db.sql_print(sql)

## q4

In [70]:
# Fresh GLM47 conversation for q4: add the schema, then the question.
# Uses chat_glm47 (created at the top of this section); the bare name `chat`
# is never defined in this notebook and fails on a fresh kernel.
chat_glm47.clear_conversation()
chat_glm47.add_user_message(schema)
r = chat_glm47.add_user_message(q4)

In [72]:
sql = mu.sql_extract(r)
db.sql_print(sql)

┌────────────────┬─────────┬───────────────┐
│  Subcategory   │  City   │  SalesAmount  │
│    varchar     │ varchar │ decimal(38,2) │
├────────────────┼─────────┼───────────────┤
│ Road Bikes     │ London  │   14520583.50 │
│ Road Bikes     │ Paris   │   14520583.50 │
│ Road Bikes     │ Concord │   14520583.50 │
│ Road Bikes     │ Burien  │   14520583.50 │
│ Mountain Bikes │ London  │    9952760.77 │
│ Mountain Bikes │ Paris   │    9952760.77 │
│ Mountain Bikes │ Concord │    9952760.77 │
│ Mountain Bikes │ Burien  │    9952760.77 │
│ Touring Bikes  │ London  │    3844801.05 │
│ Touring Bikes  │ Paris   │    3844801.05 │
│ Touring Bikes  │ Concord │    3844801.05 │
│ Touring Bikes  │ Burien  │    3844801.05 │
├────────────────┴─────────┴───────────────┤
│ 12 rows                        3 columns │
└──────────────────────────────────────────┘



# OPUS45

In [19]:
opus45 = llmu.stats_c(llmu.OPUS45)
chat_opus45 = llmc.LLMChat(opus45)
chat_opus45.set_system_prompt(system_prompt1)

## q1

In [20]:
chat_opus45.clear_conversation()
chat_opus45.add_user_message(schema)
content_opus45_q1 = chat_opus45.add_user_message(q1)
sql_opus45_q1 = mu.sql_extract(content_opus45_q1)
output_opus45_q1 = db.sql_str(sql_opus45_q1)
chat_opus45.insert_message_at_end(mu.md_plain(output_opus45_q1))

KeyboardInterrupt: 

In [47]:
file_path = chat_opus45.log_conversation(LOG_DIR, DB_TAG, llmu.OPUS45, "q1", "v1")
print(f"Conversation logged to: {file_path}")

Conversation logged to: /home/iclassen/1/edurel/logs/adw-olap/claude-opus-4-5-20251101/q1/v1/2026_01_21___15_22_45.json


In [21]:
display(Markdown(chat_opus45.show_conversation(lastn_only=3)))

[0] SYSTEM:
 
You are an expert SQL query generator. 
Convert natural language questions into valid SQL queries.
Use duckdb syntax.


[1] USER:
 tables:
- tablename: DimCurrency
  columns:
  - columnname: CurrencyKey
    type: INTEGER
  - columnname: CurrencyAlternateKey
    type: VARCHAR
  - columnname: CurrencyName
    type: VARCHAR
  primary_key:
  - CurrencyKey
- tablename: DimCustomer
  columns:
  - columnname: CustomerKey
    type: INTEGER
  - columnname: GeographyKey
    type: INTEGER
  - columnname: FirstName
    type: VARCHAR
  - columnname: LastName
    type: VARCHAR
  primary_key:
  - CustomerKey
  foreign_keys:
  - sourcecolumns:
    - GeographyKey
    targettable: DimGeography
    targetcolumns:
    - GeographyKey
- tablename: DimDate
  columns:
  - columnname: DateKey
    type: INTEGER
  - columnname: FullDateAlternateKey
    type: DATE
  - columnname: DayNumberOfWeek
    type: INTEGER
  - columnname: EnglishDayNameOfWeek
    type: VARCHAR
  - columnname: SpanishDayNameOfWeek
    type: VARCHAR
  - columnname: FrenchDayNameOfWeek
    type: VARCHAR
  - columnname: DayNumberOfMonth
    type: INTEGER
  - columnname: DayNumberOfYear
    type: INTEGER
  - columnname: WeekNumberOfYear
    type: INTEGER
  - columnname: EnglishMonthName
    type: VARCHAR
  - columnname: SpanishMonthName
    type: VARCHAR
  - columnname: FrenchMonthName
    type: VARCHAR
  - columnname: MonthNumberOfYear
    type: INTEGER
  - columnname: CalendarQuarter
    type: INTEGER
  - columnname: CalendarYear
    type: INTEGER
  - columnname: CalendarSemester
    type: INTEGER
  - columnname: FiscalQuarter
    type: INTEGER
  - columnname: FiscalYear
    type: INTEGER
  - columnname: FiscalSemester
    type: INTEGER
  primary_key:
  - DateKey
- tablename: DimGeography
  columns:
  - columnname: GeographyKey
    type: INTEGER
  - columnname: City
    type: VARCHAR
  - columnname: StateProvinceCode
    type: VARCHAR
  - columnname: StateProvinceName
    type: VARCHAR
  - columnname: CountryRegionCode
    type: VARCHAR
  - columnname: EnglishCountryRegionName
    type: VARCHAR
  - columnname: PostalCode
    type: VARCHAR
  - columnname: SalesTerritoryKey
    type: INTEGER
  primary_key:
  - GeographyKey
  foreign_keys:
  - sourcecolumns:
    - SalesTerritoryKey
    targettable: DimSalesTerritory
    targetcolumns:
    - SalesTerritoryKey
- tablename: DimProduct
  columns:
  - columnname: ProductKey
    type: INTEGER
  - columnname: ProductSubcategoryKey
    type: INTEGER
  - columnname: EnglishProductName
    type: VARCHAR
  primary_key:
  - ProductKey
  foreign_keys:
  - sourcecolumns:
    - ProductSubcategoryKey
    targettable: DimProductSubcategory
    targetcolumns:
    - ProductSubcategoryKey
- tablename: DimProductCategory
  columns:
  - columnname: ProductCategoryKey
    type: INTEGER
  - columnname: EnglishProductCategoryName
    type: VARCHAR
  primary_key:
  - ProductCategoryKey
- tablename: DimProductSubcategory
  columns:
  - columnname: ProductSubcategoryKey
    type: INTEGER
  - columnname: EnglishProductSubcategoryName
    type: VARCHAR
  - columnname: ProductCategoryKey
    type: INTEGER
  primary_key:
  - ProductSubcategoryKey
  foreign_keys:
  - sourcecolumns:
    - ProductCategoryKey
    targettable: DimProductCategory
    targetcolumns:
    - ProductCategoryKey
- tablename: DimPromotion
  columns:
  - columnname: PromotionKey
    type: INTEGER
  - columnname: EnglishPromotionName
    type: VARCHAR
  - columnname: DiscountPct
    type: FLOAT
  - columnname: EnglishPromotionType
    type: VARCHAR
  - columnname: EnglishPromotionCategory
    type: VARCHAR
  - columnname: StartDate
    type: TIMESTAMP
  - columnname: EndDate
    type: TIMESTAMP
  - columnname: MinQty
    type: INTEGER
  - columnname: MaxQty
    type: INTEGER
  primary_key:
  - PromotionKey
- tablename: DimSalesTerritory
  columns:
  - columnname: SalesTerritoryKey
    type: INTEGER
  - columnname: SalesTerritoryAlternateKey
    type: INTEGER
  - columnname: SalesTerritoryRegion
    type: VARCHAR
  - columnname: SalesTerritoryCountry
    type: VARCHAR
  - columnname: SalesTerritoryGroup
    type: VARCHAR
  - columnname: SalesTerritoryImage
    type: VARCHAR
  primary_key:
  - SalesTerritoryKey
- tablename: FactInternetSales
  columns:
  - columnname: ProductKey
    type: INTEGER
  - columnname: OrderDateKey
    type: INTEGER
  - columnname: DueDateKey
    type: INTEGER
  - columnname: ShipDateKey
    type: INTEGER
  - columnname: CustomerKey
    type: INTEGER
  - columnname: PromotionKey
    type: INTEGER
  - columnname: CurrencyKey
    type: INTEGER
  - columnname: SalesTerritoryKey
    type: INTEGER
  - columnname: TotalProductCost
    type: DECIMAL(13,2)
  - columnname: SalesAmount
    type: DECIMAL(13,2)
  primary_key:
  - SalesOrderNumber
  - SalesOrderLineNumber
  foreign_keys:
  - sourcecolumns:
    - CurrencyKey
    targettable: DimCurrency
    targetcolumns:
    - CurrencyKey
  - sourcecolumns:
    - CustomerKey
    targettable: DimCustomer
    targetcolumns:
    - CustomerKey
  - sourcecolumns:
    - OrderDateKey
    targettable: DimDate
    targetcolumns:
    - DateKey
  - sourcecolumns:
    - DueDateKey
    targettable: DimDate
    targetcolumns:
    - DateKey
  - sourcecolumns:
    - ShipDateKey
    targettable: DimDate
    targetcolumns:
    - DateKey
  - sourcecolumns:
    - ProductKey
    targettable: DimProduct
    targetcolumns:
    - ProductKey
  - sourcecolumns:
    - PromotionKey
    targettable: DimPromotion
    targetcolumns:
    - PromotionKey
  - sourcecolumns:
    - SalesTerritoryKey
    targettable: DimSalesTerritory
    targetcolumns:
    - SalesTerritoryKey


[2] USER:
 
- internet sales amount of all product categories
- Output Category, SalesAmount
- Sorted by Category


## q2

In [32]:
# Fresh OPUS45 conversation for q2: add the schema, then the question.
# Uses chat_opus45 (created at the top of this section); the bare name `chat`
# is never defined in this notebook and fails on a fresh kernel.
chat_opus45.clear_conversation()
chat_opus45.add_user_message(schema)
r = chat_opus45.add_user_message(q2)

In [37]:
sql = mu.sql_extract(r)
db.sql_print(sql)

┌────────────────┬────────────┬───────────────┐
│  Subcategory   │    City    │  SalesAmount  │
│    varchar     │  varchar   │ decimal(38,2) │
├────────────────┼────────────┼───────────────┤
│ Mountain Bikes │ Bellingham │      81316.70 │
│ Mountain Bikes │ Burien     │      65082.22 │
│ Mountain Bikes │ Concord    │      61956.51 │
│ Road Bikes     │ Bellingham │      82252.22 │
│ Road Bikes     │ Burien     │     115466.70 │
│ Road Bikes     │ Concord    │     104500.44 │
│ Touring Bikes  │ Bellingham │      33626.70 │
│ Touring Bikes  │ Burien     │      38303.58 │
│ Touring Bikes  │ Concord    │      33738.09 │
└────────────────┴────────────┴───────────────┘



In [None]:
# Repeat schema + q2 in the existing OPUS45 conversation (no clear beforehand).
# Uses chat_opus45; the bare name `chat` is never defined in this notebook.
chat_opus45.add_user_message(schema)
r = chat_opus45.add_user_message(q2)

## q3

In [52]:
# Fresh OPUS45 conversation for q3: add the schema, then the question.
# Uses chat_opus45 (created at the top of this section); the bare name `chat`
# is never defined in this notebook and fails on a fresh kernel.
chat_opus45.clear_conversation()
chat_opus45.add_user_message(schema)
r = chat_opus45.add_user_message(q3)

In [54]:
sql = mu.sql_extract(r)
db.sql_print(sql)

┌────────────────┬─────────┬───────────────┐
│  Subcategory   │  City   │  SalesAmount  │
│    varchar     │ varchar │ decimal(38,2) │
├────────────────┼─────────┼───────────────┤
│ Mountain Bikes │ Burien  │      65082.22 │
│ Mountain Bikes │ Concord │      61956.51 │
│ Mountain Bikes │ London  │     300619.75 │
│ Mountain Bikes │ Paris   │     186654.04 │
│ Road Bikes     │ Burien  │     115466.70 │
│ Road Bikes     │ Concord │     104500.44 │
│ Road Bikes     │ London  │     341432.71 │
│ Road Bikes     │ Paris   │     276410.82 │
│ Touring Bikes  │ Burien  │      38303.58 │
│ Touring Bikes  │ Concord │      33738.09 │
│ Touring Bikes  │ London  │     135471.93 │
│ Touring Bikes  │ Paris   │      57671.49 │
├────────────────┴─────────┴───────────────┤
│ 12 rows                        3 columns │
└──────────────────────────────────────────┘



## q4

In [59]:
# Fresh OPUS45 conversation for q4: add the schema, then the question.
# Uses chat_opus45 (created at the top of this section); the bare name `chat`
# is never defined in this notebook and fails on a fresh kernel.
chat_opus45.clear_conversation()
chat_opus45.add_user_message(schema)
r = chat_opus45.add_user_message(q4)

In [61]:
sql = mu.sql_extract(r)
db.sql_print(sql)

┌────────────────┬─────────┬───────────────┐
│  Subcategory   │  City   │  SalesAmount  │
│    varchar     │ varchar │ decimal(38,2) │
├────────────────┼─────────┼───────────────┤
│ Road Bikes     │ London  │     341432.71 │
│ Road Bikes     │ Paris   │     276410.82 │
│ Road Bikes     │ Burien  │     115466.70 │
│ Road Bikes     │ Concord │     104500.44 │
│ Mountain Bikes │ London  │     300619.75 │
│ Mountain Bikes │ Paris   │     186654.04 │
│ Mountain Bikes │ Burien  │      65082.22 │
│ Mountain Bikes │ Concord │      61956.51 │
│ Touring Bikes  │ London  │     135471.93 │
│ Touring Bikes  │ Paris   │      57671.49 │
│ Touring Bikes  │ Burien  │      38303.58 │
│ Touring Bikes  │ Concord │      33738.09 │
├────────────────┴─────────┴───────────────┤
│ 12 rows                        3 columns │
└──────────────────────────────────────────┘



# Chat History

In [71]:
# Show the last two messages of a conversation. The bare name `chat` is never
# defined in this notebook, so a concrete chat object is used instead.
# NOTE(review): chat_glm47 is chosen because the saved output below shows the
# q4 exchange with the CROSS JOIN query — confirm this is the intended chat.
display(Markdown(chat_glm47.show_conversation(lastn_only=2)))

[3] USER:
 
- internet sales amount of top 3 product subcategories overall and top 3 cities with most customers
- in the output there might be more than 3 entries because of ties, use ranking function
- Output Subcategory, City, SalesAmount
- Sorting should be done as follows
  - Subcategory according to their overall sales amount descending
  - City according to their number of customers descending


[4] AI:
 ```sql
WITH TopSubcategories AS (
    SELECT 
        dpsc.EnglishProductSubcategoryName AS Subcategory,
        SUM(fis.SalesAmount) AS TotalSalesAmount,
        RANK() OVER (ORDER BY SUM(fis.SalesAmount) DESC) AS SubcategoryRank
    FROM 
        FactInternetSales fis
        JOIN DimProduct dp ON fis.ProductKey = dp.ProductKey
        JOIN DimProductSubcategory dpsc ON dp.ProductSubcategoryKey = dpsc.ProductSubcategoryKey
    GROUP BY 
        dpsc.EnglishProductSubcategoryName
),
TopCities AS (
    SELECT 
        dg.City,
        COUNT(DISTINCT dc.CustomerKey) AS CustomerCount,
        RANK() OVER (ORDER BY COUNT(DISTINCT dc.CustomerKey) DESC) AS CityRank
    FROM 
        DimCustomer dc
        JOIN DimGeography dg ON dc.GeographyKey = dg.GeographyKey
    GROUP BY 
        dg.City
)
SELECT 
    tsc.Subcategory,
    tc.City,
    tsc.TotalSalesAmount AS SalesAmount
FROM 
    TopSubcategories tsc
CROSS JOIN 
    TopCities tc
WHERE 
    tsc.SubcategoryRank <= 3
    AND tc.CityRank <= 3
ORDER BY 
    tsc.TotalSalesAmount DESC,
    tc.CustomerCount DESC;
```

# SQL Run

In [None]:
db.sql_print("""

""")