# Install / Import / Config

In [2]:
%load_ext autoreload
%autoreload 2

In [16]:
import os
from pathlib import Path
from dotenv import load_dotenv

from IPython.display import Markdown, display

import edurel.utils.dbcon as dbcu
import edurel.utils.db as dbu
import edurel.utils.duckdb as ddbu
import edurel.utils.llm as llmu
import edurel.utils.llmchat as llmc
import edurel.widgets.mermaid_viz as mmw
import edurel.widgets.chatman as cmw
import edurel.utils.misc as mu

# Load variables from a local .env file (if any) into the process environment.
load_dotenv()
BASE_DIR = os.getenv("BASE_DIR")
if not BASE_DIR:
    # os.getenv returns None when the variable is missing, and the f-string below
    # would then silently produce "None/databases". Warn instead of raising so the
    # rest of the notebook keeps running as before.
    print("WARNING: BASE_DIR is not set (environment / .env); DB_DIR will not be a usable path.")
DB_DIR = f"{BASE_DIR}/databases"

def run(tag, q, model, schema, db, dbrun=True):
    """Turn the question ``q`` into SQL and, unless disabled, run it on ``db``.

    Prints ``tag`` and the question, then prints whatever
    ``llmu.chat_text_to_sql(model, schema, q)`` returns. When ``dbrun`` is
    truthy, that result is also passed to ``db.sql_print``.

    Args:
        tag: Label printed in front of the question.
        q: Question / prompt text forwarded to the model.
        model: Model object forwarded to ``llmu.chat_text_to_sql``.
        schema: Schema description forwarded to ``llmu.chat_text_to_sql``.
        db: Object providing ``sql_print``; only used when ``dbrun`` is truthy.
        dbrun: Whether to pass the generated SQL to ``db.sql_print``.

    Returns:
        The value produced by ``llmu.chat_text_to_sql``.
    """
    print(f"{tag}:\n{q}")
    generated = llmu.chat_text_to_sql(model, schema, q)
    print(generated)
    if not dbrun:
        return generated
    db.sql_print(generated)
    return generated

# Database

In [4]:
# Open the database connection through the project's dbcon helper.
con = dbcu.adw_olap()

# Extra foreign keys handed to DbHandler, keyed by table name. In every entry the
# table named inside the string is the same as the dictionary key.
# NOTE(review): each string looks like "table|source column|target column" --
# confirm the exact field order against DbHandler.
additional_fks = {
    "DimAccount": [
        "DimAccount|ParentAccountKey|AccountKey",
    ],
    "DimDepartmentGroup": [
        "DimDepartmentGroup|ParentDepartmentGroupKey|DepartmentGroupKey",
    ],
    "DimEmployee": [
        "DimEmployee|ParentEmployeeKey|EmployeeKey",
    ],
    "DimOrganization": [
        "DimOrganization|ParentOrganizationKey|OrganizationKey",
    ],
}

db = dbu.DbHandler(con, additional_fks=additional_fks)
# YAML schema text; passed to run() further down as the `schema` argument.
schema = db.schema_yaml_str(["nullable", "fkname"])

In [5]:
db.schema_yaml_print(["nullable", "fkname"])

tables:
- tablename: DimAccount
  columns:
  - columnname: AccountKey
    type: INTEGER
  - columnname: ParentAccountKey
    type: INTEGER
  - columnname: AccountCodeAlternateKey
    type: INTEGER
  - columnname: ParentAccountCodeAlternateKey
    type: INTEGER
  - columnname: AccountDescription
    type: VARCHAR
  - columnname: AccountType
    type: VARCHAR
  - columnname: Operator
    type: VARCHAR
  - columnname: CustomMembers
    type: VARCHAR
  - columnname: ValueType
    type: VARCHAR
  - columnname: CustomMemberOptions
    type: VARCHAR
  primary_key:
  - AccountKey
  foreign_keys:
  - sourcecolumns:
    - ParentAccountKey
    targettable: DimAccount
    targetcolumns:
    - AccountKey
- tablename: DimCurrency
  columns:
  - columnname: CurrencyKey
    type: INTEGER
  - columnname: CurrencyAlternateKey
    type: VARCHAR
  - columnname: CurrencyName
    type: VARCHAR
  primary_key:
  - CurrencyKey
- tablename: DimCustomer
  columns:
  - columnname: CustomerKey
    type: INTEGER
  

In [None]:
# db.schema_mermaid_png(output_path="schema.png", omit_tags=["columns", "nullable", "fkname"], direction="LR", scale=8)

## Order date range

In [9]:
# Sanity check: earliest and latest order dates present in FactInternetSales.
sql = """
select min(orderdate) as min_order_date,
       max(orderdate) as max_order_date
from FactInternetSales;
"""
ddbu.sql_print(con, sql)

┌─────────────────────┬─────────────────────┐
│   min_order_date    │   max_order_date    │
│      timestamp      │      timestamp      │
├─────────────────────┼─────────────────────┤
│ 2010-12-29 00:00:00 │ 2014-01-28 00:00:00 │
└─────────────────────┴─────────────────────┘



# Questions

## q1

In [13]:
q1 = """
Turn the following question into SQL
- internet sales amount of all product categories in 2012 on month level and also year over year in 2011
- Output Category, Year, Month, YearPrevious, MonthPrevious, Sales, SalesPrevious
- Sorted by Category, Year, Month
"""

## q2

In [None]:
q2 = """

"""

## q3

In [None]:
q3 = """

"""

## q4

## q5

## q6

## q7

## q8

## q9

## q10

## q11

## q12

## q13

## q14

## q15

## q16

## q17

## q18

## q19

## q20

## q21

## q22

## q23

## q24

In [None]:
# Prompt q24: count customers per persontype for the 'shortest' / 'longest'
# modifieddate groups.
# NOTE(review): the order-date check earlier in this notebook shows FactInternetSales
# orders spanning 2010-12-29 to 2014-01-28, so an "orders in 2024" filter would match
# nothing there -- confirm the intended year and that this prompt targets this database.
q24 = """
create a SQL query that outputs the following columns:
- tag either 'shortest' or 'longest'
- persontype of person
- numberof no of customers in that group
the output should be calculated as follows:
- 'shortest' are customer with minimal modifieddate in the Customer table
- 'longest' are customer with maximal modifieddate in the Customer table    
- regard only customers that have placed orders in 2024
sorted by tag and persontype;
"""

# GEMINI25FLASH

In [17]:
gemini25_flash = llmu.stats_c(llmu.GEMINI25FLASH)


In [18]:
sql = run("q1", q1, gemini25_flash, schema, db, dbrun=True)


q1:

Turn the following question into SQL
- internet sales amount of all product categories in 2012 on month level and also year over year in 2011
- Output Category, Year, Month, YearPrevious, MonthPrevious, Sales, SalesPrevious
- Sorted by Category, Year, Month


WITH MonthlySales AS (
  SELECT
    dpc.EnglishProductCategoryName AS Category,
    dd.CalendarYear AS SalesYear,
    dd.EnglishMonthName AS SalesMonth,
    dd.MonthNumberOfYear AS MonthNum,
    SUM(fis.SalesAmount) AS Sales
  FROM FactInternetSales AS fis
  JOIN DimDate AS dd
    ON fis.OrderDateKey = dd.DateKey
  JOIN DimProduct AS dp
    ON fis.ProductKey = dp.ProductKey
  JOIN DimProductSubcategory AS dps
    ON dp.ProductSubcategoryKey = dps.ProductSubcategoryKey
  JOIN DimProductCategory AS dpc
    ON dps.ProductCategoryKey = dpc.ProductCategoryKey
  WHERE
    dd.CalendarYear IN (2011, 2012)
  GROUP BY
    dpc.EnglishProductCategoryName,
    dd.CalendarYear,
    dd.EnglishMonthName,
    dd.MonthNumberOfYear
)
SELECT
  m

# GEMINI3PRO

In [19]:
gemini3_pro = llmu.stats_c(llmu.GEMINI3PRO)


In [20]:
sql = run("q1", q1, gemini3_pro, schema, db, dbrun=True)

q1:

Turn the following question into SQL
- internet sales amount of all product categories in 2012 on month level and also year over year in 2011
- Output Category, Year, Month, YearPrevious, MonthPrevious, Sales, SalesPrevious
- Sorted by Category, Year, Month


WITH MonthlySales AS (
    SELECT
        pc.EnglishProductCategoryName AS Category,
        d.CalendarYear AS Year,
        d.MonthNumberOfYear AS Month,
        SUM(fis.SalesAmount) AS Sales
    FROM FactInternetSales fis
    JOIN DimDate d ON fis.OrderDateKey = d.DateKey
    JOIN DimProduct p ON fis.ProductKey = p.ProductKey
    JOIN DimProductSubcategory psc ON p.ProductSubcategoryKey = psc.ProductSubcategoryKey
    JOIN DimProductCategory pc ON psc.ProductCategoryKey = pc.ProductCategoryKey
    WHERE d.CalendarYear IN (2011, 2012)
    GROUP BY pc.EnglishProductCategoryName, d.CalendarYear, d.MonthNumberOfYear
)
SELECT
    curr.Category,
    curr.Year,
    curr.Month,
    prev.Year AS YearPrevious,
    prev.Month AS Month

# GLM47

In [21]:
glm47 = llmu.stats_c(llmu.GLM47)

In [22]:
sql = run("q1", q1, glm47, schema, db, dbrun=True)

q1:

Turn the following question into SQL
- internet sales amount of all product categories in 2012 on month level and also year over year in 2011
- Output Category, Year, Month, YearPrevious, MonthPrevious, Sales, SalesPrevious
- Sorted by Category, Year, Month

WITH MonthlySales AS (
    SELECT
        DPC.EnglishProductCategoryName AS Category,
        DD.CalendarYear AS Year,
        DD.MonthNumberOfYear AS Month,
        SUM(FIS.SalesAmount) AS Sales
    FROM
        FactInternetSales FIS
        JOIN DimProduct DP ON FIS.ProductKey = DP.ProductKey
        JOIN DimProductSubcategory DPS ON DP.ProductSubcategoryKey = DPS.ProductSubcategoryKey
        JOIN DimProductCategory DPC ON DPS.ProductCategoryKey = DPC.ProductCategoryKey
        JOIN DimDate DD ON FIS.OrderDateKey = DD.DateKey
    WHERE
        DD.CalendarYear IN (2011, 2012)
    GROUP BY
        DPC.EnglishProductCategoryName,
        DD.CalendarYear,
        DD.MonthNumberOfYear
)
SELECT
    Category,
    Year,
    Month,


# OPUS45

In [10]:
opus45 = llmu.stats_c(llmu.OPUS45)

In [12]:
def run(tag, q, model, schema, db, dbrun=True):
    """Generate SQL for question ``q`` and optionally execute it.

    Echoes ``tag`` and ``q``, prints the result of
    ``llmu.chat_text_to_sql(model, schema, q)``, and passes that result to
    ``db.sql_print`` when ``dbrun`` is truthy.

    Returns:
        The value produced by ``llmu.chat_text_to_sql``.
    """
    # NOTE: this redefines the `run` helper that the import/config cell at the
    # top of the notebook also defines; whichever cell ran last wins.
    print(f"{tag}:\n{q}")
    answer = llmu.chat_text_to_sql(model, schema, q)
    print(answer)
    if dbrun:
        db.sql_print(answer)
    return answer

In [14]:
sql = run("q1", q1, opus45, schema, db, dbrun=False)

q1:

Turn the following question into SQL
- internet sales amount of all product categories in 2012 on month level and also year over year in 2011
- Output Category, Year, Month, YearPrevious, MonthPrevious, Sales, SalesPrevious
- Sorted by Category, Year, Month


WITH sales_2012 AS (
    SELECT 
        pc.EnglishProductCategoryName AS Category,
        d.CalendarYear AS Year,
        d.MonthNumberOfYear AS Month,
        SUM(fis.SalesAmount) AS Sales
    FROM FactInternetSales fis
    JOIN DimProduct p ON fis.ProductKey = p.ProductKey
    JOIN DimProductSubcategory ps ON p.ProductSubcategoryKey = ps.ProductSubcategoryKey
    JOIN DimProductCategory pc ON ps.ProductCategoryKey = pc.ProductCategoryKey
    JOIN DimDate d ON fis.OrderDateKey = d.DateKey
    WHERE d.CalendarYear = 2012
    GROUP BY pc.EnglishProductCategoryName, d.CalendarYear, d.MonthNumberOfYear
),
sales_2011 AS (
    SELECT 
        pc.EnglishProductCategoryName AS Category,
        d.CalendarYear AS Year,
        d.Mo

In [15]:
db.sql_print(sql)

┌─────────────┬───────┬───────┬──────────────┬───────────────┬───────────────┬───────────────┐
│  Category   │ Year  │ Month │ YearPrevious │ MonthPrevious │     Sales     │ SalesPrevious │
│   varchar   │ int32 │ int32 │    int32     │     int32     │ decimal(38,2) │ decimal(38,2) │
├─────────────┼───────┼───────┼──────────────┼───────────────┼───────────────┼───────────────┤
│ Accessories │  2012 │    12 │         NULL │          NULL │       2147.08 │          NULL │
│ Bikes       │  2012 │     1 │         2011 │             1 │     495364.01 │     469823.94 │
│ Bikes       │  2012 │     2 │         2011 │             2 │     506994.08 │     466334.93 │
│ Bikes       │  2012 │     3 │         2011 │             3 │     373482.95 │     485198.69 │
│ Bikes       │  2012 │     4 │         2011 │             4 │     400335.59 │     502073.88 │
│ Bikes       │  2012 │     5 │         2011 │             5 │     358877.87 │     561681.51 │
│ Bikes       │  2012 │     6 │         2011 │    