# Install / Import / Config

In [57]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [58]:
import os
from pathlib import Path
from dotenv import load_dotenv
import edurel.utils.llm as llmu
import edurel.utils.duckdb as ddbu

load_dotenv() 
BASE_DIR = os.getenv("BASE_DIR")
DB_DIR = f"{BASE_DIR}/databases/"

# Database

In [59]:
con = ddbu.mem_con(DB_DIR + "db-company_en")
schema = ddbu.schema(con)

In [60]:
# print(ddbu.schema(con))
# con.close()

# Queries

## q1

In [61]:
q1 = """
create a SQL query that outputs the following columns:
- eid, lastname, hiredate from table employee
- entry year per employee 
sorted by eid
"""

## q2

In [62]:
q2 = """
create a SQL query that outputs the following columns:
- eid, lastname, hiredate from table employee
- entry year per employee 
only for department 14
sorted by eid
"""

## q3

In [63]:
q3 = """
create a SQL query that outputs the following columns:
- eid, lastname, hiredate from table employee
- number of years of deployment per employee 
only for department 14
sorted by eid
"""

## q4

In [64]:
q4 = """
create a SQL query that outputs the following columns:
- lastname, salary, bonus from table employee
only employees without bonus
sorted by eid
"""

## q5

In [65]:
q5 = """
create a SQL query that outputs the following columns:
- lastname, salary, bonus from table employee
only employees with bonus
sorted by eid
"""

## q6

In [66]:
q6 = """
create a SQL query that outputs the following columns:
- eid, lastname, salary, bonus from table employee
- total income per employee 
only for department 17
sorted by eid
"""

## q7

In [67]:
q7 = """
create a SQL query that outputs the following columns:
- eid, lastname, salary from table employee
- salary band per employee 
there are the following salary bands:
- low <= 35000
- medium > 35000
- high > 100000
sorted by salary
"""

## q8

In [68]:
q8 = """
create a SQL query that outputs the following columns:
- name from table orgunit with alias orgunit
- eid, lastname from table employee
sorted by orgunit, eid
"""

## q9

In [69]:
q9 = """
create a SQL query that outputs the following columns:
- title from table project with alias project
- eid, lastname from table employee
sorted by title, eid
"""

## q10

In [70]:
q10 = """
create a SQL query that outputs the following columns:
- name with alias parent from table orgunit
- bezeichnung mit alias child from table orgunit
child should be direct subunit of parent
sorted by parent, child
"""

## q11

In [71]:
q11 = """
create a SQL query that outputs the following columns:
- eid from table employee
- lastname with alias head from table employee
- name with alias orgunit from table orgunit
only for department heads
sorted by eid
"""

## q12

In [72]:
q12 = """
create a SQL query that outputs the following columns:
- eid from table employee
- lastname with alias employee from table employee
only for employees that are not department heads
sorted by eid
"""

## q13

In [73]:
q13 = """
create a SQL query that outputs the following columns:
- lastname with alias employee from table employee
- salary of the employee with alias esalary from table employee
- lastname with alias boss  from table employee
- salary of the boss with alias bsalary from table employee
boss is direct superior of employee
employee earns more than boss
sorted by employee
"""

## q14

In [74]:
q14 = """
create a SQL query that outputs the following columns:
- max_salary
max_salary should be the maximum salary of all employees
"""

## q15

In [75]:
q15 = """
create a SQL query that outputs the following columns:
- name with alias orgunit from table orgunit
- max_salary_dept
max_salary_dept should be the maximum salary of all employees per orgunit
sorted by descending max_salary_dept
"""

## q16

In [76]:
q16 = """
create a SQL query that outputs the following columns:
- entry_year calculated from hiredate
- salary_year should be the total salary of all employees per entry year
sorted by entry year
"""

## q17

In [77]:
q17 = """
create a SQL query that outputs the following columns:
- decade calculated from hiredate
- salary_decade should be the total salary of all employees per decade
sorted by decade
"""

## q18

In [78]:
q18 = """
create a SQL query that outputs the following columns:
- name with alias orgunit from table orgunit
- decade calculated from hiredate, use floor for calculation
- salary_decade should be the total salary of all employees per orgunit and decade
sorted by ouid, decade
"""

## q19

In [79]:
q19 = """
create a SQL query that outputs the following columns:
- name, salary from table employee
only employees who earn more than the average of all employees
sorted by eid
"""

## q20

In [80]:
q20 = """
create a SQL query that outputs the following columns:
- eid, name, salary from table employee
- average salary of all employees, alias as asalary
- diff to average salary, alias diff_asalary
only employees who earn more than the average of all employees
use cte for the calculation
round calculation to 2 decimal places
sorted by eid
"""

## q21

In [81]:
q21 = """
create a SQL query that outputs the following columns:
- name with alias orgunit from table orgunit
- eid, name, salary from table employee
- average salary of all employees in the same department, alias asalary_dept
- diff to average salary, alias diff_asalary_dept
use cte for the calculation
round calculation to 2 decimal places
sorted by ouid, eid
"""

## q22

In [82]:
q22 = """
create a SQL query that outputs the following columns:
- eid, lastname from table employee
only employees who work in a department that is led by 'Umarani'
"""

## q23

In [83]:
q23 = """
create a SQL query that outputs the following columns:
- ouid, name with alias orgunit from table orgunit
- number of employees in the orgunit, alias num_employees
- only orgunits with the least employees
sorted by ouid
"""

## q24

In [105]:
q24 = """
create a SQL query that outputs the following columns:
- ouid, name with alias orgunit from table orgunit
- average salary of all employees in that orgunit, alias asal
only orgunit with max avg salary
sorted by ouid
"""

# ARCTICTEXT2SQL

## q1

In [5]:
sql = llmu.chat_text_to_sql(llmu.ollama_c(llmu.ARCTICTEXT2SQL), schema, q1)
print(sql)

The user wants to extract specific information about employees including their ID, last name, and hire date. Additionally, they want a computed column for the year of employment based on the hire date. The sorting should be done in ascending order based on employee IDs.
</think>
<action>
Now, let's translate this into :
1. Select columns `eid`, `lastname`, and `hiredate` from the table `Employee`.
2. Add a computed column for the year of employment using the function `strftime('%Y', hiredate)`. This will extract the year part from the date.
3. Order the results by `eid`.

The  query is as follows:

SELECT 
  EID AS eid, 
  LastName AS lastname, 
  HireDate AS hiredate,
  strftime('%Y', HireDate) AS entry_year
FROM Employee
ORDER BY EID;


This solution directly addresses the user's request without unnecessary complexity or extraneous joins. It focuses on the `Employee` table and extracts only the required columns.
</action>


In [6]:
sql = """
SELECT 
  EID AS eid, 
  LastName AS lastname, 
  HireDate AS hiredate,
  strftime('%Y', HireDate) AS entry_year
FROM Employee
ORDER BY EID;
"""

In [7]:
ddbu.sql_print(con, sql)

┌───────┬──────────┬────────────┬────────────┐
│  eid  │ lastname │  hiredate  │ entry_year │
│ int32 │ varchar  │    date    │  varchar   │
├───────┼──────────┼────────────┼────────────┤
│   101 │ Patil    │ 2000-05-01 │ 2000       │
│   102 │ Durmaz   │ 2005-07-01 │ 2005       │
│   103 │ Blaschke │ 2002-11-01 │ 2002       │
│   104 │ Stone    │ 2006-06-01 │ 2006       │
│   105 │ Dalal    │ 2018-02-02 │ 2018       │
│   106 │ Li       │ 2002-12-01 │ 2002       │
│   107 │ Nguyen   │ 2006-07-01 │ 2006       │
│   108 │ Sanchez  │ 2014-04-01 │ 2014       │
│   109 │ Umarani  │ 2006-07-01 │ 2006       │
│   110 │ Ortega   │ 2005-09-02 │ 2005       │
│   111 │ Doshi    │ 2010-01-02 │ 2010       │
│   112 │ Singh    │ 2012-03-01 │ 2012       │
│   113 │ Jadhav   │ 2001-08-01 │ 2001       │
│   114 │ Popov    │ 2009-03-02 │ 2009       │
│   115 │ Kumar    │ 2013-05-01 │ 2013       │
│   116 │ Krause   │ 2011-08-01 │ 2011       │
│   117 │ Oezdem   │ 2014-08-01 │ 2014       │
│   118 │ Oke

# GLM46

In [42]:
glm46 = llmu.stats_c(llmu.GLM46)

In [None]:
q = q1
print(f"q1:{q}")
sql = llmu.chat_text_to_sql(glm46, schema, q)
print(sql)
ddbu.sql_print(con, sql)

q1:
create a SQL query that outputs the following columns:
- eid, lastname, hiredate from table employee
- entry year per employee 
sorted by eid

┌───────┬──────────┬────────────┬───────────┐
│  EID  │ LastName │  Hiredate  │ EntryYear │
│ int32 │ varchar  │    date    │   int64   │
├───────┼──────────┼────────────┼───────────┤
│   101 │ Patil    │ 2000-05-01 │      2000 │
│   102 │ Durmaz   │ 2005-07-01 │      2005 │
│   103 │ Blaschke │ 2002-11-01 │      2002 │
│   104 │ Stone    │ 2006-06-01 │      2006 │
│   105 │ Dalal    │ 2018-02-02 │      2018 │
│   106 │ Li       │ 2002-12-01 │      2002 │
│   107 │ Nguyen   │ 2006-07-01 │      2006 │
│   108 │ Sanchez  │ 2014-04-01 │      2014 │
│   109 │ Umarani  │ 2006-07-01 │      2006 │
│   110 │ Ortega   │ 2005-09-02 │      2005 │
│   111 │ Doshi    │ 2010-01-02 │      2010 │
│   112 │ Singh    │ 2012-03-01 │      2012 │
│   113 │ Jadhav   │ 2001-08-01 │      2001 │
│   114 │ Popov    │ 2009-03-02 │      2009 │
│   115 │ Kumar    │ 2013

In [49]:
q = q2
print(f"q2:{q}")
sql = llmu.chat_text_to_sql(glm46, schema, q)
print(sql)
ddbu.sql_print(con, sql)

q2:
create a SQL query that outputs the following columns:
- eid, lastname, hiredate from table employee
- entry year per employee 
only for department 14
sorted by eid

SELECT eid, lastname, hiredate, EXTRACT(YEAR FROM hiredate) AS entry_year
FROM Employee
WHERE OUID = 14
ORDER BY eid
┌───────┬──────────┬────────────┬────────────┐
│  EID  │ LastName │  Hiredate  │ entry_year │
│ int32 │ varchar  │    date    │   int64    │
├───────┼──────────┼────────────┼────────────┤
│   106 │ Li       │ 2002-12-01 │       2002 │
│   107 │ Nguyen   │ 2006-07-01 │       2006 │
│   108 │ Sanchez  │ 2014-04-01 │       2014 │
└───────┴──────────┴────────────┴────────────┘



In [50]:
q = q3
print(f"q3:{q}")
sql = llmu.chat_text_to_sql(glm46, schema, q)
print(sql)
ddbu.sql_print(con, sql)

q3
create a SQL query that outputs the following columns:
- eid, lastname, hiredate from table employee
- number of years of deployment per employee 
only for department 14
sorted by eid


SELECT 
    e.eid, 
    e.lastname, 
    e.hiredate,
    EXTRACT(YEAR FROM AGE(CURRENT_DATE, e.hiredate)) AS years_of_deployment
FROM 
    Employee e
WHERE 
    e.ouid = 14
ORDER BY 
    e.eid;

┌───────┬──────────┬────────────┬─────────────────────┐
│  EID  │ LastName │  Hiredate  │ years_of_deployment │
│ int32 │ varchar  │    date    │        int64        │
├───────┼──────────┼────────────┼─────────────────────┤
│   106 │ Li       │ 2002-12-01 │                  23 │
│   107 │ Nguyen   │ 2006-07-01 │                  19 │
│   108 │ Sanchez  │ 2014-04-01 │                  11 │
└───────┴──────────┴────────────┴─────────────────────┘



In [52]:
q = q4
print(f"q4:{q}")
sql = llmu.chat_text_to_sql(glm46, schema, q)
print(sql)
ddbu.sql_print(con, sql)

q4:
create a SQL query that outputs the following columns:
- lastname, salary, bonus from table employee
only employees without bonus
sorted by eid


SELECT
    LastName,
    Salary,
    Bonus
FROM
    Employee
WHERE
    Bonus IS NULL
ORDER BY
    EID;

┌──────────┬──────────────┬──────────────┐
│ LastName │    Salary    │    Bonus     │
│ varchar  │ decimal(9,2) │ decimal(9,2) │
├──────────┼──────────────┼──────────────┤
│ Patil    │    180000.00 │         NULL │
│ Durmaz   │    120000.00 │         NULL │
│ Blaschke │     93000.00 │         NULL │
│ Stone    │     42000.00 │         NULL │
│ Li       │     89000.00 │         NULL │
│ Nguyen   │     41000.00 │         NULL │
│ Umarani  │    142000.00 │         NULL │
│ Ortega   │     90000.00 │         NULL │
│ Doshi    │     42000.00 │         NULL │
│ Jadhav   │     91000.00 │         NULL │
│ Popov    │     34000.00 │         NULL │
│ Kumar    │     32000.00 │         NULL │
│ Krause   │     31000.00 │         NULL │
├──────────┴───

In [53]:
q = q5
print(f"q5:{q}")
sql = llmu.chat_text_to_sql(glm46, schema, q)
print(sql)
ddbu.sql_print(con, sql)

q5:
create a SQL query that outputs the following columns:
- lastname, salary, bonus from table employee
only employees with bonus
sorted by eid


SELECT lastname, salary, bonus
FROM Employee
WHERE bonus IS NOT NULL
ORDER BY EID;

┌──────────┬──────────────┬──────────────┐
│ LastName │    Salary    │    Bonus     │
│ varchar  │ decimal(9,2) │ decimal(9,2) │
├──────────┼──────────────┼──────────────┤
│ Dalal    │     38000.00 │      1000.00 │
│ Sanchez  │     39000.00 │      1500.00 │
│ Singh    │     43000.00 │      2100.00 │
│ Oezdem   │     33000.00 │      1900.00 │
│ Okeke    │     32000.00 │      1900.00 │
└──────────┴──────────────┴──────────────┘



In [54]:
q = q6
print(f"q6:{q}")
sql = llmu.chat_text_to_sql(glm46, schema, q)
print(sql)
ddbu.sql_print(con, sql)

q6:
create a SQL query that outputs the following columns:
- eid, lastname, salary, bonus from table employee
- total income per employee 
only for department 17
sorted by eid

SELECT eid, lastname, salary, bonus, salary + COALESCE(bonus, 0) AS total_income
FROM Employee
WHERE OUID = 17
ORDER BY eid;
┌───────┬──────────┬──────────────┬──────────────┬───────────────┐
│  EID  │ LastName │    Salary    │    Bonus     │ total_income  │
│ int32 │ varchar  │ decimal(9,2) │ decimal(9,2) │ decimal(13,2) │
├───────┼──────────┼──────────────┼──────────────┼───────────────┤
│   114 │ Popov    │     34000.00 │         NULL │      34000.00 │
│   115 │ Kumar    │     32000.00 │         NULL │      32000.00 │
│   116 │ Krause   │     31000.00 │         NULL │      31000.00 │
│   117 │ Oezdem   │     33000.00 │      1900.00 │      34900.00 │
│   118 │ Okeke    │     32000.00 │      1900.00 │      33900.00 │
└───────┴──────────┴──────────────┴──────────────┴───────────────┘



In [55]:
q = q7
print(f"q7:{q}")
sql = llmu.chat_text_to_sql(glm46, schema, q)
print(sql)
ddbu.sql_print(con, sql)

q7:
create a SQL query that outputs the following columns:
- eid, lastname, salary from table employee
- salary band per employee 
there are the following salary bands:
- low <= 35000
- medium > 35000
- high > 100000
sorted by salary

SELECT eid, lastname, salary,
       CASE 
           WHEN salary <= 35000 THEN 'low'
           WHEN salary <= 100000 THEN 'medium'
           ELSE 'high'
       END AS salary_band
FROM Employee
ORDER BY salary;
┌───────┬──────────┬──────────────┬─────────────┐
│  EID  │ LastName │    Salary    │ salary_band │
│ int32 │ varchar  │ decimal(9,2) │   varchar   │
├───────┼──────────┼──────────────┼─────────────┤
│   116 │ Krause   │     31000.00 │ low         │
│   115 │ Kumar    │     32000.00 │ low         │
│   118 │ Okeke    │     32000.00 │ low         │
│   117 │ Oezdem   │     33000.00 │ low         │
│   114 │ Popov    │     34000.00 │ low         │
│   105 │ Dalal    │     38000.00 │ medium      │
│   108 │ Sanchez  │     39000.00 │ medium      │
│ 

In [85]:
q = q8
print(f"q8{q}")
sql = llmu.chat_text_to_sql(glm46, schema, q)
print(sql)
ddbu.sql_print(con, sql)

q8
create a SQL query that outputs the following columns:
- name from table orgunit with alias orgunit
- eid, lastname from table employee
sorted by orgunit, eid

SELECT OrgUnit.Name AS orgunit, Employee.EID, Employee.LastName
FROM Employee
JOIN OrgUnit ON Employee.OUID = OrgUnit.OUID
ORDER BY OrgUnit.Name, Employee.EID
┌────────────────┬───────┬──────────┐
│    orgunit     │  EID  │ LastName │
│    varchar     │ int32 │ varchar  │
├────────────────┼───────┼──────────┤
│ Accounting     │   106 │ Li       │
│ Accounting     │   107 │ Nguyen   │
│ Accounting     │   108 │ Sanchez  │
│ Administration │   102 │ Durmaz   │
│ Company        │   101 │ Patil    │
│ HR             │   103 │ Blaschke │
│ HR             │   104 │ Stone    │
│ HR             │   105 │ Dalal    │
│ Plant          │   110 │ Ortega   │
│ Plant          │   111 │ Doshi    │
│ Plant          │   112 │ Singh    │
│ Plant          │   113 │ Jadhav   │
│ Production     │   109 │ Umarani  │
│ Warehouse      │   114 │ Popov

In [86]:
q = q9
print(f"q9{q}")
sql = llmu.chat_text_to_sql(glm46, schema, q)
print(sql)
ddbu.sql_print(con, sql)

q9
create a SQL query that outputs the following columns:
- title from table project with alias project
- eid, lastname from table employee
sorted by title, eid

SELECT p.title AS project, e.eid, e.lastname
FROM Project p
JOIN EmpProj ep ON p.PID = ep.PID
JOIN Employee e ON ep.EID = e.EID
ORDER BY p.title, e.eid;
┌────────────────────────┬───────┬──────────┐
│        project         │  EID  │ LastName │
│        varchar         │ int32 │ varchar  │
├────────────────────────┼───────┼──────────┤
│ CRM                    │   102 │ Durmaz   │
│ CRM                    │   103 │ Blaschke │
│ Plant Restructurierung │   109 │ Umarani  │
│ Plant Restructurierung │   114 │ Popov    │
│ Plant Restructurierung │   117 │ Oezdem   │
│ Plant Restructurierung │   118 │ Okeke    │
│ Strategy               │   101 │ Patil    │
│ Strategy               │   102 │ Durmaz   │
│ Strategy               │   109 │ Umarani  │
└────────────────────────┴───────┴──────────┘



In [88]:
q = q10
print(f"q10:{q}")
sql = llmu.chat_text_to_sql(glm46, schema, q)
print(sql)
ddbu.sql_print(con, sql)

q10:
create a SQL query that outputs the following columns:
- name with alias parent from table orgunit
- bezeichnung mit alias child from table orgunit
child should be direct subunit of parent
sorted by parent, child

SELECT parent.Name AS parent, child.Name AS child
FROM OrgUnit AS parent
JOIN OrgUnit AS child ON parent.OUID = child.SuperUnit
ORDER BY parent, child;
┌────────────────┬────────────────┐
│     parent     │     child      │
│    varchar     │    varchar     │
├────────────────┼────────────────┤
│ Administration │ Accounting     │
│ Administration │ HR             │
│ Company        │ Administration │
│ Company        │ Production     │
│ Production     │ Plant          │
│ Production     │ Warehouse      │
└────────────────┴────────────────┘



In [89]:
q = q11
print(f"q11:{q}")
sql = llmu.chat_text_to_sql(glm46, schema, q)
print(sql)
ddbu.sql_print(con, sql)

q11:
create a SQL query that outputs the following columns:
- eid from table employee
- lastname with alias head from table employee
- name with alias orgunit from table orgunit
only for department heads
sorted by eid

SELECT e.EID, e.LastName AS head, o.Name AS orgunit
FROM Employee e
JOIN OrgUnit o ON e.EID = o.Head
ORDER BY e.EID;
┌───────┬──────────┬────────────────┐
│  EID  │   head   │    orgunit     │
│ int32 │ varchar  │    varchar     │
├───────┼──────────┼────────────────┤
│   101 │ Patil    │ Company        │
│   102 │ Durmaz   │ Administration │
│   103 │ Blaschke │ HR             │
│   106 │ Li       │ Accounting     │
│   109 │ Umarani  │ Warehouse      │
│   109 │ Umarani  │ Production     │
│   110 │ Ortega   │ Plant          │
└───────┴──────────┴────────────────┘



In [90]:
q = q12
print(f"q12:{q}")
sql = llmu.chat_text_to_sql(glm46, schema, q)
print(sql)
ddbu.sql_print(con, sql)

q12:
create a SQL query that outputs the following columns:
- eid from table employee
- lastname with alias employee from table employee
only for employees that are not department heads
sorted by eid


SELECT e.EID, e.LastName AS employee
FROM Employee e
LEFT JOIN OrgUnit ou ON e.EID = ou.Head
WHERE ou.Head IS NULL
ORDER BY e.EID;

┌───────┬──────────┐
│  EID  │ employee │
│ int32 │ varchar  │
├───────┼──────────┤
│   104 │ Stone    │
│   105 │ Dalal    │
│   107 │ Nguyen   │
│   108 │ Sanchez  │
│   111 │ Doshi    │
│   112 │ Singh    │
│   113 │ Jadhav   │
│   114 │ Popov    │
│   115 │ Kumar    │
│   116 │ Krause   │
│   117 │ Oezdem   │
│   118 │ Okeke    │
├───────┴──────────┤
│     12 rows      │
└──────────────────┘



In [91]:
q = q13
print(f"q13:{q}")
sql = llmu.chat_text_to_sql(glm46, schema, q)
print(sql)
ddbu.sql_print(con, sql)

q13:
create a SQL query that outputs the following columns:
- lastname with alias employee from table employee
- salary of the employee with alias esalary from table employee
- lastname with alias boss  from table employee
- salary of the boss with alias bsalary from table employee
boss is direct superior of employee
employee earns more than boss
sorted by employee

SELECT 
    e.LastName AS employee,
    e.Salary AS esalary,
    b.LastName AS boss,
    b.Salary AS bsalary
FROM Employee e
JOIN OrgUnit ou ON e.OUID = ou.OUID
JOIN Employee b ON ou.Head = b.EID
WHERE e.Salary > b.Salary
ORDER BY e.LastName;
┌──────────┬──────────────┬─────────┬──────────────┐
│ employee │   esalary    │  boss   │   bsalary    │
│ varchar  │ decimal(9,2) │ varchar │ decimal(9,2) │
├──────────┼──────────────┼─────────┼──────────────┤
│ Jadhav   │     91000.00 │ Ortega  │     90000.00 │
└──────────┴──────────────┴─────────┴──────────────┘



In [92]:
q = q14
print(f"q14:{q}")
sql = llmu.chat_text_to_sql(glm46, schema, q)
print(sql)
ddbu.sql_print(con, sql)

q14:
create a SQL query that outputs the following columns:
- max_salary
max_salary should be the maximum salary of all employees

SELECT MAX(Salary) AS max_salary FROM Employee;
┌──────────────┐
│  max_salary  │
│ decimal(9,2) │
├──────────────┤
│    180000.00 │
└──────────────┘



In [93]:
q = q15
print(f"q15:{q}")
sql = llmu.chat_text_to_sql(glm46, schema, q)
print(sql)
ddbu.sql_print(con, sql)

q15:
create a SQL query that outputs the following columns:
- name with alias orgunit from table orgunit
- max_salary_dept
max_salary_dept should be the maximum salary of all employees per orgunit
sorted by descending max_salary_dept

SELECT o.Name AS orgunit, MAX(e.Salary) AS max_salary_dept
FROM Employee e
JOIN OrgUnit o ON e.OUID = o.OUID
GROUP BY o.Name
ORDER BY max_salary_dept DESC;
┌────────────────┬─────────────────┐
│    orgunit     │ max_salary_dept │
│    varchar     │  decimal(9,2)   │
├────────────────┼─────────────────┤
│ Company        │       180000.00 │
│ Production     │       142000.00 │
│ Administration │       120000.00 │
│ HR             │        93000.00 │
│ Plant          │        91000.00 │
│ Accounting     │        89000.00 │
│ Warehouse      │        34000.00 │
└────────────────┴─────────────────┘



In [94]:
q = q16
print(f"q16:{q}")
sql = llmu.chat_text_to_sql(glm46, schema, q)
print(sql)
ddbu.sql_print(con, sql)

q16:
create a SQL query that outputs the following columns:
- entry_year calculated from hiredate
- salary_year should be the total salary of all employees per entry year
sorted by entry year

SELECT EXTRACT(YEAR FROM hiredate) AS entry_year, SUM(salary) AS salary_year FROM Employee GROUP BY EXTRACT(YEAR FROM hiredate) ORDER BY entry_year
┌────────────┬───────────────┐
│ entry_year │  salary_year  │
│   int64    │ decimal(38,2) │
├────────────┼───────────────┤
│       2000 │     180000.00 │
│       2001 │      91000.00 │
│       2002 │     182000.00 │
│       2005 │     210000.00 │
│       2006 │     225000.00 │
│       2009 │      34000.00 │
│       2010 │      42000.00 │
│       2011 │      31000.00 │
│       2012 │      43000.00 │
│       2013 │      64000.00 │
│       2014 │      72000.00 │
│       2018 │      38000.00 │
├────────────┴───────────────┤
│ 12 rows          2 columns │
└────────────────────────────┘



In [95]:
q = q17
print(f"q17:{q}")
sql = llmu.chat_text_to_sql(glm46, schema, q)
print(sql)
ddbu.sql_print(con, sql)

q17:
create a SQL query that outputs the following columns:
- decade calculated from hiredate
- salary_decade should be the total salary of all employees per decade
sorted by decade


SELECT 
    FLOOR(EXTRACT(YEAR FROM hiredate) / 10) * 10 AS decade,
    SUM(salary) AS salary_decade
FROM Employee
GROUP BY FLOOR(EXTRACT(YEAR FROM hiredate) / 10) * 10
ORDER BY decade;

┌────────┬───────────────┐
│ decade │ salary_decade │
│ double │ decimal(38,2) │
├────────┼───────────────┤
│ 2000.0 │     922000.00 │
│ 2010.0 │     290000.00 │
└────────┴───────────────┘



In [96]:
q = q18
print(f"q18:{q}")
sql = llmu.chat_text_to_sql(glm46, schema, q)
print(sql)
ddbu.sql_print(con, sql)

q18:
create a SQL query that outputs the following columns:
- name with alias orgunit from table orgunit
- decade calculated from hiredate, use floor for calculation
- salary_decade should be the total salary of all employees per orgunit and decade
sorted by ouid, decade

SELECT 
    ou.name AS orgunit,
    FLOOR(EXTRACT(YEAR FROM e.hiredate) / 10) * 10 AS decade,
    SUM(e.salary) AS salary_decade
FROM 
    Employee e
JOIN 
    OrgUnit ou ON e.OUID = ou.OUID
GROUP BY 
    ou.name, 
    FLOOR(EXTRACT(YEAR FROM e.hiredate) / 10) * 10,
    ou.OUID
ORDER BY 
    ou.OUID, 
    decade;
┌────────────────┬────────┬───────────────┐
│    orgunit     │ decade │ salary_decade │
│    varchar     │ double │ decimal(38,2) │
├────────────────┼────────┼───────────────┤
│ Company        │ 2000.0 │     180000.00 │
│ Administration │ 2000.0 │     120000.00 │
│ HR             │ 2000.0 │     135000.00 │
│ HR             │ 2010.0 │      38000.00 │
│ Accounting     │ 2000.0 │     130000.00 │
│ Accounting    

In [97]:
q = q19
print(f"q19:{q}")
sql = llmu.chat_text_to_sql(glm46, schema, q)
print(sql)
ddbu.sql_print(con, sql)

q19:
create a SQL query that outputs the following columns:
- name, salary from table employee
only employees who earn more than the average of all employees
sorted by eid


SELECT LastName AS name, Salary
FROM Employee
WHERE Salary > (SELECT AVG(Salary) FROM Employee)
ORDER BY EID

┌──────────┬──────────────┐
│   name   │    Salary    │
│ varchar  │ decimal(9,2) │
├──────────┼──────────────┤
│ Patil    │    180000.00 │
│ Durmaz   │    120000.00 │
│ Blaschke │     93000.00 │
│ Li       │     89000.00 │
│ Umarani  │    142000.00 │
│ Ortega   │     90000.00 │
│ Jadhav   │     91000.00 │
└──────────┴──────────────┘



In [98]:
q = q20
print(f"q20:{q}")
sql = llmu.chat_text_to_sql(glm46, schema, q)
print(sql)
ddbu.sql_print(con, sql)

q20:
create a SQL query that outputs the following columns:
- eid, name, salary from table employee
- average salary of all employees, alias as asalary
- diff to average salary, alias diff_asalary
only employees who earn more than the average of all employees
use cte for the calculation
round calculation to 2 decimal places
sorted by eid


WITH avg_salary AS (
    SELECT ROUND(AVG(Salary), 2) as avg_sal
    FROM Employee
)
SELECT 
    e.EID as eid,
    e.LastName as name,
    ROUND(e.Salary, 2) as salary,
    a.avg_sal as asalary,
    ROUND(e.Salary - a.avg_sal, 2) as diff_asalary
FROM Employee e
CROSS JOIN avg_salary a
WHERE e.Salary > a.avg_sal
ORDER BY e.EID;

┌───────┬──────────┬──────────────┬──────────┬──────────────┐
│  eid  │   name   │    salary    │ asalary  │ diff_asalary │
│ int32 │ varchar  │ decimal(9,2) │  double  │    double    │
├───────┼──────────┼──────────────┼──────────┼──────────────┤
│   101 │ Patil    │    180000.00 │ 67333.33 │    112666.67 │
│   102 │ Durmaz  

In [99]:
q = q21
print(f"q21:{q}")
sql = llmu.chat_text_to_sql(glm46, schema, q)
print(sql)
ddbu.sql_print(con, sql)

q21:
create a SQL query that outputs the following columns:
- name with alias orgunit from table orgunit
- eid, name, salary from table employee
- average salary of all employees in the same department, alias asalary_dept
- diff to average salary, alias diff_asalary_dept
use cte for the calculation
round calculation to 2 decimal places
sorted by ouid, eid


WITH dept_avg_salary AS (
    SELECT 
        OUID,
        ROUND(AVG(Salary), 2) AS avg_salary
    FROM Employee
    GROUP BY OUID
)
SELECT 
    ou.Name AS orgunit,
    e.EID,
    e.LastName AS name,
    e.Salary,
    d.avg_salary AS asalary_dept,
    ROUND(e.Salary - d.avg_salary, 2) AS diff_asalary_dept
FROM Employee e
JOIN OrgUnit ou ON e.OUID = ou.OUID
JOIN dept_avg_salary d ON e.OUID = d.OUID
ORDER BY e.OUID, e.EID;

┌────────────────┬───────┬──────────┬──────────────┬──────────────┬───────────────────┐
│    orgunit     │  EID  │   name   │    Salary    │ asalary_dept │ diff_asalary_dept │
│    varchar     │ int32 │ varchar  │

In [100]:
q = q22
print(f"q22:{q}")
sql = llmu.chat_text_to_sql(glm46, schema, q)
print(sql)
ddbu.sql_print(con, sql)

q22:
create a SQL query that outputs the following columns:
- eid, lastname from table employee
only employees who work in a department that is led by 'Umarani'


SELECT e.eid, e.lastname
FROM Employee e
JOIN OrgUnit ou ON e.ouid = ou.ouid
JOIN Employee head_emp ON ou.head = head_emp.eid
WHERE head_emp.lastname = 'Umarani'

┌───────┬──────────┐
│  EID  │ LastName │
│ int32 │ varchar  │
├───────┼──────────┤
│   109 │ Umarani  │
│   114 │ Popov    │
│   115 │ Kumar    │
│   116 │ Krause   │
│   117 │ Oezdem   │
│   118 │ Okeke    │
└───────┴──────────┘



In [101]:
q = q23
print(f"q23:{q}")
sql = llmu.chat_text_to_sql(glm46, schema, q)
print(sql)
ddbu.sql_print(con, sql)

q23:
create a SQL query that outputs the following columns:
- ouid, name with alias orgunit from table orgunit
- number of employees in the orgunit, alias num_employees
- only orgunits with the least employees
sorted by ouid


WITH orgunit_counts AS (
    SELECT 
        o.ouid, 
        o.name, 
        COUNT(e.eid) AS employee_count
    FROM 
        OrgUnit o
    LEFT JOIN 
        Employee e ON o.ouid = e.ouid
    GROUP BY 
        o.ouid, o.name
)
SELECT 
    ouid, 
    name AS orgunit, 
    employee_count AS num_employees
FROM 
    orgunit_counts
WHERE 
    employee_count = (SELECT MIN(employee_count) FROM orgunit_counts)
ORDER BY 
    ouid;

┌───────┬────────────────┬───────────────┐
│ OUID  │    orgunit     │ num_employees │
│ int32 │    varchar     │     int64     │
├───────┼────────────────┼───────────────┤
│    11 │ Company        │             1 │
│    12 │ Administration │             1 │
│    15 │ Production     │             1 │
└───────┴────────────────┴───────────────┘

In [106]:
q = q24
print(f"q24:{q}")
sql = llmu.chat_text_to_sql(glm46, schema, q)
print(sql)
ddbu.sql_print(con, sql)

q24:
create a SQL query that outputs the following columns:
- ouid, name with alias orgunit from table orgunit
- average salary of all employees in that orgunit, alias asal
only orgunit with max avg salary
sorted by ouid


WITH orgunit_avg AS (
    SELECT 
        o.OUID, 
        o.Name AS orgunit, 
        AVG(e.Salary) AS asal
    FROM OrgUnit o
    JOIN Employee e ON o.OUID = e.OUID
    GROUP BY o.OUID, o.Name
)
SELECT OUID, orgunit, asal
FROM orgunit_avg
WHERE asal = (SELECT MAX(asal) FROM orgunit_avg)
ORDER BY OUID;

┌───────┬─────────┬──────────┐
│ OUID  │ orgunit │   asal   │
│ int32 │ varchar │  double  │
├───────┼─────────┼──────────┤
│    11 │ Company │ 180000.0 │
└───────┴─────────┴──────────┘

