## Import libraries.

In [1]:
%load_ext pydough.jupyter_extensions

import pydough

%load_ext pydough.jupyter_extensions

#Necessary for comparison
import pandas as pd
from pandas.testing import assert_frame_equal, assert_series_equal
import re
import dfcompare

import collections
import numpy as np
import sqlite3 as sql
import os

pd.options.display.float_format = '{:.6f}'.format

The pydough.jupyter_extensions extension is already loaded. To reload it, use:
  %reload_ext pydough.jupyter_extensions


## Load database.

In [2]:
#YOUR .SQL FILE TO CREATE THE DATABASE, COPY IT TO THIS FOLDER.
SQL_filename = 'car_dealership.sql'

#METADATA FOR THE GRAPH .JSON
metadata_path = "../metadata/car_dealership_graphs.json"
#GRAPH NAME
graph_name = "Dealership"

#DESIRED DATABASE NAME
DB_name = "DATABASE.db"



with open(SQL_filename, 'r') as sql_file:
    sql_script = sql_file.read()

os.remove(DB_name)
connection = sql.connect(DB_name)
cursor = connection.cursor()
cursor.executescript(sql_script)

pydough.active_session.load_metadata_graph(metadata_path, graph_name)
pydough.active_session.connect_database("sqlite", database=DB_name)

DatabaseContext(connection=<pydough.database_connectors.database_connector.DatabaseConnection object at 0x7fd150f15ca0>, dialect=<DatabaseDialect.SQLITE: 'sqlite'>)

## Info

In [3]:
graph = pydough.active_session.metadata

print(pydough.explain_structure(graph))

print(pydough.explain(graph["Cars"], verbose=True))

Structure of PyDough graph: Dealership

  Cars
  ├── _id
  ├── color
  ├── cost
  ├── crtd_ts
  ├── engine_type
  ├── make
  ├── model
  ├── transmission
  ├── vin_number
  ├── year
  ├── inventory_snapshots [multiple InventorySnapshots] (reverse of InventorySnapshots.car)
  └── sale_records [multiple Sales] (reverse of Sales.car)

  Customers
  ├── _id
  ├── address
  ├── city
  ├── crtd_ts
  ├── email
  ├── first_name
  ├── last_name
  ├── phone
  ├── state
  ├── zip_code
  └── car_purchases [multiple Sales] (reverse of Sales.customer)

  InventorySnapshots
  ├── _id
  ├── car_id
  ├── crtd_ts
  ├── is_in_inventory
  ├── snapshot_date
  └── car [one member of Cars] (reverse of Cars.inventory_snapshots)

  PaymentsReceived
  ├── _id
  ├── crtd_ts
  ├── payment_amount
  ├── payment_date
  ├── payment_method
  ├── sale_id
  └── sale_record [one member of Sales] (reverse of Sales.payment)

  Sales
  ├── _id
  ├── car_id
  ├── crtd_ts
  ├── customer_id
  ├── sale_date
  ├── sale_price
  ├

### Notes: 

Queries 5 and 10, 11, 12, 13, 14, 15 are solved parcially because of lack of date manipulation.

Also check 5, 14 and 15 again. 

17 has two queries on the SQL part.

A table was missing from the metadata: PaymentsMade.



# Pydough: Car Dealership Queries.

Below, we demonstrate a series of queries typically executed on the car dealership database.
Each query has been converted from SQL to PyDough manually.

### 1. SQLite BasicQuery 1

Return the car ID, make, model and year for cars that have no sales records. By doing a left join from the cars to sales table.

```SQL
SELECT c.id AS car_id, c.make, c.model, c.year 
FROM cars AS c 
LEFT JOIN sales AS s 
ON c.id = s.car_id 
WHERE s.car_id IS NULL;
```



In [29]:
# Define la consulta SQL en PyDough
query = """
SELECT c._id AS car_id, c.make, c.model, c.year 
FROM cars AS c 
LEFT JOIN sales AS s 
ON c._id = s.car_id 
WHERE s.car_id IS NULL;
"""

sql_output = pd.read_sql_query(query, connection)
sql_output

Unnamed: 0,car_id,make,model,year
0,11,Mazda,CX-5,2022
1,12,Hyundai,Tucson,2023
2,13,Kia,Sorento,2021
3,14,Jeep,Wrangler,2022
4,15,GMC,Sierra 1500,2023
5,16,Ram,1500,2022
6,17,Mercedes-Benz,E-Class,2021
7,18,Volkswagen,Tiguan,2022
8,19,Volvo,XC90,2023
9,20,Porsche,911,2022


In [28]:
%%pydough
result = Cars(_id, make, model, year).WHERE(HASNOT(sale_records))
pydough.to_df(result)

Unnamed: 0,_id,make,model,year
0,11,Mazda,CX-5,2022
1,12,Hyundai,Tucson,2023
2,13,Kia,Sorento,2021
3,14,Jeep,Wrangler,2022
4,15,GMC,Sierra 1500,2023
5,16,Ram,1500,2022
6,17,Mercedes-Benz,E-Class,2021
7,18,Volkswagen,Tiguan,2022
8,19,Volvo,XC90,2023
9,20,Porsche,911,2022


In [17]:
pydough_output = pydough.to_df(result)

dfcompare.compare_df(pydough_output, sql_output, query_category="a", question="a")

True

### 2. SQLite BasicQuery 2

Return the distinct list of customer IDs that have made a purchase, based on joining the customers and sales tables.

```SQL
SELECT DISTINCT c.id AS customer_id 
FROM customers  AS c 
JOIN sales  AS s 
ON c.id = s.customer_id;
```



In [4]:
# Define la consulta SQL en PyDough
query = """
SELECT DISTINCT c._id AS customer_id 
FROM customers  AS c 
JOIN sales  AS s 
ON c._id = s.customer_id;
"""
sql_output = pd.read_sql_query(query, connection)
sql_output

Unnamed: 0,customer_id
0,3
1,5
2,2
3,9
4,7
5,1
6,6
7,10
8,8
9,4


In [6]:
%%pydough
result = Customers.WHERE(HAS(car_purchases))(_id)

pydough.to_df(result)

Unnamed: 0,_id
0,1
1,2
2,3
3,4
4,5
5,6
6,7
7,8
8,9
9,10


In [7]:
pydough_output = pydough.to_df(result)

dfcompare.compare_df(pydough_output, sql_output, query_category="a", question="a")

np.True_

### 3. SQLite BasicQuery 3

Return the distinct list of salesperson IDs that have received a cash payment, based on joining the salespersons, sales and payments_received tables.

```SQL
SELECT DISTINCT s.id AS salesperson_id 
FROM salespersons AS s 
JOIN sales AS sa 
ON s.id = sa.salesperson_id 
JOIN payments_received AS p 
ON sa.id = p.sale_id 
WHERE p.payment_method = 'cash';
```



In [8]:
# Define la consulta SQL en PyDough
query = """
SELECT DISTINCT s._id AS salesperson_id 
FROM salespersons AS s 
JOIN sales AS sa 
ON s._id = sa.salesperson_id 
JOIN payments_received AS p 
ON sa._id = p.sale_id 
WHERE p.payment_method = 'cash';
"""

sql_output = pd.read_sql_query(query, connection)
sql_output

Unnamed: 0,salesperson_id
0,4
1,7
2,1


In [9]:
%%pydough
result = Salespersons.WHERE(
    HAS(
        sales_made.WHERE(
            HAS(payment.WHERE(payment_method == "cash"))
        )
    )
)(_id)


pydough.to_df(result)

Unnamed: 0,_id
0,1
1,4
2,7


In [10]:
pydough_output = pydough.to_df(result)

dfcompare.compare_df(pydough_output, sql_output, query_category="a", question="a")

np.True_

### 4. SQLite BasicQuery 4

Return the salesperson ID, first name and last name for salespersons that have no sales records, by doing a left join from the salespersons to sales table.

```SQL
SELECT s.id AS salesperson_id, s.first_name, s.last_name 
FROM salespersons AS s 
LEFT JOIN sales AS sa 
ON s.id = sa.salesperson_id 
WHERE sa.salesperson_id IS NULL;
```



In [20]:
# Define la consulta SQL en PyDough
query = """
SELECT s._id AS salesperson_id, s.first_name, s.last_name 
FROM salespersons AS s 
LEFT JOIN sales AS sa 
ON s._id = sa.salesperson_id 
WHERE sa.salesperson_id IS NULL;
"""

sql_output = pd.read_sql_query(query, connection)
sql_output

Unnamed: 0,salesperson_id,first_name,last_name
0,5,David,Wilson
1,8,Olivia,Thomas
2,9,James,Jackson
3,10,Sophia,White
4,11,Robert,Johnson
5,12,Jennifer,Davis
6,13,Jessica,Rodriguez


In [41]:
%%pydough
result = Salespersons(_id, first_name, last_name).WHERE(HASNOT(sales_made))

pydough.to_df(result)


Unnamed: 0,_id,first_name,last_name
0,5,David,Wilson
1,8,Olivia,Thomas
2,9,James,Jackson
3,10,Sophia,White
4,11,Robert,Johnson
5,12,Jennifer,Davis
6,13,Jessica,Rodriguez


In [11]:
pydough_output = pydough.to_df(result)

dfcompare.compare_df(pydough_output, sql_output, query_category="a", question="a")

np.True_

### 5. SQLite BasicQuery 5 (Date)

Return the top 5 salespersons by number of sales in the past 30 days? Return their first and last name, total sales count and total revenue amount.

```SQL
SELECT sp.first_name, sp.last_name, COUNT(s.id) AS total_sales, SUM(s.sale_price) AS total_revenue 
FROM sales AS s 
JOIN salespersons AS sp 
ON s.salesperson_id = sp.id 
WHERE s.sale_date >= DATE('now', '-30 days') 
GROUP BY sp.first_name, sp.last_name, sp.id 
ORDER BY total_sales DESC LIMIT 5;
```

DATE OPERATIONS ARE NOT AVAILABLE


In [82]:
# Define la consulta SQL en PyDough
query = """
SELECT sp.first_name, sp.last_name, COUNT(s._id) AS total_sales, SUM(s.sale_price) AS total_revenue 
FROM sales AS s 
JOIN salespersons AS sp 
ON s.salesperson_id = sp._id 
WHERE s.sale_date >= DATE('now', '-30 days') 
GROUP BY sp.first_name, sp.last_name, sp._id 
ORDER BY total_sales DESC LIMIT 5;
"""

sql_output = pd.read_sql_query(query, connection)
sql_output

Unnamed: 0,first_name,last_name,total_sales,total_revenue
0,Jane,Smith,3,140000.0
1,Michael,Johnson,2,69700.0
2,Emily,Brown,1,29500.0
3,John,Doe,1,26000.0
4,Sarah,Taylor,1,43500.0


In [None]:
%%pydough 

sales_person_last_month = Salespersons.WHERE(
    HAS(sales_made.WHERE(sale_date >= '2025-01-10'))) #There is no way to calculate 30 days ago

result = PARTITION(sales_person_last_month,name="sp", by=(_id, first_name, last_name))(
    first_name,
    last_name,
    total_sales=COUNT(sp.sales_made),
    total_revenue=SUM(sp.sales_made.sale_price)
).TOP_K(5, by=total_sales.DESC())


pydough.to_df(result)


In [15]:
pydough_output = pydough.to_df(result)

dfcompare.compare_df(pydough_output, sql_output, query_category="a", question="a")

np.False_

### 6. SQLite BasicQuery 6

Return the top 5 states by total revenue, showing the number of unique customers and total revenue (based on sale price) for each state.

```SQL
SELECT c.state, COUNT(DISTINCT s.customer_id) AS unique_customers, SUM(s.sale_price) AS total_revenue 
FROM sales AS s 
JOIN customers AS c 
ON s.customer_id = c.id 
GROUP BY c.state 
ORDER BY CASE WHEN total_revenue IS NULL THEN 1 ELSE 0 END DESC, total_revenue DESC LIMIT 5;
```



In [21]:
# Define la consulta SQL en PyDough
query = """
SELECT c.state, COUNT(DISTINCT s.customer_id) AS unique_customers, SUM(s.sale_price) AS total_revenue 
FROM sales AS s 
JOIN customers AS c 
ON s.customer_id = c._id 
GROUP BY c.state 
ORDER BY CASE WHEN total_revenue IS NULL THEN 1 ELSE 0 END DESC, total_revenue DESC LIMIT 5;
"""

sql_output = pd.read_sql_query(query, connection)
sql_output

Unnamed: 0,state,unique_customers,total_revenue
0,CA,3,257000.0
1,TX,3,198500.0
2,IL,1,124500.0
3,WA,1,90000.0
4,FL,1,54900.0


In [19]:
%%pydough
result = PARTITION(Customers, name="c", by=state)(
    state,
    total_customers=COUNT(c._id),
    total_revenue=SUM(c.car_purchases.sale_price)
).TOP_K(5, by=total_revenue.DESC())

pydough.to_df(result)


Unnamed: 0,state,total_customers,total_revenue
0,CA,4,257000.0
1,TX,3,198500.0
2,IL,1,124500.0
3,WA,1,90000.0
4,FL,1,54900.0


In [22]:
pydough_output = pydough.to_df(result)

dfcompare.compare_df(pydough_output, sql_output, query_category="a", question="a")

np.False_

### 7. SQLite BasicQuery 7

What are the top 3 payment methods by total payment amount received? Return the payment method, total number of payments and total amount.

```SQL
SELECT payment_method, COUNT(*) AS total_payments, 
SUM(payment_amount) AS total_amount 
FROM payments_received 
GROUP BY payment_method 
ORDER BY CASE WHEN total_amount IS NULL THEN 1 ELSE 0 END DESC, total_amount DESC LIMIT 3;
```



In [26]:
# Define la consulta SQL en PyDough
query = """
SELECT payment_method, COUNT(*) AS total_payments, 
SUM(payment_amount) AS total_amount 
FROM payments_received 
GROUP BY payment_method 
ORDER BY CASE WHEN total_amount IS NULL THEN 1 ELSE 0 END DESC, total_amount DESC LIMIT 3;
"""

sql_output = pd.read_sql_query(query, connection)
sql_output

Unnamed: 0,payment_method,total_payments,total_amount
0,credit_card,7,426500.0
1,financing,5,252700.0
2,debit_card,5,216000.0


In [27]:
%%pydough

result = PARTITION(PaymentsReceived, name="p", by=payment_method)(
    payment_method,
    total_payments=COUNT(p._id),
    total_amount=SUM(p.payment_amount), 
).ORDER_BY(
    total_amount.DESC()
).TOP_K(3, by=total_amount.DESC())

pydough.to_df(result)

Unnamed: 0,payment_method,total_payments,total_amount
0,credit_card,7,426500.0
1,financing,5,252700.0
2,debit_card,5,216000.0


In [28]:
pydough_output = pydough.to_df(result)

dfcompare.compare_df(pydough_output, sql_output, query_category="a", question="a")

True

### 8. SQLite BasicQuery 8 

What are the top 5 best selling car models by total revenue? Return the make, model, total number of sales and total revenue.

```SQL
SELECT c.make, c.model, COUNT(s.id) AS total_sales, 
SUM(s.sale_price) AS total_revenue 
FROM sales AS s 
JOIN cars AS c 
ON s.car_id = c.id 
GROUP BY c.make, c.model 
ORDER BY CASE WHEN total_revenue IS NULL THEN 1 ELSE 0 END DESC, total_revenue DESC LIMIT 5;

```



In [31]:
# Define la consulta SQL en PyDough
query = """
SELECT c.make, c.model, COUNT(s._id) AS total_sales, 
SUM(s.sale_price) AS total_revenue 
FROM sales AS s 
JOIN cars AS c 
ON s.car_id = c._id 
GROUP BY c.make, c.model 
ORDER BY CASE WHEN total_revenue IS NULL THEN 1 ELSE 0 END DESC, total_revenue DESC LIMIT 5;
"""

sql_output = pd.read_sql_query(query, connection)
sql_output

Unnamed: 0,make,model,total_sales,total_revenue
0,Ford,Mustang,5,233500.0
1,Tesla,Model 3,4,184500.0
2,Audi,A4,2,81500.0
3,BMW,X5,1,63000.0
4,Subaru,Outback,2,59500.0


In [32]:
%%pydough
result = Cars(
    make,
    model,
    total_sales=COUNT(sale_records._id),
    total_revenue=SUM(sale_records.sale_price)
).TOP_K(5, by=total_revenue.DESC())

pydough.to_df(result)


Unnamed: 0,make,model,total_sales,total_revenue
0,Ford,Mustang,5,233500.0
1,Tesla,Model 3,4,184500.0
2,Audi,A4,2,81500.0
3,BMW,X5,1,63000.0
4,Subaru,Outback,2,59500.0


In [33]:
pydough_output = pydough.to_df(result)

dfcompare.compare_df(pydough_output, sql_output, query_category="a", question="a")

True

### 9. SQLite BasicQuery 9

What are the total number of customer signups for the top 2 states? Return the state and total signups, starting from the top.

```SQL
SELECT c.make, c.model, COUNT(s.id) AS total_sales, 
SUM(s.sale_price) AS total_revenue 
FROM sales AS s 
JOIN cars AS c 
ON s.car_id = c.id 
GROUP BY c.make, c.model 
ORDER BY CASE WHEN total_revenue IS NULL THEN 1 ELSE 0 END DESC, total_revenue DESC LIMIT 5;
```



In [None]:
# Define la consulta SQL en PyDough
query = """
SELECT c.make, c.model, COUNT(s._id) AS total_sales, 
SUM(s.sale_price) AS total_revenue 
FROM sales AS s 
JOIN cars AS c 
ON s.car_id = c._id 
GROUP BY c.make, c.model 
ORDER BY CASE WHEN total_revenue IS NULL THEN 1 ELSE 0 END DESC, total_revenue DESC LIMIT 5;
"""

sql_output = pd.read_sql_query(query, connection)
sql_output

Unnamed: 0,payment_method,total_payments,total_amount
0,credit_card,7,426500.0
1,financing,5,252700.0
2,debit_card,5,216000.0


In [None]:
%%pydough
grouped_customers = PARTITION(Customers, name="grouped", by=state)(
    state,
    total_signups=COUNT(grouped._id) 
).TOP_K(2, by=total_signups.DESC())

pydough.to_df(grouped_customers)



Unnamed: 0,state,total_signups
0,CA,4
1,TX,3


In [34]:
pydough_output = pydough.to_df(result)

dfcompare.compare_df(pydough_output, sql_output, query_category="a", question="a")

True

### 10. SQLite BasicQuery 10 (Date)

Who were the top 3 sales representatives by total revenue in the past 3 months, inclusive of today's date? Return their first name, last name, total number of sales and total revenue. Note that revenue refers to the sum of sale_price in the sales table.

```SQL
SELECT c.first_name, c.last_name, COUNT(s.id) AS total_sales, 
SUM(s.sale_price) AS total_revenue 
FROM sales AS s 
JOIN salespersons AS c ON s.salesperson_id = c.id 
WHERE s.sale_date >= DATE('now', '-3 months') 
GROUP BY c.first_name, c.last_name 
ORDER BY total_revenue DESC LIMIT 3;
```

PYDOUGH DOESN'T SUPPORT DATE OPERATIONS.



In [None]:
%%pydough

result = Salespersons(
    first_name,
    last_name,
    total_sales=COUNT(sales_made.WHERE(sale_date >= "2023-11-01")._id), #Should be a calculated 3 months ago.
    total_revenue=SUM(sales_made.WHERE(sale_date >= "2023-11-01").sale_price)  
).TOP_K(3, by=total_revenue.DESC())

pydough.to_df(result)


Unnamed: 0,first_name,last_name,total_sales,total_revenue
0,John,Doe,4,168000.0
1,Jane,Smith,3,140000.0
2,Michael,Johnson,2,69700.0


### 11. SQLite Generated Query 1 (Date)

Return the name and phone number of the salesperson with the shortest time from being hired to getting fired. Return the number of days he/she was employed for.

```SQL
SELECT s.first_name, s.last_name, s.phone, julianday(s.termination_date) - julianday(s.hire_date) AS days_employed 
FROM salespersons AS s 
ORDER BY CASE WHEN days_employed IS NULL THEN 1 ELSE 0 END, days_employed ASC LIMIT 1;
```



In [35]:
# Define la consulta SQL en PyDough
query = """
SELECT s.first_name, s.last_name, s.phone, julianday(s.termination_date) - julianday(s.hire_date) AS days_employed 
FROM salespersons AS s 
ORDER BY CASE WHEN days_employed IS NULL THEN 1 ELSE 0 END, days_employed ASC LIMIT 1;
"""

sql_output = pd.read_sql_query(query, connection)
sql_output

Unnamed: 0,first_name,last_name,phone,days_employed
0,Olivia,Thomas,(333)-415-0000,181.0


In [None]:
%%pydough

result = Salespersons.WHERE(PRESENT(termination_date))(
    first_name,
    last_name,
    phone,
    termination_date,
    hire_date,
    days_employed=(DAY(termination_date) - DAY(hire_date)) #Doesn't work!
).TOP_K(1, by=days_employed.ASC())

pydough.to_df(result)


Unnamed: 0,first_name,last_name,phone,termination_date,hire_date,days_employed
0,Emily,Brown,(444)-111-2222,2025-01-10,2024-02-10,0


In [37]:
pydough_output = pydough.to_df(result)

dfcompare.compare_df(pydough_output, sql_output, query_category="a", question="a")

False

### 12. SQLite Generated Query 2 (PaymentsMade)

Return the number of payments made on weekends to the vendor named 'Utility Company'

```SQL
SELECT COUNT(*) AS weekend_payments 
FROM payments_made 
WHERE vendor_name = 'Utility Company' 
AND strftime('%w', payment_date) IN ('0', '6');
```



In [38]:
# Define la consulta SQL en PyDough
query = """
SELECT COUNT(*) AS weekend_payments 
FROM payments_made 
WHERE vendor_name = 'Utility Company' 
AND strftime('%w', payment_date) IN ('0', '6');
"""

sql_output = pd.read_sql_query(query, connection)
sql_output

Unnamed: 0,weekend_payments
0,1


In [None]:
%%pydough

result = PaymentsMade.WHERE(
    (vendor_name == "Utility Company") & (weekend_filter) #There is no way to calculate weekends.
)(
    weekend_payments=COUNT(_id)
)

pydough.to_df(result)

In [None]:
pydough_output = pydough.to_df(result)

dfcompare.compare_df(pydough_output, sql_output, query_category="a", question="a")

### 13. SQLite Generated Query 3 (Date)

Show me the daily total amount of payments received in the whole of the previous ISO week not including the current week, split by the payment_method.


```SQL
SELECT payment_date, payment_method, SUM(payment_amount) AS total_amount 
FROM payments_received 
WHERE payment_date >= DATE('now',  '-' || ((strftime('%w', 'now') + 6) % 7) || ' days', '-7 days') 
AND payment_date < DATE('now',  '-' || ((strftime('%w', 'now') + 6) % 7) || ' days') 
GROUP BY payment_date, payment_method ORDER BY payment_date DESC, payment_method ASC;
```



In [None]:
%%pydough

result = PaymentsReceived.WHERE(
    (payment_date >= start_date) & (payment_date <= end_date) #There is no way to calculate start_date or end_date
)(
    payment_date,
    payment_method,
    total_amount=SUM(payment_amount)
).ORDER_BY(payment_date.DESC(), payment_method.ASC())

pydough.to_df(result) 

In [None]:
pydough_output = pydough.to_df(result)

dfcompare.compare_df(pydough_output, sql_output, query_category="a", question="a")

### 14. SQLite Generated Query 3 !!!!

What were the total quarterly sales in 2023 grouped by customer's state? Represent each quarter as the first date in the quarter.

```SQL
SELECT CASE WHEN strftime('%m', s.sale_date) BETWEEN '01' AND '03' THEN '2023-01-01' 
WHEN strftime('%m', s.sale_date) BETWEEN '04' AND '06' THEN '2023-04-01' 
WHEN strftime('%m', s.sale_date) BETWEEN '07' AND '09' THEN '2023-07-01' ELSE '2023-10-01' END AS quarter, 
c.state, SUM(s.sale_price) AS total_sales 
FROM sales AS s 
JOIN customers AS c 
ON s.customer_id = c.id 
WHERE strftime('%Y', s.sale_date) = '2023' 
GROUP BY c.state, quarter 
HAVING SUM(s.sale_price) > 0 
ORDER BY quarter, c.state;
```



In [None]:
%%pydough
sales_with_state = Sales.WHERE(YEAR(sale_date) == 2023)(
    sale_price,
    sale_date,
    customer_state=customer.state, 
)

result = PARTITION(sales_with_state, name="s", by=(customer_state))(
    customer_state=s.customer_state, 
    total_sales=SUM(s.sale_price)
).WHERE(total_sales > 0)(
    quarter=IFF(
        MONTH(s.sale_date) <= 3, "2023-01-01",
        IFF(MONTH(s.sale_date) <= 6, "2023-04-01",
        IFF(MONTH(s.sale_date) <= 9, "2023-07-01", "2023-10-01"))
    ) 
).ORDER_BY(quarter.ASC(), customer_state.ASC())

pydough.to_df(result)




### 15. SQLite Generated Query 4

Which cars were in inventory in the latest snapshot for march 2023? Return the car id, make, model, and year. Cars are considered to be in inventory" if is_in_inventory is True."

```SQL
WITH latest_snapshot AS (SELECT MAX(snapshot_date) AS snapshot_date 
FROM inventory_snapshots 
WHERE snapshot_date BETWEEN '2023-03-01' AND '2023-03-31'), latest_snapshot_data AS 
(SELECT inv.car_id 
FROM inventory_snapshots AS inv 
JOIN latest_snapshot AS ls 
ON inv.snapshot_date = ls.snapshot_date WHERE inv.is_in_inventory = TRUE) 
SELECT c.id, c.make, c.model, c.year 
FROM cars AS c 
JOIN latest_snapshot_data AS lsd 
ON c.id = lsd.car_id;
```



In [None]:
%%pydough

# Step 1: Find the latest snapshot date within March 2023
latest_snapshot = InventorySnapshots.WHERE(
    (snapshot_date >= "2023-03-01") & (snapshot_date <= "2023-03-31")
)(
    latest_snapshot_date=MAX(snapshot_date)  # Get the latest snapshot date
)

# Step 2: Find inventory records for the latest snapshot
latest_inventory = InventorySnapshots.WHERE(
    (snapshot_date == latest_snapshot.latest_snapshot_date) & (is_in_inventory == 1)
)(
    car
)

# Step 3: Retrieve car details for the cars found in the latest snapshot
result = latest_inventory.car(
    _id,
    make,
    model,
    year
)

pydough.to_df(result)


### 16. SQLite Advanced Query 1 (DATE)

For sales with sale price over $30,000, how many payments were received in total and on weekends in each of the last 8 calendar weeks (excluding the current week)? Return the week (as a date), total payments received, and weekend payments received in ascending order.

Weekend days are Saturday (6) and Sunday (0). Truncate date to week for aggregation. A week begins on 'weekday 1'

To calculate the average days between sale date and payment received date, join the sales and payments received tables. Weekend days are Saturday (6) and Sunday (0). Truncate date to week for aggregation. When using car makes, model names, engine_type and vin_number, match case-insensitively and allow partial matches using LIKE with wildcards. To get the total sales amount per salesperson, join the salespersons and sales tables, group by salesperson, and sum the sale_price

```SQL
SELECT date(p.payment_date,  '-' || ((strftime('%w', p.payment_date) + 6) % 7) || ' days') AS week, 
COUNT(p.id) AS total_payments, COUNT(CASE WHEN strftime('%w', p.payment_date) IN ('0', '6') THEN 1 END) AS weekend_payments 
FROM payments_received AS p 
JOIN sales AS s ON p.sale_id = s.id 
WHERE s.sale_price > 30000 AND p.payment_date >= date('now',  '-' || ((strftime('%w', 'now') + 6) % 7) || ' days', '-56 days') 
AND p.payment_date < date('now',  '-' || ((strftime('%w', 'now') + 6) % 7) || ' days') 
GROUP BY week ORDER BY week ASC;
```



In [None]:
%%pydough
# Step 1: Filter sales with sale price > 30,000
high_value_sales = Sales.WHERE(sale_price > 30000)(
    _id,
    sale_price
)

# Step 2: Join payments to these sales
valid_payments = PaymentsReceived.WHERE(HAS(high_value_sales))(
    payment_date,
    _id
)

# Step 3: Extract `month` as an alternative to `week` (since we cannot compute weeks)
payments_grouped = PARTITION(valid_payments, name="p", by=MONTH(payment_date))(
    month=MONTH(p.payment_date),  # Alternative to week-based grouping
    total_payments=COUNT(p._id)  # Total payments in each month
)

pydough.to_df(payments_grouped)


### 17. SQLite Advanced Query 2 (DATE)

How many sales did each salesperson make in the past 30 days, inclusive of today's date? Return their ID, first name, last name and number of sales made, ordered from most to least sales.

To get the number of sales made by each salesperson in the past 30 days, join the salespersons and sales tables and filter for sales in the last 30 days.

"When using car makes, model names, engine_type, and vin_number, ensure matching is case-insensitive and allows for partial matches using LIKE with wildcards.
To get the number of sales made by each salesperson in the past 30 days, join the salespersons and sales tables and filter for sales in the last 30 days.
ASP = Calculate the average sale price without specifying the period
GPM = Define gross profit margin as a ratio without specifying how to calculate total revenue or total cost"

```SQL
WITH recent_sales AS (
    SELECT sp._id, sp.first_name, sp.last_name, COUNT(s._id) AS num_sales
    FROM salespersons AS sp
    LEFT JOIN sales AS s ON sp._id = s.salesperson_id
    WHERE s.sale_date >= DATE('now', '-30 days')
    GROUP BY sp._id
) 
SELECT _id, first_name, last_name, num_sales FROM recent_sales
ORDER BY num_sales DESC;
```


In [None]:
query = """
WITH recent_sales AS (
    SELECT sp._id, sp.first_name, sp.last_name, COUNT(s._id) AS num_sales
    FROM salespersons AS sp
    LEFT JOIN sales AS s ON sp._id = s.salesperson_id
    WHERE s.sale_date >= DATE('now', '-30 days')
    GROUP BY sp._id
) 
SELECT _id, first_name, last_name, num_sales FROM recent_sales
ORDER BY num_sales DESC;
"""

sql_output = pd.read_sql_query(query, connection)
sql_output

Unnamed: 0,_id,first_name,last_name,num_sales
0,2,Jane,Smith,3
1,3,Michael,Johnson,2
2,1,John,Doe,1
3,4,Emily,Brown,1
4,6,Sarah,Taylor,1


In [25]:
%%pydough

result = Salespersons(
    _id,
    first_name,
    last_name,
    num_sales=COUNT(sales_made.WHERE(sale_date >= "2025-01-10")._id)
    ).WHERE(
        HAS(sales_made.WHERE(sale_date >= "2025-01-10")) 
).ORDER_BY(num_sales.DESC())

pydough.to_df(result)


Unnamed: 0,_id,first_name,last_name,num_sales
0,2,Jane,Smith,3
1,3,Michael,Johnson,2
2,1,John,Doe,1
3,4,Emily,Brown,1
4,6,Sarah,Taylor,1


### 18. SQLite Advanced Query 3

How many sales were made for each car model that has 'M5' in its VIN number? Return the make, model and number of sales.

When using car makes, model names, engine_type and vin_number, match case-insensitively and allow partial matches using LIKE with wildcards.

To determine the total sales amount for each salesperson, combine data from the salespersons and sales tables, grouping by salesperson and summing the sale_price
When using car makes, model names, engine_type and vin_number, match case-insensitively and allow partial matches using LIKE with wildcards.
To calculate the average selling price, join the sales and cars tables, and divide the total sales amount by the number of sales
For understanding the number of sales achieved by each salesperson within a specified period, merge the salespersons and sales tables and apply a filter based on the given time frame.

```SQL
SELECT c.make, c.model, COUNT(s.id) AS num_sales 
FROM cars AS c 
LEFT JOIN sales AS s ON c.id = s.car_id 
WHERE LOWER(c.vin_number) 
LIKE '%m5%' 
GROUP BY c.make, c.model;
```



In [119]:
query = """
SELECT c.make, c.model, COUNT(s._id) AS num_sales 
FROM cars AS c 
LEFT JOIN sales AS s ON c._id = s.car_id 
WHERE LOWER(c.vin_number) 
LIKE '%m5%' 
GROUP BY c.make, c.model;
"""

sql_output = pd.read_sql_query(query, connection)
sql_output

Unnamed: 0,make,model,num_sales
0,Ford,Mustang,5


In [20]:
%%pydough

result = Cars(
    make, 
    model,
    num_sales=COUNT(sale_records._id)
    ).WHERE(
        LIKE(vin_number, "%m5%") 
).ORDER_BY(num_sales.DESC())

pydough.to_df(result)


Unnamed: 0,make,model,num_sales
0,Ford,Mustang,5


### 19. SQLite Advanced Query 4 (Date)

How many Toyota cars were sold in the last 30 days inclusive of today? Return the number of sales and total revenue.

To calculate the average days between sale date and payment received date, join the sales and payments_received tables
To get the list of cars that were sold and their sale price, join the cars and sales tables
Last 30 days = DATE('now', -'30 days') to DATE('now'). Always join sales with cars before using the sales table.
When using car makes, model names, engine_type, and vin_number, match case-insensitively and allow partial matches using LIKE with wildcards.

```SQL
SELECT COUNT(s.id) AS num_sales, SUM(s.sale_price) AS total_revenue FROM sales AS s 
JOIN cars AS c 
ON s.car_id = c.id
WHERE c.make = 'Toyota' AND s.sale_date BETWEEN DATE('now', '-30 days') AND DATE('now');
```



In [22]:
%%pydough

result = Cars(
    num_sales=COUNT(sale_records.WHERE(sale_date >= "2024-01-01")._id),  
    total_revenue=SUM(sale_records.WHERE(sale_date >= "2024-01-01").sale_price)
).WHERE(
    LOWER(make) == "toyota" 
)

pydough.to_df(result)

Unnamed: 0,num_sales,total_revenue
0,1,28900.0


In [157]:
%%pydough

selected_sales = Sales.WHERE(
 (sale_date >= "2024-01-01") &
 (LOWER(car.make) == 'toyota')
)

result = Dealership(
 num_sales=COUNT(selected_sales),
 total_revenue=SUM(selected_sales.sale_price)
)

pydough.to_df(result)


Unnamed: 0,num_sales,total_revenue
0,1,28900.0


### 20. SQLite Advanced Query 5

Return the first name, last name, total sales amount, number of sales, and SR for each salesperson.

SR = sales rank of each salesperson ordered by their total sales amount descending To determine the sales performance per territory, sum the sales amount and count the sales, grouping by territory To calculate the average sale price, join the sales table with itself on the salesperson_id and find the ratio of total sales amount to number of sales To assess inventory turnover, compare inventory snapshots with sales on matching days, focusing on the quantity of items sold.

```SQL
WITH salesperson_sales AS (
    SELECT 
        salesperson_id, 
        SUM(sale_price) AS total_sales, 
        COUNT(*) AS num_sales 
    FROM sales 
    GROUP BY salesperson_id
) 
SELECT 
    s.first_name, 
    s.last_name, 
    ss.total_sales, 
    ss.num_sales, 
    RANK() OVER (
        ORDER BY 
            CASE WHEN ss.total_sales IS NULL THEN 1 ELSE 0 END DESC, 
            ss.total_sales DESC
    ) AS sales_rank 
FROM salesperson_sales AS ss 
JOIN salespersons AS s ON ss.salesperson_id = s._id;

```



In [24]:
%%pydough

total_sales= SUM(sales_made.sale_price)

result = Salespersons(
    _id,
    first_name, 
    last_name,
    total_sales=SUM(sales_made.sale_price),  
    num_sales=COUNT(sales_made._id),
    sales_rank=RANKING(by=total_sales.DESC())  
).ORDER_BY(total_sales.DESC())

pydough.to_df(result)



Unnamed: 0,_id,first_name,last_name,total_sales,num_sales,sales_rank
0,2,Jane,Smith,278000.0,6,1
1,1,John,Doe,215000.0,5,2
2,6,Sarah,Taylor,116000.0,3,3
3,3,Michael,Johnson,96500.0,3,4
4,4,Emily,Brown,79500.0,3,5
5,7,Daniel,Anderson,66900.0,2,6
6,5,David,Wilson,0.0,0,7
7,8,Olivia,Thomas,0.0,0,8
8,9,James,Jackson,0.0,0,9
9,10,Sophia,White,0.0,0,10


### 21. SQLite Advanced Query 6

Return the highest sale price for each make and model of cars that have been sold and are no longer in inventory, ordered by the sale price from highest to lowest. Use the most recent date in the inventory_snapshots table to determine that car's inventory status.

"Recall that a car can have multiple entries in the inventory_snapshot table. 
TSC = Count of sales within a specified period
MoM = Change in total receivable amounts from one month to the next, comparing with the immediately preceding month.
ASP = Mean sale price for a designated start period
When getting a car's inventory status, always take the latest status from the inventory_snapshots table"

```SQL
WITH latest_inventory_status AS (
    SELECT 
        car_id, 
        is_in_inventory, 
        ROW_NUMBER() OVER (
            PARTITION BY car_id 
            ORDER BY 
                CASE WHEN snapshot_date IS NULL THEN 1 ELSE 0 END DESC, 
                snapshot_date DESC
        ) AS rn
    FROM inventory_snapshots
) 
SELECT 
    c.make, 
    c.model, 
    MAX(s.sale_price) AS highest_sale_price 
FROM cars AS c 
JOIN sales AS s ON c.id = s.car_id 
JOIN latest_inventory_status AS lis ON c.id = lis.car_id 
WHERE lis.is_in_inventory = FALSE 
AND lis.rn = 1 
GROUP BY c.make, c.model 
ORDER BY 
    CASE WHEN highest_sale_price IS NULL THEN 1 ELSE 0 END DESC, 
    highest_sale_price DESC;

```



In [14]:
%%pydough

# Step 1: Get the latest inventory snapshot for each car
latest_inventory = PARTITION(InventorySnapshots, name="inv", by=car)(
    car=inv.car,
    snapshot_date=MAX(inv.snapshot_date),
    latest_status=FIRST(inv.is_in_inventory.ORDER_BY(inv.snapshot_date.DESC()))  # Get latest inventory status
)

# Step 2: Filter out cars that are no longer in inventory
cars_no_longer_in_inventory = latest_inventory.WHERE(latest_status == False)(
    car
)

# Step 3: Find the highest sale price for each make and model
result = Cars.WHERE(HAS(cars_no_longer_in_inventory))(
    make,
    model,
    highest_sale_price=MAX(sale_records.sale_price)  # Get max sale price per make and model
).ORDER_BY(highest_sale_price.DESC())

pydough.to_df(result)


PyDoughQDAGException: Unrecognized term of simple table collection 'Cars' in graph 'Dealership': 'InventorySnapshots'

### . SQLite Advanced Query



```SQL
```



### . SQLite Advanced Query



```SQL
```



### . SQLite Advanced Query



```SQL
```



### . SQLite Advanced Query



```SQL
```



### . SQLite Advanced Query



```SQL
```

