In [96]:
%LOAD pizza_runner_changed.db rw

"Before you start writing your SQL queries however - you might want to investigate the data, you may want to do something with some of those null values and data types in the customer_orders and runner_orders tables!"

## Tables

In [97]:
SELECT name, sql
FROM sqlite_schema
WHERE type ='table';

name,sql
runners,"CREATE TABLE runners (  ""runner_id"" INTEGER,  ""registration_date"" DATE )"
customer_orders,"CREATE TABLE customer_orders (  ""order_id"" INTEGER,  ""customer_id"" INTEGER,  ""pizza_id"" INTEGER,  ""exclusions"" VARCHAR(4),  ""extras"" VARCHAR(4),  ""order_time"" TIMESTAMP )"
runner_orders,"CREATE TABLE runner_orders (  ""order_id"" INTEGER,  ""runner_id"" INTEGER,  ""pickup_time"" VARCHAR(19),  ""distance"" VARCHAR(7),  ""duration"" VARCHAR(10),  ""cancellation"" VARCHAR(23) )"
pizza_names,"CREATE TABLE pizza_names (  ""pizza_id"" INTEGER,  ""pizza_name"" TEXT )"
pizza_recipes,"CREATE TABLE pizza_recipes (  ""pizza_id"" INTEGER,  ""toppings"" TEXT )"
pizza_toppings,"CREATE TABLE pizza_toppings (  ""topping_id"" INTEGER,  ""topping_name"" TEXT )"


In [139]:
PRAGMA table_info(clean_runner_orders);

cid,name,type,notnull,dflt_value,pk
0,order_id,INTEGER,0,,0
1,runner_id,INTEGER,0,,0
2,pickup_time,,0,,0
3,distance,,0,,0
4,duration,,0,,0
5,cancellation,,0,,0


### Table customer_orders

In [133]:
select CAST(NULL AS VARCHAR(10)) from customer_orders

CAST(NULL AS VARCHAR(10))


In [131]:
--CREATE VIEW clean_customer_orders AS

WITH clean_exclusions AS (
    SELECT order_id,
        CAST
        CASE WHEN exclusions = 'null' THEN NULL 
        ELSE CAST(exclusions AS VARCHAR(10)) END AS exclusions
    FROM customer_orders),
    
clean_extras AS (
    SELECT order_id,
        CASE WHEN extras = 'null' THEN NULL 
        ELSE CAST(extras AS VARCHAR(10)) END AS extras
    FROM customer_orders)

SELECT co.order_id, co.customer_id, 
        co.pizza_id, co.order_time, ce_minus.exclusions, ce_plus.extras
FROM customer_orders AS co
INNER JOIN clean_exclusions AS ce_minus
USING (order_id)
INNER JOIN clean_extras AS ce_plus
USING (order_id);

order_id,customer_id,pizza_id,order_time,exclusions,extras
1,101,1,2020-01-01 18:05:02,,
2,101,1,2020-01-01 19:00:52,,
3,102,1,2020-01-02 23:51:23,,
3,102,1,2020-01-02 23:51:23,,
3,102,1,2020-01-02 23:51:23,,
3,102,1,2020-01-02 23:51:23,,
3,102,2,2020-01-02 23:51:23,,
3,102,2,2020-01-02 23:51:23,,
3,102,2,2020-01-02 23:51:23,,
3,102,2,2020-01-02 23:51:23,,


### Table runner_orders

In [143]:
WITH clean_pickup_time AS (
    SELECT order_id,
        CASE WHEN pickup_time = 'null' THEN NULL 
        ELSE pickup_time END AS pickup_time
    FROM runner_orders)

datetime(pickup_time)
2020-01-01 18:15:34
2020-01-01 19:10:54
2020-01-03 00:12:37
2020-01-04 13:53:03
2020-01-08 21:10:57
2020-01-08 21:30:45
2020-01-10 00:15:02
2020-01-11 18:50:20


In [140]:
--CREATE VIEW clean_runner_orders AS

WITH clean_pickup_time AS (
    SELECT order_id, 
        CASE WHEN pickup_time = 'null' THEN NULL 
        ELSE pickup_time END AS pickup_time
    FROM runner_orders),
    
clean_distance AS (
    SELECT order_id, CAST(
        CASE WHEN distance = 'null' THEN NULL
        ELSE distance END AS distance AS INT)
    FROM runner_orders),
    
clean_duration AS (
    SELECT order_id, CAST(
        CASE WHEN duration = 'null' THEN NULL
        ELSE duration END AS duration AS INT)
    FROM runner_orders),
    
clean_cancellation AS (
    SELECT order_id, CAST(
        CASE WHEN cancellation = 'null' THEN NULL
        ELSE cancellation END AS cancellation AS VARCHAR(20))
    FROM runner_orders)
    
SELECT ro.order_id, ro.runner_id, 
        datetime(cpt.pickup_time), cdi.distance, cdu.duration, cc.cancellation
FROM runner_orders AS ro
INNER JOIN clean_pickup_time AS cpt
USING (order_id)
INNER JOIN clean_duration AS cdu
USING (order_id)
INNER JOIN clean_distance AS cdi
USING (order_id)
INNER JOIN clean_cancellation AS cc
USING (order_id);

Error: near "AS": syntax error

In [130]:
select * from clean_runner_orders

order_id,runner_id,pickup_time,distance,duration,cancellation
1,1,2020-01-01 18:15:34,20.0,32.0,
2,1,2020-01-01 19:10:54,20.0,27.0,
3,1,2020-01-03 00:12:37,13.0,20.0,
4,2,2020-01-04 13:53:03,23.0,40.0,
5,3,2020-01-08 21:10:57,10.0,15.0,
6,3,,,,Restaurant Cancellation
7,2,2020-01-08 21:30:45,25.0,25.0,
8,2,2020-01-10 00:15:02,23.0,15.0,
9,2,,,,Customer Cancellation
10,1,2020-01-11 18:50:20,10.0,10.0,


## A. Pizza Metrics

### 1. How many pizzas were ordered?

In [76]:
SELECT COUNT(*) AS nr_of_pizzas
FROM clean_customer_orders;

nr_of_pizzas
14


### 2.How many unique customer orders were made?
I assume that we are looking for unique combinations of pizza_id, exclusions, and extras.

In [84]:
WITH distinct_orders AS (
    SELECT DISTINCT pizza_id, exclusions, extras
    FROM clean_customer_orders)
    
SELECT * 
FROM distinct_orders;

pizza_id,exclusions,extras
1,,
2,,
1,4,
2,4,
1,,1
2,,
2,,1
1,,
1,4,"1, 5"
1,"2, 6","1, 4"


In [79]:
SELECT * 
FROM clean_customer_orders;

order_id,customer_id,pizza_id,exclusions,extras,order_time
1,101,1,,,2020-01-01 18:05:02
2,101,1,,,2020-01-01 19:00:52
3,102,1,,,2020-01-02 23:51:23
3,102,2,,,2020-01-02 23:51:23
4,103,1,4,,2020-01-04 13:23:46
4,103,1,4,,2020-01-04 13:23:46
4,103,2,4,,2020-01-04 13:23:46
5,104,1,,1,2020-01-08 21:00:29
6,101,2,,,2020-01-08 21:03:13
7,105,2,,1,2020-01-08 21:20:29
