In [2]:
import pypyodbc as odbc
import pandas as pd

DRIVER_NAME = 'SQL SERVER'
SERVER_NAME = 'DESKTOP-I0V76P2\SQLEXPRESS' 
DATABASE_NAME = 'pizza_runner'

connection_string = f"""
    DRIVER={{{DRIVER_NAME}}};
    SERVER={SERVER_NAME};
    DATABASE={DATABASE_NAME};
    Trust_Connection=yes;
    # uid=;
    # pwd=;
"""

def execute_query_to_df(query):
    """Executes a SQL query and returns the results as a pandas DataFrame."""

    conn = odbc.connect(connection_string)
    cursor = conn.cursor()
    cursor.execute(query)
    rows = cursor.fetchall()
    df = pd.DataFrame(rows, columns=cursor.description)
    df.columns = [col[0] for col in df.columns]
    cursor.close()
    conn.close()
    return df

def execute_update(query):
    """Executes a SQL query that doesn't return results."""

    conn = odbc.connect(connection_string)
    cursor = conn.cursor()
    cursor.execute(query)
    conn.commit()  # Commit the changes to the database
    cursor.close()
    conn.close()

In [5]:
query = """
    SELECT TABLE_NAME
    FROM INFORMATION_SCHEMA.TABLES;
"""

execute_query_to_df(query)

Unnamed: 0,table_name
0,runners
1,runner_orders
2,pizza_names
3,pizza_recipes
4,pizza_toppings
5,customer_orders


In [2]:
# Cleaning the customer_orders table

query = """
    UPDATE customer_orders
        SET 
            extras = CASE 
                        WHEN extras = 'null' THEN ''
                        WHEN extras IS NULL THEN ''
                        ELSE extras
                    END,
                exclusions = CASE
                        WHEN exclusions = 'null' THEN ''
                        ELSE exclusions
                    END;
"""

execute_update(query)

In [31]:
# Cleaning the runner_orders table

query = """
    UPDATE runner_orders
        SET
            pickup_time =   CASE 
                                WHEN pickup_time = 'null' THEN '' 
                                ELSE pickup_time 
                            END,
            distance =      RTRIM(REPLACE(distance, 'km', '')),
            duration =      RTRIM(REPLACE(REPLACE(REPLACE(duration, 'minutes', ''), 'mins', ''), 'minute', '')),
            cancellation =  CASE 
                                WHEN cancellation = 'None' THEN ''
                                WHEN cancellation = 'null' THEN ''
                                WHEN cancellation IS NULL THEN ''
                                ELSE cancellation 
                            END;
"""

execute_update(query)

In [32]:
# Cleaning the runner_orders table

query = """
    UPDATE runner_orders
        SET
            distance =      CASE 
                                WHEN distance = 'null' 
                                THEN '' ELSE distance
                            END,
            duration =      CASE 
                                WHEN duration = 'null' THEN '' ELSE duration 
                            END;
"""

execute_update(query)

In [60]:
# Cleaning the pizza_names table

query = """
    ALTER TABLE pizza_names
    ALTER COLUMN pizza_name VARCHAR(20);
"""

execute_update(query)

In [65]:
# Cleaning the pizza_recipes table

query = """
    ALTER TABLE pizza_recipes
    ALTER COLUMN toppings VARCHAR(100);
"""

execute_update(query)

In [63]:
# Cleaning the pizza_toppings table

query = """
    ALTER TABLE pizza_toppings
    ALTER COLUMN topping_name VARCHAR(20);
"""

execute_update(query)

### A. Pizza Metrics

1. How many pizzas were ordered?

2. How many unique customer orders were made?

3. How many successful orders were delivered by each runner?

4. How many of each type of pizza was delivered?

5. How many Vegetarian and Meatlovers were ordered by each customer?

6. What was the maximum number of pizzas delivered in a single order?

7. For each customer, how many delivered pizzas had at least 1 change and how many had no changes?

8. How many pizzas were delivered that had both exclusions and extras?

9. What was the total volume of pizzas ordered for each hour of the day?

10. What was the volume of orders for each day of the week?

In [41]:
# 1. How many pizzas were ordered?

query = """
    SELECT COUNT(customer_id) FROM customer_orders;
"""

execute_query_to_df(query)

Unnamed: 0,Unnamed: 1
0,14


In [42]:
# 2. How many unique customer orders were made?

query = """
    SELECT COUNT(DISTINCT order_id) FROM customer_orders;
"""

execute_query_to_df(query)

Unnamed: 0,Unnamed: 1
0,10


In [45]:
# 3. How many successful orders were delivered by each runner?

query = """
    SELECT COUNT(DISTINCT order_id) FROM runner_orders
    WHERE cancellation NOT LIKE '%ancel%';
"""

execute_query_to_df(query)

Unnamed: 0,Unnamed: 1
0,8


In [62]:
# 4. How many of each type of pizza was delivered?

query = """
    SELECT 
        pn.pizza_name, 
        COUNT(co.order_id) 
    FROM customer_orders AS co 
        INNER JOIN runner_orders AS ro
            ON ro.order_id = co.order_id
        INNER JOIN pizza_names AS pn
            ON pn.pizza_id = co.pizza_id
    WHERE ro.cancellation NOT LIKE '%ancel%'
    GROUP BY pn.pizza_name;
"""

execute_query_to_df(query)

Unnamed: 0,pizza_name,Unnamed: 2
0,Meatlovers,9
1,Vegetarian,3


In [77]:
# 5. How many Vegetarian and Meatlovers were ordered by each customer?

query = """
    WITH cte AS(
        SELECT
            co.customer_id,
            CASE WHEN pn.pizza_name = 'Meatlovers' THEN 1 ELSE 0 END AS MeatLovers,
            CASE WHEN pn.pizza_name = 'Vegetarian' THEN 1 ELSE 0 END AS Vegetarian
        FROM customer_orders AS co
            INNER JOIN pizza_names AS pn
                ON co.pizza_id = pn.pizza_id
                )
                
    SELECT 
        customer_id, 
        SUM(meatlovers) AS meatlovers, 
        SUM(vegetarian) AS vegetarian
    FROM cte 
    GROUP BY customer_id;
"""

execute_query_to_df(query)

Unnamed: 0,customer_id,meatlovers,vegetarian
0,101,2,1
1,102,2,1
2,103,3,1
3,104,3,0
4,105,0,1


In [86]:
# 6. What was the maximum number of pizzas delivered in a single order?

query = """
    SELECT TOP (1)
        COUNT(co.pizza_id) AS count_of_pizza
    FROM customer_orders AS co
        INNER JOIN runner_orders AS ro
            ON co.order_id = ro.order_id
    WHERE ro.cancellation NOT LIKE '%ancel%'
    GROUP BY co.order_id
    ORDER BY count_of_pizza DESC;
"""

execute_query_to_df(query)

Unnamed: 0,count_of_pizza
0,3


In [95]:
# 7. For each customer, how many delivered pizzas had at least 1 change and how many had no changes?

query = """
    WITH cte as(
        SELECT 
            co.customer_id,
            CASE WHEN co.exclusions = '' OR co.extras = '' THEN 0 ELSE 1 END AS changes,
            CASE WHEN co.exclusions = '' OR co.extras = '' THEN 1 ELSE 0 END AS no_changes
        FROM customer_orders AS co
            INNER JOIN runner_orders AS ro
                ON co.order_id = ro.order_id
        WHERE ro.cancellation NOT LIKE '%ancel%'
        )
        
    SELECT customer_id, 
        SUM(changes) AS changes,
        SUM(no_changes) AS no_changes 
        FROM cte GROUP BY customer_id;
"""

execute_query_to_df(query)

Unnamed: 0,customer_id,changes,no_changes
0,101,0,2
1,102,0,3
2,103,0,3
3,104,1,2
4,105,0,1


In [99]:
# 8. How many pizzas were delivered that had both exclusions and extras?

query = """
    SELECT SUM(changes) FROM
        (SELECT 
            co.customer_id,
            CASE WHEN co.exclusions <> '' AND co.extras <> '' THEN 1 ELSE 0 END AS changes
        FROM customer_orders AS co
            INNER JOIN runner_orders AS ro
                ON co.order_id = ro.order_id
        WHERE ro.cancellation NOT LIKE '%ancel%') AS a;
"""

execute_query_to_df(query)

Unnamed: 0,Unnamed: 1
0,1


In [113]:
# 9. What was the total volume of pizzas ordered for each hour of the day?

query = """
    WITH cte AS(
        SELECT 
            DATEPART(hour, order_time) AS hour, 
            COUNT(pizza_id) AS count_of_orders
        FROM customer_orders
        GROUP BY DATEPART(hour, order_time)
        )
        
    SELECT 
        day.hour,
        cte.count_of_orders
    FROM (VALUES    (1),(2),(3),(4),(5),(6),(7),(8),
                    (9),(10),(11),(12),(13),(14),(15),
                    (16),(17),(18),(19),(20),(21),(22),
                    (23),(24)) AS day(hour)
        LEFT OUTER JOIN cte
            ON cte.hour = day.hour;
"""

execute_query_to_df(query)

Unnamed: 0,hour,count_of_orders
0,1,
1,2,
2,3,
3,4,
4,5,
5,6,
6,7,
7,8,
8,9,
9,10,


In [131]:
# 10. What was the volume of orders for each day of the week?

query = """
    SELECT 
        DATENAME(w, order_time) AS day, 
        COUNT(pizza_id) AS count_of_orders
    FROM customer_orders
    GROUP BY DATENAME(w, order_time);
"""

execute_query_to_df(query)

Unnamed: 0,day,count_of_orders
0,Friday,1
1,Saturday,5
2,Thursday,3
3,Wednesday,5


### B. Runner and Customer Experience

1. How many runners signed up for each 1 week period? (i.e. week starts 2021-01-01)

2. What was the average time in minutes it took for each runner to arrive at the Pizza Runner HQ to pickup the order?

3. Is there any relationship between the number of pizzas and how long the order takes to prepare?

4. What was the average distance travelled for each customer?

5. What was the difference between the longest and shortest delivery times for all orders?

6. What was the average speed for each runner for each delivery and do you notice any trend for these values?

7. What is the successful delivery percentage for each runner?

In [150]:
# 1. How many runners signed up for each 1 week period? (i.e. week starts 2021-01-01)

query = """
    WITH cte AS(
        SELECT 
            *, 
            (DATEDIFF(day, '20210101', registration_date) / 7) + 1 AS signup_week
        FROM runners
    )

    SELECT
        signup_week,
        COUNT(signup_week) AS count_of_signups
    FROM cte
    GROUP BY signup_week;
"""

execute_query_to_df(query)

Unnamed: 0,signup_week,count_of_signups
0,1,2
1,2,1
2,3,1


In [161]:
# 2. What was the average time in minutes it took for each runner to arrive at the Pizza Runner HQ to pickup the order?

query = """
    WITH cte AS(
        SELECT 
            co.order_id,
            ro.runner_id,
            AVG(DATEDIFF(minute, co.order_time, ro.pickup_time)) AS avg_time
        FROM runner_orders AS ro
            INNER JOIN customer_orders AS co
                ON ro.order_id = co.order_id
        WHERE ro.pickup_time <> ''
        GROUP BY co.order_id, ro.runner_id
        )
        
    SELECT 
        runner_id,
        AVG(avg_time) AS avg_time
    FROM cte
    GROUP BY runner_id;
"""

execute_query_to_df(query)

Unnamed: 0,runner_id,avg_time
0,1,14
1,2,20
2,3,10


In [173]:
# 3. Is there any relationship between the number of pizzas and how long the order takes to prepare?

query = """
    WITH cte AS(
        SELECT 
            co.order_id,
            DATEDIFF(minute,co.order_time, ro.pickup_time) AS pickup_time,
            (SELECT COUNT(order_id) FROM customer_orders WHERE order_id = co.order_id) AS count_pizzas
        FROM runner_orders AS ro
            INNER JOIN customer_orders AS co
                ON ro.order_id = co.order_id
        WHERE ro.pickup_time <> ''
    )

    SELECT
        count_pizzas,
        AVG(pickup_time) AS avg_time
    FROM cte
    GROUP BY count_pizzas;
"""

execute_query_to_df(query)

Unnamed: 0,count_pizzas,avg_time
0,1,12
1,2,18
2,3,30


In [185]:
# 4. What was the average distance travelled for each customer?

query = """
    SELECT 
        co.customer_id,
        CAST(AVG(CAST(ro.distance AS float)) AS decimal(10,2)) AS avg_distance
    FROM runner_orders AS ro
        INNER JOIN customer_orders AS co
            ON ro.order_id = co.order_id
    WHERE cancellation NOT LIKE '%ancel%'
    GROUP BY co.customer_id;
"""

execute_query_to_df(query)

Unnamed: 0,customer_id,avg_distance
0,101,20.0
1,102,16.73
2,103,23.4
3,104,10.0
4,105,25.0


In [203]:
# 5. What was the difference between the longest and shortest delivery times for all orders?

query = """
    SELECT
        MAX(CAST(duration AS INT)) - MIN(CAST(duration AS INT))
    FROM runner_orders
    WHERE duration <> '';
"""

execute_query_to_df(query)

Unnamed: 0,Unnamed: 1
0,30


In [209]:
# 6. What was the average speed for each runner for each delivery and do you notice any trend for these values?


query = """
    SELECT
        runner_id,
        AVG(distance * (60 / CAST(duration AS float))) AS avg_speed
    FROM runner_orders
    WHERE duration <> ''
    GROUP BY runner_id;
"""

execute_query_to_df(query)

Unnamed: 0,runner_id,avg_speed
0,1,45.536111
1,2,62.9
2,3,40.0


In [218]:
# 7. What is the successful delivery percentage for each runner?

query = """
    SELECT 
        ro. runner_id,
        CAST(
            (SELECT COUNT(runner_id) 
             FROM runner_orders 
             WHERE cancellation NOT LIKE '%ancel%' AND runner_id = ro.runner_id) AS FLOAT) / 
        COUNT(ro.runner_id) * 100 AS pct_success
    FROM runner_orders AS ro
    GROUP BY runner_id;
"""

execute_query_to_df(query)

Unnamed: 0,runner_id,pct_success
0,1,100.0
1,2,75.0
2,3,50.0


### C. Ingredient Optimisation

1. What are the standard ingredients for each pizza?

2. What was the most commonly added extra?

3. What was the most common exclusion?

4. Generate an order item for each record in the customers_orders table in the format of one of the following:
* Meat Lovers
* Meat Lovers - Exclude Beef
* Meat Lovers - Extra Bacon
* Meat Lovers - Exclude Cheese, Bacon - Extra Mushroom, Peppers

5. Generate an alphabetically ordered comma separated ingredient list for each pizza order from the customer_orders table and add a 2x in front of any relevant ingredients
* For example: "Meat Lovers: 2xBacon, Beef, ... , Salami"

6. What is the total quantity of each ingredient used in all delivered pizzas sorted by most frequent first?

In [235]:
# 1. What are the standard ingredients for each pizza?

query = """
    WITH cte AS(
        SELECT 
            pizza_id, 
            value AS topping
        FROM pizza_recipes
            CROSS APPLY STRING_SPLIT(toppings, ',')
        )
        
    SELECT
        pn.pizza_name,
        pt.topping_name
    FROM cte
        INNER JOIN pizza_names AS pn
            ON cte.pizza_id = pn.pizza_id
        INNER JOIN pizza_toppings AS pt
            ON cte.topping = pt.topping_id;
"""

execute_query_to_df(query)

Unnamed: 0,pizza_name,topping_name
0,Meatlovers,Bacon
1,Meatlovers,BBQ Sauce
2,Meatlovers,Beef
3,Meatlovers,Cheese
4,Meatlovers,Chicken
5,Meatlovers,Mushrooms
6,Meatlovers,Pepperoni
7,Meatlovers,Salami
8,Vegetarian,Cheese
9,Vegetarian,Mushrooms


In [250]:
# 2. What was the most commonly added extra?

query = """
    SELECT 
        pt.topping_name,
        COUNT(value) AS ct_topping
    FROM customer_orders 
        CROSS APPLY STRING_SPLIT(extras, ',')
        INNER JOIN pizza_toppings AS pt
            ON value = pt.topping_id
    WHERE extras <> ''
    GROUP BY pt.topping_name;
"""

execute_query_to_df(query)

Unnamed: 0,topping_name,ct_topping
0,Bacon,4
1,Cheese,1
2,Chicken,1


In [251]:
# 3. What was the most common exclusion?

query = """
    SELECT 
        pt.topping_name,
        COUNT(value) AS ct_topping
    FROM customer_orders 
        CROSS APPLY STRING_SPLIT(exclusions, ',')
        INNER JOIN pizza_toppings AS pt
            ON value = pt.topping_id
    WHERE exclusions <> ''
    GROUP BY pt.topping_name;
"""

execute_query_to_df(query)

Unnamed: 0,topping_name,ct_topping
0,BBQ Sauce,1
1,Cheese,4
2,Mushrooms,1


In [None]:
# 4. Generate an order item for each record in the customers_orders table in the format of one
# of the following:
# Meat Lovers
# Meat Lovers - Exclude Beef
# Meat Lovers - Extra Bacon
# Meat Lovers - Exclude Cheese, Bacon - Extra Mushroom, Peppers

In [8]:
# 5. Generate an alphabetically ordered comma separated ingredient list for each pizza order 
# from the customer_orders table and add a 2x in front of any relevant ingredients
# * For example: "Meat Lovers: 2xBacon, Beef, ... , Salami"

query = """
    WITH splitting_toppings AS(
        SELECT 
            pizza_id, 
            value AS topping
        FROM pizza_recipes
            CROSS APPLY STRING_SPLIT(toppings, ',')
        ),

    stdpizza_cte AS(    
        SELECT
            pn.pizza_id,
            pn.pizza_name,
            pt.topping_name
        FROM splitting_toppings AS st
            INNER JOIN pizza_names AS pn
                ON st.pizza_id = pn.pizza_id
            INNER JOIN pizza_toppings AS pt
                ON st.topping = pt.topping_id
        ),

    exclusions_cte AS(
        SELECT 
            co.order_id, co.customer_id, value AS topping_id
        FROM customer_orders AS co
            CROSS APPLY STRING_SPLIT(co.exclusions, ',')
        WHERE exclusions <> ''
        ),
        
    extras_cte AS(
        SELECT 
            co.order_id, co.customer_id, value AS topping_id
        FROM customer_orders AS co
            CROSS APPLY STRING_SPLIT(co.extras, ',')
        WHERE extras <> ''
        ),

    union_toppings AS(
        SELECT 
            ec.order_id, ec.customer_id, ec.topping_id,
            pt.topping_name
        FROM exclusions_cte AS ec
            INNER JOIN pizza_toppings AS pt
                ON ec.topping_id = pt.topping_id
                
        UNION ALL

        SELECT 
            ec.order_id, ec.customer_id, ec.topping_id,
            pt.topping_name
        FROM extras_cte AS ec
            INNER JOIN pizza_toppings AS pt
                ON ec.topping_id = pt.topping_id
                
        UNION ALL

        SELECT 
            co.order_id, co.customer_id, co.pizza_id,
            stdpizza_cte.topping_name
        FROM customer_orders AS co
        INNER JOIN stdpizza_cte
            ON stdpizza_cte.pizza_id = co.pizza_id
        ),
        
        toppings_counted AS(  
        SELECT 
            order_id, customer_id, topping_id, topping_name, 
            COUNT(topping_name) AS topping_count
        FROM union_toppings 
        GROUP BY order_id, customer_id, topping_id, topping_name
        ),
        
        toppings_concat AS(
        SELECT 
            tc.customer_id,
            tc.order_id,
            pn.pizza_name,
            STRING_AGG(CONCAT(tc.topping_name, ' x', tc.topping_count), ', ') AS toppings_order
        FROM toppings_counted AS tc
            INNER JOIN customer_orders AS co
                ON co.order_id = tc.order_id
            INNER JOIN pizza_names AS pn
                ON co.pizza_id = pn.pizza_id
        GROUP BY tc.customer_id, tc.order_id, co.pizza_id, pn.pizza_name
        )
        
    SELECT 
        customer_id, 
        order_id, 
        CONCAT(pizza_name, ': ', toppings_order) AS pizza_ordered
    FROM toppings_concat;
"""

execute_query_to_df(query)


Unnamed: 0,customer_id,order_id,pizza_ordered
0,101,1,"Meatlovers: Bacon x1, BBQ Sauce x1, Beef x1, C..."
1,101,2,"Meatlovers: Bacon x1, BBQ Sauce x1, Beef x1, C..."
2,101,6,"Vegetarian: Cheese x1, Mushrooms x1, Onions x1..."
3,102,3,"Meatlovers: Bacon x1, BBQ Sauce x1, Beef x1, C..."
4,102,3,"Vegetarian: Tomatoes x1, Tomato Sauce x1, Pepp..."
5,102,8,"Meatlovers: Bacon x1, BBQ Sauce x1, Beef x1, C..."
6,103,4,"Meatlovers: Cheese x3, Cheese x3, Peppers x1, ..."
7,103,4,"Vegetarian: Onions x1, Mushrooms x1, Cheese x1..."
8,103,9,"Meatlovers: Bacon x2, BBQ Sauce x1, Beef x1, C..."
9,104,5,"Meatlovers: Bacon x2, BBQ Sauce x1, Beef x1, C..."


In [50]:
# 6. What is the total quantity of each ingredient used in all delivered pizzas sorted by most
# frequent first?

query = """
    WITH pizza_toppings_cte AS(
        SELECT
            toppings
        FROM customer_orders AS co
            INNER JOIN pizza_recipes AS pr
                ON co.pizza_id = pr.pizza_id
        ),

    toppings_split_cte AS(    
        SELECT
            value AS topping_id
        FROM pizza_toppings_cte     
            CROSS APPLY STRING_SPLIT(toppings, ',')

        UNION ALL

        SELECT 
            value AS topping_id 
        FROM customer_orders
            CROSS APPLY STRING_SPLIT(exclusions, ',')
        WHERE exclusions <> ''

        UNION ALL

        SELECT 
            value AS topping_id 
        FROM customer_orders
            CROSS APPLY STRING_SPLIT(extras, ',')
        WHERE extras <> ''
    )

    SELECT
        pt.topping_name,
        COUNT(ts.topping_id) AS count_toppings
    FROM toppings_split_cte AS ts
        INNER JOIN pizza_toppings AS pt
            ON ts.topping_id = pt.topping_id
    GROUP BY pt.topping_name
    ORDER BY count_toppings DESC;
"""

execute_query_to_df(query)

Unnamed: 0,topping_name,count_toppings
0,Cheese,19
1,Mushrooms,15
2,Bacon,14
3,BBQ Sauce,11
4,Chicken,11
5,Salami,10
6,Pepperoni,10
7,Beef,10
8,Tomato Sauce,4
9,Tomatoes,4


### D. Pricing and Ratings

1. If a Meat Lovers pizza costs $12 and Vegetarian costs $10 and there were no charges for changes - how much money has Pizza Runner made so far if there are no delivery fees?

2. What if there was an additional $1 charge for any pizza extras?
* Add cheese is $1 extra

3. The Pizza Runner team now wants to add an additional ratings system that allows customers to rate their runner, how would you design an additional table for this new dataset - generate a schema for this new table and insert your own data for ratings for each successful customer order between 1 to 5.

4. Using your newly generated table - can you join all of the information together to form a table which has the following information for successful deliveries?
* customer_id
* order_id
* runner_id
* rating
* order_time
* pickup_time
* Time between order and pickup
* Delivery duration
* Average speed
* Total number of pizzas

5. If a Meat Lovers pizza was $12 and Vegetarian $10 fixed prices with no cost for extras and each runner is paid $0.30 per kilometre traveled - how much money does Pizza Runner have left over after these deliveries?

In [44]:
# 1. If a Meat Lovers pizza costs $12 and Vegetarian costs $10 and there were 
# no charges for changes - how much money has Pizza Runner made so far if 
# there are no delivery fees?

query = """
    SELECT
        SUM
            (CASE
                WHEN pn.pizza_name = 'Meatlovers' THEN 12
                WHEN pn.pizza_name = 'Vegetarian' THEN 10
            END) AS revenue
    FROM customer_orders AS co
        INNER JOIN pizza_names AS pn
            ON co.pizza_id = pn.pizza_id
        INNER JOIN runner_orders AS ro
            ON co.order_id = ro.order_id
        WHERE ro.cancellation NOT LIKE '%ancel%';
"""

execute_query_to_df(query)

Unnamed: 0,revenue
0,138


In [45]:
# 2. What if there was an additional $1 charge for any pizza extras?

query = """

    WITH pizza_revenue AS(
        SELECT
        SUM
            (CASE
                WHEN pn.pizza_name = 'Meatlovers' THEN 12
                WHEN pn.pizza_name = 'Vegetarian' THEN 10
            END) AS revenue
    FROM customer_orders AS co
        INNER JOIN pizza_names AS pn
            ON co.pizza_id = pn.pizza_id
        INNER JOIN runner_orders AS ro
            ON co.order_id = ro.order_id
    WHERE ro.cancellation NOT LIKE '%ancel%'
            ),

    extras_revenue AS(
        SELECT
            SUM(CASE WHEN VALUE <> '' THEN 1 END) AS revenue
        FROM customer_orders
            CROSS APPLY STRING_SPLIT(extras, ',')
            )
                
    SELECT SUM(revenue)
    FROM (  
            SELECT * FROM pizza_revenue
            UNION ALL
            SELECT * FROM extras_revenue
            ) AS u;
"""

execute_query_to_df(query)

Unnamed: 0,Unnamed: 1
0,144


In [65]:
# 3. The Pizza Runner team now wants to add an additional 
# ratings system that allows customers to rate their runner, 
# how would you design an additional table for this new dataset
# - generate a schema for this new table and insert your own data
# for ratings for each successful customer order between 1 to 5.

query = """
    CREATE TABLE customer_ratings
    (
        customer_id VARCHAR(30) NOT NULL,
        order_id VARCHAR(30) NOT NULL,
        runner_id VARCHAR(30) NOT NULL,
        rating INT NOT NULL
    );
"""

execute_update(query)

In [83]:
# 3. The Pizza Runner team now wants to add an additional 
# ratings system that allows customers to rate their runner, 
# how would you design an additional table for this new dataset
# - generate a schema for this new table and insert your own data
# for ratings for each successful customer order between 1 to 5.

query = """
    INSERT INTO customer_ratings(customer_id, order_id, runner_id, rating)
        SELECT 
            co.customer_id, 
            co.order_id, 
            ro.runner_id,
            CASE WHEN co.order_id <> '' THEN 5 END AS rating
        FROM customer_orders AS co
        INNER JOIN runner_orders AS ro
            ON co.order_id = ro.order_id  
        WHERE ro.cancellation NOT LIKE '%ancel%'
        GROUP BY co.customer_id, co.order_id, ro.runner_id;
"""

execute_update(query)

In [19]:
# 4. Using your newly generated table - can you join all of the information together 
# to form a table which has the following information for successful deliveries?
# * customer_id
# * order_id
# * runner_id
# * rating
# * order_time
# * pickup_time
# * Time between order and pickup
# * Delivery duration
# * Average speed
# * Total number of pizzas

query = """
    SELECT
        co.customer_id,
        co.order_id,
        ro.runner_id,
        cr.rating,
        CAST(co.order_time AS DATETIME) AS order_time,
        ro.pickup_time,
        DATEDIFF(minute, co.order_time, ro.pickup_time) AS time,
        ro.duration,
        CAST(AVG(ro.distance * (60 / CAST(ro.duration AS float))) AS DECIMAL(10,2)) AS avg_speed,
        COUNT(co.pizza_id) AS total_pizzas
    FROM customer_orders AS co
        INNER JOIN runner_orders AS ro
            ON co.order_id = ro.order_id
        INNER JOIN customer_ratings AS cr
            ON co.order_id = cr.order_id
    WHERE ro.cancellation NOT LIKE '%ancel%'
    GROUP BY    co.customer_id,
                co.order_id,
                ro.runner_id,
                cr.rating,
                co.order_time,
                ro.pickup_time,
                DATEDIFF(minute, co.order_time, ro.pickup_time),
                ro.duration;
"""

execute_query_to_df(query)

Unnamed: 0,customer_id,order_id,runner_id,rating,order_time,pickup_time,time,duration,avg_speed,total_pizzas
0,101,1,1,5,2020-01-01 18:05:02,2020-01-01 18:15:34,10,32,37.5,1
1,101,2,1,5,2020-01-01 19:00:52,2020-01-01 19:10:54,10,27,44.44,1
2,102,3,1,5,2020-01-02 23:51:23,2020-01-03 00:12:37,21,20,40.2,2
3,102,8,2,5,2020-01-09 23:54:33,2020-01-10 00:15:02,21,15,93.6,1
4,103,4,2,5,2020-01-04 13:23:46,2020-01-04 13:53:03,30,40,35.1,3
5,104,5,3,5,2020-01-08 21:00:29,2020-01-08 21:10:57,10,15,40.0,1
6,104,10,1,5,2020-01-11 18:34:49,2020-01-11 18:50:20,16,10,60.0,2
7,105,7,2,5,2020-01-08 21:20:29,2020-01-08 21:30:45,10,25,60.0,1


In [43]:
# 5. If a Meat Lovers pizza was $12 and Vegetarian $10 fixed prices with no cost
# for extras and each runner is paid $0.30 per kilometre traveled - how much money
# does Pizza Runner have left over after these deliveries?

query = """
    WITH revenue_cte AS(
        SELECT
            SUM
                (CASE
                    WHEN pn.pizza_name = 'Meatlovers' THEN 12
                    WHEN pn.pizza_name = 'Vegetarian' THEN 10
                END) AS revenue
        FROM customer_orders AS co
            INNER JOIN pizza_names AS pn
                ON co.pizza_id = pn.pizza_id
            INNER JOIN runner_orders AS ro
                ON co.order_id = ro.order_id
            WHERE ro.duration <> ''
            ),

    runnerfee_cte AS(
        SELECT 
            SUM(CAST(distance AS FLOAT)) * 0.3 AS runner_fee 
        FROM runner_orders
        )

    SELECT 
        revenue - runner_fee AS revenue_minus_runner_fee
    FROM revenue_cte 
    CROSS JOIN runnerfee_cte;
"""

execute_query_to_df(query)

Unnamed: 0,revenue_minus_runner_fee
0,94.44


In [47]:
query = """
    WITH revenue_cte AS(
        SELECT
            co.order_id,
            SUM
                (CASE
                    WHEN pn.pizza_name = 'Meatlovers' THEN 12
                    WHEN pn.pizza_name = 'Vegetarian' THEN 10
                END) AS revenue
        FROM customer_orders AS co
            INNER JOIN pizza_names AS pn
                ON co.pizza_id = pn.pizza_id
        GROUP BY co.order_id
            ),

    runnerfee_cte AS(
        SELECT 
            ro.order_id,
            CAST(ro.distance AS FLOAT) * 0.3 AS runner_fee
        FROM runner_orders AS ro
        WHERE distance <> ''

        )

    SELECT 
        rf.order_id, 
        r.revenue, 
        rf.runner_fee, 
        r.revenue - rf.runner_fee as net_revenue
        FROM runnerfee_cte AS rf
        INNER JOIN revenue_cte AS r
            ON r.order_id = rf.order_id;
"""

execute_query_to_df(query)

Unnamed: 0,order_id,revenue,runner_fee,net_revenue
0,1,12,6.0,6.0
1,2,12,6.0,6.0
2,3,22,4.02,17.98
3,4,34,7.02,26.98
4,5,12,3.0,9.0
5,7,10,7.5,2.5
6,8,12,7.02,4.98
7,10,24,3.0,21.0


### E. Bonus Questions

If Danny wants to expand his range of pizzas - how would this impact the existing data design? Write an INSERT statement to demonstrate what would happen if a new Supreme pizza with all the toppings was added to the Pizza Runner menu?

In [61]:
query = """
    INSERT INTO pizza_names(pizza_id, pizza_name)
        VALUES
        (3, 'Supreme');
"""

execute_update(query)

In [64]:
query = """
    INSERT INTO pizza_recipes(pizza_id, toppings)
        SELECT 
            '3' AS pizza_id, 
            STRING_AGG(topping_id, ', ') AS toppings
        FROM pizza_toppings;
"""

execute_update(query)