## Instacart Orders and USDA ERS Food Prices Cross Analysis

In [1]:
# Name of the BigQuery dataset that the `bq mk` cell below creates.
dataset_id = 'reporting'

In [2]:
!bq --location=US mk --dataset {dataset_id}#Note: This will not work if you already have a dataset with this name

Too many positional args, still have ['will', 'not', 'work', 'if', 'you', 'already', 'have', 'a', 'dataset', 'with', 'this', 'name']


#### Query 1

By predicting the 2017 food prices with linear regression, we are able to approximate how much Instacart users spent on each order. From those per-order estimates, we then calculate the average Instacart order total for each day of the week.

In [1]:
%%bigquery
-- Average estimated order total per day of the week.
--
-- order_totals: one row per order, summing the price of every product in the
-- order that can be mapped to a food price. The INNER JOINs mean that products
-- without a row in Food_Map_Beam_DF (or without a matching price row) do not
-- contribute, and orders with no mapped product are left out entirely. The
-- joins already restrict rows to mapped products, so no additional
-- `product_id IN (...)` filter on Food_Map_Beam_DF is needed.
WITH order_totals AS (
    SELECT
        op.order_id,
        SUM(fp.price) AS Total
    FROM `responsive-cab-267123.instacart_modeled.Order_Products` AS op
    INNER JOIN `responsive-cab-267123.USDA_ERS_modeled.Food_Map_Beam_DF` AS m
        ON op.product_id = m.product_id
    INNER JOIN `responsive-cab-267123.USDA_ERS_modeled.Food_Market_Beam_DF` AS fp
        ON m.food_id = fp.food_id
    GROUP BY op.order_id
)
SELECT
    d.day_id,
    d.day_name AS Day,
    AVG(t.Total) AS Average
FROM order_totals AS t
INNER JOIN `responsive-cab-267123.instacart_modeled.Orders` AS o
    ON t.order_id = o.order_id
INNER JOIN `responsive-cab-267123.instacart_modeled.Week_Days` AS d
    ON o.order_dow = d.day_id
GROUP BY d.day_name, d.day_id
ORDER BY d.day_id ASC

Unnamed: 0,day_id,Day,Average
0,0,Monday,6.578466
1,1,Tuesday,6.018205
2,2,Wednesday,5.624942
3,3,Thursday,5.478441
4,4,Friday,5.545686
5,5,Saturday,5.825353
6,6,Sunday,6.304218


Creating view for query

In [3]:
%%bigquery
-- View: average estimated order total per day of the week
-- (columns: day_id, Day, Average).
--
-- order_totals: one row per order, summing the price of every product in the
-- order that can be mapped to a food price. The INNER JOINs already limit the
-- rows to products present in Food_Map_Beam_DF, so no separate
-- `product_id IN (...)` filter on that table is needed.
CREATE OR REPLACE VIEW reporting.v_Week_Day_Price_Totals AS
WITH order_totals AS (
    SELECT
        op.order_id,
        SUM(fp.price) AS Total
    FROM `responsive-cab-267123.instacart_modeled.Order_Products` AS op
    INNER JOIN `responsive-cab-267123.USDA_ERS_modeled.Food_Map_Beam_DF` AS m
        ON op.product_id = m.product_id
    INNER JOIN `responsive-cab-267123.USDA_ERS_modeled.Food_Market_Beam_DF` AS fp
        ON m.food_id = fp.food_id
    GROUP BY op.order_id
)
SELECT
    d.day_id,
    d.day_name AS Day,
    AVG(t.Total) AS Average
FROM order_totals AS t
INNER JOIN `responsive-cab-267123.instacart_modeled.Orders` AS o
    ON t.order_id = o.order_id
INNER JOIN `responsive-cab-267123.instacart_modeled.Week_Days` AS d
    ON o.order_dow = d.day_id
GROUP BY d.day_name, d.day_id
-- NOTE(review): kept from the original definition; consumers that need a
-- guaranteed row order should still ORDER BY day_id when selecting from the view.
ORDER BY d.day_id ASC

#### Queries 2 and 3

From these queries we can compare how often the products mapped to the least expensive food price were ordered versus the products mapped to the most expensive food price. We break the sales down by day of the week.

##### minimum

In [8]:
%%bigquery
-- Ten most frequently ordered products among those mapped to the food(s)
-- with the lowest price in Food_Market_Beam_DF.
WITH min_price_food AS (
    -- food_id(s) whose price equals the overall minimum price
    SELECT f.food_id
    FROM `responsive-cab-267123.USDA_ERS_modeled.Food_Market_Beam_DF` AS f
    WHERE f.price = (
        SELECT MIN(f2.price)
        FROM `responsive-cab-267123.USDA_ERS_modeled.Food_Market_Beam_DF` AS f2
    )
),
min_price_products AS (
    -- products mapped to those food_id(s)
    SELECT m.product_id
    FROM min_price_food AS mfp
    INNER JOIN `responsive-cab-267123.USDA_ERS_modeled.Food_Map_Beam_DF` AS m
        ON mfp.food_id = m.food_id
)
SELECT
    p.product_name AS Product,
    COUNT(*) AS Frequency
FROM `responsive-cab-267123.instacart_modeled.Order_Products` AS op
INNER JOIN `responsive-cab-267123.instacart_modeled.Products` AS p
    ON op.product_id = p.product_id
-- IN (semi-join) rather than a join, so a product listed more than once in
-- the mapping cannot inflate the counts.
WHERE op.product_id IN (SELECT mpp.product_id FROM min_price_products AS mpp)
GROUP BY p.product_name
-- Product is a tie-breaker so the top 10 is deterministic when frequencies tie.
ORDER BY Frequency DESC, Product ASC
LIMIT 10

Unnamed: 0,Product,Frequency
0,Cold Brew Coffee,9243
1,New Orleans Iced Coffee,7192
2,Premium Unsweetened Iced Tea,6458
3,Major Dickason's Blend Ground Coffee Dark Roast,6186
4,Cold-Brew Black Coffee,5914
5,French Roast Ground Coffee,5302
6,Pink Lady Apple Kombucha,5186
7,Zen Tea,4984
8,Unsweetened Premium Iced Tea,4043
9,House Blend Deep Roast Ground Coffee,3926


In [9]:
%%bigquery
-- View: ten most frequently ordered products among those mapped to the
-- food(s) with the lowest price in Food_Market_Beam_DF
-- (columns: Product, Frequency).
CREATE OR REPLACE VIEW reporting.v_Min_Price_Sales AS
WITH min_price_food AS (
    -- food_id(s) whose price equals the overall minimum price
    SELECT f.food_id
    FROM `responsive-cab-267123.USDA_ERS_modeled.Food_Market_Beam_DF` AS f
    WHERE f.price = (
        SELECT MIN(f2.price)
        FROM `responsive-cab-267123.USDA_ERS_modeled.Food_Market_Beam_DF` AS f2
    )
),
min_price_products AS (
    -- products mapped to those food_id(s)
    SELECT m.product_id
    FROM min_price_food AS mfp
    INNER JOIN `responsive-cab-267123.USDA_ERS_modeled.Food_Map_Beam_DF` AS m
        ON mfp.food_id = m.food_id
)
SELECT
    p.product_name AS Product,
    COUNT(*) AS Frequency
FROM `responsive-cab-267123.instacart_modeled.Order_Products` AS op
INNER JOIN `responsive-cab-267123.instacart_modeled.Products` AS p
    ON op.product_id = p.product_id
-- IN (semi-join) rather than a join, so a product listed more than once in
-- the mapping cannot inflate the counts.
WHERE op.product_id IN (SELECT mpp.product_id FROM min_price_products AS mpp)
GROUP BY p.product_name
-- Product is a tie-breaker so the view returns the same 10 rows on every
-- evaluation even when frequencies tie at the cut-off.
ORDER BY Frequency DESC, Product ASC
LIMIT 10

##### maximum 

In [10]:
%%bigquery
-- Ten most frequently ordered products among those mapped to the food(s)
-- with the highest price in Food_Market_Beam_DF.
WITH max_price_food AS (
    -- food_id(s) whose price equals the overall maximum price
    SELECT f.food_id
    FROM `responsive-cab-267123.USDA_ERS_modeled.Food_Market_Beam_DF` AS f
    WHERE f.price = (
        SELECT MAX(f2.price)
        FROM `responsive-cab-267123.USDA_ERS_modeled.Food_Market_Beam_DF` AS f2
    )
),
max_price_products AS (
    -- products mapped to those food_id(s)
    SELECT m.product_id
    FROM max_price_food AS mfp
    INNER JOIN `responsive-cab-267123.USDA_ERS_modeled.Food_Map_Beam_DF` AS m
        ON mfp.food_id = m.food_id
)
SELECT
    p.product_name AS Product,
    COUNT(*) AS Frequency
FROM `responsive-cab-267123.instacart_modeled.Order_Products` AS op
INNER JOIN `responsive-cab-267123.instacart_modeled.Products` AS p
    ON op.product_id = p.product_id
-- IN (semi-join) rather than a join, so a product listed more than once in
-- the mapping cannot inflate the counts.
WHERE op.product_id IN (SELECT mpp.product_id FROM max_price_products AS mpp)
GROUP BY p.product_name
-- Product is a tie-breaker so the top 10 is deterministic when frequencies tie.
ORDER BY Frequency DESC, Product ASC
LIMIT 10

Unnamed: 0,Product,Frequency
0,Macaroni & Cheese,18132
1,Organic Brown Rice,14848
2,Pesto Tortellini Bowls,8785
3,Organic Beans & Rice Cheddar Cheese Burrito,8291
4,Broccoli & Cheddar Bake Meal Bowl,7543
5,Organic Jasmine Rice,6909
6,Bean & Cheese Burrito,6330
7,Chicken Pot Pie,5843
8,Pad Thai,5561
9,Organic Gluten Free Non-Dairy Beans & Rice Bur...,5195


In [11]:
%%bigquery
-- View: ten most frequently ordered products among those mapped to the
-- food(s) with the highest price in Food_Market_Beam_DF
-- (columns: Product, Frequency).
CREATE OR REPLACE VIEW reporting.v_Max_Price_Sales AS
WITH max_price_food AS (
    -- food_id(s) whose price equals the overall maximum price
    SELECT f.food_id
    FROM `responsive-cab-267123.USDA_ERS_modeled.Food_Market_Beam_DF` AS f
    WHERE f.price = (
        SELECT MAX(f2.price)
        FROM `responsive-cab-267123.USDA_ERS_modeled.Food_Market_Beam_DF` AS f2
    )
),
max_price_products AS (
    -- products mapped to those food_id(s)
    SELECT m.product_id
    FROM max_price_food AS mfp
    INNER JOIN `responsive-cab-267123.USDA_ERS_modeled.Food_Map_Beam_DF` AS m
        ON mfp.food_id = m.food_id
)
SELECT
    p.product_name AS Product,
    COUNT(*) AS Frequency
FROM `responsive-cab-267123.instacart_modeled.Order_Products` AS op
INNER JOIN `responsive-cab-267123.instacart_modeled.Products` AS p
    ON op.product_id = p.product_id
-- IN (semi-join) rather than a join, so a product listed more than once in
-- the mapping cannot inflate the counts.
WHERE op.product_id IN (SELECT mpp.product_id FROM max_price_products AS mpp)
GROUP BY p.product_name
-- Product is a tie-breaker so the view returns the same 10 rows on every
-- evaluation even when frequencies tie at the cut-off.
ORDER BY Frequency DESC, Product ASC
LIMIT 10