In [10]:
import google.datalab.bigquery as bq

## User-Product Features

Features describing how often, and how recently, each user purchases each product, computed from that user's prior orders.

In [14]:
%%bq query -n user_products_query
-- One output row per (user, product) pair, aggregated over the line items of prior orders.
-- The orders subquery attaches three per-user values to every prior order:
--   days_since_first_order   : running total of days_since_prior_order up to and including the order
--   days_bw_first_last_order : total of days_since_prior_order over all prior orders of the user
--   num_orders               : number of prior orders of the user
-- Assumes days_since_prior_order is NULL only on the first order of each user (TODO confirm).
SELECT orders.user_id, op.product_id, products.aisle_id, products.department_id,
  -- Share of the prior orders of the user that contain the product.
  COUNT(*) / ANY_VALUE(orders.num_orders) AS `perc_all_orders`,
  -- Reorders divided by the number of orders placed after the first purchase of the product;
  -- NULL when the first purchase happened in the last prior order.
  SUM(op.reordered) / NULLIF(ANY_VALUE(orders.num_orders) - MIN(orders.order_number),0) AS `perc_reorder`,
  -- Purchases per 30 days over the whole prior history of the user.
  COUNT(*) / NULLIF(ANY_VALUE(orders.days_bw_first_last_order),0) * 30 AS `orders_per_month`,
  -- Reorders per 30 days, measured from the first purchase of the product to the last prior order.
  -- The running total is 0 (not NULL) on the first order, so MIN picks the true first purchase
  -- even when the product was bought in order number 1.
  SUM(op.reordered) / NULLIF(ANY_VALUE(orders.days_bw_first_last_order) - MIN(orders.days_since_first_order),0) * 30 AS `reorders_per_month`,
  MIN(orders.order_number) AS `first_order_number`,
  -- Days between the first order of the user and the first purchase of the product.
  CASE WHEN MIN(orders.order_number) = 1 THEN 0 ELSE MIN(orders.days_since_first_order) END AS `first_order_days`
FROM instacart.order_products__prior AS op
INNER JOIN (
  SELECT *,
  -- IFNULL keeps the running total at 0 on the first order; a bare SUM over only NULL inputs
  -- would return NULL there, and the MIN aggregates above would then silently skip that order.
  SUM(IFNULL(days_since_prior_order, 0)) OVER (PARTITION BY user_id ORDER BY order_number ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `days_since_first_order`,
  SUM(days_since_prior_order) OVER (PARTITION BY user_id) AS `days_bw_first_last_order`,
  COUNT(*) OVER (PARTITION BY user_id) AS `num_orders`
  FROM instacart.orders WHERE eval_set = "prior"
) AS orders ON orders.order_id = op.order_id
INNER JOIN instacart.products AS products ON op.product_id = products.product_id
GROUP BY 1,2,3,4

In [15]:
%%bq execute --query user_products_query --table instacart.user_products --mode overwrite

user_id,product_id,aisle_id,department_id,perc_all_orders,perc_reorder,orders_per_month,reorders_per_month,first_order_number,first_order_days
69182,31990,102,18,0.010101010101,0.0,0.0849858356941,0.0,51,216.0
4558,24369,125,19,0.030303030303,0.153846153846,0.348837209302,1.93548387097,86,227.0
150487,13838,74,17,0.0222222222222,0.0,0.0874635568513,0.0,34,229.0
3103,24753,13,20,0.0526315789474,0.333333333333,0.178041543027,1.07142857143,35,309.0
138791,17149,34,1,0.0707070707071,0.139534883721,0.697674418605,1.53846153846,56,184.0
201456,9550,57,14,0.0281690140845,0.0625,0.165289256198,0.535714285714,55,307.0
178209,23446,56,18,0.030303030303,0.0571428571429,0.252100840336,0.504201680672,64,238.0
140154,40285,125,19,0.0816326530612,0.157894736842,0.37037037037,0.927835051546,30,227.0
22836,10984,11,11,0.0169491525424,0.0,0.0842696629213,0.0,49,281.0
189058,28522,6,2,0.027027027027,,0.0847457627119,,37,354.0


## Time-of-Day Features

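For each product, aisle, and department, compute how much more (or less) likely it is to be purchased in a given hour-of-day / day-of-week slot, relative to the share of all purchases that fall in that slot.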

In [24]:
%%bq query -n products_tod_query
-- Time-of-day factors per product, aisle and department.
-- Each factor is (share of the entity purchases that fall in an hour/day-of-week slot)
-- divided by (share of all purchases that fall in that slot).
WITH slot_counts AS (
  -- Line items per (hour of day, day of week, product), with the aisle and department of the product.
  SELECT
    o.order_hour_of_day,
    o.order_dow,
    op.product_id,
    p.aisle_id,
    p.department_id,
    COUNT(*) AS n_product
  FROM instacart.order_products__prior AS op
  INNER JOIN instacart.orders AS o
    ON o.order_id = op.order_id
  INNER JOIN instacart.products AS p
    ON p.product_id = op.product_id
  GROUP BY
    o.order_hour_of_day, o.order_dow, op.product_id, p.aisle_id, p.department_id
),
slot_totals AS (
  -- Roll the per-product counts up to every total needed by the ratios below.
  SELECT
    order_hour_of_day,
    order_dow,
    product_id,
    aisle_id,
    department_id,
    n_product,
    SUM(n_product) OVER by_product         AS n_product_total,
    SUM(n_product) OVER by_slot_aisle      AS n_aisle,
    SUM(n_product) OVER by_aisle           AS n_aisle_total,
    SUM(n_product) OVER by_slot_department AS n_department,
    SUM(n_product) OVER by_department      AS n_department_total,
    SUM(n_product) OVER by_slot            AS n_hod_dow,
    SUM(n_product) OVER ()                 AS n_total
  FROM slot_counts
  WINDOW
    by_product         AS (PARTITION BY product_id),
    by_slot_aisle      AS (PARTITION BY order_hour_of_day, order_dow, aisle_id),
    by_aisle           AS (PARTITION BY aisle_id),
    by_slot_department AS (PARTITION BY order_hour_of_day, order_dow, department_id),
    by_department      AS (PARTITION BY department_id),
    by_slot            AS (PARTITION BY order_hour_of_day, order_dow)
)
SELECT
  order_hour_of_day,
  order_dow,
  product_id,
  aisle_id,
  department_id,
  (n_product / n_product_total) / (n_hod_dow / n_total)       AS product_tod_factor,
  (n_aisle / n_aisle_total) / (n_hod_dow / n_total)           AS aisle_tod_factor,
  (n_department / n_department_total) / (n_hod_dow / n_total) AS department_tod_factor
FROM slot_totals

In [25]:
%%bq execute --query products_tod_query --table instacart.products_tod --mode overwrite

order_hour_of_day,order_dow,product_id,aisle_id,department_id,product_tod_factor,aisle_tod_factor,department_tod_factor
4,4,20119,115,7,1.88359293565,1.02994327385,1.15291585491
4,4,1896,98,7,1.92403852171,1.37492644464,1.15291585491
4,4,5883,98,7,3.65304779756,1.37492644464,1.15291585491
4,4,27554,93,3,1.4098807705,1.10227297612,0.94354629231
4,4,5928,42,1,9.54182036736,1.13154818111,1.06000410609
4,4,25154,38,1,10.7950087566,1.10434079932,1.06000410609
4,4,24041,3,19,6.99258776175,0.76991304516,0.919954343583
4,4,28571,107,19,1.36391085892,0.966227021545,0.919954343583
4,4,15539,129,1,14.4154886165,0.963097059415,1.06000410609
4,4,9434,34,1,1.74945250201,1.56729407054,1.06000410609
