# Test Database - Gaps Table
First, make a simple **gaps** table, then fill it with data from other tables.

In [6]:
%%bigquery execute -t datalab-projects-1331:xjk_algo_comp_test.gaps -m create
-- Aggregate the orders table into one row per (district_id, timeslot).
-- Orders are matched to a district through the hash of their start district.
-- gap counts the orders whose driver_id equals the string NULL
-- (presumably orders that were never assigned a driver - confirm).
SELECT
  district_id,
  FIRST(o.timeslot) AS timeslot,
  FIRST(o.date) AS date,
  FIRST(day_in_week) AS day_in_week,
  FIRST(timeofday_slot) AS timeofday_slot,
  SUM(price) AS sum_price,
  AVG(price) AS avg_price,
  SUM(IF(driver_id = 'NULL', 1, 0)) AS gap
FROM
  [datalab-projects-1331:xjk_algo_comp_test.orders] AS o
JOIN
  [datalab-projects-1331:xjk_algo_comp_test.districts] AS d
  ON o.start_district_hash = d.district_hash
GROUP BY
  district_id,
  o.timeslot

district_id,timeslot,date,day_in_week,timeofday_slot,sum_price,avg_price,gap
50,2016-01-22-127,2016-01-22,5,127,42.0,21.0,0
50,2016-01-22-105,2016-01-22,5,105,60.0,15.0,0
50,2016-01-22-45,2016-01-22,5,45,27.0,13.5,0
50,2016-01-22-139,2016-01-22,5,139,61.0,20.3333333333,1
50,2016-01-22-117,2016-01-22,5,117,13.0,13.0,0
50,2016-01-22-104,2016-01-22,5,104,52.0,26.0,0
50,2016-01-22-140,2016-01-22,5,140,31.0,31.0,0
43,2016-01-22-105,2016-01-22,5,105,111.0,13.875,5
43,2016-01-22-93,2016-01-22,5,93,164.0,32.8,2
43,2016-01-22-79,2016-01-22,5,79,28.0,28.0,1


In [None]:
%% sql -q tester
SELECT SUM(price) FROM datalab-projects-1331:xjk_algo_comp_test.orders
WHERE start_district_hash = 'd4ec2125aff74eded207d2d915ef682f'
  AND (time LIKE "%2016-01-01 00:5%")
  
# Reference only. Spot-check for the aggregation stored in the gaps table: the query above
# sums the raw order prices for a single start district over the order times that match
# "2016-01-01 00:5x". Compare its result with sum_price of the gaps row for
# district_id = 51 and timeslot "2016-01-01-6"; the two numbers should be equal.
# NOTE(review): presumably this district hash is the one that maps to district_id 51 -
# verify against the districts table.
# NOTE(review): the magic on the first line is spelled with a space after the percent signs
# and the table name is not wrapped in [...] like in the other cells - confirm both before
# running this cell.

# +1 timeslot
We are going to use past data to predict the gaps for the next 10 minutes (or in other words, the next timeslot). To do this, we need to update timeslot related information. Specifically we are going to do the following:
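1. Increment the `timeslot` feature by one slot (a day has 144 ten-minute slots):
  - '2016-01-22-45' becomes '2016-01-22-46'
  - '2016-01-22-144' becomes '2016-01-23-1' (the last slot of a day rolls over to slot 1 of the next day)
2. Update the `date` and `timeofday_slot` features accordingly.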
3. Recalculate `day_in_week` feature.

In [12]:
%%bigquery udf --module gaps_plus_one_timeslot
/**
 * Left-pad a value up to a minimum width.
 *
 * The value is first converted to a string. When that string already has at
 * least `width` characters it is returned as is; otherwise the filler is
 * repeated once per missing character and placed in front of it.
 *
 * @param int n Value to add padding to.
 * @param int width Minimum length of the returned string.
 * @param string z (Optional) Filler used instead of '0'.
 * @return string The padded value.
 */
function pad(n, width, z) {
  var filler = z ? z : '0';
  var text = '' + n;

  // Already wide enough: nothing to prepend.
  if (text.length >= width) {
    return text;
  }

  // Joining an array of (missing + 1) empty slots yields the filler `missing` times.
  var missing = width - text.length;
  return new Array(missing + 1).join(filler) + text;
}

/**
 * Add one timeslot and adjust other relevant tables.
 *
 * @param {{district_id: integer, timeslot: string, date: string, day_in_week: integer,
            timeofday_slot: integer, sum_price: float, avg_price: float, gap: integer}} r
 * @param function({{district_id: integer, timeslot: string, date: string, day_in_week: integer,
                timeofday_slot: integer, sum_price: float, avg_price: float, gap: integer}}) emitFn
 */
function(r, emitFn) {
  
  var t = r.timeslot.split(/-/);
  var oldslot = parseInt(t[3]);
  var newslot = oldslot + 1;
  var d = new Date(parseInt(t[0]), parseInt(t[1])-1, parseInt(t[2]));
  
  if (oldslot == 144) {
    newslot = 1;
    d = d.setDate(d.getDate() + 1);
    r.day_in_week = d.getDay();
    r.date = d.getFullYear() + '-' + pad(d.getMonth()+1, 2) +
             '-' + pad(d.getDate(), 2);
  }
  r.timeslot = d.getFullYear() + '-' + pad(d.getMonth()+1, 2) +
               '-' + pad(d.getDate(), 2) + '-' + newslot;

  r.timeofday_slot = newslot;
  emitFn(r);
}

In [13]:
%%bigquery execute -t datalab-projects-1331:xjk_algo_comp_test.future_gaps -m create

-- Pass every row of the gaps table through the gaps_plus_one_timeslot UDF and store the
-- shifted rows as the future_gaps table.
SELECT
  district_id,
  timeslot,
  date,
  day_in_week,
  timeofday_slot,
  sum_price,
  avg_price,
  gap
FROM
  gaps_plus_one_timeslot([datalab-projects-1331:xjk_algo_comp_test.gaps])

district_id,timeslot,date,day_in_week,timeofday_slot,sum_price,avg_price,gap
50,2016-01-22-128,2016-01-22,5,128,42.0,21.0,0
50,2016-01-22-106,2016-01-22,5,106,60.0,15.0,0
50,2016-01-22-46,2016-01-22,5,46,27.0,13.5,0
50,2016-01-22-140,2016-01-22,5,140,61.0,20.3333333333,1
50,2016-01-22-118,2016-01-22,5,118,13.0,13.0,0
50,2016-01-22-105,2016-01-22,5,105,52.0,26.0,0
50,2016-01-22-141,2016-01-22,5,141,31.0,31.0,0
43,2016-01-22-106,2016-01-22,5,106,111.0,13.875,5
43,2016-01-22-94,2016-01-22,5,94,164.0,32.8,2
43,2016-01-22-80,2016-01-22,5,80,28.0,28.0,1


# Gaps Table + Other Tables

In [1]:
%%bigquery execute -t datalab-projects-1331:xjk_algo_comp_test.future_gaps_processed -m create

-- Build the processed feature table: one row per (district_id, timeslot) taken from
-- future_gaps, widened with the POI columns of the district plus the weather and traffic
-- columns joined on the same timeslot.
-- The query groups by gaps.district_id and gaps.timeslot, so every selected column is
-- wrapped in FIRST() to pick a single value per group.
SELECT FIRST(gaps.district_id) AS district_id, FIRST(gaps.timeslot) AS timeslot, FIRST(gaps.date) AS date,
  -- is_sunday is 1 when day_in_week is 0 and 0 otherwise.
  FIRST(gaps.day_in_week) AS day_in_week, IF(FIRST(gaps.day_in_week) = 0, 1, 0) AS is_sunday,
  FIRST(gaps.timeofday_slot) AS timeofday_slot,
  FIRST(gaps.sum_price) AS sum_price, FIRST(gaps.avg_price) AS avg_price, FIRST(gaps.gap) AS gap,
  -- POI columns of the district (pois.f*), passed through under the same names.
  FIRST(pois.f1) AS f1, FIRST(pois.f11) AS f11, FIRST(pois.f11_1) AS f11_1, 
  FIRST(pois.f11_2) AS f11_2, FIRST(pois.f11_3) AS f11_3, FIRST(pois.f11_4) AS f11_4, 
  FIRST(pois.f11_5) AS f11_5, FIRST(pois.f11_6) AS f11_6, FIRST(pois.f11_7) AS f11_7, 
  FIRST(pois.f11_8) AS f11_8, FIRST(pois.f13_4) AS f13_4, FIRST(pois.f13_8) AS f13_8, 
  FIRST(pois.f14) AS f14, FIRST(pois.f14_1) AS f14_1, FIRST(pois.f14_10) AS f14_10, 
  FIRST(pois.f14_2) AS f14_2, FIRST(pois.f14_3) AS f14_3, FIRST(pois.f14_6) AS f14_6, 
  FIRST(pois.f14_8) AS f14_8, FIRST(pois.f15) AS f15, FIRST(pois.f15_1) AS f15_1, 
  FIRST(pois.f15_2) AS f15_2, FIRST(pois.f15_3) AS f15_3, FIRST(pois.f15_4) AS f15_4, 
  FIRST(pois.f15_6) AS f15_6, FIRST(pois.f15_7) AS f15_7, FIRST(pois.f15_8) AS f15_8, 
  FIRST(pois.f16) AS f16, FIRST(pois.f16_1) AS f16_1, FIRST(pois.f16_10) AS f16_10, 
  FIRST(pois.f16_11) AS f16_11, FIRST(pois.f16_12) AS f16_12, FIRST(pois.f16_3) AS f16_3, 
  FIRST(pois.f16_4) AS f16_4, FIRST(pois.f16_6) AS f16_6, FIRST(pois.f17) AS f17, 
  FIRST(pois.f17_2) AS f17_2, FIRST(pois.f17_3) AS f17_3, FIRST(pois.f17_4) AS f17_4, 
  FIRST(pois.f17_5) AS f17_5, FIRST(pois.f19) AS f19, FIRST(pois.f19_1) AS f19_1, 
  FIRST(pois.f19_2) AS f19_2, FIRST(pois.f19_3) AS f19_3, FIRST(pois.f19_4) AS f19_4, 
  FIRST(pois.f1_1) AS f1_1, FIRST(pois.f1_10) AS f1_10, FIRST(pois.f1_11) AS f1_11, 
  FIRST(pois.f1_2) AS f1_2, FIRST(pois.f1_3) AS f1_3, FIRST(pois.f1_4) AS f1_4, 
  FIRST(pois.f1_5) AS f1_5, FIRST(pois.f1_6) AS f1_6, FIRST(pois.f1_7) AS f1_7, 
  FIRST(pois.f1_8) AS f1_8, FIRST(pois.f20) AS f20, FIRST(pois.f20_1) AS f20_1, 
  FIRST(pois.f20_2) AS f20_2, FIRST(pois.f20_4) AS f20_4, FIRST(pois.f20_5) AS f20_5, 
  FIRST(pois.f20_6) AS f20_6, FIRST(pois.f20_7) AS f20_7, FIRST(pois.f20_8) AS f20_8, 
  FIRST(pois.f20_9) AS f20_9, FIRST(pois.f21_1) AS f21_1, FIRST(pois.f21_2) AS f21_2, 
  FIRST(pois.f22) AS f22, FIRST(pois.f22_1) AS f22_1, FIRST(pois.f22_2) AS f22_2, 
  FIRST(pois.f22_3) AS f22_3, FIRST(pois.f22_4) AS f22_4, FIRST(pois.f22_5) AS f22_5, 
  FIRST(pois.f23) AS f23, FIRST(pois.f23_1) AS f23_1, FIRST(pois.f23_2) AS f23_2, 
  FIRST(pois.f23_3) AS f23_3, FIRST(pois.f23_4) AS f23_4, FIRST(pois.f23_5) AS f23_5, 
  FIRST(pois.f23_6) AS f23_6, FIRST(pois.f24) AS f24, FIRST(pois.f24_1) AS f24_1, 
  FIRST(pois.f24_2) AS f24_2, FIRST(pois.f24_3) AS f24_3, FIRST(pois.f25) AS f25, 
  FIRST(pois.f25_1) AS f25_1, FIRST(pois.f25_3) AS f25_3, FIRST(pois.f25_7) AS f25_7, 
  FIRST(pois.f25_8) AS f25_8, FIRST(pois.f25_9) AS f25_9, FIRST(pois.f2_1) AS f2_1, 
  FIRST(pois.f2_10) AS f2_10, FIRST(pois.f2_11) AS f2_11, FIRST(pois.f2_12) AS f2_12, 
  FIRST(pois.f2_13) AS f2_13, FIRST(pois.f2_2) AS f2_2, FIRST(pois.f2_4) AS f2_4, 
  FIRST(pois.f2_5) AS f2_5, FIRST(pois.f2_6) AS f2_6, FIRST(pois.f2_7) AS f2_7, 
  FIRST(pois.f2_8) AS f2_8, FIRST(pois.f3_1) AS f3_1, FIRST(pois.f3_2) AS f3_2, 
  FIRST(pois.f3_3) AS f3_3, FIRST(pois.f4) AS f4, FIRST(pois.f4_1) AS f4_1, 
  FIRST(pois.f4_10) AS f4_10, FIRST(pois.f4_11) AS f4_11, FIRST(pois.f4_13) AS f4_13, 
  FIRST(pois.f4_14) AS f4_14, FIRST(pois.f4_16) AS f4_16, FIRST(pois.f4_17) AS f4_17, 
  FIRST(pois.f4_18) AS f4_18, FIRST(pois.f4_2) AS f4_2, FIRST(pois.f4_3) AS f4_3, 
  FIRST(pois.f4_5) AS f4_5, FIRST(pois.f4_6) AS f4_6, FIRST(pois.f4_7) AS f4_7, 
  FIRST(pois.f4_8) AS f4_8, FIRST(pois.f4_9) AS f4_9, FIRST(pois.f5) AS f5, 
  FIRST(pois.f5_1) AS f5_1, FIRST(pois.f5_3) AS f5_3, FIRST(pois.f5_4) AS f5_4, 
  FIRST(pois.f6) AS f6, FIRST(pois.f6_1) AS f6_1, FIRST(pois.f6_2) AS f6_2, 
  FIRST(pois.f6_4) AS f6_4, FIRST(pois.f7) AS f7, FIRST(pois.f8) AS f8, 
  FIRST(pois.f8_1) AS f8_1, FIRST(pois.f8_2) AS f8_2, FIRST(pois.f8_3) AS f8_3, 
  FIRST(pois.f8_4) AS f8_4, FIRST(pois.f8_5) AS f8_5,
  -- Weather and traffic columns, renamed with a weather_ / traffic_ prefix.
  FIRST(weather.weather) AS weather,
  FIRST(weather.temperature) AS weather_temperature, FIRST(weather.pm25) AS weather_pm25,
  FIRST(traffic.tj_level1) AS traffic_tj_level1, FIRST(traffic.tj_level2) AS traffic_tj_level2,
  FIRST(traffic.tj_level3) AS traffic_tj_level3, FIRST(traffic.tj_level4) AS traffic_tj_level4
FROM [datalab-projects-1331:xjk_algo_comp_test.future_gaps] as gaps
-- districts supplies the district_hash for each district_id; pois and traffic are keyed
-- by that hash. LEFT JOINs keep every future_gaps row, and columns from a table with no
-- matching row come back as NULL.
LEFT JOIN [datalab-projects-1331:xjk_algo_comp_test.districts] as districts
  ON districts.district_id = gaps.district_id
LEFT JOIN [datalab-projects-1331:xjk_algo_comp_test.pois] as pois
  ON pois.district_hash = districts.district_hash
-- weather is matched on timeslot only; traffic is matched on timeslot and district.
LEFT JOIN [datalab-projects-1331:xjk_algo_comp_test.weather] as weather
  ON weather.timeslot = gaps.timeslot
LEFT JOIN [datalab-projects-1331:xjk_algo_comp_test.traffic] as traffic
  ON traffic.timeslot = gaps.timeslot
  AND traffic.district_hash = districts.district_hash
GROUP BY gaps.district_id, gaps.timeslot

district_id,timeslot,date,day_in_week,is_sunday,timeofday_slot,sum_price,avg_price,gap,f1,f11,f11_1,f11_2,f11_3,f11_4,f11_5,f11_6,f11_7,f11_8,f13_4,f13_8,f14,f14_1,f14_10,f14_2,f14_3,f14_6,f14_8,f15,f15_1,f15_2,f15_3,f15_4,f15_6,f15_7,f15_8,f16,f16_1,f16_10,f16_11,f16_12,f16_3,f16_4,f16_6,f17,f17_2,f17_3,f17_4,f17_5,f19,f19_1,f19_2,f19_3,f19_4,f1_1,f1_10,f1_11,f1_2,f1_3,f1_4,f1_5,f1_6,f1_7,f1_8,f20,f20_1,f20_2,f20_4,f20_5,f20_6,f20_7,f20_8,f20_9,f21_1,f21_2,f22,f22_1,f22_2,f22_3,f22_4,f22_5,f23,f23_1,f23_2,f23_3,f23_4,f23_5,f23_6,f24,f24_1,f24_2,f24_3,f25,f25_1,f25_3,f25_7,f25_8,f25_9,f2_1,f2_10,f2_11,f2_12,f2_13,f2_2,f2_4,f2_5,f2_6,f2_7,f2_8,f3_1,f3_2,f3_3,f4,f4_1,f4_10,f4_11,f4_13,f4_14,f4_16,f4_17,f4_18,f4_2,f4_3,f4_5,f4_6,f4_7,f4_8,f4_9,f5,f5_1,f5_3,f5_4,f6,f6_1,f6_2,f6_4,f7,f8,f8_1,f8_2,f8_3,f8_4,f8_5,weather,weather_temperature,weather_pm25,traffic_tj_level1,traffic_tj_level2,traffic_tj_level3,traffic_tj_level4
43,2016-01-22-94,2016-01-22,5,0,94,164.0,32.8,2,83,332,498,0,0,166,0,0,332,2324,913,83,4731,0,415,0,249,913,83,332,0,249,415,83,249,415,0,415,0,2158,415,1245,0,1079,0,0,83,0,0,166,8051,415,498,2075,332,332,0,83,83,0,166,0,249,83,332,83,996,747,0,83,0,249,249,0,83,415,0,0,0,0,0,83,0,0,0,0,0,83,0,83,249,0,0,415,0,0,249,0,83,0,332,0,249,166,0,0,83,83,166,0,2739,0,0,664,83,0,0,0,83,166,664,83,249,0,0,0,1162,0,0,0,83,83,83,83,0,0,0,2324,0,0,249,0,166,0,,,,,,,
43,2016-01-22-80,2016-01-22,5,0,80,28.0,28.0,1,83,332,498,0,0,166,0,0,332,2324,913,83,4731,0,415,0,249,913,83,332,0,249,415,83,249,415,0,415,0,2158,415,1245,0,1079,0,0,83,0,0,166,8051,415,498,2075,332,332,0,83,83,0,166,0,249,83,332,83,996,747,0,83,0,249,249,0,83,415,0,0,0,0,0,83,0,0,0,0,0,83,0,83,249,0,0,415,0,0,249,0,83,0,332,0,249,166,0,0,83,83,166,0,2739,0,0,664,83,0,0,0,83,166,664,83,249,0,0,0,1162,0,0,0,83,83,83,83,0,0,0,2324,0,0,249,0,166,0,,,,178.0,4.0,4.0,2.0
46,2016-01-22-70,2016-01-22,5,0,70,2515.9,18.4992647059,37,2075,13529,332,3569,5561,32785,1577,12865,2905,31374,11039,1162,83,0,83,0,249,498,83,2324,0,1328,2656,83,1494,498,0,5146,0,7968,5644,1162,166,4731,83,2324,8798,249,2822,6557,52290,2739,166,16517,3071,166,249,1909,1328,249,166,7304,0,83,3154,17596,10043,3818,6972,2573,0,6889,39010,0,0,83,2490,747,1245,166,1245,498,498,332,0,498,0,249,0,664,38678,4648,581,1494,0,249,1162,830,5063,83,1577,166,4980,2241,4980,249,415,747,747,0,3818,166,83,5644,415,1577,2075,2158,2656,332,1826,1909,2324,1162,3486,581,498,332,2241,249,3652,1909,83,9130,1079,996,664,5810,83,83,5810,664,2573,0,,,,,,,
46,2016-01-22-141,2016-01-22,5,0,141,1298.0,24.037037037,7,2075,13529,332,3569,5561,32785,1577,12865,2905,31374,11039,1162,83,0,83,0,249,498,83,2324,0,1328,2656,83,1494,498,0,5146,0,7968,5644,1162,166,4731,83,2324,8798,249,2822,6557,52290,2739,166,16517,3071,166,249,1909,1328,249,166,7304,0,83,3154,17596,10043,3818,6972,2573,0,6889,39010,0,0,83,2490,747,1245,166,1245,498,498,332,0,498,0,249,0,664,38678,4648,581,1494,0,249,1162,830,5063,83,1577,166,4980,2241,4980,249,415,747,747,0,3818,166,83,5644,415,1577,2075,2158,2656,332,1826,1909,2324,1162,3486,581,498,332,2241,249,3652,1909,83,9130,1079,996,664,5810,83,83,5810,664,2573,0,6.0,1.0,98.0,1472.0,269.0,89.0,58.0
56,2016-01-22-57,2016-01-22,5,0,57,55.8,13.95,0,0,83,0,83,83,996,0,166,0,1079,747,166,0,0,0,0,0,0,0,0,0,0,83,0,0,0,0,166,0,249,166,498,83,581,0,0,166,0,0,0,4233,0,0,913,0,0,0,166,0,83,0,249,0,0,0,2324,664,83,415,0,166,249,10956,0,0,0,166,0,0,0,0,166,0,0,0,0,0,0,0,83,664,0,0,0,0,0,0,0,166,0,83,0,0,0,415,0,83,83,0,0,166,0,0,0,0,0,0,0,0,0,166,0,1245,0,0,0,0,0,0,0,0,0,0,166,83,0,83,2241,0,0,83,0,0,0,4.0,1.0,53.0,268.0,32.0,12.0,12.0
56,2016-01-22-104,2016-01-22,5,0,104,665.8,39.1647058824,0,0,83,0,83,83,996,0,166,0,1079,747,166,0,0,0,0,0,0,0,0,0,0,83,0,0,0,0,166,0,249,166,498,83,581,0,0,166,0,0,0,4233,0,0,913,0,0,0,166,0,83,0,249,0,0,0,2324,664,83,415,0,166,249,10956,0,0,0,166,0,0,0,0,166,0,0,0,0,0,0,0,83,664,0,0,0,0,0,0,0,166,0,83,0,0,0,415,0,83,83,0,0,166,0,0,0,0,0,0,0,0,0,166,0,1245,0,0,0,0,0,0,0,0,0,0,166,83,0,83,2241,0,0,83,0,0,0,,,,242.0,37.0,9.0,6.0
6,2016-01-22-117,2016-01-22,5,0,117,163.1,14.8272727273,1,0,3071,249,1079,1660,15106,249,4648,1411,11122,3320,0,166,0,0,83,166,166,0,747,0,249,747,83,249,166,0,166,0,830,1743,415,83,1660,0,83,1992,0,166,498,5229,166,249,830,249,0,0,1162,498,0,83,664,0,0,1079,1245,3071,498,1079,498,83,913,3569,0,0,83,498,0,498,0,249,0,83,0,0,0,0,83,0,249,2490,166,0,249,0,0,332,83,1079,0,415,0,3403,2075,581,0,0,1328,166,0,1992,0,0,664,83,249,415,415,332,83,83,996,581,332,415,83,0,83,83,0,747,332,0,6225,166,830,0,1328,0,0,913,166,83,0,4.0,1.0,52.0,379.0,127.0,45.0,31.0
6,2016-01-22-142,2016-01-22,5,0,142,157.0,22.4285714286,0,0,3071,249,1079,1660,15106,249,4648,1411,11122,3320,0,166,0,0,83,166,166,0,747,0,249,747,83,249,166,0,166,0,830,1743,415,83,1660,0,83,1992,0,166,498,5229,166,249,830,249,0,0,1162,498,0,83,664,0,0,1079,1245,3071,498,1079,498,83,913,3569,0,0,83,498,0,498,0,249,0,83,0,0,0,0,83,0,249,2490,166,0,249,0,0,332,83,1079,0,415,0,3403,2075,581,0,0,1328,166,0,1992,0,0,664,83,249,415,415,332,83,83,996,581,332,415,83,0,83,83,0,747,332,0,6225,166,830,0,1328,0,0,913,166,83,0,,,,,,,
66,2016-01-22-81,2016-01-22,5,0,81,154.0,6.69565217391,0,830,1826,830,581,498,2324,166,1411,332,15189,2573,332,747,0,166,0,913,166,83,2324,83,2075,5478,166,2656,415,83,1743,0,2988,83,0,83,0,0,83,1245,0,0,1494,10043,498,83,4316,0,166,0,249,913,249,166,1494,83,83,1411,4731,2241,2656,1494,1328,83,3486,11703,0,0,83,332,747,581,249,166,83,166,332,83,415,0,332,249,166,2573,415,0,415,0,0,747,498,1577,166,332,83,581,83,830,0,332,747,166,83,664,83,0,1743,249,83,415,332,664,249,1245,664,166,166,747,83,415,166,0,166,830,664,0,5229,498,415,498,3237,83,0,1660,1328,1162,249,,,,241.0,69.0,10.0,16.0
18,2016-01-22-116,2016-01-22,5,0,116,190.5,23.8125,0,83,1411,415,249,166,3237,0,747,249,5893,3320,0,0,0,83,0,249,0,0,830,0,415,1577,83,1328,166,0,2656,83,1079,1660,913,83,1909,0,498,1909,0,0,830,29216,2075,83,4897,664,0,0,664,249,0,0,1992,83,0,1079,2822,4316,830,1743,830,0,3984,8715,0,0,249,913,0,581,415,747,415,83,0,83,0,0,415,0,166,2324,166,0,498,0,0,415,0,1909,83,581,0,747,0,498,0,498,166,664,0,3901,0,83,913,332,0,664,415,830,166,83,581,913,0,664,83,0,166,249,0,415,166,0,1245,166,0,0,3154,83,83,830,166,747,166,,,,437.0,85.0,29.0,21.0
