# Test Database - Gaps Table
First, make a simple **gaps** table, then fill it with data from other tables.

In [None]:
%%bigquery execute -t datalab-projects-1331:xjk_algo_comp_test.gaps -m create
SELECT district_id, FIRST(orders.timeslot) AS timeslot, FIRST(orders.date) AS date,
  FIRST(day_in_week) AS day_in_week, FIRST(timeofday_slot) AS timeofday_slot,
  SUM(price) AS sum_price, AVG(price) AS avg_price,
  SUM(IF(driver_id = 'NULL', 1, 0)) AS gap
FROM [datalab-projects-1331:xjk_algo_comp_test.orders] AS orders
JOIN [datalab-projects-1331:xjk_algo_comp_test.districts] AS districts 
  ON orders.start_district_hash = districts.district_hash
GROUP BY district_id, orders.timeslot

In [None]:
%% sql -q tester
SELECT SUM(price) FROM datalab-projects-1331:xjk_algo_comp_test.orders
WHERE start_district_hash = 'd4ec2125aff74eded207d2d915ef682f'
  AND (time LIKE "%2016-01-01 00:5%")
  
# Reference only. Use above query to check if aggregation was correct, by comparing the
# result of that from sum of district_hash = 51 and timeslot "2016-01-01-6".

# +1 timeslot
We are going to use past data to predict the gaps for the next 10 minutes (or in other words, the next timeslot). To do this, we need to update timeslot related information. Specifically we are going to do the following:
1. Add `timeslot` feature by one:
  - '2016-01-45' to '2016-01-46'
  - '2016-01-144' to '2016-02-1'
2. Update `date` and `slot` features accordingly.
3. Recalculate `day_in_week` feature.

In [None]:
%%bigquery udf --module gaps_plus_one_timeslot
/**
 * Pad with 0 or given string.
 *
 * @param int n Number to add padding to.
 * @param int width Width of number + padding.
 * @param string z (Optional) Other string to replace '0' as padding.
 */
function pad(n, width, z) {
  z = z || '0';
  n = n + '';
  return n.length >= width ? n : new Array(width - n.length + 1).join(z) + n;
}

/**
 * Add one timeslot and adjust other relevant tables.
 *
 * @param {{district_id: integer, timeslot: string, date: string, day_in_week: integer,
            timeofday_slot: integer, sum_price: float, avg_price: float, gap: integer}} r
 * @param function({{district_id: integer, timeslot: string, date: string, day_in_week: integer,
                timeofday_slot: integer, sum_price: float, avg_price: float, gap: integer}}) emitFn
 */
function(r, emitFn) {
  
  var t = r.timeslot.split(/-/);
  var oldslot = parseInt(t[3]);
  var newslot = oldslot + 1;
  var d = new Date(parseInt(t[0]), parseInt(t[1])-1, parseInt(t[2]));
  
  if (oldslot == 144) {
    newslot = 1;
    d = d.setDate(d.getDate() + 1);
    r.day_in_week = d.getDay();
    r.date = d.getFullYear() + '-' + pad(d.getMonth()+1, 2) +
             '-' + pad(d.getDate(), 2);
  }
  r.timeslot = d.getFullYear() + '-' + pad(d.getMonth()+1, 2) +
               '-' + pad(d.getDate(), 2) + '-' + newslot;

  r.timeofday_slot = newslot;
  emitFn(r);
}

In [None]:
%%bigquery execute -t datalab-projects-1331:xjk_algo_comp_test.future_gaps -m create

SELECT district_id, timeslot, date, day_in_week, timeofday_slot, sum_price, avg_price, gap
FROM gaps_plus_one_timeslot([datalab-projects-1331:xjk_algo_comp_test.gaps])

# Gaps Table + Other Tables

In [None]:
%%bigquery execute -t datalab-projects-1331:xjk_algo_comp_test.future_gaps_processed -m create

SELECT FIRST(gaps.district_id) AS district_id, FIRST(gaps.timeslot) AS timeslot, FIRST(gaps.date) AS date,
  FIRST(gaps.day_in_week) AS day_in_week, IF(FIRST(gaps.day_in_week) = 0, 1, 0) AS is_sunday,
  FIRST(gaps.timeofday_slot) AS timeofday_slot,
  FIRST(gaps.sum_price) AS sum_price, FIRST(gaps.avg_price) AS avg_price, FIRST(gaps.gap) AS gap,
  FIRST(pois.f1) AS f1, FIRST(pois.f11) AS f11, FIRST(pois.f11_1) AS f11_1, 
  FIRST(pois.f11_2) AS f11_2, FIRST(pois.f11_3) AS f11_3, FIRST(pois.f11_4) AS f11_4, 
  FIRST(pois.f11_5) AS f11_5, FIRST(pois.f11_6) AS f11_6, FIRST(pois.f11_7) AS f11_7, 
  FIRST(pois.f11_8) AS f11_8, FIRST(pois.f13_4) AS f13_4, FIRST(pois.f13_8) AS f13_8, 
  FIRST(pois.f14) AS f14, FIRST(pois.f14_1) AS f14_1, FIRST(pois.f14_10) AS f14_10, 
  FIRST(pois.f14_2) AS f14_2, FIRST(pois.f14_3) AS f14_3, FIRST(pois.f14_6) AS f14_6, 
  FIRST(pois.f14_8) AS f14_8, FIRST(pois.f15) AS f15, FIRST(pois.f15_1) AS f15_1, 
  FIRST(pois.f15_2) AS f15_2, FIRST(pois.f15_3) AS f15_3, FIRST(pois.f15_4) AS f15_4, 
  FIRST(pois.f15_6) AS f15_6, FIRST(pois.f15_7) AS f15_7, FIRST(pois.f15_8) AS f15_8, 
  FIRST(pois.f16) AS f16, FIRST(pois.f16_1) AS f16_1, FIRST(pois.f16_10) AS f16_10, 
  FIRST(pois.f16_11) AS f16_11, FIRST(pois.f16_12) AS f16_12, FIRST(pois.f16_3) AS f16_3, 
  FIRST(pois.f16_4) AS f16_4, FIRST(pois.f16_6) AS f16_6, FIRST(pois.f17) AS f17, 
  FIRST(pois.f17_2) AS f17_2, FIRST(pois.f17_3) AS f17_3, FIRST(pois.f17_4) AS f17_4, 
  FIRST(pois.f17_5) AS f17_5, FIRST(pois.f19) AS f19, FIRST(pois.f19_1) AS f19_1, 
  FIRST(pois.f19_2) AS f19_2, FIRST(pois.f19_3) AS f19_3, FIRST(pois.f19_4) AS f19_4, 
  FIRST(pois.f1_1) AS f1_1, FIRST(pois.f1_10) AS f1_10, FIRST(pois.f1_11) AS f1_11, 
  FIRST(pois.f1_2) AS f1_2, FIRST(pois.f1_3) AS f1_3, FIRST(pois.f1_4) AS f1_4, 
  FIRST(pois.f1_5) AS f1_5, FIRST(pois.f1_6) AS f1_6, FIRST(pois.f1_7) AS f1_7, 
  FIRST(pois.f1_8) AS f1_8, FIRST(pois.f20) AS f20, FIRST(pois.f20_1) AS f20_1, 
  FIRST(pois.f20_2) AS f20_2, FIRST(pois.f20_4) AS f20_4, FIRST(pois.f20_5) AS f20_5, 
  FIRST(pois.f20_6) AS f20_6, FIRST(pois.f20_7) AS f20_7, FIRST(pois.f20_8) AS f20_8, 
  FIRST(pois.f20_9) AS f20_9, FIRST(pois.f21_1) AS f21_1, FIRST(pois.f21_2) AS f21_2, 
  FIRST(pois.f22) AS f22, FIRST(pois.f22_1) AS f22_1, FIRST(pois.f22_2) AS f22_2, 
  FIRST(pois.f22_3) AS f22_3, FIRST(pois.f22_4) AS f22_4, FIRST(pois.f22_5) AS f22_5, 
  FIRST(pois.f23) AS f23, FIRST(pois.f23_1) AS f23_1, FIRST(pois.f23_2) AS f23_2, 
  FIRST(pois.f23_3) AS f23_3, FIRST(pois.f23_4) AS f23_4, FIRST(pois.f23_5) AS f23_5, 
  FIRST(pois.f23_6) AS f23_6, FIRST(pois.f24) AS f24, FIRST(pois.f24_1) AS f24_1, 
  FIRST(pois.f24_2) AS f24_2, FIRST(pois.f24_3) AS f24_3, FIRST(pois.f25) AS f25, 
  FIRST(pois.f25_1) AS f25_1, FIRST(pois.f25_3) AS f25_3, FIRST(pois.f25_7) AS f25_7, 
  FIRST(pois.f25_8) AS f25_8, FIRST(pois.f25_9) AS f25_9, FIRST(pois.f2_1) AS f2_1, 
  FIRST(pois.f2_10) AS f2_10, FIRST(pois.f2_11) AS f2_11, FIRST(pois.f2_12) AS f2_12, 
  FIRST(pois.f2_13) AS f2_13, FIRST(pois.f2_2) AS f2_2, FIRST(pois.f2_4) AS f2_4, 
  FIRST(pois.f2_5) AS f2_5, FIRST(pois.f2_6) AS f2_6, FIRST(pois.f2_7) AS f2_7, 
  FIRST(pois.f2_8) AS f2_8, FIRST(pois.f3_1) AS f3_1, FIRST(pois.f3_2) AS f3_2, 
  FIRST(pois.f3_3) AS f3_3, FIRST(pois.f4) AS f4, FIRST(pois.f4_1) AS f4_1, 
  FIRST(pois.f4_10) AS f4_10, FIRST(pois.f4_11) AS f4_11, FIRST(pois.f4_13) AS f4_13, 
  FIRST(pois.f4_14) AS f4_14, FIRST(pois.f4_16) AS f4_16, FIRST(pois.f4_17) AS f4_17, 
  FIRST(pois.f4_18) AS f4_18, FIRST(pois.f4_2) AS f4_2, FIRST(pois.f4_3) AS f4_3, 
  FIRST(pois.f4_5) AS f4_5, FIRST(pois.f4_6) AS f4_6, FIRST(pois.f4_7) AS f4_7, 
  FIRST(pois.f4_8) AS f4_8, FIRST(pois.f4_9) AS f4_9, FIRST(pois.f5) AS f5, 
  FIRST(pois.f5_1) AS f5_1, FIRST(pois.f5_3) AS f5_3, FIRST(pois.f5_4) AS f5_4, 
  FIRST(pois.f6) AS f6, FIRST(pois.f6_1) AS f6_1, FIRST(pois.f6_2) AS f6_2, 
  FIRST(pois.f6_4) AS f6_4, FIRST(pois.f7) AS f7, FIRST(pois.f8) AS f8, 
  FIRST(pois.f8_1) AS f8_1, FIRST(pois.f8_2) AS f8_2, FIRST(pois.f8_3) AS f8_3, 
  FIRST(pois.f8_4) AS f8_4, FIRST(pois.f8_5) AS f8_5,
  FIRST(weather.weather) AS weather,
  FIRST(weather.temperature) AS weather_temperature, FIRST(weather.pm25) AS weather_pm25,
  FIRST(traffic.tj_level1) AS traffic_tj_level1, FIRST(traffic.tj_level2) AS traffic_tj_level2,
  FIRST(traffic.tj_level3) AS traffic_tj_level3, FIRST(traffic.tj_level4) AS traffic_tj_level4
FROM [datalab-projects-1331:xjk_algo_comp_test.future_gaps] as gaps
LEFT JOIN [datalab-projects-1331:xjk_algo_comp_test.districts] as districts
  ON districts.district_id = gaps.district_id
LEFT JOIN [datalab-projects-1331:xjk_algo_comp_test.pois] as pois
  ON pois.district_hash = districts.district_hash
LEFT JOIN [datalab-projects-1331:xjk_algo_comp_test.weather] as weather
  ON weather.timeslot = gaps.timeslot
LEFT JOIN [datalab-projects-1331:xjk_algo_comp_test.traffic] as traffic
  ON traffic.timeslot = gaps.timeslot
  AND traffic.district_hash = districts.district_hash
GROUP BY gaps.district_id, gaps.timeslot

# PCA
Combine POIs (features starting with 'f...') into n number of features.

In [5]:
from sklearn.decomposition import PCA
import pandas as pd
import gcp.bigquery as bq
import numpy as np

import pdb

In [6]:
pois = """
f1	f11	f11_1	f11_2	f11_3	f11_4	f11_5	f11_6	f11_7	f11_8	f13_4	f13_8	
f14	f14_1	f14_10	f14_2	f14_3	f14_6	f14_8	f15	f15_1	f15_2	f15_3	f15_4	
f15_6	f15_7	f15_8	f16	f16_1	f16_10	f16_11	f16_12	f16_3	f16_4	f16_6	f17	
f17_2	f17_3	f17_4	f17_5	f19	f19_1	f19_2	f19_3	f19_4	f1_1	f1_10	f1_11	
f1_2	f1_3	f1_4	f1_5	f1_6	f1_7	f1_8	f20	f20_1	f20_2	f20_4	f20_5	
f20_6	f20_7	f20_8	f20_9	f21_1	f21_2	f22	f22_1	f22_2	f22_3	f22_4	f22_5	
f23	f23_1	f23_2	f23_3	f23_4	f23_5	f23_6	f24	f24_1	f24_2	f24_3	f25	f25_1	
f25_3	f25_7	f25_8	f25_9	f2_1	f2_10	f2_11	f2_12	f2_13	f2_2	f2_4	f2_5	
f2_6	f2_7	f2_8	f3_1	f3_2	f3_3	f4	f4_1	f4_10	f4_11	f4_13	f4_14	
f4_16	f4_17	f4_18	f4_2	f4_3	f4_5	f4_6	f4_7	f4_8	f4_9	f5	f5_1	
f5_3	f5_4	f6	f6_1	f6_2	f6_4	f7	f8	f8_1	f8_2	f8_3	f8_4	f8_5
"""
pois = map(lambda x: "{}".format(x.strip()), pois.split('\t'))
print(pois)

['f1', 'f11', 'f11_1', 'f11_2', 'f11_3', 'f11_4', 'f11_5', 'f11_6', 'f11_7', 'f11_8', 'f13_4', 'f13_8', 'f14', 'f14_1', 'f14_10', 'f14_2', 'f14_3', 'f14_6', 'f14_8', 'f15', 'f15_1', 'f15_2', 'f15_3', 'f15_4', 'f15_6', 'f15_7', 'f15_8', 'f16', 'f16_1', 'f16_10', 'f16_11', 'f16_12', 'f16_3', 'f16_4', 'f16_6', 'f17', 'f17_2', 'f17_3', 'f17_4', 'f17_5', 'f19', 'f19_1', 'f19_2', 'f19_3', 'f19_4', 'f1_1', 'f1_10', 'f1_11', 'f1_2', 'f1_3', 'f1_4', 'f1_5', 'f1_6', 'f1_7', 'f1_8', 'f20', 'f20_1', 'f20_2', 'f20_4', 'f20_5', 'f20_6', 'f20_7', 'f20_8', 'f20_9', 'f21_1', 'f21_2', 'f22', 'f22_1', 'f22_2', 'f22_3', 'f22_4', 'f22_5', 'f23', 'f23_1', 'f23_2', 'f23_3', 'f23_4', 'f23_5', 'f23_6', 'f24', 'f24_1', 'f24_2', 'f24_3', 'f25', 'f25_1', 'f25_3', 'f25_7', 'f25_8', 'f25_9', 'f2_1', 'f2_10', 'f2_11', 'f2_12', 'f2_13', 'f2_2', 'f2_4', 'f2_5', 'f2_6', 'f2_7', 'f2_8', 'f3_1', 'f3_2', 'f3_3', 'f4', 'f4_1', 'f4_10', 'f4_11', 'f4_13', 'f4_14', 'f4_16', 'f4_17', 'f4_18', 'f4_2', 'f4_3', 'f4_5', 'f4_6', 'f

In [7]:
%%sql --module q
SELECT * FROM [datalab-projects-1331:xjk_algo_comp_test.future_gaps_processed]

In [11]:
query = bq.Query(q)
tableresult = query.results()

poi_fields = ['f1', 'f11', 'f11_1', 'f11_2', 'f11_3', 'f11_4', 'f11_5', 'f11_6', 'f11_7', 'f11_8',
              'f13_4', 'f13_8', 'f14', 'f14_1', 'f14_10', 'f14_2', 'f14_3', 'f14_6', 'f14_8', 'f15',
              'f15_1', 'f15_2', 'f15_3', 'f15_4', 'f15_6', 'f15_7', 'f15_8', 'f16', 'f16_1', 'f16_10',
              'f16_11', 'f16_12', 'f16_3', 'f16_4', 'f16_6', 'f17', 'f17_2', 'f17_3', 'f17_4', 'f17_5',
              'f19', 'f19_1', 'f19_2', 'f19_3', 'f19_4', 'f1_1', 'f1_10', 'f1_11', 'f1_2', 'f1_3', 'f1_4',
              'f1_5', 'f1_6', 'f1_7', 'f1_8', 'f20', 'f20_1', 'f20_2', 'f20_4', 'f20_5', 'f20_6', 'f20_7',
              'f20_8', 'f20_9', 'f21_1', 'f21_2', 'f22', 'f22_1', 'f22_2', 'f22_3', 'f22_4', 'f22_5',
              'f23', 'f23_1', 'f23_2', 'f23_3', 'f23_4', 'f23_5', 'f23_6', 'f24', 'f24_1', 'f24_2',
              'f24_3', 'f25', 'f25_1', 'f25_3', 'f25_7', 'f25_8', 'f25_9', 'f2_1', 'f2_10', 'f2_11',
              'f2_12', 'f2_13', 'f2_2', 'f2_4', 'f2_5', 'f2_6', 'f2_7', 'f2_8', 'f3_1', 'f3_2', 'f3_3',
              'f4', 'f4_1', 'f4_10', 'f4_11', 'f4_13', 'f4_14', 'f4_16', 'f4_17', 'f4_18', 'f4_2',
              'f4_3', 'f4_5', 'f4_6', 'f4_7', 'f4_8', 'f4_9', 'f5', 'f5_1', 'f5_3', 'f5_4', 'f6', 'f6_1',
              'f6_2', 'f6_4', 'f7', 'f8', 'f8_1', 'f8_2', 'f8_3', 'f8_4', 'f8_5']
all_fields = tableresult[0].keys()
all_data = []
pois_data = np.zeros((tableresult.length, len(poi_fields)))
print 'there are {} rows'.format(tableresult.length)
for rcounter, row in enumerate(tableresult):
  for fcounter, field in enumerate(poi_fields):
    pois_data[rcounter, fcounter] = row.pop(field, None)
  all_data.append(row)
  if rcounter % 5000 == 0:
    print 'processed {} rows'.format(rcounter)
    


there are 7924 rows
processed 0 rows
processed 5000 rows


TypeError: ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''

{u'avg_price': 32.8,
 u'date': u'2016-01-22',
 u'day_in_week': 5,
 u'district_id': 43,
 u'gap': 2,
 u'is_sunday': 0,
 u'sum_price': 164.0,
 u'timeofday_slot': 94,
 u'timeslot': u'2016-01-22-94',
 u'traffic_tj_level1': None,
 u'traffic_tj_level2': None,
 u'traffic_tj_level3': None,
 u'traffic_tj_level4': None,
 u'weather': None,
 u'weather_pm25': None,
 u'weather_temperature': None}

In [9]:
pca = PCA(n_components=5)
pois_data_s = pca.fit_transform(pois_data)

for rcounter, row in enumerate(all_data):
  for index in range(pois_data_s.shape[1]):
    all_data[rcounter]['poi{}'.format(index+1)] = pois_data_s[rcounter, index]

In [19]:
all_data_df = pd.DataFrame(all_data)
all_data_df

Unnamed: 0,avg_price,date,day_in_week,district_id,gap,is_sunday,sum_price,timeofday_slot,timeslot,traffic_tj_level1,traffic_tj_level2,traffic_tj_level3,traffic_tj_level4,weather,weather_pm25,weather_temperature
0,32.800000,2016-01-22,5,43,2,0,164.0,94,2016-01-22-94,,,,,,,
1,28.000000,2016-01-22,5,43,1,0,28.0,80,2016-01-22-80,178.0,4.0,4.0,2.0,,,
2,18.499265,2016-01-22,5,46,37,0,2515.9,70,2016-01-22-70,,,,,,,
3,24.037037,2016-01-22,5,46,7,0,1298.0,141,2016-01-22-141,1472.0,269.0,89.0,58.0,6.0,98.0,1.0
4,13.950000,2016-01-22,5,56,0,0,55.8,57,2016-01-22-57,268.0,32.0,12.0,12.0,4.0,53.0,1.0
5,39.164706,2016-01-22,5,56,0,0,665.8,104,2016-01-22-104,242.0,37.0,9.0,6.0,,,
6,14.827273,2016-01-22,5,6,1,0,163.1,117,2016-01-22-117,379.0,127.0,45.0,31.0,4.0,52.0,1.0
7,22.428571,2016-01-22,5,6,0,0,157.0,142,2016-01-22-142,,,,,,,
8,6.695652,2016-01-22,5,66,0,0,154.0,81,2016-01-22-81,241.0,69.0,10.0,16.0,,,
9,23.812500,2016-01-22,5,18,0,0,190.5,116,2016-01-22-116,437.0,85.0,29.0,21.0,,,


In [20]:
df.where((pd.notnull(df)), None)
all_data_df['traffic_tj_level1'] = all_data_df['traffic_tj_level1'].astype('int64')
all_data_df['traffic_tj_level2'] = all_data_df['traffic_tj_level2'].astype('int64')
all_data_df['traffic_tj_level3'] = all_data_df['traffic_tj_level3'].astype('int64')
all_data_df['traffic_tj_level4'] = all_data_df['traffic_tj_level4'].astype('int64')
all_data_df['weather'] = all_data_df['weather'].astype('int64')
all_data_df.dtypes

ValueError: Cannot convert NA to integer

In [21]:
all_data_df['traffic_tj_level1'] = all_data_df['traffic_tj_level1']

0          NaN
1        178.0
2          NaN
3       1472.0
4        268.0
5        242.0
6        379.0
7          NaN
8        241.0
9        437.0
10       414.0
11      1582.0
12      1635.0
13      1607.0
14      1491.0
15         NaN
16       297.0
17       818.0
18         NaN
19      1061.0
20      1078.0
21         NaN
22         NaN
23         NaN
24       455.0
25         NaN
26        87.0
27         NaN
28       301.0
29       331.0
         ...  
7894       NaN
7895       NaN
7896       NaN
7897       NaN
7898       NaN
7899       NaN
7900    1439.0
7901       NaN
7902    1491.0
7903      80.0
7904     284.0
7905     359.0
7906     190.0
7907     200.0
7908       NaN
7909     800.0
7910       NaN
7911       NaN
7912     699.0
7913       NaN
7914     378.0
7915     428.0
7916     389.0
7917       NaN
7918     299.0
7919     306.0
7920    1392.0
7921    1508.0
7922       NaN
7923      93.0
Name: traffic_tj_level1, dtype: float64

In [15]:
schema = bq.Schema([
    {'name': 'district_id', 'type': 'INTEGER'},
    {'name': 'timeslot', 'type': 'STRING'},
    {'name': 'date', 'type': 'STRING'},
    {'name': 'timeofday_slot', 'type': 'INTEGER'},
    {'name': 'day_in_week', 'type': 'INTEGER'},
    {'name': 'is_sunday', 'type': 'INTEGER'},
    {'name': 'sum_price', 'type': 'FLOAT'},
    {'name': 'avg_price', 'type': 'FLOAT'},
    {'name': 'poi1', 'type': 'FLOAT'},
    {'name': 'poi2', 'type': 'FLOAT'},
    {'name': 'poi3', 'type': 'FLOAT'},
    {'name': 'poi4', 'type': 'FLOAT'},
    {'name': 'poi5', 'type': 'FLOAT'},
    {'name': 'traffic_tj_level1', 'type': 'INTEGER'},
    {'name': 'traffic_tj_level2', 'type': 'INTEGER'},
    {'name': 'traffic_tj_level3', 'type': 'INTEGER'},
    {'name': 'traffic_tj_level4', 'type': 'INTEGER'},
    {'name': 'weather', 'type': 'INTEGER'},
    {'name': 'weather_pm25', 'type': 'FLOAT'},
    {'name': 'weather_temperature', 'type': 'FLOAT'},
    {'name': 'gap', 'type': 'INTEGER'}
])
table = bq.Table('datalab-projects-1331:xjk_algo_comp_test.future_gaps_final1')
table.create(schema)
table.insert_data(all_data_df)

Exception: Field traffic_tj_level1 in data has type FLOAT but in table has type INTEGER