### Imports 

In [1]:
import os
import urllib
import urllib.request

from blazingsql import BlazingContext
# connect to BlazingSQL; `bc` is the context the later cells use to create tables and run SQL
# NOTE(review): pool=False presumably turns off pooled GPU memory allocation — confirm against the BlazingContext docs
bc = BlazingContext(pool=False)

BlazingContext ready


### Data check / Download
The next cell will check if you have the data for this demo, and, if you don't, will download it for you.

In [2]:
%%time
# Make sure the demo CSV is available locally, downloading it on first run,
# and expose its absolute location as `data_path` for the table-creation cell.

# tag url, data directory & file name
base_url = 'https://s3.amazonaws.com/nyc-tlc/trip+data/'
data_dir = 'data/'
fn = 'yellow_tripdata_2015-01.csv'

# tag cloud & relative local paths to data
local_data = data_dir + fn
cloud_data = base_url + fn

# does data directory exist?
if not os.path.exists(data_dir):
    print('creating data directory')
    # create the folder named by data_dir (no shell call, no hard-coded name)
    os.makedirs(data_dir, exist_ok=True)

# do we already have the file?
if not os.path.isfile(local_data):
    # we don't, let me know we're downloading it now, then download it
    # (cloud_data[8:] drops the leading 'https://' from the printed URL)
    print(f'Downloading {cloud_data[8:]} to {local_data}')
    # download to a temporary name first so an interrupted transfer is never
    # mistaken for a complete file by the isfile() check on the next run;
    # urllib.request must be imported explicitly, bare `import urllib` does not load it
    partial_data = local_data + '.part'
    urllib.request.urlretrieve(cloud_data, partial_data)
    os.replace(partial_data, local_data)
# we already have data
else:
    # let me know this is the case
    print(f'{local_data} already downloaded')

# tag absolute path to the data (current working directory + relative path)
data_path = os.path.join(os.getcwd(), local_data)

data/yellow_tripdata_2015-01.csv already downloaded
CPU times: user 287 µs, sys: 117 µs, total: 404 µs
Wall time: 226 µs


### Create Table

In [3]:
%%time
# create a BlazingSQL table named 'taxi' from the CSV at data_path (set in the download cell)
# NOTE(review): header=0 presumably marks the first row as the column header — confirm against the create_table docs
bc.create_table('taxi', data_path, header=0)

CPU times: user 15.1 ms, sys: 0 ns, total: 15.1 ms
Wall time: 13 ms


<pyblazing.apiv2.context.BlazingTable at 0x7f9228c09898>

In [4]:
# how's the table look? (> 12.7M rows)
# selects every column and row from 'taxi', then displays only the last 3 rows of the result
bc.sql('select * from taxi').tail(3)

Unnamed: 0,VendorID,tpep_pickup_datetime,tpep_dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,RateCodeID,store_and_fwd_flag,dropoff_longitude,dropoff_latitude,payment_type,fare_amount,extra,mta_tax,tip_amount,tolls_amount,improvement_surcharge,total_amount
12748983,1,2015-01-10 19:01:44,2015-01-10 19:15:01,1,3.4,-73.979324,40.74955,1,N,-73.969101,40.7878,2,13.5,0.0,0.5,0.0,0.0,0.3,14.3
12748984,1,2015-01-10 19:01:44,2015-01-10 19:17:03,1,1.3,-73.999565,40.738483,1,N,-73.981819,40.737652,1,10.5,0.0,0.5,2.25,0.0,0.3,13.55
12748985,1,2015-01-10 19:01:45,2015-01-10 19:07:33,1,0.7,-73.96035,40.766399,1,N,-73.968643,40.760777,2,5.5,0.0,0.5,0.0,0.0,0.3,6.3


# BlazingSQL DateTime Functions
In the next cell, we define a query that first converts our pickup times (`tpep_pickup_datetime`) from string values to timestamp values, using the concat operator `||` to append milliseconds so the conversion is clean (see [BlazingDB/blazingsql#269](https://github.com/BlazingDB/blazingsql/issues/269)). It then calls BlazingSQL's DateTime Functions to pull the YEAR, MONTH, DAYOFMONTH, HOUR, MINUTE, and SECOND values from each timestamp.

For more on DateTime Functions in BlazingSQL, see [the docs](https://docs.blazingdb.com/docs/datetime-functions).

In [5]:
# add milliseconds to datetime values, then convert to timestamp & extract broken down values
# inner select: appends '.000' to tpep_pickup_datetime and casts the result to a timestamp aliased `ts`
# outer select: pulls year/month/day-of-month/hour/minute/second out of `ts`, and keeps `ts` itself
# the string is only defined here; it is executed (and reused as a subquery) in later cells
time_query = '''
             select
                 year(ts) as years,
                 month(ts) as months,
                 dayofmonth(ts) as days,
                 hour(ts) as hours,
                 minute(ts) as minutes,
                 second(ts) as seconds,
                 ts
             from (
                 select
                     cast(tpep_pickup_datetime || '.000' as timestamp) ts
                 from 
                     taxi
                     )
                     '''

In [6]:
%%time
# execute the datetime breakdown query defined in the previous cell and display the result
bc.sql(time_query)

CPU times: user 9.41 s, sys: 6.02 s, total: 15.4 s
Wall time: 15.2 s


Unnamed: 0,years,months,days,hours,minutes,seconds,ts
0,2015,1,15,19,5,39,2015-01-15 19:05:39
1,2015,1,10,20,33,38,2015-01-10 20:33:38
2,2015,1,10,20,33,38,2015-01-10 20:33:38
3,2015,1,10,20,33,39,2015-01-10 20:33:39
4,2015,1,10,20,33,39,2015-01-10 20:33:39
...,...,...,...,...,...,...,...
12748981,2015,1,10,19,1,44,2015-01-10 19:01:44
12748982,2015,1,10,19,1,44,2015-01-10 19:01:44
12748983,2015,1,10,19,1,44,2015-01-10 19:01:44
12748984,2015,1,10,19,1,44,2015-01-10 19:01:44


### Nested DateTime Functions
By nesting our original query, we can easily focus our data on specific date ranges. Say, January 25-31 of 2015.

In [7]:
%%time
# pull broken down datetime values from 25-31 Jan 2015
# time_query is interpolated as a subquery, so its years/months/days columns can be filtered directly
fin_jan = f'''
          select
              *
          from (
              {time_query}
              )
              where
                  years = 2015
                  and months = 1
                  and days > 24
                  '''
# run the query
bc.sql(fin_jan)

CPU times: user 9.59 s, sys: 5.54 s, total: 15.1 s
Wall time: 15.1 s


Unnamed: 0,years,months,days,hours,minutes,seconds,ts
0,2015,1,25,0,13,4,2015-01-25 00:13:04
1,2015,1,25,0,13,5,2015-01-25 00:13:05
2,2015,1,25,0,13,5,2015-01-25 00:13:05
3,2015,1,25,0,13,5,2015-01-25 00:13:05
4,2015,1,25,0,13,6,2015-01-25 00:13:06
...,...,...,...,...,...,...,...
2588953,2015,1,26,6,45,6,2015-01-26 06:45:06
2588954,2015,1,26,6,45,7,2015-01-26 06:45:07
2588955,2015,1,26,6,45,7,2015-01-26 06:45:07
2588956,2015,1,26,6,45,8,2015-01-26 06:45:08


Or say we wanted all values from this date range, but not the broken down datetime values.

In [8]:
%%time
# pull all values from Jan 25-31 2015
# the subquery keeps every original taxi column and adds `ts` (pickup time cast to timestamp);
# the datetime functions are applied only in the where clause, so no broken down columns are returned
query = '''
        select 
            *
        from (
            select 
                *,
                cast(tpep_pickup_datetime || '.000' as timestamp) ts
            from
                taxi
                )
            where
                year(ts) = 2015
                and month(ts) = 1
                and dayofmonth(ts) > 24
                '''
# run & display query 
bc.sql(query)

CPU times: user 5.37 s, sys: 3.06 s, total: 8.43 s
Wall time: 8.38 s


Unnamed: 0,VendorID,tpep_pickup_datetime,tpep_dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,RateCodeID,store_and_fwd_flag,dropoff_longitude,dropoff_latitude,payment_type,fare_amount,extra,mta_tax,tip_amount,tolls_amount,improvement_surcharge,total_amount,ts
0,2,2015-01-25 00:13:04,2015-01-25 00:13:18,1,0.02,-73.994812,40.727741,1,N,-73.996941,40.725559,2,2.5,0.5,0.5,0.00,0.0,0.3,3.80,2015-01-25 00:13:04
1,2,2015-01-25 00:13:05,2015-01-25 00:29:11,1,1.73,-73.985939,40.726765,1,N,-74.006546,40.737865,2,11.0,0.5,0.5,0.00,0.0,0.3,12.30,2015-01-25 00:13:05
2,2,2015-01-25 00:13:05,2015-01-25 00:25:43,1,2.25,-73.979988,40.743275,1,N,-74.008087,40.739056,1,10.5,0.5,0.5,2.36,0.0,0.3,14.16,2015-01-25 00:13:05
3,2,2015-01-25 00:13:05,2015-01-25 00:25:50,1,3.53,-73.988968,40.721680,1,N,-73.978729,40.762470,2,12.5,0.5,0.5,0.00,0.0,0.3,13.80,2015-01-25 00:13:05
4,2,2015-01-25 00:13:06,2015-01-25 00:23:26,1,2.01,-73.948685,40.782360,1,N,-73.957085,40.801991,1,9.5,0.5,0.5,2.16,0.0,0.3,12.96,2015-01-25 00:13:06
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2588953,2,2015-01-26 06:45:06,2015-01-26 06:52:29,1,1.65,-74.016037,40.715496,1,N,-74.007500,40.703526,2,8.0,0.0,0.5,0.00,0.0,0.3,8.80,2015-01-26 06:45:06
2588954,2,2015-01-26 06:45:07,2015-01-26 06:48:36,1,1.63,-73.984161,40.737431,1,N,-73.972000,40.757381,1,6.5,0.0,0.5,1.46,0.0,0.3,8.76,2015-01-26 06:45:07
2588955,2,2015-01-26 06:45:07,2015-01-26 06:54:27,1,2.08,-73.985512,40.778576,1,N,-73.987740,40.759686,1,9.0,0.0,0.5,1.96,0.0,0.3,11.76,2015-01-26 06:45:07
2588956,2,2015-01-26 06:45:08,2015-01-26 06:50:45,6,0.97,-74.006798,40.715832,1,N,-74.007805,40.705593,1,5.5,0.0,0.5,1.00,0.0,0.3,7.30,2015-01-26 06:45:08
