In [1]:
! pip install -U scikit-learn

Requirement already up-to-date: scikit-learn in /opt/conda/lib/python3.6/site-packages
[33mYou are using pip version 9.0.1, however version 10.0.1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In [4]:
import matplotlib.pyplot as plt 
from pyspark.sql import SparkSession
from pyspark.sql.types import *
import pyspark.sql.functions as F

from pyspark import SparkContext as sc
from pyspark.sql.functions import col, split
import pyspark.sql as SQL
from pyspark.sql.functions import unix_timestamp, from_unixtime, date_format, \
        from_utc_timestamp, to_utc_timestamp, date_format, dayofmonth
import datetime
import calendar
import pandas as pd
from pyspark.ml.linalg import Vectors
from pyspark.ml.regression import LinearRegression
#from pyspark.ml.feature import OneHotEncoderEstimator


In [5]:
# Create (or reuse) the Spark session used by every cell in this notebook.
spark = (
    SparkSession.builder
    .appName('Jan-01')
    .getOrCreate()
)

In [6]:
# Load the abbreviated January 2017 yellow-taxi trip data. Download and
# decompress the CSV into the Jupyter environment first. The first row is used
# as the header and the resulting frame is cached for reuse.
jan_2017 = (
    spark.read
    .format("csv")
    .option("header", True)
    .load('yellow_tripdata_half.csv')
    .cache()
)

In [7]:
# Read the zone lookup twice so the pickup join and the drop-off join each get
# their own DataFrame; joining against a single frame twice gave a
# cartesian-product error.
taxi_zone, taxi_zone2 = (
    spark.read.format("csv").load('taxi+_zone_lookup.csv', header=True)
    for _ in range(2)
)


In [8]:
# Show the columns of the zone lookup table (all read as strings).
taxi_zone.printSchema()

root
 |-- LocationID: string (nullable = true)
 |-- Borough: string (nullable = true)
 |-- Zone: string (nullable = true)
 |-- service_zone: string (nullable = true)



In [9]:
# Merge in the pickup-zone details: match PULocationID against the lookup's
# LocationID and prefix the lookup columns with "PU". The left outer join keeps
# trips whose pickup ID has no match. The lookup's LocationID column is retained.
jan_2017 = (
    jan_2017
    .join(taxi_zone, jan_2017["PULocationID"] == taxi_zone["LocationID"], "left_outer")
    .withColumnRenamed("Borough", "PUBorough")
    .withColumnRenamed("Zone", "PUZone")
    .withColumnRenamed("service_zone", "PUServiceZone")
    .cache()
)

In [10]:
# Merge in the destination (drop-off) zone details using the second copy of the
# lookup table, prefixing the lookup columns with "DO". The lookup's LocationID
# column is retained here as well, so the frame ends up with two LocationID
# columns.
jan_2017 = (
    jan_2017
    .join(taxi_zone2, jan_2017["DOLocationID"] == taxi_zone2["LocationID"], "left_outer")
    .withColumnRenamed("Borough", "DOBorough")
    .withColumnRenamed("Zone", "DOZone")
    .withColumnRenamed("service_zone", "DOServiceZone")
)

In [11]:
#jan_2017 = jan_2017.drop("LocationID")

In [12]:
# Flag trips whose destination is an airport: AirportDO is 1 when DOLocationID
# is '132' or '138', otherwise 0. DOLocationID is still a string column at this
# point, so the IDs are compared as strings.
# NOTE(review): 132 / 138 are presumably the JFK and LaGuardia zones in the
# lookup table; confirm, and decide whether any other airport zone belongs here.
# Fix: the two equality tests used to be combined with `&`. A single value can
# never equal both IDs, so the flag was always 0. `isin` expresses the intended
# "either ID" check.
jan_2017 = jan_2017.withColumn(
    "AirportDO",
    F.when(jan_2017["DOLocationID"].isin('132', '138'), 1).otherwise(0),
)

In [13]:
# Row count after the zone joins and before any filtering.
jan_2017.count()

5044667

In [14]:
# Inspect the schema after the joins; the trip columns are still strings here.
jan_2017.printSchema()

root
 |-- VendorID: string (nullable = true)
 |-- tpep_pickup_datetime: string (nullable = true)
 |-- tpep_dropoff_datetime: string (nullable = true)
 |-- passenger_count: string (nullable = true)
 |-- trip_distance: string (nullable = true)
 |-- RatecodeID: string (nullable = true)
 |-- store_and_fwd_flag: string (nullable = true)
 |-- PULocationID: string (nullable = true)
 |-- DOLocationID: string (nullable = true)
 |-- payment_type: string (nullable = true)
 |-- fare_amount: string (nullable = true)
 |-- extra: string (nullable = true)
 |-- mta_tax: string (nullable = true)
 |-- tip_amount: string (nullable = true)
 |-- tolls_amount: string (nullable = true)
 |-- improvement_surcharge: string (nullable = true)
 |-- total_amount: string (nullable = true)
 |-- LocationID: string (nullable = true)
 |-- PUBorough: string (nullable = true)
 |-- PUZone: string (nullable = true)
 |-- PUServiceZone: string (nullable = true)
 |-- LocationID: string (nullable = true)
 |-- DOBorough: string (

In [15]:
# Preview the first 10 joined rows.
jan_2017.show(10)

+--------+--------------------+---------------------+---------------+-------------+----------+------------------+------------+------------+------------+-----------+-----+-------+----------+------------+---------------------+------------+----------+---------+---------+-------------+----------+---------+-------------------+-------------+---------+
|VendorID|tpep_pickup_datetime|tpep_dropoff_datetime|passenger_count|trip_distance|RatecodeID|store_and_fwd_flag|PULocationID|DOLocationID|payment_type|fare_amount|extra|mta_tax|tip_amount|tolls_amount|improvement_surcharge|total_amount|LocationID|PUBorough|   PUZone|PUServiceZone|LocationID|DOBorough|             DOZone|DOServiceZone|AirportDO|
+--------+--------------------+---------------------+---------------+-------------+----------+------------------+------------+------------+------------+-----------+-----+-------+----------+------------+---------------------+------------+----------+---------+---------+-------------+----------+---------+-

In [16]:
# Split the "YYYY-MM-DD HH:MM:SS" pickup and drop-off timestamps into a date
# column (cast to DateType) and a time-of-day string column.
# Fix: the drop-off split used to read tpep_pickup_datetime, so DODate / DOTime
# were copies of the pickup values. It now reads tpep_dropoff_datetime.
split_pickup_col = split(jan_2017['tpep_pickup_datetime'], ' ')
split_dropoff_col = split(jan_2017['tpep_dropoff_datetime'], ' ')
jan_2017 = jan_2017.withColumn("PUDate", split_pickup_col.getItem(0).cast(DateType()))
jan_2017 = jan_2017.withColumn("PUTime", split_pickup_col.getItem(1))
jan_2017 = jan_2017.withColumn("DODate", split_dropoff_col.getItem(0).cast(DateType()))
jan_2017 = jan_2017.withColumn("DOTime", split_dropoff_col.getItem(1))

In [17]:
# Break the time-of-day strings on ':' and keep the hour and minute as integers.
# The minute is rounded to a 5-minute mark in a later cell.
split_PUTime = split(jan_2017['PUTime'], ':')
split_DOTime = split(jan_2017['DOTime'], ':')
jan_2017 = (
    jan_2017
    .withColumn("PUHour", split_PUTime.getItem(0).cast(IntegerType()))
    .withColumn("PUMinute", split_PUTime.getItem(1).cast(IntegerType()))
    .withColumn("DOHour", split_DOTime.getItem(0).cast(IntegerType()))
    .withColumn("DOMinute", split_DOTime.getItem(1).cast(IntegerType()))
)

In [19]:
# Round each minute down to the closest 5-minute mark (computationally easier
# downstream) by subtracting the remainder modulo 5.
for minute_col in ("DOMinute", "PUMinute"):
    jan_2017 = jan_2017.withColumn(
        minute_col,
        jan_2017[minute_col] - jan_2017[minute_col] % 5,
    )

In [27]:
# Day of week for pickup and drop-off. The 'u' pattern yields 1 (Monday) through
# 7 (Sunday); the result is stored as a short.
# NOTE(review): the 'u' pattern letter may not be accepted by newer Spark
# releases - confirm against the Spark version in use.
for dow_col, date_col in (("PU_DOW", "PUDate"), ("DO_DOW", "DODate")):
    jan_2017 = jan_2017.withColumn(
        dow_col,
        date_format(jan_2017[date_col], 'u').cast(ShortType()),
    )

In [26]:
# Flag trips that touch a weekend: Weekend is 1 when the pickup or the drop-off
# day of week is Saturday (6) or Sunday (7), otherwise 0. PU_DOW / DO_DOW use
# the 1 (Monday) - 7 (Sunday) coding.
# Fix: the condition used to test PU_DOW == 7 twice and never DO_DOW == 6, so a
# trip dropped off on a Saturday was only flagged if its pickup day was also a
# weekend day.
jan_2017 = jan_2017.withColumn(
    "Weekend",
    F.when(
        jan_2017["PU_DOW"].isin(6, 7) | jan_2017["DO_DOW"].isin(6, 7),
        1,
    ).otherwise(0),
)

In [30]:
# Cast the string columns read from the CSV to primitive types.
#
# Code tables for the categorical columns:
#   VendorID     1= Creative Mobile Technologies, LLC; 2= VeriFone Inc.
#   payment_type 1= Credit card, 2= Cash, 3= No charge, 4= Dispute,
#                5= Unknown, 6= Voided trip
#   RateCodeID   1= Standard rate, 2= JFK -> $52 flat fare, 3= Newark,
#                4= Nassau or Westchester, 5= Negotiated fare, 6= Group ride
# Units / meaning of the numeric columns:
#   trip_distance is in miles
#   extra         0.50 and $1 rush hour and overnight charges
#   mta_tax       .50, automatic MTA charge
primitive_casts = [
    ("VendorID", ShortType()),
    ("passenger_count", ShortType()),
    ("trip_distance", FloatType()),
    ("payment_type", ShortType()),
    ("fare_amount", FloatType()),
    ("extra", FloatType()),
    ("mta_tax", FloatType()),
    ("tip_amount", FloatType()),
    ("tolls_amount", FloatType()),
    ("improvement_surcharge", FloatType()),
    ("total_amount", FloatType()),
    ("RateCodeID", ShortType()),
]
for column_name, target_type in primitive_casts:
    jan_2017 = jan_2017.withColumn(column_name, jan_2017[column_name].cast(target_type))

In [28]:
# Check the column types of the working frame.
jan_2017.printSchema()

root
 |-- VendorID: short (nullable = true)
 |-- tpep_pickup_datetime: string (nullable = true)
 |-- tpep_dropoff_datetime: string (nullable = true)
 |-- passenger_count: short (nullable = true)
 |-- trip_distance: float (nullable = true)
 |-- RatecodeID: string (nullable = true)
 |-- store_and_fwd_flag: string (nullable = true)
 |-- PULocationID: string (nullable = true)
 |-- DOLocationID: string (nullable = true)
 |-- payment_type: short (nullable = true)
 |-- fare_amount: float (nullable = true)
 |-- extra: float (nullable = true)
 |-- mta_tax: float (nullable = true)
 |-- tip_amount: float (nullable = true)
 |-- tolls_amount: float (nullable = true)
 |-- improvement_surcharge: float (nullable = true)
 |-- total_amount: float (nullable = true)
 |-- LocationID: string (nullable = true)
 |-- PUBorough: string (nullable = true)
 |-- PUZone: string (nullable = true)
 |-- PUServiceZone: string (nullable = true)
 |-- LocationID: string (nullable = true)
 |-- DOBorough: string (nullable = 

In [29]:
# Derived columns used for sanity checks and later filtering. The two ratios
# use fare_amount rather than total_amount because tips can be large if the
# rider is generous.
#   cost_per_mile    - average dollars per mile; should be around $2.5 per mile
#                      according to MTA
#   miles_per_dollar - average miles per dollar; should be around 0.4
#   missing_money    - total_amount minus every itemised charge, i.e. the part
#                      of the total that the listed components do not explain
jan_2017 = (
    jan_2017
    .withColumn(
        "cost_per_mile",
        (jan_2017["fare_amount"] / jan_2017["trip_distance"]).cast(FloatType()),
    )
    .withColumn(
        "miles_per_dollar",
        (jan_2017["trip_distance"] / jan_2017["fare_amount"]).cast(FloatType()),
    )
    .withColumn(
        "missing_money",
        (
            jan_2017["total_amount"]
            - jan_2017["fare_amount"]
            - jan_2017["extra"]
            - jan_2017["mta_tax"]
            - jan_2017["tip_amount"]
            - jan_2017["tolls_amount"]
            - jan_2017["improvement_surcharge"]
        ).cast(FloatType()),
    )
)
                                                
                                    

In [31]:
# Basic fare cleaning: drop rows whose money columns fall below their minimum
# plausible values, and rows where cost_per_mile could not be computed.
jan_2017 = (
    jan_2017
    .filter(F.col("tip_amount") >= 0)
    .filter(F.col("tolls_amount") >= 0.0)
    .filter(F.col("total_amount") >= 3.30)
    .filter(F.col("extra") >= 0.00)
    # presumably removes trips with a zero or missing distance - verify
    .filter(F.col("cost_per_mile").isNotNull())
    # minimum fare amounts according to NYC Taxi data standards
    .filter(F.col("fare_amount") >= 2.50)
    .filter(F.col("improvement_surcharge") >= 0.3)
    .filter(F.col("mta_tax") >= 0.5)
)

# Not applied: a maximum fare cap. No (logical) fares were greater than 600,
# although some tips might be.
#jan_2017 = jan_2017.filter((jan_2017.fare_amount < 600.0))
# Author's notes on the extreme cost-per-mile rows: the trips at the maximum
# cost per mile all went < 0.1 miles, and there were MANY trips with a cost per
# mile of 1733.3334 where the trip distance was exactly 0.03 and the fare
# amount exactly 52.

# Summaries after filtering. DOLocationID is still a string column, so its
# min / max are compared as text rather than as numbers.
for summary_col in ("fare_amount", "DOLocationID"):
    jan_2017.describe(summary_col).show()

jan_2017.count()
# Author's note: 0.646167527% of trips invalidated using basic filters of fare amounts
# NOTE(review): the counts captured in this notebook (5044667 before filtering,
# 4992919 after) suggest roughly 1% - confirm which figure is current.

+-------+------------------+
|summary|       fare_amount|
+-------+------------------+
|  count|           4992919|
|   mean|12.063494577015529|
| stddev|10.042401989316064|
|    min|               2.5|
|    max|             512.0|
+-------+------------------+

+-------+------------------+
|summary|      DOLocationID|
+-------+------------------+
|  count|           4992919|
|   mean|161.34633047321617|
| stddev| 70.69606380549166|
|    min|                 1|
|    max|                99|
+-------+------------------+



4992919

In [32]:
# Summary statistics for missing_money, followed by its 50 largest values.
jan_2017.describe("missing_money").show()
(
    jan_2017
    .orderBy(F.desc('missing_money'))
    .select("missing_money")
    .show(50)
)

+-------+--------------------+
|summary|       missing_money|
+-------+--------------------+
|  count|             4992919|
|   mean|0.004416116147229376|
| stddev| 0.09976741859311734|
|    min|        -1.603365E-5|
|    max|            9.900002|
+-------+--------------------+

+-------------+
|missing_money|
+-------------+
|     9.900002|
|     9.900001|
|     9.899997|
|     9.899997|
|    4.9500027|
|    4.9500027|
|    4.9500027|
|    4.9500027|
|    4.9500027|
|    4.9500017|
|    4.9500017|
|    4.9500012|
|    4.9500012|
|    4.9500012|
|    4.9500012|
|    4.9500012|
|    4.9500012|
|    4.9500012|
|    4.9500012|
|     4.950001|
|     4.950001|
|     4.950001|
|     4.950001|
|     4.950001|
|     4.950001|
|     4.950001|
|     4.950001|
|     4.950001|
|     4.950001|
|     4.950001|
|     4.950001|
|     4.950001|
|     4.950001|
|     4.950001|
|     4.950001|
|     4.950001|
|     4.950001|
|     4.950001|
|     4.950001|
|     4.950001|
|     4.950001|
|     4.950001|


In [33]:
# The 50 highest fares alongside their distance and cost per mile.
(
    jan_2017
    .orderBy(F.desc('fare_amount'))
    .select("fare_amount", "trip_distance", "cost_per_mile")
    .show(50)
)

+-----------+-------------+-------------+
|fare_amount|trip_distance|cost_per_mile|
+-----------+-------------+-------------+
|      512.0|        112.0|     4.571429|
|      400.0|       156.17|    2.5613115|
|      400.0|         2.87|    139.37283|
|      384.0|         82.7|     4.643289|
|      366.0|        84.71|    4.3206234|
|      360.0|         1.54|    233.76624|
|      350.0|       139.17|    2.5149097|
|      339.0|         70.9|     4.781382|
|      310.0|        122.3|    2.5347505|
|      307.0|        56.39|    5.4442277|
|      300.0|        88.05|    3.4071548|
|      300.0|         8.29|     36.18818|
|      300.0|        38.06|     7.882291|
|      300.0|        59.66|     5.028495|
|      297.0|        68.83|    4.3149786|
|     295.55|        30.51|     9.686988|
|      293.0|        12.79|    22.908522|
|      282.0|       109.95|    2.5648022|
|      281.0|        69.57|    4.0390973|
|      280.0|         55.1|      5.08167|
|      279.0|        24.74|    11.

In [34]:
# The 50 longest trips alongside their fare and cost per mile.
(
    jan_2017
    .orderBy(F.desc('trip_distance'))
    .select("fare_amount", "trip_distance", "cost_per_mile")
    .show(50)
)

+-----------+-------------+-------------+
|fare_amount|trip_distance|cost_per_mile|
+-----------+-------------+-------------+
|        3.0|        173.2|  0.017321017|
|      400.0|       156.17|    2.5613115|
|      350.0|       139.17|    2.5149097|
|      310.0|        122.3|    2.5347505|
|       52.0|       122.25|   0.42535788|
|      512.0|        112.0|     4.571429|
|      282.0|       109.95|    2.5648022|
|       52.0|       108.97|   0.47719556|
|       52.0|         95.5|    0.5445026|
|      300.0|        88.05|    3.4071548|
|      200.0|        85.88|     2.328831|
|      366.0|        84.71|    4.3206234|
|      213.5|         83.5|    2.5568862|
|      384.0|         82.7|     4.643289|
|      215.5|        82.36|    2.6165614|
|       52.0|         80.9|   0.64276886|
|      222.0|        80.63|    2.7533178|
|      150.0|        76.67|    1.9564368|
|      150.0|         73.8|    2.0325203|
|      220.0|         73.2|    3.0054646|
|      220.0|         73.2|    3.0

In [43]:
# Summary statistics for the two fare/distance ratios.
for ratio_col in ("cost_per_mile", "miles_per_dollar"):
    jan_2017.describe(ratio_col).show()

+-------+-----------------+
|summary|    cost_per_mile|
+-------+-----------------+
|  count|          4992919|
|   mean|6.475433751279496|
| stddev|49.42269620379219|
|    min|      0.017321017|
|    max|          25000.0|
+-------+-----------------+

+-------+-------------------+
|summary|   miles_per_dollar|
+-------+-------------------+
|  count|            4992919|
|   mean| 0.1964336613874987|
| stddev|0.08364914420765267|
|    min|             4.0E-5|
|    max|          57.733334|
+-------+-------------------+



In [36]:
# Summary statistics for tip_amount, followed by the 50 largest tips.
jan_2017.describe("tip_amount").show()
(
    jan_2017
    .orderBy(F.desc('tip_amount'))
    .select("tip_amount")
    .show(50)
)

+-------+------------------+
|summary|        tip_amount|
+-------+------------------+
|  count|           4992919|
|   mean|1.6831354700390813|
| stddev| 2.322129974152001|
|    min|               0.0|
|    max|            393.94|
+-------+------------------+

+----------+
|tip_amount|
+----------+
|    393.94|
|     371.0|
|     366.0|
|     306.0|
|     300.0|
|     260.0|
|     258.0|
|     243.7|
|    222.64|
|     220.0|
|     219.7|
|     201.1|
|     200.0|
|     200.0|
|     192.0|
|     190.7|
|     190.0|
|     187.0|
|     186.2|
|     150.0|
|     125.7|
|     125.0|
|     115.0|
|     115.0|
|     115.0|
|    111.49|
|     110.0|
|     110.0|
|     108.0|
|     106.0|
|     102.1|
|     101.0|
|     100.0|
|     100.0|
|     100.0|
|     100.0|
|     100.0|
|     100.0|
|     100.0|
|     100.0|
|      97.0|
|      95.0|
|     91.23|
|      90.0|
|      90.0|
|      89.0|
|      88.0|
|      88.0|
|      85.0|
|      85.0|
+----------+
only showing top 50 rows



In [37]:
# The 50 trips with the highest cost per mile, with the columns that feed it.
(
    jan_2017
    .orderBy(F.desc('cost_per_mile'))
    .select(
        "miles_per_dollar",
        "cost_per_mile",
        "trip_distance",
        "total_amount",
        "fare_amount",
        "tip_amount",
        "tolls_amount",
    )
    .show(50)
)
# Author's note: upper bound of cost per mile is currently 1733.34
# NOTE(review): the output captured for this cell starts at 25000.0, so this
# figure looks stale - confirm.

+----------------+-------------+-------------+------------+-----------+----------+------------+
|miles_per_dollar|cost_per_mile|trip_distance|total_amount|fare_amount|tip_amount|tolls_amount|
+----------------+-------------+-------------+------------+-----------+----------+------------+
|          4.0E-5|      25000.0|         0.01|       270.8|      250.0|      20.0|         0.0|
|     4.255319E-5|      23500.0|         0.01|      306.54|      235.0|     70.74|         0.0|
|    5.5555553E-5|      18000.0|         0.01|       210.8|      180.0|      30.0|         0.0|
|     6.060606E-5|      16500.0|         0.01|       165.8|      165.0|       0.0|         0.0|
|     6.666667E-5|      15000.0|         0.01|       150.8|      150.0|       0.0|         0.0|
|     6.666667E-5|      15000.0|         0.01|       150.8|      150.0|       0.0|         0.0|
|     7.142857E-5|      14000.0|         0.01|       216.8|      140.0|      76.0|         0.0|
|     7.407407E-5|      13500.0|        

In [38]:
# The 50 trips with the lowest cost per mile, with the charges that make up the
# total.
(
    jan_2017
    .orderBy(F.asc('cost_per_mile'))
    .select(
        "cost_per_mile",
        "trip_distance",
        "total_amount",
        "fare_amount",
        "extra",
        "tip_amount",
        "tolls_amount",
        "improvement_surcharge",
    )
    .show(50)
)

# TODO: record the current lower bound of cost per mile (the original note was
# left unfinished).

+-------------+-------------+------------+-----------+-----+----------+------------+---------------------+
|cost_per_mile|trip_distance|total_amount|fare_amount|extra|tip_amount|tolls_amount|improvement_surcharge|
+-------------+-------------+------------+-----------+-----+----------+------------+---------------------+
|  0.017321017|        173.2|         9.3|        3.0|  5.5|       0.0|         0.0|                  0.3|
|  0.071633235|         34.9|       113.3|        2.5|  0.0|      20.0|        90.0|                  0.3|
|   0.07575758|         33.0|         3.8|        2.5|  0.5|       0.0|         0.0|                  0.3|
|   0.07911392|         31.6|         3.8|        2.5|  0.5|       0.0|         0.0|                  0.3|
|   0.09225092|         27.1|       13.84|        2.5|  5.0|       0.0|        5.54|                  0.3|
|   0.09960159|         25.1|         3.8|        2.5|  0.5|       0.0|         0.0|                  0.3|
|    0.1059322|         23.6|        

In [39]:
# Summary statistics for total_amount, followed by the 50 largest totals.
jan_2017.describe("total_amount").show()
(
    jan_2017
    .orderBy(F.desc('total_amount'))
    .select("total_amount")
    .show(50)
)

+-------+------------------+
|summary|      total_amount|
+-------+------------------+
|  count|           4992919|
|   mean|  15.1093943040766|
| stddev|12.363551155207466|
|    min|              3.31|
|    max|            930.34|
+-------+------------------+

+------------+
|total_amount|
+------------+
|      930.34|
|       611.3|
|      608.34|
|      608.24|
|      592.84|
|      590.22|
|      538.84|
|      534.88|
|      518.34|
|      446.74|
|      432.96|
|      425.04|
|      419.37|
|       400.8|
|      387.34|
|      380.14|
|       378.8|
|      378.41|
|       377.8|
|      372.36|
|      372.34|
|       358.8|
|       350.8|
|       349.3|
|      344.05|
|      343.06|
|       339.8|
|      335.76|
|      331.89|
|      320.38|
|       320.3|
|       316.8|
|       310.8|
|      306.54|
|      306.34|
|       306.3|
|      304.01|
|       300.8|
|       300.8|
|       300.3|
|      298.63|
|      298.01|
|      294.95|
|      294.84|
|       294.8|
|      291.05|
|  

In [40]:
# Summary statistics for improvement_surcharge after the basic fare filters.
jan_2017.describe("improvement_surcharge").show()

+-------+---------------------+
|summary|improvement_surcharge|
+-------+---------------------+
|  count|              4992919|
|   mean|  0.30000001192092896|
| stddev|                  0.0|
|    min|                  0.3|
|    max|                  0.3|
+-------+---------------------+



In [41]:
# Summary statistics for trip_distance, followed by the 50 longest distances.
jan_2017.describe("trip_distance").show()
(
    jan_2017
    .orderBy(F.desc('trip_distance'))
    .select("trip_distance")
    .show(50)
)

+-------+------------------+
|summary|     trip_distance|
+-------+------------------+
|  count|           4992919|
|   mean|2.8367406499200527|
| stddev|3.5843179181452034|
|    min|              0.01|
|    max|             173.2|
+-------+------------------+

+-------------+
|trip_distance|
+-------------+
|        173.2|
|       156.17|
|       139.17|
|        122.3|
|       122.25|
|        112.0|
|       109.95|
|       108.97|
|         95.5|
|        88.05|
|        85.88|
|        84.71|
|         83.5|
|         82.7|
|        82.36|
|         80.9|
|        80.63|
|        76.67|
|         73.8|
|         73.2|
|         73.2|
|        71.98|
|        71.02|
|         70.9|
|         70.8|
|         70.6|
|        69.57|
|        68.83|
|         66.9|
|         66.5|
|        66.11|
|        65.58|
|        65.44|
|         64.7|
|        64.56|
|        64.55|
|        63.98|
|        63.55|
|        63.35|
|         62.4|
|         62.1|
|        61.64|
|        61.45|
| 

In [42]:
# Peek at the raw pickup timestamp strings (first 10 rows).
jan_2017.select('tpep_pickup_datetime').show(10)

+--------------------+
|tpep_pickup_datetime|
+--------------------+
| 2017-01-01 00:00:12|
| 2017-01-01 00:01:00|
| 2017-01-01 06:57:25|
| 2017-01-01 06:58:44|
| 2017-01-01 07:13:56|
| 2017-01-01 07:28:04|
| 2017-01-01 07:34:38|
| 2017-01-01 07:49:57|
| 2017-01-01 07:50:52|
| 2017-01-01 07:58:21|
+--------------------+
only showing top 10 rows



In [47]:
# List the distinct pickup dates present in the data (show() prints the first 20).
jan_2017.select('PUDate').distinct().show()


+----------+
|    PUDate|
+----------+
|2017-01-06|
|2017-01-30|
|2017-01-04|
|2017-01-10|
|2017-01-21|
|2017-01-05|
|2017-01-13|
|2017-01-01|
|2017-01-20|
|2017-01-11|
|2017-01-17|
|2017-01-19|
|2017-01-08|
|2017-01-07|
|2017-01-12|
|2017-01-31|
|2017-01-16|
|2017-01-09|
|2017-01-22|
|2017-01-15|
+----------+
only showing top 20 rows

