## Working with Dates

## Imports

In [1]:
import findspark
findspark.init()

import pyspark
from pyspark.sql import SparkSession

## SparkSession

In [2]:
# Reuse the active SparkSession if there is one; otherwise build a new one
# (no extra builder configuration is applied here).
spark = SparkSession.builder.getOrCreate()

In [3]:
from pyspark.sql.functions import to_date, to_timestamp, lit, col

**2019-12-25 13:30:00**

In [4]:
# Single-row DataFrame: the timestamp is stored as a plain string in the
# "Christmas" column.
df = spark.createDataFrame(data=[('2019-12-25 13:30:00',)], schema=['Christmas'])
df.show(n=1)

                                                                                

+-------------------+
|          Christmas|
+-------------------+
|2019-12-25 13:30:00|
+-------------------+



In [5]:
# Parse the string column with an explicit pattern, once as a date and once
# as a timestamp.
df.select(
    to_date(col('Christmas'), format='yyyy-MM-dd HH:mm:ss'),
    to_timestamp(col('Christmas'), format='yyyy-MM-dd HH:mm:ss'),
).show(n=1)

+---------------------------------------+--------------------------------------------+
|to_date(Christmas, yyyy-MM-dd HH:mm:ss)|to_timestamp(Christmas, yyyy-MM-dd HH:mm:ss)|
+---------------------------------------+--------------------------------------------+
|                             2019-12-25|                         2019-12-25 13:30:00|
+---------------------------------------+--------------------------------------------+



**25/Dec/2019 13:30:00**

In [6]:
# Same moment, this time written as day/abbreviated-month/year.
df = spark.createDataFrame(
    data=[('25/Dec/2019 13:30:00',)],
    schema=['Christmas'],
)
df.show(n=1)

+--------------------+
|           Christmas|
+--------------------+
|25/Dec/2019 13:30:00|
+--------------------+



In [7]:
# "MMM" matches the abbreviated month name ("Dec") in the input string.
df.select(
    to_date(col('Christmas'), format='dd/MMM/yyyy HH:mm:ss'),
    to_timestamp(col('Christmas'), format='dd/MMM/yyyy HH:mm:ss'),
).show(n=1)

+----------------------------------------+---------------------------------------------+
|to_date(Christmas, dd/MMM/yyyy HH:mm:ss)|to_timestamp(Christmas, dd/MMM/yyyy HH:mm:ss)|
+----------------------------------------+---------------------------------------------+
|                              2019-12-25|                          2019-12-25 13:30:00|
+----------------------------------------+---------------------------------------------+



**12/25/2019 01:30:00 PM**

In [8]:
# US-style month/day/year with a 12-hour clock and an AM/PM marker.
df = spark.createDataFrame(
    data=[('12/25/2019 01:30:00 PM',)],
    schema=['Christmas'],
)
# truncate=False keeps the whole 22-character value visible in the output.
df.show(n=1, truncate=False)

+----------------------+
|Christmas             |
+----------------------+
|12/25/2019 01:30:00 PM|
+----------------------+



## Changes to datetime patterns in Spark 3

To keep using the Spark 2 datetime patterns, run `spark.sql("set spark.sql.legacy.timeParserPolicy=LEGACY")`. Otherwise, with the Spark 3 datetime patterns, use only one letter for each pattern field, as shown below. For more on the datetime pattern changes in Spark 3, please refer to the [docs](https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html).

In [9]:
# Single-letter pattern fields; "h" is the 12-hour clock hour and "a" the
# AM/PM marker.
df.select(
    to_date(col('Christmas'), format='M/d/y h:m:s a'),
    to_timestamp(col('Christmas'), format='M/d/y h:m:s a'),
).show(n=1)

+---------------------------------+--------------------------------------+
|to_date(Christmas, M/d/y h:m:s a)|to_timestamp(Christmas, M/d/y h:m:s a)|
+---------------------------------+--------------------------------------+
|                       2019-12-25|                   2019-12-25 13:30:00|
+---------------------------------+--------------------------------------+



## Display

Set the browser to display scrollable dataframes.

In [10]:
# Import from the public IPython.display API (not the internal
# IPython.core.display module), and import `display` explicitly rather than
# relying on the name being injected into the notebook namespace.
from IPython.display import HTML, display

# Stop <pre> output from wrapping so wide DataFrame tables keep their column
# alignment and can be scrolled horizontally.
display(HTML("<style>pre {white-space: pre !important; }</style>"))

In [11]:
import os

# Build a file:// URI for the CSV in the "data" directory under the current
# working directory.
data_path = f"file:///{os.getcwd()}/data"
file_path = f"{data_path}/reported-crimes.csv"

# header=True: take the column names from the first line of the file.
crimes_df = spark.read.csv(file_path, header=True)

crimes_df.show(n=5, truncate=False)

+--------+-----------+----------------------+---------------------+----+------------+-----------------------+--------------------+------+--------+----+--------+----+--------------+--------+------------+------------+----+----------------------+------------+-------------+-----------------------------+
|ID      |Case Number|Date                  |Block                |IUCR|Primary Type|Description            |Location Description|Arrest|Domestic|Beat|District|Ward|Community Area|FBI Code|X Coordinate|Y Coordinate|Year|Updated On            |Latitude    |Longitude    |Location                     |
+--------+-----------+----------------------+---------------------+----+------------+-----------------------+--------------------+------+--------+----+--------+----+--------------+--------+------------+------------+----+----------------------+------------+-------------+-----------------------------+
|10224738|HY411648   |09/05/2015 01:30:00 PM|043XX S WOOD ST      |0486|BATTERY     |DOMESTIC BAT