In [31]:
from pyspark.sql import SparkSession
from pyspark.sql.types import StructField, StructType, StringType, IntegerType, DateType
from pyspark.sql.functions import dayofmonth, hour, dayofyear, month, year, weekofyear

In [32]:
# Obtain the SparkSession used by every cell below, registered under the
# application name "Date Time".
spark = (
    SparkSession.builder
    .appName("Date Time")
    .getOrCreate()
)

In [33]:
# Explicit read schema: a single "Date" column parsed as DateType.
schema = StructType(
    [
        StructField("Date", DateType(), True),  # third argument: nullable
    ]
)

In [34]:
# Load the stock CSV using the explicit `schema`, so "Date" arrives as a
# DateType column rather than a string.
# `inferSchema=True` was dropped: it has no effect once an explicit schema is
# supplied, and passing both made it unclear which one determined the types.
df = spark.read.csv(
    'stock-data.csv',
    header=True,    # first line of the file holds the column names
    schema=schema,
)

In [35]:
# Sanity check: confirm the loaded frame has the expected column and type.
df.printSchema()

root
 |-- Date: date (nullable = true)



In [36]:
# Preview the parsed "Date" column.
(
    df
    .select('Date')
    .show()
)

+----------+
|      Date|
+----------+
|2018-02-28|
|2018-02-27|
|2018-02-26|
|2018-02-25|
|2018-02-24|
|2018-02-23|
|2018-02-22|
|2018-02-21|
|2018-02-20|
|2018-02-19|
|2018-02-18|
|2018-02-17|
+----------+



In [37]:
# Extract the hour from each date. "Date" is a DateType column (no
# time-of-day component), so every row yields 0.
(
    df
    .select(hour('Date'))
    .show()
)

+----------+
|hour(Date)|
+----------+
|         0|
|         0|
|         0|
|         0|
|         0|
|         0|
|         0|
|         0|
|         0|
|         0|
|         0|
|         0|
+----------+



In [38]:
# Extract the day of the month from each date.
(
    df
    .select(dayofmonth('Date'))
    .show()
)

+----------------+
|dayofmonth(Date)|
+----------------+
|              28|
|              27|
|              26|
|              25|
|              24|
|              23|
|              22|
|              21|
|              20|
|              19|
|              18|
|              17|
+----------------+



In [39]:
# Append a "Day of year" column next to the original dates.
# The column is referenced as 'Date' (matching the schema and the other
# cells) instead of 'date'; the lowercase spelling only resolved because
# Spark's column-name resolution is case-insensitive by default.
(
    df
    .withColumn("Day of year", dayofyear('Date'))
    .show()
)

+----------+-----------+
|      Date|Day of year|
+----------+-----------+
|2018-02-28|         59|
|2018-02-27|         58|
|2018-02-26|         57|
|2018-02-25|         56|
|2018-02-24|         55|
|2018-02-23|         54|
|2018-02-22|         53|
|2018-02-21|         52|
|2018-02-20|         51|
|2018-02-19|         50|
|2018-02-18|         49|
|2018-02-17|         48|
+----------+-----------+



In [46]:
# Keep only the rows dated on the 25th of the month or later.
(
    df
    .filter(dayofmonth('Date') >= 25)
    .show()
)

+----------+
|      Date|
+----------+
|2018-02-28|
|2018-02-27|
|2018-02-26|
|2018-02-25|
+----------+

