# Dates & Timestamps

In [34]:
import os

import findspark

# findspark must be initialised before pyspark is imported so that the Spark
# installation is on the Python path. Prefer the SPARK_HOME environment
# variable so the notebook is not tied to one machine; fall back to the
# original local install location when it is unset.
findspark.init(os.environ.get('SPARK_HOME', '/home/aforestier10/Downloads/spark-3.5.3-bin-hadoop3'))
import pyspark
from pyspark.sql import SparkSession

In [35]:
# Create the Spark session for this notebook, or reuse one that already exists.
spark = (
    SparkSession.builder
    .appName('Agg')
    .getOrCreate()
)
# Keep cell output readable: only log messages at ERROR level are emitted.
spark.sparkContext.setLogLevel("ERROR")

In [36]:
# Load the stock price CSV. The first line supplies the column names and the
# column types are inferred from the data instead of being read as strings.
df = spark.read.csv(
    'appl_stock.csv',
    header=True,
    inferSchema=True,
)
# Peek at the first row (returned as a one-element list of Row objects).
df.head(1)

[Row(Date=datetime.date(2010, 1, 4), Open=213.429998, High=214.499996, Low=212.38000099999996, Close=214.009998, Volume=123432400, Adj Close=27.727039)]

## DateTime

In [41]:
from pyspark.sql.functions import (dayofmonth, hour, month, dayofyear, year, weekofyear, format_number, date_format)

In [38]:
# Extract the day-of-month component of every Date value; show() prints the
# first 20 rows.
df.select(dayofmonth(df.Date)).show()

+----------------+
|dayofmonth(Date)|
+----------------+
|               4|
|               5|
|               6|
|               7|
|               8|
|              11|
|              12|
|              13|
|              14|
|              15|
|              19|
|              20|
|              21|
|              22|
|              25|
|              26|
|              27|
|              28|
|              29|
|               1|
+----------------+
only showing top 20 rows



In [39]:
# Extract the hour component of every Date value. Date was loaded as a pure
# date (no time of day), so every row comes back as 0.
df.select(hour(df.Date)).show()

+----------+
|hour(Date)|
+----------+
|         0|
|         0|
|         0|
|         0|
|         0|
|         0|
|         0|
|         0|
|         0|
|         0|
|         0|
|         0|
|         0|
|         0|
|         0|
|         0|
|         0|
|         0|
|         0|
|         0|
+----------+
only showing top 20 rows



In [42]:
# Extract the month number of every Date value; show() prints the first 20 rows.
df.select(month(df.Date)).show()

+-----------+
|month(Date)|
+-----------+
|          1|
|          1|
|          1|
|          1|
|          1|
|          1|
|          1|
|          1|
|          1|
|          1|
|          1|
|          1|
|          1|
|          1|
|          1|
|          1|
|          1|
|          1|
|          1|
|          2|
+-----------+
only showing top 20 rows



### Combining knowledge: average adjusted closing price by year

In [51]:
# Keep every original column and append a "Year" column derived from Date,
# producing a new DataFrame (df itself is left untouched).
df_w_year = df.select('*', year(df.Date).alias("Year"))
df_w_year.show()

+----------+------------------+------------------+------------------+------------------+---------+------------------+----+
|      Date|              Open|              High|               Low|             Close|   Volume|         Adj Close|Year|
+----------+------------------+------------------+------------------+------------------+---------+------------------+----+
|2010-01-04|        213.429998|        214.499996|212.38000099999996|        214.009998|123432400|         27.727039|2010|
|2010-01-05|        214.599998|        215.589994|        213.249994|        214.379993|150476200|27.774976000000002|2010|
|2010-01-06|        214.379993|            215.23|        210.750004|        210.969995|138040000|27.333178000000004|2010|
|2010-01-07|            211.75|        212.000006|        209.050005|            210.58|119282800|          27.28265|2010|
|2010-01-08|        210.299994|        212.000006|209.06000500000002|211.98000499999998|111902700|         27.464034|2010|
|2010-01-11|212.

In [59]:
# Bucket the rows by calendar year.
year_group = df_w_year.groupBy('Year')
# Average the adjusted close within each year and pull the result back to the
# driver as a plain Python list of Row objects.
# NOTE(review): the rows come back in no particular year order; add an
# orderBy('Year') before collect() if a sorted result is wanted.
adj_close_avg_by_year = (
    year_group
    .agg({'Adj Close': 'avg'})
    .collect()
)
# Display both the collected rows and their container type.
(adj_close_avg_by_year, type(adj_close_avg_by_year))

([Row(Year=2015, avg(Adj Close)=115.96740080555561),
  Row(Year=2013, avg(Adj Close)=62.61798788492063),
  Row(Year=2014, avg(Adj Close)=87.63583323809523),
  Row(Year=2012, avg(Adj Close)=74.81383696800002),
  Row(Year=2016, avg(Adj Close)=103.15032854761901),
  Row(Year=2010, avg(Adj Close)=33.665072424603196),
  Row(Year=2011, avg(Adj Close)=47.16023692063492)],
 list)

In [60]:
# Print one collected Row per line (each holds a year and its average
# adjusted close).
for yearly_row in adj_close_avg_by_year:
    print(yearly_row)

Row(Year=2015, avg(Adj Close)=115.96740080555561)
Row(Year=2013, avg(Adj Close)=62.61798788492063)
Row(Year=2014, avg(Adj Close)=87.63583323809523)
Row(Year=2012, avg(Adj Close)=74.81383696800002)
Row(Year=2016, avg(Adj Close)=103.15032854761901)
Row(Year=2010, avg(Adj Close)=33.665072424603196)
Row(Year=2011, avg(Adj Close)=47.16023692063492)
