### **Funciones de fecha y hora**

In [1]:
import os

# Machine-specific install locations. Each path is declared once so the
# variables derived from it cannot drift apart; edit here for another machine.
JAVA_HOME = "C:/Program Files/Java/jdk-11"
PYTHON_EXE = "C:/Users/usr/anaconda3/envs/pyspark_env/python.exe"
HADOOP_HOME = "C:/hadoop-3.4.0"
HADOOP_BIN = HADOOP_HOME + "/bin"

os.environ['JAVA_HOME'] = JAVA_HOME
# Both PySpark interpreter variables point at the same executable.
os.environ['PYSPARK_PYTHON'] = PYTHON_EXE
os.environ['PYSPARK_DRIVER_PYTHON'] = PYTHON_EXE
os.environ['HADOOP_HOME'] = HADOOP_HOME
os.environ['HADOOP_COMMON_LIB_NATIVE_DIR'] = HADOOP_HOME + "/lib/native"

# Append the Hadoop bin directory only if it is not already listed, so that
# re-running this cell does not keep growing PATH. Using .get() also avoids a
# KeyError when PATH is not set at all.
path_entries = os.environ.get('PATH', '').split(os.pathsep)
if HADOOP_BIN not in path_entries:
    os.environ['PATH'] = os.pathsep.join(path_entries + [HADOOP_BIN])

# findspark.init() runs before the pyspark import on purpose: it is the step
# that makes the Spark installation importable from this interpreter.
import findspark
findspark.init()

from pyspark.sql import SparkSession

# getOrCreate() returns the already-running session when the cell is re-run.
spark = SparkSession.builder.getOrCreate()

# Handle to the session's underlying SparkContext.
sc = spark.sparkContext

In [3]:
# The dataset is stored as two parquet part files; read each one and stack
# them into a single DataFrame.
part_paths = (
    './data/convertir/part-00000-6b84f509-ffb8-43d4-820e-866fb80c0d08-c000.snappy.parquet',
    './data/convertir/part-00001-6b84f509-ffb8-43d4-820e-866fb80c0d08-c000.snappy.parquet',
)
data_part1, data_part2 = (spark.read.parquet(path) for path in part_paths)

# union() appends the rows of the second part to the first.
data = data_part1.union(data_part2)

In [4]:
# Print the column names and types of the combined DataFrame.
data.printSchema()

root
 |-- date: string (nullable = true)
 |-- timestamp: string (nullable = true)
 |-- date_str: string (nullable = true)
 |-- ts_str: string (nullable = true)



In [5]:
# Display the rows; truncate=False keeps long cell values from being cut off.
data.show(truncate=False)

+----------+-----------------------+----------+----------------+
|date      |timestamp              |date_str  |ts_str          |
+----------+-----------------------+----------+----------------+
|2021-01-01|2021-01-01 20:10:50.723|01-01-2021|18-08-2021 46:58|
+----------+-----------------------+----------+----------------+



- Vamos a convertir columnas de tipo string a los tipos date y timestamp.

In [6]:
from pyspark.sql.functions import col, to_date, to_timestamp

In [9]:
# Build each conversion as a named column expression, then project them all.
# 'date' and 'timestamp' are parsed without an explicit pattern; 'date_str'
# and 'ts_str' are parsed with the pattern passed as second argument.
date1_expr = to_date(col('date')).alias('date1')
ts1_expr = to_timestamp(col('timestamp')).alias('ts1')
date2_expr = to_date(col('date_str'), 'dd-MM-yyyy').alias('date2')
# The time part of this pattern is 'mm:ss' (minutes:seconds); it has no hour field.
ts2_expr = to_timestamp(col('ts_str'), 'dd-MM-yyyy mm:ss').alias('ts2')

data1 = data.select(date1_expr, ts1_expr, date2_expr, ts2_expr)

In [10]:
# Display the converted columns; truncate=False keeps long cell values from being cut off.
data1.show(truncate=False)

+----------+-----------------------+----------+-------------------+
|date1     |ts1                    |date2     |ts2                |
+----------+-----------------------+----------+-------------------+
|2021-01-01|2021-01-01 20:10:50.723|2021-01-01|2021-08-18 00:46:58|
+----------+-----------------------+----------+-------------------+



In [11]:
# Print the schema of the converted DataFrame to check the resulting column types.
data1.printSchema()

root
 |-- date1: date (nullable = true)
 |-- ts1: timestamp (nullable = true)
 |-- date2: date (nullable = true)
 |-- ts2: timestamp (nullable = true)



- Ahora vamos a darle formato a una fecha.

In [12]:
from pyspark.sql.functions import date_format

In [13]:
# Render the 'date1' column as text using the day-month-year pattern.
# The expression is left unaliased, so the output column keeps its generated name.
formatted_date1 = date_format(col('date1'), 'dd-MM-yyyy')
data1.select(formatted_date1).show()

+------------------------------+
|date_format(date1, dd-MM-yyyy)|
+------------------------------+
|                    01-01-2021|
+------------------------------+



- Cálculos con fechas y horas.

In [14]:
# Load the parquet file used for the date calculation examples.
df = spark.read.parquet('./data/calculo/calculo.parquet')

In [15]:
# Display the rows of the loaded DataFrame.
df.show()

+------+-------------+------------+-------------------+
|nombre|fecha_ingreso|fecha_salida|       baja_sistema|
+------+-------------+------------+-------------------+
|  Jose|   2021-01-01|  2021-11-14|2021-10-14 15:35:59|
|Mayara|   2021-02-06|  2021-11-25|2021-11-25 10:35:55|
+------+-------------+------------+-------------------+



In [16]:
from pyspark.sql.functions import datediff, months_between, last_day

In [18]:
# Name the two date columns once so the expressions below read clearly.
fecha_salida = col('fecha_salida')
fecha_ingreso = col('fecha_ingreso')

# Both differences are taken as fecha_salida minus fecha_ingreso (end first, start second).
dias = datediff(fecha_salida, fecha_ingreso).alias('dias')
meses = months_between(fecha_salida, fecha_ingreso).alias('meses')
# Last calendar day of the month that contains fecha_salida.
ultimo_dia_mes = last_day(fecha_salida).alias('ultimo_dia_mes')

df.select(col('nombre'), dias, meses, ultimo_dia_mes).show()

+------+----+-----------+--------------+
|nombre|dias|      meses|ultimo_dia_mes|
+------+----+-----------+--------------+
|  Jose| 317|10.41935484|    2021-11-30|
|Mayara| 292| 9.61290323|    2021-11-30|
+------+----+-----------+--------------+



- Sumar y restar fechas.

In [19]:
from pyspark.sql.functions import date_add, date_sub

In [20]:
# Shift fecha_ingreso forward by 14 days and backward by 1 day, shown next to
# the original value for comparison.
fecha_ingreso = col('fecha_ingreso')
mas_14_dias = date_add(fecha_ingreso, 14).alias('mas_14_dias')
menos_1_dia = date_sub(fecha_ingreso, 1).alias('menos_1_dia')

df.select(col('nombre'), fecha_ingreso, mas_14_dias, menos_1_dia).show()

+------+-------------+-----------+-----------+
|nombre|fecha_ingreso|mas_14_dias|menos_1_dia|
+------+-------------+-----------+-----------+
|  Jose|   2021-01-01| 2021-01-15| 2020-12-31|
|Mayara|   2021-02-06| 2021-02-20| 2021-02-05|
+------+-------------+-----------+-----------+

