In [5]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import *
from datetime import datetime,timedelta
import pytz

# Build (or reuse) the Spark session for this notebook.
# The session time zone is pinned to America/Mexico_City, the same zone the
# notebook uses when it generates timestamps with Python, so that
# unix_timestamp()/from_unixtime() give the same epoch values on any machine
# instead of depending on the JVM's default zone.
spark = (
    SparkSession.builder
    .appName("Test_unixTime")
    .config("spark.sql.session.timeZone", "America/Mexico_City")
    .getOrCreate()
)

In [6]:
# Display the active SparkSession object as this cell's output.
spark

In [7]:
# Capture the current instant in Mexico City exactly once and derive the
# formatted string from that same instant, so the datetime object and its
# string form can never disagree (two separate now() calls could straddle a
# second boundary).
tz_mexico = pytz.timezone('America/Mexico_City')
fecha_hoy_dt = datetime.now(tz_mexico)
fecha_hoy = fecha_hoy_dt.strftime('%Y-%m-%d %H:%M:%S')

In [8]:
# Load the sample CSV, using its first line as the column names.
df = spark.read.csv(path='test1.csv', header=True)

                                                                                

In [9]:
# Print the loaded rows as a text table.
df.show()

+---------+---+----------+------+
|     Name|age|Experience|Salary|
+---------+---+----------+------+
|    Krish| 31|        10| 30000|
|Sudhanshu| 30|         8| 25000|
|    Sunny| 29|         4| 20000|
|     Paul| 24|         3| 20000|
|   Harsha| 21|         1| 15000|
|  Shubham| 23|         2|18000 |
+---------+---+----------+------+



In [10]:
# Number of rows in the DataFrame.
df.count()

6

In [11]:
# Project only the name and age columns and print them.
df.select(['name', 'age']).show()

+---------+---+
|     name|age|
+---------+---+
|    Krish| 31|
|Sudhanshu| 30|
|    Sunny| 29|
|     Paul| 24|
|   Harsha| 21|
|  Shubham| 23|
+---------+---+



In [12]:
# Inspect the column types; the CSV was read without a schema or inferSchema,
# and every column is reported as a string.
df.printSchema()

root
 |-- Name: string (nullable = true)
 |-- age: string (nullable = true)
 |-- Experience: string (nullable = true)
 |-- Salary: string (nullable = true)



In [13]:
# Preview the schema with the age column cast to an integer. The result is not
# assigned to anything, so df itself is left as it was.
df.withColumn('Age', col('age').cast('Integer')).printSchema()

root
 |-- Name: string (nullable = true)
 |-- Age: integer (nullable = true)
 |-- Experience: string (nullable = true)
 |-- Salary: string (nullable = true)



In [14]:
# Show df again: the cast in the previous cell was never assigned, so df is unchanged.
df.show()

+---------+---+----------+------+
|     Name|age|Experience|Salary|
+---------+---+----------+------+
|    Krish| 31|        10| 30000|
|Sudhanshu| 30|         8| 25000|
|    Sunny| 29|         4| 20000|
|     Paul| 24|         3| 20000|
|   Harsha| 21|         1| 15000|
|  Shubham| 23|         2|18000 |
+---------+---+----------+------+



In [15]:
# Print the formatted timestamp string computed earlier in the notebook.
print(fecha_hoy)

2022-05-29 17:14:27


In [16]:
# Append a constant Fecha column holding the formatted timestamp string,
# then print the resulting table.
dff = df.withColumn(colName='Fecha', col=lit(fecha_hoy))
dff.show()

+---------+---+----------+------+-------------------+
|     Name|age|Experience|Salary|              Fecha|
+---------+---+----------+------+-------------------+
|    Krish| 31|        10| 30000|2022-05-29 17:14:27|
|Sudhanshu| 30|         8| 25000|2022-05-29 17:14:27|
|    Sunny| 29|         4| 20000|2022-05-29 17:14:27|
|     Paul| 24|         3| 20000|2022-05-29 17:14:27|
|   Harsha| 21|         1| 15000|2022-05-29 17:14:27|
|  Shubham| 23|         2|18000 |2022-05-29 17:14:27|
+---------+---+----------+------+-------------------+



In [17]:
# Inspect the schema after adding Fecha, which was built from a Python string literal.
dff.printSchema()

root
 |-- Name: string (nullable = true)
 |-- age: string (nullable = true)
 |-- Experience: string (nullable = true)
 |-- Salary: string (nullable = true)
 |-- Fecha: string (nullable = false)



In [18]:
# Replace the Fecha string with its Unix epoch seconds, exposed as 'fecha'.
# The pattern given here is the same one unix_timestamp() uses by default.
df_unix = dff.select(
    'Name',
    'age',
    'Experience',
    'Salary',
    unix_timestamp(col('Fecha'), 'yyyy-MM-dd HH:mm:ss').alias('fecha'),
)
df_unix.show()

+---------+---+----------+------+----------+
|     Name|age|Experience|Salary|     fecha|
+---------+---+----------+------+----------+
|    Krish| 31|        10| 30000|1653862467|
|Sudhanshu| 30|         8| 25000|1653862467|
|    Sunny| 29|         4| 20000|1653862467|
|     Paul| 24|         3| 20000|1653862467|
|   Harsha| 21|         1| 15000|1653862467|
|  Shubham| 23|         2|18000 |1653862467|
+---------+---+----------+------+----------+



In [19]:
# Convert the epoch seconds in 'fecha' back into a formatted timestamp string.
# The column is referenced by its exact name ('fecha', as aliased when df_unix
# was built) rather than 'Fecha', so the lookup does not depend on Spark's
# case-insensitive column resolution (spark.sql.caseSensitive=false).
df_not_unix = df_unix.select(
    'Name', 'age', 'Experience', 'Salary',
    from_unixtime(col('fecha')).alias('fecha'),
)
df_not_unix.show()

+---------+---+----------+------+-------------------+
|     Name|age|Experience|Salary|              fecha|
+---------+---+----------+------+-------------------+
|    Krish| 31|        10| 30000|2022-05-29 17:14:27|
|Sudhanshu| 30|         8| 25000|2022-05-29 17:14:27|
|    Sunny| 29|         4| 20000|2022-05-29 17:14:27|
|     Paul| 24|         3| 20000|2022-05-29 17:14:27|
|   Harsha| 21|         1| 15000|2022-05-29 17:14:27|
|  Shubham| 23|         2|18000 |2022-05-29 17:14:27|
+---------+---+----------+------+-------------------+



In [20]:
# Round trip once more: parse the 'fecha' string back into epoch seconds,
# overwriting the column in place, then show both the data and the schema.
# The pattern given here is the same one unix_timestamp() uses by default.
df_convert = df_not_unix.withColumn(
    "fecha",
    unix_timestamp(col('fecha'), 'yyyy-MM-dd HH:mm:ss').cast('bigint'),
)
df_convert.show()
df_convert.printSchema()

+---------+---+----------+------+----------+
|     Name|age|Experience|Salary|     fecha|
+---------+---+----------+------+----------+
|    Krish| 31|        10| 30000|1653862467|
|Sudhanshu| 30|         8| 25000|1653862467|
|    Sunny| 29|         4| 20000|1653862467|
|     Paul| 24|         3| 20000|1653862467|
|   Harsha| 21|         1| 15000|1653862467|
|  Shubham| 23|         2|18000 |1653862467|
+---------+---+----------+------+----------+

root
 |-- Name: string (nullable = true)
 |-- age: string (nullable = true)
 |-- Experience: string (nullable = true)
 |-- Salary: string (nullable = true)
 |-- fecha: long (nullable = true)



In [21]:
# Turn the epoch seconds in 'fecha' back into a timestamp string, overwriting
# the column in place, then show both the data and the schema.
# The pattern given here is the same one from_unixtime() uses by default.
df_convert_2 = df_convert.withColumn(
    'fecha',
    from_unixtime(col('fecha'), 'yyyy-MM-dd HH:mm:ss').cast('string'),
)
df_convert_2.show()
df_convert_2.printSchema()

+---------+---+----------+------+-------------------+
|     Name|age|Experience|Salary|              fecha|
+---------+---+----------+------+-------------------+
|    Krish| 31|        10| 30000|2022-05-29 17:14:27|
|Sudhanshu| 30|         8| 25000|2022-05-29 17:14:27|
|    Sunny| 29|         4| 20000|2022-05-29 17:14:27|
|     Paul| 24|         3| 20000|2022-05-29 17:14:27|
|   Harsha| 21|         1| 15000|2022-05-29 17:14:27|
|  Shubham| 23|         2|18000 |2022-05-29 17:14:27|
+---------+---+----------+------+-------------------+

root
 |-- Name: string (nullable = true)
 |-- age: string (nullable = true)
 |-- Experience: string (nullable = true)
 |-- Salary: string (nullable = true)
 |-- fecha: string (nullable = true)



In [22]:
# Count the non-null Name values and bring the single result row back to the driver.
df.select(count(col('Name'))).collect()

[Row(count(Name)=6)]