### Initialize PySpark

In [1]:
# findspark locates the local Spark installation and adds pyspark to sys.path,
# so init() has to run before the pyspark import below.
import findspark
findspark.init()
import pyspark

### Create a Spark session

In [2]:
from pyspark.sql import SparkSession
# getOrCreate() returns the already-active SparkSession if there is one;
# otherwise it builds a new session from the builder's (here: default) settings.
spark = SparkSession.builder.getOrCreate()

### Creating a List of Tuples

In [3]:
# Sample rows, one tuple per donut: (name, price, purchase date as a "YYYY-MM-DD" string).
donuts = [
    ("plain donut", 1.50, "2018-04-17"),
    ("vanilla donut", 2.0, "2018-04-01"),
    ("glazed donut", 2.50, "2018-04-02"),
]

### Creating a DataFrame from the List of Tuples

In [4]:
# Build the DataFrame from the tuples. The column names are supplied as the
# schema, while the column types are still inferred from the tuple values.
df = spark.createDataFrame(
    donuts,
    schema=["Donut Name", "Price", "Purchase Date"],
)

### Show the DataFrame

In [5]:
# Print the DataFrame contents as a plain-text table.
df.show()

+-------------+-----+-------------+
|   Donut Name|Price|Purchase Date|
+-------------+-----+-------------+
|  plain donut|  1.5|   2018-04-17|
|vanilla donut|  2.0|   2018-04-01|
| glazed donut|  2.5|   2018-04-02|
+-------------+-----+-------------+



### Hashing Function Imports

In [6]:
from pyspark.sql.functions import hash,md5,sha1,sha2

In [9]:
# Keep every existing column and append one column per hashing function, each
# computed over "Donut Name".
# NOTE: `hash` here is pyspark.sql.functions.hash (imported above), which
# shadows the Python builtin of the same name.
df.select(
    "*",
    hash("Donut Name").alias("Hash"),       # Murmur3 hash, returned as an integer column
    md5("Donut Name").alias("MD5"),         # hex-encoded digest string
    sha1("Donut Name").alias("SHA1"),       # hex-encoded digest string
    sha2("Donut Name", 256).alias("SHA2"),  # second argument is the digest size in bits (SHA-256)
).show()

+-------------+-----+-------------+----------+--------------------+--------------------+--------------------+
|   Donut Name|Price|Purchase Date|      Hash|                 MD5|                SHA1|                SHA2|
+-------------+-----+-------------+----------+--------------------+--------------------+--------------------+
|  plain donut|  1.5|   2018-04-17|1594998220|53a70d9f08d8bb249...|7882fd7481cb43452...|4aace471ed4433f1b...|
|vanilla donut|  2.0|   2018-04-01| 673697474|254c8f04be947ec2c...|5dbbc954723a74fe0...|ccda17c5bc47d1671...|
| glazed donut|  2.5|   2018-04-02| 715175419|44199f422534a5736...|aaee30ecdc523fa1e...|6d1568ca8c20ffc0b...|
+-------------+-----+-------------+----------+--------------------+--------------------+--------------------+



### Closing Spark Session

In [10]:
# Stop the underlying SparkContext now that the session is no longer needed.
spark.stop()