In [1]:
import os
os.getcwd()
os.chdir("H:\pyspark_advanced-coding_interview")
os.getcwd()

'H:\\pyspark_advanced-coding_interview'

In [2]:
from pyspark.sql import SparkSession

# Initialize Spark Session
spark = SparkSession.builder.appName("RandomPasswordAndUniqueID").getOrCreate()

# Sample Data: Basic user information (for demonstration)
data = [
    (1, "Alice"),
    (2, "Bob"),
    (3, "Charlie"),
    (4, "David"),
    (5, "Eva"),
    (6, "Frank"),
    (7, "Grace"),
    (8, "Henry"),
    (9, "Ivy"),
    (10, "Jack"),
    (11, "Kim"),
    (12, "Liam"),
    (13, "Mia"),
    (14, "Noah"),
    (15, "Olivia")
]

# Create DataFrame
df = spark.createDataFrame(data, ["id", "name"])

# Create a Temporary View for Spark SQL
df.createOrReplaceTempView("users")

# Show the original DataFrame
df.show(truncate=False)


+---+-------+
|id |name   |
+---+-------+
|1  |Alice  |
|2  |Bob    |
|3  |Charlie|
|4  |David  |
|5  |Eva    |
|6  |Frank  |
|7  |Grace  |
|8  |Henry  |
|9  |Ivy    |
|10 |Jack   |
|11 |Kim    |
|12 |Liam   |
|13 |Mia    |
|14 |Noah   |
|15 |Olivia |
+---+-------+



#### Generate Random UUIDs Using uuid() in Spark SQL

In [3]:
res = spark.sql(""" 
                
SELECT id, name, uuid() AS unique_id
FROM users;

              
                """)
res.show()

+---+-------+--------------------+
| id|   name|           unique_id|
+---+-------+--------------------+
|  1|  Alice|0b4ab8db-d43a-4fc...|
|  2|    Bob|2bf478df-9199-4c8...|
|  3|Charlie|1063c92f-7f53-450...|
|  4|  David|cfd370a3-6bc3-439...|
|  5|    Eva|47d1a18f-93a9-42b...|
|  6|  Frank|b094114e-8811-4c2...|
|  7|  Grace|ba948855-8614-4c7...|
|  8|  Henry|8bac6584-bc80-40e...|
|  9|    Ivy|b56c2e60-48b0-4c1...|
| 10|   Jack|1d64fee6-d3e3-4f6...|
| 11|    Kim|84aecd53-12cf-440...|
| 12|   Liam|c2e73ffd-3d1b-4ac...|
| 13|    Mia|34586c79-96ec-481...|
| 14|   Noah|9f0c3b5c-cbc3-424...|
| 15| Olivia|9cf04ec6-cf3e-4b4...|
+---+-------+--------------------+



In [5]:
from pyspark.sql.functions import expr

# Generate unique UUIDs similar to NEWID
df_with_uuid = df.withColumn("unique_id", expr("uuid()"))

# Show the result
df_with_uuid.show(truncate=False)


+---+-------+------------------------------------+
|id |name   |unique_id                           |
+---+-------+------------------------------------+
|1  |Alice  |d057e16f-0022-4573-8f57-1ccbe6b48d2e|
|2  |Bob    |ce25ea13-5938-4ca7-b693-af55e90e8121|
|3  |Charlie|61030dcc-90fc-42ad-9433-d5ea4c6fd137|
|4  |David  |eca7d25f-d2bd-4a3e-a02d-aa2bdf7dff2e|
|5  |Eva    |69569e0d-b852-40d1-8092-f9bf118d5020|
|6  |Frank  |9e0bdc37-2a0c-4623-99d1-cb1442144519|
|7  |Grace  |d6efeeff-e206-4696-ba0e-fc8f04a38d2e|
|8  |Henry  |73fa0135-0b4d-4a86-b40e-0f45f95d1844|
|9  |Ivy    |41118319-2f4e-480b-ba73-54cb198b9890|
|10 |Jack   |019bf641-941c-436d-a89a-e362040d3020|
|11 |Kim    |e0a7df25-8505-4801-aeba-52c61cb1bfa7|
|12 |Liam   |ab501d98-e43b-441d-a0f8-8f6d758edaba|
|13 |Mia    |132f5c9c-3195-4b49-a022-2f8903c9961a|
|14 |Noah   |5b288067-3f0e-4727-9a2e-bd2591334f56|
|15 |Olivia |489dfbfa-55b5-4abb-a41e-7cfcbcdf6477|
+---+-------+------------------------------------+



#### Generate Random Passwords Using md5 and Random Functions in Spark SQL

In [4]:
res1 = spark.sql(""" 
                
SELECT id, name, 
       md5(cast(rand() * 1000000 AS STRING)) AS random_password
FROM users;

              
                """)
res1.show()

+---+-------+--------------------+
| id|   name|     random_password|
+---+-------+--------------------+
|  1|  Alice|7ad62bc8451d62610...|
|  2|    Bob|2c9d371dc92406e89...|
|  3|Charlie|1b7681ed07dbc9e3c...|
|  4|  David|85d0483ce38544531...|
|  5|    Eva|32c15bbdb1eddad82...|
|  6|  Frank|80498ebdb0849a99d...|
|  7|  Grace|8e0dce56aff993c61...|
|  8|  Henry|9cfee74e78fc8c376...|
|  9|    Ivy|bd28034ffe187bf63...|
| 10|   Jack|54858d13744508532...|
| 11|    Kim|37372ea52728eef9c...|
| 12|   Liam|ce2b8b141285ffc69...|
| 13|    Mia|b508acc8f995b354a...|
| 14|   Noah|0244440c8d13f2b07...|
| 15| Olivia|cebfeeffe69648d14...|
+---+-------+--------------------+



In [6]:
from pyspark.sql.functions import md5, rand, concat

# Generate random passwords using MD5 hash
df_random_passwords = df.withColumn("random_password", md5(concat(rand().cast("string"))))

# Show the result
df_random_passwords.show(truncate=False)


+---+-------+--------------------------------+
|id |name   |random_password                 |
+---+-------+--------------------------------+
|1  |Alice  |2b9606197cde44c390b87f334182cfac|
|2  |Bob    |b865d23d9b63f71b5137444c81d50e93|
|3  |Charlie|14d7a4cd5cd5b2ee64e506b4f26bfe6f|
|4  |David  |cc4afaf8f6aa126174cceddf713fee0c|
|5  |Eva    |655719d574b9f3c42bc59899d73f6363|
|6  |Frank  |7cae20e9de52a958e96392cd1cf56f49|
|7  |Grace  |24c43050ee10b453bf9694b3ee30df7c|
|8  |Henry  |814375e7db63c64f2a781cb53a54bfbf|
|9  |Ivy    |554c16feefc0aa14fc96fd20c43a8920|
|10 |Jack   |c0835e2171fd8f9749236216ee6dccfe|
|11 |Kim    |c97b11802f525ee3747e6fe9a9bef766|
|12 |Liam   |a65ab0d4e8d5c9c3588c59fead1c1518|
|13 |Mia    |e3972868106f31211a6c2f492ce2b1f6|
|14 |Noah   |7daefe2baff894873c7118b6da2f5aeb|
|15 |Olivia |bfa1fc82d7a75ddd2dcf8b32feea76a0|
+---+-------+--------------------------------+



In [7]:
import random
import string
from pyspark.sql.functions import udf
from pyspark.sql.types import StringType

# Define a function to generate random passwords
def generate_random_password(length=8):
    characters = string.ascii_letters + string.digits + string.punctuation
    return ''.join(random.choice(characters) for i in range(length))

# Register UDF
random_password_udf = udf(generate_random_password, StringType())

# Apply UDF to generate random passwords
df_with_random_passwords = df.withColumn("random_password", random_password_udf())

df_with_random_passwords.show(truncate=False)


+---+-------+---------------+
|id |name   |random_password|
+---+-------+---------------+
|1  |Alice  |^'xzC+}U       |
|2  |Bob    |5jR1sIQH       |
|3  |Charlie|1i"U{4w0       |
|4  |David  |ic~=/*`V       |
|5  |Eva    |}\!M/<j{       |
|6  |Frank  |$}]G+Slz       |
|7  |Grace  |~F0UT<cq       |
|8  |Henry  ||VV$iv%@       |
|9  |Ivy    |Y943=7gc       |
|10 |Jack   |h<05k=W"       |
|11 |Kim    |EJ9g3hS8       |
|12 |Liam   |DrvR>E,_       |
|13 |Mia    |J.K<M:UC       |
|14 |Noah   |w%dP/Azi       |
|15 |Olivia |"9+3Z$jv       |
+---+-------+---------------+



In [9]:
from pyspark.sql.functions import monotonically_increasing_id, lit

# Generate unique IDs using monotonically increasing values and combine with a random number
df_unique_id = df.withColumn("unique_id", concat(monotonically_increasing_id(), lit("-"), md5(rand().cast("string"))))

df_unique_id.show(truncate=False)


+---+-------+---------------------------------------------+
|id |name   |unique_id                                    |
+---+-------+---------------------------------------------+
|1  |Alice  |8589934592-55ac8efc193e16693cbffcf3eb1d538b  |
|2  |Bob    |17179869184-42fb221b6a68ef9d742e973f13c652eb |
|3  |Charlie|25769803776-9e72920ee9d84f376b9bf8a71449b174 |
|4  |David  |42949672960-4191258cce09394513a59a8a16c2f6a4 |
|5  |Eva    |51539607552-cffb998dff8d8cb8eb5bef0b7ca39275 |
|6  |Frank  |60129542144-ccd4a528a0d9c8f23a04643af3047239 |
|7  |Grace  |77309411328-8bc5a009d5f0eac44e42291394f090b9 |
|8  |Henry  |85899345920-68e470eefaf2729b28de5ec1997848c4 |
|9  |Ivy    |94489280512-65c19b0a55ce2b79aceca9caa6dd9ead |
|10 |Jack   |111669149696-70fb57edaed5e1e56438f6795169cef2|
|11 |Kim    |120259084288-0d5d330b36c1c4925baa56130cf3e1a6|
|12 |Liam   |128849018880-0d3b48a57caf182c5fcc70dbe46f0c24|
|13 |Mia    |146028888064-a98dba90ceca03d5f9d97f094f2f7903|
|14 |Noah   |154618822656-aa73ef1e342c20