#### **1) Applying random.choice to a Single Column**

In [0]:
from pyspark.sql.functions import col, udf
from pyspark.sql.types import StringType
import random

In [0]:
# Sample DataFrame
data = [(1, "Anand"),
        (2, "Baskar"),
        (3, "Catherin"),
        (4, "Dravid"),
        (5, "Swetha"),
        (6, "Akash"),
        (7, "Senthil"),
        (8, "Praveen")]
columns = ["ID", "Name"]

df = spark.createDataFrame(data, columns)
display(df)

ID,Name
1,Anand
2,Baskar
3,Catherin
4,Dravid
5,Swetha
6,Akash
7,Senthil
8,Praveen


In [0]:
# List of random choices
choices = ["HR", "IT", "Finance", "Marketing"]

# Define UDF to apply random.choice
@udf(StringType())
def random_choice_udf():
    return random.choice(choices)

# Apply the UDF to add a new column
df_with_random_choice = df.withColumn("Department", random_choice_udf())

# Show the resulting DataFrame
display(df_with_random_choice)

ID,Name,Department
1,Anand,Finance
2,Baskar,HR
3,Catherin,IT
4,Dravid,IT
5,Swetha,Finance
6,Akash,Finance
7,Senthil,HR
8,Praveen,IT


#### **2) Applying random.choice to Multiple Columns**

In [0]:
# List of random choices
choices = ["HR", "IT", "Finance", "Marketing"]

# Define UDF for random department
@udf(StringType())
def random_department():
    return random.choice(choices)

# Another UDF for random project assignment
projects = ["Project A", "Project B", "Project C"]

@udf(StringType())
def random_project():
    return random.choice(projects)

# Apply both UDFs to add new columns
df_with_random_columns = (
    df.withColumn("Department", random_department())
      .withColumn("Project", random_project())
)

# Show the resulting DataFrame
display(df_with_random_columns)

ID,Name,Department,Project
1,Anand,Marketing,Project A
2,Baskar,Finance,Project C
3,Catherin,Finance,Project C
4,Dravid,HR,Project C
5,Swetha,HR,Project A
6,Akash,Marketing,Project A
7,Senthil,HR,Project A
8,Praveen,Finance,Project C


#### **3) Using a Predefined List in random.choice with Existing Column Data**

**Syntax**

     dict.get(key,value)

**key:** (Required) Key to be **searched in the dictionary**.

**value:** (Optional) Value to be returned if the **key is not present** in the dictionary.

In [0]:
course = {'language': 'python', 'fee': 4000}

# Using get() method to get the value from dictionary
print('language:', course.get('language'))
print('fee:', course.get('fee'))

language: python
fee: 4000


In [0]:
# Using get() to get the value as a None
print('duration:', course.get('duration'))

duration: None


In [0]:
# Using get() to get the value as specified
print('duration:', course.get('duration','Not in dictionary'))

duration: Not in dictionary


In [0]:

# Using get() to get the value as specified
course = {'language': 'python', 'fee': 4000}
print('duration:', course.get('language','Not in dictionary'))

duration: python


In [0]:
# Using get() to get the value as specified
course = {'language': 'python', 'fee': 4000, 'HR': ["Policy", "Recruitment", "Sales"], 'Finance': ["Auditing", "Budgeting", "Works", "Temp"]}

print('HR:', course.get('HR','Not in dictionary'))
print('Random Choice of HR:', random.choice(course.get('HR','Not in dictionary')))

print('Finance:', course.get('Finance','Not in dictionary'))
print('Random Choice of Finance:', random.choice(course.get('Finance','Not in dictionary')))

HR: ['Policy', 'Recruitment', 'Sales']
Random Choice of HR: Sales
Finance: ['Auditing', 'Budgeting', 'Works', 'Temp']
Random Choice of Finance: Auditing


In [0]:
@udf(StringType())
def random_from_column(value):
    options = {
        "HR": ["Policy"],
        "IT": ["DevOps"],
        "Finance": ["Auditing"]
    }
    return random.choice(options.get(value, ["General"]))

# Add a new column with predefined random values based on "Department"
df_with_dependent_choice = df_with_random_columns.withColumn("Specialization", random_from_column(col("Department")))

display(df_with_dependent_choice)

ID,Name,Department,Project,Specialization
1,Anand,Marketing,Project C,General
2,Baskar,HR,Project A,DevOps
3,Catherin,Finance,Project B,Policy
4,Dravid,IT,Project B,General
5,Swetha,IT,Project A,General
6,Akash,HR,Project A,Policy
7,Senthil,Finance,Project B,Auditing
8,Praveen,IT,Project A,Auditing
