In [1]:
import os
# Raw string literal: "\p" is not a recognised escape sequence, so a plain
# string literal triggers an invalid-escape warning on recent Python versions.
PROJECT_DIR = r"H:\pyspark_advanced-coding_interview"

# Switch the kernel's working directory, then display it as the cell output.
os.chdir(PROJECT_DIR)
os.getcwd()

'H:\\pyspark_advanced-coding_interview'

In [2]:
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, StringType

# Obtain the Spark session for this notebook (created if none exists yet)
spark = (
    SparkSession.builder
    .appName("CapitalizeFirstLetter")
    .getOrCreate()
)

# One nullable string column called "Name"
schema = StructType([StructField("Name", StringType(), True)])

# Lower-case sample names, wrapped as single-field row tuples
data = [
    (raw_name,)
    for raw_name in (
        "alice",
        "bob",
        "charlie",
        "david",
        "eve",
        "frank",
        "grace",
        "heidi",
        "ivan",
        "judy",
        "mallory",
        "oscar",
        "peggy",
        "trent",
        "victor",
    )
]

# Build the DataFrame from the rows and the explicit schema, then print it
df = spark.createDataFrame(data, schema)
df.show()


+-------+
|   Name|
+-------+
|  alice|
|    bob|
|charlie|
|  david|
|    eve|
|  frank|
|  grace|
|  heidi|
|   ivan|
|   judy|
|mallory|
|  oscar|
|  peggy|
|  trent|
| victor|
+-------+



# PySpark

In [3]:
from pyspark.sql.functions import initcap

# Built-in initcap upper-cases the first letter of each word in the column;
# the result is added next to the original Name column.
capitalized_df = df.withColumn(
    "CapitalizedName",
    initcap(df["Name"]),
)
capitalized_df.show()


+-------+---------------+
|   Name|CapitalizedName|
+-------+---------------+
|  alice|          Alice|
|    bob|            Bob|
|charlie|        Charlie|
|  david|          David|
|    eve|            Eve|
|  frank|          Frank|
|  grace|          Grace|
|  heidi|          Heidi|
|   ivan|           Ivan|
|   judy|           Judy|
|mallory|        Mallory|
|  oscar|          Oscar|
|  peggy|          Peggy|
|  trent|          Trent|
| victor|         Victor|
+-------+---------------+



In [4]:
from pyspark.sql.functions import concat, length, lit, substring, upper

# Capitalize the first letter using upper and substring.
# First character, upper-cased.
first_letter = upper(substring("Name", 1, 1))

# Everything from the second character onwards. The slice is bounded by the
# value's own length so names of any length are kept in full. Column.substr
# needs both arguments to be of the same kind, hence lit(2) for the start.
remainder = df["Name"].substr(lit(2), length("Name"))

custom_capitalized_df = df.withColumn(
    "CustomCapitalizedName",
    concat(first_letter, remainder),
)

custom_capitalized_df.show()


+-------+---------------------+
|   Name|CustomCapitalizedName|
+-------+---------------------+
|  alice|                Alice|
|    bob|                  Bob|
|charlie|              Charlie|
|  david|                David|
|    eve|                  Eve|
|  frank|                Frank|
|  grace|                Grace|
|  heidi|                Heidi|
|   ivan|                 Ivan|
|   judy|                 Judy|
|mallory|              Mallory|
|  oscar|                Oscar|
|  peggy|                Peggy|
|  trent|                Trent|
| victor|               Victor|
+-------+---------------------+



In [7]:
from pyspark.sql.functions import udf
from pyspark.sql.types import StringType

# Define a custom function
def capitalize_first_letter(name):
    """Return ``name`` with its first character upper-cased.

    Falsy inputs (``None`` or an empty string) are returned unchanged.
    Characters after the first are left exactly as they are.
    """
    if not name:
        return name
    head, tail = name[0], name[1:]
    return head.upper() + tail

# Wrap the Python function as a Spark UDF whose return type is a string
capitalize_udf = udf(capitalize_first_letter, returnType=StringType())

# Evaluate the UDF on the Name column and attach the result as a new column
udf_name_column = capitalize_udf("Name")
udf_capitalized_df = df.withColumn("UDFCapitalizedName", udf_name_column)
udf_capitalized_df.show()


+-------+------------------+
|   Name|UDFCapitalizedName|
+-------+------------------+
|  alice|             Alice|
|    bob|               Bob|
|charlie|           Charlie|
|  david|             David|
|    eve|               Eve|
|  frank|             Frank|
|  grace|             Grace|
|  heidi|             Heidi|
|   ivan|              Ivan|
|   judy|              Judy|
|mallory|           Mallory|
|  oscar|             Oscar|
|  peggy|             Peggy|
|  trent|             Trent|
| victor|            Victor|
+-------+------------------+



In [5]:
from pyspark.sql.functions import expr

# SQL expression text: upper-cased first character followed by the rest of Name
capitalize_sql_expr = "concat(upper(substring(Name, 1, 1)), substring(Name, 2, length(Name) - 1))"

# Parse the expression with expr() and add its result as a new column
expr_capitalized_df = df.withColumn("ExprCapitalizedName", expr(capitalize_sql_expr))

expr_capitalized_df.show()


+-------+-------------------+
|   Name|ExprCapitalizedName|
+-------+-------------------+
|  alice|              Alice|
|    bob|                Bob|
|charlie|            Charlie|
|  david|              David|
|    eve|                Eve|
|  frank|              Frank|
|  grace|              Grace|
|  heidi|              Heidi|
|   ivan|               Ivan|
|   judy|               Judy|
|mallory|            Mallory|
|  oscar|              Oscar|
|  peggy|              Peggy|
|  trent|              Trent|
| victor|             Victor|
+-------+-------------------+



# Spark SQL

In [6]:
# Query text: upper-case the first character of Name and append the remainder
query_sql = """
SELECT Name, 
       CONCAT(UPPER(SUBSTRING(Name, 1, 1)), SUBSTRING(Name, 2, LENGTH(Name) - 1)) AS SQLCapitalizedName
FROM Names
"""

# Make the DataFrame queryable from SQL under the view name the query uses
df.createOrReplaceTempView("Names")

# Run the query through the Spark session and print the result
sql_capitalized_df = spark.sql(query_sql)
sql_capitalized_df.show()


+-------+------------------+
|   Name|SQLCapitalizedName|
+-------+------------------+
|  alice|             Alice|
|    bob|               Bob|
|charlie|           Charlie|
|  david|             David|
|    eve|               Eve|
|  frank|             Frank|
|  grace|             Grace|
|  heidi|             Heidi|
|   ivan|              Ivan|
|   judy|              Judy|
|mallory|           Mallory|
|  oscar|             Oscar|
|  peggy|             Peggy|
|  trent|             Trent|
| victor|            Victor|
+-------+------------------+



# Python

In [8]:
# Input: a single lower-case name
name = "alice"

# str.capitalize() upper-cases the first character and lower-cases the rest
capitalized_name = str.capitalize(name)
print(capitalized_name)  # Output: Alice


Alice


In [9]:
# Input: a two-word lower-case name
full_name = "john doe"

# str.title() upper-cases the first letter of every word
capitalized_full_name = str.title(full_name)
print(capitalized_full_name)  # Output: John Doe


John Doe


In [10]:
# Example string
sentence = "python programming"

# Capitalize first letter using slicing.
# sentence[:1] (a slice) is used instead of sentence[0] (an index) so the
# same expression also works for an empty string instead of raising IndexError.
capitalized_sentence = sentence[:1].upper() + sentence[1:]
print(capitalized_sentence)  # Output: Python programming


Python programming


In [11]:
import string

# Input: several lower-case words with punctuation
phrase = "hello world, welcome to python"

# string.capwords() splits on whitespace, capitalizes every word and
# re-joins the words with single spaces (sep=None is the default behaviour)
capitalized_phrase = string.capwords(phrase, sep=None)
print(capitalized_phrase)  # Output: Hello World, Welcome To Python


Hello World, Welcome To Python


In [12]:
import re

# Input: a lower-case sentence
text = "this is an example using regular expressions."

# Match a single word character anchored at the start of the string and
# replace it with its upper-cased form
leading_word_char = re.compile(r'^\w')
capitalized_text = leading_word_char.sub(lambda match: match.group().upper(), text)
print(capitalized_text)  # Output: This is an example using regular expressions.


This is an example using regular expressions.


In [13]:
# Input: lower-case names
names = ["john", "jane", "doe", "smith"]

# Apply str.capitalize to every element and collect the results in a new list
capitalized_names = list(map(str.capitalize, names))
print(capitalized_names)  # Output: ['John', 'Jane', 'Doe', 'Smith']


['John', 'Jane', 'Doe', 'Smith']
