## String Functions in PySpark

In [0]:
# Read the Global Superstore CSV, using the first row as the header and
# letting Spark infer the column types, then render the resulting DataFrame.
df = (
    spark.read
    .options(header=True, inferSchema=True)
    .csv('/Volumes/ytdemo/data/demodata/GlobalSuperstore.csv/')
)
df.display()

In [0]:
from pyspark.sql.functions import length, char_length, initcap, upper, lower,concat, concat_ws, lit, substring, instr, trim

## Example 1 — length() and char_length()

In [0]:
# Show each customer name next to the value returned by length() and by
# char_length() so the two results can be compared side by side.
display(
    df.select(
        [
            "Customer Name",
            length("Customer Name").alias("name_length"),
            char_length("Customer Name").alias("char_length"),
        ]
    )
)

## Example 2 — upper(), lower(), initcap()
- upper() → ALL CAPS
- lower() → all lowercase
- initcap() → first letter of each word capitalized, remaining letters lowercased (Title Case)

In [0]:
# Original product name followed by its upper-, lower-, and initcap-converted forms.
display(
    df.select(
        [
            "Product Name",
            upper("Product Name").alias("product_upper"),
            lower("Product Name").alias("product_lower"),
            initcap("Product Name").alias("product_title_case"),
        ]
    )
)


## Example 3 — lit()

In [0]:
# lit() wraps a Python value as a column expression; here the same literal
# text is attached to every row alongside the order ID.
display(
    df.select(
        [
            "Order ID",
            lit("Superstore Order").alias("constant_text"),
        ]
    )
)

## Example 4 — concat() and concat_ws()

In [0]:
# Two ways to join customer name and city:
#   concat()    - the " - " separator is passed explicitly as a lit() column
#   concat_ws() - the ", " separator is given once as the first argument
display(
    df.select(
        [
            "Customer Name",
            "City",
            concat("Customer Name", lit(" - "), "City").alias("name_city"),
            concat_ws(", ", "Customer Name", "City").alias("name_city_ws"),
        ]
    )
)

## Example 5 — substring()

In [0]:
# substring(column, 1, 7): start position 1, length 7, exposed as "order_prefix".
display(
    df.select(
        [
            "Order ID",
            substring("Order ID", 1, 7).alias("order_prefix"),
        ]
    )
)

## Example 6 — instr()

In [0]:
# instr() looks for the substring "Table" inside each product name; the
# result is shown as "position_of_table".
display(
    df.select(
        [
            "Product Name",
            instr("Product Name", "Table").alias("position_of_table"),
        ]
    )
)

## Example 7 — trim()

In [0]:
# Apply trim() to the customer name and show only the trimmed result.
display(
    df.select(
        [
            trim("Customer Name").alias("trimmed_name"),
        ]
    )
)