In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, StringType, IntegerType, BooleanType, ArrayType, MapType, DoubleType
from pyspark.sql.functions import col, lit, array_contains, expr # Importing col and lit for demonstrations, and other functions as needed

# 1. Start (or reuse) the Spark session.
#    Every DataFrame operation in this notebook goes through this object.
spark = (
    SparkSession.builder
    .appName("PySpark Column Functions Demo")
    .getOrCreate()
)

print("--- Initializing Spark Session ---")

# 2. Describe the sample table.
#    The columns mix scalar, array and map types so that the different
#    Column methods each have something to operate on. All fields are nullable.
schema = (
    StructType()
    .add("id", IntegerType(), True)
    .add("product_name", StringType(), True)
    .add("category", StringType(), True)
    .add("price", DoubleType(), True)
    .add("quantity", IntegerType(), True)
    .add("is_available", BooleanType(), True)
    .add("tags", ArrayType(StringType()), True)
    .add("attributes", MapType(StringType(), StringType()), True)
    .add("description", StringType(), True)
)

# One tuple per product, in schema order:
# (id, product_name, category, price, quantity, is_available, tags, attributes, description)
data = [
    (
        1, "Laptop Pro", "Electronics", 1200.50, 5, True,
        ["tech", "portable"],
        {"brand": "ABC", "color": "silver"},
        "High-performance laptop for professionals.",
    ),
    (
        2, "Smartphone X", "Electronics", 800.00, 10, True,
        ["mobile", "camera"],
        {"brand": "XYZ", "os": "Android"},
        "Latest smartphone with advanced features.",
    ),
    (
        3, "Wireless Mouse", "Accessories", 25.99, 50, True,
        ["peripheral"],
        {"type": "optical", "connection": "bluetooth"},
        "Ergonomic wireless mouse.",
    ),
    (
        4, "USB-C Hub", "Accessories", 49.99, 20, False,
        ["connectivity"],
        {"ports": "4", "version": "3.0"},
        None,  # Null description
    ),
    (
        5, "Mechanical Keyboard", "Accessories", 150.00,
        None,  # Null quantity
        True,
        ["gaming"],
        {"layout": "US", "switches": "blue"},
        "Gaming keyboard with tactile feedback.",
    ),
    (
        6, "External SSD", "Storage", 180.00, 15, True,
        ["storage"],
        {"capacity": "1TB"},
        "Fast external solid-state drive.",
    ),
    (
        7, "Smart Watch Lite", "Wearables", 299.99, 8, False,
        ["health"],
        {"brand": "FitPro"},
        "Basic smartwatch for fitness tracking.",
    ),
    (
        8, "Headphones ANC", "Audio", 220.00, 12, True,
        ["audio", "noise-cancelling"],
        {"type": "over-ear"},
        "Premium headphones with active noise cancellation.",
    ),
]

# 3. Materialise the rows as a DataFrame using the explicit schema above.
df = spark.createDataFrame(data, schema)

In [0]:
# Show the sample DataFrame as the cell's output.
# NOTE(review): `DataFrame.display()` is presumably the Databricks notebook
# extension rather than core PySpark — confirm the target runtime.
df.display()

In [0]:
# Print the DataFrame's column names, types and nullability as a tree.
df.printSchema()

In [0]:
# Arithmetic on Column objects.
# orderValue = price * quantity (column-by-column product)
# taxedPrice = price * 1.08     (column scaled by a literal)
df_arthematic = (
    df
    .withColumn("orderValue", col("price") * col("quantity"))
    .withColumn("taxedPrice", col("price") * 1.08)
)
df_arthematic.display()

In [0]:
# Comparison operations on Column objects.
# Successive filters are ANDed together, so a row must pass all four.
df_comparison = (
    df
    # Strictly greater than 200.
    .filter(col("price") > 200)
    # A boolean column is already a predicate; no `== True` comparison needed.
    .filter(col("is_available"))
    # Membership test against a fixed set of categories.
    .filter(col("category").isin("Electronics", "Wearables"))
    # between() is inclusive on both ends; combined with the `> 200` filter
    # above, the effective price range is (200, 800].
    .filter(col("price").between(200, 800))
)
df_comparison.display()

In [0]:
# eqNullSafe: null-safe equality.
# Comparing `description` with a null literal through eqNullSafe gives True
# for rows whose description is null and False otherwise, where a plain `==`
# against null would give null for every row.
df_eq_null_safe = df.withColumn(
    "eq_desc",
    col("description").eqNullSafe(lit(None)),
)

# Display in a separate statement so that df_eq_null_safe holds the DataFrame
# itself rather than the return value of .display().
df_eq_null_safe.display()

In [0]:
# isNull: show each id next to a boolean column that is True where the id is null

df.select(df["id"],df["id"].isNull()).display()