- You are working with a financial services company that processes transaction data.
- Each transaction contains a JSON field with the transaction details, which is stored as a string.
- You need to update the schema so that these JSON fields are handled properly.

In [0]:
%python
# Step 1: Raw DataFrame
# Starting point: a table or CSV whose third field carries the transaction
# details as a JSON-encoded string.
# (No SparkSession import is needed on Databricks; `spark` is available by default.)

columns = ["transaction_id", "timestamp", "transaction_details"]
dataa = [
    (
        1001,
        "2025-09-18 10:00:00",
        '{"amount": 250.75, "currency": "INR", "merchant": "Flipkart", "status": "success"}',
    ),
    (
        1002,
        "2025-09-18 10:05:00",
        '{"amount": 1200.00, "currency": "USD", "merchant": "Amazon", "status": "failed"}',
    ),
]

# Column names are supplied explicitly; column types are left to Spark's inference.
df_raw = spark.createDataFrame(dataa, schema=columns)
display(df_raw)




In [0]:
%python
# Step 2: Parse the JSON column
# Describe the layout of the JSON string, then decode it into a struct column.

from pyspark.sql.functions import from_json, col
from pyspark.sql.types import StructType, StructField, StringType, DoubleType

# One StructField per key expected inside transaction_details.
json_schema = StructType([
    StructField(field_name, field_type)
    for field_name, field_type in (
        ("amount", DoubleType()),
        ("currency", StringType()),
        ("merchant", StringType()),
        ("status", StringType()),
    )
])

# Add a struct column "details" next to the original string column.
df_parsed = df_raw.withColumn(
    "details",
    from_json(col("transaction_details"), schema=json_schema),
)
display(df_parsed)



In [0]:
%python
# Step 3: Flatten the structure
# Keep the two identifying columns and promote each field of the "details"
# struct to a top-level column of the same name.
df_final = df_parsed.select(
    "transaction_id",
    "timestamp",
    *[
        col(f"details.{field}").alias(field)
        for field in ("amount", "currency", "merchant", "status")
    ],
)
display(df_final)

In [0]:
%python
# Create a DataFrame with a single column "add" holding the values 01, 01, 02
# (the value "01" is filtered in a following cell).
#
# Each row must be a tuple (or Row/dict/list): Spark cannot infer a schema from
# bare strings, so passing ['01', '01', '02'] directly raises a TypeError.
# Wrapping every value in a 1-tuple gives one row per value under column "add".
df = spark.createDataFrame([("01",), ("01",), ("02",)], ["add"])
display(df)

In [0]:
%python
from pyspark.sql.functions import col

# Keep only the rows whose "add" value equals the string '01'.
df_new = df.where(col("add") == '01')
display(df_new)


In [0]:
%python

# Drop a column: remove "add" from df, store the result under a new name, and display it.

df_drop_column=df.drop("add")
display(df_drop_column)
