In [0]:
import pandas as pd
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, to_timestamp

# Four sample sales orders, stored column-wise (one list per column).
data = {
    "OrderID": [1, 2, 3, 4],
    "OrderDate": [
        "2024-01-01 10:00:00",
        "2024-01-02 11:00:00",
        "2024-01-03 12:00:00",
        "2024-01-04 13:00:00",
    ],
    "CustomerID": ["C001", "C002", "C003", "C004"],
    "Product": ["ProductA", "ProductB", "ProductC", "ProductD"],
    "Quantity": [10, 20, 15, 5],
    "Price": [100.0, 200.0, 150.0, 50.0],
}

# Local-filesystem style path the CSV is written to.
# NOTE(review): presumably the Databricks /dbfs mount -- confirm the runtime.
csv_path = "/dbfs/FileStore/sales_data_4.csv"

# Build the pandas frame and write it out in one step; index=False keeps the
# pandas row index out of the file so only the six data columns are written.
pd.DataFrame(data).to_csv(csv_path, index=False)

print(f"Sample data saved to {csv_path}")

# Obtain a SparkSession via getOrCreate(), requesting the given application name.
spark = (
    SparkSession.builder
    .appName("StructuredStreamingExample")
    .getOrCreate()
)

# Read the CSV back through Spark, taking column names from the header row.
# No schema is supplied and schema inference is not requested, so the columns
# are loaded as strings.
# NOTE(review): this path has no "/dbfs" prefix, unlike the path the file was
# written to -- presumably both name the same DBFS file; confirm.
df = spark.read.csv("/FileStore/sales_data_4.csv", header="true")

print("Data Loaded Successfully")

# Compute the per-order total as Quantity * Price. Both operands are cast
# explicitly (int and double) before the multiplication.
quantity = col("Quantity").cast("int")
unit_price = col("Price").cast("double")
df_transformed = df.withColumn("TotalAmount", quantity * unit_price)

print("Data Transformed Successfully")

# Persist the transformed rows in Delta format at a fixed path; "overwrite"
# mode replaces any data already stored there.
df_transformed.write.save("/delta/sales_data_4", format="delta", mode="overwrite")

print("Transformed data written to Delta table successfully")

Sample data saved to /dbfs/FileStore/sales_data_4.csv
Data Loaded Successfully
Data Transformed Successfully
Transformed data written to Delta table successfully
