In [None]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col

In [None]:
# Feature-engineering step for the Iris data: read the ingested Delta table,
# derive a `petal_area` column, keep the model inputs plus the target, and
# write the result to a separate Delta location.

# Get (or reuse) the SparkSession
spark = SparkSession.builder.appName("IrisFeatureEngineering").getOrCreate()

# Define the path to the ingested data
ingested_data_path = "/mnt/my_delta_lake/iris_ingested"  # Ensure this matches the ingestion path

# Define the path to save the feature-engineered data
feature_engineered_data_path = "/mnt/my_delta_lake/iris_feature_engineered"  # Replace with your desired path

# Everything that touches Spark runs inside try/finally so the session is
# stopped even when the read, the transformation, or the write fails.
try:
    # Read the Delta Lake table
    # NOTE(review): assumes the ingested table exposes sepal_length, sepal_width,
    # petal_length, petal_width and species columns -- confirm against the
    # ingestion step.
    iris_df = spark.read.format("delta").load(ingested_data_path)

    # Example feature engineering steps:
    # 1. Create a new feature: petal area (petal_length * petal_width)
    iris_df = iris_df.withColumn(
        "petal_area", col("petal_length") * col("petal_width")
    )

    # 2. You might add more complex features based on domain knowledge or EDA insights
    #    For example, ratios of features, polynomial features, etc.

    # Select the features and the target variable
    feature_engineered_df = iris_df.select(
        "sepal_length",
        "sepal_width",
        "petal_length",
        "petal_width",
        "petal_area",
        "species",
    )

    # Write the feature-engineered data to Delta Lake.
    # mode("overwrite") replaces whatever is already stored at the target path.
    (
        feature_engineered_df.write
        .format("delta")
        .mode("overwrite")
        .save(feature_engineered_data_path)
    )

    # Reported only after the write has completed without raising.
    print(f"Feature-engineered data saved to: {feature_engineered_data_path}")
finally:
    # Always stop the session, on success and on failure alike.
    spark.stop()