# 📘 Feature Store Registration
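This notebook copies the engineered sensor features produced by the Delta Live Tables (DLT) pipeline (`arao.aerodemo.sensor_features`) into a standalone Delta table (`arao.aerodemo.sensor_features_table`), adds NOT NULL and primary-key constraints on `aircraft_id` and `timestamp`,
and registers that table in the Databricks Feature Store for easier discoverability, governance, and reuse during model training and inference.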


In [0]:
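# # Inactive: earlier approach that copied the DLT table as-is. The active cell further down
# # rebuilds arao.aerodemo.sensor_features_table with null filtering, key casts and constraints.
# df = spark.read.table("arao.aerodemo.sensor_features").cache()
# df.count()  # Force materialization to avoid lazy read error
# df.write.format("delta").mode("overwrite").saveAsTable("arao.aerodemo.sensor_features_table")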

In [0]:
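# %sql
# -- Inactive: the same NOT NULL constraints are applied through spark.sql() in the active rebuild cell further down.
# ALTER TABLE arao.aerodemo.sensor_features_table
# ALTER COLUMN aircraft_id SET NOT NULL;

# ALTER TABLE arao.aerodemo.sensor_features_table
# ALTER COLUMN timestamp SET NOT NULL;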

In [0]:
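# %sql
# -- Inactive: the same primary-key constraint is added through spark.sql() in the active rebuild cell further down.
# ALTER TABLE arao.aerodemo.sensor_features_table
# ADD CONSTRAINT sensor_features_pk
# PRIMARY KEY (aircraft_id, timestamp);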

In [0]:
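# %sql
# -- Inactive: this targets the DLT source table (sensor_features) itself; the active cells
# -- add the primary key to the sensor_features_table copy instead.
# ALTER TABLE arao.aerodemo.sensor_features
# ADD CONSTRAINT sensor_features_pk PRIMARY KEY (aircraft_id, timestamp);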

In [0]:
from pyspark.sql.types import *
from pyspark.sql import functions as F

# 🧹 1. Drop the old table if it exists to avoid constraint conflicts
spark.sql("DROP TABLE IF EXISTS arao.aerodemo.sensor_features_table")

# 📥 2. Read from existing DLT materialized table
df_raw = spark.table("arao.aerodemo.sensor_features") \
    .filter("aircraft_id IS NOT NULL AND timestamp IS NOT NULL")

# 🧼 3. Clean nulls (as double safety) and cast PKs
df_clean = df_raw.withColumn("aircraft_id", F.col("aircraft_id").cast("string")) \
                 .withColumn("timestamp", F.col("timestamp").cast("string"))

# 💾 4. Save as Delta table with schema overwrite
df_clean.write.format("delta") \
    .mode("overwrite") \
    .option("overwriteSchema", "true") \
    .saveAsTable("arao.aerodemo.sensor_features_table")

# 🔐 5. Enforce NOT NULL constraints
spark.sql("""
  ALTER TABLE arao.aerodemo.sensor_features_table 
  ALTER COLUMN aircraft_id SET NOT NULL
""")
spark.sql("""
  ALTER TABLE arao.aerodemo.sensor_features_table 
  ALTER COLUMN timestamp SET NOT NULL
""")

# 🛡️ 6. Add primary key constraint (required for Feature Store)
spark.sql("""
  ALTER TABLE arao.aerodemo.sensor_features_table 
  ADD CONSTRAINT sensor_features_pk 
  PRIMARY KEY (aircraft_id, timestamp)
""")

In [0]:
from databricks.feature_store import FeatureStoreClient

# Register the constrained Delta table with the Feature Store, keyed by
# aircraft and reading time, using the table's current contents as the data.
# NOTE(review): the table named here is already created by the preceding cell —
# confirm create_table accepts a pre-existing table in this workspace.
fs = FeatureStoreClient()

feature_table_name = "arao.aerodemo.sensor_features_table"
feature_table_df = spark.read.table(feature_table_name)

fs.create_table(
    name=feature_table_name,
    primary_keys=["aircraft_id", "timestamp"],
    timestamp_keys=["timestamp"],
    description="Engineered features for anomaly prediction from sensor data",
    df=feature_table_df,
)