In [0]:
import os

# Show what currently sits in the raw folder of the volume.
volume_path = "/Volumes/arao/aerodemo/tmp/raw"
volume_entries = dbutils.fs.ls(volume_path)
display(volume_entries)

In [0]:
# Recursively delete the schema and checkpoint folders kept under the raw folder.
raw_schema_dir = "/Volumes/arao/aerodemo/tmp/raw/schema"
raw_checkpoint_dir = "/Volumes/arao/aerodemo/tmp/raw/checkpoints"

dbutils.fs.rm(raw_schema_dir, recurse=True)
dbutils.fs.rm(raw_checkpoint_dir, recurse=True)

In [0]:
import shutil
import os

# Volume folders that get wiped by this cell.
raw_path = "/Volumes/arao/aerodemo/tmp/raw"
maint_path = "/Volumes/arao/aerodemo/tmp/maintenance"

# Destructive: each folder path is handed to a recursive rm, so everything
# beneath it (data files as well as schema/checkpoint sub-folders) is deleted.
for target_dir in (raw_path, maint_path):
    dbutils.fs.rm(f"{target_dir}/", recurse=True)

In [0]:
# Preview the most recently modified CSV file in the raw folder.
raw_path = "/Volumes/arao/aerodemo/tmp/raw"

# Keep only CSV files: the listing can also contain sub-folders (their names end
# with "/"), which must not be mistaken for the newest data file.
csv_entries = [
    entry for entry in dbutils.fs.ls(raw_path) if entry.name.endswith(".csv")
]

# Newest first; `latest_file` stays a list so `latest_file[0]` keeps working.
latest_file = sorted(csv_entries, key=lambda f: f.modificationTime, reverse=True)

if latest_file:
    display(spark.read.format("csv").option("header", True).load(latest_file[0].path))
else:
    # An empty folder used to raise IndexError on latest_file[0]; report it instead.
    print(f"No CSV files found in {raw_path}")

In [0]:
# List the raw folder and print the full path of every entry, one per line.
raw_path = "/Volumes/arao/aerodemo/tmp/raw"
files = dbutils.fs.ls(raw_path)

entry_paths = [entry.path for entry in files]
for entry_path in entry_paths:
    print(entry_path)

In [0]:
from pyspark.sql.types import *
from pyspark.sql.functions import col

# Expected sensor columns as (column name, Spark type) pairs.
# Every column is declared nullable when the schema is built below.
sensor_columns = [
    ("timestamp", TimestampType()),
    ("aircraft_id", StringType()),
    ("model", StringType()),
    ("engine_temp", DoubleType()),
    ("fuel_efficiency", DoubleType()),
    ("vibration", DoubleType()),
    ("altitude", DoubleType()),
    ("airspeed", DoubleType()),
    ("anomaly_score", DoubleType()),
    ("oil_pressure", DoubleType()),
    ("engine_rpm", IntegerType()),
    ("battery_voltage", DoubleType()),
]
sensor_schema = StructType(
    [StructField(column_name, column_type, True) for column_name, column_type in sensor_columns]
)

# Load one specific raw sensor CSV with the explicit schema applied
# (the header row is treated as column names, not data).
sensor_csv_path = "dbfs:/Volumes/arao/aerodemo/tmp/raw/raw_sensor_data_20250516_220905.csv"
df = (
    spark.read
    .format("csv")
    .option("header", "true")
    .schema(sensor_schema)
    .load(sensor_csv_path)
)

df.display()

In [0]:
# Recursively delete the schema and checkpoint folders of both the raw and the
# maintenance folders.
raw_schema_dir = "dbfs:/Volumes/arao/aerodemo/tmp/raw/schema"
raw_checkpoint_dir = "dbfs:/Volumes/arao/aerodemo/tmp/raw/checkpoints"
maint_schema_dir = "dbfs:/Volumes/arao/aerodemo/tmp/maintenance/schema"
maint_checkpoint_dir = "dbfs:/Volumes/arao/aerodemo/tmp/maintenance/checkpoints"

dbutils.fs.rm(raw_schema_dir, recurse=True)
dbutils.fs.rm(raw_checkpoint_dir, recurse=True)
dbutils.fs.rm(maint_schema_dir, recurse=True)
dbutils.fs.rm(maint_checkpoint_dir, recurse=True)

In [0]:
# List the raw folder; the listing is echoed as the cell result.
raw_listing = dbutils.fs.ls("dbfs:/Volumes/arao/aerodemo/tmp/raw/")
raw_listing

In [0]:
# Destructive: recursively delete the whole raw folder.
raw_dir = "dbfs:/Volumes/arao/aerodemo/tmp/raw"
dbutils.fs.rm(raw_dir, True)

In [0]:
# Print the column names and types of the sensor_features table.
sensor_features_df = spark.table("arao.aerodemo.sensor_features")
sensor_features_df.printSchema()

In [0]:
# Save `df` as the Delta table arao.aerodemo.sensor_features_table, replacing
# whatever the table held before (data and schema).
#
# This is one overwrite with overwriteSchema rather than DROP TABLE followed by a
# write: dropping first leaves no table at all if the write then fails, e.g. when
# the CSV behind the lazily evaluated `df` has already been deleted.
(
    df.write
    .format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .saveAsTable("arao.aerodemo.sensor_features_table")
)

In [0]:
%sql
-- Show the extended description (columns plus table-level metadata) of sensor_features.
-- NOTE(review): this targets sensor_features, not the sensor_features_table written
-- by the Python cell in this notebook — confirm which table is intended.
DESCRIBE TABLE EXTENDED arao.aerodemo.sensor_features;