In [0]:
# Catalog and schema that later cells select as the session defaults
catalog = "demos"
database = "engine_rul_predictor"

# Registered model name used to build the `models:/...` URI when scoring
model_name = "engine_rul_predictor"

# Table read as scoring input, and table the predictions are written to
source_table = "features"
target_table = "gold"

In [0]:
# Make `catalog` the session's current catalog, then `database` its current schema
use_catalog_stmt = f"USE CATALOG {catalog}"
use_schema_stmt = f"USE {database}"

spark.sql(use_catalog_stmt)
spark.sql(use_schema_stmt)

In [0]:
# Load the feature rows from the source table
features_table_name = f"{database}.{source_table}"
features_df = spark.read.format("delta").table(features_table_name)

In [0]:
import mlflow
import pyspark.sql.functions as F

# Batch-scoring helper: applies the registered model to a features DataFrame
def predict_rul(data, alias="production"):
    """Score `data` with the registered model and return the curated columns.

    The model is looked up as ``models:/{model_name}@{alias}``; `model_name`
    and `spark` are read from the notebook's global scope.

    Args:
        data: Spark DataFrame that contains at least the columns selected in
            the output (id, Cycle, OpSet1-3, SensorMeasure1-21,
            RemainingUsefulLife).
        alias: Registry alias of the model version to load. Defaults to
            "production", which reproduces the URI this notebook always used.

    Returns:
        Spark DataFrame with the columns listed above plus `prediction`,
        which is item 0 of the value returned by the model UDF.
    """
    # Load the model as a Spark UDF
    model_uri = f"models:/{model_name}@{alias}"
    model_fct = mlflow.pyfunc.spark_udf(spark, model_uri=model_uri)

    # Ensure OpSet1 is cast to float
    scoring_df = data.withColumn("OpSet1", F.col("OpSet1").cast("float"))

    # NOTE(review): every column of the frame -- including `id` and the
    # `RemainingUsefulLife` column -- is handed to the UDF positionally, in
    # DataFrame column order. Confirm that this order and column set match the
    # inputs the model expects.
    prediction_df = scoring_df.withColumn(
        "prediction", model_fct(*scoring_df.columns).getItem(0)
    )

    # Keep only the documented output columns, in a fixed order
    sensor_columns = [f"SensorMeasure{i}" for i in range(1, 22)]
    output_columns = [
        "id",
        "Cycle",
        "OpSet1",
        "OpSet2",
        "OpSet3",
        *sensor_columns,
        "RemainingUsefulLife",
        "prediction",
    ]

    return prediction_df.select(*output_columns)

In [0]:
# Score the feature rows, then render the resulting DataFrame
predicted_df = predict_rul(data=features_df)
display(predicted_df)

In [0]:
# Persist the predictions to the Gold Delta table, overwriting existing contents
gold_table_name = f"{database}.{target_table}"
(
    predicted_df.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable(gold_table_name)
)

In [0]:
# Read the Gold table back and display it
gold_df = spark.table(f"{database}.{target_table}")
display(gold_df)