In [0]:
import datetime
from pyspark.sql.types import StructType, StructField, StringType, TimestampType

# Read the model-level report table and collect the names of the models that
# are flagged for retraining.
#
# Spark evaluates lazily: spark.table() only builds a reference to the table,
# and the rows are not read until collect() runs. The filter/collect step is
# therefore kept inside the same try block as the table lookup, so a failure
# at any point of the read is reported and handled the same way.
try:
    report_df = spark.table("field_demos.ml_ops.master_model_report")
    models_to_retrain = (
        report_df
        .filter("needs_retrained = true")
        .select("model_name")
        .collect()
    )
except Exception as e:
    print("Error reading master_model_report table:", e)
    # Publish an empty candidate list before failing, so the
    # "retrain_candidates" task value is set even when this task errors out.
    dbutils.jobs.taskValues.set(key="retrain_candidates", value=[])
    # Bare raise re-raises the active exception unchanged.
    raise

# Plain Python list of model names taken from the collected rows.
candidates = [row["model_name"] for row in models_to_retrain]

# Build one log event per retraining candidate. The timestamp is captured once
# up front so every event produced by this run carries the same retrain_time.
current_time = datetime.datetime.now()
retrain_logs = [
    {
        "model_name": model_name,
        "retrain_time": current_time,
        "status": "Candidate flagged",
        "details": f"Model {model_name} flagged for retraining.",
    }
    for model_name in candidates
]

# Persist the candidate events to the Delta log table, but only when at least
# one model was flagged; otherwise just report that there is nothing to log.
if not retrain_logs:
    print("No models flagged for retraining.")
else:
    # Explicit schema for the log rows; every column is declared nullable
    # (third StructField argument is True).
    log_schema = StructType([
        StructField(column_name, column_type, True)
        for column_name, column_type in (
            ("model_name", StringType()),
            ("retrain_time", TimestampType()),
            ("status", StringType()),
            ("details", StringType()),
        )
    ])

    log_df = spark.createDataFrame(retrain_logs, schema=log_schema)
    # Append mode adds these rows to the table instead of replacing it.
    (
        log_df.write
        .format("delta")
        .mode("append")
        .saveAsTable("field_demos.ml_ops.model_retrain_log")
    )
    print("Retraining candidate events logged to Delta table 'field_demos.ml_ops.model_retrain_log'.")

# Pass the list of candidate model names to the next job task.
# `candidates` holds the `model_name` values collected from the report table
# (names, not ids) and is published under the task-value key
# "retrain_candidates". It may be an empty list when nothing was flagged.
# NOTE(review): presumably a downstream task reads this key — confirm the
# consumer expects model names.
dbutils.jobs.taskValues.set(key="retrain_candidates", value=candidates)
print("Candidates to retrain passed to next job:", candidates)