In [1]:
import mlflow.spark
from pyspark.sql import SparkSession
from pyspark.sql.functions import current_timestamp, avg, stddev, max, min

# 1. Start (or reuse) the Spark session and point MLflow at the tracking
#    server (Challenge 1)
spark = (
    SparkSession.builder
    .appName("Final_Inferencia_Produccion")
    .getOrCreate()
)
mlflow.set_tracking_uri("http://mlflow:5000")

# 2. Load the model from the Model Registry
# NOTE(review): this is a stage-based URI ("/Production"); recent MLflow
# releases appear to deprecate registry stages in favour of aliases - confirm
# against the deployed MLflow version before migrating.
model_uri = "models:/Modelo_Predictor_SECOP_II/Production"
loaded_model = mlflow.spark.load_model(model_uri=model_uri)

# 3. Prepare the input data (Challenge 2)
# The stored vector column is called "features_scaled"; it is renamed to
# "features" so that it matches the input column the model expects (this
# rename is what resolves the column-mismatch error).
df_input = (
    spark.read
    .parquet("/opt/spark-data/processed/secop_final_ready.parquet")
    .withColumnRenamed("features_scaled", "features")
)

# 4. Generate batch predictions (Challenge 3)
# The model output column is renamed for clarity and every row is stamped
# with the inference time.
df_results = (
    loaded_model.transform(df_input)
    .withColumnRenamed("prediction", "prediccion_valor")
    .withColumn("fecha_inferencia", current_timestamp())
)

# Columns that are monitored, reported and persisted.
# The report is cached because four separate actions consume it below
# (statistics, Parquet write, CSV write, preview). Without the cache every
# action re-reads the input and re-runs the model, and current_timestamp()
# is re-evaluated per action, so the Parquet and CSV copies would carry
# different "fecha_inferencia" values.
df_final_report = df_results.select(
    "nombre_entidad",
    "valor_del_contrato",
    "prediccion_valor",
    "fecha_inferencia",
).cache()

# 5. Monitor predictions - summary statistics (Challenge 4)
# min/max here are the pyspark.sql.functions aggregates imported at the top
# of the file (they shadow the Python builtins of the same name).
# The statistics are computed on the cached report so this action
# materialises the cache that the writes below reuse.
# NOTE(review): the captured sample output shows predictions that match
# valor_del_contrato almost exactly - verify that the feature vector does not
# include the target column.
print("\nüìä ESTAD√çSTICAS DE PRODUCCI√ìN (MONITOREO):")
df_final_report.select(
    avg("prediccion_valor").alias("Promedio"),
    stddev("prediccion_valor").alias("Desviacion"),
    min("prediccion_valor").alias("Min"),
    max("prediccion_valor").alias("Max"),
).show()

# 6. Save results (Challenge 5)
# Both outputs share the same base path and differ only in their suffix.
output_path = "/opt/spark-data/results/predicciones_finales"
parquet_path = f"{output_path}.parquet"
csv_path = f"{output_path}.csv"

df_final_report.write.mode("overwrite").parquet(parquet_path)
df_final_report.write.mode("overwrite").option("header", "true").csv(csv_path)

# Report the locations that were actually written (the bare base path is
# never created).
print(f" ¬°PROCESO COMPLETADO! Resultados guardados en: {parquet_path}, {csv_path}")
df_final_report.show(10)

# Release the cached report now that every consumer has run.
df_final_report.unpersist()

Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
26/02/14 18:11:53 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
26/02/14 18:11:54 WARN Utils: Service 'SparkUI' could not bind on port 4040. Attempting port 4041.
  latest = client.get_latest_versions(name, None if stage is None else [stage])
2026/02/14 18:11:55 INFO mlflow.spark: 'models:/Modelo_Predictor_SECOP_II/Production' resolved as 'file:///opt/mlflow/mlruns/741409308133091839/f4525d4c25ac4c308979af435296e38c/artifacts/model'
2026/02/14 18:11:56 INFO mlflow.spark: URI 'models:/Modelo_Predictor_SECOP_II/Production/sparkml' does not point to the current DFS.
2026/02/14 18:11:56 INFO mlflow.spark: File 'models:/Modelo_Predictor_SECOP_II/Production/sparkml' not found on DFS. Will attempt to upload the file.
                                                                       


üìä ESTAD√çSTICAS DE PRODUCCI√ìN (MONITOREO):
+--------------------+--------------------+-----------------+--------------------+
|            Promedio|          Desviacion|              Min|                 Max|
+--------------------+--------------------+-----------------+--------------------+
|2.0714816118887916E9|6.110044514057408E10|700000.1341406628|1.899999999478093...|
+--------------------+--------------------+-----------------+--------------------+



                                                                                

 ¬°PROCESO COMPLETADO! Resultados guardados en: /opt/spark-data/results/predicciones_finales
+--------------------+------------------+--------------------+--------------------+
|      nombre_entidad|valor_del_contrato|    prediccion_valor|    fecha_inferencia|
+--------------------+------------------+--------------------+--------------------+
|                 JEP|       7.3451015E7| 7.345101532734275E7|2026-02-14 18:12:...|
|ALCALDIA MUNICIPI...|         6673341.0|   6673341.006328298|2026-02-14 18:12:...|
|RADIO TELEVISION ...|            1.32E7|1.3200000002107598E7|2026-02-14 18:12:...|
|SECRETARIA GENERA...|         6.75268E7|  6.75267999871744E7|2026-02-14 18:12:...|
|ALCALDIA MUNICIPA...|             1.5E7| 1.500000005543935E7|2026-02-14 18:12:...|
|MINISTERIO DE MIN...|       2.7606667E7|2.7606667009962495E7|2026-02-14 18:12:...|
|                 ANM|       2.7723175E7|2.7723175040784314E7|2026-02-14 18:12:...|
|MINISTERIO DE AMB...|        1.316546E8|1.3165460001221597E8|2026-

26/02/14 18:12:13 WARN GarbageCollectionMetrics: To enable non-built-in garbage collector(s) List(G1 Concurrent GC), users should configure it(them) to spark.eventLog.gcMetrics.youngGenerationGarbageCollectors or spark.eventLog.gcMetrics.oldGenerationGarbageCollectors
