In [24]:
from pyspark.ml import PipelineModel
from pyspark.ml.classification import RandomForestClassificationModel, LogisticRegressionModel
from pyspark.ml.functions import vector_to_array
from pyspark.sql.functions import col, udf
from pyspark.sql.types import DoubleType

StatementMeta(, c83bf376-ee2b-4b77-b57c-59b5c8e40396, 26, Finished, Available, Finished)

In [25]:
# Load the persisted preprocessing PipelineModel from the Lakehouse Files area.
# The trained classifier itself is loaded in a separate cell.
ruta_pipeline = "Files/Models/pipeline_preprocesamiento"
pipeline_loaded = PipelineModel.load(ruta_pipeline)

StatementMeta(, c83bf376-ee2b-4b77-b57c-59b5c8e40396, 27, Finished, Available, Finished)

In [26]:
# Load the persisted random-forest classification model used for scoring.
ruta_modelo_rf = "Files/Models/Churn_RF"
model_loaded = RandomForestClassificationModel.load(ruta_modelo_rf)

# Confirmation message; reaching this line means the load above did not raise.
print("✅ Pipeline y modelo cargados correctamente.")

StatementMeta(, c83bf376-ee2b-4b77-b57c-59b5c8e40396, 28, Finished, Available, Finished)

✅ Pipeline y modelo cargados correctamente.


In [27]:
# Load the contracts to score (simulated "new" data) from the silver lakehouse.
dfNuevos = spark.sql("SELECT * FROM lakehouseSilver.dfcontratosfinal")

# count() is an action: it runs the query and returns the number of rows.
total_filas = dfNuevos.count()
print("✅ Nuevos contratos cargados. Filas:", total_filas)

StatementMeta(, c83bf376-ee2b-4b77-b57c-59b5c8e40396, 29, Finished, Available, Finished)

✅ Nuevos contratos cargados. Filas: 1690


In [28]:
# Run the loaded preprocessing pipeline over the new contracts so they go
# through the same stages that were persisted with the pipeline.
# NOTE(review): presumably this produces the `scaledFeatures` column the
# classifier expects (per the message below) — confirm against the pipeline stages.
dfNuevos_preparado = pipeline_loaded.transform(dataset=dfNuevos)

print("✅ Nuevos contratos transformados (scaledFeatures listos).")

StatementMeta(, c83bf376-ee2b-4b77-b57c-59b5c8e40396, 30, Finished, Available, Finished)

✅ Nuevos contratos transformados (scaledFeatures listos).


In [29]:
# Score the preprocessed contracts with the loaded classifier. The later cells
# read the `prediction` and `probability` columns from this result.
predicciones = model_loaded.transform(dataset=dfNuevos_preparado)

StatementMeta(, c83bf376-ee2b-4b77-b57c-59b5c8e40396, 31, Finished, Available, Finished)

In [30]:
# UDF returning element 1 of a vector-like value as a Spark double.
# NOTE(review): presumably index 1 is the positive (churn) class — confirm
# against the label encoding used at training time.
extract_prob_1 = udf(
    f=lambda vec: float(vec[1]),
    returnType=DoubleType(),
)

StatementMeta(, c83bf376-ee2b-4b77-b57c-59b5c8e40396, 32, Finished, Available, Finished)

In [31]:
# Add `prob_Churn`: element 1 of the model's `probability` vector, as a double.
# vector_to_array converts the ML vector to array<double> natively, so rows are
# not shipped to a Python worker the way a Python UDF would require; the
# resulting column has the same values and type as the UDF-based version.
# NOTE(review): presumably index 1 is the churn class — confirm against the
# label encoding used at training time.
resultado = predicciones.withColumn(
    "prob_Churn",
    vector_to_array(col("probability"))[1],
)

StatementMeta(, c83bf376-ee2b-4b77-b57c-59b5c8e40396, 33, Finished, Available, Finished)

In [32]:
# Keep only the contract/customer identifiers plus the model outputs.
columnas_salida = ["numContrato", "idCliente", "prediction", "prob_Churn"]
resultado = resultado.select(*columnas_salida)

StatementMeta(, c83bf376-ee2b-4b77-b57c-59b5c8e40396, 34, Finished, Available, Finished)

In [33]:
# Preview the first 10 scored rows without truncating column values.
resultado.show(n=10, truncate=False)

StatementMeta(, c83bf376-ee2b-4b77-b57c-59b5c8e40396, 35, Finished, Available, Finished)

+-----------+---------+----------+--------------------+
|numContrato|idCliente|prediction|prob_Churn          |
+-----------+---------+----------+--------------------+
|14057      |16503    |0.0       |0.035690667884622655|
|14124      |551      |0.0       |0.052684144751877125|
|14032      |17504    |0.0       |0.09017393591452036 |
|14143      |17301    |0.0       |0.08537016182312594 |
|14169      |17666    |0.0       |0.0642485800179309  |
|14152      |17614    |0.0       |0.0853373848054033  |
|14159      |17619    |0.0       |0.1735723640134005  |
|14180      |17677    |0.0       |0.0791305072600846  |
|14174      |16376    |0.0       |0.0920425084409219  |
|13799      |17307    |0.0       |0.07269426576224544 |
+-----------+---------+----------+--------------------+
only showing top 10 rows



In [34]:
# Persist the scored contracts as a Delta table in the Lakehouse.
# mode("overwrite") replaces the table's existing contents on every run.
escritor_delta = resultado.write.format("delta").mode("overwrite")
escritor_delta.saveAsTable("Predicciones_Churn")

print("✅ Resultados guardados en la tabla: Predicciones_Churn")

StatementMeta(, c83bf376-ee2b-4b77-b57c-59b5c8e40396, 36, Finished, Available, Finished)

✅ Resultados guardados en la tabla: Predicciones_Churn
