# Import libraries

In [1]:
from pyspark.ml import PipelineModel
from pyspark.sql.functions import col

# Read customer profiles from the silver layer

In [1]:
# Load the customer table from the silver layer.
df = spark.read.table("silver.crm.customers")

# Preview a subset of columns, including the review column and its sentiment
# score, before the review column is removed in a later step.
display(
    df.select(
        "customerid",
        "gender",
        "tenure",
        "monthlycharges",
        "totalcharges",
        "review",
        "sentimentScore",
    )
)

In [1]:
# Remove the "review" column; only its derived sentimentScore is kept.
# NOTE(review): presumably the saved pipeline does not consume the review
# column -- confirm against the model's expected input columns.
df = df.drop("review")
# Print the remaining schema to confirm which columns are passed downstream.
df.printSchema()

root
 |-- customerid: string (nullable = true)
 |-- gender: string (nullable = true)
 |-- seniorcitizen: integer (nullable = true)
 |-- partner: string (nullable = true)
 |-- dependents: string (nullable = true)
 |-- tenure: integer (nullable = true)
 |-- phoneservice: string (nullable = true)
 |-- multiplelines: string (nullable = true)
 |-- internetservice: string (nullable = true)
 |-- onlinesecurity: string (nullable = true)
 |-- onlinebackup: string (nullable = true)
 |-- deviceprotection: string (nullable = true)
 |-- techsupport: string (nullable = true)
 |-- streamingtv: string (nullable = true)
 |-- streamingmovies: string (nullable = true)
 |-- contract: string (nullable = true)
 |-- paperlessbilling: string (nullable = true)
 |-- paymentmethod: string (nullable = true)
 |-- monthlycharges: double (nullable = true)
 |-- totalcharges: double (nullable = true)
 |-- sentimentScore: integer (nullable = true)



# Load saved churn prediction model

In [1]:
# Restore the previously saved churn-prediction pipeline from the workspace.
# NOTE(review): the path is hard-coded; consider moving it to a config cell
# if this notebook is run against more than one environment.
loaded_model = PipelineModel.load("/Workspace/model/customer_churn/ml_model/")

# Predict churn

In [1]:
# Score every customer row with the loaded pipeline. The result keeps the
# input columns and adds the model outputs, including "prediction".
predictions = loaded_model.transform(df)

# View results

In [1]:
# Quick look at the predicted label per customer (show() prints the first 20 rows).
(
    predictions
    .select("customerid", "prediction")
    .show()
)

+----------+----------+
|customerid|prediction|
+----------+----------+
|7234-KMNRQ|       0.0|
|9732-OUYRN|       0.0|
|7698-YFGEZ|       1.0|
|9700-ZCLOT|       0.0|
|8084-OIVBS|       0.0|
|7047-YXDMZ|       0.0|
|9585-KKMFD|       0.0|
|8215-NGSPE|       0.0|
|9251-AWQGT|       0.0|
|9507-HSMMZ|       0.0|
|8439-LTUGF|       0.0|
|7277-KAMWT|       0.0|
|9800-OUIGR|       0.0|
|7808-DVWEP|       1.0|
|7601-DHFWZ|       0.0|
|8896-BQTTI|       0.0|
|8219-VYBVI|       0.0|
|9087-EYCPR|       0.0|
|9817-APLHW|       0.0|
|7996-BPXHY|       0.0|
+----------+----------+
only showing top 20 rows



# Column subsetting - get relevant columns

In [1]:
# Columns carried forward from the scored frame: the customer attributes
# plus the model's "prediction" output.
selected_columns = [
    "customerid", "gender", "seniorcitizen", "partner", "dependents",
    "tenure", "phoneservice", "multiplelines", "internetservice",
    "onlinesecurity", "onlinebackup", "deviceprotection", "techsupport",
    "streamingtv", "streamingmovies", "contract", "paperlessbilling",
    "paymentmethod", "monthlycharges", "totalcharges", "sentimentScore",
    "prediction",
]

# select() accepts plain column names, so they are passed through directly.
customers_selected = predictions.select(*selected_columns)

# Narrow to the seven columns that are written out below. The projection is
# built once and then previewed, so the preview and the saved frame cannot
# drift apart.
columns_selection = customers_selected.select(
    "customerid",
    "gender",
    "tenure",
    "monthlycharges",
    "totalcharges",
    "sentimentScore",
    "prediction",
)
columns_selection.show()

+----------+------+------+--------------+------------+--------------+----------+
|customerid|gender|tenure|monthlycharges|totalcharges|sentimentScore|prediction|
+----------+------+------+--------------+------------+--------------+----------+
|7234-KMNRQ|  Male|     4|          19.0|       73.45|             2|       0.0|
|9732-OUYRN|Female|    49|          19.0|       918.7|             1|       0.0|
|7698-YFGEZ|  Male|     1|          20.0|        20.0|             2|       1.0|
|9700-ZCLOT|  Male|     2|          20.0|        32.7|             5|       0.0|
|8084-OIVBS|Female|    11|          20.0|      211.95|             5|       0.0|
|7047-YXDMZ|  Male|    21|          20.0|       417.7|             1|       0.0|
|9585-KKMFD|  Male|    41|          20.0|       879.8|             2|       0.0|
|8215-NGSPE|Female|    42|          20.0|      833.55|             2|       0.0|
|9251-AWQGT|Female|    48|          20.0|       935.9|             1|       0.0|
|9507-HSMMZ|  Male|    54|  

In [1]:
# List the column names, in order, of the frame that is about to be written.
columns_selection.columns

['customerid',
 'gender',
 'tenure',
 'monthlycharges',
 'totalcharges',
 'sentimentScore',
 'prediction']

# Save results to ADW

In [1]:
# Insert the scored rows into the gold-layer predictions table.
# NOTE(review): insertInto() resolves columns by position, not by name, so the
# order of columns_selection must match the target table's column order --
# confirm against the table definition.
# NOTE(review): no overwrite is requested, so each run of this cell adds the
# rows again -- confirm that re-runs are expected to append.
(
    columns_selection
    .write
    .insertInto("external_gold_adw.datalab.churn_predictions")
)