## Import Libraries

In [1]:
from pathlib import Path

import joblib
import pandas as pd
from catboost import CatBoostClassifier

<span style="font-size: 22px;">Load the new dataset</span>

In [2]:
# Load the dataset to score and keep a fixed window of rows.
# The window is taken by position AFTER sorting on `kay_contactid`, i.e. rows
# 200..399 of the sorted frame (the stop bound is exclusive).
# NOTE(review): the reason for choosing this particular window is not recorded
# in the notebook - confirm it is intentional before reusing these bounds.
ROW_START, ROW_STOP = 200, 400

new_data = (
    pd.read_csv('../output/finalData.csv')
    .sort_values(by='kay_contactid')
    .iloc[ROW_START:ROW_STOP]
    # Detach the slice from the sorted parent frame so that columns added to
    # `new_data` later are written to an independent object.
    .copy()
)

# Display the first few rows of the new dataset to check
new_data.head(5)

Unnamed: 0,kay_contactid,Gender,birthdate,HNI Customer,Customer Concern,age,abs_age,age_range,Zone,Item,...,CenterCode,Invoicecenter,PackageValiditydays,Free,AQU,status2,ExistNewTag,Churn,month,year
1048,CGRC21299,Female,1992-11-24,Yes,,31,31,30-40,WEST-NITIN,Package,...,KAMI,,90,no,No,LIVE,Existing,1,10,2021
1062,CGRC21319,Male,2000-01-01,Yes,,24,24,20-30,WEST-NITIN,Package,...,KAMI,,270,no,No,LIVE,Existing,0,12,2021
1061,CGRC21319,Male,2000-01-01,Yes,,24,24,20-30,WEST-NITIN,Package,...,KAMI,,270,no,No,LIVE,Existing,0,12,2021
1063,CGRC21319,Male,2000-01-01,Yes,,24,24,20-30,WEST-NITIN,Package,...,KAMI,,270,no,No,LIVE,Existing,0,11,2021
1049,CGRC21358,Female,2000-01-01,Yes,,24,24,20-30,WEST-NITIN,Package,...,KAMI,,365,no,No,LIVE,Existing,0,10,2021


<span style="font-size: 22px;">Load the preprocessing pipeline</span>

In [3]:
# Restore the saved preprocessing bundle and pull out its two transformers.
# The bundle is indexed by the keys 'num_transformer' and 'cat_transformer'.
# NOTE(review): joblib.load unpickles the file, so only load artefacts from a
# trusted location.
loaded_pipeline = joblib.load('../model/preprocessing_pipeline.pkl')
num_transformer, cat_transformer = (
    loaded_pipeline[part] for part in ('num_transformer', 'cat_transformer')
)

<span style="font-size: 22px;">Define the numerical and categorical features (same as used during training)</span>

In [4]:
# Define the numerical and categorical features (same as used during training).
# The names and their order must match what the loaded transformers expect.
num_features = ['PackageValiditydays']
# NOTE(review): 'FinalAmount' and 'finalInvoiceId' are listed as categorical;
# presumably this mirrors the training notebook - confirm against it.
cat_features = ['age_range', 'Gender', 'HNI Customer', 'Zone', 'Item', 'BusinessUnit', 'TierName',
                'Marketbucket', 'Free', 'AQU', 'status2', 'ExistNewTag', 'FinalAmount',
                'InvoiceCenterCode', 'Promotion', 'Category', 'PackageCode',
                'CenterCode', 'finalInvoiceId', 'month', 'year']

# Check that every column either transformer will read is present in the new
# data. Numerical features are validated too, so a missing column fails here
# with a clear message instead of a KeyError in a later cell.
required_features = num_features + cat_features
missing_features = set(required_features) - set(new_data.columns)
if missing_features:
    raise ValueError(f"Missing columns in new_data: {missing_features}")

<span style="font-size: 22px;">Preprocess numerical features (standard scaling)</span>

In [5]:
# Run the loaded numerical transformer over the numerical columns only.
new_data_num = num_transformer.transform(new_data.loc[:, num_features])

<span style="font-size: 22px;">Preprocess categorical features (one-hot encoding)</span>

In [6]:
# Run the loaded categorical transformer over the categorical columns only.
new_data_cat = cat_transformer.transform(new_data.loc[:, cat_features])

<span style="font-size: 22px;">Concatenate the processed numerical and categorical features</span>

In [7]:
# Build the model input by placing the transformed numerical and categorical
# features side by side.

# Densify only when the transformer returned a sparse matrix (which exposes
# .toarray()); an already-dense result is used as it is.
cat_matrix = new_data_cat.toarray() if hasattr(new_data_cat, "toarray") else new_data_cat

# Both parts carry new_data's index so every processed row keeps the label of
# the source row it came from and the two parts align on concatenation.
new_data_processed = pd.concat(
    [
        pd.DataFrame(new_data_num, columns=num_features, index=new_data.index),
        pd.DataFrame(
            cat_matrix,
            columns=cat_transformer.get_feature_names_out(cat_features),
            index=new_data.index,
        ),
    ],
    axis=1,
)

print("New dataset preprocessing completed!")

New dataset preprocessing completed!


<span style="font-size: 22px;">Load the CatBoost model</span>

In [8]:
# Create an empty classifier, then populate it from the saved model file.
# The load call stays as the cell's last expression, so the cell still echoes
# the classifier object as its output.
loaded_model = CatBoostClassifier()
loaded_model.load_model(fname="../model/catboost_model.cbm")

<catboost.core.CatBoostClassifier at 0x2863f24fa70>

<span style="font-size: 22px;">Make predictions on the new dataset</span>

In [9]:
# Score the preprocessed rows with the loaded model.
y_new_pred = loaded_model.predict(new_data_processed)

# The predictions are attached by position, so there must be exactly one per row.
assert len(y_new_pred) == len(new_data), "prediction count does not match number of rows"

# Add predictions to the new data.
# assign() returns a new frame instead of writing into the sliced frame in
# place, and re-running this cell simply overwrites the column.
new_data = new_data.assign(Predicted_Churn=y_new_pred)

# Save the new dataset with predictions to a CSV file.
# to_csv does not create missing folders, so make sure the target directory exists.
predictions_path = Path('../output/predictions/new_data_with_predictions.csv')
predictions_path.parent.mkdir(parents=True, exist_ok=True)
new_data.to_csv(predictions_path, index=False)

print("Predictions added to the new dataset and saved to 'new_data_with_predictions.csv'!")

Predictions added to the new dataset and saved to 'new_data_with_predictions.csv'!


In [10]:
# Preview the first three rows; Predicted_Churn should now be the last column.
new_data.head(n=3)

Unnamed: 0,kay_contactid,Gender,birthdate,HNI Customer,Customer Concern,age,abs_age,age_range,Zone,Item,...,Invoicecenter,PackageValiditydays,Free,AQU,status2,ExistNewTag,Churn,month,year,Predicted_Churn
1048,CGRC21299,Female,1992-11-24,Yes,,31,31,30-40,WEST-NITIN,Package,...,,90,no,No,LIVE,Existing,1,10,2021,0
1062,CGRC21319,Male,2000-01-01,Yes,,24,24,20-30,WEST-NITIN,Package,...,,270,no,No,LIVE,Existing,0,12,2021,0
1061,CGRC21319,Male,2000-01-01,Yes,,24,24,20-30,WEST-NITIN,Package,...,,270,no,No,LIVE,Existing,0,12,2021,0
