In [16]:
#Adding project root to notebook search paths to utilize project-wide importing from utils.py and config.py
#NOTE(review): this assumes the kernel's working directory is one level below the project root - confirm
import sys
from pathlib import Path
source_path = Path.cwd().parent
#Only append when the entry is missing, so re-running this cell does not stack duplicate sys.path entries
if str(source_path) not in sys.path:
    sys.path.append(str(source_path))

In [17]:
#import statements
import pandas as pd
from utils import ingest_kaggle_data, one_hot_encode, impute_missing
from config import DATA_PATH, MODELS_PATH
from sklearn.model_selection import train_test_split
import joblib

In [18]:
#Fetch the dataset by its Kaggle handle, then pass it through the preprocessing helpers imported from utils
#(ingest -> impute_missing -> one_hot_encode); CustomerID is removed from the result afterwards
handle = "anaghapaul/e-commerce-dataset"
df = (
    one_hot_encode(impute_missing(ingest_kaggle_data(handle)))
    .drop(columns = ["CustomerID"])
)

#Load the previously saved classifier from the models directory
#NOTE(review): joblib.load unpickles the file, so only load model files from a trusted source
model_file = MODELS_PATH.joinpath("rfc_churn.joblib")
rfc = joblib.load(model_file)

Dataset URL: https://www.kaggle.com/datasets/anaghapaul/e-commerce-dataset


In [19]:
#Separate the target column (y) from the remaining columns (X)
target_col = "Churn_1"
y = df[target_col]
X = df.drop(columns = [target_col])

#Split features, target and the row index together: idx_train / idx_test hold the row labels of each
#shuffled partition, so predictions can later be written back to the matching rows of df
(
    X_train, X_test,
    y_train, y_test,
    idx_train, idx_test,
) = train_test_split(
    X,
    y,
    df.index,
    test_size = 0.3,
    stratify = y,
    random_state = 42,
)

In [20]:
#generate predictions for both partitions with the loaded model
y_train_pred = rfc.predict(X_train)
y_test_pred = rfc.predict(X_test)

#Write each partition's predictions back to its own rows, using the index labels returned by the split
df.loc[idx_train, "RFC_Predicted_Churn"] = y_train_pred
df.loc[idx_test, "RFC_Predicted_Churn"] = y_test_pred
#Cast to int only after both partitions have been written
df["RFC_Predicted_Churn"] = df["RFC_Predicted_Churn"].astype(int)

#save prediction to file
output_file = DATA_PATH.joinpath("churn_rfc_pred.csv")
df.to_csv(output_file, index = False)

#Testing: read the file back under its own name instead of overwriting df, so that re-running the
#cell that builds X from df does not pick up the prediction column from a reloaded frame
saved_df = pd.read_csv(output_file)
assert len(saved_df) == len(df), "saved file does not have one row per input row"
assert "RFC_Predicted_Churn" in saved_df.columns, "prediction column missing from saved file"

#Rows where the saved prediction disagrees with the actual label (displayed as the cell output)
saved_df[saved_df["Churn_1"] != saved_df["RFC_Predicted_Churn"]]

Unnamed: 0,Tenure,CityTier,WarehouseToHome,HourSpendOnApp,NumberOfDeviceRegistered,SatisfactionScore,NumberOfAddress,OrderAmountHikeFromlastYear,CouponUsed,OrderCount,...,PreferredPaymentMode_COD,PreferredPaymentMode_Cash on Delivery,PreferredPaymentMode_Credit Card,PreferredPaymentMode_Debit Card,PreferredPaymentMode_E wallet,PreferredPaymentMode_UPI,Churn_1,Gender_Male,Complain_1,RFC_Predicted_Churn
7,9.0,1.0,6.0,3.0,3.0,2.0,3.0,16.0,2.0,2.0,...,0,0,0,0,0,0,1,1,1,0
10,4.0,1.0,18.0,2.0,3.0,3.0,2.0,15.0,9.0,15.0,...,0,1,0,0,0,0,1,0,0,0
111,0.0,1.0,8.0,3.0,3.0,5.0,8.0,20.0,1.0,1.0,...,0,0,0,1,0,0,1,0,0,0
192,15.0,1.0,14.0,2.0,4.0,4.0,3.0,15.0,7.0,7.0,...,0,0,1,0,0,0,1,1,0,0
443,18.0,1.0,8.0,3.0,3.0,5.0,6.0,15.0,1.0,1.0,...,0,0,1,0,0,0,1,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5199,1.0,2.0,16.0,3.0,4.0,3.0,3.0,16.0,3.0,3.0,...,1,0,0,0,0,0,1,0,0,0
5328,1.0,1.0,9.0,3.0,4.0,1.0,3.0,14.0,1.0,2.0,...,1,0,0,0,0,0,0,0,1,1
5477,1.0,1.0,12.0,3.0,4.0,1.0,2.0,15.0,2.0,2.0,...,0,0,0,1,0,0,0,0,1,1
5498,1.0,3.0,16.0,3.0,4.0,1.0,3.0,14.0,1.0,2.0,...,0,0,1,0,0,0,1,1,0,0
