In [27]:
# Import necessary libraries
import pandas as pd
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
import pickle

In [12]:
# Read the cleaned churn export into a DataFrame. No column is promoted to the
# index (pandas' default), so rows keep a plain positional index.
DATA_FILE = "cleaned_df_luxottica_churn_updated_0108.csv"
dataset = pd.read_csv(DATA_FILE)

In [13]:
# One-hot encode the object/string/category columns of the raw frame into a new
# frame (the original `dataset` is left untouched). drop_first=True removes the
# first level of each encoded column so the dummies are not perfectly collinear.
# NOTE(review): the feature indices printed by the selection step reach 3301,
# so this expansion produces thousands of columns - confirm that no ID-like or
# free-text column is being encoded here.
dataset_transformed = pd.get_dummies(data=dataset, drop_first=True)

In [14]:
# Target vector: the 'Churn_Yes' column (presumably the dummy created for the
# "Yes" level of a Churn column - verify against the raw data).
y = dataset_transformed['Churn_Yes']
# Feature matrix: every remaining column of the encoded frame.
X = dataset_transformed.drop(columns=['Churn_Yes'])

In [15]:
# Rescale every feature to [0, 1]; chi2 scoring requires non-negative inputs.
# NOTE(review): the scaler is fitted on all rows of X, i.e. before the
# train/test split performed later in the notebook, so test rows contribute to
# the min/max used here.
min_max_scaler = MinMaxScaler()
min_max_scaler.fit(X)
X_scaled_for_selection = min_max_scaler.transform(X)

In [16]:
# Score each Min-Max-scaled feature against the target with the chi-squared
# statistic and keep the 10 highest-scoring ones.
# NOTE(review): the selector is fitted on all rows together with y, before the
# train/test split, so test-set labels influence which features are chosen and
# the reported test metrics may be optimistic. Fitting the selection on the
# training portion only would remove this leakage.
select_k_best = SelectKBest(score_func=chi2, k=10)
select_k_best.fit(X_scaled_for_selection, y)
X_selected_for_selection = select_k_best.transform(X_scaled_for_selection)

In [17]:
# Map the selector's support (integer column positions) back to column names
# of the unscaled feature frame, then show both.
selected_features_indices = select_k_best.get_support(indices=True)
selected_features = X.columns[selected_features_indices]
for heading, values in (
    ("Selected Feature Indices:\n", selected_features_indices),
    ("Selected Features:\n", selected_features),
):
    print(heading, values)

Selected Feature Indices:
 [   0    3    5    6   10   11   13 3299 3300 3301]
Selected Features:
 Index(['Age', 'Customer_Support_Interactions', 'Customer_Satisfaction',
       'Purchase_Frequency', 'Lifetime_Value', 'Average_Order_Value',
       'Number_of_Product_Categories_Purchased',
       'Loyalty_Program_Participation_Inactive',
       'Engagement_with_Promotions_Low', 'Engagement_with_Promotions_Medium'],
      dtype='object')


In [18]:
# Create the final feature and target datasets with selected features
# Keep only the selected columns of the original (unscaled) feature frame; the
# target y is reused unchanged.
X_final_selected = X.loc[:, selected_features]

In [19]:
# Split the dataset into training and testing sets
# Hold out 25% of the rows for testing; random_state pins the shuffle so the
# split is reproducible. The split is not stratified on y.
X_train, X_test, y_train, y_test = train_test_split(
    X_final_selected,
    y,
    test_size=0.25,
    random_state=0,
)

In [20]:
# Apply SMOTE to the training data only, so the test set keeps its original
# class distribution.
# NOTE(review): SMOTE runs before standardization and three of the selected
# features are binary dummies (see the printed feature list), so synthetic rows
# may carry fractional dummy values and neighbour distances are computed on
# unscaled columns - confirm this is intended.
smote = SMOTE(random_state=0)
resampled = smote.fit_resample(X_train, y_train)
X_train_resampled, y_train_resampled = resampled

In [21]:
# Standardize the features
scaler = StandardScaler()
X_train_resampled = scaler.fit_transform(X_train_resampled)
X_test = scaler.transform(X_test)

In [22]:
# Initialize and train the Random Forest classifier
classifier = RandomForestClassifier(random_state=0)
classifier.fit(X_train_resampled, y_train_resampled)

In [23]:
# Predict the test set results
y_pred = classifier.predict(X_test)

In [24]:
# Evaluate the model
conf_matrix = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

In [25]:
# Display the results
print("Confusion Matrix:\n", conf_matrix)
print("Accuracy:", accuracy)
print("Classification Report:\n", class_report)

Confusion Matrix:
 [[17143    44]
 [    0  6589]]
Accuracy: 0.9981493943472409
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00     17187
           1       0.99      1.00      1.00      6589

    accuracy                           1.00     23776
   macro avg       1.00      1.00      1.00     23776
weighted avg       1.00      1.00      1.00     23776



In [28]:
# Save the trained model to a file. The context manager guarantees the handle
# is flushed and closed before the file is read back below.
# NOTE: only the classifier is persisted. Predictions also require the fitted
# `scaler`, so it must be saved alongside the model before this file can be
# used outside the notebook session.
model_filename = "finalized_model_random_forest.sav"
with open(model_filename, 'wb') as model_file:
    pickle.dump(classifier, model_file)

# Example input for prediction, in raw (unscaled) units. Values are listed in
# the same order as `selected_features`.
# NOTE(review): the last two values set both Engagement_with_Promotions_Low and
# Engagement_with_Promotions_Medium to 1; these look like dummies of the same
# categorical column, in which case they cannot both be 1 - confirm.
example_input_unscaled = [[44, 2, 0.45, 4, 1200, 240.6, 1, 1, 1, 1]]
# Attach the column names so the scaler validates feature names and order
# instead of silently trusting the position of each value.
example_frame = pd.DataFrame(example_input_unscaled, columns=selected_features)
example_input = scaler.transform(example_frame)

# Load the saved model and make a prediction.
# pickle.load executes arbitrary code from the file: only load files produced
# by a trusted source (here, the file written a few lines above).
with open(model_filename, 'rb') as model_file:
    loaded_model = pickle.load(model_file)
prediction_result = loaded_model.predict(example_input).astype(int)

print("Prediction result:", prediction_result)


Prediction result: [1]


