In [15]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder

In [16]:
# Load the customer churn CSV into a DataFrame. The bare name on the last
# line makes the notebook render the frame as the cell's output.
cdata = pd.read_csv(
    filepath_or_buffer=r"C:\Users\hp\Documents\forbidden\Hidden_Databases\online retail customer churn data.csv"
)
cdata

Unnamed: 0,Customer_ID,Age,Gender,Annual_Income,Total_Spend,Years_as_Customer,Num_of_Purchases,Average_Transaction_Amount,Num_of_Returns,Num_of_Support_Contacts,Satisfaction_Score,Last_Purchase_Days_Ago,Email_Opt_In,Promotion_Response,Target_Churn
0,1,62,Other,45.15,5892.58,5,22,453.80,2,0,3,129,True,Responded,True
1,2,65,Male,79.51,9025.47,13,77,22.90,2,2,3,227,False,Responded,False
2,3,18,Male,29.19,618.83,13,71,50.53,5,2,2,283,False,Responded,True
3,4,21,Other,79.63,9110.30,3,33,411.83,5,3,5,226,True,Ignored,True
4,5,21,Other,77.66,5390.88,15,43,101.19,3,0,5,242,False,Unsubscribed,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,996,54,Male,143.72,1089.09,2,29,77.75,0,3,2,88,True,Ignored,False
996,997,19,Male,164.19,3700.24,9,90,34.45,6,4,4,352,False,Responded,True
997,998,47,Female,113.31,705.85,17,69,187.37,7,3,1,172,True,Unsubscribed,False
998,999,23,Male,72.98,3891.60,7,31,483.80,1,2,5,55,False,Responded,True


In [17]:
def customer_churn_prediction(cdata, target_column='Target_Churn', categorical_features=None):
    """Train a Random Forest classifier on ``cdata`` and evaluate it on a held-out split.

    Every column other than ``target_column`` is used as a feature, so any
    identifier-like columns present in ``cdata`` are fed to the model too.
    The categorical columns are label-encoded on the feature frame returned by
    ``drop``; the encoding is not written back into ``cdata``.

    Args:
        cdata (pd.DataFrame): The dataset containing customer information.
        target_column (str): Name of the column to predict. Defaults to
            ``'Target_Churn'``.
        categorical_features (list): Names of the feature columns to
            label-encode. If None, every object-dtype feature column is used.

    Returns:
        tuple: ``(accuracy, classification report, confusion matrix, trained model)``,
        all computed on the 20% of rows held out from training.

    Raises:
        KeyError: If ``target_column`` is not a column of ``cdata``.
    """
    # Fail early with a message that names the missing column.
    if target_column not in cdata.columns:
        raise KeyError(f"Target column {target_column!r} not found in the dataset")

    # Honour the caller's target_column instead of a hard-coded column name.
    X = cdata.drop(columns=[target_column])
    y = cdata[target_column]

    if categorical_features is None:
        categorical_features = X.select_dtypes(include=['object']).columns.tolist()

    # A fresh encoder per column: each column gets its own label -> integer mapping.
    for col in categorical_features:
        X[col] = LabelEncoder().fit_transform(X[col])

    # Fixed random_state keeps the 80/20 split (and the forest below) reproducible.
    X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, random_state=42)

    model = RandomForestClassifier(random_state=42)
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)

    ## Evaluate the model ##
    accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred)
    matrix = confusion_matrix(y_test, y_pred)

    return accuracy, report, matrix, model

In [18]:
if __name__ == "__main__":
    # Inside a notebook kernel __name__ is "__main__", so this cell always runs
    # when executed; the guard only matters if the code is moved into a module.
    try:
        df = pd.read_csv(r"C:\Users\hp\Documents\forbidden\Hidden_Databases\online retail customer churn data.csv")

        accuracy, report, matrix, trained_model = customer_churn_prediction(df)

        # Print each metric under its heading, in the same order as before.
        for label, value in (
            ("Accuracy:", accuracy),
            ("\nClassification Report:\n", report),
            ("\nConfusion Matrix:\n", matrix),
        ):
            print(label, value)
    except Exception as exc:
        # Any failure (missing file, bad column, training error) is reported
        # as a one-line message instead of a traceback.
        print(f"An error occurred: {exc}")

Accuracy: 0.5

Classification Report:
               precision    recall  f1-score   support

       False       0.46      0.33      0.38        94
        True       0.52      0.65      0.58       106

    accuracy                           0.50       200
   macro avg       0.49      0.49      0.48       200
weighted avg       0.49      0.50      0.49       200


Confusion Matrix:
 [[31 63]
 [37 69]]


## NOTES ##

- Use of `if __name__ == "__main__":`

    It's often used to include test code or example usage within a module. When the module is imported, the test code is not executed, 
    but when the module is run directly, the test code is executed.

    The `if __name__ == "__main__":` block acts as a gatekeeper.
    When you run the module directly, the gate is open, and the example/test code runs.
    When you import the module, the gate is closed, and the example/test code is skipped.
    In a Jupyter notebook, cells execute with `__name__` set to `"__main__"`, so the gate is always open here.