In [6]:
import pandas as pd 
import numpy as np 
from sklearn.metrics import (accuracy_score, recall_score, precision_score, f1_score)
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier 
from sklearn.model_selection import RandomizedSearchCV
from xgboost import XGBClassifier
import warnings

In [7]:
df=pd.read_csv('cleaned_data.csv')

In [8]:
df

Unnamed: 0,CustomerID,Gender,Tenure,ContractType,InternetService,TechSupport,Churn,AgeGroup,TotalChargesPerMonth
0,1,1,-0.792930,0,0,1,1,1,0.599062
1,2,1,-1.004760,0,0,1,1,1,-2.356286
2,3,2,-0.898845,0,0,0,1,2,-0.222481
3,4,2,-0.581101,1,1,1,1,2,1.067034
4,5,1,0.689875,0,0,0,1,1,-0.047870
...,...,...,...,...,...,...,...,...,...
995,996,1,1.166491,0,0,1,1,1,-1.113936
996,997,1,-0.528144,0,0,0,1,2,0.350859
997,998,2,-0.210400,0,0,1,1,2,1.380799
998,999,1,2.596340,1,1,0,1,1,-0.159594


In [9]:
X=df.drop(columns='Churn')

In [10]:
y=df['Churn']

In [11]:
from sklearn.model_selection import train_test_split
X_train, X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=42)
X_train.shape,X_test.shape

((800, 8), (200, 8))

In [15]:
def evaluate_model(true,predicted):
    
    accuracy  = accuracy_score(true, predicted)
    recall    = recall_score(true, predicted, average='weighted')
    precision = precision_score(true, predicted, average='weighted')
    f1s       = f1_score(true, predicted, average='weighted')

    return accuracy,recall,precision,f1s

In [16]:
models={
    'K-Neigbour Classifier': KNeighborsClassifier(),
    'decision Tree Classifier': DecisionTreeClassifier(),
    'Random Forest Classifier': RandomForestClassifier(),
    'Xg boost Classifier': XGBClassifier(),
    'Catboost Classifier': CatBoostClassifier(verbose=False)
}

model_list=[]
clas_report=[]

for i in range(len(list(models))):
    model=list(models.values())[i]
    model.fit(X_train, y_train)
    
    y_train_pred = model.predict(X_train)
    y_test_pred = model.predict(X_test)
    
    train_accuracy, train_recall, train_precision, train_f1s= evaluate_model(y_train, y_train_pred)
    test_accuracy, test_recall, test_precision, test_f1s= evaluate_model(y_test, y_test_pred)
    
    
    print(list(models.keys())[i])
    model_list.append(list(models.keys())[i])
    
    print('model performance for training set')
    print('- ACCURACY: {:.2%}'.format(train_accuracy))
    print('- RECALL: {:.2%}'.format(train_recall))
    print('- PRECISION: {:.2%}'.format(train_precision))
    print('- F1: {:.2%}'.format(train_f1s))
    
    print('--------------------------------------------------')
    
    print('model performance for testing set')
    print('- ACCURACY: {:.2%}'.format(test_accuracy))
    print('- RECALL: {:.2%}'.format(test_recall))
    print('- PRECISION: {:.2%}'.format(test_precision))
    print('- F1: {:.2%}'.format(test_f1s))
    clas_report.append(test_accuracy)
    
    print('='*35)
    print('\n')

NameError: name 'CatBoostClassifier' is not defined

In [17]:
models = {
    'K-Nearest Neighbors': KNeighborsClassifier(),
    'Decision Tree': DecisionTreeClassifier(),
    'Random Forest': RandomForestClassifier(),
    'XGBoost': XGBClassifier(use_label_encoder=False, eval_metric='logloss'),  # Avoid warnings
}

model_list = []
class_report = []

for model_name, model in models.items():
    # Fit the model
    model.fit(X_train, y_train)

    # Make predictions
    y_train_pred = model.predict(X_train)
    y_test_pred = model.predict(X_test)

    # Evaluate the model
    train_accuracy, train_recall, train_precision, train_f1s = evaluate_model(y_train, y_train_pred)
    test_accuracy, test_recall, test_precision, test_f1s = evaluate_model(y_test, y_test_pred)

    # Print training set performance
    print(f"Model: {model_name}")
    print("Model performance for training set:")
    print(f"- ACCURACY: {train_accuracy:.2%}")
    print(f"- RECALL: {train_recall:.2%}")
    print(f"- PRECISION: {train_precision:.2%}")
    print(f"- F1: {train_f1s:.2%}")

    print("--------------------------------------------------")

    # Print testing set performance
    print("Model performance for testing set:")
    print(f"- ACCURACY: {test_accuracy:.2%}")
    print(f"- RECALL: {test_recall:.2%}")
    print(f"- PRECISION: {test_precision:.2%}")
    print(f"- F1: {test_f1s:.2%}")
    
    class_report.append((model_name, test_accuracy))  # Store model name and accuracy for later use

    print('=' * 35)
    print('\n')

Model: K-Nearest Neighbors
Model performance for training set:
- ACCURACY: 88.62%
- RECALL: 88.62%
- PRECISION: 86.00%
- F1: 84.44%
--------------------------------------------------
Model performance for testing set:
- ACCURACY: 88.00%
- RECALL: 88.00%
- PRECISION: 78.27%
- F1: 82.85%


Model: Decision Tree
Model performance for training set:
- ACCURACY: 100.00%
- RECALL: 100.00%
- PRECISION: 100.00%
- F1: 100.00%
--------------------------------------------------
Model performance for testing set:
- ACCURACY: 100.00%
- RECALL: 100.00%
- PRECISION: 100.00%
- F1: 100.00%


Model: Random Forest
Model performance for training set:
- ACCURACY: 100.00%
- RECALL: 100.00%
- PRECISION: 100.00%
- F1: 100.00%
--------------------------------------------------
Model performance for testing set:
- ACCURACY: 100.00%
- RECALL: 100.00%
- PRECISION: 100.00%
- F1: 100.00%


Model: XGBoost
Model performance for training set:
- ACCURACY: 100.00%
- RECALL: 100.00%
- PRECISION: 100.00%
- F1: 100.00%
-----

Parameters: { "use_label_encoder" } are not used.

