### Importing Libraries

In [109]:
import pandas as pd
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.metrics import recall_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.tree import DecisionTreeClassifier
from imblearn.combine import SMOTEENN

#### Reading csv

In [110]:
df=pd.read_csv("tel_churn.csv")
df.head()

Unnamed: 0.1,Unnamed: 0,SeniorCitizen,MonthlyCharges,TotalCharges,Churn,gender_Female,gender_Male,Partner_No,Partner_Yes,Dependents_No,...,PaymentMethod_Bank transfer (automatic),PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check,tenure_group_1 - 12,tenure_group_13 - 24,tenure_group_25 - 36,tenure_group_37 - 48,tenure_group_49 - 60,tenure_group_61 - 72
0,0,0,29.85,29.85,0,1,0,0,1,1,...,0,0,1,0,1,0,0,0,0,0
1,1,0,56.95,1889.5,0,0,1,1,0,1,...,0,0,0,1,0,0,1,0,0,0
2,2,0,53.85,108.15,1,0,1,1,0,1,...,0,0,0,1,1,0,0,0,0,0
3,3,0,42.3,1840.75,0,0,1,1,0,1,...,1,0,0,0,0,0,0,1,0,0
4,4,0,70.7,151.65,1,1,0,1,0,1,...,0,0,1,0,1,0,0,0,0,0


In [111]:
df=df.drop('Unnamed: 0',axis=1)

In [112]:
x=df.drop('Churn',axis=1)
x

Unnamed: 0,SeniorCitizen,MonthlyCharges,TotalCharges,gender_Female,gender_Male,Partner_No,Partner_Yes,Dependents_No,Dependents_Yes,PhoneService_No,...,PaymentMethod_Bank transfer (automatic),PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check,tenure_group_1 - 12,tenure_group_13 - 24,tenure_group_25 - 36,tenure_group_37 - 48,tenure_group_49 - 60,tenure_group_61 - 72
0,0,29.85,29.85,1,0,0,1,1,0,1,...,0,0,1,0,1,0,0,0,0,0
1,0,56.95,1889.50,0,1,1,0,1,0,0,...,0,0,0,1,0,0,1,0,0,0
2,0,53.85,108.15,0,1,1,0,1,0,0,...,0,0,0,1,1,0,0,0,0,0
3,0,42.30,1840.75,0,1,1,0,1,0,1,...,1,0,0,0,0,0,0,1,0,0
4,0,70.70,151.65,1,0,1,0,1,0,0,...,0,0,1,0,1,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7027,0,84.80,1990.50,0,1,0,1,0,1,0,...,0,0,0,1,0,1,0,0,0,0
7028,0,103.20,7362.90,1,0,0,1,0,1,0,...,0,1,0,0,0,0,0,0,0,1
7029,0,29.60,346.45,1,0,0,1,0,1,1,...,0,0,1,0,1,0,0,0,0,0
7030,1,74.40,306.60,0,1,0,1,1,0,0,...,0,0,0,1,1,0,0,0,0,0


In [113]:
y=df['Churn']
y

0       0
1       0
2       1
3       0
4       1
       ..
7027    0
7028    0
7029    0
7030    1
7031    0
Name: Churn, Length: 7032, dtype: int64

##### Train Test Split

In [114]:
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2)

#### Decision Tree Classifier

In [115]:
model_dt=DecisionTreeClassifier(criterion = "gini",random_state = 100,max_depth=6, min_samples_leaf=8)

In [116]:
model_dt.fit(x_train,y_train)

In [117]:
y_pred=model_dt.predict(x_test)
y_pred

array([0, 0, 0, ..., 0, 0, 0], dtype=int64)

In [118]:
model_dt.score(x_test,y_test)

0.7953091684434968

In [119]:
print(classification_report(y_test, y_pred, labels=[0,1]))

              precision    recall  f1-score   support

           0       0.82      0.91      0.87      1026
           1       0.67      0.48      0.56       381

    accuracy                           0.80      1407
   macro avg       0.75      0.70      0.71      1407
weighted avg       0.78      0.80      0.78      1407



###### As you can see that the accuracy is quite low, and as it's an imbalanced dataset, we shouldn't consider Accuracy as our metrics to measure the model, as Accuracy is cursed in imbalanced datasets.

###### Hence, we need to check recall, precision & f1 score for the minority class, and it's quite evident that the precision, recall & f1 score is too low for Class 1, i.e. churned customers.

###### Hence, moving ahead to call SMOTEENN (UpSampling + ENN)

In [120]:
from imblearn.combine import SMOTEENN

# Assuming 'x' and 'y' are your features and target
sm = SMOTEENN()
X_resampled, y_resampled = sm.fit_resample(x, y)


In [121]:
xr_train,xr_test,yr_train,yr_test=train_test_split(X_resampled, y_resampled,test_size=0.2)

In [122]:
model_dt_smote=DecisionTreeClassifier(criterion = "gini",random_state = 100,max_depth=6, min_samples_leaf=8)

In [123]:
model_dt_smote.fit(xr_train,yr_train)
yr_predict = model_dt_smote.predict(xr_test)
model_score_r = model_dt_smote.score(xr_test, yr_test)
print(model_score_r)
print(metrics.classification_report(yr_test, yr_predict))


0.9387755102040817
              precision    recall  f1-score   support

           0       0.94      0.93      0.93       534
           1       0.94      0.95      0.94       642

    accuracy                           0.94      1176
   macro avg       0.94      0.94      0.94      1176
weighted avg       0.94      0.94      0.94      1176



In [124]:
print(metrics.confusion_matrix(yr_test, yr_predict))

[[494  40]
 [ 32 610]]


###### Now we can see quite better results, i.e. Accuracy: 92 %, and a very good recall, precision & f1 score for minority class.

###### Let's try with some other classifier.

#### Random Forest Classifier

In [125]:
from sklearn.ensemble import RandomForestClassifier

In [126]:
model_rf=RandomForestClassifier(n_estimators=100, criterion='gini', random_state = 100,max_depth=6, min_samples_leaf=8)

In [127]:
model_rf.fit(x_train,y_train)

In [128]:
y_pred=model_rf.predict(x_test)

In [129]:
model_rf.score(x_test,y_test)

0.7945984363894811

In [130]:
print(classification_report(y_test, y_pred, labels=[0,1]))

              precision    recall  f1-score   support

           0       0.81      0.93      0.87      1026
           1       0.70      0.43      0.53       381

    accuracy                           0.79      1407
   macro avg       0.76      0.68      0.70      1407
weighted avg       0.78      0.79      0.78      1407



In [134]:
from imblearn.combine import SMOTEENN

# Assuming 'x' and 'y' are your features and target
sm = SMOTEENN()
X_resampled, y_resampled = sm.fit_resample(x, y)


In [137]:
from sklearn.model_selection import train_test_split

xr_train1, xr_test1, yr_train1, yr_test1 = train_test_split(
    X_resampled, y_resampled, test_size=0.2, random_state=42, stratify=y_resampled
)


In [138]:
model_rf_smote=RandomForestClassifier(n_estimators=100, criterion='gini', random_state = 100,max_depth=6, min_samples_leaf=8)

In [139]:
model_rf_smote.fit(xr_train1,yr_train1)

In [140]:
yr_predict1 = model_rf_smote.predict(xr_test1)

In [141]:
model_score_r1 = model_rf_smote.score(xr_test1, yr_test1)

In [142]:
print(model_score_r1)
print(metrics.classification_report(yr_test1, yr_predict1))

0.9411764705882353
              precision    recall  f1-score   support

           0       0.97      0.90      0.93       533
           1       0.92      0.98      0.95       640

    accuracy                           0.94      1173
   macro avg       0.95      0.94      0.94      1173
weighted avg       0.94      0.94      0.94      1173



In [143]:
print(metrics.confusion_matrix(yr_test1, yr_predict1))

[[479  54]
 [ 15 625]]


###### With RF Classifier, also we are able to get quite good results, infact better than Decision Tree.

###### We can now further go ahead and create multiple classifiers to see how the model performance is, but that's not covered here, so you can do it by yourself :)

#### Performing PCA

In [144]:
# Applying PCA
from sklearn.decomposition import PCA
pca = PCA(0.9)
xr_train_pca = pca.fit_transform(xr_train1)
xr_test_pca = pca.transform(xr_test1)
explained_variance = pca.explained_variance_ratio_

In [145]:
model=RandomForestClassifier(n_estimators=100, criterion='gini', random_state = 100,max_depth=6, min_samples_leaf=8)

In [146]:
model.fit(xr_train_pca,yr_train1)

In [147]:
yr_predict_pca = model.predict(xr_test_pca)

In [148]:
model_score_r_pca = model.score(xr_test_pca, yr_test1)

In [149]:
print(model_score_r_pca)
print(metrics.classification_report(yr_test1, yr_predict_pca))

0.7382779198635976
              precision    recall  f1-score   support

           0       0.76      0.63      0.69       533
           1       0.73      0.83      0.78       640

    accuracy                           0.74      1173
   macro avg       0.74      0.73      0.73      1173
weighted avg       0.74      0.74      0.73      1173



##### With PCA, we couldn't see any better results, hence let's finalise the model which was created by RF Classifier, and save the model so that we can use it in a later stage :)

#### Pickling the model

In [150]:
import pickle

In [156]:
filename = 'modelling.sav'

In [166]:
import os
import pickle

# Use current working directory as the root (usually the project root in notebooks)
ROOT_DIR = os.getcwd()

# Target models/ directory inside root
MODEL_DIR = os.path.join(ROOT_DIR, "models")
os.makedirs(MODEL_DIR, exist_ok=True)

# Define the full path to save the model
filename = os.path.join(MODEL_DIR, "modelling.sav")

# Save the model
pickle.dump(model_rf_smote, open(filename, 'wb'))

print(f"✅ Model saved to: {filename}")


✅ Model saved to: C:\Users\asus\OneDrive\Desktop\Machine learning\ML projects\Telcom_performence\Analysis\models\modelling.sav


In [162]:
load_model = pickle.load(open(filename, 'rb'))

In [163]:
model_score_r1 = load_model.score(xr_test1, yr_test1)

In [164]:
model_score_r1

0.9411764705882353

##### Our final model i.e. RF Classifier with SMOTEENN, is now ready and dumped in model.sav, which we will use and prepare API's so that we can access our model from UI.

In [170]:
import pickle

# Full path to your feature columns file
feature_file = r"C:\\Users\\asus\\OneDrive\\Desktop\\Machine learning\\ML projects\\Telcom_performence\\models\\model.sav"

# Load the list of feature columns
with open(feature_file, "rb") as f:
    feature_columns = pickle.load(f)

print("✅ Feature columns loaded successfully:")
print(feature_columns)


✅ Feature columns loaded successfully:
RandomForestClassifier(max_depth=6, min_samples_leaf=8, random_state=100)


In [174]:
import pickle

# Assuming `model_rf_smote` is your trained model and `feature_columns` contains your feature column names
model_and_features = {
    'model': model_rf_smote,
    'feature_columns': feature_columns
}

# Save the dictionary to a file named model.sav
filename = 'model.sav'
with open(filename, 'wb') as file:
    pickle.dump(model_and_features, file)


In [175]:
# Load the model and feature columns from the model.sav file
with open('model.sav', 'rb') as file:
    loaded_data = pickle.load(file)

# Check if loaded data is a dictionary
print(type(loaded_data))  # This should print <class 'dict'>

# Extract the model and feature columns
if isinstance(loaded_data, dict):
    model = loaded_data.get('model')  # Access the model
    feature_columns = loaded_data.get('feature_columns')  # Access the feature columns
    print("Model and features loaded successfully!")
else:
    print("Loaded data is not a dictionary. Check how the model was saved.")


<class 'dict'>
Model and features loaded successfully!


In [177]:
import pickle

# Load the model and feature columns from the model.sav file
with open('model.sav', 'rb') as file:
    loaded_data = pickle.load(file)

# Check if loaded data is a dictionary
if isinstance(loaded_data, dict):
    model = loaded_data.get('model')  # Access the model
    feature_columns = loaded_data.get('feature_columns')  # Access the feature columns
    print("Model and features loaded successfully!")
else:
    print("Loaded data is not a dictionary. Check how the model was saved.")


Model and features loaded successfully!
