In [None]:
import joblib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import plot_tree
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.metrics import roc_curve, auc

In [None]:
# Load the raw dataset from 'churn.csv' (path is relative to the working directory).
df=pd.read_csv('churn.csv')

In [None]:
# Display the loaded frame as the cell output.
df

In [None]:
# Print the column index, dtypes and non-null counts of df.
df.info()

In [None]:
# Count missing (NaN/None) entries per column.
df.isnull().sum()

In [None]:
# Show summary statistics for df's columns.
df.describe()

In [None]:
# Remove the 'customerID' column before encoding and modelling.
# errors='ignore' keeps this cell re-runnable: without it, executing the cell a
# second time raises KeyError because the column has already been dropped.
df=df.drop(columns=['customerID'],errors='ignore')

In [None]:
# Collect the labels of every column that pandas stores with the object dtype.
object_frame=df.select_dtypes(include=['object'])
objects_columns=object_frame.columns

In [None]:
# Display the object-dtype column labels held in objects_columns.
objects_columns

In [None]:
# Replace every object-dtype column with integer codes, keeping one fitted
# LabelEncoder per column in `encoders` so the mapping can be reused later.
# NOTE(review): a column that holds numbers stored as text would be turned into
# arbitrary category codes here rather than parsed as numeric - confirm that
# churn.csv has no such column.
encoders={}
for columns in objects_columns:
  encoder=LabelEncoder()
  df[columns]=encoder.fit_transform(df[columns])
  encoders[columns]=encoder

In [None]:
# Pairwise correlations between df's columns, drawn as an annotated heatmap.
corr=df.corr()
fig,ax=plt.subplots(figsize=(10,10))
sns.heatmap(corr,annot=True,ax=ax)

In [None]:
# Box plot of 'tenure' grouped by the 'gender' column.
fig,ax=plt.subplots(figsize=(4,3))
sns.boxplot(data=df,x='gender',y='tenure',ax=ax)
ax.set_title('Gender vs. Tenure')
plt.show()

In [None]:
# 30-bin histogram of 'MonthlyCharges' with a kernel-density overlay.
fig,ax=plt.subplots(figsize=(5,3))
sns.histplot(df['MonthlyCharges'],bins=30,kde=True,ax=ax)
ax.set_title('Histogram of Monthly Charges')
ax.set_xlabel('Monthly Charges')
ax.set_ylabel('Frequency')
plt.show()

In [None]:
# Pairwise relationship grid over df's columns.
# NOTE(review): the grid grows with the square of the column count, so this
# cell may be slow on a wide frame - consider passing a subset of columns.
sns.pairplot(df)

In [None]:
# Separate the target column 'Churn' from the predictor columns.
Y=df['Churn']
X=df.drop(columns='Churn')

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train,X_test,Y_train,Y_test=train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Learn the scaling statistics from the training rows only, then apply that
# same fitted transform to both the training and the held-out rows.
scaler=StandardScaler().fit(X_train)
X_train=scaler.transform(X_train)
X_test=scaler.transform(X_test)

In [None]:
# Sanity-check the split sizes: feature matrices first, then the matching targets.
# The last line reports Y_test; it previously repeated Y_train, so the test
# target's shape was never shown.
print(X_train.shape)
print(X_test.shape)
print(Y_train.shape)
print(Y_test.shape)

In [None]:
# Display the training feature matrix as produced by the scaler.
X_train

In [None]:
# Fit a default-configured logistic regression on the training split and
# predict class labels for the held-out rows.
lr=LogisticRegression().fit(X_train,Y_train)
y_pred_lr=lr.predict(X_test)

In [None]:
# Score the logistic-regression predictions against the held-out labels.
# Each metric is bound to a plain float: a trailing comma after the call would
# wrap the score in a one-element tuple (printed as e.g. "(0.65,)").
accuracy_lr=accuracy_score(Y_test,y_pred_lr)
precision_lr=precision_score(Y_test,y_pred_lr)
recall_lr=recall_score(Y_test,y_pred_lr)
f1_lr=f1_score(Y_test,y_pred_lr)

In [None]:
# Report the logistic-regression scores, one metric per line.
for metric_name,metric_value in (
    ('Accuracy:',accuracy_lr),
    ('Precision:',precision_lr),
    ('Recall:',recall_lr),
    ('F1_score:',f1_lr),
):
    print(metric_name,metric_value)

In [None]:
# ROC curve for the logistic-regression model on the held-out split.
# roc_curve and auc are imported in the notebook's top import cell rather than
# mid-notebook, so all dependencies are visible in one place.

# Column 1 of predict_proba is the probability assigned to lr.classes_[1].
y_pred_lr_prob = lr.predict_proba(X_test)[:, 1]

# False/true positive rates across decision thresholds, and the area under that curve.
fpr, tpr, thresholds = roc_curve(Y_test, y_pred_lr_prob)
roc_auc = auc(fpr, tpr)

plt.figure(figsize=(5,4))
plt.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve (area = %0.2f)' % roc_auc)
# Dashed diagonal: reference line for a classifier with no discriminative power.
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.xlim([0.0, 1.0])
# Slight headroom above 1.0 so the curve is not clipped by the top frame.
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend(loc="lower right")
plt.show()


In [None]:
# Fit a decision tree on the training split and predict the held-out rows.
# random_state is pinned (same seed as the train/test split) so that repeated
# runs build the same tree; without it, tie-breaking between equally good
# splits can differ from run to run.
dt=DecisionTreeClassifier(random_state=42)
dt.fit(X_train,Y_train)
y_pred_dt=dt.predict(X_test)

In [None]:
# Score the decision-tree predictions against the held-out labels.
# Each metric is bound to a plain float: a trailing comma after the call would
# wrap the score in a one-element tuple (printed as e.g. "(0.65,)").
accuracy_dt=accuracy_score(Y_test,y_pred_dt)
precision_dt=precision_score(Y_test,y_pred_dt)
recall_dt=recall_score(Y_test,y_pred_dt)
f1_dt=f1_score(Y_test,y_pred_dt)

In [None]:
# Report the decision-tree scores, one metric per line.
for metric_name,metric_value in (
    ('Accuracy:',accuracy_dt),
    ('Precision:',precision_dt),
    ('Recall:',recall_dt),
    ('F1_score:',f1_dt),
):
    print(metric_name,metric_value)

In [None]:
# Draw the fitted decision tree.
# plot_tree is imported in the notebook's top import cell.
# feature_names is passed as a plain list: some scikit-learn releases validate
# this argument strictly and reject a pandas Index such as X.columns.
plt.figure(figsize=(30,25))
plot_tree(dt, filled=True, feature_names=list(X.columns), class_names=['0','1'], rounded=True)
plt.show()

In [None]:
# Fit a random forest on the training split and predict the held-out rows.
# random_state is pinned (same seed as the train/test split) so the bootstrap
# samples and feature sub-sampling - and therefore the reported scores - are
# the same on every run.
rf=RandomForestClassifier(random_state=42)
rf.fit(X_train,Y_train)
y_pred_rf=rf.predict(X_test)

In [None]:
# Score the random-forest predictions against the held-out labels.
# Each metric is bound to a plain float: a trailing comma after the call would
# wrap the score in a one-element tuple (printed as e.g. "(0.65,)").
accuracy_rf=accuracy_score(Y_test,y_pred_rf)
precision_rf=precision_score(Y_test,y_pred_rf)
recall_rf=recall_score(Y_test,y_pred_rf)
f1_rf=f1_score(Y_test,y_pred_rf)

In [None]:
# Report the random-forest scores, one metric per line.
for metric_name,metric_value in (
    ('Accuracy:',accuracy_rf),
    ('Precision:',precision_rf),
    ('Recall:',recall_rf),
    ('F1_score:',f1_rf),
):
    print(metric_name,metric_value)

In [None]:
# Draw the first tree of the fitted forest (rf.estimators_[0]) as an example member.
# feature_names is passed as a plain list: some scikit-learn releases validate
# this argument strictly and reject a pandas Index such as X.columns.
plt.figure(figsize=(30,25))
plot_tree(rf.estimators_[0], filled=True, feature_names=list(X.columns), class_names=['0','1'], rounded=True)
plt.show()

In [None]:
# Fit a default-configured support vector classifier on the training split and
# predict class labels for the held-out rows.
svm=SVC().fit(X_train,Y_train)
y_pred_svm=svm.predict(X_test)

In [None]:
# Score the SVM predictions against the held-out labels.
# Each metric is bound to a plain float: a trailing comma after the call would
# wrap the score in a one-element tuple (printed as e.g. "(0.65,)").
accuracy_svm=accuracy_score(Y_test,y_pred_svm)
precision_svm=precision_score(Y_test,y_pred_svm)
recall_svm=recall_score(Y_test,y_pred_svm)
f1_svm=f1_score(Y_test,y_pred_svm)

In [None]:
# Report the SVM scores, one metric per line.
for metric_name,metric_value in (
    ('Accuracy:',accuracy_svm),
    ('Precision:',precision_svm),
    ('Recall:',recall_svm),
    ('F1_score:',f1_svm),
):
    print(metric_name,metric_value)

In [None]:
# Fit a default-configured k-nearest-neighbours classifier on the training
# split and predict class labels for the held-out rows.
knn=KNeighborsClassifier().fit(X_train,Y_train)
y_pred_knn=knn.predict(X_test)

In [None]:
# Score the k-nearest-neighbours predictions against the held-out labels.
# Each metric is bound to a plain float: a trailing comma after the call would
# wrap the score in a one-element tuple (printed as e.g. "(0.65,)").
accuracy_knn=accuracy_score(Y_test,y_pred_knn)
precision_knn=precision_score(Y_test,y_pred_knn)
recall_knn=recall_score(Y_test,y_pred_knn)
f1_knn=f1_score(Y_test,y_pred_knn)

In [None]:
# Report the k-nearest-neighbours scores, one metric per line.
for metric_name,metric_value in (
    ('Accuracy:',accuracy_knn),
    ('Precision:',precision_knn),
    ('Recall:',recall_knn),
    ('F1_score:',f1_knn),
):
    print(metric_name,metric_value)

In [None]:
# Fit a gradient-boosting classifier on the training split and predict the
# held-out rows.
# random_state is pinned (same seed as the train/test split) so the estimator's
# internal randomness is fixed and the reported scores are repeatable.
GBC=GradientBoostingClassifier(random_state=42)
GBC.fit(X_train,Y_train)
y_pred_GBC=GBC.predict(X_test)

In [None]:
# Score the gradient-boosting predictions against the held-out labels.
# Each metric is bound to a plain float: a trailing comma after the call would
# wrap the score in a one-element tuple (printed as e.g. "(0.65,)").
accuracy_GBC=accuracy_score(Y_test,y_pred_GBC)
precision_GBC=precision_score(Y_test,y_pred_GBC)
recall_GBC=recall_score(Y_test,y_pred_GBC)
f1_GBC=f1_score(Y_test,y_pred_GBC)

In [None]:
# Report the gradient-boosting scores, one metric per line.
for metric_name,metric_value in (
    ('Accuracy:',accuracy_GBC),
    ('Precision:',precision_GBC),
    ('Recall:',recall_GBC),
    ('F1_score:',f1_GBC),
):
    print(metric_name,metric_value)

In [None]:
# Persist the fitted preprocessing objects and every trained classifier.
# joblib is imported in the notebook's top import cell.
#
# The models were trained on label-encoded, standardised features, so the
# fitted `encoders` and `scaler` are saved alongside them: without these a
# reloaded model cannot be applied to new raw rows consistently.
# Security note: only load these .pkl files from a trusted location, since
# unpickling can execute arbitrary code.
joblib.dump(encoders, 'encoders.pkl')
joblib.dump(scaler, 'scaler.pkl')

joblib.dump(dt, 'dt_model.pkl')
joblib.dump(rf, 'rf_model.pkl')
joblib.dump(GBC, 'gbc_model.pkl')
joblib.dump(svm, 'svm_model.pkl')
joblib.dump(knn, 'knn_model.pkl')
joblib.dump(lr, 'lr_model.pkl')