In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the telecom-users CSV, discard its first two columns (selected by
# position, not by name), then preview the top of the resulting frame.
dataset = pd.read_csv('/kaggle/input/telecom-users-dataset/telecom_users.csv')
dataset = dataset.drop(columns = dataset.columns[[0, 1]])
dataset.head()

In [None]:
# Summarise the freshly loaded frame: column names, dtypes and non-null counts.
dataset.info()

In [None]:
# Count missing (NaN) entries per column.
dataset.isna().sum()

In [None]:
# Show TotalCharges for the customers whose tenure is 0.
# A boolean mask with .loc selects rows by label, so the lookup no longer
# depends on the index being the default 0..n-1 range (the previous form fed
# index labels to the positional .iloc).
dataset.loc[dataset['tenure'] == 0, 'TotalCharges']

In [None]:
# Customers with zero tenure get a TotalCharges of 0.
# One masked .loc assignment replaces the per-row Python loop over .at.
dataset.loc[dataset['tenure'] == 0, 'TotalCharges'] = 0

In [None]:
# Re-display TotalCharges for the zero-tenure customers to verify the fill.
# Label-based boolean selection with .loc avoids passing index labels to the
# positional .iloc, which is only correct on a default 0..n-1 index.
dataset.loc[dataset['tenure'] == 0, 'TotalCharges']

In [None]:
# Cast TotalCharges to float with a single astype call instead of invoking
# float() per element through apply. Values that cannot be parsed as a number
# still raise ValueError, exactly as before.
dataset['TotalCharges'] = dataset['TotalCharges'].astype(float)
dataset['TotalCharges']

In [None]:
# Re-check the column dtypes now that TotalCharges has been converted to float.
dataset.info()

In [None]:
# Summary statistics (count, mean, std, quartiles, ...) of the frame.
dataset.describe()

In [None]:
# Names of the columns whose dtype is object; the histogram cell below
# iterates over them.
dtype_by_column = dataset.dtypes
columns = dtype_by_column[dtype_by_column == object].index

In [None]:
# One Churn-coloured histogram per object-dtype column on a two-column grid.
# The number of rows follows the number of columns (8 rows / 25x40 figure for
# 16 columns, as before) so zip can no longer silently drop columns when the
# column set grows.
n_grid_cols = 2
n_grid_rows = max(1, -(-len(columns) // n_grid_cols))  # ceiling division
fig, axes = plt.subplots(n_grid_rows, n_grid_cols, figsize = (25, 5 * n_grid_rows))
for col, ax in zip(columns, axes.flat):
    sns.histplot(data = dataset, x = col, hue = 'Churn', ax = ax)
# Hide grid cells that received no plot (odd number of columns).
for spare_ax in axes.flat[len(columns):]:
    spare_ax.set_visible(False)
# Run the layout pass after drawing so tick labels and legends are accounted for.
fig.tight_layout(pad = 2)

In [None]:
# One violin plot per non-object column, placed on a 2x2 grid in column order.
non_object_cols = dataset.columns[dataset.dtypes != object]
fig, axes = plt.subplots(2, 2, figsize = (30, 10))
for panel, name in zip(axes.ravel(), non_object_cols):
    sns.violinplot(x = dataset[name], ax = panel)

In [None]:
# Pairwise relationship grid of the frame's variables, coloured by the Churn column.
sns.pairplot(dataset, hue = 'Churn')

In [None]:
# Correlation heatmap of the numeric columns.
# The frame still contains object-dtype columns at this point (they are only
# label-encoded further down), and DataFrame.corr() raises on non-numeric data
# in pandas >= 2.0, so restrict to numeric columns explicitly. select_dtypes
# also works on older pandas versions.
fig, ax = plt.subplots(figsize = (10, 10))
numeric_corr = dataset.select_dtypes(include = 'number').corr()
# Draw on the axes created above instead of relying on the implicit current axes.
sns.heatmap(numeric_corr, annot = True, ax = ax)

In [None]:
# Inspect the dtypes once more before the object columns are label-encoded below.
dataset.info()

In [None]:
from sklearn.preprocessing import LabelEncoder

def labelEncoder(dataset, col):
    """Return the label-encoded values of a single column.

    A fresh ``LabelEncoder`` is fitted on ``dataset[col]`` on every call and is
    discarded afterwards, so the mapping back to the original labels is not
    kept.

    Args:
        dataset: Container indexable by ``col`` (the notebook passes a DataFrame).
        col: Key of the column to encode.

    Returns:
        The result of ``LabelEncoder.fit_transform`` on that column.
    """
    encoder = LabelEncoder()
    column_values = dataset[col]
    return encoder.fit_transform(column_values)

In [None]:
# Boolean mask showing which columns currently have object dtype.
dataset.dtypes == object

In [None]:
# Last 10 rows before encoding, for comparison with the same view after encoding.
dataset.tail(10)

In [None]:
# Overwrite every object-dtype column with its label-encoded values, then
# print the frame summary to confirm the new dtypes.
object_columns = dataset.columns[dataset.dtypes == object]
for name in object_columns:
    dataset[name] = labelEncoder(dataset, name)

dataset.info()

In [None]:
# Same last 10 rows, now showing the encoded values written by the loop above.
dataset.tail(10)

In [None]:
# Distribution of TotalCharges, MonthlyCharges and tenure, split by Churn.
# `fill` is the current name of kdeplot's deprecated `shade` argument.
sns.kdeplot(data = dataset, x = 'TotalCharges', hue = 'Churn', fill = True)
sns.displot(data = dataset, x = 'MonthlyCharges', kde = True, hue = 'Churn')
sns.displot(data = dataset, x = 'tenure', kde = True, hue = 'Churn')

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing. The features are every column except
# the last one and the target is the last column
# (assumes the Churn label is the last column - TODO confirm).
features = dataset.iloc[ : , : -1]
target = dataset.iloc[ : , -1]
x_train, x_test, y_train, y_test = train_test_split(
    features,
    target,
    train_size = 0.70,
    random_state = 42,  # fixed seed: the same split on every Restart & Run All
    stratify = target,  # keep the class proportions equal in both splits
)
# Only the last expression of a cell is rendered, so print the shapes and show
# both previews together in one frame keyed by split.
print(x_train.shape, x_test.shape)
pd.concat({'train': x_train.head(), 'test': x_test.head()})

In [None]:
# Distinct target values present in the training split.
y_train.unique()

In [None]:
# Distinct target values present in the test split.
y_test.unique()

In [None]:
from sklearn.preprocessing import MinMaxScaler, StandardScaler

# Standardise the features. The scaler is fitted on the training split only and
# the learned mean/scale is then applied unchanged to the test split. Calling
# fit_transform on the test data would re-fit the scaler, scaling the two
# splits differently and leaking test-set statistics into the evaluation.
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)
x_train[0]

In [None]:
# Peek at the first two entries of the test target.
y_test[:2]

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.svm import LinearSVC
from sklearn.svm import NuSVC
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import BernoulliNB
from sklearn import neighbors
import xgboost as xgb
from sklearn.metrics import r2_score, accuracy_score

# Fixed seed passed to every estimator below that is given a random_state, so
# repeated runs produce the same scores.
SEED = 42

# Candidate classifiers, keyed by the label used in the printed summary and plots.
models = {
    'logistic': LogisticRegression(max_iter = 1000),
    'decisionTree': DecisionTreeClassifier(max_depth = 5, random_state = SEED),
    'randomForest': RandomForestClassifier(n_estimators = 150, random_state = SEED),
    'adaBoost': AdaBoostClassifier(DecisionTreeClassifier(max_depth = 1), random_state = SEED),
    'bagging': BaggingClassifier(random_state = SEED),
    'xTree': ExtraTreesClassifier(random_state = SEED),
    'gradienBoosting': GradientBoostingClassifier(random_state = SEED),
    'linearSvm': LinearSVC(random_state = SEED),
    'nuSvm': NuSVC(),
    'svm': SVC(),
    'naiveBayes': GaussianNB(),
    'berunoulliNB': BernoulliNB(),
    'xgboost': xgb.XGBClassifier(random_state = SEED),
    'Knearest': neighbors.KNeighborsClassifier(),
}

# Fit each model on the training split and collect its test predictions and
# accuracy. Both lists follow the insertion order of `models`.
accuracy_scores = []
predicted = []
for name, model in models.items():
    model.fit(x_train, y_train)
    # y_pred stays at notebook level on purpose: the next cell displays it.
    y_pred = model.predict(x_test)
    # accuracy_score takes (y_true, y_pred); int() truncates to a whole percent.
    accuracy_scores.append(int(accuracy_score(y_test, y_pred) * 100))
    predicted.append(y_pred)

for name, score in zip(models, accuracy_scores):
    print (' \n ', name, ' accuracy : ', score, ' %  ')

In [None]:
# y_pred is reassigned on every iteration of the training loop, so this shows
# the predictions of the last model that was fitted.
y_pred

In [None]:
# Bar chart of each model's whole-percent accuracy; the cell's output value is
# the best score.
model_names = list(models)
plt.figure(figsize = (25, 8))
sns.barplot(x = model_names, y = accuracy_scores)
max(accuracy_scores)

In [None]:
from sklearn.metrics import classification_report

# Per-model precision / recall / F1 on the test split.
# classification_report expects (y_true, y_pred). Passing the predictions first
# swaps precision with recall and counts support on the predictions instead of
# the true labels.
for name, y_hat in zip(models, predicted):
    print (' \n \n ', name, ' : \n \n', classification_report(y_test, y_hat))