In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import sklearn
from imblearn.over_sampling import SMOTE 
import json
from sklearn.cluster import KMeans
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split
import sklearn.linear_model as linearModels
import sklearn.ensemble as ensembleModels
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.decomposition import PCA
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, roc_auc_score, classification_report

In [None]:
# Load the churn table, using the customerID column as the row index.
# NOTE(review): the path starts with '.data/' (a dot-prefixed directory), not
# './data/' — confirm this is the intended location.
df = pd.read_csv(
    filepath_or_buffer='.data/telco_customer_churn.csv',
    index_col='customerID',
)
# Preview the first rows transposed, so each column is shown as a row.
df.head().transpose()

# EDA

# Pipelines

In [None]:
# Separate the feature columns from the target column, then hold out 20% of
# the rows for evaluation. The split is seeded so it is reproducible.
target_col_name = 'Churn'
X = df.drop(columns=[target_col_name])

# Double brackets keep y as a single-column DataFrame rather than a Series.
y = df[[target_col_name]]
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.8,
    random_state=444,
)

In [None]:
# Feature column names grouped by how they are preprocessed further down:
# 'numeric' columns are standard-scaled, 'categoricals' are one-hot encoded.
# The order of each list fixes the column order of the transformed arrays.
columns = {
    'numeric': [
        'tenure',
        'MonthlyCharges',
        'TotalCharges',
    ],
    'categoricals': [
        'gender',
        'SeniorCitizen',
        'Partner',
        'Dependents',
        'PhoneService',
        'PaperlessBilling',
        'MultipleLines',
        'InternetService',
        'OnlineSecurity',
        'OnlineBackup',
        'DeviceProtection',
        'TechSupport',
        'StreamingTV',
        'StreamingMovies',
        'Contract',
        'PaymentMethod',
    ],
}

## Numerics

In [None]:
# Standard-scale the numeric training features. Cells holding a single blank
# string (' ') are replaced by 0 first.
# NOTE(review): presumably the blanks occur in TotalCharges — confirm against
# the raw data.
numeric_transformer = StandardScaler()
X_numerics = numeric_transformer.fit_transform(
    X_train.loc[:, columns['numeric']].replace(to_replace=' ', value=0)
)

## Categoricals

In [None]:
# One-hot encode the categorical training features into a dense array. The
# encoder is fitted on the training split only, then applied to that split.
categorical_encoder = OneHotEncoder(sparse_output=False)
categorical_encoder.fit(X_train[columns['categoricals']])
X_categoricals = categorical_encoder.transform(X_train[columns['categoricals']])

## Join

In [None]:
# Place the scaled numeric block and the one-hot block side by side, numeric
# columns first, to form the training design matrix.
X_train_trans = np.hstack([X_numerics, X_categoricals])

## Target

In [None]:
# Binarize the target: with the categories pinned to ['No', 'Yes'] and the
# first one dropped, the single remaining column is 1 for 'Yes' and 0 for 'No'.
target_encoder = OneHotEncoder(
    categories=[['No', 'Yes']],
    drop='first',
    sparse_output=False,
)
# Flatten the (n, 1) encoder output into a 1-D label vector.
y_train_trans = np.ravel(target_encoder.fit_transform(y_train))
y_train_trans

# Baseline

In [None]:
# Baseline: a random forest with default hyperparameters and a fixed seed,
# fitted on the transformed training data. fit() returns the estimator itself.
model = ensembleModels.RandomForestClassifier(random_state=11).fit(
    X_train_trans,
    y_train_trans,
)
# Display the fitted estimator as the cell output.
model

In [None]:
# Apply the transformers that were fitted on the training split to the
# held-out split. Nothing is re-fitted on test data.
# NOTE(review): X_numerics / X_categoricals are reassigned here and now hold
# test-set features; re-running the "Join" cell after this one would build
# X_train_trans from test rows.
X_numerics = numeric_transformer.transform(X_test[columns['numeric']].replace(' ', 0))
X_categoricals = categorical_encoder.transform(X_test[columns['categoricals']])
X_test_trans = np.concatenate((X_numerics, X_categoricals), axis=1)

# Encode the test target with transform(), not fit_transform(), so the encoder
# fitted on y_train is reused unchanged instead of being re-fitted on y_test.
y_test_trans = target_encoder.transform(y_test).ravel()

In [None]:
# Score the baseline on the held-out split: predict once, then compute the
# three metrics against the encoded test labels.
predictions = model.predict(X_test_trans)
accuracy, precision, recall = (
    metric(y_test_trans, predictions)
    for metric in (accuracy_score, precision_score, recall_score)
)

# Report each metric as a percentage with one decimal place.
print(f"""\
Accuracy:  {accuracy:0.1%}
Precision: {precision:0.1%}
Recall:    {recall:0.1%}
""")