# Download churn dataset

In [None]:
!kaggle datasets list -s telco-customer-churn

In [None]:
from utils.paths import DATA_RAW_DIR, DATA_INTERIM_DIR, DATA_PROCESSED_DIR, MODELS_DIR

In [None]:
!kaggle datasets download -d blastchar/telco-customer-churn -p {DATA_RAW_DIR} --unzip

# import libraries

In [None]:
import joblib
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report, roc_curve, auc

In [None]:
import tensorflow as tf

In [None]:
import keras
from keras.models import load_model
from keras.callbacks import EarlyStopping

# load dataset

In [None]:
from pathlib import Path

# Location of the raw Telco churn CSV inside DATA_RAW_DIR.
path_churn = DATA_RAW_DIR / "WA_Fn-UseC_-Telco-Customer-Churn.csv"
# Sanity check: prints True when the file is present at that location.
print(path_churn.exists())

In [None]:
# Load the raw CSV (comma-separated, UTF-8) into a DataFrame and preview the
# first rows.
df_churn = pd.read_csv(path_churn, sep=",", encoding="utf-8")
df_churn.head()

# EDA

In [None]:
df_churn.describe(include="all")

In [None]:
# summary with pandas

def summarize_dataframe(df: pd.DataFrame) -> pd.DataFrame:
    """Build a one-row-per-column overview of ``df``.

    Args:
        df: The frame to summarize.

    Returns:
        A DataFrame indexed by the column labels of ``df`` with the columns
        ``type`` (dtype), ``unique_values`` (number of distinct non-null
        values), ``missing_values`` (number of nulls) and ``examples`` (an
        array holding up to three distinct non-null values).
    """
    # Collect the samples column by column. ``df.apply`` expands the returned
    # arrays into a DataFrame whenever they all share one length (for example
    # when every column has three or more distinct values), and that result
    # cannot be stored in the single ``examples`` column.
    examples = pd.Series(
        [column.dropna().unique()[:3] for _, column in df.items()],
        index=df.columns,
        dtype=object,
    )
    return pd.DataFrame(
        {
            "type": df.dtypes,
            "unique_values": df.nunique(),
            "missing_values": df.isnull().sum(),
            "examples": examples,
        }
    )



In [None]:
summary = summarize_dataframe(df_churn)
summary

In [None]:
from ydata_profiling import ProfileReport

ProfileReport(df_churn, minimal=True)

In [None]:
df_churn.columns

In [None]:
# Drop customerID and gender, which this notebook treats as unnecessary, so
# they are not used as model features.
df_churn = df_churn.drop(columns=["customerID", "gender"])

In [None]:
# check missing values?
df_churn.isnull().sum()

In [None]:
# Encode the target as integers: 'Yes' -> 1, 'No' -> 0; any other value is
# kept as it is, so re-running this cell is harmless.
# An explicit mapping is used instead of Series.replace because replace relies
# on implicit object -> integer downcasting, which pandas 2.2 deprecates
# (FutureWarning).
churn_codes = {'Yes': 1, 'No': 0}
df_churn['Churn'] = df_churn['Churn'].map(lambda value: churn_codes.get(value, value))

In [None]:
df_churn.Churn

# label encoder

In [None]:
print(df_churn.shape)
print(df_churn.columns)

In [None]:
df_churn.dtypes

In [None]:
# Define categorical columns
# NOTE(review): in the Kaggle Telco CSV `TotalCharges` is usually parsed as
# text because a few rows hold blank strings -- confirm with `df_churn.dtypes`.
# Left as text it would be selected below as categorical, label-encoded in
# string order and only then min-max scaled, which destroys its numeric meaning.
# Parse it as a number first; values that cannot be parsed become NaN and are
# set to 0.0 (assumption: blanks are customers with no charges yet -- TODO
# confirm).
df_churn['TotalCharges'] = pd.to_numeric(df_churn['TotalCharges'], errors='coerce').fillna(0.0)
categorical_cols = df_churn.select_dtypes(include=['object']).columns.tolist()
categorical_cols

In [None]:
# Replace the literals 'Yes'/'No' with 1/0 in every categorical column; any
# other value in those columns is left unchanged.
df_churn[categorical_cols] = df_churn[categorical_cols].replace({'Yes': 1, 'No': 0})


In [None]:
df_churn.head(15)

In [None]:
df_churn.dtypes

In [None]:
df_churn.head(10)

In [None]:
# Fit one LabelEncoder per categorical column and keep each fitted encoder,
# keyed by column name.
label_encoders = {}
for col in categorical_cols:
    le = LabelEncoder()
    # Cast to str first: the Yes/No replacement above can leave integers and
    # strings mixed inside one column.
    df_churn[col] = le.fit_transform(df_churn[col].astype(str))  
    label_encoders[col] = le

In [None]:
# Persist the dict of fitted encoders to MODELS_DIR / "label_encoders.pkl".
joblib.dump(label_encoders, MODELS_DIR / "label_encoders.pkl")

In [None]:
# Columns that are rescaled with MinMaxScaler.
scale_cols = ['tenure','MonthlyCharges','TotalCharges']

# NOTE(review): the scaler is fitted on the whole DataFrame, before the
# train/test split performed further down, so the rows that end up in the test
# set contribute to the fitted minimum and maximum.
scale = MinMaxScaler()
df_churn[scale_cols] = scale.fit_transform(df_churn[scale_cols])

In [None]:
df_churn.head(15)

In [None]:
df_churn.shape

In [None]:
df_churn.dtypes

In [None]:
# Persist the fitted scaler to MODELS_DIR / "scaler.pkl".
joblib.dump(scale, MODELS_DIR / "scaler.pkl")

# training

In [None]:
# Split the data into features and target: X holds every column except
# "Churn", y holds the "Churn" column.
X = df_churn.drop("Churn", axis=1)
y = df_churn["Churn"]

In [None]:
# Hold out 20% of the rows as the test set; random_state=42 is fixed so the
# split is the same on every run.
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Print the shapes of the four resulting pieces.
print(x_train.shape, y_train.shape, x_test.shape, y_test.shape)

# neural network

In [None]:
# define columns/feature

num_features = len(X.columns)
num_features

In [None]:
# define sequential model
# Layers, in order: an input of `num_features` values, one Dense layer with
# 16 ReLU units, and a single Dense unit with a sigmoid activation.
model_sequential = keras.Sequential(
    [
        keras.layers.Input(shape=(num_features,)),
        keras.layers.Dense(16, activation="relu"),
        keras.layers.Dense(1, activation="sigmoid"),
    ]
)

In [None]:
keras.utils.plot_model(model_sequential, show_shapes=True)

## loss function

# run with 50 epochs