# Import libraries

In [None]:
%matplotlib inline

import pandas as pd
import numpy as np

# Load data

Dataset source (Kaggle, Telco Customer Churn): https://www.kaggle.com/blastchar/telco-customer-churn#WA_Fn-UseC_-Telco-Customer-Churn.csv

In [None]:
# Read the churn table from disk; a comma is already read_csv's default delimiter.
df = pd.read_csv('Data/telco_customer_churn.csv')

In [None]:
# Column labels of the loaded table.
df.columns

In [None]:
# (number of rows, number of columns) of the loaded table.
df.shape

# Data preprocessing / Feature engineering

#### Select variables

### Initial look at the data

In [None]:
# First five rows of the raw data.
df.head(5)

In [None]:
# Preview the model inputs: every column except customerID and Churn.
# Churn is left out here because it is read separately as the label further down.

# Alternative kept for reference: list the feature columns explicitly instead
# of dropping the two unwanted ones.
#
# selected_features = ['gender', 'SeniorCitizen', 'Partner', 'Dependents', 'tenure',
#                      'PhoneService', 'MultipleLines', 'InternetService', 'OnlineSecurity',
#                      'OnlineBackup', 'DeviceProtection', 'TechSupport', 'StreamingTV',
#                      'StreamingMovies', 'Contract', 'PaperlessBilling', 'PaymentMethod',
#                      'MonthlyCharges', 'TotalCharges']
# 
# df[selected_features].head(5)

df.drop(columns=['customerID', 'Churn']).head(5)

In [None]:
# Feature frame: the raw table without customerID and Churn.
# `df` itself is left untouched so Churn can still be read from it later.
df_new = df.drop(['customerID', 'Churn'], axis=1)

In [None]:
# Shape after dropping customerID and Churn.
df_new.shape

#### One-hot encoding

In [None]:
# Columns that hold categories rather than quantities. Each one is replaced by
# a group of indicator columns, one per distinct value.
categorical_columns = ['gender', 'SeniorCitizen', 'Partner', 'Dependents', 'PhoneService',
                       'MultipleLines', 'InternetService', 'OnlineSecurity', 'OnlineBackup',
                       'DeviceProtection', 'TechSupport', 'StreamingTV', 'StreamingMovies',
                       'Contract', 'PaperlessBilling', 'PaymentMethod']

# dtype=float keeps the indicator columns numeric. Without it, pandas >= 2.0
# emits bool indicators; combined with the remaining numeric columns,
# `df_new.values` then becomes an object array that Keras cannot turn into a
# tensor. With older pandas the resulting feature matrix is unchanged.
df_new = pd.get_dummies(df_new, columns=categorical_columns, dtype=float)

# No separate drop is needed: get_dummies replaces the columns listed in
# `columns` with their indicator columns.

In [None]:
# Shape after one-hot encoding.
df_new.shape

In [None]:
# Column labels after encoding; this is also the feature order of X below.
df_new.columns

In [None]:
# First five rows of the encoded feature frame.
df_new.head(5)

# Split into train and test set

In [None]:
# Feature matrix: one row per customer, one column per entry of df_new.columns.
# NOTE(review): assumes every column of df_new is numeric at this point —
# check df_new.dtypes if the model later rejects the input.
X = df_new.to_numpy()

# Labels come from the original frame, because Churn was dropped from df_new.
y = df['Churn'].to_numpy()

In [None]:
# Inspect the feature matrix.
X

In [None]:
# Inspect the raw labels as read from the Churn column.
y

In [None]:
# Encode the labels numerically: 'Yes' becomes 1.0, anything else becomes 0.0.
y = (y == 'Yes').astype(float)

In [None]:
# Inspect the labels after the 1.0 / 0.0 conversion.
y

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [None]:
# Shapes of the full feature matrix, then the training part, then the test part.
for split in (X, X_train, X_test):
    print(split.shape)

# Build and train model

In [None]:
from keras import backend, Sequential, regularizers, optimizers, models
from keras.layers import InputLayer, Dense

In [None]:
# Reset the Keras backend state so that re-running the cells below starts
# from a clean slate instead of stacking up models from earlier runs.
backend.clear_session()

In [None]:
# Build model: one hidden tanh layer of 32 units feeding a single sigmoid
# output unit. Both layers carry an L2 penalty (0.01) on kernel and bias.
model = Sequential([
    Dense(
        units=32,
        activation='tanh',
        input_shape=(X_train.shape[1],),  # one input per feature column
        kernel_regularizer=regularizers.l2(0.01),
        bias_regularizer=regularizers.l2(0.01),
    ),
    Dense(
        units=1,
        activation='sigmoid',
        kernel_regularizer=regularizers.l2(0.01),
        bias_regularizer=regularizers.l2(0.01),
    ),
])

In [None]:
# Print the layer-by-layer overview of the network.
model.summary()

In [None]:
# Define loss function: binary cross-entropy, which fits the single sigmoid
# output and the 1.0 / 0.0 labels.
loss = 'binary_crossentropy'

In [None]:
# Define metrics: accuracy on the binary labels, passed to compile() below.
metrics = ['binary_accuracy']

In [None]:
# Define optimizer: Adam with an initial learning rate of 1e-3 and a decay of 1e-2.
# NOTE(review): newer Keras releases name the first argument `learning_rate`
# and no longer accept `decay` — confirm against the installed Keras version
# before upgrading.
optimizer = optimizers.Adam(lr=1e-3, decay=1e-2)

In [None]:
# Compile model: attach the loss, optimizer and metrics defined in the cells above.
# `sample_weight_mode` is intentionally not passed. The original passed None,
# which is the default wherever the argument exists, and newer Keras releases
# reject the keyword altogether.
model.compile(
    loss=loss,
    optimizer=optimizer,
    metrics=metrics,
)

In [None]:
# Train model for 10 passes over the training data in mini-batches of 128.
# The call is left as the cell's last expression so its return value is shown.
model.fit(
    x=X_train,
    y=y_train,
    epochs=10,
    batch_size=128,
    validation_split=0.2,  # a fifth of the training rows is held out for validation
    verbose=1,
)

# Make predictions

In [None]:
# Feature order the model was trained on (same as the columns of X).
df_new.columns

In [None]:
# One encoded test row, as a reference for the layout of a single model input.
X_test[0]

In [None]:
# Hand-written feature vector for a single customer (46 values).
# Group labels assume the column order shown by df_new.columns: the numeric
# columns first, then one indicator group per categorical column — verify
# against that output before editing any value.
test_input = np.array(
    [
        2.0, 24.4, 1548.65,   # numeric columns (presumably tenure, MonthlyCharges, TotalCharges)
        0.0, 1.0,             # gender
        0.0, 1.0,             # SeniorCitizen
        0.0, 1.0,             # Partner
        0.0, 1.0,             # Dependents
        0.0, 1.0,             # PhoneService
        0.0, 0.0, 1.0,        # MultipleLines
        0.0, 0.0, 1.0,        # InternetService
        0.0, 1.0, 0.0,        # OnlineSecurity
        0.0, 1.0, 0.0,        # OnlineBackup
        0.0, 1.0, 0.0,        # DeviceProtection
        0.0, 1.0, 0.0,        # TechSupport
        0.0, 1.0, 0.0,        # StreamingTV
        0.0, 1.0, 0.0,        # StreamingMovies
        0.0, 0.0, 1.0,        # Contract
        0.0, 1.0,             # PaperlessBilling
        1.0, 0.0, 0.0, 0.0,   # PaymentMethod
    ]
)

In [None]:
# Add a leading batch axis: predict() is given a batch of rows, not a bare row.
test_input = test_input[np.newaxis, ...]

In [None]:
# Check that the leading batch axis of length 1 is in place.
test_input.shape

In [None]:
# Score the single hand-written customer; the output is the sigmoid value of
# the model's last layer, where the label 1.0 stands for Churn == 'Yes'.
prediction = model.predict(test_input)
print(prediction)

In [None]:
# Sigmoid outputs of the model for every held-out test row.
y_pred = model.predict(X_test)

In [None]:
# Inspect the raw model outputs for the test set.
y_pred

# Performance metrics

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score

In [None]:
# Side by side: the first 12 true labels and the first 12 raw model outputs.
print(y_test[:12])
print(y_pred.squeeze()[:12])

In [None]:
# Turn both arrays into 'Yes' / 'No' labels: true labels via 1.0 -> 'Yes',
# model outputs via a 0.5 threshold.
# This overwrites y_test and y_pred in place, so the cell is meant to run once
# per prediction; re-run the split and predict cells before running it again.
y_test = np.where(y_test==1.0, 'Yes', 'No')
y_pred = np.where(y_pred>0.5, 'Yes', 'No')

In [None]:
# Same comparison as before, now on the 'Yes' / 'No' labels.
print(y_test[:12])
print(y_pred.squeeze()[:12])

In [None]:
# Counts of true labels versus predicted labels on the test set.
confusion_matrix(y_test, y_pred)

In [None]:
# Score the thresholded predictions; 'Yes' (churn) is the positive class for
# precision and recall.
acc = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, pos_label='Yes')
recall = recall_score(y_test, y_pred, pos_label='Yes')

# Report each score with four significant digits.
for label, value in (
    ('Accuracy', acc),
    ('Precision', precision),
    ('Recall/Sensitivity', recall),
):
    print(f'{label}: {value:.4}')

# Øvelser - Fine tune model

- Prøv at træne modellen med forskellige hyperparametre, og se hvordan det påvirker performance metrics.
- Prøv at fjerne kolonner i træningsdata og træn modellen igen. Se hvordan det påvirker performance metrics.
- Prøv at tilføje flere forskellige lag til det neurale netværk. Se hvordan det påvirker performance metrics.
- Prøv at bruge en anden optimizer til træningen af det neurale netværk. Se hvordan det påvirker performance metrics.
- Prøv at lade modellen træne i længere tid ved at sætte ```epochs``` op. Se hvordan det påvirker performance metrics.
- Test forskellige kombinationer af de ovenstående punkter, og se hvor god performance man kan få.

# Exercise - Fine tune model

- Try training the model with different hyperparameters, and see how it affects the performance metrics.
- Try removing columns from the training data and retrain the model. See how it affects the performance metrics.
- Try adding more layers and different layers to the neural network. See how it affects the performance metrics.
- Try using another optimizer for training the neural network. See how it affects the performance metrics.
- Try letting the model train for more time by increasing the number of ```epochs```. See how it affects the performance metrics.
- Test different combinations of the methods in the previous bullets, and see how high performance you can get.