### Import Libraries

In [43]:
import pandas as pd

# Logistic regression

from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.linear_model import LogisticRegression

# Neural Network
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.layers import BatchNormalization

### Load Data

In [6]:
# Load the Telco customer dataset; the path is relative to the notebook's
# working directory.
df = pd.read_csv('./Assets/Datasets/Telco_Customer.csv')

In [7]:
# Column names, non-null counts and dtypes of the raw frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7043 entries, 0 to 7042
Data columns (total 16 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   Customer ID                   7043 non-null   int64  
 1   Tenure Months                 7043 non-null   int64  
 2   Location                      7043 non-null   object 
 3   Device Class                  7043 non-null   object 
 4   Games Product                 7043 non-null   object 
 5   Music Product                 7043 non-null   object 
 6   Education Product             7043 non-null   object 
 7   Call Center                   7043 non-null   object 
 8   Video Product                 7043 non-null   object 
 9   Use MyApp                     7043 non-null   object 
 10  Payment Method                7043 non-null   object 
 11  Monthly Purchase (Thou. IDR)  7043 non-null   float64
 12  Churn Label                   7043 non-null   object 
 13  Lon

In [8]:
# (rows, columns) of the raw frame.
df.shape

(7043, 16)

In [9]:
# Summary statistics of the numeric columns.
df.describe()

Unnamed: 0,Customer ID,Tenure Months,Monthly Purchase (Thou. IDR),Longitude,Latitude,CLTV (Predicted Thou. IDR)
count,7043.0,7043.0,7043.0,7043.0,7043.0,7043.0
mean,3521.0,32.371149,84.1902,107.043249,-6.404182,5720.384481
std,2033.283305,24.559481,39.117061,0.358314,0.322896,1537.974298
min,0.0,0.0,23.725,106.81667,-6.91474,2603.9
25%,1760.5,9.0,46.15,106.81667,-6.91474,4509.7
50%,3521.0,29.0,91.455,106.81667,-6.2,5885.1
75%,5281.5,55.0,116.805,107.60981,-6.2,6994.65
max,7042.0,72.0,154.375,107.60981,-6.2,8450.0


#### Dropping Unnecessary Columns

In [10]:
# Remove the identifier and the coordinate columns from the frame.
df = df.drop(columns=['Customer ID', 'Longitude', 'Latitude'])

In [11]:
# Preview the first three rows after the column drop.
df.head(3)

Unnamed: 0,Tenure Months,Location,Device Class,Games Product,Music Product,Education Product,Call Center,Video Product,Use MyApp,Payment Method,Monthly Purchase (Thou. IDR),Churn Label,CLTV (Predicted Thou. IDR)
0,2,Jakarta,Mid End,Yes,Yes,No,No,No,No,Digital Wallet,70.005,Yes,4210.7
1,2,Jakarta,High End,No,No,No,No,No,No,Pulsa,91.91,Yes,3511.3
2,8,Jakarta,High End,No,No,Yes,No,Yes,Yes,Pulsa,129.545,Yes,6983.6


#### Checking the unique values 

In [12]:
# Categorical columns whose distinct values we want to inspect.
columns = [
    'Location',
    'Games Product',
    'Music Product',
    'Education Product',
    'Call Center',
    'Video Product',
    'Use MyApp',
    'Payment Method',
    'Churn Label',
]

# Print one array of distinct values per column, in the order listed above.
for column, values in df[columns].items():
    print(values.unique())

['Jakarta' 'Bandung']
['Yes' 'No' 'No internet service']
['Yes' 'No' 'No internet service']
['No' 'Yes' 'No internet service']
['No' 'Yes']
['No' 'Yes' 'No internet service']
['No' 'Yes' 'No internet service']
['Digital Wallet' 'Pulsa' 'Debit' 'Credit']
['Yes' 'No']


In [13]:
# Product/app columns that carry a third category, 'No internet service'.
columns_replace_val = [
    'Games Product',
    'Music Product',
    'Education Product',
    'Video Product',
    'Use MyApp',
]

# Fold 'No internet service' into 'No' so these columns become binary.
df[columns_replace_val] = df[columns_replace_val].replace('No internet service', 'No')

#### Encoding

In [14]:
# Categorical columns (including the target, 'Churn Label') to convert to
# integer codes.
columns = ['Games Product', 'Music Product', 'Education Product', 'Video Product', 'Use MyApp', 'Call Center', 'Churn Label', 'Location', 'Device Class', 'Payment Method']

# One fitted encoder per column. Reusing a single LabelEncoder refits it on
# every iteration and throws away the class mapping of every column except the
# last one, so the codes could not be decoded (inverse_transform) or applied
# consistently to new data.
encoders = {}

for column in columns:
    # `le` is still bound to the last column's encoder after the loop, exactly
    # as before, for any code that relied on that name.
    le = LabelEncoder()
    df[column] = le.fit_transform(df[column])
    encoders[column] = le

# NOTE(review): integer codes impose an arbitrary order on multi-category
# columns such as 'Payment Method'; one-hot encoding may suit the linear model
# better — confirm before changing, since it alters the feature matrix.

### Feature/Target Split and Train/Test Split

In [15]:
# Target vector and predictor matrix (every remaining column except the target).
y = df['Churn Label']
X = df.drop(columns=['Churn Label'])

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

### Standardization

In [16]:
# Learn the scaling statistics from the training split only, then apply the
# same transformation to both splits so no test information leaks into the fit.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

### Hyperparameter Tuning

In [23]:
# Candidate values for the `C` parameter searched by GridSearchCV below,
# one per decade from 0.001 to 100.
param_grid = {'C': [0.001, 0.01, 0.1, 1, 10, 100]}

### Modelling

In [24]:
# Base estimator whose `C` is tuned by the grid search.
model = LogisticRegression(random_state=42)

# 5-fold cross-validated search over `param_grid`, ranked by accuracy.
# `fit` returns the fitted search object, so it can be chained.
grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
).fit(X_train, y_train)

best_params = grid_search.best_params_

In [25]:
# Take the best estimator selected by the grid search and predict churn
# labels for the held-out test set.
best_model = grid_search.best_estimator_
y_pred_tuned = best_model.predict(X_test)

### Evaluate Model

In [26]:
# Score the tuned logistic-regression model on the held-out test set.
# The predictions are in `y_pred_tuned`; the previous code referenced
# `y_pred`, which is never defined in this notebook and raises NameError on a
# fresh kernel.
accuracy_tuned = accuracy_score(y_test, y_pred_tuned)
print("Accuracy: ", accuracy_tuned)

Accuracy:  0.7835344215755855


## Neural Network

In [56]:
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import RMSprop

# Feed-forward binary classifier: three L2-regularised ReLU layers
# (128 -> 64 -> 32 units) followed by a single sigmoid output unit.
# The network must be bound to `model_NN`, the name used by compile/fit/predict
# below; it was previously assigned to `model`, so `model_NN` was undefined on
# a fresh kernel.
model_NN = Sequential([
    Dense(128, activation='relu', kernel_regularizer=l2(0.01), input_dim=X_train.shape[1]),
    Dense(64, activation='relu', kernel_regularizer=l2(0.01)),
    Dense(32, activation='relu', kernel_regularizer=l2(0.01)),
    Dense(1, activation='sigmoid')
])

# Binary cross-entropy matches the sigmoid output and the 0/1 target.
model_NN.compile(loss='binary_crossentropy', optimizer=RMSprop(), metrics=['accuracy'])

In [58]:
from tensorflow.keras.callbacks import ReduceLROnPlateau

# Multiply the learning rate by 0.2 when validation loss has not improved for
# 5 epochs. `min_lr` must be below the optimizer's starting rate (RMSprop()
# defaults to 0.001); with the previous min_lr=0.001 the callback could never
# lower the rate and was a no-op.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=1e-5)

# Train for 30 epochs, holding out 20% of the training split for validation.
model_NN.fit(X_train, y_train, epochs=30, batch_size=32, validation_split=0.2, callbacks=[reduce_lr])

# Threshold the sigmoid outputs at 0.5 and flatten the (n, 1) prediction array
# to 1-D so it has the same shape as `y_test`.
y_pred_nn = (model_NN.predict(X_test) > 0.5).astype(int).ravel()
accuracy_nn = accuracy_score(y_test, y_pred_nn)
print(accuracy_nn)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
0.7366926898509581


In [59]:
# Predicted churn probabilities from the network for the test set.
nn_probabilities = model_NN.predict(X_test)

# Threshold at 0.5 to obtain binary class predictions.
y_pred_nn = (nn_probabilities > 0.5).astype(int)

accuracy_nn = accuracy_score(y_test, y_pred_nn)
print(f"Neural Network Accuracy: {accuracy_nn:.2f}")

Neural Network Accuracy: 0.74
