In [12]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.metrics import accuracy_score
from tensorflow.keras import regularizers

In [7]:
# Dataset
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')

In [8]:
# One-hot encode the categorical variables
categorical_features = ['AI_Response_Time', 'Change_in_Usage_Patterns', 'AI_Interaction_Level']
one_hot_encoder = ColumnTransformer(transformers=[
    ('one_hot', OneHotEncoder(drop='first'), categorical_features)],
    remainder='passthrough')

X_train = one_hot_encoder.fit_transform(train_data.drop(columns=['ID', 'Customer_Churn']))
y_train = train_data['Customer_Churn']
X_test = one_hot_encoder.transform(test_data.drop(columns=['ID', 'Customer_Churn']))
y_test = test_data['Customer_Churn']

# Standardize the data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Define the neural network
model = models.Sequential([
    layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    layers.Dense(32, activation='relu'),
    layers.Dense(1, activation='sigmoid')
])

# Compile the model
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2)

# Evaluate the model on the test data
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f'Test accuracy: {test_acc}')

Epoch 1/10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.5491 - loss: 0.6849 - val_accuracy: 0.5850 - val_loss: 0.6508
Epoch 2/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6030 - loss: 0.6463 - val_accuracy: 0.5920 - val_loss: 0.6480
Epoch 3/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6008 - loss: 0.6487 - val_accuracy: 0.5960 - val_loss: 0.6470
Epoch 4/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.6046 - loss: 0.6459 - val_accuracy: 0.5900 - val_loss: 0.6478
Epoch 5/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.6198 - loss: 0.6374 - val_accuracy: 0.6010 - val_loss: 0.6526
Epoch 6/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.6196 - loss: 0.6391 - val_accuracy: 0.6000 - val_loss: 0.6469
Epoch 7/10
[1m125/125[0m [32m━━━━━━━

In [9]:


model = models.Sequential([
    layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],), kernel_regularizer=regularizers.l2(0.01)),
    layers.Dense(32, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
    layers.Dense(1, activation='sigmoid')
])

# Compile the model
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2)

# Evaluate the model on the test data
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f'Test accuracy: {test_acc}')

Epoch 1/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.5575 - loss: 1.2327 - val_accuracy: 0.5860 - val_loss: 0.9717
Epoch 2/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.5996 - loss: 0.9148 - val_accuracy: 0.5880 - val_loss: 0.8033
Epoch 3/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6020 - loss: 0.7868 - val_accuracy: 0.5930 - val_loss: 0.7238
Epoch 4/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.6115 - loss: 0.7147 - val_accuracy: 0.6030 - val_loss: 0.6911
Epoch 5/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.6011 - loss: 0.6889 - val_accuracy: 0.6000 - val_loss: 0.6706
Epoch 6/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.5981 - loss: 0.6742 - val_accuracy: 0.6000 - val_loss: 0.6637
Epoch 7/10
[1m125/125[0m 

In [10]:

# Predict the labels for the test set
y_pred = model.predict(X_test)
y_pred = (y_pred > 0.5).astype(int)  # Convert probabilities to binary labels

# Calculate the accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f'Test accuracy: {accuracy}')


[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step
Test accuracy: 0.58


## Reduce model with neural net

In [14]:
# Dataset
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')

In [15]:
# Drop the unwanted features
train_data_reduced = train_data.drop(columns=['AI_Personalization_Effectiveness', 'AI_Response_Time'])
test_data_reduced = test_data.drop(columns=['AI_Personalization_Effectiveness', 'AI_Response_Time'])

# One-hot encode the categorical variables
categorical_features = ['Change_in_Usage_Patterns', 'AI_Interaction_Level']  # Update this list as needed
one_hot_encoder = ColumnTransformer(transformers=[
    ('one_hot', OneHotEncoder(drop='first'), categorical_features)],
    remainder='passthrough')

X_train_reduced = one_hot_encoder.fit_transform(train_data_reduced.drop(columns=['ID', 'Customer_Churn']))
y_train = train_data_reduced['Customer_Churn']
X_test_reduced = one_hot_encoder.transform(test_data_reduced.drop(columns=['ID', 'Customer_Churn']))
y_test = test_data_reduced['Customer_Churn']

# Standardize the data
scaler = StandardScaler()
X_train_reduced = scaler.fit_transform(X_train_reduced)
X_test_reduced = scaler.transform(X_test_reduced)

In [16]:
# Define the neural network
model = models.Sequential([
    layers.Dense(64, activation='relu', input_shape=(X_train_reduced.shape[1],)),
    layers.Dense(32, activation='relu'),
    layers.Dense(1, activation='sigmoid')
])

# Compile the model
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Train the model
model.fit(X_train_reduced, y_train, epochs=10, batch_size=32, validation_split=0.2)

# Evaluate the model on the test data
test_loss, test_acc = model.evaluate(X_test_reduced, y_test)
print(f'Test accuracy: {test_acc}')

Epoch 1/10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - accuracy: 0.5702 - loss: 0.6752 - val_accuracy: 0.5900 - val_loss: 0.6543
Epoch 2/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.5987 - loss: 0.6558 - val_accuracy: 0.6070 - val_loss: 0.6422
Epoch 3/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.6085 - loss: 0.6462 - val_accuracy: 0.6100 - val_loss: 0.6412
Epoch 4/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.6001 - loss: 0.6464 - val_accuracy: 0.6090 - val_loss: 0.6427
Epoch 5/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.6243 - loss: 0.6428 - val_accuracy: 0.6010 - val_loss: 0.6452
Epoch 6/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.6371 - loss: 0.6375 - val_accuracy: 0.6100 - val_loss: 0.6419
Epoch 7/10
[1m125/125[0m [32m━━━━━━━

In [17]:
model = models.Sequential([
    layers.Dense(128, activation='relu', input_shape=(X_train_reduced.shape[1],)),
    layers.Dropout(0.5),
    layers.Dense(64, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(32, activation='relu'),
    layers.Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)

model.fit(X_train_reduced, y_train, epochs=50, batch_size=64, validation_split=0.2, callbacks=[early_stopping])


Epoch 1/50
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 8ms/step - accuracy: 0.5445 - loss: 0.7107 - val_accuracy: 0.5890 - val_loss: 0.6618
Epoch 2/50
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.5709 - loss: 0.6786 - val_accuracy: 0.5890 - val_loss: 0.6567
Epoch 3/50
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.5715 - loss: 0.6720 - val_accuracy: 0.5930 - val_loss: 0.6497
Epoch 4/50
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.5834 - loss: 0.6662 - val_accuracy: 0.6030 - val_loss: 0.6526
Epoch 5/50
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5815 - loss: 0.6658 - val_accuracy: 0.6010 - val_loss: 0.6464
Epoch 6/50
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.5744 - loss: 0.6646 - val_accuracy: 0.5840 - val_loss: 0.6473
Epoch 7/50
[1m63/63[0m [32m━━━━━━━━━━

<keras.src.callbacks.history.History at 0x7fd44cb07ee0>

In [19]:
# Evaluate the model on the test data
test_loss, test_acc = model.evaluate(X_test_reduced, y_test)
print(f'Test accuracy: {test_acc}')

[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5971 - loss: 0.6538
Test accuracy: 0.5935999751091003


In [20]:
from tensorflow.keras.callbacks import EarlyStopping

model = models.Sequential([
    layers.Dense(128, activation='relu', input_shape=(X_train_reduced.shape[1],),
                 kernel_regularizer=regularizers.l2(0.01)),
    layers.Dropout(0.5),
    layers.Dense(64, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
    layers.Dropout(0.5),
    layers.Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

early_stopping = EarlyStopping(monitor='val_loss', patience=5)

model.fit(X_train_reduced, y_train, epochs=50, batch_size=32, validation_split=0.2,
          callbacks=[early_stopping])
# Evaluate the model on the test data
test_loss, test_acc = model.evaluate(X_test_reduced, y_test)
print(f'Test accuracy: {test_acc}')

Epoch 1/50
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 8ms/step - accuracy: 0.5340 - loss: 1.5516 - val_accuracy: 0.5920 - val_loss: 1.0674
Epoch 2/50
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.5770 - loss: 1.0072 - val_accuracy: 0.5880 - val_loss: 0.8322
Epoch 3/50
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.5840 - loss: 0.8143 - val_accuracy: 0.5900 - val_loss: 0.7361
Epoch 4/50
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.5799 - loss: 0.7368 - val_accuracy: 0.5930 - val_loss: 0.6916
Epoch 5/50
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.5826 - loss: 0.7008 - val_accuracy: 0.5930 - val_loss: 0.6735
Epoch 6/50
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.5782 - loss: 0.6822 - val_accuracy: 0.5890 - val_loss: 0.6660
Epoch 7/50
[1m125/125[0m 

## Hypertuning it

In [21]:
pip install keras-tuner

Collecting keras-tuner
  Downloading keras_tuner-1.4.7-py3-none-any.whl (129 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.1/129.1 kB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Collecting kt-legacy
  Downloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras-tuner
Successfully installed keras-tuner-1.4.7 kt-legacy-1.0.5
Note: you may need to restart the kernel to use updated packages.


In [22]:
import kerastuner as kt

def build_model(hp):
    model = models.Sequential()
    model.add(layers.InputLayer(input_shape=(X_train_reduced.shape[1],)))
    
    # Tune the number of units in the first Dense layer
    # Choose an optimal value between 32-512
    hp_units = hp.Int('units', min_value=32, max_value=512, step=32)
    model.add(layers.Dense(units=hp_units, activation='relu'))
    model.add(layers.Dropout(0.5))
    
    # Add more hidden layers with tuned number of units
    for i in range(hp.Int('num_layers', 1, 4)):
        model.add(layers.Dense(units=hp.Int(f'units_{i}', min_value=32, max_value=512, step=32),
                               activation='relu'))
        model.add(layers.Dropout(0.5))
    
    model.add(layers.Dense(1, activation='sigmoid'))
    
    # Tune the learning rate for the optimizer
    hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
    
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=hp_learning_rate),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    
    return model

tuner = kt.Hyperband(build_model,
                     objective='val_accuracy',
                     max_epochs=10,
                     factor=3,
                     directory='my_dir',
                     project_name='intro_to_kt')

stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)

tuner.search(X_train_reduced, y_train, epochs=50, validation_split=0.2, callbacks=[stop_early])

# Get the optimal hyperparameters
best_hps=tuner.get_best_hyperparameters(num_trials=1)[0]

print(f"""
The hyperparameter search is complete. The optimal number of units in the first densely-connected
layer is {best_hps.get('units')} and the optimal learning rate for the optimizer
is {best_hps.get('learning_rate')}.
""")

# Build the model with the optimal hyperparameters and train it on the data
model = tuner.hypermodel.build(best_hps)
history = model.fit(X_train_reduced, y_train, epochs=50, validation_split=0.2)


Trial 30 Complete [00h 00m 11s]
val_accuracy: 0.597000002861023

Best val_accuracy So Far: 0.6159999966621399
Total elapsed time: 00h 04m 15s

The hyperparameter search is complete. The optimal number of units in the first densely-connected
layer is 384 and the optimal learning rate for the optimizer
is 0.001.

Epoch 1/50
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 13ms/step - accuracy: 0.5761 - loss: 0.6840 - val_accuracy: 0.6030 - val_loss: 0.6481
Epoch 2/50
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - accuracy: 0.5728 - loss: 0.6676 - val_accuracy: 0.6000 - val_loss: 0.6436
Epoch 3/50
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step - accuracy: 0.6041 - loss: 0.6519 - val_accuracy: 0.6000 - val_loss: 0.6431
Epoch 4/50
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - accuracy: 0.5972 - loss: 0.6593 - val_accuracy: 0.6130 - val_loss: 0.6419
Epoch 5/50
[1m125/125[0m [32m━━━

In [23]:
# Evaluate the model on the test data
test_loss, test_acc = model.evaluate(X_test_reduced, y_test)
print(f'Test accuracy: {test_acc}')

[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.5566 - loss: 0.6769
Test accuracy: 0.5856000185012817


In [24]:
from sklearn.metrics import accuracy_score

# Predict the labels for the test set
y_pred = model.predict(X_test_reduced)
y_pred = (y_pred > 0.5).astype(int)  # Convert probabilities to binary labels

# Calculate the accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f'Test accuracy: {accuracy}')

[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
Test accuracy: 0.5856
