## Import Libraries

In [16]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
import joblib

In [17]:
# Single seed reused by the train/test split and by every scikit-learn
# estimator's random_state below, so those steps give the same result on re-run.
SEED = 42

## Load, split, and scale data

In [18]:
# Load the prepared dataset; its 'churn' column is used as the target in the split below.
# NOTE(review): the file name is spelled 'churm' — presumably this matches the file on disk; confirm.
df = pd.read_csv('Data/ready_churm.csv')

In [19]:
# Separate the predictors from the 'churn' label, then hold out 20% of the
# rows as a test set. The split is seeded so it is the same on every run.
features = df.drop(columns=['churn'])
target = df['churn']
x_train, x_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=SEED,
)

In [20]:
# Standardise the continuous columns. The scaler is fitted on the training
# rows only and then applied to the test rows, so no test-set statistics leak
# into training.
scaler = StandardScaler()
columns_scale = ['subscription_age', 'bill_avg', 'service_failure_count', 'download_avg', 'upload_avg']

# Work on explicit copies: the frames returned by train_test_split are derived
# from df, and assigning into them directly can raise SettingWithCopyWarning
# on some pandas versions.
x_train = x_train.copy()
x_test = x_test.copy()

# Always scale from the raw values in df (looked up by row index) rather than
# from the current contents of x_train/x_test. On the first run this is the
# same data; on a re-run of this cell it prevents scaling already-scaled values.
x_train[columns_scale] = scaler.fit_transform(df.loc[x_train.index, columns_scale])
x_test[columns_scale] = scaler.transform(df.loc[x_test.index, columns_scale])

## Train and tune classical models

In [21]:
# Candidate estimators, keyed by the name used for logging and for the saved
# model file. Each one is seeded with SEED.
model_dict = dict(
    LogisticRegression=LogisticRegression(random_state=SEED),
    RandomForest=RandomForestClassifier(random_state=SEED),
    SVM=SVC(random_state=SEED),
)

# Hyper-parameter grid searched for each estimator; keys must match model_dict.
param_grid = dict(
    LogisticRegression=dict(C=[0.01, 0.1, 1, 10]),
    RandomForest=dict(n_estimators=[50, 100, 200]),
    SVM=dict(C=[0.1, 1, 10], kernel=['linear', 'rbf']),
)

In [22]:
# Tune, evaluate, and persist each candidate estimator in turn.
for name, model in model_dict.items():
    print(f"Train model: {name}")

    # 5-fold cross-validated grid search over this model's grid, selecting
    # the parameter combination with the best mean accuracy.
    grid = GridSearchCV(model, param_grid[name], cv=5, scoring='accuracy')
    grid.fit(x_train, y_train)

    # With GridSearchCV's default refit=True, best_estimator_ is the winning
    # configuration refitted on the whole training set.
    best_model = grid.best_estimator_

    # Report performance on the held-out test rows.
    y_pred = best_model.predict(x_test)

    print(f'Best parametr: {grid.best_params_}')
    print(classification_report(y_true=y_test, y_pred=y_pred))

    # Use a forward slash, as the CSV path does: 'Data\\...' only addresses the
    # Data directory on Windows; elsewhere it creates a file whose name
    # contains a backslash in the working directory.
    joblib.dump(best_model, f'Data/{name}_best_model.pkl')

Train model: LogisticRegression
Best parametr: {'C': 0.1}
              precision    recall  f1-score   support

           0       0.73      0.68      0.70      6553
           1       0.75      0.79      0.77      7902

    accuracy                           0.74     14455
   macro avg       0.74      0.74      0.74     14455
weighted avg       0.74      0.74      0.74     14455

Train model: RandomForest
Best parametr: {'n_estimators': 200}
              precision    recall  f1-score   support

           0       0.78      0.83      0.80      6553
           1       0.85      0.80      0.83      7902

    accuracy                           0.81     14455
   macro avg       0.81      0.82      0.81     14455
weighted avg       0.82      0.81      0.81     14455

Train model: SVM
Best parametr: {'C': 10, 'kernel': 'rbf'}
              precision    recall  f1-score   support

           0       0.73      0.84      0.78      6553
           1       0.85      0.74      0.79      7902

  

## Neural network

In [26]:
from keras.layers import Dense, Input
from keras.models import Sequential
from keras.utils import set_random_seed

In [27]:
# Seed Python, NumPy, and the Keras backend so that weight initialisation and
# batch shuffling are repeatable, in line with the seeded scikit-learn models.
set_random_seed(SEED)

# Small fully connected binary classifier: two ReLU hidden layers followed by
# a single sigmoid unit.
model = Sequential(name='ChurnNet')
# Declare the input width with an explicit Input layer; passing input_shape=
# to the first Dense layer triggers a Keras warning.
model.add(Input(shape=(x_train.shape[1],)))
model.add(Dense(32, activation='relu'))
model.add(Dense(16, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [28]:
# Binary cross-entropy pairs with the single sigmoid output unit; accuracy is
# tracked so the run can be compared with the scikit-learn reports.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Train for 10 epochs in mini-batches of 64, holding out 20% of the training
# data for per-epoch validation metrics.
model.fit(
    x_train,
    y_train,
    epochs=10,
    batch_size=64,
    validation_split=0.2,
)

Epoch 1/10
[1m723/723[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.7141 - loss: 0.5612 - val_accuracy: 0.7605 - val_loss: 0.4803
Epoch 2/10
[1m723/723[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7614 - loss: 0.4828 - val_accuracy: 0.7614 - val_loss: 0.4721
Epoch 3/10
[1m723/723[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.7675 - loss: 0.4715 - val_accuracy: 0.7670 - val_loss: 0.4671
Epoch 4/10
[1m723/723[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.7681 - loss: 0.4682 - val_accuracy: 0.7696 - val_loss: 0.4641
Epoch 5/10
[1m723/723[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.7793 - loss: 0.4566 - val_accuracy: 0.7711 - val_loss: 0.4587
Epoch 6/10
[1m723/723[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.7774 - loss: 0.4571 - val_accuracy: 0.7726 - val_loss: 0.4582
Epoch 7/10
[1m723/723[0m 

<keras.src.callbacks.history.History at 0x2de0642ce00>

In [32]:
# Score the trained network on the held-out test rows.
loss, accuracy = model.evaluate(x_test, y_test)
print(f'Model Name: {model.name}')

# Turn the sigmoid outputs into hard 0/1 labels with a 0.5 threshold so the
# same classification report can be printed as for the scikit-learn models.
churn_probability = model.predict(x_test)
y_pred = (churn_probability > 0.5).astype("int32")
print(classification_report(y_test, y_pred))

[1m452/452[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7839 - loss: 0.4488
Model Name: ChurnNet
[1m452/452[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
              precision    recall  f1-score   support

           0       0.73      0.83      0.78      6553
           1       0.84      0.75      0.79      7902

    accuracy                           0.79     14455
   macro avg       0.79      0.79      0.78     14455
weighted avg       0.79      0.79      0.79     14455



In [34]:
# Persist the trained network in the native .keras format. Use a forward
# slash, as the CSV path does: 'Data\\...' only addresses the Data directory
# on Windows.
model.save('Data/NN_best_model.keras')