## Import Libraries

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
import joblib

In [2]:
SEED = 42

## Split data

In [3]:
df = pd.read_csv('Data/ready_churm.csv')

In [4]:
x_train, x_test, y_train, y_test = train_test_split(df.drop(columns=['churn']), df['churn'], test_size=0.2, random_state=SEED)

In [5]:
scaler = StandardScaler()
columns_scale = ['subscription_age', 'bill_avg', 'service_failure_count', 'download_avg', 'upload_avg']
x_train[columns_scale] = scaler.fit_transform(x_train[columns_scale])
x_test[columns_scale] = scaler.transform(x_test[columns_scale])

## train

In [6]:
model_dict = {
    'LogisticRegression': LogisticRegression(random_state=SEED),
    'RandomForest': RandomForestClassifier(random_state=SEED),
    'SVM': SVC(random_state=SEED)
}
param_grid = {
    'LogisticRegression': {'C': [0.01, 0.1, 1, 10]},
    'RandomForest': {'n_estimators': [50, 100, 200]},
    'SVM': {'C': [0.1, 1, 10]}
}

In [7]:
for name, model in model_dict.items():
    print(f"Train model: {name}")

    grid = GridSearchCV(model, param_grid[name], cv=5, scoring='accuracy')
    grid.fit(x_train, y_train)

    best_model = grid.best_estimator_

    y_pred = best_model.predict(x_test)

    print(f'Best parametr: {grid.best_params_}')
    print(classification_report(y_true=y_test, y_pred=y_pred))
    joblib.dump(best_model, f'Data\\{name}_best_model.pkl')

Train model: LogisticRegression
Best parametr: {'C': 0.1}
              precision    recall  f1-score   support

           0       0.73      0.68      0.70      6553
           1       0.75      0.79      0.77      7902

    accuracy                           0.74     14455
   macro avg       0.74      0.74      0.74     14455
weighted avg       0.74      0.74      0.74     14455

Train model: RandomForest
Best parametr: {'n_estimators': 200}
              precision    recall  f1-score   support

           0       0.78      0.83      0.80      6553
           1       0.85      0.80      0.83      7902

    accuracy                           0.81     14455
   macro avg       0.81      0.82      0.81     14455
weighted avg       0.82      0.81      0.81     14455

Train model: SVM
Best parametr: {'C': 10}
              precision    recall  f1-score   support

           0       0.73      0.84      0.78      6553
           1       0.85      0.74      0.79      7902

    accuracy       

## NN

In [8]:
from keras.models import Sequential
from keras.layers import Dense

In [9]:
model = Sequential(name='ChurnNet')
model.add(Dense(32, activation='relu', input_shape=(x_train.shape[1],)))
model.add(Dense(16, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [10]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.fit(x_train, y_train, epochs=10, batch_size=64, validation_split=0.2)

Epoch 1/10
[1m723/723[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 3ms/step - accuracy: 0.6951 - loss: 0.5735 - val_accuracy: 0.7602 - val_loss: 0.4798
Epoch 2/10
[1m723/723[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7627 - loss: 0.4790 - val_accuracy: 0.7669 - val_loss: 0.4702
Epoch 3/10
[1m723/723[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.7735 - loss: 0.4640 - val_accuracy: 0.7725 - val_loss: 0.4666
Epoch 4/10
[1m723/723[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7730 - loss: 0.4650 - val_accuracy: 0.7741 - val_loss: 0.4630
Epoch 5/10
[1m723/723[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.7791 - loss: 0.4572 - val_accuracy: 0.7751 - val_loss: 0.4574
Epoch 6/10
[1m723/723[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.7799 - loss: 0.4569 - val_accuracy: 0.7748 - val_loss: 0.4573
Epoch 7/10
[1m723/723[0m 

<keras.src.callbacks.history.History at 0x227dd151b80>

In [11]:
loss, accuracy = model.evaluate(x_test, y_test)
print(f'Model Name: {model.name}')
y_pred = (model.predict(x_test) > 0.5).astype("int32")
print(classification_report(y_test, y_pred))

[1m452/452[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.7816 - loss: 0.4497
Model Name: ChurnNet
[1m452/452[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
              precision    recall  f1-score   support

           0       0.74      0.81      0.77      6553
           1       0.83      0.76      0.80      7902

    accuracy                           0.79     14455
   macro avg       0.78      0.79      0.78     14455
weighted avg       0.79      0.79      0.79     14455



In [12]:
model.save('Data\\NN_best_model.keras')

### RandomForestClassifier smaller

In [13]:
random_forest = RandomForestClassifier(random_state=42)

param_grid = {
    'n_estimators': [50, 100],        
    'max_depth': [5, 10, 20, None],       
    'min_samples_split': [2, 10, 20],   
    'min_samples_leaf': [1, 5, 10]
}

grid_search = GridSearchCV(estimator=random_forest, param_grid=param_grid, cv=5, scoring='accuracy', n_jobs=-1)

grid_search.fit(x_train, y_train)

print(f'Best parametr: {grid_search.best_params_}')
print(f'Best accuracy: {grid_search.best_score_:.4f}')

y_pred = grid_search.best_estimator_.predict(x_test)
print(classification_report(y_true=y_test, y_pred=y_pred))

Best parametr: {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 20, 'n_estimators': 100}
Best accuracy: 0.8183
              precision    recall  f1-score   support

           0       0.78      0.84      0.81      6553
           1       0.86      0.80      0.83      7902

    accuracy                           0.82     14455
   macro avg       0.82      0.82      0.82     14455
weighted avg       0.82      0.82      0.82     14455



In [14]:
joblib.dump(grid_search, 'Data\\RandomForest_best_model.pkl')

['Data\\RandomForest_best_model.pkl']

In [15]:
joblib.dump(scaler, 'Data\\scaler.pkl')

['Data\\scaler.pkl']

### SCM proba

In [18]:
svc = SVC(C=10, probability=True, random_state=SEED)
svc.fit(x_train, y_train)
joblib.dump(svc, 'Data\\SVM_best_model.pkl')

['Data\\SVM_best_model.pkl']