In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
dataset=pd.read_csv("dataset2.csv")


In [2]:
y=dataset["Churn"]

In [3]:
X=dataset.drop(["Churn"],axis=1)

In [4]:
columns=X.columns

In [5]:
from sklearn.preprocessing import StandardScaler

scaler=StandardScaler()
X=scaler.fit_transform(X)

In [6]:
from pandas import DataFrame
X=DataFrame(X,columns=columns)

In [7]:
# from sklearn.decomposition import PCA

# pca=PCA(n_components=10)
# X=pca.fit_transform(X)

In [8]:
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=0)

In [9]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
import optuna


In [10]:

def objective_rf(trial):
    # Define the hyperparameters to optimize
    n_estimators = trial.suggest_int('n_estimators', 100, 1000, step=100)
    max_depth = trial.suggest_int('max_depth', 3, 10)
    min_samples_split = trial.suggest_int('min_samples_split', 2, 10)
    

    clf = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth, min_samples_split=min_samples_split)
    
    clf.fit(X_train, y_train)

    y_pred = clf.predict(X_val)
    
    accuracy = accuracy_score(y_val, y_pred)
    
    return accuracy

def objective_knn(trial):
    # Define the hyperparameters to optimize
    n_neighbors = trial.suggest_int('n_neighbors', 2, 10)
    weights = trial.suggest_categorical('weights', ['uniform', 'distance'])
    
    clf = KNeighborsClassifier(n_neighbors=n_neighbors, weights=weights)
    
    clf.fit(X_train, y_train)

    y_pred = clf.predict(X_val)
    
    accuracy = accuracy_score(y_val, y_pred)
    
    return accuracy

def objective_mlp(trial):
    # Define the hyperparameters to optimize
    hidden_layer_sizes = trial.suggest_categorical('hidden_layer_sizes', [(100), (100, 100), (100, 100, 100)])
    activation = trial.suggest_categorical('activation', [ 'tanh', 'relu'])
    
    clf = MLPClassifier(hidden_layer_sizes=hidden_layer_sizes, activation=activation)
    
    clf.fit(X_train, y_train)

    y_pred = clf.predict(X_val)
    
    accuracy = accuracy_score(y_val, y_pred)
    
    return accuracy


def objective_svm(trial):
    # Define the hyperparameters to optimize
    C = trial.suggest_loguniform('C', 1e-10, 1e10)
    kernel = trial.suggest_categorical('kernel', ['linear', 'poly', 'rbf', 'sigmoid'])
    
    clf = SVC(C=C, kernel=kernel)
    
    clf.fit(X_train, y_train)

    y_pred = clf.predict(X_val)
    
    accuracy = accuracy_score(y_val, y_pred)
    
    return accuracy

In [11]:

studies=[objective_knn,objective_mlp,objective_rf,objective_svm]
best_model_value=0
for study in studies:
    study_name = study.__name__
    print(study_name)
    current_study = optuna.create_study(direction='maximize')
    current_study.optimize(study, n_trials=100)
    
    print("Number of finished trials: ", len(current_study.trials))
    print("Best trial:")
    trial = current_study.best_trial
    print("  Value: ", trial.value)
    if trial.value>best_model_value:
        best_model_value=trial.value
        best_model=study
    
    print("\n\n")
    
    

[I 2024-06-10 08:44:43,372] A new study created in memory with name: no-name-2389a3fa-4084-463b-b343-b0560f206d8a
[I 2024-06-10 08:44:43,391] Trial 0 finished with value: 0.9115442278860569 and parameters: {'n_neighbors': 9, 'weights': 'distance'}. Best is trial 0 with value: 0.9115442278860569.
[I 2024-06-10 08:44:43,418] Trial 1 finished with value: 0.9145427286356822 and parameters: {'n_neighbors': 5, 'weights': 'uniform'}. Best is trial 1 with value: 0.9145427286356822.
[I 2024-06-10 08:44:43,432] Trial 2 finished with value: 0.8680659670164917 and parameters: {'n_neighbors': 2, 'weights': 'distance'}. Best is trial 1 with value: 0.9145427286356822.
[I 2024-06-10 08:44:43,447] Trial 3 finished with value: 0.8680659670164917 and parameters: {'n_neighbors': 2, 'weights': 'distance'}. Best is trial 1 with value: 0.9145427286356822.
[I 2024-06-10 08:44:43,473] Trial 4 finished with value: 0.8950524737631185 and parameters: {'n_neighbors': 3, 'weights': 'uniform'}. Best is trial 1 with 

objective_knn


[I 2024-06-10 08:44:43,594] Trial 10 finished with value: 0.9130434782608695 and parameters: {'n_neighbors': 7, 'weights': 'uniform'}. Best is trial 1 with value: 0.9145427286356822.
[I 2024-06-10 08:44:43,622] Trial 11 finished with value: 0.9145427286356822 and parameters: {'n_neighbors': 5, 'weights': 'uniform'}. Best is trial 1 with value: 0.9145427286356822.
[I 2024-06-10 08:44:43,650] Trial 12 finished with value: 0.9145427286356822 and parameters: {'n_neighbors': 5, 'weights': 'uniform'}. Best is trial 1 with value: 0.9145427286356822.
[I 2024-06-10 08:44:43,679] Trial 13 finished with value: 0.9085457271364318 and parameters: {'n_neighbors': 10, 'weights': 'uniform'}. Best is trial 1 with value: 0.9145427286356822.
[I 2024-06-10 08:44:43,708] Trial 14 finished with value: 0.9085457271364318 and parameters: {'n_neighbors': 6, 'weights': 'uniform'}. Best is trial 1 with value: 0.9145427286356822.
[I 2024-06-10 08:44:43,737] Trial 15 finished with value: 0.9145427286356822 and par

Number of finished trials:  100
Best trial:
  Value:  0.9145427286356822



objective_mlp


[I 2024-06-10 08:44:49,326] Trial 0 finished with value: 0.9160419790104948 and parameters: {'hidden_layer_sizes': (100, 100), 'activation': 'relu'}. Best is trial 0 with value: 0.9160419790104948.
[I 2024-06-10 08:44:52,573] Trial 1 finished with value: 0.9190404797601199 and parameters: {'hidden_layer_sizes': (100, 100), 'activation': 'relu'}. Best is trial 1 with value: 0.9190404797601199.
[I 2024-06-10 08:44:55,798] Trial 2 finished with value: 0.9085457271364318 and parameters: {'hidden_layer_sizes': (100, 100), 'activation': 'relu'}. Best is trial 1 with value: 0.9190404797601199.
[I 2024-06-10 08:45:00,714] Trial 3 finished with value: 0.904047976011994 and parameters: {'hidden_layer_sizes': (100, 100, 100), 'activation': 'tanh'}. Best is trial 1 with value: 0.9190404797601199.
[I 2024-06-10 08:45:04,016] Trial 4 finished with value: 0.9130434782608695 and parameters: {'hidden_layer_sizes': (100, 100), 'activation': 'relu'}. Best is trial 1 with value: 0.9190404797601199.
[I 202

Number of finished trials:  100
Best trial:
  Value:  0.9355322338830585



objective_rf


[I 2024-06-10 08:47:49,416] Trial 0 finished with value: 0.9460269865067467 and parameters: {'n_estimators': 400, 'max_depth': 8, 'min_samples_split': 5}. Best is trial 0 with value: 0.9460269865067467.
[I 2024-06-10 08:47:50,602] Trial 1 finished with value: 0.9430284857571214 and parameters: {'n_estimators': 500, 'max_depth': 9, 'min_samples_split': 4}. Best is trial 0 with value: 0.9460269865067467.
[I 2024-06-10 08:47:52,412] Trial 2 finished with value: 0.9445277361319341 and parameters: {'n_estimators': 900, 'max_depth': 7, 'min_samples_split': 3}. Best is trial 0 with value: 0.9460269865067467.
[I 2024-06-10 08:47:53,865] Trial 3 finished with value: 0.9430284857571214 and parameters: {'n_estimators': 800, 'max_depth': 6, 'min_samples_split': 6}. Best is trial 0 with value: 0.9460269865067467.
[I 2024-06-10 08:47:54,609] Trial 4 finished with value: 0.9010494752623688 and parameters: {'n_estimators': 600, 'max_depth': 3, 'min_samples_split': 3}. Best is trial 0 with value: 0.946

Number of finished trials:  100
Best trial:
  Value:  0.9490254872563718



objective_svm
