In [5]:
import ipynb
import pandas as pd
import numpy as np

from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import GradientBoostingClassifier

from sklearn.preprocessing import OrdinalEncoder
from numpy import asarray
from statistics import mean

In [6]:
import warnings
warnings.filterwarnings('ignore')

In [7]:
from ipynb.fs.full.ParameterTuning import ParameterTuning

In [8]:
# Load the preprocessed marketing-campaign table; the first CSV column becomes the index.
preprocessed_df = pd.read_csv("../data/preprocessedMarketingCampaign.csv", index_col=0)

# Columns to be stored with the pandas 'category' dtype
# (presumably one-hot indicator columns, judging by their names — verify against the preprocessing step).
categorical_features = [ 'Education_2n_Cycle','Education_Basic','Education_Graduation','Education_Master','Education_PhD',
                        'AcceptedCmp5_0','AcceptedCmp5_1','AcceptedCmp1_0','AcceptedCmp1_1' ]
# Name of the column passed as the prediction target to every ParameterTuning call in this notebook.
target = "Teenhome"

# Convert categorical feature types
preprocessed_df[categorical_features] = preprocessed_df[categorical_features].astype('category')
preprocessed_df[target] = preprocessed_df[target].astype('category')

# Show the class balance of the target. The heading is built from `target`
# so it always names the column that is actually being counted
# (it previously read "Revenue" although the counts are for Teenhome).
print(target + ' distribution:\n' + str(preprocessed_df[target].value_counts()))

Revenue distribution:
1    1158
0    1158
Name: Teenhome, dtype: int64


In [9]:
# KNN
# Search space for k-nearest neighbours: neighbourhood size, vote weighting
# scheme and distance metric (8 * 2 * 3 = 48 combinations).
# NOTE(review): the saved output of this cell is
# "NameError: name 'oversampled_df' is not defined". That name does not appear
# anywhere in this notebook, so the error presumably originates inside the
# imported ParameterTuning helper — verify there.
grid_params = dict(
    n_neighbors=[2, 3, 4, 5, 7, 9, 11, 13],
    weights=['uniform', 'distance'],
    metric=['minkowski', 'euclidean', 'manhattan'],
)

# A copy of the frame is handed over rather than the notebook-level object itself.
ParameterTuning(preprocessed_df.copy(), target, KNeighborsClassifier(), grid_params)

NameError: name 'oversampled_df' is not defined

In [None]:
# SVM
# Only the kernel type is varied for the support vector classifier;
# all other SVC settings stay at their defaults.
param_grid = dict(kernel=['rbf', 'linear', 'poly', 'sigmoid'])

# A copy of the frame is handed over rather than the notebook-level object itself.
ParameterTuning(preprocessed_df.copy(), target, SVC(), param_grid)

In [None]:
# DT
# Search space for the decision tree: split criterion, depth limit and the
# minimum sample counts needed to split a node / form a leaf
# (2 * 9 * 8 * 4 = 576 combinations).
# min_samples_split starts at 2: scikit-learn rejects an integer value of 1
# (an integer min_samples_split must be >= 2), so every candidate using 1
# could not be fitted. With warnings silenced earlier in this notebook, such
# failures would presumably go unnoticed.
grid_params = { 'criterion' : ['gini', 'entropy'],
               'max_depth': range(1, 10),
               'min_samples_split': range(2, 10),
               'min_samples_leaf': range(1, 5)}

# A copy of the frame is handed over rather than the notebook-level object itself.
ParameterTuning (preprocessed_df.copy(), target, DecisionTreeClassifier(), grid_params)

In [None]:
# RF
# Search space for the random forest. Bootstrap sampling is always on; the
# remaining axes vary tree depth, features tried per split, leaf/split sample
# minimums and forest size (1 * 6 * 3 * 3 * 3 * 4 = 648 combinations).
param_grid = dict(
    bootstrap=[True],
    max_depth=[10, 80, 90, 100, 110, 200],
    max_features=[2, 3, 8],
    min_samples_leaf=[3, 4, 5],
    min_samples_split=[8, 10, 12],
    n_estimators=[100, 200, 300, 1000],
)

# A copy of the frame is handed over rather than the notebook-level object itself.
ParameterTuning(preprocessed_df.copy(), target, RandomForestClassifier(), param_grid)

In [11]:
# MLP
# Search space for the multi-layer perceptron: two network shapes, two
# activations, two solvers, two L2 penalties and two learning-rate schedules
# (2 ** 5 = 32 combinations, matching the "32 candidates" in the saved output).
grid_params = dict(
    hidden_layer_sizes=[(10, 30, 10), (20,)],
    activation=['tanh', 'relu'],
    solver=['sgd', 'adam'],
    alpha=[0.0001, 0.05],
    learning_rate=['constant', 'adaptive'],
)

# A copy of the frame is handed over rather than the notebook-level object itself.
ParameterTuning(preprocessed_df.copy(), target, MLPClassifier(), grid_params)

Fitting 3 folds for each of 32 candidates, totalling 96 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:  2.2min
[Parallel(n_jobs=-1)]: Done  96 out of  96 | elapsed:  5.1min finished


{'activation': 'relu', 'alpha': 0.0001, 'hidden_layer_sizes': (10, 30, 10), 'learning_rate': 'adaptive', 'solver': 'adam'}


In [14]:
# GBE
# Search space for gradient boosting: number of boosting stages, depth of the
# individual trees and learning rate (4 * 5 * 5 = 100 combinations, matching
# the "100 candidates" in the saved output). The learning-rate axis spans
# 0.01 up to 100.
grid_params = dict(
    n_estimators=[5, 50, 250, 500],
    max_depth=[1, 3, 5, 7, 9],
    learning_rate=[0.01, 0.1, 1, 10, 100],
)

# A copy of the frame is handed over rather than the notebook-level object itself.
ParameterTuning(preprocessed_df.copy(), target, GradientBoostingClassifier(), grid_params)

Fitting 3 folds for each of 100 candidates, totalling 300 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:   38.6s
[Parallel(n_jobs=-1)]: Done 192 tasks      | elapsed:  4.9min
[Parallel(n_jobs=-1)]: Done 300 out of 300 | elapsed:  8.4min finished


{'learning_rate': 1, 'max_depth': 9, 'n_estimators': 500}
