In [46]:
import ipynb
import pandas as pd
import numpy as np

from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import GradientBoostingClassifier

from sklearn.preprocessing import OrdinalEncoder
from numpy import asarray
from statistics import mean
from sklearn.metrics import accuracy_score,f1_score
from sklearn.model_selection import train_test_split
from numpy import set_printoptions
from statistics import mean
from sklearn.model_selection import GridSearchCV
from sklearn.utils import resample

In [47]:
import warnings
warnings.filterwarnings('ignore')

In [48]:
def _load_split(name, categorical, target):
    """Read the train and test CSVs for dataset `name` and cast columns to 'category'.

    The first CSV column is used as the index. Both the `categorical` columns
    and the `target` column are converted to pandas' 'category' dtype in the
    train frame and in the test frame. Returns `(train, test)`.
    """
    train = pd.read_csv("../data/train/noresampling/"+name+"_0.csv", index_col=0)
    test = pd.read_csv("../data/test/"+name+".csv", index_col=0)
    for frame in (train, test):
        frame[categorical] = frame[categorical].astype('category')
        frame[target] = frame[target].astype('category')
    return train, test


# ONLINE SHOPPING INTENTIONS

filename1 = 'online_shoppers_intentions'
target1 = 'Revenue'
numerical_features1 = [
    "Administrative", "Administrative_Duration", "Informational", "Informational_Duration",
    "ProductRelated", "ProductRelated_Duration", "BounceRates", "ExitRates", "PageValues", "SpecialDay",
]
categorical_features1 = ["OperatingSystems", "Browser", "Region", "TrafficType", "VisitorType", "Weekend", "Month"]
df1r0, df1t = _load_split(filename1, categorical_features1, target1)


# MARKETING CAMPAIGN

filename2 = 'marketing_campaign'
target2 = 'Teenhome'
numerical_features2 = [
    'Income', 'MntWines', 'MntFruits', 'MntMeatProducts', 'MntFishProducts', 'MntSweetProducts',
    'MntGoldProds', 'Year_Birth', 'Recency', 'NumDealsPurchases', 'NumWebPurchases',
    'NumCatalogPurchases', 'NumStorePurchases', 'NumWebVisitsMonth', 'Dt_Customer',
]
categorical_features2 = [
    'Education', 'Marital_Status', 'Kidhome', 'AcceptedCmp3',
    'AcceptedCmp4', 'AcceptedCmp5', 'AcceptedCmp1', 'AcceptedCmp2', 'Complain', 'Response',
]
df2r0, df2t = _load_split(filename2, categorical_features2, target2)


# HEART

filename3 = 'heart'
target3 = 'target'
numerical_features3 = ['trestbps', 'chol', 'thalach', 'oldpeak', 'age']
categorical_features3 = ['sex', 'cp', 'fbs', 'restecg', 'exang', 'slope', 'ca', 'thal']
df3r0, df3t = _load_split(filename3, categorical_features3, target3)

# Sanity Check
#df1r0

In [56]:
def ParameterTuning(dataset_name, dfr, dft, target, numerical_features, categorical_features, model, grid_params):
    """Grid-search `model` over `grid_params` on one training set and print the winner.

    Parameters
    ----------
    dataset_name : str
        Label printed in the banner line that precedes the search output.
    dfr : pandas.DataFrame
        Training data; must contain the feature columns and `target`.
    dft : pandas.DataFrame
        Test data. Currently unused by the search; the parameter is kept so
        existing calls keep working.
    target : str
        Name of the label column in `dfr`.
    numerical_features, categorical_features : list of str
        Column names; their concatenation (numerical first) is the feature set.
    model : estimator
        Unfitted scikit-learn estimator handed to `GridSearchCV`.
    grid_params : dict
        Parameter grid handed to `GridSearchCV`.

    Returns
    -------
    GridSearchCV
        The fitted search object, so callers can read `best_params_`,
        `best_score_`, `cv_results_`, etc. instead of parsing printed text.
    """
    print("_______________________________________________________________________________ Dataset:"+dataset_name)

    feature_columns = numerical_features+categorical_features
    Xr_train = dfr[feature_columns]
    yr_train = dfr[target]

    # 3-fold cross-validation, parallelised with n_jobs=-1.
    mod = GridSearchCV(model, grid_params, verbose=1, cv=3, n_jobs=-1)
    results_r = mod.fit(Xr_train, yr_train)
    print(results_r.best_params_)
    return results_r

In [None]:
# RF
param_grid_rf = {
    'max_depth': [5, 10, 15],
    'max_features': [2, 3, 4, 5],
    'min_samples_leaf': [3, 4, 5],
    'min_samples_split': [8, 10],
    'n_estimators': [100, 200]
}

# Run the same search on every dataset. A fresh estimator is built per dataset,
# and random_state is fixed so that the forest (and therefore the selected
# parameters) is repeatable from one run of the notebook to the next.
for dataset_name, train_df, test_df, target, numerical, categorical in (
    (filename3, df3r0, df3t, target3, numerical_features3, categorical_features3),
    (filename2, df2r0, df2t, target2, numerical_features2, categorical_features2),
    (filename1, df1r0, df1t, target1, numerical_features1, categorical_features1),
):
    ParameterTuning(dataset_name, train_df, test_df, target, numerical, categorical,
                    RandomForestClassifier(random_state=0), param_grid_rf)



_______________________________________________________________________________ Dataset:heart
Fitting 3 folds for each of 144 candidates, totalling 432 fits
{'max_depth': 5, 'max_features': 2, 'min_samples_leaf': 4, 'min_samples_split': 10, 'n_estimators': 200}
_______________________________________________________________________________ Dataset:marketing_campaign
Fitting 3 folds for each of 144 candidates, totalling 432 fits
{'max_depth': 15, 'max_features': 5, 'min_samples_leaf': 3, 'min_samples_split': 8, 'n_estimators': 200}
_______________________________________________________________________________ Dataset:online_shoppers_intentions
Fitting 3 folds for each of 144 candidates, totalling 432 fits


In [None]:
# GBE
param_grad_gbe = {
    "n_estimators":[5,50,250],
    "max_depth":[1,3,5,7],
    "learning_rate":[0.01,0.1,1]
}

# Run the same search on every dataset. A fresh estimator is built per dataset,
# and random_state is fixed so that repeated runs of the notebook select the
# same parameters.
for dataset_name, train_df, test_df, target, numerical, categorical in (
    (filename3, df3r0, df3t, target3, numerical_features3, categorical_features3),
    (filename2, df2r0, df2t, target2, numerical_features2, categorical_features2),
    (filename1, df1r0, df1t, target1, numerical_features1, categorical_features1),
):
    ParameterTuning(dataset_name, train_df, test_df, target, numerical, categorical,
                    GradientBoostingClassifier(random_state=0), param_grad_gbe)


In [None]:
# DT
param_grad_dt = {
    'criterion': ['gini', 'entropy'],
    'max_depth': range(1, 10),
    # An integer min_samples_split must be at least 2; scikit-learn rejects 1,
    # so including it only produced failed fits (hidden when warnings are
    # silenced) and wasted a ninth of the search.
    'min_samples_split': range(2, 10),
    'min_samples_leaf': range(1, 5),
}

# Run the same search on every dataset. A fresh estimator is built per dataset,
# and random_state is fixed so that tie-breaking between equally good splits is
# repeatable from run to run.
for dataset_name, train_df, test_df, target, numerical, categorical in (
    (filename3, df3r0, df3t, target3, numerical_features3, categorical_features3),
    (filename2, df2r0, df2t, target2, numerical_features2, categorical_features2),
    (filename1, df1r0, df1t, target1, numerical_features1, categorical_features1),
):
    ParameterTuning(dataset_name, train_df, test_df, target, numerical, categorical,
                    DecisionTreeClassifier(random_state=0), param_grad_dt)
