# Ensemble modeling approach

In this exercise, we will try to ensemble predictions from the best models we trained in our first approach and will try to train a second tier model.

## Logistic Regression

In [1]:
import pickle
from sklearn.metrics import roc_auc_score

with open("/Volumes/Iomega_HDD/2016/Data science/Kaggle/User-click-detection-predictive-modeling/X_train_balanced_trans_pl2.pkl","rb") as f:
    X_train_balanced_trans_pl2 = pickle.load(f) 

with open("/Volumes/Iomega_HDD/2016/Data science/Kaggle/User-click-detection-predictive-modeling/y_train_balanced.pkl", "rb") as f:
    y_train_balanced = pickle.load(f) 

In [8]:
# Let's use C = 3.0589 to re-train the classifier and prepare another submission
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
import datetime

start = datetime.datetime.now()

logbal = LogisticRegression(verbose=10, n_jobs=3, C= 3.0589,penalty= "l1")

logbal.fit(X_train_balanced_trans_pl2, y_train_balanced)

end2 = datetime.datetime.now()
process_time = start - end2
print("Trained model, it took: " + str((process_time.seconds)/60) + " minutes.")

start = datetime.datetime.now()

diskname = "/Volumes/Iomega_HDD/2016/Data science/Kaggle/User-click-detection-predictive-modeling/"

# Test performance using the validation sets
# Load validation sets previously prepared


# Features:
with open("X_val1_trans_pl2.pkl","rb") as f:
    X_val1_trans_pl2 = pickle.load(f)
with open("X_val2_trans_pl2.pkl","rb") as f:
    X_val2_trans_pl2 = pickle.load(f) 
    
# Target labels:    
with open("y_val1.pkl","rb") as f:
    y_val1= pickle.load(f)
with open("y_val2.pkl","rb") as f:
    y_val2= pickle.load(f)
    
# Make predictions and calculate average valdation roc score 
# calculate out-of-the-box roc_score using validation set 1
probs = logbal.predict_proba(X_val1_trans_pl2)
probs = probs[:,1]
print("Val1 ROC score: " +str(roc_auc_score(y_val1,probs)))
       
# calculate out-of-the-box roc_score using validation set 2
probs = logbal.predict_proba(X_val2_trans_pl2)
probs = probs[:,1]
print("Val2 ROC score: " +str(roc_auc_score(y_val2,probs))) 


# Save the final classifier
with open((diskname + str("log_final.pkl")), "wb") as f:
    pickle.dump(logbal,f)
    
print("Saved final logistic regression classifier.")    

  " = {}.".format(self.n_jobs))


[LibLinear]Trained model, it took: 1430.3166666666666 minutes.
Val1 ROC score: 0.952874287995
Val2 ROC score: 0.951511484007
Saved final logistic regression classifier.


## Support Vector Machines

In [9]:
import datetime
start = datetime.datetime.now()

from sklearn.svm import LinearSVC
from sklearn.calibration import CalibratedClassifierCV
# Note that we can't get probabilities directly from this LinearSVC function
# We need to wrap into Calibrated Classifier 
# (see: https://stackoverflow.com/questions/35212213/sklearn-how-to-get-decision-probabilities-for-linearsvc-classifier)

lsvcbal = LinearSVC(verbose=10, C = 0.0564)

cal_lsvcbal = CalibratedClassifierCV(base_estimator = lsvcbal,
                                  cv = 3, # Also performs cross-validation
                                  method= "sigmoid") # We use sigmoid function to get probabilities

cal_lsvcbal.fit(X_train_balanced_trans_pl2,y_train_balanced)

end = datetime.datetime.now()
process_time = end - start
print("It took: " + str(process_time.seconds/60) + " minutes.")


# Make predictions and calculate average valdation roc score 
# calculate out-of-the-box roc_score using validation set 1
probs = cal_lsvcbal.predict_proba(X_val1_trans_pl2)
probs = probs[:,1]
print("Val1 ROC score: " +str(roc_auc_score(y_val1,probs)))
       
# calculate out-of-the-box roc_score using validation set 2
probs = cal_lsvcbal.predict_proba(X_val2_trans_pl2)
probs = probs[:,1]
print("Val2 ROC score: " +str(roc_auc_score(y_val2,probs)))


# Save the final classifier
with open((diskname + str("svm_final.pkl")), "wb") as f:
    pickle.dump(cal_lsvcbal,f)
    
print("Saved final SVM regression classifier.") 

[LibLinear][LibLinear][LibLinear]It took: 0.4 minutes.
Val1 ROC score: 0.952513357043
Val2 ROC score: 0.952253716778
Saved final SVM regression classifier.


## Building the ensemble classifier

We will use X_val1 predictions from the two of our best models to train a new classifier against the y_val1. We will try to optimize the performance of this new classifier using X_val2 and y_val2.

In [1]:
import pickle
# Features:
with open("X_val1_trans_pl2.pkl","rb") as f:
    X_val1_trans_pl2 = pickle.load(f)
with open("X_val2_trans_pl2.pkl","rb") as f:
    X_val2_trans_pl2 = pickle.load(f) 
    
# Target labels:    
with open("y_val1.pkl","rb") as f:
    y_val1= pickle.load(f)
with open("y_val2.pkl","rb") as f:
    y_val2= pickle.load(f)

In [2]:
# Write a function that return secondary features
diskname = "/Volumes/Iomega_HDD/2016/Data science/Kaggle/User-click-detection-predictive-modeling/"

def secondary_features(X_train):
    import pandas as pd
    # Load established classifiers
    with open((diskname + str("log_final.pkl")), "rb") as f:
        clf1 = pickle.load(f)
    with open((diskname + str("svm_final.pkl")), "rb") as f:
        clf2 = pickle.load(f)
    print("Loaded classifiers.")    
    # Collect prediction probabilities as new features    
    sec_features = pd.DataFrame()
    sec_features["f1"] = clf1.predict_proba(X_train)[:,1]
    sec_features["f2"] = clf2.predict_proba(X_train)[:,1]
    print("Collected features.")
    
    # Return new features as array
    return sec_features.values

In [3]:
X_train_ensemble = secondary_features(X_val1_trans_pl2)

Loaded classifiers.
Collected features.


In [4]:
X_train_ensemble.shape

(1000000, 2)

### Ensemble Logisticregression classifier

In [18]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
import datetime

start = datetime.datetime.now()

ensb = LogisticRegression(verbose=10)
ensb.fit(X_train_ensemble, y_val1)

end2 = datetime.datetime.now()
process_time = start - end2
print("Trained ensemble model, it took: " + str((process_time.seconds)/60) + " minutes.")

start = datetime.datetime.now()

diskname = "/Volumes/Iomega_HDD/2016/Data science/Kaggle/User-click-detection-predictive-modeling/"

# Test performance using the validation set 2
# Load validation set previously prepared

# Features:
with open("X_val2_trans_pl2.pkl","rb") as f:
    X_val2_trans_pl2 = pickle.load(f) 
# labels:    
with open("y_val2.pkl","rb") as f:
    y_val2= pickle.load(f)
    
# Make predictions and calculate average valdation roc score 
# calculate out-of-the-box roc_score using validation set 2
# Note that we are converting features to secondary features since we are using ensemble model

probs = ensb.predict_proba(secondary_features(X_val2_trans_pl2))[:,1]
print("Ensemble Val ROC score: " +str(roc_auc_score(y_val2,probs)))

[LibLinear]Trained ensemble model, it took: 1439.9666666666667 minutes.
Loaded classifiers.
Collected features.
Ensemble Val ROC score: 0.95217429678


### Ensemble SVM classifier

In [29]:
from sklearn.svm import LinearSVC  
from sklearn.metrics import roc_auc_score
import datetime

start = datetime.datetime.now()

lsvcbal = LinearSVC(verbose=10, C= 5)

ensb = CalibratedClassifierCV(base_estimator = lsvcbal,
                                  cv = 3, # Also performs cross-validation
                                  method= "sigmoid") # We use sigmoid function to get probabilities
ensb.fit(X_train_ensemble, y_val1)

end2 = datetime.datetime.now()
process_time = start - end2
print("Trained ensemble model, it took: " + str((process_time.seconds)/60) + " minutes.")

start = datetime.datetime.now()

diskname = "/Volumes/Iomega_HDD/2016/Data science/Kaggle/User-click-detection-predictive-modeling/"

# Test performance using the validation set 2
# Load validation set previously prepared

# Features:
with open("X_val2_trans_pl2.pkl","rb") as f:
    X_val2_trans_pl2 = pickle.load(f) 
# labels:    
with open("y_val2.pkl","rb") as f:
    y_val2= pickle.load(f)
    
# Make predictions and calculate average valdation roc score 
# calculate out-of-the-box roc_score using validation set 2
# Note that we are converting features to secondary features since we are using ensemble model

probs = ensb.predict_proba(secondary_features(X_val2_trans_pl2))[:,1]
print("Ensemble Val ROC score: " +str(roc_auc_score(y_val2,probs)))

[LibLinear][LibLinear][LibLinear]Trained ensemble model, it took: 1439.3833333333334 minutes.
Loaded classifiers.
Collected features.
Ensemble Val ROC score: 0.951845649207


### Ensemble Random Forest classifier

In [30]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score
import datetime

start = datetime.datetime.now()

ensb = RandomForestClassifier()
ensb.fit(X_train_ensemble, y_val1)

end2 = datetime.datetime.now()
process_time = start - end2
print("Trained ensemble model, it took: " + str((process_time.seconds)/60) + " minutes.")

start = datetime.datetime.now()

diskname = "/Volumes/Iomega_HDD/2016/Data science/Kaggle/User-click-detection-predictive-modeling/"

# Test performance using the validation set 2
# Load validation set previously prepared

# Features:
with open("X_val2_trans_pl2.pkl","rb") as f:
    X_val2_trans_pl2 = pickle.load(f) 
# labels:    
with open("y_val2.pkl","rb") as f:
    y_val2= pickle.load(f)
    
# Make predictions and calculate average valdation roc score 
# calculate out-of-the-box roc_score using validation set 2
# Note that we are converting features to secondary features since we are using ensemble model

probs = ensb.predict_proba(secondary_features(X_val2_trans_pl2))[:,1]
print("Ensemble Val ROC score: " +str(roc_auc_score(y_val2,probs)))

Trained ensemble model, it took: 1439.1166666666666 minutes.
Loaded classifiers.
Collected features.
Ensemble Val ROC score: 0.815179565761


### Ensemble Naive Bayes classifier

In [36]:
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score
import datetime

start = datetime.datetime.now()

ensb = MultinomialNB()
ensb.fit(X_train_ensemble, y_val1)

end2 = datetime.datetime.now()
process_time = start - end2
print("Trained ensemble model, it took: " + str((process_time.seconds)/60) + " minutes.")

start = datetime.datetime.now()

diskname = "/Volumes/Iomega_HDD/2016/Data science/Kaggle/User-click-detection-predictive-modeling/"

# Test performance using the validation set 2
# Load validation set previously prepared

# Features:
with open("X_val2_trans_pl2.pkl","rb") as f:
    X_val2_trans_pl2 = pickle.load(f) 
# labels:    
with open("y_val2.pkl","rb") as f:
    y_val2= pickle.load(f)
    
# Make predictions and calculate average valdation roc score 
# calculate out-of-the-box roc_score using validation set 2
# Note that we are converting features to secondary features since we are using ensemble model

probs = ensb.predict_proba(secondary_features(X_val2_trans_pl2))[:,1]
print("Ensemble Val ROC score: " +str(roc_auc_score(y_val2,probs)))

Trained ensemble model, it took: 1439.9833333333333 minutes.
Loaded classifiers.
Collected features.
Ensemble Val ROC score: 0.557963136275


### Ensemble XGboost classifier

In [43]:
import warnings
warnings.filterwarnings('ignore')
import xgboost as xgb
from sklearn.metrics import roc_auc_score
import datetime

start = datetime.datetime.now()

ensb = xgb.XGBClassifier()
ensb.fit(X_train_ensemble, y_val1)

end2 = datetime.datetime.now()
process_time = start - end2
print("Trained ensemble model, it took: " + str((process_time.seconds)/60) + " minutes.")

start = datetime.datetime.now()

diskname = "/Volumes/Iomega_HDD/2016/Data science/Kaggle/User-click-detection-predictive-modeling/"

# Test performance using the validation set 2
# Load validation set previously prepared

# Features:
with open("X_val2_trans_pl2.pkl","rb") as f:
    X_val2_trans_pl2 = pickle.load(f) 
# labels:    
with open("y_val2.pkl","rb") as f:
    y_val2= pickle.load(f)
    
# Make predictions and calculate average valdation roc score 
# calculate out-of-the-box roc_score using validation set 2
# Note that we are converting features to secondary features since we are using ensemble model

probs = ensb.predict_proba(secondary_features(X_val2_trans_pl2))[:,1]
print("Ensemble Val ROC score: " +str(roc_auc_score(y_val2,probs)))

Trained ensemble model, it took: 1439.2333333333333 minutes.
Loaded classifiers.
Collected features.
Ensemble Val ROC score: 0.950612549739


### Ensemble QDA classifier

In [55]:
import warnings
warnings.filterwarnings('ignore')
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.metrics import roc_auc_score
import datetime

start = datetime.datetime.now()

ensb = QuadraticDiscriminantAnalysis(reg_param= 0.005)
ensb.fit(X_train_ensemble, y_val1)

end2 = datetime.datetime.now()
process_time = start - end2
print("Trained ensemble model, it took: " + str((process_time.seconds)/60) + " minutes.")

start = datetime.datetime.now()

diskname = "/Volumes/Iomega_HDD/2016/Data science/Kaggle/User-click-detection-predictive-modeling/"

# Test performance using the validation set 2
# Load validation set previously prepared

# Features:
with open("X_val2_trans_pl2.pkl","rb") as f:
    X_val2_trans_pl2 = pickle.load(f) 
# labels:    
with open("y_val2.pkl","rb") as f:
    y_val2= pickle.load(f)
    
# Make predictions and calculate average valdation roc score 
# calculate out-of-the-box roc_score using validation set 2
# Note that we are converting features to secondary features since we are using ensemble model

probs = ensb.predict_proba(secondary_features(X_val2_trans_pl2))[:,1]
print("Ensemble Val ROC score: " +str(roc_auc_score(y_val2,probs)))

Trained ensemble model, it took: 1439.9833333333333 minutes.
Loaded classifiers.
Collected features.
Ensemble Val ROC score: 0.952400780002


In [77]:
# Hyperparameter optimization
# We start by defining the score we want to be maximized using Bayesian Optimization
# Return validated 'roc_auc' score from Classifier
# Note that the parameters we will optimize are called as generic arguments

seed = 112 # Random seed

def maximizer(reg_param,p1,p2):
    from sklearn.metrics import roc_auc_score
    import numpy as np
    
    estimator_function = QuadraticDiscriminantAnalysis(reg_param= reg_param, priors = [p1,p2])
    
    # Fit the estimator
    estimator_function.fit(X_train_ensemble, y_val1)
        
    # calculate out-of-the-box roc_score using validation set 2
    probs = estimator_function.predict_proba(secondary_features(X_val2_trans_pl2))[:,1]
    val2_roc = roc_auc_score(y_val2,probs)
    
    # return the validation score to be maximized 
    return val2_roc

import warnings
warnings.filterwarnings('ignore')

from bayes_opt import BayesianOptimization

# alpha is a parameter for the gaussian process
# Note that this is itself a hyperparemter that can be optimized.
gp_params = {"alpha": 1e-10}

# We create the BayesianOptimization objects using the functions that utilize
# the respective classifiers and return cross-validated scores to be optimized.

seed = 112 # Random seed

# We create the bayes_opt object and pass the function to be maximized
# together with the parameters names and their bounds.

hyperparameter_space = {
        'reg_param': (0.004,0.005),
        "p1": (0.8,0.999),
        "p2": (0.01,0.2)
}

BO = BayesianOptimization(f = maximizer, 
                             pbounds =  hyperparameter_space,
                             random_state = seed,
                             verbose = 10)

# Finally we call .maximize method of the optimizer with the appropriate arguments

BO.maximize(init_points=10,n_iter=10,acq='ucb', kappa= 5, **gp_params)

[31mInitialization[0m
[94m-------------------------------------------------------------------[0m
 Step |   Time |      Value |        p1 |        p2 |   reg_param | 
Loaded classifiers.
Collected features.
    1 | 00m05s | [35m   0.95241[0m | [32m   0.8005[0m | [32m   0.0819[0m | [32m     0.0044[0m | 
Loaded classifiers.
Collected features.


KeyboardInterrupt: 

In [78]:
# Prepare an ensemble prediction using QDA classifier
# Train with tuned parameter
ensb = QuadraticDiscriminantAnalysis(reg_param= 0.0041)
ensb.fit(X_train_ensemble, y_val1)

QuadraticDiscriminantAnalysis(priors=None, reg_param=0.0041,
               store_covariance=False, store_covariances=None, tol=0.0001)

In [81]:
# Prepare a submission using the tuned QDA classifier

import pandas as pd
click_id = pd.read_hdf("/Volumes/Iomega_HDD/2016/Data science/Kaggle/User-click-detection-predictive-modeling/click_id.h5")
def prepare_submission(predictions,filename = "new_submission", click_id = click_id):
    """predictions: a list containing the predicted probabilities in the test set. """
    is_attributed = pd.Series(predictions)
    submission_frame = pd.DataFrame()
    submission_frame["click_id"] = click_id
    submission_frame["is_attributed"] = is_attributed.apply(lambda x: format(x,".9f"))  # Reformat the probabilities upto the 9th decimal point
    filename = filename + ".csv"
    submission_frame.to_csv(filename,index = False)
    print("File saved as :" + filename)


# let's perform a prediction using the test set
# Load the sparse matrix 
import scipy.sparse as sp
test_proc_p12 = sp.load_npz("/Volumes/Iomega_HDD/2016/Data science/Kaggle/User-click-detection-predictive-modeling/test_proc_pl2.npz").tocsr()
print("Loaded processed test sparse matrix.")

Loaded processed test sparse matrix.


In [82]:
probs = ensb.predict_proba(secondary_features(test_proc_p12))[:,1]
print("Prepared QDA ensemble probs.")
# Prepare the submission file
prepare_submission(predictions= probs, 
                   filename= "/Volumes/Iomega_HDD/2016/Data science/Kaggle/User-click-detection-predictive-modeling/ENSB_QDA_submission") 

Loaded classifiers.
Collected features.
Prepared QDA ensemble probs.
File saved as :/Volumes/Iomega_HDD/2016/Data science/Kaggle/User-click-detection-predictive-modeling/ENSB_QDA_submission.csv


This submission scored 0.9605.

### Ensemble KNN classifier

In [6]:
import warnings
warnings.filterwarnings('ignore')
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import roc_auc_score
import datetime

start = datetime.datetime.now()

ensb = KNeighborsClassifier(n_neighbors= 150)
ensb.fit(X_train_ensemble, y_val1)

end2 = datetime.datetime.now()
process_time = start - end2
print("Trained ensemble model, it took: " + str((process_time.seconds)/60) + " minutes.")

start = datetime.datetime.now()

diskname = "/Volumes/Iomega_HDD/2016/Data science/Kaggle/User-click-detection-predictive-modeling/"

# Test performance using the validation set 2
# Load validation set previously prepared

# Features:
with open("X_val2_trans_pl2.pkl","rb") as f:
    X_val2_trans_pl2 = pickle.load(f) 
# labels:    
with open("y_val2.pkl","rb") as f:
    y_val2= pickle.load(f)
    
# Make predictions and calculate average valdation roc score 
# calculate out-of-the-box roc_score using validation set 2
# Note that we are converting features to secondary features since we are using ensemble model

probs = ensb.predict_proba(secondary_features(X_val2_trans_pl2))[:,1]
print("Ensemble Val ROC score: " +str(roc_auc_score(y_val2,probs)))

Trained ensemble model, it took: 1439.95 minutes.
Loaded classifiers.
Collected features.


KeyboardInterrupt: 

### Ensemble LDA classifier

In [13]:
import warnings
warnings.filterwarnings('ignore')
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import roc_auc_score
import datetime

start = datetime.datetime.now()

ensb = LinearDiscriminantAnalysis(shrinkage= 'auto', solver= 'lsqr')
ensb.fit(X_train_ensemble, y_val1)

end2 = datetime.datetime.now()
process_time = start - end2
print("Trained ensemble model, it took: " + str((process_time.seconds)/60) + " minutes.")

start = datetime.datetime.now()

diskname = "/Volumes/Iomega_HDD/2016/Data science/Kaggle/User-click-detection-predictive-modeling/"

# Test performance using the validation set 2
# Load validation set previously prepared

# Features:
with open("X_val2_trans_pl2.pkl","rb") as f:
    X_val2_trans_pl2 = pickle.load(f) 
# labels:    
with open("y_val2.pkl","rb") as f:
    y_val2= pickle.load(f)
    
# Make predictions and calculate average valdation roc score 
# calculate out-of-the-box roc_score using validation set 2
# Note that we are converting features to secondary features since we are using ensemble model

probs = ensb.predict_proba(secondary_features(X_val2_trans_pl2))[:,1]
print("Ensemble Val ROC score: " +str(roc_auc_score(y_val2,probs)))

Trained ensemble model, it took: 1439.9833333333333 minutes.
Loaded classifiers.
Collected features.
Ensemble Val ROC score: 0.952363803875


### Ensemble Gaussian Naive Bayes Classifier

In [20]:
from sklearn.naive_bayes import GaussianNB
import warnings
warnings.filterwarnings('ignore')
from sklearn.metrics import roc_auc_score
import datetime

start = datetime.datetime.now()

ensb = GaussianNB(priors = [0.999999,0.000001])
ensb.fit(X_train_ensemble, y_val1)

end2 = datetime.datetime.now()
process_time = start - end2
print("Trained ensemble model, it took: " + str((process_time.seconds)/60) + " minutes.")

start = datetime.datetime.now()

diskname = "/Volumes/Iomega_HDD/2016/Data science/Kaggle/User-click-detection-predictive-modeling/"

# Test performance using the validation set 2
# Load validation set previously prepared

# Features:
with open("X_val2_trans_pl2.pkl","rb") as f:
    X_val2_trans_pl2 = pickle.load(f) 
# labels:    
with open("y_val2.pkl","rb") as f:
    y_val2= pickle.load(f)
    
# Make predictions and calculate average valdation roc score 
# calculate out-of-the-box roc_score using validation set 2
# Note that we are converting features to secondary features since we are using ensemble model

probs = ensb.predict_proba(secondary_features(X_val2_trans_pl2))[:,1]
print("Ensemble Val ROC score: " +str(roc_auc_score(y_val2,probs)))

Trained ensemble model, it took: 1439.9833333333333 minutes.
Loaded classifiers.
Collected features.
Ensemble Val ROC score: 0.952297825127


### Ensemble MLPClassifier

In [20]:
from sklearn.neural_network import MLPClassifier
import warnings
warnings.filterwarnings('ignore')
from sklearn.metrics import roc_auc_score
import datetime

start = datetime.datetime.now()

ensb = MLPClassifier(alpha = 0.1, hidden_layer_sizes=(10,10,10,))
ensb.fit(X_train_ensemble, y_val1)

end2 = datetime.datetime.now()
process_time = start - end2
print("Trained ensemble model, it took: " + str((process_time.seconds)/60) + " minutes.")

start = datetime.datetime.now()

diskname = "/Volumes/Iomega_HDD/2016/Data science/Kaggle/User-click-detection-predictive-modeling/"

# Test performance using the validation set 2
# Load validation set previously prepared

# Features:
with open("X_val2_trans_pl2.pkl","rb") as f:
    X_val2_trans_pl2 = pickle.load(f) 
# labels:    
with open("y_val2.pkl","rb") as f:
    y_val2= pickle.load(f)
    
# Make predictions and calculate average valdation roc score 
# calculate out-of-the-box roc_score using validation set 2
# Note that we are converting features to secondary features since we are using ensemble model

probs = ensb.predict_proba(secondary_features(X_val2_trans_pl2))[:,1]
print("Ensemble Val ROC score: " +str(roc_auc_score(y_val2,probs)))


Trained ensemble model, it took: 1439.5666666666666 minutes.
Loaded classifiers.
Collected features.
Ensemble Val ROC score: 0.95230705822


In [14]:
ensb.get_params


<bound method BaseEstimator.get_params of MLPClassifier(activation='relu', alpha=0.1, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(10, 10, 10), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.5,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)>

In [18]:
# Hyperparameter optimization
# We start by defining the score we want to be maximized using Bayesian Optimization
# Return validated 'roc_auc' score from Classifier
# Note that the parameters we will optimize are called as generic arguments

seed = 112 # Random seed

def maximizer(alpha,N1,N2,N3):
    from sklearn.metrics import roc_auc_score
    import numpy as np
    
    estimator_function = MLPClassifier(alpha = alpha,
                                       hidden_layer_sizes=(int(N1),int(N2),int(N3),),
                                       verbose = True, warm_start = True)
                                       
    
    # Fit the estimator
    estimator_function.fit(X_train_ensemble, y_val1)
        
    # calculate out-of-the-box roc_score using validation set 2
    probs = estimator_function.predict_proba(secondary_features(X_val2_trans_pl2))[:,1]
    val2_roc = roc_auc_score(y_val2,probs)
    
    # return the validation score to be maximized 
    return val2_roc

import warnings
warnings.filterwarnings('ignore')

from bayes_opt import BayesianOptimization

# alpha is a parameter for the gaussian process
# Note that this is itself a hyperparemter that can be optimized.
gp_params = {"alpha": 1e-10}

# We create the BayesianOptimization objects using the functions that utilize
# the respective classifiers and return cross-validated scores to be optimized.

seed = 112 # Random seed

# We create the bayes_opt object and pass the function to be maximized
# together with the parameters names and their bounds.

hyperparameter_space = {
        'alpha': (0.00001,1),
        'N1': (10,200),
        'N2': (10,200),
        'N3': (10,200)
}

BO = BayesianOptimization(f = maximizer, 
                             pbounds =  hyperparameter_space,
                             random_state = seed,
                             verbose = 10)

# Finally we call .maximize method of the optimizer with the appropriate arguments

BO.maximize(init_points=100,n_iter=100,acq='ucb', kappa= 5, **gp_params)

[31mInitialization[0m
[94m-----------------------------------------------------------------------------[0m
 Step |   Time |      Value |        N1 |        N2 |        N3 |     alpha | 
Iteration 1, loss = 0.02328634
Iteration 2, loss = 0.01417183
Iteration 3, loss = 0.01323872
Iteration 4, loss = 0.01295399
Iteration 5, loss = 0.01282552
Iteration 6, loss = 0.01278816
Iteration 7, loss = 0.01274812
Iteration 8, loss = 0.01273363
Training loss did not improve more than tol=0.000100 for two consecutive epochs. Stopping.
Loaded classifiers.
Collected features.
    1 | 04m06s | [35m   0.50000[0m | [32m 138.1691[0m | [32m 146.4545[0m | [32m 163.6315[0m | [32m   0.3751[0m | 
Iteration 1, loss = 0.02400890
Iteration 2, loss = 0.01370346
Iteration 3, loss = 0.01319368
Iteration 4, loss = 0.01297327
Iteration 5, loss = 0.01287496
Iteration 6, loss = 0.01281547
Iteration 7, loss = 0.01278381
Training loss did not improve more than tol=0.000100 for two consecutive epochs. Stopping.

Collected features.
   18 | 01m51s |    0.04766 |   39.3856 |   70.8663 |  185.0109 |    0.9558 | 
Iteration 1, loss = 0.02696712
Iteration 2, loss = 0.01435707
Iteration 3, loss = 0.01354445
Iteration 4, loss = 0.01319261
Iteration 5, loss = 0.01300981
Iteration 6, loss = 0.01289042
Iteration 7, loss = 0.01281019
Iteration 8, loss = 0.01276652
Iteration 9, loss = 0.01272297
Training loss did not improve more than tol=0.000100 for two consecutive epochs. Stopping.
Loaded classifiers.
Collected features.
   19 | 03m06s |    0.95212 |  190.6947 |  147.1240 |   34.7259 |    0.8206 | 
Iteration 1, loss = 0.02259393
Iteration 2, loss = 0.01470273
Iteration 3, loss = 0.01462508
Iteration 4, loss = 0.01404491
Iteration 5, loss = 0.01332289
Iteration 6, loss = 0.01305312
Iteration 7, loss = 0.01293119
Iteration 8, loss = 0.01283964
Iteration 9, loss = 0.01278164
Iteration 10, loss = 0.01273478
Training loss did not improve more than tol=0.000100 for two consecutive epochs. Stopping.
Loaded cla

Iteration 7, loss = 0.01279173
Training loss did not improve more than tol=0.000100 for two consecutive epochs. Stopping.
Loaded classifiers.
Collected features.
   37 | 01m04s |    0.04770 |  128.1215 |   14.6908 |  107.9998 |    0.6837 | 
Iteration 1, loss = 0.02865082
Iteration 2, loss = 0.01415810
Iteration 3, loss = 0.01339232
Iteration 4, loss = 0.01309542
Iteration 5, loss = 0.01295149
Iteration 6, loss = 0.01286207
Iteration 7, loss = 0.01279973
Iteration 8, loss = 0.01276512
Training loss did not improve more than tol=0.000100 for two consecutive epochs. Stopping.
Loaded classifiers.
Collected features.
   38 | 01m26s |    0.95231 |   74.0596 |   58.1342 |   82.8534 |    0.7220 | 
Iteration 1, loss = 0.01768165
Iteration 2, loss = 0.01241947
Iteration 3, loss = 0.01236736
Iteration 4, loss = 0.01236734
Iteration 5, loss = 0.01234383
Training loss did not improve more than tol=0.000100 for two consecutive epochs. Stopping.
Loaded classifiers.
Collected features.
   39 | 02m12s 

Iteration 5, loss = 0.00895295
Training loss did not improve more than tol=0.000100 for two consecutive epochs. Stopping.
Loaded classifiers.
Collected features.
   56 | 10m29s |    0.95231 |  125.7681 |  177.4500 |  123.0401 |    0.0614 | 
Iteration 1, loss = 0.02826262
Iteration 2, loss = 0.01393317
Iteration 3, loss = 0.01326702
Iteration 4, loss = 0.01301034
Iteration 5, loss = 0.01289063
Iteration 6, loss = 0.01284299
Iteration 7, loss = 0.01279609
Iteration 8, loss = 0.01275392
Training loss did not improve more than tol=0.000100 for two consecutive epochs. Stopping.
Loaded classifiers.
Collected features.
   57 | 02m59s |    0.95231 |  157.9416 |  120.3326 |  111.7916 |    0.9003 | 
Iteration 1, loss = 0.03073818
Iteration 2, loss = 0.01420939
Loaded classifiers.
Collected features.


KeyboardInterrupt: 

In [21]:
# Prepare a submission using the tuned MLPC classifier

import pandas as pd
click_id = pd.read_hdf("/Volumes/Iomega_HDD/2016/Data science/Kaggle/User-click-detection-predictive-modeling/click_id.h5")
def prepare_submission(predictions,filename = "new_submission", click_id = click_id):
    """predictions: a list containing the predicted probabilities in the test set. """
    is_attributed = pd.Series(predictions)
    submission_frame = pd.DataFrame()
    submission_frame["click_id"] = click_id
    submission_frame["is_attributed"] = is_attributed.apply(lambda x: format(x,".9f"))  # Reformat the probabilities upto the 9th decimal point
    filename = filename + ".csv"
    submission_frame.to_csv(filename,index = False)
    print("File saved as :" + filename)


# let's perform a prediction using the test set
# Load the sparse matrix 
import scipy.sparse as sp
test_proc_p12 = sp.load_npz("/Volumes/Iomega_HDD/2016/Data science/Kaggle/User-click-detection-predictive-modeling/test_proc_pl2.npz").tocsr()
print("Loaded processed test sparse matrix.")

probs = ensb.predict_proba(secondary_features(test_proc_p12))[:,1]
print("Prepared MLPC ensemble probs.")
# Prepare the submission file
prepare_submission(predictions= probs, 
                   filename= "/Volumes/Iomega_HDD/2016/Data science/Kaggle/User-click-detection-predictive-modeling/ENSB_MLPC_submission") 



Loaded processed test sparse matrix.
Loaded classifiers.
Collected features.
Prepared MLPC ensemble probs.
File saved as :/Volumes/Iomega_HDD/2016/Data science/Kaggle/User-click-detection-predictive-modeling/ENSB_MLPC_submission.csv
