In [2]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import roc_auc_score, mean_squared_error, accuracy_score
from sklearn import ensemble, linear_model, neural_network, naive_bayes
from scipy.sparse import lil_matrix, csr_matrix, save_npz, load_npz
import time
import math
import collections
import itertools as it
#import xgboost as xgb
#from xgboost.sklearn import XGBClassifier

  from numpy.core.umath_tests import inner1d


In [5]:
# Feature matrices are stored as scipy sparse .npz files; test has no labels here.
X_train, X_val, X_test = [
    load_npz(npz_path)
    for npz_path in ('proper_datasets/X_train.npz',
                     'proper_datasets/X_val.npz',
                     'proper_datasets/X_test.npz')
]

# Targets are plain-text files read with np.loadtxt.
y_train, y_val = [
    np.loadtxt(csv_path)
    for csv_path in ('proper_datasets/y_train.csv',
                     'proper_datasets/y_val.csv')
]

# Cell output: number of training / validation labels.
len(y_train), len(y_val)

(2430981, 303925)

In [5]:
def build_combinations(param_grid):
    '''
    Create all possible hyperparameter combinations for a given model.

    param_grid maps each hyperparameter name to a sequence of candidate
    values.  The result is a list of dicts, one per element of the Cartesian
    product of those candidates.  Names are processed in sorted order, so the
    first sorted name varies slowest and the last one varies fastest.

    An empty grid yields exactly one empty combination ([{}]) instead of
    raising IndexError.  A name whose candidate list is empty yields [].
    '''
    names = sorted(param_grid)
    candidate_lists = [param_grid[name] for name in names]

    # it.product() with no arguments yields a single empty tuple, which
    # dict(zip(...)) turns into {} -- the "no overrides" combination.
    return [dict(zip(names, values)) for values in it.product(*candidate_lists)]

# Candidate base learners keyed by a short model id; every id must also be a
# key of clf_param_grid, which the training loop indexes with it.
#
# The OrderedDict is built from a list of (id, estimator) pairs rather than a
# dict literal so the iteration order is the written order on every Python
# version.
#
# Candidate ids not enabled in this run: knn, xgb1-xgb4 (XGBClassifier), rf,
# kersvm, linsvm, gpc, gnb, gbm, mlp_nn, et.
#
# NOTE(review): no random_state is passed to the estimators, so results may
# differ slightly between runs -- consider seeding them.
clf_models = collections.OrderedDict([
    ('logr', linear_model.LogisticRegression()),
    ('sgd', linear_model.SGDClassifier()),
])

# Hyperparameter search space per model id: parameter name -> values to try.
# Grids for models that are not enabled in this run are omitted.
clf_param_grid = dict(
    logr=dict(
        solver=['saga', 'sag'],
        C=[0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1e10],
    ),
    sgd=dict(
        loss=['log'],
        shuffle=[True, False],
        alpha=[0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1e10],
        penalty=['elasticnet'],
        l1_ratio=[0, 0.5, 1],
    ),
)

# Distinct class labels found in the training targets.
unique_ys = np.unique(y_train)

# 100 evenly spaced row offsets over the training set, i.e. the edges of 99
# contiguous mini-batches: batch k covers rows sgd_lower[k]:sgd_upper[k].
# Not used by the live training code in this cell; kept for mini-batch
# (partial_fit) experiments.
# dtype=int replaces the np.int alias, which newer NumPy releases removed.
batch_edges = np.linspace(0, len(y_train), 100, dtype=int)
sgd_lower = batch_edges[:-1]
sgd_upper = batch_edges[1:]

# Exhaustive grid search over every configured model: fit each hyperparameter
# combination on the training split, print validation ROC AUC and RMSE of the
# predicted probabilities, and save those probabilities to CSV.
for model_id, model_class in clf_models.items():

    param_grid = clf_param_grid[model_id]

    # An empty grid means "fit once with the estimator's current parameters".
    # The list is rebuilt for every model so combinations left over from a
    # previous model are never reused.
    combinations = build_combinations(param_grid) if param_grid else [{}]

    for i, combination in enumerate(combinations):

        comb_time = time.time()

        print(model_id, i, combination)

        # load the current parameter values into the model
        if combination:
            model_class.set_params(**combination)

        # Only these ids are fitted in this cell.  Skip anything else so that
        # probabilities from an earlier iteration are never written out under
        # another model's file name.
        if model_id not in ('sgd', 'logr', 'gnb'):
            continue

        model_class.fit(X_train, y_train)

        # second column of predict_proba is used as the score
        probs = model_class.predict_proba(X_val)[:, 1]
        print(roc_auc_score(y_val, probs), math.sqrt(mean_squared_error(y_val, probs)))

        np.savetxt('output/base_learners_probs/r1_sams_' + str(model_id) + '_' + str(i) + '.csv', probs, delimiter=',')

        print('Time: %.5f' % (time.time()-comb_time))

logr 0 {'C': 0.0001, 'solver': 'saga'}




0.6957531538557002 0.025771153405458685
Time: 319.99630
logr 1 {'C': 0.0001, 'solver': 'sag'}
0.709588356678439 0.025771141086961834
Time: 53.79408
logr 2 {'C': 0.001, 'solver': 'saga'}




0.768939906258383 0.02576332480652155
Time: 286.59739
logr 3 {'C': 0.001, 'solver': 'sag'}
0.7780029291932661 0.025764142586747637
Time: 112.74945
logr 4 {'C': 0.01, 'solver': 'saga'}




0.8141000953741624 0.025674517285461336
Time: 279.99101
logr 5 {'C': 0.01, 'solver': 'sag'}




0.8149908725130373 0.025679895188072777
Time: 252.17742
logr 6 {'C': 0.1, 'solver': 'saga'}




0.8050420111498807 0.02560388170711711
Time: 279.80700
logr 7 {'C': 0.1, 'solver': 'sag'}




0.8052286715914903 0.02560516187267451
Time: 254.11153
logr 8 {'C': 1, 'solver': 'saga'}




0.7991480267830026 0.0256393904464077
Time: 279.55999
logr 9 {'C': 1, 'solver': 'sag'}




0.7991447180098933 0.025639453174476846
Time: 246.42910
logr 10 {'C': 10, 'solver': 'saga'}




0.7953781280578646 0.025674036116218723
Time: 275.83978
logr 11 {'C': 10, 'solver': 'sag'}




0.794584120307903 0.025675039329844163
Time: 247.41715
logr 12 {'C': 100, 'solver': 'saga'}




0.7944665040184642 0.025680239826943656
Time: 272.23257
logr 13 {'C': 100, 'solver': 'sag'}




0.7931863022139474 0.025681976382137785
Time: 247.65216
logr 14 {'C': 10000000000.0, 'solver': 'saga'}




0.7943407543409393 0.025680957279657624
Time: 265.28217
logr 15 {'C': 10000000000.0, 'solver': 'sag'}




0.7930380430996546 0.025682616116769183
Time: 252.31843
sgd 0 {'alpha': 0.0001, 'l1_ratio': 0, 'loss': 'log', 'penalty': 'elasticnet', 'shuffle': True}




0.7826516413160858 0.02572416397351493
Time: 9.56355
sgd 1 {'alpha': 0.0001, 'l1_ratio': 0, 'loss': 'log', 'penalty': 'elasticnet', 'shuffle': False}




0.7816283013609684 0.02572578669136769
Time: 6.09935
sgd 2 {'alpha': 0.0001, 'l1_ratio': 0.5, 'loss': 'log', 'penalty': 'elasticnet', 'shuffle': True}




0.759234793897501 0.02573532629703822
Time: 9.94557
sgd 3 {'alpha': 0.0001, 'l1_ratio': 0.5, 'loss': 'log', 'penalty': 'elasticnet', 'shuffle': False}




0.7584678675589727 0.02573564254575731
Time: 6.83439
sgd 4 {'alpha': 0.0001, 'l1_ratio': 1, 'loss': 'log', 'penalty': 'elasticnet', 'shuffle': True}




0.7349877704160022 0.025714421425708414
Time: 9.50154
sgd 5 {'alpha': 0.0001, 'l1_ratio': 1, 'loss': 'log', 'penalty': 'elasticnet', 'shuffle': False}




0.6988804660239041 0.025753174176889708
Time: 6.30336
sgd 6 {'alpha': 0.001, 'l1_ratio': 0, 'loss': 'log', 'penalty': 'elasticnet', 'shuffle': True}




0.6618766634123335 0.02592810431920113
Time: 9.11952
sgd 7 {'alpha': 0.001, 'l1_ratio': 0, 'loss': 'log', 'penalty': 'elasticnet', 'shuffle': False}




0.6618280749756902 0.025928372441780433
Time: 6.00534
sgd 8 {'alpha': 0.001, 'l1_ratio': 0.5, 'loss': 'log', 'penalty': 'elasticnet', 'shuffle': True}




0.6608904045351642 0.026322838491512034
Time: 9.96257
sgd 9 {'alpha': 0.001, 'l1_ratio': 0.5, 'loss': 'log', 'penalty': 'elasticnet', 'shuffle': False}




0.660018355378075 0.026323308659649228
Time: 6.85239
sgd 10 {'alpha': 0.001, 'l1_ratio': 1, 'loss': 'log', 'penalty': 'elasticnet', 'shuffle': True}




0.6304863492246044 0.026366173536140892
Time: 9.29553
sgd 11 {'alpha': 0.001, 'l1_ratio': 1, 'loss': 'log', 'penalty': 'elasticnet', 'shuffle': False}




0.6284834820341606 0.026386319443092825
Time: 6.18835
sgd 12 {'alpha': 0.01, 'l1_ratio': 0, 'loss': 'log', 'penalty': 'elasticnet', 'shuffle': True}




0.6296261660124587 0.031272623874832435
Time: 8.99751
sgd 13 {'alpha': 0.01, 'l1_ratio': 0, 'loss': 'log', 'penalty': 'elasticnet', 'shuffle': False}




0.6296034772825668 0.031273248026564796
Time: 5.91134
sgd 14 {'alpha': 0.01, 'l1_ratio': 0.5, 'loss': 'log', 'penalty': 'elasticnet', 'shuffle': True}




0.634639324008852 0.04201053864935181
Time: 9.80856
sgd 15 {'alpha': 0.01, 'l1_ratio': 0.5, 'loss': 'log', 'penalty': 'elasticnet', 'shuffle': False}




0.6348135317280209 0.04200087419274423
Time: 6.60238
sgd 16 {'alpha': 0.01, 'l1_ratio': 1, 'loss': 'log', 'penalty': 'elasticnet', 'shuffle': True}




0.614287256206582 0.0499449647592608
Time: 9.28853
sgd 17 {'alpha': 0.01, 'l1_ratio': 1, 'loss': 'log', 'penalty': 'elasticnet', 'shuffle': False}




0.6137340635714089 0.04994772113704579
Time: 6.15935
sgd 18 {'alpha': 0.1, 'l1_ratio': 0, 'loss': 'log', 'penalty': 'elasticnet', 'shuffle': True}




0.6172113396185679 0.08705135995169357
Time: 9.04052
sgd 19 {'alpha': 0.1, 'l1_ratio': 0, 'loss': 'log', 'penalty': 'elasticnet', 'shuffle': False}




0.6172015925923644 0.08701534167680218
Time: 5.94534
sgd 20 {'alpha': 0.1, 'l1_ratio': 0.5, 'loss': 'log', 'penalty': 'elasticnet', 'shuffle': True}




0.5973883658256482 0.182903561437475
Time: 9.69655
sgd 21 {'alpha': 0.1, 'l1_ratio': 0.5, 'loss': 'log', 'penalty': 'elasticnet', 'shuffle': False}




0.5981630995647642 0.18275902459531124
Time: 6.59538
sgd 22 {'alpha': 0.1, 'l1_ratio': 1, 'loss': 'log', 'penalty': 'elasticnet', 'shuffle': True}




0.5778615679744405 0.2336376806605809
Time: 9.19653
sgd 23 {'alpha': 0.1, 'l1_ratio': 1, 'loss': 'log', 'penalty': 'elasticnet', 'shuffle': False}




0.578443912041662 0.23341802942052245
Time: 6.14435
sgd 24 {'alpha': 1, 'l1_ratio': 0, 'loss': 'log', 'penalty': 'elasticnet', 'shuffle': True}




0.6086477295313021 0.2702490420673
Time: 9.01552
sgd 25 {'alpha': 1, 'l1_ratio': 0, 'loss': 'log', 'penalty': 'elasticnet', 'shuffle': False}




0.6086457410075615 0.27009016625678806
Time: 5.92234
sgd 26 {'alpha': 1, 'l1_ratio': 0.5, 'loss': 'log', 'penalty': 'elasticnet', 'shuffle': True}




0.5 0.47954839617077405
Time: 9.01452
sgd 27 {'alpha': 1, 'l1_ratio': 0.5, 'loss': 'log', 'penalty': 'elasticnet', 'shuffle': False}




0.5 0.479380914173123
Time: 5.85934
sgd 28 {'alpha': 1, 'l1_ratio': 1, 'loss': 'log', 'penalty': 'elasticnet', 'shuffle': True}




0.5 0.47938456818867997
Time: 8.78650
sgd 29 {'alpha': 1, 'l1_ratio': 1, 'loss': 'log', 'penalty': 'elasticnet', 'shuffle': False}




0.5 0.479380914173123
Time: 5.74233
sgd 30 {'alpha': 10, 'l1_ratio': 0, 'loss': 'log', 'penalty': 'elasticnet', 'shuffle': True}




0.6053600641126133 0.45354467869442805
Time: 9.06052
sgd 31 {'alpha': 10, 'l1_ratio': 0, 'loss': 'log', 'penalty': 'elasticnet', 'shuffle': False}




0.6053600315138634 0.4534866230075153
Time: 5.92634
sgd 32 {'alpha': 10, 'l1_ratio': 0.5, 'loss': 'log', 'penalty': 'elasticnet', 'shuffle': True}




0.5 0.49723094107982313
Time: 8.99951
sgd 33 {'alpha': 10, 'l1_ratio': 0.5, 'loss': 'log', 'penalty': 'elasticnet', 'shuffle': False}




0.5 0.497230858503193
Time: 5.91134
sgd 34 {'alpha': 10, 'l1_ratio': 1, 'loss': 'log', 'penalty': 'elasticnet', 'shuffle': True}




0.5 0.49723205260138653
Time: 8.92651
sgd 35 {'alpha': 10, 'l1_ratio': 1, 'loss': 'log', 'penalty': 'elasticnet', 'shuffle': False}




0.5 0.497230858503193
Time: 5.71733
sgd 36 {'alpha': 100, 'l1_ratio': 0, 'loss': 'log', 'penalty': 'elasticnet', 'shuffle': True}




0.6048639437387303 0.4945388939923561
Time: 9.05452
sgd 37 {'alpha': 100, 'l1_ratio': 0, 'loss': 'log', 'penalty': 'elasticnet', 'shuffle': False}




0.6048639111399805 0.49453449997142374
Time: 5.89834
sgd 38 {'alpha': 100, 'l1_ratio': 0.5, 'loss': 'log', 'penalty': 'elasticnet', 'shuffle': True}




0.5 0.49939540149146106
Time: 9.02252
sgd 39 {'alpha': 100, 'l1_ratio': 0.5, 'loss': 'log', 'penalty': 'elasticnet', 'shuffle': False}




0.5 0.4993954050477959
Time: 5.90934
sgd 40 {'alpha': 100, 'l1_ratio': 1, 'loss': 'log', 'penalty': 'elasticnet', 'shuffle': True}




0.5 0.4993954197243582
Time: 8.81650
sgd 41 {'alpha': 100, 'l1_ratio': 1, 'loss': 'log', 'penalty': 'elasticnet', 'shuffle': False}




0.5 0.4993954050477959
Time: 5.69633
sgd 42 {'alpha': 10000000000.0, 'l1_ratio': 0, 'loss': 'log', 'penalty': 'elasticnet', 'shuffle': True}




0.6048078820386854 0.4999960523560448
Time: 8.97951
sgd 43 {'alpha': 10000000000.0, 'l1_ratio': 0, 'loss': 'log', 'penalty': 'elasticnet', 'shuffle': False}




0.6048077434939986 0.49999605235604355
Time: 5.93634
sgd 44 {'alpha': 10000000000.0, 'l1_ratio': 0.5, 'loss': 'log', 'penalty': 'elasticnet', 'shuffle': True}




0.5 0.4999960524052837
Time: 8.98851
sgd 45 {'alpha': 10000000000.0, 'l1_ratio': 0.5, 'loss': 'log', 'penalty': 'elasticnet', 'shuffle': False}




0.5 0.499996052405284
Time: 5.88834
sgd 46 {'alpha': 10000000000.0, 'l1_ratio': 1, 'loss': 'log', 'penalty': 'elasticnet', 'shuffle': True}




0.5 0.49999605240528355
Time: 8.86251
sgd 47 {'alpha': 10000000000.0, 'l1_ratio': 1, 'loss': 'log', 'penalty': 'elasticnet', 'shuffle': False}




0.5 0.499996052405284
Time: 5.73633


In [None]:
def build_combinations(param_grid):
    '''
    Create all possible hyperparameter combinations for a given model.

    param_grid maps each hyperparameter name to a sequence of candidate
    values.  Returns a list of dicts, one for every element of the Cartesian
    product of the candidates, with names taken in sorted order (the last
    sorted name changes fastest).

    An empty grid returns [{}] (a single "no overrides" combination) rather
    than raising IndexError.  A name with no candidates returns [].

    NOTE: the same helper is also defined in an earlier cell of this
    notebook; keep the two in sync or drop one of them.
    '''
    ordered_names = sorted(param_grid)
    value_lists = (param_grid[name] for name in ordered_names)

    combinations = []
    for values in it.product(*value_lists):
        # zip pairs each sorted name with its value; an empty product tuple
        # simply produces {}.
        combinations.append(dict(zip(ordered_names, values)))

    return combinations

# Candidate base learners for this cell, keyed by a short model id; every id
# must also be a key of clf_param_grid, which the training loop indexes with it.
#
# Built from a list of (id, estimator) pairs rather than a dict literal so the
# iteration order is the written order on every Python version.
#
# Candidate ids not enabled in this run: knn, xgb1-xgb4 (XGBClassifier), rf,
# kersvm, linsvm, gpc, gnb, gbm, logr, sgd, et.
clf_models = collections.OrderedDict([
    # original note: MLPClassifier does not exist in scikit-learn 0.16
    ('mlp_nn', neural_network.MLPClassifier(shuffle=True, early_stopping=True)),
])

# Hyperparameter search space per model id: parameter name -> values to try.
# Grids for models that are not enabled in this run are omitted.
clf_param_grid = dict(
    mlp_nn=dict(
        activation=['logistic', 'relu'],
        # 1-, 2- and 3-layer networks, each with equal-width layers of
        # 5, 10, 20, 100 or 200 units (all 1-layer shapes first, then 2, then 3).
        hidden_layer_sizes=[
            (width,) * depth
            for depth in (1, 2, 3)
            for width in (5, 10, 20, 100, 200)
        ],
        alpha=[0.0001, 0.001, 0.01, 0.1, 1],
    ),
)

# Distinct class labels found in the training targets.
unique_ys = np.unique(y_train)

# 100 evenly spaced row offsets over the training set, i.e. the edges of 99
# contiguous mini-batches: batch k covers rows sgd_lower[k]:sgd_upper[k].
# Not used by the live training code in this cell; kept for mini-batch
# (partial_fit) experiments.
# dtype=int replaces the np.int alias, which newer NumPy releases removed.
batch_edges = np.linspace(0, len(y_train), 100, dtype=int)
sgd_lower = batch_edges[:-1]
sgd_upper = batch_edges[1:]

# Exhaustive grid search over every configured model: fit each hyperparameter
# combination on the training split, print validation ROC AUC and RMSE of the
# predicted probabilities, and save validation and test probabilities to CSV.
for model_id, model_class in clf_models.items():

    param_grid = clf_param_grid[model_id]

    # An empty grid means "fit once with the estimator's current parameters".
    # The list is rebuilt for every model so combinations left over from a
    # previous model are never reused.
    combinations = build_combinations(param_grid) if param_grid else [{}]

    for i, combination in enumerate(combinations):

        comb_time = time.time()

        print(model_id, i, combination)

        # load the current parameter values into the model
        if combination:
            model_class.set_params(**combination)

        # Only the MLP is trained in this cell; any other id is skipped
        # without writing output files.
        if model_id != 'mlp_nn':
            continue

        model_class.fit(X_train, y_train)

        # second column of predict_proba is used as the score
        probs = model_class.predict_proba(X_val)[:, 1]
        print(roc_auc_score(y_val, probs), math.sqrt(mean_squared_error(y_val, probs)))

        test_probs = model_class.predict_proba(X_test)[:, 1]

        np.savetxt('output/base_learners_probs/r2_sams_' + str(model_id) + '_' + str(i) + '.csv', probs, delimiter=',')
        np.savetxt('output/base_learners_test_probs/r2_sams_' + str(model_id) + '_' + str(i) + '.csv', test_probs, delimiter=',')

        print('Time: %.5f' % (time.time()-comb_time))