# Setup

In [None]:
# Pinned to the version the recorded run resolved (0.8.1) so a fresh kernel reproduces it;
# %pip installs into the environment of the running kernel.
%pip install -q scikit-optimize==0.8.1

Collecting scikit-optimize
  Downloading scikit_optimize-0.8.1-py2.py3-none-any.whl (101 kB)
[?25l[K     |███▎                            | 10 kB 24.0 MB/s eta 0:00:01[K     |██████▌                         | 20 kB 8.6 MB/s eta 0:00:01[K     |█████████▊                      | 30 kB 7.4 MB/s eta 0:00:01[K     |█████████████                   | 40 kB 6.8 MB/s eta 0:00:01[K     |████████████████▏               | 51 kB 2.7 MB/s eta 0:00:01[K     |███████████████████▍            | 61 kB 3.0 MB/s eta 0:00:01[K     |██████████████████████▊         | 71 kB 2.9 MB/s eta 0:00:01[K     |██████████████████████████      | 81 kB 3.3 MB/s eta 0:00:01[K     |█████████████████████████████▏  | 92 kB 3.2 MB/s eta 0:00:01[K     |████████████████████████████████| 101 kB 2.7 MB/s 
Collecting pyaml>=16.9
  Downloading pyaml-21.8.3-py2.py3-none-any.whl (17 kB)
Installing collected packages: pyaml, scikit-optimize
Successfully installed pyaml-21.8.3 scikit-optimize-0.8.1


In [None]:
# Replaces the preinstalled scikit-learn with the pinned 0.24 release;
# %pip installs into the environment of the running kernel.
%pip install -q scikit-learn==0.24

Collecting scikit-learn==0.24
  Downloading scikit_learn-0.24.0-cp37-cp37m-manylinux2010_x86_64.whl (22.3 MB)
[K     |████████████████████████████████| 22.3 MB 1.6 MB/s 
Collecting threadpoolctl>=2.0.0
  Downloading threadpoolctl-2.2.0-py3-none-any.whl (12 kB)
Installing collected packages: threadpoolctl, scikit-learn
  Attempting uninstall: scikit-learn
    Found existing installation: scikit-learn 0.22.2.post1
    Uninstalling scikit-learn-0.22.2.post1:
      Successfully uninstalled scikit-learn-0.22.2.post1
Successfully installed scikit-learn-0.24.0 threadpoolctl-2.2.0


In [None]:
# Pinned to the version the recorded run resolved (0.3) so a fresh kernel reproduces it;
# %pip installs into the environment of the running kernel.
%pip install -q Boruta==0.3

Collecting Boruta
  Downloading Boruta-0.3-py3-none-any.whl (56 kB)
[K     |████████████████████████████████| 56 kB 1.8 MB/s 
Installing collected packages: Boruta
Successfully installed Boruta-0.3


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import joblib as jb
import os
from sklearn.feature_selection import mutual_info_classif
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, f1_score, precision_recall_curve, confusion_matrix
from sklearn.base import BaseEstimator, TransformerMixin
from skopt import gp_minimize
from sklearn.cluster import KMeans
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from boruta import BorutaPy
from sklearn.feature_selection import SelectFromModel, SequentialFeatureSelector, RFECV

## Important parameters and functions

In [None]:
# Project root; the datasets/ and trees/ folders read by later cells are resolved relative to it.
# NOTE(review): presumably a mounted Google Drive folder in Colab — adjust when running elsewhere.
PATH = "/content/drive/MyDrive/Colab Notebooks/Porto_Seguro_competition/"
def return_var_names(x):
  """Turn a sequence of feature indices into their ``'var<index>'`` column names.

  Args:
    x: indexable sequence (e.g. a tuple of ints) of feature indices.

  Returns:
    A tuple of strings, one per element of ``x``, in the same order.
  """
  return tuple('var' + str(x[pos]) for pos in range(len(x)))

# Index tuples of the features whose values are concatenated into combined
# categorical columns (see CombiningColumns). Each tuple appears once: the
# original list repeated eight of the triples, which only recomputed the same
# combined column a second time.
list_combined_idx = [
    (1, 7), (1, 20), (7, 8), (7, 20), (7, 23), (7, 28), (7, 29), (7, 39),
    (1, 7, 8), (1, 7, 14), (1, 7, 20), (1, 7, 23), (1, 7, 28), (1, 7, 29),
    (1, 7, 31), (1, 7, 39), (1, 8, 20), (1, 8, 23), (1, 20, 23), (1, 20, 28),
    (1, 20, 29), (1, 20, 39), (1, 23, 28), (2, 3, 8),
]
# Same combinations expressed as tuples of column names ('var1', 'var7'), ...
list_combined = [return_var_names(x) for x in list_combined_idx]

In [None]:
def evaluate(y_pred, y_true, plot_matrix=True):
    """Score binary predictions with precision and F1.

    Note the argument order: predictions first, ground truth second.

    Args:
        y_pred: predicted labels.
        y_true: true labels.
        plot_matrix: when True, also draw the confusion matrix as a
            grayscale image and show it.

    Returns:
        Tuple ``(precision, f1)``.
    """
    metrics = (precision_score(y_true, y_pred), f1_score(y_true, y_pred))
    if not plot_matrix:
        return metrics

    plt.matshow(confusion_matrix(y_true, y_pred), cmap=plt.cm.gray)
    plt.show()
    return metrics

def plot_precision_recall_vs_threshold(precisions, recalls, thresholds, thrh):
    """Plot precision and recall against the decision threshold and mark ``thrh``.

    Args:
        precisions: precision values; the last element is not plotted
            (``precisions[:-1]`` is drawn against ``thresholds``).
        recalls: recall values, same layout as ``precisions``.
        thresholds: decision thresholds, one fewer than ``precisions``.
        thrh: threshold to highlight with red guide lines and markers.

    The highlighted precision/recall are read at the entry of ``thresholds``
    closest to ``thrh``. An exact match gives the same index as before; a value
    that is not present in ``thresholds`` no longer falls back silently to
    index 0.
    """
    plt.figure(figsize=(8, 4))
    plt.axis([0, 1.1, 0, 1])

    thresholds = np.asarray(thresholds)
    # argmin returns the first closest entry, i.e. the first exact match when one exists.
    nearest = int(np.argmin(np.abs(thresholds - thrh)))
    precision_by_thrs = precisions[nearest]
    recall_by_thrs = recalls[nearest]

    # Red dotted guides from the axes to the highlighted points.
    plt.plot([thrh, thrh], [0., precision_by_thrs], "r:")
    plt.plot([thrh, thrh], [0., recall_by_thrs], "r:")
    plt.plot([0, thrh], [precision_by_thrs, precision_by_thrs], "r:")
    plt.plot([0, thrh], [recall_by_thrs, recall_by_thrs], "r:")
    plt.plot([thrh], [precision_by_thrs], "ro")
    plt.plot([thrh], [recall_by_thrs], "ro")

    plt.plot(thresholds, precisions[:-1], "b--", label="Precision")
    plt.plot(thresholds, recalls[:-1], "g-", label="Recall")
    plt.legend(loc="center right", fontsize=14)
    plt.xlabel("Threshold")
    plt.ylabel("Value")
    plt.grid(True)
    
def better_threshold(precisions, recalls, thresholds):
    """Return the threshold with the highest F1 score, and that score.

    Args:
        precisions: precision per candidate threshold.
        recalls: recall per candidate threshold.
        thresholds: candidate thresholds. May be shorter than ``precisions`` /
            ``recalls``; trailing precision/recall points without a matching
            threshold are ignored instead of raising ``IndexError``.

    Returns:
        Tuple ``(threshold, f1_best)``. Both are 0 when no candidate has a
        positive F1. On ties the earliest candidate wins.
    """
    f1_best = 0
    threshold = 0
    # zip stops at the shortest input, so a threshold always exists for the point scored.
    for precision, recall, candidate in zip(precisions, recalls, thresholds):
        if precision != 0 and recall != 0:
            f1 = 2 * (precision * recall) / (precision + recall)
        else:
            f1 = 0
        if f1 > f1_best:
            f1_best = f1
            threshold = candidate

    return threshold, f1_best

## Loading datasets

In [None]:
# Raw tables read from the datasets/ folder under PATH.
df = pd.read_csv(PATH + "datasets/train.csv")
test = pd.read_csv(PATH + "datasets/test.csv")
submission_sample = pd.read_csv(PATH + "datasets/submission_sample.csv")
metadata = pd.read_csv(PATH + "datasets/metadata.csv")
metadata.columns = ["cod", "type"]

# Column names grouped by the variable type declared in the metadata table.
# The target "y" and the identifier "id" are removed from their groups so the
# lists hold feature columns only.
num_dis = metadata.loc[metadata["type"] == "Quantitativo discreto", "cod"].to_list()
num_dis.remove("y")
num_con = metadata.loc[metadata["type"] == "Quantitativo continua", "cod"].to_list()
cat_nom = metadata.loc[metadata["type"] == "Qualitativo nominal", "cod"].to_list()
cat_nom.remove("id")
cat_ord = metadata.loc[metadata["type"] == "Qualitativo ordinal", "cod"].to_list()

# Target, test identifiers and feature matrices.
y = df["y"]
test_id = test["id"]
test = test.drop(columns=["id"])
X = df.drop(columns=["id", "y"])

## Basic data treatment

In [None]:
class ProcessMissingValues(BaseEstimator, TransformerMixin):
  """Replace the ``-999`` missing-value sentinel and add ``NA_<col>`` indicators.

  Args:
    columns: columns handled in categorical mode. In numerical mode the list
      is recomputed from the frame passed to ``transform`` (see
      ``transform_numerical``).
    type_columns: ``'categorical'`` selects ``transform_categorical``; any
      other value selects ``transform_numerical``.

  ``fit`` learns nothing: every statistic is computed from the frame given to
  ``transform``.
  """

  def __init__(self,
               columns=cat_nom + cat_ord,
               type_columns='categorical'):
    self.columns = columns
    self.type_columns = type_columns

  def fit(self, X, y=None):
    """No-op, present for the scikit-learn transformer interface."""
    return self

  def transform(self, X):
    """Dispatch to the categorical or numerical treatment and return the frame."""
    if self.type_columns == 'categorical':
      return self.transform_categorical(X)
    return self.transform_numerical(X)

  def transform_categorical(self, X):
    """Flag missing values and fill them with the string ``'UNKNOWN'``.

    ``X`` is modified in place and also returned.
    """
    for col in self.columns:
      X[col] = X[col].replace(-999, np.nan)
      X['NA_' + col] = X[col].isna().astype(np.int8)
      # Assign the filled column back: a chained ``fillna(..., inplace=True)``
      # does not reach the frame when pandas copy-on-write is active.
      X[col] = X[col].fillna('UNKNOWN')

    return X

  def transform_numerical(self, X):
    """Flag missing values and fill each column with its own median.

    ``self.columns`` is overwritten with every column of ``X`` whose name
    does not contain ``'NA_'`` and is not listed in ``cat_nom + cat_ord``.
    Works on a copy of ``X`` and returns that copy.
    """
    categorical = set(cat_nom + cat_ord)
    self.columns = [col for col in X.columns if 'NA_' not in col and col not in categorical]
    X = X.copy()
    for col in self.columns:
      X[col] = X[col].replace(-999, np.nan)
      median = X[col].median()
      X['NA_' + col] = X[col].isna().astype(np.int8)
      # Fill this column only. Filling the whole frame would also write this
      # column's median into genuine NaNs of every other column.
      X[col] = X[col].fillna(median)

    return X

In [None]:
# Numerical treatment runs first, then the categorical one with the class
# default columns (cat_nom + cat_ord). X is rebound to each result.
pmv_num = ProcessMissingValues(columns=num_dis+num_con, type_columns='numerical')
X = pmv_num.fit_transform(X)
pmv_cat = ProcessMissingValues()
X = pmv_cat.fit_transform(X)

# Combination of features

## Numerical

In [None]:
# Load the object stored in trees/kmeans/parameters_cluster.pkl.z with joblib.
# NOTE(review): joblib.load unpickles arbitrary objects — only load files from a trusted source.
name_dictionary = f"{PATH}trees/kmeans/parameters_cluster.pkl.z"
comb_features = jb.load(name_dictionary)

In [None]:
# Each entry of trees/kmeans is split on "_" into
# [first feature, second feature, score]; the last four characters of the third
# part are cut off (presumably the file extension — confirm against the folder).
# Names with fewer than three parts (e.g. parameters_cluster.pkl.z) are skipped
# explicitly: os.listdir returns entries in arbitrary order, so dropping "the
# last one" is not reliable. sorted() makes the result deterministic.
list_comb = [
    parts[:2] + [parts[2][:-4]]
    for parts in (file.split("_") for file in sorted(os.listdir(PATH + "trees/kmeans")))
    if len(parts) >= 3
]

In [None]:
# Rank the feature pairs by score, best first. The score is stored as text, so
# it is converted to float: string comparison misorders values such as
# '9e-05' or scores with a different number of integer digits.
list_comb = sorted(list_comb, key=lambda x: float(x[2]), reverse=True)
list_comb[:15]

[['var66', 'var54', '0.38648488441019563'],
 ['var65', 'var54', '0.3859230539815091'],
 ['var24', 'var50', '0.3843037974683544'],
 ['var48', 'var54', '0.38394728960766694'],
 ['var60', 'var54', '0.38321853475266127'],
 ['var64', 'var53', '0.38304990242542514'],
 ['var63', 'var54', '0.38300455235204856'],
 ['var53', 'var54', '0.38251366120218583'],
 ['var47', 'var54', '0.3824068417837508'],
 ['var45', 'var54', '0.3823529411764706'],
 ['var46', 'var54', '0.3797698364627498'],
 ['var60', 'var24', '0.37968507429585274'],
 ['var51', 'var54', '0.37957957957957955'],
 ['var44', 'var54', '0.3789026977872083'],
 ['var61', 'var54', '0.37822198842714366']]

In [None]:
# Search space handed to gp_minimize in comb_num. The dimensions are unpacked
# there, in this order, as: n_clusters, max_iter, n_init, tol.
space = [(2, 20),
         (200, 1000),
         (2, 20),
         (1e-5, 1e-3, 'log-uniform')]
def comb_num(train, test, y, combos=None, n_top=15, n_calls=30, random_state=42):
  """Add one KMeans cluster-label column per feature pair to train and test.

  For each of the first ``n_top`` entries of ``combos``, the KMeans
  hyper-parameters are tuned with ``gp_minimize`` over the module-level
  ``space``. The objective is the negated standard deviation, across clusters,
  of the mean of ``y`` inside each cluster. The best KMeans is then fitted on
  the training pair and used to label both frames.

  Args:
    train: training features (DataFrame).
    test: hold-out features (DataFrame) with the same pair columns.
    y: target aligned row by row with ``train``; must provide ``to_numpy()``.
    combos: sequence whose items start with the two column names to cluster.
      Defaults to the module-level ``list_comb``.
    n_top: number of leading entries of ``combos`` to process.
    n_calls: number of objective evaluations per pair.
    random_state: seed used for both ``gp_minimize`` and ``KMeans``.

  Returns:
    ``(train, test)`` as copies of the inputs with one extra categorical column
    per pair, named ``'<colA>_<colB>'``. The inputs are left untouched: they
    are usually slices from ``train_test_split``, and writing into those
    raises pandas' SettingWithCopyWarning.
  """
  if combos is None:
    combos = list_comb

  train = train.copy()
  test = test.copy()
  # Positional target, so grouping by the label array is independent of y's index.
  target = pd.Series(y.to_numpy())

  def build_kmeans(params):
    """Build a KMeans from one point of ``space``."""
    n_clusters, max_iter, n_init, tol = params
    # n_jobs is not passed: it was deprecated in scikit-learn 0.23 and removed in 1.0.
    return KMeans(n_clusters=n_clusters,
                  max_iter=max_iter,
                  tol=tol,
                  n_init=n_init,
                  random_state=random_state)

  for cols in combos[:n_top]:
    pair = list(cols[:2])
    features = train[pair]

    def tune_kmeans(params, features=features):
      """Objective for gp_minimize: minus the spread of the per-cluster target mean."""
      labels = build_kmeans(params).fit_predict(features)
      print(params)
      print()

      spread = target.groupby(labels).mean().std()
      # A single resulting cluster has no spread (NaN); score it as 0 so the
      # search can continue instead of failing on a NaN objective.
      return 0.0 if np.isnan(spread) else -spread

    res = gp_minimize(tune_kmeans, space, random_state=random_state, verbose=1, n_calls=n_calls)
    name_col = cols[0] + "_" + cols[1]

    kmeans = build_kmeans(res.x)
    train[name_col] = pd.Series(kmeans.fit_predict(features), index=train.index).astype("category")
    test[name_col] = pd.Series(kmeans.predict(test[pair]), index=test.index).astype("category")

  return train, test

In [None]:
# NOTE(review): Xtrain is first assigned in the "Pipeline of combinations" cell
# further down, so this cell fails on a fresh Restart & Run All; it also renders
# the whole frame.
Xtrain

Unnamed: 0,var1,var2,var3,var4,var5,var6,var7,var8,var9,var10,var11,var12,var13,var14,var15,var16,var17,var18,var19,var20,var21,var22,var23,var24,var25,var26,var27,var28,var29,var30,var31,var32,var33,var34,var35,var36,var37,var38,var39,var40,...,NA_var15,NA_var16,NA_var17,NA_var18,NA_var19,NA_var20,NA_var21,NA_var22,NA_var23,NA_var28,NA_var29,NA_var30,NA_var31,NA_var33,NA_var34,NA_var35,NA_var36,NA_var37,NA_var38,NA_var39,NA_var41,NA_var26,NA_var32,NA_var42,NA_var43,var66_var54,var65_var54,var24_var50,var48_var54,var60_var54,var64_var53,var63_var54,var53_var54,var47_var54,var45_var54,var46_var54,var60_var24,var51_var54,var44_var54,var61_var54
10678,4,116,2921,UNKNOWN,6376,UNKNOWN,UNKNOWN,27,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,1387,13,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,1573,13,1445,5,1,1,2,UNKNOWN,0,24,4,1,0,13,3,26,50,0,1,0,4,10,...,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,2,2,7,3,2,6,2,2,2,4,3,2,2,2,1
6415,16,53,731,6500,8734,2778,24,18,3,63,29249,6416,2156,17,2,2,2,2,1681,17,2265,0,1,0,1,9,1,20,2,1,0,13,4,20,303,24,7,10,4,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,3,1,1,1,1,0,1,3,0
5371,4,44,821,26208,9007,2381,35,27,1,63,29065,7025,1896,26,71,72,28,5,1573,26,1971,5,3,2,0,7,0,24,4,2,0,17,6,48,227,29,7,0,1,7,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,10,0,3,9,1,1,1,1,1,1,1,0,0
8166,4,44,540,23042,9048,994,35,27,3,63,10936,4458,800,26,44,44,10,4,188,26,818,5,3,2,1,7,0,24,4,1,0,13,1,49,490,2,3,0,4,2,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,10,0,0,1,1,1,1,1,1,1,1,0,0
8279,7,124,3137,11117,6231,2453,35,27,2,63,10802,4933,2033,26,2,2,2,2,664,26,2093,3,3,2,2,1,1,24,4,2,0,22,5,19,533,2,1,0,4,14,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,10,0,0,1,3,1,1,1,1,1,1,3,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5191,18,39,1180,5723,11347,2447,31,24,1,33,4541,5451,2022,23,1,3,3,3,348,23,2085,5,4,2,3,4,0,26,5,1,0,8,3,12,532,27,1,0,4,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,10,0,0,7,1,1,1,1,1,1,1,3,0
13418,4,44,459,8055,3764,2453,35,27,3,14,836,2396,2033,26,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,307,26,2093,3,3,2,1,UNKNOWN,0,24,4,1,0,15,3,21,495,28,1,0,4,5,...,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,10,0,0,7,1,1,1,1,1,1,1,0,0
5390,23,126,1470,16062,6392,2086,26,27,3,63,21846,804,1666,18,13,11,11,4,1573,18,1729,5,4,1,4,10,1,24,4,2,0,10,5,26,495,28,1,0,4,6,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,7,0,0,1,1,1,1,1,1,2,1,0,0
860,20,89,1906,3869,3740,1363,35,27,3,14,400,4181,1075,26,1,3,3,3,58,26,1114,5,3,2,1,5,0,24,4,1,0,16,4,57,89,31,4,0,4,18,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,1,1,7,0,0,0,0,0,1,5,1,2


## Categorical

In [None]:
class CombiningColumns(BaseEstimator, TransformerMixin):
  """Create combined columns by joining the string values of several columns.

  Args:
    list_combined: iterable of tuples of column names. Each tuple yields one
      new column named ``'<a>_<b>[_<c>...]'`` whose values are the source
      values cast to ``str`` and joined with ``'_'``.

  Tuples of two or three names give the same column as before. Longer tuples
  now use every listed column instead of silently stopping at the third.
  """

  def __init__(self, list_combined=list_combined):
    self.list_combined = list_combined

  def fit(self, X, y=None):
    """No-op, present for the scikit-learn transformer interface."""
    return self

  def transform(self, X):
    """Add the combined columns to ``X`` (modified in place) and return it.

    Raises:
      ValueError: if a combination names fewer than two columns.
    """
    for cols in self.list_combined:
      if len(cols) < 2:
        raise ValueError(f"a combination needs at least two columns, got {cols!r}")

      combined = X[cols[0]].astype(str)
      for col in cols[1:]:
        combined = combined + "_" + X[col].astype(str)
      X["_".join(cols)] = combined

    return X

In [None]:
class Categorify(BaseEstimator, TransformerMixin):
  """Replace category values by integer codes ordered by frequency.

  Codes start at 2 for the most frequent value and increase as frequency
  drops. Values seen fewer than ``freq_treshhold`` times get
  ``lowfrequency_id``; the literal value ``'UNKNOWN'`` gets ``unkown_id``
  (applied last, so it wins over the low-frequency rule).

  Args:
    columns: columns to encode.
    freq_treshhold: minimum count for a value to keep its own code.
    lowfrequency_id: code shared by all rare values.
    unkown_id: code of the ``'UNKNOWN'`` value.

  ``fit`` learns nothing: the code tables are rebuilt from whatever frame is
  passed to ``transform``, so two frames encoded separately do not share codes.
  """

  def __init__(self,
               columns=cat_nom,
               freq_treshhold=5,
               lowfrequency_id=0,
               unkown_id=1):
    self.columns = columns
    self.freqs = []
    self.freq_treshhold = freq_treshhold
    self.lowfrequency_id = lowfrequency_id
    self.unkown_id = unkown_id

  def fit(self, X, y=None):
    """No-op, present for the scikit-learn transformer interface."""
    return self

  def transform(self, X):
    """Return a copy of ``X`` with every configured column replaced by its codes.

    The lookup uses ``Series.map``, so the row index of ``X`` is kept (a merge
    would reset it) and each encoded column takes the dtype of the codes
    whatever the pandas version. Missing values stay missing.
    """
    self.make_columns(X)
    X = X.copy()
    for col, freq in zip(self.columns, self.freqs):
      codes = freq.set_index(col)[col + '_Categorify']
      X[col] = X[col].map(codes)

    return X

  def make_columns(self, X):
    """Build one code table per column and store them in ``self.freqs``.

    Each table is a DataFrame with the columns ``'<col>_Categorify'`` (the
    code) and ``'<col>'`` (the value).
    """
    self.freqs = []
    for col in self.columns:
      code_col = col + '_Categorify'
      # value_counts sorts by descending count; the first reset_index exposes
      # the values, the second one their frequency rank.
      freq = X[col].value_counts().reset_index()
      freq.columns = [col, 'count']
      freq = freq.reset_index()
      freq.columns = [code_col, col, 'count']
      # Shift the rank by 2 so that codes 0 and 1 stay free for the special ids.
      freq[code_col] = freq[code_col] + 2
      freq.loc[freq['count'] < self.freq_treshhold, code_col] = self.lowfrequency_id
      freq.loc[freq[col] == 'UNKNOWN', code_col] = self.unkown_id
      self.freqs.append(freq.drop('count', axis=1))

In [None]:
# NOTE(review): the "Pipeline of combinations" cell below repeats this exact call;
# the second run only recomputes the same combined columns.
X = CombiningColumns().fit_transform(X)

## Pipeline of combinations

In [None]:
# Build the combined categorical columns, encode them, split, then add the
# KMeans cluster columns to both splits.
X = CombiningColumns().fit_transform(X)
# Columns whose name does not contain "NA" and that are not in any metadata
# group — presumably the combined columns created just above.
comb_col_names = [col for col in X.columns if "NA" not in col and col not in cat_nom+cat_ord+num_con+num_dis]
# NOTE(review): the encoding is computed on all rows before the split, so the
# frequency codes also reflect rows that end up in Xtest.
X = Categorify(columns=comb_col_names).fit_transform(X)
Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=0.3, random_state=42)
Xtrain, Xtest = comb_num(Xtrain, Xtest, ytrain)

Iteration No: 1 started. Evaluating function at random point.
[16, 347, 16, 0.0001562069367563987]

Iteration No: 1 ended. Evaluation done at random point.
Time taken: 1.0409
Function value obtained: -0.1374
Current minimum: -0.1374
Iteration No: 2 started. Evaluating function at random point.
[10, 280, 10, 4.649617447336329e-05]

Iteration No: 2 ended. Evaluation done at random point.
Time taken: 0.1079
Function value obtained: -0.1022
Current minimum: -0.1374
Iteration No: 3 started. Evaluating function at random point.
[5, 721, 3, 0.0002779697551526683]

Iteration No: 3 ended. Evaluation done at random point.
Time taken: 0.0363
Function value obtained: -0.0975
Current minimum: -0.1374
Iteration No: 4 started. Evaluating function at random point.
[19, 201, 20, 0.00017177621112338383]

Iteration No: 4 ended. Evaluation done at random point.
Time taken: 0.3084
Function value obtained: -0.1343
Current minimum: -0.1374
Iteration No: 5 started. Evaluating function at random point.
[13, 20



[20, 200, 20, 0.001]

Iteration No: 17 ended. Search finished for the next optimal point.
Time taken: 0.7973
Function value obtained: -0.1508
Current minimum: -0.1508
Iteration No: 18 started. Searching for the next optimal point.
[7, 518, 20, 0.0004711927112437452]

Iteration No: 18 ended. Search finished for the next optimal point.
Time taken: 0.5959
Function value obtained: -0.0991
Current minimum: -0.1508
Iteration No: 19 started. Searching for the next optimal point.
[12, 337, 20, 7.070068590323513e-05]

Iteration No: 19 ended. Search finished for the next optimal point.
Time taken: 0.6653
Function value obtained: -0.0985
Current minimum: -0.1508
Iteration No: 20 started. Searching for the next optimal point.
[8, 962, 2, 0.00045835608089786207]

Iteration No: 20 ended. Search finished for the next optimal point.
Time taken: 0.4642
Function value obtained: -0.0969
Current minimum: -0.1508
Iteration No: 21 started. Searching for the next optimal point.
[20, 857, 20, 4.74612730456383



[20, 1000, 20, 1e-05]

Iteration No: 29 ended. Search finished for the next optimal point.
Time taken: 0.7604
Function value obtained: -0.1508
Current minimum: -0.1508
Iteration No: 30 started. Searching for the next optimal point.
[5, 387, 20, 1.1628870030794489e-05]

Iteration No: 30 ended. Search finished for the next optimal point.
Time taken: 0.5928
Function value obtained: -0.0975
Current minimum: -0.1508


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Iteration No: 1 started. Evaluating function at random point.
[16, 347, 16, 0.0001562069367563987]

Iteration No: 1 ended. Evaluation done at random point.
Time taken: 0.2450
Function value obtained: -0.1263
Current minimum: -0.1263
Iteration No: 2 started. Evaluating function at random point.
[10, 280, 10, 4.649617447336329e-05]

Iteration No: 2 ended. Evaluation done at random point.
Time taken: 0.1097
Function value obtained: -0.1060
Current minimum: -0.1263
Iteration No: 3 started. Evaluating function at random point.
[5, 721, 3, 0.0002779697551526683]

Iteration No: 3 ended. Evaluation done at random point.
Time taken: 0.0363
Function value obtained: -0.0975
Current minimum: -0.1263
Iteration No: 4 started. Evaluating function at random point.
[19, 201, 20, 0.00017177621112338383]

Iteration No: 4 ended. Evaluation done at random point.
Time taken: 0.3090
Function value obtained: -0.1190
Current minimum: -0.1263
Iteration No: 5 started. Evaluating function at random point.
[13, 20



[20, 200, 20, 0.001]

Iteration No: 29 ended. Search finished for the next optimal point.
Time taken: 0.7876
Function value obtained: -0.1328
Current minimum: -0.1328
Iteration No: 30 started. Searching for the next optimal point.
[5, 387, 20, 1.1628870030794489e-05]

Iteration No: 30 ended. Search finished for the next optimal point.
Time taken: 0.6554
Function value obtained: -0.0975
Current minimum: -0.1328


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Iteration No: 1 started. Evaluating function at random point.


  return self.fit(X, sample_weight=sample_weight).labels_
  return self.fit(X, sample_weight=sample_weight).labels_


[16, 347, 16, 0.0001562069367563987]

Iteration No: 1 ended. Evaluation done at random point.
Time taken: 0.3467
Function value obtained: -0.0875
Current minimum: -0.0875
Iteration No: 2 started. Evaluating function at random point.
[10, 280, 10, 4.649617447336329e-05]

Iteration No: 2 ended. Evaluation done at random point.
Time taken: 0.1631
Function value obtained: -0.0875
Current minimum: -0.0875
Iteration No: 3 started. Evaluating function at random point.
[5, 721, 3, 0.0002779697551526683]

Iteration No: 3 ended. Evaluation done at random point.
Time taken: 0.0502
Function value obtained: -0.0822
Current minimum: -0.0875
Iteration No: 4 started. Evaluating function at random point.


  return self.fit(X, sample_weight=sample_weight).labels_
  return self.fit(X, sample_weight=sample_weight).labels_


[19, 201, 20, 0.00017177621112338383]

Iteration No: 4 ended. Evaluation done at random point.
Time taken: 0.4584
Function value obtained: -0.0875
Current minimum: -0.0875
Iteration No: 5 started. Evaluating function at random point.
[13, 206, 2, 0.00011208547084229366]

Iteration No: 5 ended. Evaluation done at random point.
Time taken: 0.0471
Function value obtained: -0.0875
Current minimum: -0.0875
Iteration No: 6 started. Evaluating function at random point.


  return self.fit(X, sample_weight=sample_weight).labels_


[9, 237, 20, 2.9210748185657167e-05]

Iteration No: 6 ended. Evaluation done at random point.
Time taken: 0.2198
Function value obtained: -0.0875
Current minimum: -0.0875
Iteration No: 7 started. Evaluating function at random point.
[4, 695, 9, 0.0009256818992066885]

Iteration No: 7 ended. Evaluation done at random point.
Time taken: 0.0636
Function value obtained: -0.0838
Current minimum: -0.0875
Iteration No: 8 started. Evaluating function at random point.


  return self.fit(X, sample_weight=sample_weight).labels_


[10, 888, 14, 7.961566078062952e-05]

Iteration No: 8 ended. Evaluation done at random point.
Time taken: 0.2049
Function value obtained: -0.0875
Current minimum: -0.0875
Iteration No: 9 started. Evaluating function at random point.
[2, 954, 12, 5.899741796710488e-05]

Iteration No: 9 ended. Evaluation done at random point.
Time taken: 0.0628
Function value obtained: -0.0443
Current minimum: -0.0875
Iteration No: 10 started. Evaluating function at random point.
[2, 385, 6, 0.00023255572624036796]

Iteration No: 10 ended. Evaluation done at random point.
Time taken: 0.7181
Function value obtained: -0.0443
Current minimum: -0.0875
Iteration No: 11 started. Searching for the next optimal point.
[8, 1000, 20, 0.001]



  return self.fit(X, sample_weight=sample_weight).labels_


Iteration No: 11 ended. Search finished for the next optimal point.
Time taken: 0.7412
Function value obtained: -0.0875
Current minimum: -0.0875
Iteration No: 12 started. Searching for the next optimal point.
[20, 1000, 2, 1e-05]



  return self.fit(X, sample_weight=sample_weight).labels_


Iteration No: 12 ended. Search finished for the next optimal point.
Time taken: 0.6829
Function value obtained: -0.0875
Current minimum: -0.0875
Iteration No: 13 started. Searching for the next optimal point.
[7, 200, 2, 1e-05]



  return self.fit(X, sample_weight=sample_weight).labels_


Iteration No: 13 ended. Search finished for the next optimal point.
Time taken: 0.4894
Function value obtained: -0.0875
Current minimum: -0.0875
Iteration No: 14 started. Searching for the next optimal point.
[18, 1000, 2, 1e-05]



  return self.fit(X, sample_weight=sample_weight).labels_


Iteration No: 14 ended. Search finished for the next optimal point.
Time taken: 0.4494
Function value obtained: -0.0875
Current minimum: -0.0875
Iteration No: 15 started. Searching for the next optimal point.


  return self.fit(X, sample_weight=sample_weight).labels_


[14, 971, 19, 1.0069323300900946e-05]

Iteration No: 15 ended. Search finished for the next optimal point.
Time taken: 0.7463
Function value obtained: -0.0875
Current minimum: -0.0875
Iteration No: 16 started. Searching for the next optimal point.


  return self.fit(X, sample_weight=sample_weight).labels_


[20, 215, 19, 0.0005353455662573544]

Iteration No: 16 ended. Search finished for the next optimal point.
Time taken: 0.8540
Function value obtained: -0.0875
Current minimum: -0.0875
Iteration No: 17 started. Searching for the next optimal point.
[6, 1000, 20, 0.001]

Iteration No: 17 ended. Search finished for the next optimal point.
Time taken: 0.5665
Function value obtained: -0.0875
Current minimum: -0.0875
Iteration No: 18 started. Searching for the next optimal point.


  return self.fit(X, sample_weight=sample_weight).labels_


[12, 910, 18, 0.0009099022917431051]

Iteration No: 18 ended. Search finished for the next optimal point.
Time taken: 0.7176
Function value obtained: -0.0875
Current minimum: -0.0875
Iteration No: 19 started. Searching for the next optimal point.
[16, 962, 3, 0.0007517397610470148]



  return self.fit(X, sample_weight=sample_weight).labels_


Iteration No: 19 ended. Search finished for the next optimal point.
Time taken: 0.4908
Function value obtained: -0.0875
Current minimum: -0.0875
Iteration No: 20 started. Searching for the next optimal point.
[20, 947, 3, 0.0009720998977437016]



  return self.fit(X, sample_weight=sample_weight).labels_


Iteration No: 20 ended. Search finished for the next optimal point.
Time taken: 0.5417
Function value obtained: -0.0875
Current minimum: -0.0875
Iteration No: 21 started. Searching for the next optimal point.


  return self.fit(X, sample_weight=sample_weight).labels_


[18, 200, 20, 0.001]

Iteration No: 21 ended. Search finished for the next optimal point.
Time taken: 0.7973
Function value obtained: -0.0875
Current minimum: -0.0875
Iteration No: 22 started. Searching for the next optimal point.
[7, 201, 3, 0.0008936027392403577]



  return self.fit(X, sample_weight=sample_weight).labels_


Iteration No: 22 ended. Search finished for the next optimal point.
Time taken: 0.5473
Function value obtained: -0.0875
Current minimum: -0.0875
Iteration No: 23 started. Searching for the next optimal point.
[14, 344, 2, 0.0009659990079768904]



  return self.fit(X, sample_weight=sample_weight).labels_


Iteration No: 23 ended. Search finished for the next optimal point.
Time taken: 0.4559
Function value obtained: -0.0875
Current minimum: -0.0875
Iteration No: 24 started. Searching for the next optimal point.
[11, 1000, 3, 1.0923289007471062e-05]



  return self.fit(X, sample_weight=sample_weight).labels_


Iteration No: 24 ended. Search finished for the next optimal point.
Time taken: 0.4723
Function value obtained: -0.0875
Current minimum: -0.0875
Iteration No: 25 started. Searching for the next optimal point.
[6, 943, 19, 1.0034250436075407e-05]

Iteration No: 25 ended. Search finished for the next optimal point.
Time taken: 0.5498
Function value obtained: -0.0875
Current minimum: -0.0875
Iteration No: 26 started. Searching for the next optimal point.
[7, 1000, 2, 0.001]



  return self.fit(X, sample_weight=sample_weight).labels_


Iteration No: 26 ended. Search finished for the next optimal point.
Time taken: 0.4734
Function value obtained: -0.0875
Current minimum: -0.0875
Iteration No: 27 started. Searching for the next optimal point.


  return self.fit(X, sample_weight=sample_weight).labels_


[15, 298, 20, 1.2251656298260895e-05]

Iteration No: 27 ended. Search finished for the next optimal point.
Time taken: 0.8061
Function value obtained: -0.0875
Current minimum: -0.0875
Iteration No: 28 started. Searching for the next optimal point.


  return self.fit(X, sample_weight=sample_weight).labels_


[20, 388, 20, 1.0652949801193552e-05]

Iteration No: 28 ended. Search finished for the next optimal point.
Time taken: 0.8249
Function value obtained: -0.0875
Current minimum: -0.0875
Iteration No: 29 started. Searching for the next optimal point.


  return self.fit(X, sample_weight=sample_weight).labels_


[12, 200, 20, 0.001]

Iteration No: 29 ended. Search finished for the next optimal point.
Time taken: 0.7428
Function value obtained: -0.0875
Current minimum: -0.0875
Iteration No: 30 started. Searching for the next optimal point.


  return self.fit(X, sample_weight=sample_weight).labels_


[17, 992, 20, 1.1435119086300149e-05]

Iteration No: 30 ended. Search finished for the next optimal point.
Time taken: 0.8003
Function value obtained: -0.0875
Current minimum: -0.0875


  return self.fit(X, sample_weight=sample_weight).labels_
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Iteration No: 1 started. Evaluating function at random point.
[16, 347, 16, 0.0001562069367563987]

Iteration No: 1 ended. Evaluation done at random point.
Time taken: 0.2369
Function value obtained: -0.1243
Current minimum: -0.1243
Iteration No: 2 started. Evaluating function at random point.
[10, 280, 10, 4.649617447336329e-05]

Iteration No: 2 ended. Evaluation done at random point.
Time taken: 0.1257
Function value obtained: -0.0942
Current minimum: -0.1243
Iteration No: 3 started. Evaluating function at random point.
[5, 721, 3, 0.0002779697551526683]

Iteration No: 3 ended. Evaluation done at random point.
Time taken: 0.0346
Function value obtained: -0.0832
Current minimum: -0.1243
Iteration No: 4 started. Evaluating function at random point.
[19, 201, 20, 0.00017177621112338383]

Iteration No: 4 ended. Evaluation done at random point.
Time taken: 0.3174
Function value obtained: -0.1334
Current minimum: -0.1334
Iteration No: 5 started. Evaluating function at random point.
[13, 20



[20, 1000, 20, 0.001]

Iteration No: 12 ended. Search finished for the next optimal point.
Time taken: 0.8443
Function value obtained: -0.1359
Current minimum: -0.1359
Iteration No: 13 started. Searching for the next optimal point.
[20, 200, 2, 0.001]

Iteration No: 13 ended. Search finished for the next optimal point.
Time taken: 0.6062
Function value obtained: -0.1359
Current minimum: -0.1359
Iteration No: 14 started. Searching for the next optimal point.
[20, 1000, 20, 1e-05]

Iteration No: 14 ended. Search finished for the next optimal point.
Time taken: 0.9513
Function value obtained: -0.1359
Current minimum: -0.1359
Iteration No: 15 started. Searching for the next optimal point.
[18, 1000, 2, 1e-05]

Iteration No: 15 ended. Search finished for the next optimal point.
Time taken: 0.5447
Function value obtained: -0.1307
Current minimum: -0.1359
Iteration No: 16 started. Searching for the next optimal point.
[17, 208, 20, 0.0007925772463635299]

Iteration No: 16 ended. Search finish



Iteration No: 26 ended. Search finished for the next optimal point.
Time taken: 0.5293
Function value obtained: -0.1359
Current minimum: -0.1359
Iteration No: 27 started. Searching for the next optimal point.
[20, 200, 20, 0.001]

Iteration No: 27 ended. Search finished for the next optimal point.
Time taken: 0.7464
Function value obtained: -0.1359
Current minimum: -0.1359
Iteration No: 28 started. Searching for the next optimal point.
[11, 356, 20, 0.0009671374764836826]

Iteration No: 28 ended. Search finished for the next optimal point.
Time taken: 0.6389
Function value obtained: -0.0898
Current minimum: -0.1359
Iteration No: 29 started. Searching for the next optimal point.
[20, 200, 2, 1e-05]





Iteration No: 29 ended. Search finished for the next optimal point.
Time taken: 0.6143
Function value obtained: -0.1359
Current minimum: -0.1359
Iteration No: 30 started. Searching for the next optimal point.
[19, 317, 2, 1.0595957680645148e-05]

Iteration No: 30 ended. Search finished for the next optimal point.
Time taken: 0.6179
Function value obtained: -0.1351
Current minimum: -0.1359


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Iteration No: 1 started. Evaluating function at random point.
[16, 347, 16, 0.0001562069367563987]

Iteration No: 1 ended. Evaluation done at random point.
Time taken: 0.2491
Function value obtained: -0.0799
Current minimum: -0.0799
Iteration No: 2 started. Evaluating function at random point.
[10, 280, 10, 4.649617447336329e-05]

Iteration No: 2 ended. Evaluation done at random point.
Time taken: 0.1474
Function value obtained: -0.0817
Current minimum: -0.0817
Iteration No: 3 started. Evaluating function at random point.
[5, 721, 3, 0.0002779697551526683]

Iteration No: 3 ended. Evaluation done at random point.
Time taken: 0.0406
Function value obtained: -0.1013
Current minimum: -0.1013
Iteration No: 4 started. Evaluating function at random point.
[19, 201, 20, 0.00017177621112338383]

Iteration No: 4 ended. Evaluation done at random point.
Time taken: 0.3436
Function value obtained: -0.0800
Current minimum: -0.1013
Iteration No: 5 started. Evaluating function at random point.
[13, 20

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


[16, 347, 16, 0.0001562069367563987]

Iteration No: 1 ended. Evaluation done at random point.
Time taken: 0.3004
Function value obtained: -0.0968
Current minimum: -0.0968
Iteration No: 2 started. Evaluating function at random point.
[10, 280, 10, 4.649617447336329e-05]

Iteration No: 2 ended. Evaluation done at random point.
Time taken: 0.1452
Function value obtained: -0.0868
Current minimum: -0.0968
Iteration No: 3 started. Evaluating function at random point.
[5, 721, 3, 0.0002779697551526683]

Iteration No: 3 ended. Evaluation done at random point.
Time taken: 0.0380
Function value obtained: -0.0827
Current minimum: -0.0968
Iteration No: 4 started. Evaluating function at random point.
[19, 201, 20, 0.00017177621112338383]

Iteration No: 4 ended. Evaluation done at random point.
Time taken: 0.3916
Function value obtained: -0.0956
Current minimum: -0.0968
Iteration No: 5 started. Evaluating function at random point.
[13, 206, 2, 0.00011208547084229366]

Iteration No: 5 ended. Evaluati

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Iteration No: 1 started. Evaluating function at random point.
[16, 347, 16, 0.0001562069367563987]

Iteration No: 1 ended. Evaluation done at random point.
Time taken: 0.4109
Function value obtained: -0.0887
Current minimum: -0.0887
Iteration No: 2 started. Evaluating function at random point.
[10, 280, 10, 4.649617447336329e-05]

Iteration No: 2 ended. Evaluation done at random point.
Time taken: 0.1947
Function value obtained: -0.0811
Current minimum: -0.0887
Iteration No: 3 started. Evaluating function at random point.
[5, 721, 3, 0.0002779697551526683]

Iteration No: 3 ended. Evaluation done at random point.
Time taken: 0.0453
Function value obtained: -0.0946
Current minimum: -0.0946
Iteration No: 4 started. Evaluating function at random point.
[19, 201, 20, 0.00017177621112338383]

Iteration No: 4 ended. Evaluation done at random point.
Time taken: 0.5815
Function value obtained: -0.0897
Current minimum: -0.0946
Iteration No: 5 started. Evaluating function at random point.
[13, 20

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return self.fit(X, sample_weight=sample_weight).labels_
  return self.fit(X, sample_weight=sample_weight).labels_


[16, 347, 16, 0.0001562069367563987]

Iteration No: 1 ended. Evaluation done at random point.
Time taken: 0.3001
Function value obtained: -0.0997
Current minimum: -0.0997
Iteration No: 2 started. Evaluating function at random point.
[10, 280, 10, 4.649617447336329e-05]

Iteration No: 2 ended. Evaluation done at random point.
Time taken: 0.1205
Function value obtained: -0.0997
Current minimum: -0.0997
Iteration No: 3 started. Evaluating function at random point.
[5, 721, 3, 0.0002779697551526683]

Iteration No: 3 ended. Evaluation done at random point.
Time taken: 0.0356
Function value obtained: -0.1135
Current minimum: -0.1135
Iteration No: 4 started. Evaluating function at random point.


  return self.fit(X, sample_weight=sample_weight).labels_
  return self.fit(X, sample_weight=sample_weight).labels_


[19, 201, 20, 0.00017177621112338383]

Iteration No: 4 ended. Evaluation done at random point.
Time taken: 0.4194
Function value obtained: -0.0997
Current minimum: -0.1135
Iteration No: 5 started. Evaluating function at random point.
[13, 206, 2, 0.00011208547084229366]

Iteration No: 5 ended. Evaluation done at random point.
Time taken: 0.0429
Function value obtained: -0.0997
Current minimum: -0.1135
Iteration No: 6 started. Evaluating function at random point.


  return self.fit(X, sample_weight=sample_weight).labels_


[9, 237, 20, 2.9210748185657167e-05]

Iteration No: 6 ended. Evaluation done at random point.
Time taken: 0.2095
Function value obtained: -0.0997
Current minimum: -0.1135
Iteration No: 7 started. Evaluating function at random point.
[4, 695, 9, 0.0009256818992066885]

Iteration No: 7 ended. Evaluation done at random point.
Time taken: 0.0637
Function value obtained: -0.1174
Current minimum: -0.1174
Iteration No: 8 started. Evaluating function at random point.


  return self.fit(X, sample_weight=sample_weight).labels_


[10, 888, 14, 7.961566078062952e-05]

Iteration No: 8 ended. Evaluation done at random point.
Time taken: 0.1755
Function value obtained: -0.0997
Current minimum: -0.1174
Iteration No: 9 started. Evaluating function at random point.
[2, 954, 12, 5.899741796710488e-05]

Iteration No: 9 ended. Evaluation done at random point.
Time taken: 0.0692
Function value obtained: -0.0150
Current minimum: -0.1174
Iteration No: 10 started. Evaluating function at random point.
[2, 385, 6, 0.00023255572624036796]

Iteration No: 10 ended. Evaluation done at random point.
Time taken: 0.5074
Function value obtained: -0.0150
Current minimum: -0.1174
Iteration No: 11 started. Searching for the next optimal point.
[20, 1000, 2, 1e-05]



  return self.fit(X, sample_weight=sample_weight).labels_


Iteration No: 11 ended. Search finished for the next optimal point.
Time taken: 0.7246
Function value obtained: -0.0997
Current minimum: -0.1174
Iteration No: 12 started. Searching for the next optimal point.
[7, 1000, 20, 0.001]

Iteration No: 12 ended. Search finished for the next optimal point.
Time taken: 0.7354
Function value obtained: -0.0997
Current minimum: -0.1174
Iteration No: 13 started. Searching for the next optimal point.


  return self.fit(X, sample_weight=sample_weight).labels_


[14, 1000, 20, 1e-05]

Iteration No: 13 ended. Search finished for the next optimal point.
Time taken: 0.8438
Function value obtained: -0.0997
Current minimum: -0.1174
Iteration No: 14 started. Searching for the next optimal point.
[17, 1000, 2, 1e-05]



  return self.fit(X, sample_weight=sample_weight).labels_


Iteration No: 14 ended. Search finished for the next optimal point.
Time taken: 0.5222
Function value obtained: -0.0997
Current minimum: -0.1174
Iteration No: 15 started. Searching for the next optimal point.


  return self.fit(X, sample_weight=sample_weight).labels_


[12, 980, 20, 2.5285549028920616e-05]

Iteration No: 15 ended. Search finished for the next optimal point.
Time taken: 0.6718
Function value obtained: -0.0997
Current minimum: -0.1174
Iteration No: 16 started. Searching for the next optimal point.
[3, 984, 18, 0.0009203918257721153]

Iteration No: 16 ended. Search finished for the next optimal point.
Time taken: 0.6928
Function value obtained: -0.1184
Current minimum: -0.1184
Iteration No: 17 started. Searching for the next optimal point.


  return self.fit(X, sample_weight=sample_weight).labels_


[20, 200, 19, 1e-05]

Iteration No: 17 ended. Search finished for the next optimal point.
Time taken: 1.1390
Function value obtained: -0.0997
Current minimum: -0.1184
Iteration No: 18 started. Searching for the next optimal point.


  return self.fit(X, sample_weight=sample_weight).labels_


[19, 273, 17, 1.0406739971869935e-05]

Iteration No: 18 ended. Search finished for the next optimal point.
Time taken: 0.8920
Function value obtained: -0.0997
Current minimum: -0.1184
Iteration No: 19 started. Searching for the next optimal point.
[8, 1000, 2, 0.0003218108596508764]



  return self.fit(X, sample_weight=sample_weight).labels_


Iteration No: 19 ended. Search finished for the next optimal point.
Time taken: 0.6385
Function value obtained: -0.0997
Current minimum: -0.1184
Iteration No: 20 started. Searching for the next optimal point.
[4, 1000, 2, 0.001]

Iteration No: 20 ended. Search finished for the next optimal point.
Time taken: 0.6294
Function value obtained: -0.1174
Current minimum: -0.1184
Iteration No: 21 started. Searching for the next optimal point.
[2, 1000, 15, 0.001]

Iteration No: 21 ended. Search finished for the next optimal point.
Time taken: 0.7427
Function value obtained: -0.0150
Current minimum: -0.1184
Iteration No: 22 started. Searching for the next optimal point.
[6, 201, 5, 1.3998020184515221e-05]

Iteration No: 22 ended. Search finished for the next optimal point.
Time taken: 0.6527
Function value obtained: -0.1086
Current minimum: -0.1184
Iteration No: 23 started. Searching for the next optimal point.


  return self.fit(X, sample_weight=sample_weight).labels_


[18, 258, 20, 0.0007348729659899378]

Iteration No: 23 ended. Search finished for the next optimal point.
Time taken: 0.9836
Function value obtained: -0.0997
Current minimum: -0.1184
Iteration No: 24 started. Searching for the next optimal point.
[15, 348, 2, 0.0008821603181675329]



  return self.fit(X, sample_weight=sample_weight).labels_


Iteration No: 24 ended. Search finished for the next optimal point.
Time taken: 0.6422
Function value obtained: -0.0997
Current minimum: -0.1184
Iteration No: 25 started. Searching for the next optimal point.
[11, 240, 2, 0.0008584627734777592]



  return self.fit(X, sample_weight=sample_weight).labels_


Iteration No: 25 ended. Search finished for the next optimal point.
Time taken: 0.7937
Function value obtained: -0.0997
Current minimum: -0.1184
Iteration No: 26 started. Searching for the next optimal point.
[3, 200, 20, 1e-05]

Iteration No: 26 ended. Search finished for the next optimal point.
Time taken: 0.7698
Function value obtained: -0.1184
Current minimum: -0.1184
Iteration No: 27 started. Searching for the next optimal point.
[3, 200, 20, 1e-05]





Iteration No: 27 ended. Search finished for the next optimal point.
Time taken: 0.7889
Function value obtained: -0.1184
Current minimum: -0.1184
Iteration No: 28 started. Searching for the next optimal point.
[3, 200, 20, 1e-05]





Iteration No: 28 ended. Search finished for the next optimal point.
Time taken: 0.9450
Function value obtained: -0.1184
Current minimum: -0.1184
Iteration No: 29 started. Searching for the next optimal point.
[5, 390, 3, 9.400754349195071e-05]

Iteration No: 29 ended. Search finished for the next optimal point.
Time taken: 0.5522
Function value obtained: -0.1135
Current minimum: -0.1184
Iteration No: 30 started. Searching for the next optimal point.
[3, 420, 20, 1.7150570221195156e-05]

Iteration No: 30 ended. Search finished for the next optimal point.
Time taken: 0.8844
Function value obtained: -0.1184
Current minimum: -0.1184
Iteration No: 1 started. Evaluating function at random point.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


[16, 347, 16, 0.0001562069367563987]

Iteration No: 1 ended. Evaluation done at random point.
Time taken: 0.2356
Function value obtained: -0.2466
Current minimum: -0.2466
Iteration No: 2 started. Evaluating function at random point.
[10, 280, 10, 4.649617447336329e-05]

Iteration No: 2 ended. Evaluation done at random point.
Time taken: 0.1102
Function value obtained: -0.1307
Current minimum: -0.2466
Iteration No: 3 started. Evaluating function at random point.
[5, 721, 3, 0.0002779697551526683]

Iteration No: 3 ended. Evaluation done at random point.
Time taken: 0.0318
Function value obtained: -0.1006
Current minimum: -0.2466
Iteration No: 4 started. Evaluating function at random point.


  return self.fit(X, sample_weight=sample_weight).labels_


[19, 201, 20, 0.00017177621112338383]

Iteration No: 4 ended. Evaluation done at random point.
Time taken: 0.3473
Function value obtained: -0.2457
Current minimum: -0.2466
Iteration No: 5 started. Evaluating function at random point.
[13, 206, 2, 0.00011208547084229366]

Iteration No: 5 ended. Evaluation done at random point.
Time taken: 0.0431
Function value obtained: -0.1305
Current minimum: -0.2466
Iteration No: 6 started. Evaluating function at random point.
[9, 237, 20, 2.9210748185657167e-05]

Iteration No: 6 ended. Evaluation done at random point.
Time taken: 0.2004
Function value obtained: -0.1334
Current minimum: -0.2466
Iteration No: 7 started. Evaluating function at random point.
[4, 695, 9, 0.0009256818992066885]

Iteration No: 7 ended. Evaluation done at random point.
Time taken: 0.0652
Function value obtained: -0.0953
Current minimum: -0.2466
Iteration No: 8 started. Evaluating function at random point.
[10, 888, 14, 7.961566078062952e-05]

Iteration No: 8 ended. Evaluati

  return self.fit(X, sample_weight=sample_weight).labels_


Iteration No: 12 ended. Search finished for the next optimal point.
Time taken: 0.5380
Function value obtained: -0.2457
Current minimum: -0.2466
Iteration No: 13 started. Searching for the next optimal point.
[18, 1000, 2, 1e-05]



  return self.fit(X, sample_weight=sample_weight).labels_


Iteration No: 13 ended. Search finished for the next optimal point.
Time taken: 0.5365
Function value obtained: -0.2457
Current minimum: -0.2466
Iteration No: 14 started. Searching for the next optimal point.
[17, 1000, 2, 1e-05]

Iteration No: 14 ended. Search finished for the next optimal point.
Time taken: 0.4467
Function value obtained: -0.2457
Current minimum: -0.2466
Iteration No: 15 started. Searching for the next optimal point.


  return self.fit(X, sample_weight=sample_weight).labels_


[20, 596, 13, 0.001]

Iteration No: 15 ended. Search finished for the next optimal point.
Time taken: 0.6435
Function value obtained: -0.2457
Current minimum: -0.2466
Iteration No: 16 started. Searching for the next optimal point.


  return self.fit(X, sample_weight=sample_weight).labels_


[20, 522, 14, 1e-05]

Iteration No: 16 ended. Search finished for the next optimal point.
Time taken: 0.7910
Function value obtained: -0.2457
Current minimum: -0.2466
Iteration No: 17 started. Searching for the next optimal point.
[19, 1000, 2, 0.001]



  return self.fit(X, sample_weight=sample_weight).labels_


Iteration No: 17 ended. Search finished for the next optimal point.
Time taken: 0.5328
Function value obtained: -0.2457
Current minimum: -0.2466
Iteration No: 18 started. Searching for the next optimal point.
[19, 702, 2, 1e-05]



  return self.fit(X, sample_weight=sample_weight).labels_


Iteration No: 18 ended. Search finished for the next optimal point.
Time taken: 0.5491
Function value obtained: -0.2457
Current minimum: -0.2466
Iteration No: 19 started. Searching for the next optimal point.
[16, 962, 3, 0.0007517397610470148]

Iteration No: 19 ended. Search finished for the next optimal point.
Time taken: 0.6619
Function value obtained: -0.2466
Current minimum: -0.2466
Iteration No: 20 started. Searching for the next optimal point.


  return self.fit(X, sample_weight=sample_weight).labels_


[18, 235, 18, 0.0009225057285446044]

Iteration No: 20 ended. Search finished for the next optimal point.
Time taken: 0.8937
Function value obtained: -0.2457
Current minimum: -0.2466
Iteration No: 21 started. Searching for the next optimal point.
[17, 200, 2, 0.001]

Iteration No: 21 ended. Search finished for the next optimal point.
Time taken: 0.6136
Function value obtained: -0.2457
Current minimum: -0.2466
Iteration No: 22 started. Searching for the next optimal point.
[16, 306, 19, 1.1665312222526986e-05]

Iteration No: 22 ended. Search finished for the next optimal point.
Time taken: 0.7796
Function value obtained: -0.2466
Current minimum: -0.2466
Iteration No: 23 started. Searching for the next optimal point.
[15, 969, 19, 1.1244681921761391e-05]

Iteration No: 23 ended. Search finished for the next optimal point.
Time taken: 0.9195
Function value obtained: -0.2469
Current minimum: -0.2469
Iteration No: 24 started. Searching for the next optimal point.
[15, 348, 2, 0.000882750693

  return self.fit(X, sample_weight=sample_weight).labels_


Iteration No: 27 ended. Search finished for the next optimal point.
Time taken: 0.7884
Function value obtained: -0.2457
Current minimum: -0.2469
Iteration No: 28 started. Searching for the next optimal point.
[13, 1000, 20, 1e-05]

Iteration No: 28 ended. Search finished for the next optimal point.
Time taken: 1.0019
Function value obtained: -0.1305
Current minimum: -0.2469
Iteration No: 29 started. Searching for the next optimal point.
[20, 200, 2, 0.001]



  return self.fit(X, sample_weight=sample_weight).labels_


Iteration No: 29 ended. Search finished for the next optimal point.
Time taken: 0.5867
Function value obtained: -0.2457
Current minimum: -0.2469
Iteration No: 30 started. Searching for the next optimal point.
[7, 803, 20, 0.0006718910940652978]

Iteration No: 30 ended. Search finished for the next optimal point.
Time taken: 0.7457
Function value obtained: -0.0910
Current minimum: -0.2469


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Iteration No: 1 started. Evaluating function at random point.
[16, 347, 16, 0.0001562069367563987]

Iteration No: 1 ended. Evaluation done at random point.
Time taken: 0.2816
Function value obtained: -0.1025
Current minimum: -0.1025
Iteration No: 2 started. Evaluating function at random point.
[10, 280, 10, 4.649617447336329e-05]

Iteration No: 2 ended. Evaluation done at random point.
Time taken: 0.1289
Function value obtained: -0.0801
Current minimum: -0.1025
Iteration No: 3 started. Evaluating function at random point.
[5, 721, 3, 0.0002779697551526683]

Iteration No: 3 ended. Evaluation done at random point.
Time taken: 0.0415
Function value obtained: -0.0849
Current minimum: -0.1025
Iteration No: 4 started. Evaluating function at random point.
[19, 201, 20, 0.00017177621112338383]

Iteration No: 4 ended. Evaluation done at random point.
Time taken: 0.3417
Function value obtained: -0.1043
Current minimum: -0.1043
Iteration No: 5 started. Evaluating function at random point.
[13, 20



Iteration No: 13 ended. Search finished for the next optimal point.
Time taken: 0.5196
Function value obtained: -0.1144
Current minimum: -0.1144
Iteration No: 14 started. Searching for the next optimal point.
[20, 200, 2, 0.001]





Iteration No: 14 ended. Search finished for the next optimal point.
Time taken: 0.5441
Function value obtained: -0.1144
Current minimum: -0.1144
Iteration No: 15 started. Searching for the next optimal point.
[20, 974, 2, 0.00012040349990809159]

Iteration No: 15 ended. Search finished for the next optimal point.
Time taken: 0.5568
Function value obtained: -0.1144
Current minimum: -0.1144
Iteration No: 16 started. Searching for the next optimal point.
[7, 638, 2, 1.5949171711358344e-05]

Iteration No: 16 ended. Search finished for the next optimal point.
Time taken: 0.5819
Function value obtained: -0.0707
Current minimum: -0.1144
Iteration No: 17 started. Searching for the next optimal point.
[20, 1000, 20, 1e-05]

Iteration No: 17 ended. Search finished for the next optimal point.
Time taken: 1.0252
Function value obtained: -0.1144
Current minimum: -0.1144
Iteration No: 18 started. Searching for the next optimal point.
[15, 200, 2, 4.8772995665798175e-05]

Iteration No: 18 ended. Sear

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


[16, 347, 16, 0.0001562069367563987]

Iteration No: 1 ended. Evaluation done at random point.
Time taken: 0.2468
Function value obtained: -0.0881
Current minimum: -0.0881
Iteration No: 2 started. Evaluating function at random point.
[10, 280, 10, 4.649617447336329e-05]

Iteration No: 2 ended. Evaluation done at random point.
Time taken: 0.1183
Function value obtained: -0.0797
Current minimum: -0.0881
Iteration No: 3 started. Evaluating function at random point.
[5, 721, 3, 0.0002779697551526683]

Iteration No: 3 ended. Evaluation done at random point.
Time taken: 0.0378
Function value obtained: -0.0828
Current minimum: -0.0881
Iteration No: 4 started. Evaluating function at random point.
[19, 201, 20, 0.00017177621112338383]

Iteration No: 4 ended. Evaluation done at random point.
Time taken: 0.3385
Function value obtained: -0.1038
Current minimum: -0.1038
Iteration No: 5 started. Evaluating function at random point.
[13, 206, 2, 0.00011208547084229366]

Iteration No: 5 ended. Evaluati



[20, 1000, 20, 0.001]

Iteration No: 29 ended. Search finished for the next optimal point.
Time taken: 0.9620
Function value obtained: -0.1282
Current minimum: -0.1282
Iteration No: 30 started. Searching for the next optimal point.
[6, 200, 20, 1e-05]

Iteration No: 30 ended. Search finished for the next optimal point.
Time taken: 0.9028
Function value obtained: -0.0729
Current minimum: -0.1282


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Iteration No: 1 started. Evaluating function at random point.
[16, 347, 16, 0.0001562069367563987]

Iteration No: 1 ended. Evaluation done at random point.
Time taken: 0.2677
Function value obtained: -0.0617
Current minimum: -0.0617
Iteration No: 2 started. Evaluating function at random point.
[10, 280, 10, 4.649617447336329e-05]

Iteration No: 2 ended. Evaluation done at random point.
Time taken: 0.1202
Function value obtained: -0.0618
Current minimum: -0.0618
Iteration No: 3 started. Evaluating function at random point.
[5, 721, 3, 0.0002779697551526683]

Iteration No: 3 ended. Evaluation done at random point.
Time taken: 0.0404
Function value obtained: -0.0716
Current minimum: -0.0716
Iteration No: 4 started. Evaluating function at random point.
[19, 201, 20, 0.00017177621112338383]

Iteration No: 4 ended. Evaluation done at random point.
Time taken: 0.3449
Function value obtained: -0.0617
Current minimum: -0.0716
Iteration No: 5 started. Evaluating function at random point.
[13, 20

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return self.fit(X, sample_weight=sample_weight).labels_
  return self.fit(X, sample_weight=sample_weight).labels_


[16, 347, 16, 0.0001562069367563987]

Iteration No: 1 ended. Evaluation done at random point.
Time taken: 0.2771
Function value obtained: -0.1512
Current minimum: -0.1512
Iteration No: 2 started. Evaluating function at random point.
[10, 280, 10, 4.649617447336329e-05]

Iteration No: 2 ended. Evaluation done at random point.
Time taken: 0.1186
Function value obtained: -0.1512
Current minimum: -0.1512
Iteration No: 3 started. Evaluating function at random point.
[5, 721, 3, 0.0002779697551526683]

Iteration No: 3 ended. Evaluation done at random point.
Time taken: 0.0326
Function value obtained: -0.1139
Current minimum: -0.1512
Iteration No: 4 started. Evaluating function at random point.


  return self.fit(X, sample_weight=sample_weight).labels_
  return self.fit(X, sample_weight=sample_weight).labels_


[19, 201, 20, 0.00017177621112338383]

Iteration No: 4 ended. Evaluation done at random point.
Time taken: 0.3647
Function value obtained: -0.1512
Current minimum: -0.1512
Iteration No: 5 started. Evaluating function at random point.
[13, 206, 2, 0.00011208547084229366]

Iteration No: 5 ended. Evaluation done at random point.
Time taken: 0.0397
Function value obtained: -0.1512
Current minimum: -0.1512
Iteration No: 6 started. Evaluating function at random point.


  return self.fit(X, sample_weight=sample_weight).labels_


[9, 237, 20, 2.9210748185657167e-05]

Iteration No: 6 ended. Evaluation done at random point.
Time taken: 0.2128
Function value obtained: -0.1512
Current minimum: -0.1512
Iteration No: 7 started. Evaluating function at random point.
[4, 695, 9, 0.0009256818992066885]

Iteration No: 7 ended. Evaluation done at random point.
Time taken: 0.0667
Function value obtained: -0.0975
Current minimum: -0.1512
Iteration No: 8 started. Evaluating function at random point.


  return self.fit(X, sample_weight=sample_weight).labels_


[10, 888, 14, 7.961566078062952e-05]

Iteration No: 8 ended. Evaluation done at random point.
Time taken: 0.1801
Function value obtained: -0.1512
Current minimum: -0.1512
Iteration No: 9 started. Evaluating function at random point.
[2, 954, 12, 5.899741796710488e-05]

Iteration No: 9 ended. Evaluation done at random point.
Time taken: 0.0616
Function value obtained: -0.0485
Current minimum: -0.1512
Iteration No: 10 started. Evaluating function at random point.
[2, 385, 6, 0.00023255572624036796]

Iteration No: 10 ended. Evaluation done at random point.
Time taken: 0.5840
Function value obtained: -0.0485
Current minimum: -0.1512
Iteration No: 11 started. Searching for the next optimal point.
[20, 1000, 2, 1e-05]



  return self.fit(X, sample_weight=sample_weight).labels_


Iteration No: 11 ended. Search finished for the next optimal point.
Time taken: 0.5057
Function value obtained: -0.1512
Current minimum: -0.1512
Iteration No: 12 started. Searching for the next optimal point.
[17, 1000, 2, 1e-05]



  return self.fit(X, sample_weight=sample_weight).labels_


Iteration No: 12 ended. Search finished for the next optimal point.
Time taken: 0.4706
Function value obtained: -0.1512
Current minimum: -0.1512
Iteration No: 13 started. Searching for the next optimal point.


  return self.fit(X, sample_weight=sample_weight).labels_


[12, 1000, 20, 0.001]

Iteration No: 13 ended. Search finished for the next optimal point.
Time taken: 0.6587
Function value obtained: -0.1512
Current minimum: -0.1512
Iteration No: 14 started. Searching for the next optimal point.


  return self.fit(X, sample_weight=sample_weight).labels_


[20, 200, 20, 0.001]

Iteration No: 14 ended. Search finished for the next optimal point.
Time taken: 0.7618
Function value obtained: -0.1512
Current minimum: -0.1512
Iteration No: 15 started. Searching for the next optimal point.


  return self.fit(X, sample_weight=sample_weight).labels_


[14, 971, 19, 1.0069323300900946e-05]

Iteration No: 15 ended. Search finished for the next optimal point.
Time taken: 0.6894
Function value obtained: -0.1512
Current minimum: -0.1512
Iteration No: 16 started. Searching for the next optimal point.
[18, 886, 3, 0.0009980259194730184]



  return self.fit(X, sample_weight=sample_weight).labels_


Iteration No: 16 ended. Search finished for the next optimal point.
Time taken: 0.4743
Function value obtained: -0.1512
Current minimum: -0.1512
Iteration No: 17 started. Searching for the next optimal point.
[15, 200, 2, 0.001]



  return self.fit(X, sample_weight=sample_weight).labels_


Iteration No: 17 ended. Search finished for the next optimal point.
Time taken: 0.5212
Function value obtained: -0.1512
Current minimum: -0.1512
Iteration No: 18 started. Searching for the next optimal point.
[9, 894, 2, 0.0007856178671452215]



  return self.fit(X, sample_weight=sample_weight).labels_


Iteration No: 18 ended. Search finished for the next optimal point.
Time taken: 0.4584
Function value obtained: -0.1512
Current minimum: -0.1512
Iteration No: 19 started. Searching for the next optimal point.
[11, 949, 3, 1.0030385708573271e-05]



  return self.fit(X, sample_weight=sample_weight).labels_


Iteration No: 19 ended. Search finished for the next optimal point.
Time taken: 0.4581
Function value obtained: -0.1512
Current minimum: -0.1512
Iteration No: 20 started. Searching for the next optimal point.
[20, 947, 3, 0.0009720998977437016]



  return self.fit(X, sample_weight=sample_weight).labels_


Iteration No: 20 ended. Search finished for the next optimal point.
Time taken: 0.5974
Function value obtained: -0.1512
Current minimum: -0.1512
Iteration No: 21 started. Searching for the next optimal point.


  return self.fit(X, sample_weight=sample_weight).labels_


[18, 294, 20, 1.108815726880793e-05]

Iteration No: 21 ended. Search finished for the next optimal point.
Time taken: 0.7719
Function value obtained: -0.1512
Current minimum: -0.1512
Iteration No: 22 started. Searching for the next optimal point.


  return self.fit(X, sample_weight=sample_weight).labels_


[12, 211, 20, 0.0009387538255894772]

Iteration No: 22 ended. Search finished for the next optimal point.
Time taken: 0.6742
Function value obtained: -0.1512
Current minimum: -0.1512
Iteration No: 23 started. Searching for the next optimal point.
[8, 906, 20, 1.0219454424313239e-05]

Iteration No: 23 ended. Search finished for the next optimal point.
Time taken: 0.6126
Function value obtained: -0.1512
Current minimum: -0.1512
Iteration No: 24 started. Searching for the next optimal point.


  return self.fit(X, sample_weight=sample_weight).labels_


[16, 952, 20, 0.0008224457858700119]

Iteration No: 24 ended. Search finished for the next optimal point.
Time taken: 0.7448
Function value obtained: -0.1512
Current minimum: -0.1512
Iteration No: 25 started. Searching for the next optimal point.
[20, 288, 4, 1.0758872197482023e-05]



  return self.fit(X, sample_weight=sample_weight).labels_


Iteration No: 25 ended. Search finished for the next optimal point.
Time taken: 0.4970
Function value obtained: -0.1512
Current minimum: -0.1512
Iteration No: 26 started. Searching for the next optimal point.


  return self.fit(X, sample_weight=sample_weight).labels_


[10, 1000, 20, 0.001]

Iteration No: 26 ended. Search finished for the next optimal point.
Time taken: 0.6828
Function value obtained: -0.1512
Current minimum: -0.1512
Iteration No: 27 started. Searching for the next optimal point.
[14, 915, 3, 1.0548464451936584e-05]



  return self.fit(X, sample_weight=sample_weight).labels_


Iteration No: 27 ended. Search finished for the next optimal point.
Time taken: 0.4980
Function value obtained: -0.1512
Current minimum: -0.1512
Iteration No: 28 started. Searching for the next optimal point.


  return self.fit(X, sample_weight=sample_weight).labels_


[20, 917, 20, 1.3922545795613625e-05]

Iteration No: 28 ended. Search finished for the next optimal point.
Time taken: 0.7780
Function value obtained: -0.1512
Current minimum: -0.1512
Iteration No: 29 started. Searching for the next optimal point.
[10, 200, 2, 1e-05]



  return self.fit(X, sample_weight=sample_weight).labels_


Iteration No: 29 ended. Search finished for the next optimal point.
Time taken: 0.5303
Function value obtained: -0.1512
Current minimum: -0.1512
Iteration No: 30 started. Searching for the next optimal point.
[18, 929, 2, 0.0007055001005823937]



  return self.fit(X, sample_weight=sample_weight).labels_


Iteration No: 30 ended. Search finished for the next optimal point.
Time taken: 0.5080
Function value obtained: -0.1512
Current minimum: -0.1512


  return self.fit(X, sample_weight=sample_weight).labels_
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Iteration No: 1 started. Evaluating function at random point.


  return self.fit(X, sample_weight=sample_weight).labels_
  return self.fit(X, sample_weight=sample_weight).labels_


[16, 347, 16, 0.0001562069367563987]

Iteration No: 1 ended. Evaluation done at random point.
Time taken: 0.2757
Function value obtained: -0.1008
Current minimum: -0.1008
Iteration No: 2 started. Evaluating function at random point.
[10, 280, 10, 4.649617447336329e-05]

Iteration No: 2 ended. Evaluation done at random point.
Time taken: 0.1181
Function value obtained: -0.1008
Current minimum: -0.1008
Iteration No: 3 started. Evaluating function at random point.
[5, 721, 3, 0.0002779697551526683]

Iteration No: 3 ended. Evaluation done at random point.
Time taken: 0.0349
Function value obtained: -0.1098
Current minimum: -0.1098
Iteration No: 4 started. Evaluating function at random point.


  return self.fit(X, sample_weight=sample_weight).labels_
  return self.fit(X, sample_weight=sample_weight).labels_


[19, 201, 20, 0.00017177621112338383]

Iteration No: 4 ended. Evaluation done at random point.
Time taken: 0.3895
Function value obtained: -0.1008
Current minimum: -0.1098
Iteration No: 5 started. Evaluating function at random point.
[13, 206, 2, 0.00011208547084229366]

Iteration No: 5 ended. Evaluation done at random point.
Time taken: 0.0407
Function value obtained: -0.1008
Current minimum: -0.1098
Iteration No: 6 started. Evaluating function at random point.


  return self.fit(X, sample_weight=sample_weight).labels_


[9, 237, 20, 2.9210748185657167e-05]

Iteration No: 6 ended. Evaluation done at random point.
Time taken: 0.2067
Function value obtained: -0.1008
Current minimum: -0.1098
Iteration No: 7 started. Evaluating function at random point.
[4, 695, 9, 0.0009256818992066885]

Iteration No: 7 ended. Evaluation done at random point.
Time taken: 0.0670
Function value obtained: -0.0964
Current minimum: -0.1098
Iteration No: 8 started. Evaluating function at random point.


  return self.fit(X, sample_weight=sample_weight).labels_


[10, 888, 14, 7.961566078062952e-05]

Iteration No: 8 ended. Evaluation done at random point.
Time taken: 0.1801
Function value obtained: -0.1008
Current minimum: -0.1098
Iteration No: 9 started. Evaluating function at random point.
[2, 954, 12, 5.899741796710488e-05]

Iteration No: 9 ended. Evaluation done at random point.
Time taken: 0.0732
Function value obtained: -0.0485
Current minimum: -0.1098
Iteration No: 10 started. Evaluating function at random point.
[2, 385, 6, 0.00023255572624036796]

Iteration No: 10 ended. Evaluation done at random point.
Time taken: 0.6803
Function value obtained: -0.0485
Current minimum: -0.1098
Iteration No: 11 started. Searching for the next optimal point.
[6, 1000, 2, 0.001]

Iteration No: 11 ended. Search finished for the next optimal point.
Time taken: 0.5275
Function value obtained: -0.1179
Current minimum: -0.1179
Iteration No: 12 started. Searching for the next optimal point.
[6, 200, 20, 1e-05]

Iteration No: 12 ended. Search finished for the 

  return self.fit(X, sample_weight=sample_weight).labels_


Iteration No: 13 ended. Search finished for the next optimal point.
Time taken: 0.4927
Function value obtained: -0.1008
Current minimum: -0.1179
Iteration No: 14 started. Searching for the next optimal point.
[20, 847, 2, 0.001]



  return self.fit(X, sample_weight=sample_weight).labels_


Iteration No: 14 ended. Search finished for the next optimal point.
Time taken: 0.6053
Function value obtained: -0.1008
Current minimum: -0.1179
Iteration No: 15 started. Searching for the next optimal point.
[7, 962, 2, 0.0008817515568449327]

Iteration No: 15 ended. Search finished for the next optimal point.
Time taken: 0.3907
Function value obtained: -0.1089
Current minimum: -0.1179
Iteration No: 16 started. Searching for the next optimal point.
[6, 275, 20, 1.4054538698374402e-05]

Iteration No: 16 ended. Search finished for the next optimal point.
Time taken: 0.6415
Function value obtained: -0.1179
Current minimum: -0.1179
Iteration No: 17 started. Searching for the next optimal point.
[2, 951, 19, 1e-05]

Iteration No: 17 ended. Search finished for the next optimal point.
Time taken: 0.7226
Function value obtained: -0.0485
Current minimum: -0.1179
Iteration No: 18 started. Searching for the next optimal point.
[20, 200, 2, 1e-05]



  return self.fit(X, sample_weight=sample_weight).labels_


Iteration No: 18 ended. Search finished for the next optimal point.
Time taken: 0.4467
Function value obtained: -0.1008
Current minimum: -0.1179
Iteration No: 19 started. Searching for the next optimal point.
[18, 945, 3, 2.2405314524568526e-05]



  return self.fit(X, sample_weight=sample_weight).labels_


Iteration No: 19 ended. Search finished for the next optimal point.
Time taken: 0.5089
Function value obtained: -0.1008
Current minimum: -0.1179
Iteration No: 20 started. Searching for the next optimal point.
[6, 224, 20, 0.0008832430556219548]

Iteration No: 20 ended. Search finished for the next optimal point.
Time taken: 0.5952
Function value obtained: -0.1179
Current minimum: -0.1179
Iteration No: 21 started. Searching for the next optimal point.
[7, 244, 3, 1.1733789274263828e-05]

Iteration No: 21 ended. Search finished for the next optimal point.
Time taken: 0.4739
Function value obtained: -0.1089
Current minimum: -0.1179
Iteration No: 22 started. Searching for the next optimal point.


  return self.fit(X, sample_weight=sample_weight).labels_


[14, 979, 19, 2.213472419219798e-05]

Iteration No: 22 ended. Search finished for the next optimal point.
Time taken: 0.7695
Function value obtained: -0.1008
Current minimum: -0.1179
Iteration No: 23 started. Searching for the next optimal point.
[6, 994, 20, 0.0006771100929448737]

Iteration No: 23 ended. Search finished for the next optimal point.
Time taken: 0.6416
Function value obtained: -0.1179
Current minimum: -0.1179
Iteration No: 24 started. Searching for the next optimal point.


  return self.fit(X, sample_weight=sample_weight).labels_


[12, 872, 20, 0.0009562498837656471]

Iteration No: 24 ended. Search finished for the next optimal point.
Time taken: 0.7229
Function value obtained: -0.1008
Current minimum: -0.1179
Iteration No: 25 started. Searching for the next optimal point.
[6, 943, 19, 1.0034250436075407e-05]

Iteration No: 25 ended. Search finished for the next optimal point.
Time taken: 0.5972
Function value obtained: -0.1179
Current minimum: -0.1179
Iteration No: 26 started. Searching for the next optimal point.
[6, 200, 20, 0.001]

Iteration No: 26 ended. Search finished for the next optimal point.
Time taken: 0.5759
Function value obtained: -0.1179
Current minimum: -0.1179
Iteration No: 27 started. Searching for the next optimal point.
[6, 200, 20, 1e-05]





Iteration No: 27 ended. Search finished for the next optimal point.
Time taken: 0.6599
Function value obtained: -0.1179
Current minimum: -0.1179
Iteration No: 28 started. Searching for the next optimal point.
[7, 985, 19, 0.0009657147727760432]

Iteration No: 28 ended. Search finished for the next optimal point.
Time taken: 0.6016
Function value obtained: -0.1089
Current minimum: -0.1179
Iteration No: 29 started. Searching for the next optimal point.
[6, 200, 20, 1e-05]





Iteration No: 29 ended. Search finished for the next optimal point.
Time taken: 0.6631
Function value obtained: -0.1179
Current minimum: -0.1179
Iteration No: 30 started. Searching for the next optimal point.
[5, 985, 3, 1.4647634681590077e-05]

Iteration No: 30 ended. Search finished for the next optimal point.
Time taken: 0.5324
Function value obtained: -0.1098
Current minimum: -0.1179
Iteration No: 1 started. Evaluating function at random point.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


[16, 347, 16, 0.0001562069367563987]

Iteration No: 1 ended. Evaluation done at random point.
Time taken: 0.2507
Function value obtained: -0.0995
Current minimum: -0.0995
Iteration No: 2 started. Evaluating function at random point.
[10, 280, 10, 4.649617447336329e-05]

Iteration No: 2 ended. Evaluation done at random point.
Time taken: 0.1090
Function value obtained: -0.0976
Current minimum: -0.0995
Iteration No: 3 started. Evaluating function at random point.
[5, 721, 3, 0.0002779697551526683]

Iteration No: 3 ended. Evaluation done at random point.
Time taken: 0.0369
Function value obtained: -0.0973
Current minimum: -0.0995
Iteration No: 4 started. Evaluating function at random point.
[19, 201, 20, 0.00017177621112338383]

Iteration No: 4 ended. Evaluation done at random point.
Time taken: 0.3287
Function value obtained: -0.1192
Current minimum: -0.1192
Iteration No: 5 started. Evaluating function at random point.
[13, 206, 2, 0.00011208547084229366]

Iteration No: 5 ended. Evaluati



[20, 200, 20, 0.001]

Iteration No: 26 ended. Search finished for the next optimal point.
Time taken: 0.7724
Function value obtained: -0.1198
Current minimum: -0.1198
Iteration No: 27 started. Searching for the next optimal point.




[20, 200, 20, 1e-05]

Iteration No: 27 ended. Search finished for the next optimal point.
Time taken: 0.7611
Function value obtained: -0.1198
Current minimum: -0.1198
Iteration No: 28 started. Searching for the next optimal point.
[20, 917, 20, 1.3922545795613625e-05]

Iteration No: 28 ended. Search finished for the next optimal point.
Time taken: 0.8374
Function value obtained: -0.1198
Current minimum: -0.1198
Iteration No: 29 started. Searching for the next optimal point.




[20, 200, 20, 1e-05]

Iteration No: 29 ended. Search finished for the next optimal point.
Time taken: 0.8857
Function value obtained: -0.1198
Current minimum: -0.1198
Iteration No: 30 started. Searching for the next optimal point.
[11, 985, 2, 4.247523182347324e-05]

Iteration No: 30 ended. Search finished for the next optimal point.
Time taken: 0.5386
Function value obtained: -0.0986
Current minimum: -0.1198


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


### Save these datasets

In [None]:
# Persist each split with its target appended as a trailing "y" column.
# `assign` returns a new frame, so the in-memory feature matrices Xtrain and
# Xtest are left without the target column (no accidental label leakage if a
# later cell uses them without reloading from disk).
Xtrain.assign(y=ytrain.to_numpy()).to_csv(PATH + "tests_notebooks/train.csv", index=False)
Xtest.assign(y=ytest.to_numpy()).to_csv(PATH + "tests_notebooks/test.csv", index=False)

# Analyze feature importance

In [None]:
# Reload the persisted train / test splits (features plus the "y" column)
# from the CSV files written under PATH.
Xtrain, Xtest = (
    pd.read_csv(PATH + "tests_notebooks/train.csv"),
    pd.read_csv(PATH + "tests_notebooks/test.csv"),
)

In [None]:
# Detach the target from each split. `pop` removes the "y" column from the
# frame itself, so this cell must be preceded by a fresh reload of the CSVs.
ytrain, ytest = Xtrain.pop("y"), Xtest.pop("y")

In [None]:
# Preview the first five rows of the training features.
Xtrain.head(n=5)

Unnamed: 0,var1,var2,var3,var4,var5,var6,var7,var8,var9,var10,var11,var12,var13,var14,var15,var16,var17,var18,var19,var20,var21,var22,var23,var24,var25,var26,var27,var28,var29,var30,var31,var32,var33,var34,var35,var36,var37,var38,var39,var40,...,var68,var1_var7,var1_var20,var7_var8,var7_var20,var7_var23,var7_var28,var7_var29,var7_var39,var1_var7_var8,var1_var7_var14,var1_var7_var20,var1_var7_var23,var1_var7_var28,var1_var7_var29,var1_var7_var31,var1_var7_var39,var1_var8_var20,var1_var8_var23,var1_var20_var23,var1_var20_var28,var1_var20_var29,var1_var20_var39,var1_var23_var28,var2_var3_var8,var66_var54,var65_var54,var24_var50,var48_var54,var60_var54,var64_var53,var63_var54,var53_var54,var47_var54,var45_var54,var46_var54,var60_var24,var51_var54,var44_var54,var61_var54
0,4,116,2921,-999,6376,-999,-999,27,-999,-999,-999,-999,1387,13,-999,-999,-999,-999,1573,13,1445,5,1,1,2,-999,0,24,4,1,0,13,3,26,50,0,1,0,4,10,...,0.297794,4,9,4,4,3,3,3,3,9,7,9,7,7,7,4,4,17,17,9,13,12,11,12,0,2,3,7,3,2,6,2,2,2,4,3,1,2,2,18
1,16,53,731,6500,8734,2778,24,18,3,63,29249,6416,2156,17,2,2,2,2,1681,17,2265,0,1,0,1,9,1,20,2,1,0,13,4,20,303,24,7,10,4,0,...,0.169118,199,198,30,21,22,23,21,24,291,289,235,212,242,221,202,239,300,220,199,215,215,252,187,0,0,2,0,0,3,1,3,1,1,1,1,0,1,3,16
2,4,44,821,26208,9007,2381,35,27,1,63,29065,7025,1896,26,71,72,28,5,1573,26,1971,5,3,2,0,7,0,24,4,2,0,17,6,48,227,29,7,0,1,7,...,0.165441,2,2,2,2,2,2,2,4,2,2,2,2,2,2,2,10,2,2,2,2,2,7,2,127,0,2,10,0,3,9,1,1,1,1,1,2,1,0,2
3,4,44,540,23042,9048,994,35,27,3,63,10936,4458,800,26,44,44,10,4,188,26,818,5,3,2,1,7,0,24,4,1,0,13,1,49,490,2,3,0,4,2,...,0.150735,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,34,0,2,10,0,1,1,1,1,1,1,1,3,1,0,15
4,7,124,3137,11117,6231,2453,35,27,2,63,10802,4933,2033,26,2,2,2,2,664,26,2093,3,3,2,2,1,1,24,4,2,0,22,5,19,533,2,1,0,4,14,...,0.165441,7,5,2,2,2,2,2,2,7,6,6,6,6,6,7,6,7,7,5,5,5,5,5,0,0,2,10,0,3,1,3,1,1,1,1,2,1,3,5


# Variable Importance

In [None]:
# Score every feature with the feature_importances_ of a shallow
# (max_depth=2) random forest.
# NOTE(review): the forest is fitted on the test split (Xtest / ytest), not
# on Xtrain — confirm this is intentional.
clf = RandomForestClassifier(random_state=42, max_depth=2).fit(Xtest, ytest)

res_var_imp = pd.DataFrame(
    {"feature": Xtest.columns, "var_imp": clf.feature_importances_}
)

# Display the ten highest-scoring features.
res_var_imp.sort_values(by="var_imp", ascending=False).head(10)

Unnamed: 0,feature,var_imp
7,var8,0.120276
6,var7,0.115761
3,var4,0.115735
72,var7_var23,0.060044
71,var7_var20,0.041823
74,var7_var29,0.041676
22,var23,0.0352
99,var53_var54,0.030038
79,var1_var7_var23,0.029003
19,var20,0.028495


# Mutual Information

In [None]:
# Estimate the mutual information between each feature and the target.
# random_state is fixed so the scores (and therefore the ranking) are the
# same on every run of the notebook.
mutual_info = mutual_info_classif(Xtest, ytest, random_state=42)
res_mutual_info = pd.DataFrame({
    "feature": Xtest.columns,
    "mutual_info": mutual_info
})

# Features ordered from most to least informative.
res_mutual_info.sort_values('mutual_info', ascending=False)

Unnamed: 0,feature,mutual_info
3,var4,0.141659
70,var7_var8,0.092099
85,var1_var8_var23,0.090754
76,var1_var7_var8,0.087634
84,var1_var8_var20,0.077787
...,...,...
64,var65,0.000000
63,var64,0.000000
60,var61,0.000000
48,var49,0.000000


# Zero Proportion

In [None]:
# Percentage of rows in which each feature is falsy (i.e. zero for numeric
# columns): values that cast to False are counted column by column.
n_rows = Xtest.shape[0]
nonzero_counts = Xtest.astype(bool).sum(axis=0)
res_pzeros = pd.DataFrame(
    {
        "feature": Xtest.columns,
        "pzeros": (n_rows - nonzero_counts) / n_rows * 100,
    }
)

# Features with the largest share of zeros first.
res_pzeros.sort_values(by="pzeros", ascending=False)

Unnamed: 0,feature,pzeros
var31,var31,96.341751
var47,var47,95.893321
var51,var51,94.005192
var48,var48,93.061128
var46,var46,88.010385
...,...,...
var61,var61,0.000000
var7_var23,var7_var23,0.000000
var64,var64,0.000000
var65,var65,0.000000


# Null Proportion

In [None]:
# Percentage of missing values per feature. The value -999 is mapped to NaN
# first, so it is counted as missing alongside real NaNs.
n_rows = Xtest.shape[0]
null_counts = Xtest.replace(-999, np.nan).isnull().sum()
res_pnull = pd.DataFrame(
    {
        "feature": Xtest.columns,
        "pnull": 100 - (n_rows - null_counts) / n_rows * 100,
    }
)

# Features with the largest share of missing values first.
res_pnull.sort_values(by="pnull", ascending=False)

Unnamed: 0,feature,pnull
var65,var65,85.886240
var66,var66,85.886240
var60,var60,45.621902
var10,var10,16.025490
var8,var8,15.553458
...,...,...
var41,var41,0.000000
var40,var40,0.000000
var39,var39,0.000000
var38,var38,0.000000


## Backward Feature Elimination

In [None]:
# Backward sequential feature selection driven by a logistic regression.
# NOTE(review): despite its name, `lasso_newton` is an L2-penalised model
# fitted with the 'sag' solver and a loose tolerance (tol=0.1).
lasso_newton = LogisticRegression(
    penalty="l2",
    C=1,
    solver="sag",
    tol=0.1,
    random_state=42,
)

# Candidate subsets are scored with F1 under 2-fold cross-validation;
# n_features_to_select is left at the library default.
bfs = SequentialFeatureSelector(
    estimator=lasso_newton,
    direction="backward",
    scoring="f1",
    cv=2,
    n_jobs=-1,
)
bfs.fit(Xtest, ytest)

SequentialFeatureSelector(cv=2, direction='backward',
                          estimator=LogisticRegression(C=1, random_state=42,
                                                       solver='sag', tol=0.1),
                          n_jobs=-1, scoring='f1')

In [None]:
# Translate the selector's boolean support mask into keep / remove labels,
# one row per feature.
bfs_labels = np.where(bfs.support_, "to_keep", "to_remove")
res_bfs = pd.DataFrame({"feature": Xtest.columns, "bfs": bfs_labels})
res_bfs.sort_values(by="bfs", ascending=True)

Unnamed: 0,feature,bfs
106,var61_var54,to_keep
87,var1_var20_var28,to_keep
59,var60,to_keep
85,var1_var8_var23,to_keep
84,var1_var8_var20,to_keep
...,...,...
38,var39,to_remove
37,var38,to_remove
36,var37,to_remove
33,var34,to_remove


## Lasso Regularization (L1)

In [None]:
# L1-penalised logistic regression fitted on the evaluation split.
lasso = LogisticRegression(
    penalty="l1",
    C=1,
    solver="liblinear",
    random_state=314,
)
lasso.fit(Xtest, ytest)

# Wrap the already-fitted model (prefit=True) in a selector whose cut-off is
# the "median" threshold.
lasso_selector = SelectFromModel(lasso, threshold="median", prefit=True)

In [None]:
# Label each feature according to the L1 selector's support mask.
lasso_mask = lasso_selector.get_support()
res_lasso = pd.DataFrame({"feature": Xtest.columns}).assign(
    lasso=np.where(lasso_mask, "to_keep", "to_remove")
)
res_lasso.sort_values(by="lasso", ascending=True)

Unnamed: 0,feature,lasso
0,var1,to_keep
68,var1_var7,to_keep
67,var68,to_keep
62,var63,to_keep
54,var55,to_keep
...,...,...
16,var17,to_remove
75,var7_var39,to_remove
76,var1_var7_var8,to_remove
78,var1_var7_var20,to_remove


## RFE

In [None]:
# Recursive feature elimination with cross-validation, driven by a shallow
# random forest. The forest is seeded so that the eliminated features are the
# same on every run (314 is the seed used by the other selectors here).
rf = RandomForestClassifier(n_jobs=-1, max_depth=4, random_state=314)

# One feature is dropped per step and at least 20 features are retained.
# RFECV itself runs serially (n_jobs=1); the forest parallelises internally.
rfe_selector = RFECV(rf, min_features_to_select=20, step=1, n_jobs=1, verbose=1)
rfe_selector.fit(Xtest.values, ytest)

Fitting estimator with 107 features.
Fitting estimator with 106 features.
Fitting estimator with 105 features.
Fitting estimator with 104 features.
Fitting estimator with 103 features.
Fitting estimator with 102 features.
Fitting estimator with 101 features.
Fitting estimator with 100 features.
Fitting estimator with 99 features.
Fitting estimator with 98 features.
Fitting estimator with 97 features.
Fitting estimator with 96 features.
Fitting estimator with 95 features.
Fitting estimator with 94 features.
Fitting estimator with 93 features.
Fitting estimator with 92 features.
Fitting estimator with 91 features.
Fitting estimator with 90 features.
Fitting estimator with 89 features.
Fitting estimator with 88 features.
Fitting estimator with 87 features.
Fitting estimator with 86 features.
Fitting estimator with 85 features.
Fitting estimator with 84 features.
Fitting estimator with 83 features.
Fitting estimator with 82 features.
Fitting estimator with 81 features.
Fitting estimator wi

RFECV(estimator=RandomForestClassifier(max_depth=4, n_jobs=-1),
      min_features_to_select=20, n_jobs=1, verbose=1)

In [None]:
# Label each feature according to the RFECV support mask.
rfe_labels = np.where(rfe_selector.support_, "to_keep", "to_remove")
res_rfe = pd.DataFrame(dict(feature=Xtest.columns, rfe=rfe_labels))
res_rfe.sort_values(by="rfe", ascending=True)

Unnamed: 0,feature,rfe
0,var1,to_keep
22,var23,to_keep
70,var7_var8,to_keep
78,var1_var7_var20,to_keep
71,var7_var20,to_keep
...,...,...
33,var34,to_remove
32,var33,to_remove
31,var32,to_remove
41,var42,to_remove


## Boruta

In [None]:
# Boruta feature selection around a shallow random forest. The seed is given
# to BorutaPy rather than to the forest; n_estimators="auto" leaves the
# number of trees to Boruta.
rf = RandomForestClassifier(max_depth=4, n_jobs=-1)
boruta_selector = BorutaPy(
    estimator=rf,
    n_estimators="auto",
    random_state=314,
    verbose=2,
)
boruta_selector.fit(Xtest.values, ytest)

Iteration: 	1 / 100
Confirmed: 	0
Tentative: 	107
Rejected: 	0
Iteration: 	2 / 100
Confirmed: 	0
Tentative: 	107
Rejected: 	0
Iteration: 	3 / 100
Confirmed: 	0
Tentative: 	107
Rejected: 	0
Iteration: 	4 / 100
Confirmed: 	0
Tentative: 	107
Rejected: 	0
Iteration: 	5 / 100
Confirmed: 	0
Tentative: 	107
Rejected: 	0
Iteration: 	6 / 100
Confirmed: 	0
Tentative: 	107
Rejected: 	0
Iteration: 	7 / 100
Confirmed: 	0
Tentative: 	107
Rejected: 	0
Iteration: 	8 / 100
Confirmed: 	50
Tentative: 	25
Rejected: 	32
Iteration: 	9 / 100
Confirmed: 	50
Tentative: 	25
Rejected: 	32
Iteration: 	10 / 100
Confirmed: 	50
Tentative: 	25
Rejected: 	32
Iteration: 	11 / 100
Confirmed: 	50
Tentative: 	25
Rejected: 	32
Iteration: 	12 / 100
Confirmed: 	51
Tentative: 	18
Rejected: 	38
Iteration: 	13 / 100
Confirmed: 	51
Tentative: 	18
Rejected: 	38
Iteration: 	14 / 100
Confirmed: 	51
Tentative: 	18
Rejected: 	38
Iteration: 	15 / 100
Confirmed: 	51
Tentative: 	18
Rejected: 	38
Iteration: 	16 / 100
Confirmed: 	51
Tenta

BorutaPy(estimator=RandomForestClassifier(max_depth=4, n_estimators=276,
                                          n_jobs=-1,
                                          random_state=RandomState(MT19937) at 0x7F0237B089E0),
         n_estimators='auto',
         random_state=RandomState(MT19937) at 0x7F0237B089E0, verbose=2)

In [None]:
# Label each feature according to Boruta's `support_` mask.
# NOTE(review): features Boruta left as "tentative" are presumably not part
# of `support_` and therefore end up as "to_remove" — confirm.
boruta_labels = np.where(boruta_selector.support_, "to_keep", "to_remove")
res_boruta = pd.DataFrame({"feature": Xtest.columns, "boruta": boruta_labels})
res_boruta.sort_values(by="boruta", ascending=True)

Unnamed: 0,feature,boruta
0,var1,to_keep
79,var1_var7_var23,to_keep
78,var1_var7_var20,to_keep
77,var1_var7_var14,to_keep
76,var1_var7_var8,to_keep
...,...,...
32,var33,to_remove
31,var32,to_remove
30,var31,to_remove
35,var36,to_remove


## Random Column

In [None]:
# Append a pure-noise column named "random" (uniform on [0, 100)) to serve as
# an importance baseline. The draw is vectorised and seeded so the baseline
# is reproducible, and `assign` attaches it by position, so it cannot be
# misaligned (NaN-padded) if Xtest does not carry a default RangeIndex.
X_random = Xtest.assign(
    random=np.random.RandomState(314).uniform(0.0, 100.0, size=Xtest.shape[0])
)

In [None]:
# Fit a shallow forest on the features plus the injected "random" column.
# The forest is seeded: the keep / remove decision derived from it compares
# importances against the noise column, so it must not vary between runs.
rf = RandomForestClassifier(n_jobs=-1, max_depth=3, random_state=314)
rf.fit(X_random, ytest)

RandomForestClassifier(max_depth=3, n_jobs=-1)

In [None]:
# Importance the forest assigned to the injected "random" noise column.
# The built-in float() is applied to an explicit scalar: the np.float alias
# was deprecated in NumPy 1.20 and removed in 1.24.
varip_random = float(rf.feature_importances_[X_random.columns == "random"][0])
print("Random VarImp:", varip_random)

# A feature is kept only when its importance is strictly greater than the
# noise column's importance.
res_rand_var_imp = pd.DataFrame({
    "feature": X_random.columns,
    "rand_var_imp": rf.feature_importances_,
    "rand_var": np.where(rf.feature_importances_ > varip_random, "to_keep", "to_remove")
})
res_rand_var_imp.sort_values('rand_var_imp', ascending=False)

Random VarImp: 0.0


Unnamed: 0,feature,rand_var_imp,rand_var
3,var4,0.256014,to_keep
7,var8,0.154576,to_keep
70,var7_var8,0.043125,to_keep
6,var7,0.041938,to_keep
5,var6,0.033400,to_keep
...,...,...,...
36,var37,0.000000,to_remove
49,var50,0.000000,to_remove
30,var31,0.000000,to_remove
29,var30,0.000000,to_remove


# Compile the results

In [None]:
# Join the per-method result tables into one frame. Each merge uses pandas'
# default key (the columns the two frames share) and the default join type.
# The raw noise-baseline importance is dropped; only its keep/remove verdict
# is carried over.
feature_selection = (
    res_var_imp
    .merge(res_mutual_info)
    .merge(res_pzeros)
    .merge(res_pnull)
    .merge(res_bfs)
    .merge(res_lasso)
    .merge(res_boruta)
    .merge(res_rfe)
    .merge(res_rand_var_imp.drop(columns="rand_var_imp"))
)

# Persist the combined table next to the other artefacts under PATH.
feature_selection.to_csv(PATH + "feature_selection.csv", index=False)

In [None]:
# Render the combined table with visual cues: in-cell bars for the two
# score columns, a colour gradient for the zero / null percentages, and a red
# background on every cell whose value is "to_remove".
# The chain is wrapped in parentheses instead of backslash continuations, so
# a blank line inside it can no longer break the statement.
(
    feature_selection.style
    .bar(subset=['var_imp'], color='#205ff2')
    .bar(subset=['mutual_info'], color='#205ff2')
    .background_gradient(subset=['pzeros'], cmap='coolwarm')
    .background_gradient(subset=['pnull'], cmap='coolwarm')
    .apply(lambda x: ["background: red" if v == "to_remove" else "" for v in x], axis=1)
)

Unnamed: 0,feature,var_imp,mutual_info,pzeros,pnull,bfs,lasso,boruta,rfe,rand_var
0,var1,0.009824,0.014084,1.18008,0.0,to_remove,to_keep,to_keep,to_keep,to_keep
1,var2,0.0,0.002555,0.0,4.153882,to_keep,to_remove,to_keep,to_remove,to_keep
2,var3,0.0,0.006848,0.0,4.153882,to_keep,to_remove,to_keep,to_remove,to_keep
3,var4,0.115735,0.141659,0.0,6.042011,to_remove,to_remove,to_keep,to_keep,to_keep
4,var5,0.0,0.054128,0.0,0.0,to_keep,to_remove,to_keep,to_remove,to_keep
5,var6,0.011526,0.051435,0.0,12.626859,to_keep,to_remove,to_keep,to_keep,to_keep
6,var7,0.115761,0.025738,0.0,12.862875,to_keep,to_remove,to_keep,to_keep,to_keep
7,var8,0.120276,0.053312,0.0,15.553458,to_keep,to_remove,to_keep,to_keep,to_keep
8,var9,0.00105,0.00495,0.0,10.2903,to_keep,to_keep,to_remove,to_remove,to_keep
9,var10,0.0,0.008635,0.0,16.02549,to_remove,to_remove,to_remove,to_remove,to_keep
