In [13]:
import os

import numpy as np
import pandas as pd
from scipy.stats import median_absolute_deviation
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
import optuna
from sklearn.preprocessing import StandardScaler
import random
random_state = 1234
from sklearn.metrics import roc_auc_score
import os
from pathlib import Path

In [3]:
parent_dir = Path(os.getcwd()).parent
cross_validation_dir = os.path.join(parent_dir, "Data", "train_test_indices.npy")
data_dir = os.path.join(parent_dir, "Data" , "MSK_Impact_train")
train_test_indices = np.load(cross_validation_dir, allow_pickle = True)

In [4]:
def list_files_in_dir(dirname):
    dir_files = list()
    for root, _, files in os.walk(dirname):
        for file in files:
            dir_files.append(os.path.join(root, file))    
    return dir_files

In [5]:
def build_kneigh_model(n_neighbors, X, y):
    model = KNeighborsClassifier(n_neighbors=n_neighbors)
    model.fit(X, y)
    
    return model

In [6]:
def scale_rows(X):
    return X.div(X.sum(axis=1), axis=0)

In [7]:
def get_row_indices_with_sum_zero(X):
    return X.index[(X.sum(axis=1) == 0)].tolist()

In [8]:
def dataset_generator(data_dir, y_col):

    for datafile in list_files_in_dir(data_dir):
        data = pd.read_csv(datafile)
        X = data.iloc[:, :96]
        y = (data[y_col]).astype(np.int_)
        X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')
        X = scale_rows(X)
        yield X, y

In [None]:
def train_val_test_generator(data_dir, num_samples, y_col, test_frac=0.1, n_folds = 10):
    val_frac = test_frac/(1.0 - test_frac)
    
    for X, y in dataset_generator(data_dir, num_samples, y_col):
        fold_data = list()
        skf = StratifiedKFold(n_splits=n_folds, shuffle = True, random_state = random_state)
#         print(random_state)
#         skf = StratifiedKFold(n_splits=num_folds)
        
        for train_val_index, test_index in skf.split(X, y):
            X_test, y_test = X.iloc[test_index, :], y.iloc[test_index]
            X_train, X_val, y_train, y_val = train_test_split(
                X.iloc[train_val_index,:], y.iloc[train_val_index], 
                test_size=val_frac, 
                random_state=random_state, 
                stratify=y.iloc[train_val_index]
            )
#             print(train_val_index, test_index)
            
            #X_train, X_val, X_test = scale_columns(X_train, X_val, X_test)
            fold_data.append(((X_train, y_train), 
                              (X_val, y_val), 
                              (X_test, y_test)))
            
        yield fold_data

In [9]:
def train_val_test_generator_default(data_dir, y_col, test_frac=0.1):
    
    val_frac = test_frac/(1.0 - test_frac)
    
    for X, y in dataset_generator(data_dir, y_col):
        fold_data = list()

        
        for train_val_index, test_index in train_test_indices:
            X_test, y_test = X.iloc[test_index, :], y.iloc[test_index]
            X_train, X_val, y_train, y_val = train_test_split(
                X.iloc[train_val_index,:], y.iloc[train_val_index], 
                test_size=val_frac, 
                random_state=random_state, 
                stratify=y.iloc[train_val_index]
            )
#             print(train_val_index, test_index)
            
            #X_train, X_val, X_test = scale_columns(X_train, X_val, X_test)
            fold_data.append(((X_train, y_train), 
                              (X_val, y_val), 
                              (X_test, y_test)))
            
        yield fold_data

In [27]:
def compute_mlp_performance(trial, data_dir=data_dir, n_neighbors=5, n_folds=10, y_col='is_sig3_20'):
    aucs = list()

    for folds_data in train_val_test_generator_default(data_dir, y_col=y_col):
        fold_aucs = list()
        
        for fold_data in folds_data:
            # get data
            (X_train, y_train),(X_val, y_val), (X_test, y_test) = fold_data

            model = build_kneigh_model(trial.suggest_int('n_neighbors', 1, 100), X_train, y_train)

            # evaluate
            fold_aucs.append(roc_auc_score(y_test, model.predict(X_test)))
            
        aucs.append(fold_aucs)
    
    aucs = np.array(aucs)
    
    median_of_median_auc = np.median(np.median(aucs, axis=1))
    mad_of_mad_auc = median_absolute_deviation(median_absolute_deviation(aucs, axis=1))
#     return median_of_median_auc, median_absolute_deviation(aucs, axis=1)
    return median_of_median_auc

    

In [28]:
study_is_sig3 = optuna.create_study(direction="maximize")
study_is_sig3.optimize(compute_mlp_performance, n_trials=200)

[32m[I 2022-06-27 13:51:36,973][0m A new study created in memory with name: no-name-017c0571-2924-4596-81fb-21e12c627d34[0m
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')

To preserve the existing default behavior, use
`scipy.stats.median_abs_deviation(..., scale=1/1.4826)`.
The value 1.4826 is not numerically precise for scaling
with a normal distribution. For a numerically precise value, use
`scipy.stats.median_abs_deviation(..., scale='normal')`.

  mad_of_mad_auc = median_absolute_deviation(median_absolute_deviation(aucs, axis=1))
[32m[I 2022-06-27 13:51:37,108][0m Trial 0 finished with value: 0.5 and parameters: {'n_neighbors': 55}. Best is trial 0 with value: 0.5.[0m
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')

To preserv

  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')

To preserve the existing default behavior, use
`scipy.stats.median_abs_deviation(..., scale=1/1.4826)`.
The value 1.4826 is not numerically precise for scaling
with a normal distribution. For a numerically precise value, use
`scipy.stats.median_abs_deviation(..., scale='normal')`.

  mad_of_mad_auc = median_absolute_deviation(median_absolute_deviation(aucs, axis=1))
[32m[I 2022-06-27 13:51:37,910][0m Trial 6 finished with value: 0.5 and parameters: {'n_neighbors': 74}. Best is trial 1 with value: 0.5126621407241507.[0m
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')

To preserve the existing default behavior, use
`scipy.stats.median_abs_deviation(..., scale=1/1.4826)`.
The value 1.4826 i

  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')

To preserve the existing default behavior, use
`scipy.stats.median_abs_deviation(..., scale=1/1.4826)`.
The value 1.4826 is not numerically precise for scaling
with a normal distribution. For a numerically precise value, use
`scipy.stats.median_abs_deviation(..., scale='normal')`.

  mad_of_mad_auc = median_absolute_deviation(median_absolute_deviation(aucs, axis=1))
[32m[I 2022-06-27 13:51:38,711][0m Trial 12 finished with value: 0.5632372249005646 and parameters: {'n_neighbors': 4}. Best is trial 12 with value: 0.5632372249005646.[0m
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')

To preserve the existing default behavior, use
`scipy.stats.median_abs_deviation(..., scale=1/1.4826)`.
Th

  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')

To preserve the existing default behavior, use
`scipy.stats.median_abs_deviation(..., scale=1/1.4826)`.
The value 1.4826 is not numerically precise for scaling
with a normal distribution. For a numerically precise value, use
`scipy.stats.median_abs_deviation(..., scale='normal')`.

  mad_of_mad_auc = median_absolute_deviation(median_absolute_deviation(aucs, axis=1))
[32m[I 2022-06-27 13:51:39,550][0m Trial 18 finished with value: 0.49468085106382975 and parameters: {'n_neighbors': 18}. Best is trial 16 with value: 0.6222960992907802.[0m
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')

To preserve the existing default behavior, use
`scipy.stats.median_abs_deviation(..., scale=1/1.4826)`.


  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')

To preserve the existing default behavior, use
`scipy.stats.median_abs_deviation(..., scale=1/1.4826)`.
The value 1.4826 is not numerically precise for scaling
with a normal distribution. For a numerically precise value, use
`scipy.stats.median_abs_deviation(..., scale='normal')`.

  mad_of_mad_auc = median_absolute_deviation(median_absolute_deviation(aucs, axis=1))
[32m[I 2022-06-27 13:51:40,390][0m Trial 24 finished with value: 0.5457297767316578 and parameters: {'n_neighbors': 2}. Best is trial 16 with value: 0.6222960992907802.[0m
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')

To preserve the existing default behavior, use
`scipy.stats.median_abs_deviation(..., scale=1/1.4826)`.
Th

  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')

To preserve the existing default behavior, use
`scipy.stats.median_abs_deviation(..., scale=1/1.4826)`.
The value 1.4826 is not numerically precise for scaling
with a normal distribution. For a numerically precise value, use
`scipy.stats.median_abs_deviation(..., scale='normal')`.

  mad_of_mad_auc = median_absolute_deviation(median_absolute_deviation(aucs, axis=1))
[32m[I 2022-06-27 13:51:41,206][0m Trial 30 finished with value: 0.6222960992907802 and parameters: {'n_neighbors': 1}. Best is trial 16 with value: 0.6222960992907802.[0m
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')

To preserve the existing default behavior, use
`scipy.stats.median_abs_deviation(..., scale=1/1.4826)`.
Th

  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')

To preserve the existing default behavior, use
`scipy.stats.median_abs_deviation(..., scale=1/1.4826)`.
The value 1.4826 is not numerically precise for scaling
with a normal distribution. For a numerically precise value, use
`scipy.stats.median_abs_deviation(..., scale='normal')`.

  mad_of_mad_auc = median_absolute_deviation(median_absolute_deviation(aucs, axis=1))
[32m[I 2022-06-27 13:51:42,007][0m Trial 36 finished with value: 0.5168257653938768 and parameters: {'n_neighbors': 15}. Best is trial 16 with value: 0.6222960992907802.[0m
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')

To preserve the existing default behavior, use
`scipy.stats.median_abs_deviation(..., scale=1/1.4826)`.
T

  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')

To preserve the existing default behavior, use
`scipy.stats.median_abs_deviation(..., scale=1/1.4826)`.
The value 1.4826 is not numerically precise for scaling
with a normal distribution. For a numerically precise value, use
`scipy.stats.median_abs_deviation(..., scale='normal')`.

  mad_of_mad_auc = median_absolute_deviation(median_absolute_deviation(aucs, axis=1))
[32m[I 2022-06-27 13:51:42,830][0m Trial 42 finished with value: 0.5513324629958027 and parameters: {'n_neighbors': 6}. Best is trial 16 with value: 0.6222960992907802.[0m
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')

To preserve the existing default behavior, use
`scipy.stats.median_abs_deviation(..., scale=1/1.4826)`.
Th

  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')

To preserve the existing default behavior, use
`scipy.stats.median_abs_deviation(..., scale=1/1.4826)`.
The value 1.4826 is not numerically precise for scaling
with a normal distribution. For a numerically precise value, use
`scipy.stats.median_abs_deviation(..., scale='normal')`.

  mad_of_mad_auc = median_absolute_deviation(median_absolute_deviation(aucs, axis=1))
[32m[I 2022-06-27 13:51:43,628][0m Trial 48 finished with value: 0.5 and parameters: {'n_neighbors': 68}. Best is trial 16 with value: 0.6222960992907802.[0m
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')

To preserve the existing default behavior, use
`scipy.stats.median_abs_deviation(..., scale=1/1.4826)`.
The value 1.4826

  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')

To preserve the existing default behavior, use
`scipy.stats.median_abs_deviation(..., scale=1/1.4826)`.
The value 1.4826 is not numerically precise for scaling
with a normal distribution. For a numerically precise value, use
`scipy.stats.median_abs_deviation(..., scale='normal')`.

  mad_of_mad_auc = median_absolute_deviation(median_absolute_deviation(aucs, axis=1))
[32m[I 2022-06-27 13:51:44,410][0m Trial 54 finished with value: 0.5858662613981762 and parameters: {'n_neighbors': 5}. Best is trial 16 with value: 0.6222960992907802.[0m
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')

To preserve the existing default behavior, use
`scipy.stats.median_abs_deviation(..., scale=1/1.4826)`.
Th

  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')

To preserve the existing default behavior, use
`scipy.stats.median_abs_deviation(..., scale=1/1.4826)`.
The value 1.4826 is not numerically precise for scaling
with a normal distribution. For a numerically precise value, use
`scipy.stats.median_abs_deviation(..., scale='normal')`.

  mad_of_mad_auc = median_absolute_deviation(median_absolute_deviation(aucs, axis=1))
[32m[I 2022-06-27 13:51:45,214][0m Trial 60 finished with value: 0.5 and parameters: {'n_neighbors': 47}. Best is trial 16 with value: 0.6222960992907802.[0m
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')

To preserve the existing default behavior, use
`scipy.stats.median_abs_deviation(..., scale=1/1.4826)`.
The value 1.4826

  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')

To preserve the existing default behavior, use
`scipy.stats.median_abs_deviation(..., scale=1/1.4826)`.
The value 1.4826 is not numerically precise for scaling
with a normal distribution. For a numerically precise value, use
`scipy.stats.median_abs_deviation(..., scale='normal')`.

  mad_of_mad_auc = median_absolute_deviation(median_absolute_deviation(aucs, axis=1))
[32m[I 2022-06-27 13:51:46,025][0m Trial 66 finished with value: 0.5632372249005646 and parameters: {'n_neighbors': 4}. Best is trial 16 with value: 0.6222960992907802.[0m
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')

To preserve the existing default behavior, use
`scipy.stats.median_abs_deviation(..., scale=1/1.4826)`.
Th

  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')

To preserve the existing default behavior, use
`scipy.stats.median_abs_deviation(..., scale=1/1.4826)`.
The value 1.4826 is not numerically precise for scaling
with a normal distribution. For a numerically precise value, use
`scipy.stats.median_abs_deviation(..., scale='normal')`.

  mad_of_mad_auc = median_absolute_deviation(median_absolute_deviation(aucs, axis=1))
[32m[I 2022-06-27 13:51:46,821][0m Trial 72 finished with value: 0.6222960992907802 and parameters: {'n_neighbors': 1}. Best is trial 16 with value: 0.6222960992907802.[0m
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')

To preserve the existing default behavior, use
`scipy.stats.median_abs_deviation(..., scale=1/1.4826)`.
Th

  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')

To preserve the existing default behavior, use
`scipy.stats.median_abs_deviation(..., scale=1/1.4826)`.
The value 1.4826 is not numerically precise for scaling
with a normal distribution. For a numerically precise value, use
`scipy.stats.median_abs_deviation(..., scale='normal')`.

  mad_of_mad_auc = median_absolute_deviation(median_absolute_deviation(aucs, axis=1))
[32m[I 2022-06-27 13:51:47,621][0m Trial 78 finished with value: 0.5404706218871989 and parameters: {'n_neighbors': 11}. Best is trial 16 with value: 0.6222960992907802.[0m
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')

To preserve the existing default behavior, use
`scipy.stats.median_abs_deviation(..., scale=1/1.4826)`.
T

  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')

To preserve the existing default behavior, use
`scipy.stats.median_abs_deviation(..., scale=1/1.4826)`.
The value 1.4826 is not numerically precise for scaling
with a normal distribution. For a numerically precise value, use
`scipy.stats.median_abs_deviation(..., scale='normal')`.

  mad_of_mad_auc = median_absolute_deviation(median_absolute_deviation(aucs, axis=1))
[32m[I 2022-06-27 13:51:48,409][0m Trial 84 finished with value: 0.515348072867591 and parameters: {'n_neighbors': 13}. Best is trial 16 with value: 0.6222960992907802.[0m
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')

To preserve the existing default behavior, use
`scipy.stats.median_abs_deviation(..., scale=1/1.4826)`.
Th

  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')

To preserve the existing default behavior, use
`scipy.stats.median_abs_deviation(..., scale=1/1.4826)`.
The value 1.4826 is not numerically precise for scaling
with a normal distribution. For a numerically precise value, use
`scipy.stats.median_abs_deviation(..., scale='normal')`.

  mad_of_mad_auc = median_absolute_deviation(median_absolute_deviation(aucs, axis=1))
[32m[I 2022-06-27 13:51:49,195][0m Trial 90 finished with value: 0.5 and parameters: {'n_neighbors': 53}. Best is trial 16 with value: 0.6222960992907802.[0m
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')

To preserve the existing default behavior, use
`scipy.stats.median_abs_deviation(..., scale=1/1.4826)`.
The value 1.4826

  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')

To preserve the existing default behavior, use
`scipy.stats.median_abs_deviation(..., scale=1/1.4826)`.
The value 1.4826 is not numerically precise for scaling
with a normal distribution. For a numerically precise value, use
`scipy.stats.median_abs_deviation(..., scale='normal')`.

  mad_of_mad_auc = median_absolute_deviation(median_absolute_deviation(aucs, axis=1))
[32m[I 2022-06-27 13:51:49,982][0m Trial 96 finished with value: 0.5513324629958027 and parameters: {'n_neighbors': 6}. Best is trial 16 with value: 0.6222960992907802.[0m
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')

To preserve the existing default behavior, use
`scipy.stats.median_abs_deviation(..., scale=1/1.4826)`.
Th

  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')

To preserve the existing default behavior, use
`scipy.stats.median_abs_deviation(..., scale=1/1.4826)`.
The value 1.4826 is not numerically precise for scaling
with a normal distribution. For a numerically precise value, use
`scipy.stats.median_abs_deviation(..., scale='normal')`.

  mad_of_mad_auc = median_absolute_deviation(median_absolute_deviation(aucs, axis=1))
[32m[I 2022-06-27 13:51:50,786][0m Trial 102 finished with value: 0.5858662613981762 and parameters: {'n_neighbors': 5}. Best is trial 16 with value: 0.6222960992907802.[0m
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')

To preserve the existing default behavior, use
`scipy.stats.median_abs_deviation(..., scale=1/1.4826)`.
T

  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')

To preserve the existing default behavior, use
`scipy.stats.median_abs_deviation(..., scale=1/1.4826)`.
The value 1.4826 is not numerically precise for scaling
with a normal distribution. For a numerically precise value, use
`scipy.stats.median_abs_deviation(..., scale='normal')`.

  mad_of_mad_auc = median_absolute_deviation(median_absolute_deviation(aucs, axis=1))
[32m[I 2022-06-27 13:51:51,573][0m Trial 108 finished with value: 0.5 and parameters: {'n_neighbors': 87}. Best is trial 16 with value: 0.6222960992907802.[0m
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')

To preserve the existing default behavior, use
`scipy.stats.median_abs_deviation(..., scale=1/1.4826)`.
The value 1.482

  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')

To preserve the existing default behavior, use
`scipy.stats.median_abs_deviation(..., scale=1/1.4826)`.
The value 1.4826 is not numerically precise for scaling
with a normal distribution. For a numerically precise value, use
`scipy.stats.median_abs_deviation(..., scale='normal')`.

  mad_of_mad_auc = median_absolute_deviation(median_absolute_deviation(aucs, axis=1))
[32m[I 2022-06-27 13:51:52,356][0m Trial 114 finished with value: 0.5453340798805524 and parameters: {'n_neighbors': 9}. Best is trial 16 with value: 0.6222960992907802.[0m
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')

To preserve the existing default behavior, use
`scipy.stats.median_abs_deviation(..., scale=1/1.4826)`.
T

  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')

To preserve the existing default behavior, use
`scipy.stats.median_abs_deviation(..., scale=1/1.4826)`.
The value 1.4826 is not numerically precise for scaling
with a normal distribution. For a numerically precise value, use
`scipy.stats.median_abs_deviation(..., scale='normal')`.

  mad_of_mad_auc = median_absolute_deviation(median_absolute_deviation(aucs, axis=1))
[32m[I 2022-06-27 13:51:53,161][0m Trial 120 finished with value: 0.5 and parameters: {'n_neighbors': 69}. Best is trial 16 with value: 0.6222960992907802.[0m
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')

To preserve the existing default behavior, use
`scipy.stats.median_abs_deviation(..., scale=1/1.4826)`.
The value 1.482

  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')

To preserve the existing default behavior, use
`scipy.stats.median_abs_deviation(..., scale=1/1.4826)`.
The value 1.4826 is not numerically precise for scaling
with a normal distribution. For a numerically precise value, use
`scipy.stats.median_abs_deviation(..., scale='normal')`.

  mad_of_mad_auc = median_absolute_deviation(median_absolute_deviation(aucs, axis=1))
[32m[I 2022-06-27 13:51:53,952][0m Trial 126 finished with value: 0.5498810190369541 and parameters: {'n_neighbors': 7}. Best is trial 16 with value: 0.6222960992907802.[0m
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')

To preserve the existing default behavior, use
`scipy.stats.median_abs_deviation(..., scale=1/1.4826)`.
T

  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')

To preserve the existing default behavior, use
`scipy.stats.median_abs_deviation(..., scale=1/1.4826)`.
The value 1.4826 is not numerically precise for scaling
with a normal distribution. For a numerically precise value, use
`scipy.stats.median_abs_deviation(..., scale='normal')`.

  mad_of_mad_auc = median_absolute_deviation(median_absolute_deviation(aucs, axis=1))
[32m[I 2022-06-27 13:51:54,740][0m Trial 132 finished with value: 0.6222960992907802 and parameters: {'n_neighbors': 1}. Best is trial 16 with value: 0.6222960992907802.[0m
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')

To preserve the existing default behavior, use
`scipy.stats.median_abs_deviation(..., scale=1/1.4826)`.
T

  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')

To preserve the existing default behavior, use
`scipy.stats.median_abs_deviation(..., scale=1/1.4826)`.
The value 1.4826 is not numerically precise for scaling
with a normal distribution. For a numerically precise value, use
`scipy.stats.median_abs_deviation(..., scale='normal')`.

  mad_of_mad_auc = median_absolute_deviation(median_absolute_deviation(aucs, axis=1))
[32m[I 2022-06-27 13:51:55,535][0m Trial 138 finished with value: 0.5513324629958027 and parameters: {'n_neighbors': 6}. Best is trial 16 with value: 0.6222960992907802.[0m
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')

To preserve the existing default behavior, use
`scipy.stats.median_abs_deviation(..., scale=1/1.4826)`.
T

  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')

To preserve the existing default behavior, use
`scipy.stats.median_abs_deviation(..., scale=1/1.4826)`.
The value 1.4826 is not numerically precise for scaling
with a normal distribution. For a numerically precise value, use
`scipy.stats.median_abs_deviation(..., scale='normal')`.

  mad_of_mad_auc = median_absolute_deviation(median_absolute_deviation(aucs, axis=1))
[32m[I 2022-06-27 13:51:56,329][0m Trial 144 finished with value: 0.6222960992907802 and parameters: {'n_neighbors': 1}. Best is trial 16 with value: 0.6222960992907802.[0m
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')

To preserve the existing default behavior, use
`scipy.stats.median_abs_deviation(..., scale=1/1.4826)`.
T

  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')

To preserve the existing default behavior, use
`scipy.stats.median_abs_deviation(..., scale=1/1.4826)`.
The value 1.4826 is not numerically precise for scaling
with a normal distribution. For a numerically precise value, use
`scipy.stats.median_abs_deviation(..., scale='normal')`.

  mad_of_mad_auc = median_absolute_deviation(median_absolute_deviation(aucs, axis=1))
[32m[I 2022-06-27 13:51:57,128][0m Trial 150 finished with value: 0.5854832332517854 and parameters: {'n_neighbors': 3}. Best is trial 16 with value: 0.6222960992907802.[0m
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')

To preserve the existing default behavior, use
`scipy.stats.median_abs_deviation(..., scale=1/1.4826)`.
T

  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')

To preserve the existing default behavior, use
`scipy.stats.median_abs_deviation(..., scale=1/1.4826)`.
The value 1.4826 is not numerically precise for scaling
with a normal distribution. For a numerically precise value, use
`scipy.stats.median_abs_deviation(..., scale='normal')`.

  mad_of_mad_auc = median_absolute_deviation(median_absolute_deviation(aucs, axis=1))
[32m[I 2022-06-27 13:51:57,929][0m Trial 156 finished with value: 0.5854832332517854 and parameters: {'n_neighbors': 3}. Best is trial 16 with value: 0.6222960992907802.[0m
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')

To preserve the existing default behavior, use
`scipy.stats.median_abs_deviation(..., scale=1/1.4826)`.
T

  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')

To preserve the existing default behavior, use
`scipy.stats.median_abs_deviation(..., scale=1/1.4826)`.
The value 1.4826 is not numerically precise for scaling
with a normal distribution. For a numerically precise value, use
`scipy.stats.median_abs_deviation(..., scale='normal')`.

  mad_of_mad_auc = median_absolute_deviation(median_absolute_deviation(aucs, axis=1))
[32m[I 2022-06-27 13:51:58,725][0m Trial 162 finished with value: 0.6222960992907802 and parameters: {'n_neighbors': 1}. Best is trial 16 with value: 0.6222960992907802.[0m
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')

To preserve the existing default behavior, use
`scipy.stats.median_abs_deviation(..., scale=1/1.4826)`.
T

  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')

To preserve the existing default behavior, use
`scipy.stats.median_abs_deviation(..., scale=1/1.4826)`.
The value 1.4826 is not numerically precise for scaling
with a normal distribution. For a numerically precise value, use
`scipy.stats.median_abs_deviation(..., scale='normal')`.

  mad_of_mad_auc = median_absolute_deviation(median_absolute_deviation(aucs, axis=1))
[32m[I 2022-06-27 13:51:59,507][0m Trial 168 finished with value: 0.6222960992907802 and parameters: {'n_neighbors': 1}. Best is trial 16 with value: 0.6222960992907802.[0m
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')

To preserve the existing default behavior, use
`scipy.stats.median_abs_deviation(..., scale=1/1.4826)`.
T

  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')

To preserve the existing default behavior, use
`scipy.stats.median_abs_deviation(..., scale=1/1.4826)`.
The value 1.4826 is not numerically precise for scaling
with a normal distribution. For a numerically precise value, use
`scipy.stats.median_abs_deviation(..., scale='normal')`.

  mad_of_mad_auc = median_absolute_deviation(median_absolute_deviation(aucs, axis=1))
[32m[I 2022-06-27 13:52:00,305][0m Trial 174 finished with value: 0.6222960992907802 and parameters: {'n_neighbors': 1}. Best is trial 16 with value: 0.6222960992907802.[0m
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')

To preserve the existing default behavior, use
`scipy.stats.median_abs_deviation(..., scale=1/1.4826)`.
T

  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')

To preserve the existing default behavior, use
`scipy.stats.median_abs_deviation(..., scale=1/1.4826)`.
The value 1.4826 is not numerically precise for scaling
with a normal distribution. For a numerically precise value, use
`scipy.stats.median_abs_deviation(..., scale='normal')`.

  mad_of_mad_auc = median_absolute_deviation(median_absolute_deviation(aucs, axis=1))
[32m[I 2022-06-27 13:52:01,116][0m Trial 180 finished with value: 0.6222960992907802 and parameters: {'n_neighbors': 1}. Best is trial 16 with value: 0.6222960992907802.[0m
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')

To preserve the existing default behavior, use
`scipy.stats.median_abs_deviation(..., scale=1/1.4826)`.
T

  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')

To preserve the existing default behavior, use
`scipy.stats.median_abs_deviation(..., scale=1/1.4826)`.
The value 1.4826 is not numerically precise for scaling
with a normal distribution. For a numerically precise value, use
`scipy.stats.median_abs_deviation(..., scale='normal')`.

  mad_of_mad_auc = median_absolute_deviation(median_absolute_deviation(aucs, axis=1))
[32m[I 2022-06-27 13:52:01,898][0m Trial 186 finished with value: 0.6222960992907802 and parameters: {'n_neighbors': 1}. Best is trial 16 with value: 0.6222960992907802.[0m
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')

To preserve the existing default behavior, use
`scipy.stats.median_abs_deviation(..., scale=1/1.4826)`.
T

  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')

To preserve the existing default behavior, use
`scipy.stats.median_abs_deviation(..., scale=1/1.4826)`.
The value 1.4826 is not numerically precise for scaling
with a normal distribution. For a numerically precise value, use
`scipy.stats.median_abs_deviation(..., scale='normal')`.

  mad_of_mad_auc = median_absolute_deviation(median_absolute_deviation(aucs, axis=1))
[32m[I 2022-06-27 13:52:02,696][0m Trial 192 finished with value: 0.6222960992907802 and parameters: {'n_neighbors': 1}. Best is trial 16 with value: 0.6222960992907802.[0m
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')

To preserve the existing default behavior, use
`scipy.stats.median_abs_deviation(..., scale=1/1.4826)`.
T

  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')

To preserve the existing default behavior, use
`scipy.stats.median_abs_deviation(..., scale=1/1.4826)`.
The value 1.4826 is not numerically precise for scaling
with a normal distribution. For a numerically precise value, use
`scipy.stats.median_abs_deviation(..., scale='normal')`.

  mad_of_mad_auc = median_absolute_deviation(median_absolute_deviation(aucs, axis=1))
[32m[I 2022-06-27 13:52:03,500][0m Trial 198 finished with value: 0.6222960992907802 and parameters: {'n_neighbors': 1}. Best is trial 16 with value: 0.6222960992907802.[0m
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')
  X.columns = X.columns.str.replace('[', '').str.replace(']', '').str.replace('>', '')

To preserve the existing default behavior, use
`scipy.stats.median_abs_deviation(..., scale=1/1.4826)`.
T

In [24]:
print(study_is_sig3.best_trial)

FrozenTrial(number=12, values=[0.6110182088442958], datetime_start=datetime.datetime(2022, 6, 27, 13, 48, 19, 990363), datetime_complete=datetime.datetime(2022, 6, 27, 13, 48, 20, 140959), params={'n_neighbors': 1}, distributions={'n_neighbors': IntUniformDistribution(high=100, low=1, step=1)}, user_attrs={}, system_attrs={}, intermediate_values={}, trial_id=12, state=TrialState.COMPLETE, value=None)


In [21]:
!conda env export | grep -v "^prefix: " > environment.yml