# Notebook B: Model Training
This notebook trains 6 algoritms to predict the production rates of 5 outputs of syngas fermentation based on the extracellular metabolite concentration, and gas composition.

### Set up imports

In [1]:
import pandas as pd
import numpy as np
import sklearn.preprocessing, sklearn.neural_network, sklearn.svm, sklearn.ensemble

### Load data that was generated in notebook A

In [2]:
rates_df = pd.read_csv(f'../data/rates_data.csv')
print(f'Shape of the rates data: {rates_df.shape[0]} rows by {rates_df.shape[1]} columns')

Shape of the rates data: 836 rows by 18 columns


### Create train and test sets 

In [3]:
train_data = rates_df[rates_df.composition.isin([1,2,3,4,5,6,7])]
test_data = rates_df[rates_df.composition.isin([8,9,10])]
print(f'Shape of the training data: {train_data.shape[0]} rows by {train_data.shape[1]} columns')
print(f'Shape of the testing data: {test_data.shape[0]} rows by {test_data.shape[1]} columns')

Shape of the training data: 657 rows by 18 columns
Shape of the testing data: 179 rows by 18 columns


### Define a function that generates the input and output arrays for scikit learn's API

In [5]:
def get_X_y_arrays(data):

    # prevent set with copy error
    data_copy = data.copy()
    
    # ML input
    X = data_copy [[
        'biomass (g/L)', 'ethanol (mM)', 'acetate (mM)', 'butanol (mM)', 
         'butyrate (mM)', 'N2', 'CO', 'CO2', 'H2', 'flow rate (mL/min)'
    ]]
    
    # ML output
    y = data_copy [[
        'biomass rate', 'ethanol rate', 'acetate rate', 'butanol rate', 'butyrate rate'
    ]]
 
    return np.array(X), np.array(y)

In [6]:
X_train, y_train = get_X_y_arrays(train_data)
X_test, y_test = get_X_y_arrays(test_data)

print(f'Shape of the train X array: {X_train.shape[0]} rows by {X_train.shape[1]} columns')
print(f'Shape of the trainn y array: {y_train.shape[0]} rows by {y_train.shape[1]} columns')
print(f'Shape of the test X array: {X_test.shape[0]} rows by {X_test.shape[1]} columns')
print(f'Shape of the test y array: {y_test.shape[0]} rows by {y_test.shape[1]} columns')

Shape of the train X array: 657 rows by 10 columns
Shape of the trainn y array: 657 rows by 5 columns
Shape of the test X array: 179 rows by 10 columns
Shape of the test y array: 179 rows by 5 columns


## Train 30 different models (5 outputs each modeled with 6 algorithms)
algorithms = neural network, support vector machine, random forest, support vector, neural net, lasso <br>
outputs = acetate, biomass, butanol, butyrate, ethanol

### Define a functions to generate neural network architectures

In [7]:
def gen_NN_fixed_n_layers(n_layers, n_neurons, neuron_step):
    """Generate NN hidden_layer_sizes of n_layers and up to n_neurons per layer 
    """
    # print (n_layers)
    if n_layers == 1: 
        return [[i] for i in range(neuron_step, n_neurons+1, neuron_step)]
    else:
        pairs =  [  (i,  tail) for tail in gen_NN_fixed_n_layers(n_layers-1, n_neurons+1, neuron_step) for i in range(neuron_step, n_neurons+1, neuron_step) ]
        return [[i]+ t for (i, t) in pairs]

# print (gen_NN_fixed_n_layers(4, 10, 5))

def gen_NN_uni(n_layers, n_neurons, layer_step, neuron_step):
    """Generate hidden layers of various number of layers and number of neurons 
    """ 
    various_NNs = [ gen_NN_fixed_n_layers(i , n_neurons, neuron_step) for i in range(2, n_layers+1, layer_step)]
    return  functools.reduce(operator.add, various_NNs)

### Define a model configuration dictionary to guide ML training

Test grid is used for debugging, should be replaced with full grid

In [8]:
model_cfgs = {
        "nn":{
            'estimator': sklearn.neural_network.MLPRegressor(shuffle=True),
            # Test grid
            'param_grid':   {
                'activation': ['tanh', 'logistic', 'relu'], 
                'max_iter':   [400*i for i in range(1, 2)]
            }
            # Full grid
            # 'param_grid':   {
                # 'hidden_layer_sizes': gen_NN_uni(5, 100, 1, 10),  
                # 'activation':         ['tanh', 'logistic', 'relu'], 
                # 'max_iter':           [400*i for i in range(1, 10, 2)]
            # }                
        },
        "svm_rbf":{
            'estimator': sklearn.svm.SVR(kernel='rbf'),
            # Test grid
            'param_grid':   {
                'C':       [10**i for i in range(-1, 1)], 
                'epsilon': [10**i for i in range(-1, 1)],
            }
                # Full grid
                # 'param_grid':   {
                    # 'C':       [10**i for i in range(-5, 5)], 
                    # 'epsilon': [10**i for i in range(-5, 5)],
                    # 'gamma':   [10**i for i in range(-5, 5)] # gamma gave me an error
            # }
        },
        "rf":{
            'estimator': sklearn.ensemble.RandomForestRegressor(),
            # Test grid
            'param_grid':   {
                'n_estimators': [10*i for i in range(1, 2)],
                'max_depth':     [2*i for i in range(1, 1+1)],
            }
            # Full grid 
            # 'param_grid':   {
                # 'n_estimators': [10*i for i in range(1, 20)],
                # 'max_depth':     [2*i for i in range(20)], 
                # 'max_samples': [0.05*i for i in range(1, 10+1)] # max samples gave me an error
            # }
        },
        'en': {
            'estimator': sklearn.linear_model.ElasticNet(),
            # Test grid
            'param_grid':   {
                'alpha': [0.0001, 0.001, 0.01, 0.1],
                'l1_ratio': [0.1, 1],
            }
            # Full grid 
            # 'param_grid': {
                # 'alpha': [0.0001, 0.001, 0.01, 0.1],
                # 'l1_ratio': [0.1, 1],
            #}
        },
        'lasso': {
            'estimator': sklearn.linear_model.Lasso(),
            # Test grid
            'param_grid':   {
                'alpha': [0.0001, 0.001, 0.01, 0.1],
            }
            # Full grid 
            # 'param_grid':   {
                # 'alpha': [0.0001, 0.001, 0.01, 0.1],
            # }
        },
        'knn': {
            'estimator': sklearn.neighbors.KNeighborsRegressor(),
            # Test grid
            'param_grid':   {
                'algorithm': ['ball_tree', ],
                'leaf_size': [4,5,6],
                'n_neighbors': [2,3,4],
                'weights': ['distance'],
            }
            # Full grid 
            # 'param_grid':   {
                # 'algorithm': [0.0001, 0.001, 0.01, 0.1],
                # 'leaf_size': [4, 5, 6],
                # 'n_neighbors': [2, 3, 4],
                # 'weights': ['distance'],
            # }
        },
    }

### Perform grid search for each output and algorithm

In [9]:
# define a dictionary to hold results for all outputs
trained_model_dictionary = {}

# define a scaler to standardize the input values of all features between 0 and 1
Scaler = sklearn.preprocessing.MinMaxScaler()
X = Scaler.fit_transform(X_train, y_train)

# loop over outputs
for index, output in enumerate(['biomass', 'ethanol', 'acetate', 'butanol', 'butyrate']):
    print(f'{output}\n')
    
    # define a dictionary to hold results for a single output
    trained_models = {} 
    
    # loop over models
    for model_name, model_conf in model_cfgs.items():
        print (model_name)
        
        # define grid search parameters
        search = sklearn.model_selection.GridSearchCV(
            estimator = model_conf["estimator"], 
            param_grid = model_conf["param_grid"], 
            scoring = "r2",
            refit = True,
            cv = sklearn.model_selection.ShuffleSplit(n_splits=10, test_size=0.1, random_state=0), 
            n_jobs=30, # This is a limitation of the server I am using. -gr
            verbose=3
        )

        # output array is a vector of a single output, not 2d array of all outputs
        y_output=y_train[:,index]

        # run grid search
        search.fit(X_train, y_output)
        
        # report results
        print("Best CV score: %0.3f:" % search.best_score_)
        print("Best parameters:",  search.best_params_, '\n')
        
        # save results of each model to a dictionary
        trained_models[model_name] = search 

    # save results from each output to a dictionary
    trained_model_dictionary[output] = trained_models

biomass

nn
Fitting 10 folds for each of 3 candidates, totalling 30 fits


[Parallel(n_jobs=30)]: Using backend LokyBackend with 30 concurrent workers.
[Parallel(n_jobs=30)]: Done   4 out of  30 | elapsed:    1.4s remaining:    9.3s
[Parallel(n_jobs=30)]: Done  15 out of  30 | elapsed:    2.2s remaining:    2.2s
[Parallel(n_jobs=30)]: Done  26 out of  30 | elapsed:    3.5s remaining:    0.5s
[Parallel(n_jobs=30)]: Done  30 out of  30 | elapsed:    4.2s finished
[Parallel(n_jobs=30)]: Using backend LokyBackend with 30 concurrent workers.


Best CV score: 0.470:
Best parameters: {'activation': 'logistic', 'max_iter': 400} 

svm_rbf
Fitting 10 folds for each of 4 candidates, totalling 40 fits
Best CV score: 0.125:
Best parameters: {'C': 1, 'epsilon': 0.1} 

rf
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Best CV score: 0.319:
Best parameters: {'max_depth': 2, 'n_estimators': 10} 

en
Fitting 10 folds for each of 8 candidates, totalling 80 fits


[Parallel(n_jobs=30)]: Done   9 out of  40 | elapsed:    0.0s remaining:    0.1s
[Parallel(n_jobs=30)]: Done  23 out of  40 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=30)]: Done  37 out of  40 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=30)]: Done  40 out of  40 | elapsed:    0.1s finished
[Parallel(n_jobs=30)]: Using backend LokyBackend with 30 concurrent workers.
[Parallel(n_jobs=30)]: Done   3 out of  10 | elapsed:    0.0s remaining:    0.1s
[Parallel(n_jobs=30)]: Done   7 out of  10 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=30)]: Done  10 out of  10 | elapsed:    0.0s finished
[Parallel(n_jobs=30)]: Using backend LokyBackend with 30 concurrent workers.
[Parallel(n_jobs=30)]: Done  21 out of  80 | elapsed:    0.1s remaining:    0.3s
[Parallel(n_jobs=30)]: Done  48 out of  80 | elapsed:    0.1s remaining:    0.1s
[Parallel(n_jobs=30)]: Done  80 out of  80 | elapsed:    0.1s finished
[Parallel(n_jobs=30)]: Using backend LokyBackend with 30 concurren

Best CV score: 0.261:
Best parameters: {'alpha': 0.0001, 'l1_ratio': 0.1} 

lasso
Fitting 10 folds for each of 4 candidates, totalling 40 fits
Best CV score: 0.260:
Best parameters: {'alpha': 0.0001} 

knn
Fitting 10 folds for each of 9 candidates, totalling 90 fits
Best CV score: 0.837:
Best parameters: {'algorithm': 'ball_tree', 'leaf_size': 4, 'n_neighbors': 4, 'weights': 'distance'} 

ethanol

nn
Fitting 10 folds for each of 3 candidates, totalling 30 fits


[Parallel(n_jobs=30)]: Done   4 out of  30 | elapsed:   11.4s remaining:  1.2min
[Parallel(n_jobs=30)]: Done  15 out of  30 | elapsed:   13.0s remaining:   13.0s
[Parallel(n_jobs=30)]: Done  26 out of  30 | elapsed:   14.5s remaining:    2.2s
[Parallel(n_jobs=30)]: Done  30 out of  30 | elapsed:   14.8s finished
[Parallel(n_jobs=30)]: Using backend LokyBackend with 30 concurrent workers.
[Parallel(n_jobs=30)]: Done   9 out of  40 | elapsed:    0.0s remaining:    0.2s
[Parallel(n_jobs=30)]: Done  23 out of  40 | elapsed:    0.1s remaining:    0.1s
[Parallel(n_jobs=30)]: Done  37 out of  40 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=30)]: Done  40 out of  40 | elapsed:    0.1s finished
[Parallel(n_jobs=30)]: Using backend LokyBackend with 30 concurrent workers.
[Parallel(n_jobs=30)]: Done   3 out of  10 | elapsed:    0.0s remaining:    0.1s
[Parallel(n_jobs=30)]: Done   7 out of  10 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=30)]: Done  10 out of  10 | elapsed:   

Best CV score: 0.895:
Best parameters: {'activation': 'tanh', 'max_iter': 400} 

svm_rbf
Fitting 10 folds for each of 4 candidates, totalling 40 fits
Best CV score: 0.294:
Best parameters: {'C': 1, 'epsilon': 1} 

rf
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Best CV score: 0.686:
Best parameters: {'max_depth': 2, 'n_estimators': 10} 

en
Fitting 10 folds for each of 8 candidates, totalling 80 fits


[Parallel(n_jobs=30)]: Using backend LokyBackend with 30 concurrent workers.
[Parallel(n_jobs=30)]: Done  21 out of  80 | elapsed:    0.0s remaining:    0.1s
[Parallel(n_jobs=30)]: Done  48 out of  80 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=30)]: Done  80 out of  80 | elapsed:    0.1s finished
  positive)
[Parallel(n_jobs=30)]: Using backend LokyBackend with 30 concurrent workers.
[Parallel(n_jobs=30)]: Done   9 out of  40 | elapsed:    0.0s remaining:    0.1s
[Parallel(n_jobs=30)]: Done  23 out of  40 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=30)]: Done  37 out of  40 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=30)]: Done  40 out of  40 | elapsed:    0.0s finished
[Parallel(n_jobs=30)]: Using backend LokyBackend with 30 concurrent workers.


Best CV score: 0.552:
Best parameters: {'alpha': 0.001, 'l1_ratio': 0.1} 

lasso
Fitting 10 folds for each of 4 candidates, totalling 40 fits
Best CV score: 0.552:
Best parameters: {'alpha': 0.01} 

knn
Fitting 10 folds for each of 9 candidates, totalling 90 fits
Best CV score: 0.933:
Best parameters: {'algorithm': 'ball_tree', 'leaf_size': 4, 'n_neighbors': 4, 'weights': 'distance'} 

acetate

nn
Fitting 10 folds for each of 3 candidates, totalling 30 fits


[Parallel(n_jobs=30)]: Done  31 out of  90 | elapsed:    0.0s remaining:    0.1s
[Parallel(n_jobs=30)]: Done  90 out of  90 | elapsed:    0.1s finished
[Parallel(n_jobs=30)]: Using backend LokyBackend with 30 concurrent workers.
[Parallel(n_jobs=30)]: Done   4 out of  30 | elapsed:   10.9s remaining:  1.2min
[Parallel(n_jobs=30)]: Done  15 out of  30 | elapsed:   12.6s remaining:   12.6s
[Parallel(n_jobs=30)]: Done  26 out of  30 | elapsed:   14.1s remaining:    2.2s
[Parallel(n_jobs=30)]: Done  30 out of  30 | elapsed:   14.3s finished
[Parallel(n_jobs=30)]: Using backend LokyBackend with 30 concurrent workers.
[Parallel(n_jobs=30)]: Done   9 out of  40 | elapsed:    0.1s remaining:    0.3s
[Parallel(n_jobs=30)]: Done  23 out of  40 | elapsed:    0.1s remaining:    0.1s
[Parallel(n_jobs=30)]: Done  37 out of  40 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=30)]: Done  40 out of  40 | elapsed:    0.1s finished
[Parallel(n_jobs=30)]: Using backend LokyBackend with 30 concurren

Best CV score: 0.850:
Best parameters: {'activation': 'tanh', 'max_iter': 400} 

svm_rbf
Fitting 10 folds for each of 4 candidates, totalling 40 fits
Best CV score: -0.043:
Best parameters: {'C': 1, 'epsilon': 1} 

rf
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Best CV score: 0.621:
Best parameters: {'max_depth': 2, 'n_estimators': 10} 

en
Fitting 10 folds for each of 8 candidates, totalling 80 fits
Best CV score: 0.480:
Best parameters: {'alpha': 0.1, 'l1_ratio': 0.1} 

lasso
Fitting 10 folds for each of 4 candidates, totalling 40 fits
Best CV score: 0.480:
Best parameters: {'alpha': 0.1} 

knn
Fitting 10 folds for each of 9 candidates, totalling 90 fits


[Parallel(n_jobs=30)]: Using backend LokyBackend with 30 concurrent workers.
[Parallel(n_jobs=30)]: Done  21 out of  80 | elapsed:    0.0s remaining:    0.1s
[Parallel(n_jobs=30)]: Done  48 out of  80 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=30)]: Done  80 out of  80 | elapsed:    0.1s finished
  positive)
[Parallel(n_jobs=30)]: Using backend LokyBackend with 30 concurrent workers.
[Parallel(n_jobs=30)]: Done   9 out of  40 | elapsed:    0.0s remaining:    0.1s
[Parallel(n_jobs=30)]: Done  23 out of  40 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=30)]: Done  37 out of  40 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=30)]: Done  40 out of  40 | elapsed:    0.0s finished
[Parallel(n_jobs=30)]: Using backend LokyBackend with 30 concurrent workers.
[Parallel(n_jobs=30)]: Done  31 out of  90 | elapsed:    0.0s remaining:    0.1s
[Parallel(n_jobs=30)]: Done  90 out of  90 | elapsed:    0.1s finished
[Parallel(n_jobs=30)]: Using backend LokyBackend with 30 c

Best CV score: 0.819:
Best parameters: {'algorithm': 'ball_tree', 'leaf_size': 4, 'n_neighbors': 4, 'weights': 'distance'} 

butanol

nn
Fitting 10 folds for each of 3 candidates, totalling 30 fits


[Parallel(n_jobs=30)]: Done   4 out of  30 | elapsed:    9.6s remaining:  1.0min
[Parallel(n_jobs=30)]: Done  15 out of  30 | elapsed:   12.3s remaining:   12.3s
[Parallel(n_jobs=30)]: Done  26 out of  30 | elapsed:   13.7s remaining:    2.1s
[Parallel(n_jobs=30)]: Done  30 out of  30 | elapsed:   14.2s finished


Best CV score: 0.968:
Best parameters: {'activation': 'tanh', 'max_iter': 400} 

svm_rbf
Fitting 10 folds for each of 4 candidates, totalling 40 fits
Best CV score: 0.643:
Best parameters: {'C': 1, 'epsilon': 0.1} 

rf
Fitting 10 folds for each of 1 candidates, totalling 10 fits


[Parallel(n_jobs=30)]: Using backend LokyBackend with 30 concurrent workers.
[Parallel(n_jobs=30)]: Done   9 out of  40 | elapsed:    0.0s remaining:    0.1s
[Parallel(n_jobs=30)]: Done  23 out of  40 | elapsed:    0.1s remaining:    0.1s
[Parallel(n_jobs=30)]: Done  37 out of  40 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=30)]: Done  40 out of  40 | elapsed:    0.1s finished
[Parallel(n_jobs=30)]: Using backend LokyBackend with 30 concurrent workers.
[Parallel(n_jobs=30)]: Done   3 out of  10 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=30)]: Done   7 out of  10 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=30)]: Done  10 out of  10 | elapsed:    0.0s finished
[Parallel(n_jobs=30)]: Using backend LokyBackend with 30 concurrent workers.
[Parallel(n_jobs=30)]: Done  21 out of  80 | elapsed:    0.0s remaining:    0.1s
[Parallel(n_jobs=30)]: Done  48 out of  80 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=30)]: Done  80 out of  80 | elapsed:    0.1

Best CV score: 0.684:
Best parameters: {'max_depth': 2, 'n_estimators': 10} 

en
Fitting 10 folds for each of 8 candidates, totalling 80 fits
Best CV score: 0.743:
Best parameters: {'alpha': 0.0001, 'l1_ratio': 0.1} 

lasso
Fitting 10 folds for each of 4 candidates, totalling 40 fits
Best CV score: 0.743:
Best parameters: {'alpha': 0.0001} 

knn
Fitting 10 folds for each of 9 candidates, totalling 90 fits


[Parallel(n_jobs=30)]: Done  31 out of  90 | elapsed:    0.0s remaining:    0.1s
[Parallel(n_jobs=30)]: Done  62 out of  90 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=30)]: Done  90 out of  90 | elapsed:    0.1s finished
[Parallel(n_jobs=30)]: Using backend LokyBackend with 30 concurrent workers.


Best CV score: 0.979:
Best parameters: {'algorithm': 'ball_tree', 'leaf_size': 4, 'n_neighbors': 2, 'weights': 'distance'} 

butyrate

nn
Fitting 10 folds for each of 3 candidates, totalling 30 fits


[Parallel(n_jobs=30)]: Done   4 out of  30 | elapsed:    9.2s remaining:   59.7s
[Parallel(n_jobs=30)]: Done  15 out of  30 | elapsed:   12.8s remaining:   12.8s
[Parallel(n_jobs=30)]: Done  26 out of  30 | elapsed:   13.7s remaining:    2.1s
[Parallel(n_jobs=30)]: Done  30 out of  30 | elapsed:   14.2s finished


Best CV score: 0.902:
Best parameters: {'activation': 'tanh', 'max_iter': 400} 

svm_rbf
Fitting 10 folds for each of 4 candidates, totalling 40 fits
Best CV score: 0.650:
Best parameters: {'C': 1, 'epsilon': 0.1} 

rf
Fitting 10 folds for each of 1 candidates, totalling 10 fits


[Parallel(n_jobs=30)]: Using backend LokyBackend with 30 concurrent workers.
[Parallel(n_jobs=30)]: Done   9 out of  40 | elapsed:    0.0s remaining:    0.2s
[Parallel(n_jobs=30)]: Done  23 out of  40 | elapsed:    0.1s remaining:    0.1s
[Parallel(n_jobs=30)]: Done  37 out of  40 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=30)]: Done  40 out of  40 | elapsed:    0.1s finished
[Parallel(n_jobs=30)]: Using backend LokyBackend with 30 concurrent workers.
[Parallel(n_jobs=30)]: Done   3 out of  10 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=30)]: Done   7 out of  10 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=30)]: Done  10 out of  10 | elapsed:    0.0s finished


Best CV score: 0.438:
Best parameters: {'max_depth': 2, 'n_estimators': 10} 

en
Fitting 10 folds for each of 8 candidates, totalling 80 fits
Best CV score: 0.406:
Best parameters: {'alpha': 0.0001, 'l1_ratio': 0.1} 

lasso
Fitting 10 folds for each of 4 candidates, totalling 40 fits
Best CV score: 0.406:
Best parameters: {'alpha': 0.0001} 

knn
Fitting 10 folds for each of 9 candidates, totalling 90 fits


[Parallel(n_jobs=30)]: Using backend LokyBackend with 30 concurrent workers.
[Parallel(n_jobs=30)]: Done  21 out of  80 | elapsed:    0.0s remaining:    0.1s
[Parallel(n_jobs=30)]: Done  48 out of  80 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=30)]: Done  80 out of  80 | elapsed:    0.1s finished
  positive)
[Parallel(n_jobs=30)]: Using backend LokyBackend with 30 concurrent workers.
[Parallel(n_jobs=30)]: Done   9 out of  40 | elapsed:    0.0s remaining:    0.1s
[Parallel(n_jobs=30)]: Done  23 out of  40 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=30)]: Done  37 out of  40 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=30)]: Done  40 out of  40 | elapsed:    0.0s finished
[Parallel(n_jobs=30)]: Using backend LokyBackend with 30 concurrent workers.


Best CV score: 0.918:
Best parameters: {'algorithm': 'ball_tree', 'leaf_size': 4, 'n_neighbors': 4, 'weights': 'distance'} 



[Parallel(n_jobs=30)]: Done  31 out of  90 | elapsed:    0.0s remaining:    0.1s
[Parallel(n_jobs=30)]: Done  90 out of  90 | elapsed:    0.1s finished
