In [None]:
# Mount Google Drive (Colab only); the data paths used below live under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Libraries

In [None]:
import os
import skimage
import matplotlib.pyplot as plt
import cv2 as cv
import numpy as np
import gc
from tqdm import tqdm
import pickle
import copy

#Model creation
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
import scipy
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SelectFromModel
from sklearn import metrics
from sklearn.metrics import f1_score
from sklearn.svm import LinearSVC
from sklearn.decomposition import PCA
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import GroupKFold
from sklearn.model_selection import StratifiedGroupKFold
from sklearn.model_selection import LeaveOneGroupOut
from sklearn.metrics import matthews_corrcoef, make_scorer

#Oversamplig for unbalance
from imblearn.over_sampling import RandomOverSampler
from imblearn.over_sampling import SMOTE, ADASYN, SMOTENC
from imblearn.pipeline import Pipeline as Pipelineim
from sklearn.base import BaseEstimator, TransformerMixin

# Functions

In [None]:
#Global dictionaries
# mag_dict: magnification index (0-3) -> magnification string used in file names and printed messages.
mag_dict = {0:'40',1:'100',2:'200',3:'400'}
# tt_dict: split index -> split name.
tt_dict = {0:'train',1:'test'}

## Pickle-based functions

In [None]:
def train_test_from_pickle(category, fold, mag, base_dir='/content/drive/MyDrive/Ars_machinae_autodiscentis/Inceptum'):
  """
  Loads the pickled train and test objects of one data category for a given fold and magnification.

  Files are read from
  ``{base_dir}/fold{fold+1}/{split}/{category}_f{fold+1}_{split}_{magnification}x_fv.p``
  where split is 'train' or 'test' and magnification is ``mag_dict[mag]``.

  :param category: File-name prefix of the data (a feature extractor name for X, 'endpoints' for y, 'ID' for patient IDs)
  :param fold: Zero-based fold index (fold=0 reads the 'fold1' folder)
  :param mag: Magnification index, a key of mag_dict
  :param base_dir: Root folder containing the fold directories. Defaults to the original Drive location.
  :return: two objects, train and test, exactly as they were pickled
  """
  loaded = []
  for split in ('train', 'test'):
    path = f'{base_dir}/fold{fold+1}/{split}/{category}_f{fold+1}_{split}_{mag_dict[mag]}x_fv.p'
    # NOTE: pickle.load can execute arbitrary code, so only point this at trusted files.
    with open(path, 'rb') as handle:
      loaded.append(pickle.load(handle))
  train_array, test_array = loaded

  return train_array, test_array

def read_files_pickle(extractor, fold, mag): #Name given to mimic csv files function
  """
  Reads the train/test pairs of the three data categories (features, endpoints, patient IDs) from pickle files.

  :param extractor: feature extractor name, used as the file-name prefix of the feature matrices. e.g. extractor='GLCM'
  :param fold: zero-based fold index
  :param mag: magnification index, following the mag_dict indexing
  :return: X_train, X_test, y_train, y_test, ID_train, ID_test
  """
  pieces = []
  # Each category yields a (train, test) pair; the pairs are laid out one after another.
  for category in (extractor, 'endpoints', 'ID'):
    pieces.extend(train_test_from_pickle(category=category, fold=fold, mag=mag))
  X_train, X_test, y_train, y_test, ID_train, ID_test = pieces

  return X_train, X_test, y_train, y_test, ID_train, ID_test

def remove_correlated(X_train, X_test, max_corr):
  """
  Removes highly correlated features. Input can be np.array or pandas df, but output will be pandas df.

  The correlation matrix is computed on the training data only. For every pair of features whose
  absolute correlation is strictly greater than max_corr, the later column of the pair is dropped.
  The same columns are dropped from the test matrix.

  :param X_train: training feature matrix
  :param X_test: testing feature matrix (must have the same column labels as X_train)
  :param max_corr: Maximum absolute correlation accepted between features
  :return: Training and test feature matrices without the correlated columns, as dataframes
  """
  X_train = pd.DataFrame(X_train)
  X_test = pd.DataFrame(X_test)
  cor_matrix = X_train.corr().abs()
  # Keep only the strict upper triangle so each feature pair is inspected once and never against itself.
  upper_tri = cor_matrix.where(np.triu(np.ones(cor_matrix.shape), k=1).astype(bool))
  to_drop = [column for column in upper_tri.columns if (upper_tri[column] > max_corr).any()]
  # Drop by column label; the labels come from the training matrix.
  X_train_nocorr = X_train.drop(columns=to_drop)
  X_test_nocorr = X_test.drop(columns=to_drop)

  return X_train_nocorr, X_test_nocorr

def machine_learning_pipeline(X_train, X_test, y_train, y_test, ID_test, pipe, max_corr=1):
  """
  Main machine learning pipeline. It receives the data (feature matrices, labels and patients' ID),
  removes correlated features, fits the model and returns a dataframe comparing real and predicted labels.

  :param X_train, X_test: feature matrices (np.array or dataframe)
  :param y_train, y_test: labels; any array-like that flattens to one value per sample
  :param ID_test: patients' ID of the test samples; any array-like that flattens to one value per sample
  :param pipe: Estimator or pipeline exposing fit and predict (sklearn API)
  :param max_corr: Maximum correlation accepted among features
  :return: dataframe with columns ID, y_test, y_pred and comparison (1 where y_pred == y_test, else 0), and the fitted model
  """
  print(f'Input features: {X_train.shape[1]}\n')
  X_train_nocorr, X_test_nocorr = remove_correlated(X_train, X_test, max_corr=max_corr) #Remove correlated features from X_train and X_test
  print(f'No-correlated features: {X_train_nocorr.shape[1]}\n')
  # np.ravel accepts both arrays and pandas objects (Series.ravel is deprecated in recent pandas).
  model = pipe.fit(X_train_nocorr, np.ravel(y_train)) #Model fit
  y_pred = model.predict(X_test_nocorr) #Prediction

  # Assemble by position, not by index label: the inputs may carry different pandas indexes,
  # and label alignment would silently introduce NaN rows.
  df_comparison = pd.DataFrame({
      'ID': np.ravel(ID_test),
      'y_test': np.ravel(y_test),
      'y_pred': np.ravel(y_pred),
  })
  df_comparison['comparison'] = np.where(df_comparison['y_test'] == df_comparison['y_pred'], 1, 0) # 1 where the prediction is correct

  return df_comparison, model

def patient_score_beta(df_comparison):
  """
  Computes the patient score (correctly classified images / total images) for every patient ID.
  An extra list pairing each score with its ID is returned to analyse individual responses.

  :param df_comparison: Dataframe with at least the columns 'ID' and 'comparison' (1 = correct prediction, 0 = wrong)
  :return: two lists: the patient scores, and [score, ID] pairs. Both follow the groupby order (sorted IDs).
  """
  p_score_list = [] #List with the patient scores
  p_score_ID_list = [] #List with the patient score and IDs

  for patient_id, patient_rows in df_comparison.groupby('ID'): # One group per patient
    true_counts = patient_rows['comparison'].sum() # Nrec: correctly classified images
    # Np: images of this patient. The group length is used instead of value_counts()[0],
    # which is a label lookup and fails for IDs other than 0.
    num_img = len(patient_rows)
    p_score = true_counts/num_img # Patient score, Nrec/Np
    p_score_list.append(p_score)
    p_score_ID_list.append([p_score, patient_id])

  return p_score_list, p_score_ID_list

def performance_metrics(df_comparison):
  """
  Computes the performance metrics from the comparison dataframe:
  per-patient scores, recognition rate (mean of the patient scores) and image-wise accuracy
  (mean of the 'comparison' column).

  :param df_comparison: Dataframe with the columns ID, y_test, y_pred and comparison
  :return: patient scores, [score, ID] pairs, recognition rate, image-wise accuracy
  """
  scores, scores_with_id = patient_score_beta(df_comparison) # Per-patient scores
  image_accuracy = df_comparison['comparison'].mean() # Fraction of correctly classified images
  recognition_rate = np.mean(scores) # Average over patients, not over images

  return scores, scores_with_id, recognition_rate, image_accuracy

def get_problematic_patients(p_score_ID_list, min_score=0.5):
  """
  Given the list of patient scores with IDs, returns the entries of problematic patients,
  i.e. those whose score is strictly below min_score.

  :param p_score_ID_list: list of [score, ID] pairs
  :param min_score: score threshold; patients with score < min_score are reported
  :return: list of the [score, ID] pairs of problematic patients, in input order
  """
  return [entry for entry in p_score_ID_list if entry[0] < min_score]

def model2performance_metrics(extractor='dense', read_files_type=read_files_pickle, pipe=SVC(), max_corr=1, fold=0, mag=0):
  """
  Loads one fold/magnification, fits the given estimator and reports its metrics.

  :param extractor: name of the feature extractor whose features are loaded
  :param read_files_type: loader function called with (extractor, fold, mag); must return the 6-tuple
                          X_train, X_test, y_train, y_test, ID_train, ID_test
  :param pipe: Estimator or pipeline of the classification method (fitted in place)
  :param max_corr: Maximum correlation allowed between features
  :param fold: zero-based fold index
  :param mag: magnification index (key of mag_dict)
  :return: patient scores, [score, ID] pairs, recognition rate, image-wise accuracy and the fitted model.
           The problematic patients are printed as well.
  """
  # The training IDs are not needed here, only the test IDs used for the patient scores.
  X_train, X_test, y_train, y_test, _ID_train, ID_test = read_files_type(extractor=extractor, fold=fold, mag=mag)
  comparison_df, fitted_model = machine_learning_pipeline(
      X_train, X_test, y_train, y_test, ID_test, pipe=pipe, max_corr=max_corr)
  p_score_list, p_score_ID_list, rec_rate, acc = performance_metrics(comparison_df)
  p_problem = get_problematic_patients(p_score_ID_list)

  # Report for this fold
  print(f'-For fold {fold+1} and magnification {mag_dict[mag]}:\n')
  print(f'Recognition rate: {rec_rate}')
  print(f'Image-wise accuracy: {acc}')
  print(f'Problematic patients: {p_problem}\n')

  return p_score_list, p_score_ID_list, rec_rate, acc, fitted_model

def result_all_folders(extractor, read_files_type, pipe, max_corr=1, mag=0):
  """
  Runs all 5 folds for the same magnification and prints the mean recognition rate.

  :param extractor: name of the feature extractor whose features are loaded
  :param read_files_type: loader function (csv, pickle) passed on to model2performance_metrics
  :param pipe: Estimator, pipeline or hyper-parameter search of the classification method.
               The same object is re-fitted on every fold.
  :param max_corr: Maximum correlation allowed between features
  :param mag: magnification index (key of mag_dict)
  :return: the model as fitted on the last fold
  """
  n_folds = 5
  all_rec_rates = np.zeros(n_folds) #To save recognition rates
  for fold in range(n_folds):
    _, _, rec_rate, _, model = model2performance_metrics(extractor, read_files_type, pipe=pipe, max_corr=max_corr, fold=fold, mag=mag) #Fold-wise learning method
    all_rec_rates[fold] = rec_rate #save recognition rate
    if hasattr(model,'best_estimator_'):
      print(f'Grid best estimator: {model.best_estimator_}\n') #Print the best estimator hyperparameters of the grid search
      # Only pipelines with a fitted 'reductor' (PCA) step can report a component count;
      # indexing the step unconditionally raised KeyError for pipelines without PCA.
      reductor = getattr(model.best_estimator_, 'named_steps', {}).get('reductor')
      if reductor is not None and hasattr(reductor, 'explained_variance_ratio_'):
        print(f'Best estimator number of Principal COmponents: {reductor.explained_variance_ratio_.shape[0]}\n')
      print('############\n')

  print(f'---- Mean recognition rate for magnification {mag_dict[mag]}x: {all_rec_rates.mean()}') #Mean recognition rate (final metric)
  return model

## Multiclass function

In [None]:
def dataframe_unifier(extractor, read_files_type,fold, mag):
  """
  Builds one train and one test dataframe holding all the information of a fold/magnification:
  patient ID (first column, 'ID'), the features, the binary endpoint ('Binary', second to last column)
  and the multiclass endpoint ('Multi', last column).

  The blocks are joined with np.concatenate(axis=1), so the ID, feature and multi-endpoint arrays
  must be 2-D with one row per sample; the binary endpoint is reshaped to a column here.

  :param extractor: name of the feature extractor whose features are loaded
  :param read_files_type: loader function called with (extractor, fold, mag)
  :param fold and mag: fold and magnification indices
  :return: two dataframes (train, test) with all this information put together.
  """
  X_train, X_test, y_train, y_test, ID_train, ID_test = read_files_type(extractor=extractor, fold=fold, mag=mag)
  multi_train, multi_test = train_test_from_pickle(category='endpoints_multi', fold=fold, mag=mag) # Multiclass endpoints

  def _join_blocks(ids, features, binary, multi):
    # Force numpy containers and turn the binary endpoint into a single column before stacking side by side.
    blocks = (np.array(ids), np.array(features), np.array(binary).reshape(-1,1), multi)
    return pd.DataFrame(np.concatenate(blocks, axis=1))

  train_df = _join_blocks(ID_train, X_train, y_train, multi_train)
  test_df = _join_blocks(ID_test, X_test, y_test, multi_test)

  # Column names are taken from the train frame and applied to both frames.
  new_names = {train_df.columns[0]: 'ID', train_df.columns[-2]: 'Binary', train_df.columns[-1]: 'Multi'}

  return train_df.rename(columns=new_names), test_df.rename(columns=new_names)

def binary_dataloader(extractor, read_files_type, fold, mag, bm):
  """
  Gives the train and test split restricted to one binary class, with the multiclass endpoint as label.

  Only the rows whose 'Binary' column equals bm are kept. Features are cast to float, labels
  (the last column, 'Multi') to int32 and kept in their original coding; every output has a fresh 0..n-1 index.

  :param extractor: name of the feature extractor whose features are loaded
  :param read_files_type: loader function passed on to dataframe_unifier
  :param fold and mag: fold and magnification indices
  :param bm: value of the binary endpoint to keep
  :return: X_train, X_test, y_train, y_test, ID_train, ID_test
  """
  train, test = dataframe_unifier(extractor=extractor, read_files_type=read_files_type, fold=fold, mag=mag)

  def _split_columns(frame):
    selected = frame[frame['Binary']==bm]
    features = selected.iloc[:,1:-2].astype('float').reset_index(drop=True) # Everything between ID and the two endpoints
    labels = selected.iloc[:,-1].astype('int32').reset_index(drop=True) # Multiclass endpoint
    ids = selected.iloc[:,0].reset_index(drop=True)
    return features, labels, ids

  X_train, y_train, ID_train = _split_columns(train)
  X_test, y_test, ID_test = _split_columns(test)

  return X_train, X_test, y_train, y_test, ID_train, ID_test

#Define identity transformation
class IdentityTransformer(BaseEstimator, TransformerMixin):
    """Pass-through transformer: learns nothing and returns its input multiplied by one."""

    def __init__(self):
        """There are no hyperparameters to store."""

    def fit(self, input_array, y=None):
        """Nothing is learned from the data; returns the transformer itself."""
        return self

    def transform(self, input_array, y=None):
        """Returns ``input_array*1`` (same values as the input)."""
        unchanged = input_array*1
        return unchanged

def classifier_and_grid_2(X, y, ID, method, grid='normal',verbose=1, reductor_components=[1],scoring='accuracy', sampler = SMOTE()):
  """
  Builds the hyper-parameter search (pipeline + parameter space + grouped cross-validation) of a learning method.

  The pipeline is: StandardScaler -> PCA (only for the '*_PCA' methods) -> sampler -> classifier.
  Cross-validation uses 4 StratifiedGroupKFold splits computed from (X, y, ID), so samples sharing
  an ID never appear on both sides of a split.

  :param X, y, ID: training features, labels and group IDs; only used to pre-compute the CV splits
  :param method: learning method name: 'KNN', 'KNN_PCA', 'SVM', 'SVM_PCA', 'RF' or 'RF_PCA'
  :param grid: 'normal' (GridSearchCV) or 'random' (RandomizedSearchCV with 30 iterations).
               Only the SVM methods use it; KNN and RF always run an exhaustive grid search.
  :param verbose: verbosity passed to the search object
  :param reductor_components: candidate values for PCA n_components (only for the '*_PCA' methods)
  :param scoring: 'MCC' selects a Matthews correlation coefficient scorer; any other value is passed
                  to the search object as is (previously every other value was silently replaced by None)
  :param sampler: pipeline step placed before the classifier to handle class imbalance
  :return: the unfitted search object and the list of CV splits
  :raises ValueError: if method is unknown, or if grid is unknown for an SVM method
  """
  # (classifier family, use PCA) for every supported method name
  known_methods = {
      'KNN': ('KNN', False), 'KNN_PCA': ('KNN', True),
      'SVM': ('SVM', False), 'SVM_PCA': ('SVM', True),
      'RF': ('RF', False), 'RF_PCA': ('RF', True),
  }
  if method not in known_methods:
    raise ValueError(f'Unknown method {method!r}; expected one of {sorted(known_methods)}')
  family, use_pca = known_methods[method]

  #CV
  gkf = list(StratifiedGroupKFold(n_splits=4).split(X,y,groups=ID))
  #Scorer
  scorer = make_scorer(matthews_corrcoef) if scoring=='MCC' else scoring
  #Random iterations
  n_iter = 30

  # Classifier and its search space
  randomized = False
  if family == 'KNN':
    classifier = KNeighborsClassifier()
    search_space = {'classifier__n_neighbors': list(range(1,30))}
  elif family == 'RF':
    classifier = RandomForestClassifier()
    search_space = {'classifier__n_estimators': [100, 200, 400, 600, 800]}
  else: # SVM
    classifier = SVC()
    search_space = {'classifier__kernel': ['rbf'], 'classifier__class_weight': ['balanced']}
    if grid == 'normal':
      search_space['classifier__C'] = [0.1, 1, 10, 100]
      search_space['classifier__gamma'] = [100, 10, 1, 0.1, 0.01, 0.001, 0.0001]
    elif grid == 'random':
      randomized = True
      search_space['classifier__C'] = scipy.stats.expon(scale=10)
      search_space['classifier__gamma'] = scipy.stats.expon(scale=.001)
    else:
      raise ValueError(f"Unknown grid {grid!r}; expected 'normal' or 'random'")

  # Pipeline steps; PCA goes right after scaling and before the sampler
  steps = [('scaler', StandardScaler())]
  if use_pca:
    steps.append(('reductor', PCA()))
    search_space['reductor__n_components'] = reductor_components
  steps.extend([('sampler', sampler), ('classifier', classifier)])
  pipe = Pipelineim(steps)

  if randomized:
    search = RandomizedSearchCV(pipe, search_space, n_iter=n_iter, scoring=scorer, cv=gkf, verbose=verbose, return_train_score=False)
  else:
    search = GridSearchCV(pipe, search_space, scoring=scorer, cv=gkf, verbose=verbose)
  return search, gkf

def model2performance_metrics_multi(extractor='dense', method = 'SVM', grid = 'random', verbose = 1, reductor_components = [1], scoring = 'MCC', sampler = SMOTE(), max_corr=0.99, fold=0, mag=0, bm=0, multi_label=0):
  """
  Trains and evaluates a one-vs-rest model for one subtype inside one binary class, on one fold/magnification.

  The data of binary class bm is loaded from pickle files, the labels are binarised as
  (multiclass endpoint == multi_label), a hyper-parameter search is built with classifier_and_grid_2
  and fitted, and the metrics are computed and printed.

  :param extractor: name of the feature extractor whose features are loaded
  :param method, grid, verbose, reductor_components, scoring, sampler: passed to classifier_and_grid_2
  :param max_corr: Maximum correlation allowed between features (now honoured; it was ignored before)
  :param fold, mag: fold and magnification indices
  :param bm: value of the binary endpoint whose samples are used
  :param multi_label: value of the multiclass endpoint treated as the positive class
  :return: patient scores, [score, ID] pairs, recognition rate, image-wise accuracy and the fitted model.
           The problematic patients are printed as well.
  """

  #Data split
  X_train, X_test, y_train, y_test, ID_train, ID_test = binary_dataloader(extractor=extractor, read_files_type = read_files_pickle, fold=fold, mag=mag, bm=bm) #Split data in train and test
  #Multiclass labels one vs all
  y_train_one = y_train==multi_label
  y_test_one = y_test==multi_label

  #Pipeline definition. It is built after data loading because the CV splits need the training data.
  pipe, _cv_splits = classifier_and_grid_2(X_train, y_train_one, ID_train, method=method, grid=grid, verbose=verbose, reductor_components=reductor_components,scoring=scoring, sampler = sampler)
  df_comparison, model = machine_learning_pipeline(X_train, X_test, y_train_one, y_test_one, ID_test, pipe, max_corr=max_corr)

  p_score_list, p_score_ID_list, rec_rate, acc = performance_metrics(df_comparison) #Get performance metrics
  p_problem = get_problematic_patients(p_score_ID_list)

  print(f'-For fold {fold+1} and magnification {mag_dict[mag]}:\n') #Print metrics
  print(f'Recognition rate: {rec_rate}')
  print(f'Image-wise accuracy: {acc}')
  print(f'Problematic patients: {p_problem}\n') #Show problematic patients' ID and score

  return p_score_list, p_score_ID_list, rec_rate, acc, model

def result_all_folders_strat(extractor = 'dense', method = 'SVM', grid = 'normal', verbose = 1, reductor_components = [1], scoring = 'accuracy', sampler = IdentityTransformer(), max_corr=0.99, mag=0, bm=0, multi_label=0):
  """
  Runs model2performance_metrics_multi on all 5 folds for the same magnification and summarises the recognition rate.

  :param extractor: name of the feature extractor whose features are loaded
  :param method, grid, verbose, reductor_components, scoring, sampler: search settings, see classifier_and_grid_2
  :param max_corr: Maximum correlation allowed between features (now forwarded; the call used a fixed 0.99 before)
  :param mag: magnification index (key of mag_dict)
  :param bm: value of the binary endpoint whose samples are used
  :param multi_label: value of the multiclass endpoint treated as the positive class
  :return: mean and standard deviation of the recognition rate over the folds
  """
  n_folds = 5
  all_rec_rates = np.zeros(n_folds) #To save recognition rates
  for fold in range(n_folds):
    _, _, rec_rate, _, model = model2performance_metrics_multi(extractor=extractor, method = method, grid = grid, verbose = verbose, reductor_components = reductor_components, scoring = scoring, sampler = sampler, max_corr=max_corr, fold=fold, mag=mag, bm=bm, multi_label=multi_label) #Fold-wise learning method
    all_rec_rates[fold] = rec_rate #save recognition rate
    if hasattr(model,'best_estimator_'):
      print(f'Grid best estimator: {model.best_estimator_}\n') #Print the best estimator hyperparameters of the grid search
      print('############\n')
  rec_rate_mean = all_rec_rates.mean()
  rec_rate_std = all_rec_rates.std()
  print(f'---- Mean recognition rate for magnification {mag_dict[mag]}x: {rec_rate_mean}') #Mean recognition rate (final metric)
  return rec_rate_mean, rec_rate_std

# Model training

Train binary models for each main binary class (benign/malignant) and each histopathological subtype.

In [None]:
# Sweep over extractor x method x sampler; for each configuration, run all folds at every
# magnification and store the mean/std recognition rates. The result matrix is pickled at the end.
bm = 1 #Which binary class (value of the binary endpoint to keep)
multi_label = 5 #Which subtype (positive class of the one-vs-rest problem) #For benign 0-3, for malignant 4-7

# One row per configuration: column 0 is the configuration name, columns 1-8 are
# (mean, std) of the recognition rate for magnification indices 0..3, in that order.
# Only 1*2*2 = 4 of the 12 rows are filled by the loops below; the rest stay 0.
super_multi = np.zeros((12,9),dtype='object') #Matrix containing all results
i = 0 #Matrix row counter
for extractor in ['dense']:
  for method in ['RF','RF_PCA']:
    for sampler in [SMOTE(),IdentityTransformer()]:
      
      #Magnification level model comparison
      rec_rate_all = np.zeros((4,2)) #Row is magnification, column is mean and standard deviation
      for mag in [0,1,2,3]:

        # Search settings (constant across iterations)
        grid = 'random'
        verbose = 1
        reductor_components = [0.95] #For PCA only
        scoring = 'MCC'
        max_corr = 0.99

        #Predict for magnification
        rec_rate_all[mag,:] = result_all_folders_strat(extractor = extractor, method = method, grid = grid, verbose = verbose, reductor_components = reductor_components, scoring = scoring, sampler =sampler, max_corr=max_corr, mag = mag, bm=bm, multi_label=multi_label)
      super_multi[i,1:] = rec_rate_all.ravel() # Row-major flatten: mag0 mean, mag0 std, mag1 mean, ...
      super_multi[i,0] = extractor+'_'+method+'_'+str(sampler)
      i = i+1
# Persist the results; the file name encodes the binary class and the subtype.
with open(f'/content/drive/MyDrive/Ars_machinae_autodiscentis/Inceptum/Machine_Learning/Results/super_multi_{bm}_{multi_label}.p','wb') as handle:
  pickle.dump(super_multi, handle, pickle.HIGHEST_PROTOCOL)

# Extracting best models

In [None]:
#Matrix where we save the best models for multiclasses
# Axes: [magnification index, binary class, (model, recognition rate), subtype index]
best_pred_multi = np.zeros((4,2,2,4),dtype='object')

In [None]:
# Fit one configuration on a single fold and store the fitted model with its recognition rate.
# Requires best_pred_multi to have been created by the cell above.
fold = 0
#Unchangable settings
extractor = 'dense'
grid = 'random'
reductor_components = [0.95, 0.99]
scoring = 'MCC'
verbose = 3

#Settings to change
mag = 3
bm = 1
multi_label = 0 # Subtype index within the binary class (0-3); shifted by 4 in the call below
method = 'RF'
sampler = IdentityTransformer()
max_corr = 0.99


# max_corr is now taken from the setting above instead of a literal 0.99, so changing the setting has an effect.
p_score_list, p_score_ID_list, rec_rate, acc, model = model2performance_metrics_multi(extractor=extractor, method = method, grid = grid, verbose = verbose, reductor_components = reductor_components, scoring = scoring, sampler = sampler, max_corr=max_corr, fold=fold, mag=mag, bm=bm, multi_label=multi_label+4) #Fold-wise learning method
best_pred_multi[mag, bm, :,multi_label] = [model,rec_rate]

Input features: 1024

No-correlated features: 1024

Fitting 4 folds for each of 5 candidates, totalling 20 fits
[CV 1/4] END ......classifier__n_estimators=100;, score=0.517 total time=   1.3s
[CV 2/4] END ......classifier__n_estimators=100;, score=0.469 total time=   1.3s
[CV 3/4] END .....classifier__n_estimators=100;, score=-0.160 total time=   1.3s
[CV 4/4] END ......classifier__n_estimators=100;, score=0.268 total time=   1.3s
[CV 1/4] END ......classifier__n_estimators=200;, score=0.569 total time=   2.6s
[CV 2/4] END ......classifier__n_estimators=200;, score=0.440 total time=   2.5s
[CV 3/4] END .....classifier__n_estimators=200;, score=-0.041 total time=   2.5s
[CV 4/4] END ......classifier__n_estimators=200;, score=0.188 total time=   2.5s
[CV 1/4] END ......classifier__n_estimators=400;, score=0.479 total time=   5.2s
[CV 2/4] END ......classifier__n_estimators=400;, score=0.509 total time=   5.0s
[CV 3/4] END .....classifier__n_estimators=400;, score=-0.107 total time=   5.

In [None]:
# Persist the matrix of best models (fitted model and recognition rate per configuration) to Drive.
with open('/content/drive/MyDrive/Ars_machinae_autodiscentis/Inceptum/Machine_Learning/Results/Best_models/Best_pred_multi_2.p','wb') as handle:
  pickle.dump(best_pred_multi, handle, pickle.HIGHEST_PROTOCOL)