In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.utils import compute_class_weight
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score,recall_score,confusion_matrix, cohen_kappa_score, precision_score, recall_score
from sklearn.metrics import precision_recall_curve, auc, average_precision_score, f1_score, roc_auc_score, roc_curve
import pandas as pd
import math
from joblib import dump, load
import random
import pickle
import scipy.stats
from PIL import Image
import scipy.stats as stats

from Functions import post_processing
from Functions import general_functions
from Functions import feature_creation

from skimage.restoration import denoise_bilateral

import operator

In [2]:
zone_1_path = "dataset/zone_4.pickle"
zone_2_path = "dataset/zone_6.pickle"
zone_3_path = "dataset/zone_9.pickle"

In [3]:
file = open(zone_1_path, "rb")
zone_1 = pickle.load(file)

file = open(zone_2_path, "rb")
zone_2 = pickle.load(file)

file = open(zone_3_path, "rb")
zone_3 = pickle.load(file)

file = None

In [4]:
#zone_1_resampled = general_functions.create_balanced_dataset([zone_1_path])
#zone_2_resampled = general_functions.create_balanced_dataset([zone_2_path])
#zone_3_resampled = general_functions.create_balanced_dataset([zone_3_path])

#with open("dataset/zone_1_resampled.pickle", "wb") as file:
#    pickle.dump(zone_1_resampled, file)
#with open("dataset/zone_2_resampled.pickle", "wb") as file:
#    pickle.dump(zone_2_resampled, file)
#with open("dataset/zone_3_resampled.pickle", "wb") as file:
#    pickle.dump(zone_3_resampled, file)

In [None]:
with open("dataset/zone_1_resampled.pickle", "rb") as file:
    zone_1_resampled = pickle.load(file)
with open("dataset/zone_2_resampled.pickle", "rb") as file:
    zone_2_resampled = pickle.load(file)
with open("dataset/zone_3_resampled.pickle", "rb") as file:
    zone_3_resampled = pickle.load(file)

In [5]:
zone_1.shape

(7852140, 82)

In [8]:
experiment_arr = [(zone_1_resampled, pd.concat([zone_2, zone_3], ignore_index=True)),
                  (zone_2_resampled, pd.concat([zone_1, zone_3], ignore_index=True)),
                  (zone_3_resampled, pd.concat([zone_1, zone_2], ignore_index=True))]

In [9]:
zone_1 = None
zone_2 = None
zone_3 = None
zone_1_resampled = None
zone_2_resampled = None
zone_3_resampled = None

In [7]:
most_important_features = zone_1.columns.tolist()[1:]
for num_features in [81, 70, 60, 50, 40]:
    
    most_important_features = most_important_features[:num_features]
    print(f"Features used in experiment:\n{most_important_features}")
    print("\n")
    feature_importances = {i:0 for i in most_important_features}
    y_test_all = []
    pred_all = []
    
    
    for (training_dataset, test_dataset) in experiment_arr:

        X_train = training_dataset.filter(items=most_important_features).loc[:, training_dataset.filter(items=most_important_features).columns != "label_3m"]
        y_train = training_dataset["label_3m"]
        training_dataset = None

        _max_features = 25 if num_features > 25 else num_features
        clf = RandomForestClassifier(n_estimators=200, class_weight="balanced", max_features=_max_features, n_jobs=-1)

        clf.fit(X_train, y_train)

        X_test = test_dataset.filter(items=most_important_features).loc[:, test_dataset.filter(items=most_important_features).columns != "label_3m"]
        y_test = test_dataset["label_3m"]
        
        print(f"Amount of features X_train: {len(X_train.columns)}, X_test: {len(X_test.columns)}")

        proba = clf.predict_proba(X_test)[:,1:].reshape(2997,2620)
        
        proba_post_process = post_processing.proba_post_process(proba, 6, 0.4)
    
        labels_grid = post_processing.raster_to_zones(y_test.reshape(2997, 2620), 6, 4)

        for i, pred_var in enumerate(proba_post_process.reshape(-1)):
            y_test_all.append(labels_grid.reshape(-1)[i])
            pred_all.append(pred_var)

        importances = clf.feature_importances_

        
        for i, importance in enumerate(importances):
            feature_importances[most_important_features[i]] += importance
        
    for importance_name in most_important_features:
        feature_importances[importance_name] /= 2
    
    
    
    most_important_features = list(dict(sorted(feature_importances.items(),
                                               key=operator.itemgetter(1),reverse=True)).keys())

    print("\n")
    print(f"Importances for experiment:")
    for key in most_important_features:
        print(key, " - ", feature_importances[key])
    print("\n")
    print(f"Amount of features used:   {num_features}")
    print("Cohen's kappa score        ", cohen_kappa_score(y_test_all, pred_all))
    print("Accuracy score             ", accuracy_score(y_test_all, pred_all))
    print("Recall score               ", recall_score(y_test_all, pred_all))
    print("Precision score            ", precision_score(y_test_all, pred_all))
    print("\n------------------------------------------------------\n")

Features used in experiment:
['hpmf_raw', 'skyview_raw', 'impundment_raw', 'slope_raw', 'DEM_ditch_detection', 'DEM_ditch_detection_no_streams', 'conic_mean', 'skyview_non_ditch', 'skyview_gabor', 'conic_mean_no_streams', 'skyview_gabor_no_streams', 'skyview_mean_2', 'skyview_mean_3', 'skyview_mean_4', 'skyview_mean_6', 'skyview_median_2', 'skyview_median_4', 'skyview_median_6', 'skyview_min_2', 'skyview_min_4', 'skyview_min_6', 'skyview_max_2', 'skyview_max_4', 'skyview_max_6', 'skyview_std_2', 'skyview_std_4', 'skyview_std_6', 'impundment_amplified', 'impoundment_amplified_no_streams', 'impundment_mean_2', 'impundment_mean_3', 'impundment_mean_4', 'impundment_mean_6', 'impundment_median_2', 'impundment_median_4', 'impundment_median_6', 'impundment_min_2', 'impundment_min_4', 'impundment_min_6', 'impundment_max_2', 'impundment_max_4', 'impundment_max_6', 'impundment_std_2', 'impundment_std_4', 'impundment_std_6', 'hpmf_filter', 'hpmf_gabor', 'hpmf_gabor_no_streams', 'hpmf_filter_no_st

Cohen's kappa score         0.4287659646094656
Accuracy score              0.9587795174309169
Recall score                0.7124483846470013
Precision score             0.3257753874914897

------------------------------------------------------

Features used in experiment:
['impundment_mean_3', 'impundment_mean_4', 'impundment_median_4', 'impundment_mean_2', 'impundment_amplified', 'impoundment_amplified_no_streams', 'hpmf_median_4', 'skyview_max_6', 'hpmf_mean_4', 'skyview_gabor', 'skyview_non_ditch', 'slope_min_6', 'skyview_max_4', 'skyview_gabor_no_streams', 'slope_median_6', 'impundment_max_6', 'hpmf_filter', 'skyview_median_6', 'slope_non_ditch', 'impundment_std_4', 'slope_std_6', 'hpmf_filter_no_streams', 'impundment_median_6', 'hpmf_mean_6', 'slope_min_4', 'hpmf_mean_3', 'impundment_mean_6', 'slope_min_2', 'slope_mean_6', 'skyview_max_2', 'slope_std_4', 'impundment_median_2', 'hpmf_std_6', 'skyview_min_6', 'impundment_std_6']


Amount of features X_train: 35, X_test: 35
Amount o

Cohen's kappa score         0.3320598002814307
Accuracy score              0.941501934504479
Recall score                0.6889087895127947
Precision score             0.23933660717578606

------------------------------------------------------



In [50]:
most_important_features = zone_4.columns.tolist()[1:]
zone_4 = None
zone_7 = None
zone_4_resampled = None
zone_7_resampled = None
for num_features in [81, 65, 62, 59, 56, 53, 50, 47, 44, 41, 38, 35]:
    
    most_important_features = most_important_features[:num_features]
    print(f"Features used in experiment:\n{most_important_features}")
    print("\n")
    feature_importances = {i:0 for i in most_important_features}
    y_test_all = []
    pred_all = []
    
    
    for (training_dataset, test_dataset) in experiment_arr:

        X_train = training_dataset.filter(items=most_important_features).loc[:, training_dataset.filter(items=most_important_features).columns != "label_3m"]
        y_train = training_dataset["label_3m"]
        training_dataset = None

        _max_features = 25 if num_features > 25 else num_features
        clf = RandomForestClassifier(n_estimators=200, class_weight="balanced", max_features=_max_features, n_jobs=-1)

        clf.fit(X_train, y_train)

        X_test = test_dataset.filter(items=most_important_features).loc[:, test_dataset.filter(items=most_important_features).columns != "label_3m"]
        y_test = test_dataset["label_3m"]
        
        print(f"Amount of features X_train: {len(X_train.columns)}, X_test: {len(X_test.columns)}")

        pred = clf.predict(X_test)

        for i, pred_var in enumerate(pred):
            y_test_all.append(y_test[i])
            pred_all.append(pred_var)

        importances = clf.feature_importances_

        
        for i, importance in enumerate(importances):
            feature_importances[most_important_features[i]] += importance
        
    for importance_name in most_important_features:
        feature_importances[importance_name] /= 2
    
    
    
    most_important_features = list(dict(sorted(feature_importances.items(),
                                               key=operator.itemgetter(1),reverse=True)).keys())

    print("\n")
    print(f"Importances for experiment:")
    for key in most_important_features:
        print(key, " - ", feature_importances[key])
    print("\n")
    print(f"Amount of features used:   {num_features}")
    print("Cohen's kappa score        ", cohen_kappa_score(y_test_all, pred_all))
    print("Accuracy score             ", accuracy_score(y_test_all, pred_all))
    print("Recall score               ", recall_score(y_test_all, pred_all))
    print("Precision score            ", precision_score(y_test_all, pred_all))
    print("\n------------------------------------------------------\n")

Features used in experiment:
['hpmf_raw', 'skyview_raw', 'impundment_raw', 'slope_raw', 'DEM_ditch_detection', 'DEM_ditch_detection_no_streams', 'conic_mean', 'skyview_non_ditch', 'skyview_gabor', 'conic_mean_no_streams', 'skyview_gabor_no_streams', 'skyview_mean_2', 'skyview_mean_3', 'skyview_mean_4', 'skyview_mean_6', 'skyview_median_2', 'skyview_median_4', 'skyview_median_6', 'skyview_min_2', 'skyview_min_4', 'skyview_min_6', 'skyview_max_2', 'skyview_max_4', 'skyview_max_6', 'skyview_std_2', 'skyview_std_4', 'skyview_std_6', 'impundment_amplified', 'impoundment_amplified_no_streams', 'impundment_mean_2', 'impundment_mean_3', 'impundment_mean_4', 'impundment_mean_6', 'impundment_median_2', 'impundment_median_4', 'impundment_median_6', 'impundment_min_2', 'impundment_min_4', 'impundment_min_6', 'impundment_max_2', 'impundment_max_4', 'impundment_max_6', 'impundment_std_2', 'impundment_std_4', 'impundment_std_6', 'hpmf_filter', 'hpmf_gabor', 'hpmf_gabor_no_streams', 'hpmf_filter_no_st

Cohen's kappa score         0.42758002488020674
Accuracy score              0.9584068801626053
Recall score                0.7157283582983334
Precision score             0.32394397430574634

------------------------------------------------------

Features used in experiment:
['impundment_mean_3', 'impundment_mean_4', 'impundment_median_4', 'impundment_mean_2', 'hpmf_mean_4', 'impundment_amplified', 'impoundment_amplified_no_streams', 'hpmf_median_4', 'skyview_max_6', 'hpmf_mean_3', 'skyview_gabor', 'skyview_non_ditch', 'slope_min_6', 'skyview_max_4', 'skyview_gabor_no_streams', 'impundment_max_2', 'impundment_median_2', 'impundment_std_4', 'impundment_max_6', 'slope_non_ditch', 'hpmf_filter', 'hpmf_min_2', 'hpmf_filter_no_streams', 'impundment_mean_6', 'impundment_median_6', 'hpmf_mean_6', 'impundment_std_6', 'slope_std_6', 'slope_min_4', 'slope_median_6', 'skyview_median_6', 'skyview_min_6', 'impundment_max_4', 'hpmf_min_4', 'skyview_max_2', 'slope_mean_6', 'hpmf_gabor_no_streams', 's

Amount of features X_train: 56, X_test: 56
Amount of features X_train: 56, X_test: 56


Importances for experiment:
impundment_mean_3  -  0.15724250428216957
impundment_mean_4  -  0.08180407803189814
impundment_median_4  -  0.0622845124549371
impundment_mean_2  -  0.047266316461828876
impundment_amplified  -  0.027051403706817544
hpmf_mean_4  -  0.026849187788144643
impoundment_amplified_no_streams  -  0.023458540661683062
hpmf_median_4  -  0.020255672890071262
skyview_max_6  -  0.019431705768611824
skyview_gabor  -  0.016912709508749763
skyview_non_ditch  -  0.016792733485545275
slope_min_6  -  0.015266900367479585
skyview_max_4  -  0.014875177133376506
skyview_gabor_no_streams  -  0.014457425343139273
impundment_std_4  -  0.014262227339428203
impundment_max_6  -  0.013402964341934848
slope_non_ditch  -  0.013251726497364126
hpmf_filter  -  0.013005088332447741
hpmf_mean_3  -  0.012933113757200514
hpmf_filter_no_streams  -  0.012584455105503902
hpmf_mean_6  -  0.012474571090879037
imp

Cohen's kappa score         0.4283547633162076
Accuracy score              0.9587223992440277
Recall score                0.7123041202457394
Precision score             0.3253890687715424

------------------------------------------------------

Features used in experiment:
['impundment_mean_3', 'impundment_mean_4', 'impundment_median_4', 'impundment_mean_2', 'impundment_amplified', 'impoundment_amplified_no_streams', 'hpmf_median_4', 'skyview_max_6', 'hpmf_mean_4', 'skyview_non_ditch', 'skyview_gabor', 'slope_min_6', 'skyview_max_4', 'hpmf_mean_3', 'skyview_gabor_no_streams', 'impundment_std_4', 'impundment_max_6', 'slope_non_ditch', 'hpmf_filter', 'hpmf_mean_6', 'hpmf_filter_no_streams', 'slope_std_6', 'impundment_median_6', 'skyview_median_6', 'impundment_median_2', 'slope_median_6', 'slope_min_4', 'impundment_mean_6', 'impundment_max_2', 'hpmf_mean_2', 'hpmf_std_6', 'skyview_min_6', 'slope_mean_6', 'impundment_std_6', 'skyview_std_6', 'skyview_max_2', 'slope_min_2', 'hpmf_min_2', 'h

Cohen's kappa score         0.42389556100738757
Accuracy score              0.9581493707447906
Recall score                0.7098271276957708
Precision score             0.3213886495926212

------------------------------------------------------

Features used in experiment:
['impundment_mean_3', 'impundment_mean_4', 'impundment_median_4', 'impundment_mean_2', 'impundment_amplified', 'hpmf_median_4', 'impoundment_amplified_no_streams', 'skyview_max_6', 'skyview_non_ditch', 'skyview_gabor', 'slope_min_6', 'skyview_max_4', 'hpmf_mean_6', 'hpmf_mean_4', 'impundment_max_6', 'skyview_gabor_no_streams', 'slope_median_6', 'hpmf_filter', 'impundment_std_4', 'impundment_median_6', 'slope_std_6', 'hpmf_filter_no_streams', 'slope_non_ditch', 'skyview_median_6', 'skyview_min_6', 'hpmf_std_6', 'slope_min_4', 'hpmf_mean_3', 'slope_mean_6', 'impundment_mean_6', 'skyview_std_6', 'hpmf_min_4', 'impundment_median_2', 'slope_std_4', 'impundment_std_6', 'slope_min_2', 'slope_median_2', 'skyview_max_2']


A

In [7]:
from itertools import product

In [8]:
params = [["gini", "entropy"],
          [35, None],
          [2, 10],
          ["balanced", None],
          [200, 300]]

In [9]:
most_important_features = ['impundment_mean_3', 'impundment_mean_4', 'impundment_median_4', 'impundment_mean_2',
                           'impundment_amplified', 'hpmf_mean_4', 'impoundment_amplified_no_streams', 'hpmf_median_4',
                           'skyview_max_6', 'skyview_gabor', 'skyview_non_ditch', 'slope_min_6', 'skyview_max_4',
                           'skyview_gabor_no_streams', 'impundment_std_4', 'impundment_max_6', 'slope_non_ditch',
                           'hpmf_filter', 'hpmf_mean_3', 'hpmf_filter_no_streams', 'hpmf_mean_6', 'impundment_median_6',
                           'impundment_median_2', 'slope_std_6', 'impundment_mean_6', 'slope_median_6', 'slope_min_4',
                           'hpmf_min_2', 'impundment_std_6', 'skyview_median_6', 'skyview_min_6', 'slope_mean_6',
                           'impundment_max_2', 'skyview_std_6', 'slope_min_2', 'skyview_max_2', 'hpmf_min_4',
                           'impundment_max_4', 'slope_std_4', 'hpmf_gabor', 'hpmf_std_6', 'slope_max_4',
                           'hpmf_gabor_no_streams', 'skyview_min_4', 'slope_median_4', 'hpmf_mean_2', 'slope_median_2',
                           'slope_std_2', 'hpmf_min_6', 'hpmf_max_6', 'skyview_mean_6', 'skyview_median_4', 'hpmf_std_4']

In [10]:
param_combos = list(product(*params))

In [11]:
len(param_combos)

144

In [12]:
zone_4 = None
zone_7 = None
zone_4_resampled = None
zone_7_resampled = None
params_and_score = []
j = 0
for combo in param_combos:
    j += 1
    y_test_all = []
    pred_all = []
    
    params = {'criterion': combo[0],
    'max_depth': combo[1],
    'min_samples_split': combo[2],
    'class_weight': combo[3],
    'n_estimators': combo[4]}
    print(f"params: {params}")
    
    for (training_dataset, test_dataset) in experiment_arr:

        X_train = training_dataset.filter(items=most_important_features).loc[:, training_dataset.filter(items=most_important_features).columns != "label_3m"]
        y_train = training_dataset["label_3m"]
        training_dataset = None

        clf = RandomForestClassifier(**params, n_jobs=-1, random_state=89)
        print(clf)

        clf.fit(X_train, y_train)

        X_test = test_dataset.filter(items=most_important_features).loc[:, test_dataset.filter(items=most_important_features).columns != "label_3m"]
        y_test = test_dataset["label_3m"]
        
        print(f"Amount of features X_train: {len(X_train.columns)}, X_test: {len(X_test.columns)}")

        pred = clf.predict(X_test)

        for i, pred_var in enumerate(pred):
            y_test_all.append(y_test[i])
            pred_all.append(pred_var)

    print("\n")
    print(f"Iteration {j} of {len(param_combos)}")
    print("Cohen's kappa score        ", cohen_kappa_score(y_test_all, pred_all))
    print("Accuracy score             ", accuracy_score(y_test_all, pred_all))
    print("Recall score               ", recall_score(y_test_all, pred_all))
    print("Precision score            ", precision_score(y_test_all, pred_all))
    print("\n------------------------------------------------------\n")
    params_and_score.append((params, cohen_kappa_score(y_test_all, pred_all)))

print(params_and_score)

params: {'criterion': 'gini', 'max_depth': 35, 'max_features': 'log2', 'min_samples_split': 2, 'class_weight': 'balanced', 'n_estimators': 100}
RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight='balanced',
                       criterion='gini', max_depth=35, max_features='log2',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=-1, oob_score=False, random_state=89, verbose=0,
                       warm_start=False)
Amount of features X_train: 53, X_test: 53
RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight='balanced',
                       criterion='gini', max_depth=35, max_features='log2',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0,

Amount of features X_train: 53, X_test: 53
RandomForestClassifier(bootstrap=True, ccp_alpha=0.0,
                       class_weight='balanced_subsample', criterion='gini',
                       max_depth=35, max_features='log2', max_leaf_nodes=None,
                       max_samples=None, min_impurity_decrease=0.0,
                       min_impurity_split=None, min_samples_leaf=1,
                       min_samples_split=2, min_weight_fraction_leaf=0.0,
                       n_estimators=300, n_jobs=-1, oob_score=False,
                       random_state=89, verbose=0, warm_start=False)
Amount of features X_train: 53, X_test: 53


Iteration 6 of 144
Cohen's kappa score         0.41765441554749516
Accuracy score              0.9563667993693439
Recall score                0.7229061927535719
Precision score             0.31281337933195213

------------------------------------------------------

params: {'criterion': 'gini', 'max_depth': 35, 'max_features': 'log2', 'min_samples_split

Amount of features X_train: 53, X_test: 53


Iteration 11 of 144
Cohen's kappa score         0.42227894657701326
Accuracy score              0.9571729490304554
Recall score                0.7222964715105027
Precision score             0.3174507089937469

------------------------------------------------------

params: {'criterion': 'gini', 'max_depth': 35, 'max_features': 'log2', 'min_samples_split': 10, 'class_weight': 'balanced', 'n_estimators': 300}
RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight='balanced',
                       criterion='gini', max_depth=35, max_features='log2',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=10,
                       min_weight_fraction_leaf=0.0, n_estimators=300,
                       n_jobs=-1, oob_score=False, random_state=89, verbose=0,
                       warm_start=Fal

Recall score                0.7201107297328931
Precision score             0.31336987962938834

------------------------------------------------------

params: {'criterion': 'gini', 'max_depth': 35, 'max_features': 'log2', 'min_samples_split': 10, 'class_weight': None, 'n_estimators': 200}
RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=35, max_features='log2',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=10,
                       min_weight_fraction_leaf=0.0, n_estimators=200,
                       n_jobs=-1, oob_score=False, random_state=89, verbose=0,
                       warm_start=False)
Amount of features X_train: 53, X_test: 53
RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0,
                       class_weight='balanced_subsample', criterion='gini',
                       max_depth=35, max_features='sqrt', max_leaf_nodes=None,
                       max_samples=None, min_impurity_decrease=0.0,
                       min_impurity_split=None, min_samples_leaf=1,
                       min_samples_split=2, min_weight_fraction_leaf=0.0,
                       n_estimators=100, n_jobs=-1, oob_score=False,
                       random_state=89, verbose=0, warm_start=False)
Amount of features X_train: 53, X_test: 53
RandomForestClassifier(bootstrap=True, ccp_alpha=0.0,
                       class_weight='balanced_subsample', criterion='gini',
                       max_depth=35, max_features='sqrt', max_leaf_nodes=None,
                       max_samples=None, min_impurity_decrease=0.0,
                       min_impurity_split=None, min_samples_leaf=1,
                       min_samples_split=2, min_weight_

Amount of features X_train: 53, X_test: 53
RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=35, max_features='sqrt',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=300,
                       n_jobs=-1, oob_score=False, random_state=89, verbose=0,
                       warm_start=False)
Amount of features X_train: 53, X_test: 53


Iteration 27 of 144
Cohen's kappa score         0.4272640436107519
Accuracy score              0.9581376541936338
Recall score                0.7196153312228993
Precision score             0.32288530660748943

------------------------------------------------------

params: {'criterion': 'gini', 'max_depth': 35, 'max_features': 'sqrt', 'min_samples_split': 10, 'class_we

Amount of features X_train: 53, X_test: 53


Iteration 32 of 144
Cohen's kappa score         0.42630796503729795
Accuracy score              0.9578202248049577
Recall score                0.7225060631878077
Precision score             0.32139190112024335

------------------------------------------------------

params: {'criterion': 'gini', 'max_depth': 35, 'max_features': 'sqrt', 'min_samples_split': 10, 'class_weight': 'balanced_subsample', 'n_estimators': 300}
RandomForestClassifier(bootstrap=True, ccp_alpha=0.0,
                       class_weight='balanced_subsample', criterion='gini',
                       max_depth=35, max_features='sqrt', max_leaf_nodes=None,
                       max_samples=None, min_impurity_decrease=0.0,
                       min_impurity_split=None, min_samples_leaf=1,
                       min_samples_split=10, min_weight_fraction_leaf=0.0,
                       n_estimators=300, n_jobs=-1, oob_score=False,
                       random_state=89, verb

params: {'criterion': 'gini', 'max_depth': None, 'max_features': 'log2', 'min_samples_split': 2, 'class_weight': 'balanced', 'n_estimators': 200}
RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight='balanced',
                       criterion='gini', max_depth=None, max_features='log2',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=200,
                       n_jobs=-1, oob_score=False, random_state=89, verbose=0,
                       warm_start=False)
Amount of features X_train: 53, X_test: 53
RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight='balanced',
                       criterion='gini', max_depth=None, max_features='log2',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decreas

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='log2',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=-1, oob_score=False, random_state=89, verbose=0,
                       warm_start=False)
Amount of features X_train: 53, X_test: 53
RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='log2',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimat

Amount of features X_train: 53, X_test: 53
RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight='balanced',
                       criterion='gini', max_depth=None, max_features='log2',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=10,
                       min_weight_fraction_leaf=0.0, n_estimators=300,
                       n_jobs=-1, oob_score=False, random_state=89, verbose=0,
                       warm_start=False)
Amount of features X_train: 53, X_test: 53


Iteration 48 of 144
Cohen's kappa score         0.4233324801932614
Accuracy score              0.9572897961574807
Recall score                0.7232872685304902
Precision score             0.3183077063138779

------------------------------------------------------

params: {'criterion': 'gini', 'max_depth': None, 'max_features': 'log2', 'min_samples_split': 10,

Amount of features X_train: 53, X_test: 53


Iteration 53 of 144
Cohen's kappa score         0.42298641261833736
Accuracy score              0.9573016400624543
Recall score                0.7220841578633626
Precision score             0.3181860707234751

------------------------------------------------------

params: {'criterion': 'gini', 'max_depth': None, 'max_features': 'log2', 'min_samples_split': 10, 'class_weight': None, 'n_estimators': 300}
RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='log2',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=10,
                       min_weight_fraction_leaf=0.0, n_estimators=300,
                       n_jobs=-1, oob_score=False, random_state=89, verbose=0,
                       warm_start=False)
Amou

Precision score             0.3149386192192594

------------------------------------------------------

params: {'criterion': 'gini', 'max_depth': None, 'max_features': 'sqrt', 'min_samples_split': 2, 'class_weight': 'balanced_subsample', 'n_estimators': 200}
RandomForestClassifier(bootstrap=True, ccp_alpha=0.0,
                       class_weight='balanced_subsample', criterion='gini',
                       max_depth=None, max_features='sqrt', max_leaf_nodes=None,
                       max_samples=None, min_impurity_decrease=0.0,
                       min_impurity_split=None, min_samples_leaf=1,
                       min_samples_split=2, min_weight_fraction_leaf=0.0,
                       n_estimators=200, n_jobs=-1, oob_score=False,
                       random_state=89, verbose=0, warm_start=False)
Amount of features X_train: 53, X_test: 53
RandomForestClassifier(bootstrap=True, ccp_alpha=0.0,
                       class_weight='balanced_subsample', criterion='gini',
        

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight='balanced',
                       criterion='gini', max_depth=None, max_features='sqrt',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=10,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=-1, oob_score=False, random_state=89, verbose=0,
                       warm_start=False)
Amount of features X_train: 53, X_test: 53
RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight='balanced',
                       criterion='gini', max_depth=None, max_features='sqrt',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=10,
                       min_weight_fraction_leaf=

Amount of features X_train: 53, X_test: 53
RandomForestClassifier(bootstrap=True, ccp_alpha=0.0,
                       class_weight='balanced_subsample', criterion='gini',
                       max_depth=None, max_features='sqrt', max_leaf_nodes=None,
                       max_samples=None, min_impurity_decrease=0.0,
                       min_impurity_split=None, min_samples_leaf=1,
                       min_samples_split=10, min_weight_fraction_leaf=0.0,
                       n_estimators=300, n_jobs=-1, oob_score=False,
                       random_state=89, verbose=0, warm_start=False)
Amount of features X_train: 53, X_test: 53


Iteration 69 of 144
Cohen's kappa score         0.4285003160562595
Accuracy score              0.9581992934410237
Recall score                0.7220596601348463
Precision score             0.32365475817186445

------------------------------------------------------

params: {'criterion': 'gini', 'max_depth': None, 'max_features': 'sqrt', 'min_samples_

Amount of features X_train: 53, X_test: 53


Iteration 74 of 144
Cohen's kappa score         0.4038591508263386
Accuracy score              0.9536956804132377
Recall score                0.7274056088910422
Precision score             0.2988320016997199

------------------------------------------------------

params: {'criterion': 'entropy', 'max_depth': 35, 'max_features': 'log2', 'min_samples_split': 2, 'class_weight': 'balanced', 'n_estimators': 300}
RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight='balanced',
                       criterion='entropy', max_depth=35, max_features='log2',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=300,
                       n_jobs=-1, oob_score=False, random_state=89, verbose=0,
                       warm_start=

Recall score                0.7243760564645423
Precision score             0.29872482769458725

------------------------------------------------------

params: {'criterion': 'entropy', 'max_depth': 35, 'max_features': 'log2', 'min_samples_split': 2, 'class_weight': None, 'n_estimators': 200}
RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='entropy', max_depth=35, max_features='log2',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=200,
                       n_jobs=-1, oob_score=False, random_state=89, verbose=0,
                       warm_start=False)
Amount of features X_train: 53, X_test: 53
RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='entropy', max

Amount of features X_train: 53, X_test: 53
RandomForestClassifier(bootstrap=True, ccp_alpha=0.0,
                       class_weight='balanced_subsample', criterion='entropy',
                       max_depth=35, max_features='log2', max_leaf_nodes=None,
                       max_samples=None, min_impurity_decrease=0.0,
                       min_impurity_split=None, min_samples_leaf=1,
                       min_samples_split=10, min_weight_fraction_leaf=0.0,
                       n_estimators=100, n_jobs=-1, oob_score=False,
                       random_state=89, verbose=0, warm_start=False)
Amount of features X_train: 53, X_test: 53


Iteration 85 of 144
Cohen's kappa score         0.4041114999652273
Accuracy score              0.9536770867559672
Recall score                0.728445401368062
Precision score             0.29890620235647325

------------------------------------------------------

params: {'criterion': 'entropy', 'max_depth': 35, 'max_features': 'log2', 'min_samples

Amount of features X_train: 53, X_test: 53


Iteration 90 of 144
Cohen's kappa score         0.40729831851297327
Accuracy score              0.9542778147103846
Recall score                0.7279309490692224
Precision score             0.3020048356369049

------------------------------------------------------

params: {'criterion': 'entropy', 'max_depth': 35, 'max_features': 'sqrt', 'min_samples_split': 2, 'class_weight': 'balanced', 'n_estimators': 100}
RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight='balanced',
                       criterion='entropy', max_depth=35, max_features='sqrt',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=-1, oob_score=False, random_state=89, verbose=0,
                       warm_start

Recall score                0.7272314028215939
Precision score             0.2996228514137745

------------------------------------------------------

params: {'criterion': 'entropy', 'max_depth': 35, 'max_features': 'sqrt', 'min_samples_split': 2, 'class_weight': 'balanced_subsample', 'n_estimators': 300}
RandomForestClassifier(bootstrap=True, ccp_alpha=0.0,
                       class_weight='balanced_subsample', criterion='entropy',
                       max_depth=35, max_features='sqrt', max_leaf_nodes=None,
                       max_samples=None, min_impurity_decrease=0.0,
                       min_impurity_split=None, min_samples_leaf=1,
                       min_samples_split=2, min_weight_fraction_leaf=0.0,
                       n_estimators=300, n_jobs=-1, oob_score=False,
                       random_state=89, verbose=0, warm_start=False)
Amount of features X_train: 53, X_test: 53
RandomForestClassifier(bootstrap=True, ccp_alpha=0.0,
                       class_weight

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight='balanced',
                       criterion='entropy', max_depth=35, max_features='sqrt',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=10,
                       min_weight_fraction_leaf=0.0, n_estimators=200,
                       n_jobs=-1, oob_score=False, random_state=89, verbose=0,
                       warm_start=False)
Amount of features X_train: 53, X_test: 53
RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight='balanced',
                       criterion='entropy', max_depth=35, max_features='sqrt',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=10,
                       min_weight_fraction_lea

                       warm_start=False)
Amount of features X_train: 53, X_test: 53
RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='entropy', max_depth=35, max_features='sqrt',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=10,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=-1, oob_score=False, random_state=89, verbose=0,
                       warm_start=False)
Amount of features X_train: 53, X_test: 53


Iteration 106 of 144
Cohen's kappa score         0.4029180264572312
Accuracy score              0.9535615131671111
Recall score                0.7268312732558297
Precision score             0.2980344885317261

------------------------------------------------------

params: {'criterion': 'entropy', 'max_depth': 35, 'max_featu

Amount of features X_train: 53, X_test: 53


Iteration 111 of 144
Cohen's kappa score         0.40658412270854416
Accuracy score              0.9541723657499739
Recall score                0.7275798149604906
Precision score             0.30138334684470863

------------------------------------------------------

params: {'criterion': 'entropy', 'max_depth': None, 'max_features': 'log2', 'min_samples_split': 2, 'class_weight': 'balanced_subsample', 'n_estimators': 100}
RandomForestClassifier(bootstrap=True, ccp_alpha=0.0,
                       class_weight='balanced_subsample', criterion='entropy',
                       max_depth=None, max_features='log2', max_leaf_nodes=None,
                       max_samples=None, min_impurity_decrease=0.0,
                       min_impurity_split=None, min_samples_leaf=1,
                       min_samples_split=2, min_weight_fraction_leaf=0.0,
                       n_estimators=100, n_jobs=-1, oob_score=False,
                       random_state

Accuracy score              0.9544446482105515
Recall score                0.7257424853217777
Precision score             0.30254141414599883

------------------------------------------------------

params: {'criterion': 'entropy', 'max_depth': None, 'max_features': 'log2', 'min_samples_split': 2, 'class_weight': None, 'n_estimators': 300}
RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='entropy', max_depth=None, max_features='log2',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=300,
                       n_jobs=-1, oob_score=False, random_state=89, verbose=0,
                       warm_start=False)
Amount of features X_train: 53, X_test: 53
RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=No

params: {'criterion': 'entropy', 'max_depth': None, 'max_features': 'log2', 'min_samples_split': 10, 'class_weight': 'balanced_subsample', 'n_estimators': 200}
RandomForestClassifier(bootstrap=True, ccp_alpha=0.0,
                       class_weight='balanced_subsample', criterion='entropy',
                       max_depth=None, max_features='log2', max_leaf_nodes=None,
                       max_samples=None, min_impurity_decrease=0.0,
                       min_impurity_split=None, min_samples_leaf=1,
                       min_samples_split=10, min_weight_fraction_leaf=0.0,
                       n_estimators=200, n_jobs=-1, oob_score=False,
                       random_state=89, verbose=0, warm_start=False)
Amount of features X_train: 53, X_test: 53
RandomForestClassifier(bootstrap=True, ccp_alpha=0.0,
                       class_weight='balanced_subsample', criterion='entropy',
                       max_depth=None, max_features='log2', max_leaf_nodes=None,
                    

Amount of features X_train: 53, X_test: 53
RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight='balanced',
                       criterion='entropy', max_depth=None, max_features='sqrt',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=-1, oob_score=False, random_state=89, verbose=0,
                       warm_start=False)
Amount of features X_train: 53, X_test: 53


Iteration 127 of 144
Cohen's kappa score         0.40605155391143943
Accuracy score              0.9543802071791894
Recall score                0.7226231078907184
Precision score             0.3016792026372697

------------------------------------------------------

params: {'criterion': 'entropy', 'max_depth': None, 'max_features': 'sqrt', 'min_samples_spli

Amount of features X_train: 53, X_test: 53


Iteration 132 of 144
Cohen's kappa score         0.4062406743295781
Accuracy score              0.9541092619336894
Recall score                0.7276124785985122
Precision score             0.3010523446879343

------------------------------------------------------

params: {'criterion': 'entropy', 'max_depth': None, 'max_features': 'sqrt', 'min_samples_split': 2, 'class_weight': None, 'n_estimators': 100}
RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='entropy', max_depth=None, max_features='sqrt',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=-1, oob_score=False, random_state=89, verbose=0,
                       warm_start=False)


Precision score             0.301531031532495

------------------------------------------------------

params: {'criterion': 'entropy', 'max_depth': None, 'max_features': 'sqrt', 'min_samples_split': 10, 'class_weight': 'balanced', 'n_estimators': 300}
RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight='balanced',
                       criterion='entropy', max_depth=None, max_features='sqrt',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=10,
                       min_weight_fraction_leaf=0.0, n_estimators=300,
                       n_jobs=-1, oob_score=False, random_state=89, verbose=0,
                       warm_start=False)
Amount of features X_train: 53, X_test: 53
RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight='balanced',
                       criterion='entropy', max_depth=None, max_features

params: {'criterion': 'entropy', 'max_depth': None, 'max_features': 'sqrt', 'min_samples_split': 10, 'class_weight': None, 'n_estimators': 200}
RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='entropy', max_depth=None, max_features='sqrt',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=10,
                       min_weight_fraction_leaf=0.0, n_estimators=200,
                       n_jobs=-1, oob_score=False, random_state=89, verbose=0,
                       warm_start=False)
Amount of features X_train: 53, X_test: 53
RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='entropy', max_depth=None, max_features='sqrt',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, 

In [13]:
sorted(params_and_score,key=lambda x:(-x[1],x[0]))

[({'criterion': 'gini',
   'max_depth': None,
   'max_features': 'sqrt',
   'min_samples_split': 10,
   'class_weight': None,
   'n_estimators': 300},
  0.4297846249129419),
 ({'criterion': 'gini',
   'max_depth': 35,
   'max_features': 'sqrt',
   'min_samples_split': 10,
   'class_weight': None,
   'n_estimators': 300},
  0.42912782104665137),
 ({'criterion': 'gini',
   'max_depth': None,
   'max_features': 'sqrt',
   'min_samples_split': 10,
   'class_weight': 'balanced_subsample',
   'n_estimators': 300},
  0.4285003160562595),
 ({'criterion': 'gini',
   'max_depth': 35,
   'max_features': 'sqrt',
   'min_samples_split': 10,
   'class_weight': 'balanced_subsample',
   'n_estimators': 300},
  0.42844265073097276),
 ({'criterion': 'gini',
   'max_depth': None,
   'max_features': 'sqrt',
   'min_samples_split': 2,
   'class_weight': None,
   'n_estimators': 200},
  0.428351471627776),
 ({'criterion': 'gini',
   'max_depth': None,
   'max_features': 'sqrt',
   'min_samples_split': 10,
 