In [20]:
import pickle
from sklearn.svm import LinearSVC
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
import numpy as np
import operator

from sklearn.model_selection import train_test_split
from sklearn.metrics import cohen_kappa_score, accuracy_score, recall_score, precision_score
from bayes_opt import BayesianOptimization

from general_functions import create_balanced_dataset

In [None]:
# Load the zone 4 and zone 7 frames that serve as test sets in the experiments.
# Each file is opened in a `with` block so the handle is closed right after
# reading (the previous bare open() calls leaked both handles).
# NOTE: pickle.load can execute arbitrary code - only load trusted files.
with open("dataset/zone_4.pickle", "rb") as file:
    zone_4 = pickle.load(file)

with open("dataset/zone_7.pickle", "rb") as file:
    zone_7 = pickle.load(file)

In [None]:
# Read the two resampled zone frames in order; every handle is closed as soon
# as its pickle has been deserialised.
resampled_frames = []
for pickle_path in ("dataset/zone_4_resampled.pickle", "dataset/zone_7_resampled.pickle"):
    with open(pickle_path, "rb") as file:
        resampled_frames.append(pickle.load(file))
zone_4_resampled, zone_7_resampled = resampled_frames
# Drop the temporaries so only the two frame names stay in the namespace.
del resampled_frames, pickle_path

In [4]:
# Each entry is a (training frame, test frame) pair, unpacked in that order by
# the experiment loop: train on one zone's resampled data, test on the other zone.
experiment_arr = [
    (zone_4_resampled, zone_7),
    (zone_7_resampled, zone_4),
]

In [5]:
# Unbind the four frame names in one statement. The frames themselves stay
# alive because experiment_arr still references them.
zone_4 = zone_7 = zone_4_resampled = zone_7_resampled = None

In [6]:
# Candidate feature subsets, evaluated one after another by the experiment loop.
# The lists shrink from the full 81-column set down to 10 columns
# (81, 50, 35, 25, 20, 15, 10).
# NOTE(review): the smaller lists look like importance-ranked selections -
# confirm where the ranking was produced.
features_to_use = [
    ['hpmf_raw', 'skyview_raw', 'impundment_raw', 'slope_raw', 'DEM_ditch_detection', 'DEM_ditch_detection_no_streams',
     'conic_mean', 'skyview_non_ditch', 'skyview_gabor', 'conic_mean_no_streams', 'skyview_gabor_no_streams',
     'skyview_mean_2', 'skyview_mean_3', 'skyview_mean_4', 'skyview_mean_6', 'skyview_median_2', 'skyview_median_4',
     'skyview_median_6', 'skyview_min_2', 'skyview_min_4', 'skyview_min_6', 'skyview_max_2', 'skyview_max_4',
     'skyview_max_6', 'skyview_std_2', 'skyview_std_4', 'skyview_std_6', 'impundment_amplified',
     'impoundment_amplified_no_streams', 'impundment_mean_2', 'impundment_mean_3', 'impundment_mean_4',
     'impundment_mean_6', 'impundment_median_2', 'impundment_median_4', 'impundment_median_6', 'impundment_min_2',
     'impundment_min_4', 'impundment_min_6', 'impundment_max_2', 'impundment_max_4', 'impundment_max_6',
     'impundment_std_2', 'impundment_std_4', 'impundment_std_6', 'hpmf_filter', 'hpmf_gabor', 'hpmf_gabor_no_streams',
     'hpmf_filter_no_streams', 'hpmf_mean_2', 'hpmf_mean_3', 'hpmf_mean_4', 'hpmf_mean_6', 'hpmf_median_2',
     'hpmf_median_4', 'hpmf_median_6', 'hpmf_min_2', 'hpmf_min_4', 'hpmf_min_6', 'hpmf_max_2', 'hpmf_max_4',
     'hpmf_max_6', 'hpmf_std_2', 'hpmf_std_4', 'hpmf_std_6', 'slope_non_ditch', 'slope_mean_2', 'slope_mean_3',
     'slope_mean_4', 'slope_mean_6', 'slope_median_2', 'slope_median_4', 'slope_median_6', 'slope_min_2', 'slope_min_4',
     'slope_min_6', 'slope_max_2', 'slope_max_4', 'slope_std_2', 'slope_std_4', 'slope_std_6'],
    ['impundment_mean_3', 'impundment_mean_4', 'impundment_median_4', 'impundment_mean_2', 'hpmf_mean_4', 'hpmf_mean_3',
     'impundment_amplified', 'hpmf_median_4', 'impoundment_amplified_no_streams', 'skyview_max_6', 'skyview_gabor',
     'hpmf_min_2', 'impundment_max_2', 'impundment_std_4', 'skyview_non_ditch', 'skyview_gabor_no_streams', 'slope_min_6',
     'skyview_max_4', 'impundment_median_2', 'impundment_max_6', 'slope_non_ditch', 'impundment_mean_6', 'hpmf_filter',
     'impundment_std_6', 'hpmf_filter_no_streams', 'impundment_median_6', 'hpmf_mean_6', 'slope_min_4', 'slope_std_6',
     'hpmf_mean_2', 'slope_median_6', 'impundment_max_4', 'impundment_std_2', 'hpmf_gabor', 'hpmf_min_4', 'skyview_min_6',
     'skyview_median_6', 'slope_mean_6', 'hpmf_gabor_no_streams', 'skyview_std_6', 'slope_min_2', 'hpmf_median_2',
     'skyview_max_2', 'hpmf_std_6', 'slope_std_4', 'hpmf_min_6', 'slope_max_4', 'skyview_mean_6', 'hpmf_max_6', 'skyview_min_4'],
    ['impundment_mean_3', 'impundment_mean_4', 'impundment_median_4', 'impundment_mean_2', 'impundment_amplified',
     'impoundment_amplified_no_streams', 'hpmf_median_4', 'skyview_max_6', 'hpmf_mean_4', 'skyview_gabor',
     'skyview_non_ditch', 'slope_min_6', 'skyview_max_4', 'skyview_gabor_no_streams', 'slope_median_6', 'impundment_max_6',
     'hpmf_filter', 'skyview_median_6', 'slope_non_ditch', 'impundment_std_4', 'slope_std_6', 'hpmf_filter_no_streams',
     'impundment_median_6', 'hpmf_mean_6', 'slope_min_4', 'hpmf_mean_3', 'impundment_mean_6', 'slope_min_2', 'slope_mean_6',
     'skyview_max_2', 'slope_std_4', 'impundment_median_2', 'hpmf_std_6', 'skyview_min_6', 'impundment_std_6'],
    ['impundment_mean_3', 'impundment_mean_4', 'impundment_median_4', 'impundment_amplified', 'hpmf_median_4',
     'impoundment_amplified_no_streams', 'impundment_mean_2', 'skyview_max_6', 'skyview_gabor', 'skyview_non_ditch',
     'slope_min_6', 'hpmf_mean_6', 'skyview_median_6', 'skyview_max_4', 'skyview_min_6', 'impundment_max_6', 'slope_std_6',
     'slope_median_6', 'skyview_gabor_no_streams', 'hpmf_std_6', 'impundment_std_4', 'slope_std_4', 'hpmf_filter',
     'impundment_median_6', 'slope_mean_6'],
    ['impundment_mean_3', 'hpmf_median_4', 'impundment_mean_4', 'impundment_amplified', 'skyview_non_ditch', 'skyview_max_6',
     'hpmf_filter', 'impoundment_amplified_no_streams', 'hpmf_mean_6', 'slope_min_6', 'impundment_max_6', 'skyview_median_6',
     'skyview_gabor', 'skyview_min_6', 'skyview_max_4', 'slope_median_6', 'impundment_mean_2', 'impundment_median_6',
     'hpmf_std_6', 'slope_std_6'],
    ['impundment_mean_3', 'impundment_mean_4', 'hpmf_median_4', 'skyview_gabor', 'impundment_amplified', 'skyview_max_6',
     'skyview_non_ditch', 'impundment_max_6', 'hpmf_filter', 'impoundment_amplified_no_streams', 'slope_min_6',
     'hpmf_mean_6', 'slope_median_6', 'slope_std_6', 'skyview_median_6'],
    ['impundment_mean_3', 'impundment_mean_4', 'skyview_max_6', 'hpmf_median_4', 'skyview_gabor', 'impundment_max_6',
     'impundment_amplified', 'hpmf_filter', 'slope_min_6', 'skyview_non_ditch']
]

In [7]:
# Cross-zone evaluation of a linear SVM for every candidate feature subset.
# For each subset the model is trained and tested once per (train, test) pair
# in experiment_arr; metrics are computed on the pooled predictions of all pairs.
for most_important_features in features_to_use:
    num_features = len(most_important_features)
    print(f"Features used in experiment:\n{most_important_features}")
    print("\n")
    y_test_all = []
    pred_all = []

    for (training_dataset, test_dataset) in experiment_arr:
        # Select the feature columns once; the label column is dropped in case
        # it was listed among the features.
        X_train = training_dataset.filter(items=most_important_features).drop(columns="label_3m", errors="ignore")
        y_train = training_dataset["label_3m"]

        clf = make_pipeline(StandardScaler(), LinearSVC(tol=1e-5))
        clf.fit(X_train, y_train)

        X_test = test_dataset.filter(items=most_important_features).drop(columns="label_3m", errors="ignore")
        y_test = test_dataset["label_3m"]

        print(f"Amount of features X_train: {len(X_train.columns)}, X_test: {len(X_test.columns)}")

        pred = clf.predict(X_test)

        # Pair labels and predictions by position. `y_test[i]` is a label-based
        # lookup on a Series, which misaligns (or raises) whenever the frame's
        # index is not exactly 0..n-1, and is slow for large frames.
        y_test_all.append(y_test.to_numpy())
        pred_all.append(pred)

    # Pool the per-pair results into flat arrays for the metric functions.
    y_test_all = np.concatenate(y_test_all)
    pred_all = np.concatenate(pred_all)

    print("\n")
    print(f"Amount of features used:   {num_features}")
    print("Cohen's kappa score        ", cohen_kappa_score(y_test_all, pred_all))
    print("Accuracy score             ", accuracy_score(y_test_all, pred_all))
    print("Recall score               ", recall_score(y_test_all, pred_all))
    print("Precision score            ", precision_score(y_test_all, pred_all))
    print("\n------------------------------------------------------\n")

Features used in experiment:
['hpmf_raw', 'skyview_raw', 'impundment_raw', 'slope_raw', 'DEM_ditch_detection', 'DEM_ditch_detection_no_streams', 'conic_mean', 'skyview_non_ditch', 'skyview_gabor', 'conic_mean_no_streams', 'skyview_gabor_no_streams', 'skyview_mean_2', 'skyview_mean_3', 'skyview_mean_4', 'skyview_mean_6', 'skyview_median_2', 'skyview_median_4', 'skyview_median_6', 'skyview_min_2', 'skyview_min_4', 'skyview_min_6', 'skyview_max_2', 'skyview_max_4', 'skyview_max_6', 'skyview_std_2', 'skyview_std_4', 'skyview_std_6', 'impundment_amplified', 'impoundment_amplified_no_streams', 'impundment_mean_2', 'impundment_mean_3', 'impundment_mean_4', 'impundment_mean_6', 'impundment_median_2', 'impundment_median_4', 'impundment_median_6', 'impundment_min_2', 'impundment_min_4', 'impundment_min_6', 'impundment_max_2', 'impundment_max_4', 'impundment_max_6', 'impundment_std_2', 'impundment_std_4', 'impundment_std_6', 'hpmf_filter', 'hpmf_gabor', 'hpmf_gabor_no_streams', 'hpmf_filter_no_st



Amount of features X_train: 81, X_test: 81




Amount of features X_train: 81, X_test: 81


Amount of features used:   81
Cohen's kappa score         0.18727655799575305
Accuracy score              0.8805862478254336
Recall score                0.7177208402176487
Precision score             0.12955318187178178

------------------------------------------------------

Features used in experiment:
['impundment_mean_3', 'impundment_mean_4', 'impundment_median_4', 'impundment_mean_2', 'hpmf_mean_4', 'hpmf_mean_3', 'impundment_amplified', 'hpmf_median_4', 'impoundment_amplified_no_streams', 'skyview_max_6', 'skyview_gabor', 'hpmf_min_2', 'impundment_max_2', 'impundment_std_4', 'skyview_non_ditch', 'skyview_gabor_no_streams', 'slope_min_6', 'skyview_max_4', 'impundment_median_2', 'impundment_max_6', 'slope_non_ditch', 'impundment_mean_6', 'hpmf_filter', 'impundment_std_6', 'hpmf_filter_no_streams', 'impundment_median_6', 'hpmf_mean_6', 'slope_min_4', 'slope_std_6', 'hpmf_mean_2', 'slope_median_6', 'impundment_max_4', 'impundment_std_2', '



Amount of features X_train: 50, X_test: 50




Amount of features X_train: 50, X_test: 50


Amount of features used:   50
Cohen's kappa score         0.18824868151082264
Accuracy score              0.8811226621023058
Recall score                0.7183142296417071
Precision score             0.13017118402282454

------------------------------------------------------

Features used in experiment:
['impundment_mean_3', 'impundment_mean_4', 'impundment_median_4', 'impundment_mean_2', 'impundment_amplified', 'impoundment_amplified_no_streams', 'hpmf_median_4', 'skyview_max_6', 'hpmf_mean_4', 'skyview_gabor', 'skyview_non_ditch', 'slope_min_6', 'skyview_max_4', 'skyview_gabor_no_streams', 'slope_median_6', 'impundment_max_6', 'hpmf_filter', 'skyview_median_6', 'slope_non_ditch', 'impundment_std_4', 'slope_std_6', 'hpmf_filter_no_streams', 'impundment_median_6', 'hpmf_mean_6', 'slope_min_4', 'hpmf_mean_3', 'impundment_mean_6', 'slope_min_2', 'slope_mean_6', 'skyview_max_2', 'slope_std_4', 'impundment_median_2', 'hpmf_std_6', 'skyview_min_



Amount of features X_train: 35, X_test: 35




Amount of features X_train: 35, X_test: 35


Amount of features used:   35
Cohen's kappa score         0.1714795066932714
Accuracy score              0.8722774937787661
Recall score                0.7021811144288899
Precision score             0.11974312378008165

------------------------------------------------------

Features used in experiment:
['impundment_mean_3', 'impundment_mean_4', 'impundment_median_4', 'impundment_amplified', 'hpmf_median_4', 'impoundment_amplified_no_streams', 'impundment_mean_2', 'skyview_max_6', 'skyview_gabor', 'skyview_non_ditch', 'slope_min_6', 'hpmf_mean_6', 'skyview_median_6', 'skyview_max_4', 'skyview_min_6', 'impundment_max_6', 'slope_std_6', 'slope_median_6', 'skyview_gabor_no_streams', 'hpmf_std_6', 'impundment_std_4', 'slope_std_4', 'hpmf_filter', 'impundment_median_6', 'slope_mean_6']






Amount of features X_train: 25, X_test: 25




Amount of features X_train: 25, X_test: 25


Amount of features used:   25
Cohen's kappa score         0.24865794283515552
Accuracy score              0.9182624736695983
Recall score                0.6677590838938323
Precision score             0.1743715935551556

------------------------------------------------------

Features used in experiment:
['impundment_mean_3', 'hpmf_median_4', 'impundment_mean_4', 'impundment_amplified', 'skyview_non_ditch', 'skyview_max_6', 'hpmf_filter', 'impoundment_amplified_no_streams', 'hpmf_mean_6', 'slope_min_6', 'impundment_max_6', 'skyview_median_6', 'skyview_gabor', 'skyview_min_6', 'skyview_max_4', 'slope_median_6', 'impundment_mean_2', 'impundment_median_6', 'hpmf_std_6', 'slope_std_6']






Amount of features X_train: 20, X_test: 20




Amount of features X_train: 20, X_test: 20


Amount of features used:   20
Cohen's kappa score         0.24740216945734117
Accuracy score              0.91803737579819
Recall score                0.6655842299955632
Precision score             0.17356471225393572

------------------------------------------------------

Features used in experiment:
['impundment_mean_3', 'impundment_mean_4', 'hpmf_median_4', 'skyview_gabor', 'impundment_amplified', 'skyview_max_6', 'skyview_non_ditch', 'impundment_max_6', 'hpmf_filter', 'impoundment_amplified_no_streams', 'slope_min_6', 'hpmf_mean_6', 'slope_median_6', 'slope_std_6', 'skyview_median_6']






Amount of features X_train: 15, X_test: 15




Amount of features X_train: 15, X_test: 15


Amount of features used:   15
Cohen's kappa score         0.21965942286768703
Accuracy score              0.9057264007009554
Recall score                0.6693868218552402
Precision score             0.15322519343185304

------------------------------------------------------

Features used in experiment:
['impundment_mean_3', 'impundment_mean_4', 'skyview_max_6', 'hpmf_median_4', 'skyview_gabor', 'impundment_max_6', 'impundment_amplified', 'hpmf_filter', 'slope_min_6', 'skyview_non_ditch']






Amount of features X_train: 10, X_test: 10




Amount of features X_train: 10, X_test: 10


Amount of features used:   10
Cohen's kappa score         0.20622193514888065
Accuracy score              0.8997941325549468
Recall score                0.6643103481127222
Precision score             0.14403708709329022

------------------------------------------------------



# Optimization

In [None]:
def optim_function(tol=0.0001,
                   C=1,
                   intercept_scaling=1,
                   verbose=0,
                   max_iter=1000,
                   class_weight=0,
                   fit_intercept = 0,
                   dual = 0,
                   loss = 0,
                   penalty = 0):
    """Objective for Bayesian optimisation of a scaled LinearSVC.

    All arguments arrive as floats from the optimiser. `tol`, `C` and
    `intercept_scaling` are passed through unchanged; `max_iter` and
    `verbose` are truncated to int. The remaining arguments are switches:
    a value whose integer part is 0 selects the first option, anything
    else the second:

    - class_weight:  None / "balanced"
    - fit_intercept: False / True
    - dual:          False / True
    - loss:          "squared_hinge" / "hinge"
    - penalty:       "l2" / "l1"

    The model is fitted on the module-level `X_train`, `y_train` and scored
    on the module-level `X_test`, `y_test`.

    Returns:
        Cohen's kappa on the test split, or -1.0 (the worst possible kappa)
        when the decoded penalty/loss/dual combination is one LinearSVC
        does not implement.
    """
    max_iter = int(max_iter)
    verbose = int(verbose)
    class_weight = None if int(class_weight) == 0 else "balanced"
    fit_intercept = int(fit_intercept) != 0
    dual = int(dual) != 0
    loss = "squared_hinge" if int(loss) == 0 else "hinge"
    penalty = "l2" if int(penalty) == 0 else "l1"

    # LinearSVC raises ValueError for unsupported solver combinations, which
    # would abort the whole optimisation run. Score them as the worst kappa
    # instead so the optimiser steers away from that region.
    supported_combination = (
        (penalty == "l2" and loss == "squared_hinge")
        or (penalty == "l2" and loss == "hinge" and dual)
        or (penalty == "l1" and loss == "squared_hinge" and not dual)
    )
    if not supported_combination:
        return -1.0

    clf = make_pipeline(StandardScaler(), LinearSVC(tol=tol,
                                                    C=C,
                                                    intercept_scaling=intercept_scaling,
                                                    verbose=verbose,
                                                    max_iter=max_iter,
                                                    class_weight=class_weight,
                                                    fit_intercept=fit_intercept,
                                                    dual=dual,
                                                    loss=loss,
                                                    penalty=penalty))

    clf.fit(X_train, y_train)

    # X_train holds every feature column, so predict on the full X_test.
    # Indexing the NumPy array with a list of column names raised IndexError.
    pred = clf.predict(X_test)
    kappa = cohen_kappa_score(y_test, pred)
    return kappa

In [None]:
# Optimisation split: first pair of experiment_arr (resampled zone 4 for
# training, zone 7 for testing). The frames are read from experiment_arr
# because the zone_* names were reset to None in an earlier cell, so reading
# them here fails on a fresh Restart & Run All.
train_frame, test_frame = experiment_arr[0]

# The arrays below lose their column names, so both frames must share the
# same column order for the fitted model to see matching features.
assert list(train_frame.columns) == list(test_frame.columns), "train/test columns differ"

X_train, y_train = np.array(train_frame.loc[:, train_frame.columns != 'label_3m']), np.array(train_frame["label_3m"]).astype(int)
zone_4_resampled = None
zone_7_resampled = None

X_test, y_test = np.array(test_frame.loc[:, test_frame.columns != 'label_3m']), np.array(test_frame["label_3m"]).astype(int)
zone_7 = None
zone_4 = None

# Drop the temporary frame names; experiment_arr keeps the frames themselves.
del train_frame, test_frame

In [None]:
# Display the dimensions of the training array as a quick sanity check.
X_train.shape

In [None]:
# Search space handed to the optimiser: one (lower, upper) range per keyword
# argument of optim_function. The (0, 2) ranges are on/off switches that
# optim_function decodes by truncating the sampled value to an int.
# NOTE(review): "verbose" only changes solver logging, so searching over it
# adds a dimension without affecting the score - consider fixing it.
pbounds = {"tol": (0.000001, 1),
           "C": (0.1, 10),
           # LinearSVC requires a strictly positive intercept_scaling, so the
           # lower bound must not be 0.
           "intercept_scaling": (0.001, 10),
           "verbose": (0, 10),
           "max_iter": (10, 5000),
           "class_weight": (0, 2),
           "fit_intercept": (0, 2),
           "dual": (0, 2),
           "loss": (0, 2),
           # The colon was missing here, which made the whole cell a SyntaxError.
           "penalty": (0, 2)
          }

optimizer = BayesianOptimization(
    f=optim_function,
    pbounds=pbounds,
    random_state=1,
    verbose=2,
)

In [None]:
# Run the search for 100 optimisation iterations; the number of initial
# probes is left at the library default. Each iteration fits one model.
optimizer.maximize(n_iter=100)

In [None]:
# Display the optimiser's `max` attribute.
# NOTE(review): presumably the best score found and its parameters - confirm
# against the bayes_opt documentation.
optimizer.max