In [1]:
import pandas as pd
import numpy as np
from sklearn.feature_selection import SelectKBest, f_classif
import matplotlib.pyplot as plt

In [2]:
# Sensor numbers 1 through 5 (the per-sensor models cover 5 sensors).
sensors = list(range(1,6))

In [3]:
def top_10_features(X: pd.DataFrame, y: pd.DataFrame):
    """Return the names of the 10 highest-scoring feature columns of ``X``.

    Features are scored against the labels with ``f_classif`` through
    scikit-learn's ``SelectKBest``.

    Args:
        X: Feature table; its column names are what gets returned.
        y: Labels for the rows of ``X``; flattened to 1-D before fitting.

    Returns:
        list: The 10 selected feature names.
    """
    # SelectKBest expects a 1-D target, so flatten whatever shape y arrives in.
    labels = np.ravel(y)
    k_best = SelectKBest(f_classif, k=10).fit(X, labels)
    return list(k_best.get_feature_names_out())

def get_full_data(train, test):
    """Stack the train split on top of the test split with a fresh 0..n-1 index.

    Args:
        train: pandas DataFrame or Series holding the training rows.
        test: pandas object of the same kind holding the test rows.

    Returns:
        A new pandas object with the rows of ``train`` followed by the rows
        of ``test``; neither input is modified.
    """
    # DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
    # pd.concat is the supported equivalent and works on every version.
    return pd.concat([train, test], ignore_index=True)

def get_data_2022(folder: str):
    """Load a pickled 2022 feature table and drop the non-feature columns.

    Args:
        folder: Path to the pickle file (despite the name, a file, not a directory).

    Returns:
        pandas.DataFrame: The loaded table without the ``TIMESTAMP``,
        ``pit_number`` and ``Redox_error_flag`` columns. Columns from that
        list that are absent from the file are simply ignored.
    """
    exclude_features = ['TIMESTAMP', 'pit_number', 'Redox_error_flag']
    # Hand pandas the path so it opens *and closes* the file itself; wrapping
    # it in a bare open() left the handle unclosed.
    # NOTE: unpickling executes code from the file - only load trusted pickles.
    data = pd.read_pickle(folder)
    return data.loc[:, ~data.columns.isin(exclude_features)]

def get_data_2022_sensors(folder: str, sensor: int):
    """Load a pickled per-sensor table and keep only that sensor's feature columns.

    Args:
        folder: Path to the pickle file (despite the name, a file, not a directory).
        sensor: Sensor number substituted into the sensor-specific column names.

    Returns:
        pandas.DataFrame: 21 columns in a fixed order - the sensor's
        measurements, the three shared columns (``Water_level_Avg``,
        ``Temp_ottpls_Avg``, ``BatterymV_Min``), the Redox sigma columns and
        the nine wavelet-period columns.

    Raises:
        KeyError: If any of the expected columns is missing from the file.
    """
    wave_periods = ('0.5', '0.7', '0.9', '1.1', '1.5', '1.9', '2.5', '3.3', '4.4')
    features = [
        f'Redox_Avg({sensor})', f'EC_Avg({sensor})', f'Matric_potential_Avg({sensor})',
        f'Temp_T12_Avg({sensor})', 'Water_level_Avg', 'Temp_ottpls_Avg', 'BatterymV_Min',
        f'WC{sensor}', f'Redox_Avg({sensor})_sigma_b_24', f'Redox_Avg({sensor})_sigma_f_24',
        f'Redox_Avg({sensor})_sigma_b_12', f'Redox_Avg({sensor})_sigma_f_12',
    ] + [f'Wave_period_{period}({sensor})' for period in wave_periods]
    # Hand pandas the path so it opens *and closes* the file itself; wrapping
    # it in a bare open() left the handle unclosed.
    # NOTE: unpickling executes code from the file - only load trusted pickles.
    data = pd.read_pickle(folder)
    return data.loc[:, features]

def print_features(features):
    """Print ``features`` in consecutive chunks of five, one chunk per line.

    The last chunk holds whatever is left (one to five items); an empty
    sequence prints nothing.

    Args:
        features: Sliceable sequence of feature names.
    """
    for start in range(0, len(features), 5):
        # Slicing past the end simply yields the shorter final chunk.
        print(features[start:start + 5])

def get_sensor_top_10_columns(sensor):
    """Return the 10 best-scoring feature names for one sensor.

    Loads the sensor's train and test feature pickles and label pickles,
    stacks train on top of test with ``get_full_data`` and hands the result
    to ``top_10_features``.

    Args:
        sensor: Sensor number used in the pickle file names and column names.

    Returns:
        list: Feature names as returned by ``top_10_features``.
    """
    data_dir = '../../Data/Training/wavelet_pickle_data/2022_sensors'

    # Feature selection runs on the unscaled features only, so the pickles
    # under Scaled/ are not read here.
    train_X = get_data_2022_sensors(f'{data_dir}/X_train_sensor_{sensor}.pkl', sensor)
    test_X = get_data_2022_sensors(f'{data_dir}/X_test_sensor_{sensor}.pkl', sensor)

    # Passing the path lets pandas open and close each file itself instead of
    # leaving a bare open() handle behind.
    train_y = pd.read_pickle(f'{data_dir}/y_train_sensor_{sensor}.pkl')
    test_y = pd.read_pickle(f'{data_dir}/y_test_sensor_{sensor}.pkl')

    return top_10_features(get_full_data(train_X, test_X), get_full_data(train_y, test_y))

def get_sensor_wavelet_columns(sensor):
    """Return the nine ``Wave_period_<period>(<sensor>)`` column names for a sensor.

    Args:
        sensor: Sensor number (or any value) formatted into the parentheses.

    Returns:
        list[str]: Column names ordered by increasing wave period.
    """
    periods = ('0.5', '0.7', '0.9', '1.1', '1.5', '1.9', '2.5', '3.3', '4.4')
    return [f'Wave_period_{period}({sensor})' for period in periods]

# Support vector machine classifier (SVC)

We collected comparison results for 18 different SVC models:

1) Data from all pits and sensors with all columns
2) Data from all pits and sensors with top 10 feature columns (Described below)
3) Data from all pits and sensors with only wavelet feature columns
4) Data from all pits for each sensor (5 in total) with all sensor-specific columns + 3 common columns
5) Data from all pits for each sensor with the top 10 feature columns of the specific sensor (described below)
6) Data from all pits for each sensor with only the wavelet feature columns related to that sensor

Models were trained using grid search with 10-fold cross-validation. We used **recall** as the scoring metric for comparing results.

Parameters for training the SVC models:

1) Kernel: Polynomial (*linear, RBF and sigmoid kernels were also trained for comparison, but only the polynomial kernel gave good results*)
2) Polynomial kernel degree: 3, 4, 5, 6, 7
3) Regularization parameter: 0.5, 1, 2, 3, 4, 5, 6, 7

## ============================================================

# Features

## Features for model **1**

In [4]:
# List every column of the 2022 training table, after get_data_2022 has
# dropped its excluded columns.
folder = f'../../Data/Training/wavelet_pickle_data/2022/X_train.pkl'
train_X = get_data_2022(folder)
all_columns = list(train_X.columns.array)
print_features(all_columns)

['Redox_Avg(1)', 'Redox_Avg(2)', 'Redox_Avg(3)', 'Redox_Avg(4)', 'Redox_Avg(5)']
['Temp_T12_Avg(1)', 'EC_Avg(1)', 'Temp_T12_Avg(2)', 'EC_Avg(2)', 'Temp_T12_Avg(3)']
['EC_Avg(3)', 'Temp_T12_Avg(4)', 'EC_Avg(4)', 'Temp_T12_Avg(5)', 'EC_Avg(5)']
['Matric_potential_Avg(1)', 'Matric_potential_Avg(2)', 'Matric_potential_Avg(3)', 'Matric_potential_Avg(4)', 'Matric_potential_Avg(5)']
['Water_level_Avg', 'Temp_ottpls_Avg', 'BatterymV_Min', 'WC1', 'WC2']
['WC3', 'WC4', 'WC5', 'Wave_period_0.5(1)', 'Wave_period_0.7(1)']
['Wave_period_0.9(1)', 'Wave_period_1.1(1)', 'Wave_period_1.5(1)', 'Wave_period_1.9(1)', 'Wave_period_2.5(1)']
['Wave_period_3.3(1)', 'Wave_period_4.4(1)', 'Wave_period_0.5(2)', 'Wave_period_0.7(2)', 'Wave_period_0.9(2)']
['Wave_period_1.1(2)', 'Wave_period_1.5(2)', 'Wave_period_1.9(2)', 'Wave_period_2.5(2)', 'Wave_period_3.3(2)']
['Wave_period_4.4(2)', 'Wave_period_0.5(3)', 'Wave_period_0.7(3)', 'Wave_period_0.9(3)', 'Wave_period_1.1(3)']
['Wave_period_1.5(3)', 'Wave_period_1.9(3

## ____________________________________________________________
## Features for model **2**

In [5]:
# Get train and test data for 2022, then select the top 10 features on
# train + test combined.
# Train X
folder = f'../../Data/Training/wavelet_pickle_data/2022/X_train.pkl'
train_X = get_data_2022(folder)
# Test X
folder = f'../../Data/Training/wavelet_pickle_data/2022/X_test.pkl'
test_X = get_data_2022(folder)

# NOTE(review): the scaled frames are not used by the cells visible here -
# confirm a later cell needs them before keeping these two loads.
# Train X scaled
folder = f'../../Data/Training/wavelet_pickle_data/2022/Scaled/X_train_scaled.pkl'
train_X_scaled = get_data_2022(folder)
# Test X scaled
folder = f'../../Data/Training/wavelet_pickle_data/2022/Scaled/X_test_scaled.pkl'
test_X_scaled = get_data_2022(folder)

# Labels: pass the path so pandas opens and closes the file itself
# (a bare open() handle was never closed).
# Train y
folder = f'../../Data/Training/wavelet_pickle_data/2022/y_train.pkl'
train_y = pd.read_pickle(folder)
# Test y
folder = f'../../Data/Training/wavelet_pickle_data/2022/y_test.pkl'
test_y = pd.read_pickle(folder)

top_10_columns = top_10_features(get_full_data(train_X, test_X), get_full_data(train_y, test_y))
print_features(top_10_columns)

['Wave_period_1.5(5)', 'Wave_period_1.9(5)', 'Wave_period_2.5(5)', 'Redox_Avg(2)_sigma_f_24', 'Redox_Avg(3)_sigma_b_24']
['Redox_Avg(3)_sigma_f_24', 'Redox_Avg(4)_sigma_b_24', 'Redox_Avg(4)_sigma_f_24', 'Redox_Avg(5)_sigma_b_24', 'Redox_Avg(5)_sigma_f_24']


## ____________________________________________________________
## Features for model **3**

In [6]:
# All 45 wavelet columns: the nine wave periods for each of sensors 1-5,
# grouped sensor by sensor.
all_wavelet_features = [
    f'Wave_period_{period}({sensor_id})'
    for sensor_id in range(1, 6)
    for period in ('0.5', '0.7', '0.9', '1.1', '1.5', '1.9', '2.5', '3.3', '4.4')
]

print_features(all_wavelet_features)

['Wave_period_0.5(1)', 'Wave_period_0.7(1)', 'Wave_period_0.9(1)', 'Wave_period_1.1(1)', 'Wave_period_1.5(1)']
['Wave_period_1.9(1)', 'Wave_period_2.5(1)', 'Wave_period_3.3(1)', 'Wave_period_4.4(1)', 'Wave_period_0.5(2)']
['Wave_period_0.7(2)', 'Wave_period_0.9(2)', 'Wave_period_1.1(2)', 'Wave_period_1.5(2)', 'Wave_period_1.9(2)']
['Wave_period_2.5(2)', 'Wave_period_3.3(2)', 'Wave_period_4.4(2)', 'Wave_period_0.5(3)', 'Wave_period_0.7(3)']
['Wave_period_0.9(3)', 'Wave_period_1.1(3)', 'Wave_period_1.5(3)', 'Wave_period_1.9(3)', 'Wave_period_2.5(3)']
['Wave_period_3.3(3)', 'Wave_period_4.4(3)', 'Wave_period_0.5(4)', 'Wave_period_0.7(4)', 'Wave_period_0.9(4)']
['Wave_period_1.1(4)', 'Wave_period_1.5(4)', 'Wave_period_1.9(4)', 'Wave_period_2.5(4)', 'Wave_period_3.3(4)']
['Wave_period_4.4(4)', 'Wave_period_0.5(5)', 'Wave_period_0.7(5)', 'Wave_period_0.9(5)', 'Wave_period_1.1(5)']
['Wave_period_1.5(5)', 'Wave_period_1.9(5)', 'Wave_period_2.5(5)', 'Wave_period_3.3(5)', 'Wave_period_4.4(5)']


## ____________________________________________________________
## Features for models **4**

Sensor-specific data excludes features that are related to other sensors, although all sensors share 3 common features (*Water_level_Avg*, *Temp_ottpls_Avg* and *BatterymV_Min*).

The number in parentheses identifies the sensor a feature belongs to. The exception is the *WC* feature, where the sensor number is appended without parentheses (e.g. *WC1*).

### Sensor 1

In [7]:
# List the feature columns kept for sensor 1.
# NOTE(review): this rebinds the notebook-level `train_X` assigned by earlier cells.
folder = f'../../Data/Training/wavelet_pickle_data/2022_sensors/X_train_sensor_{1}.pkl'
train_X = get_data_2022_sensors(folder, 1)
print_features(list(train_X.columns.array))

['Redox_Avg(1)', 'EC_Avg(1)', 'Matric_potential_Avg(1)', 'Temp_T12_Avg(1)', 'Water_level_Avg']
['Temp_ottpls_Avg', 'BatterymV_Min', 'WC1', 'Redox_Avg(1)_sigma_b_24', 'Redox_Avg(1)_sigma_f_24']
['Redox_Avg(1)_sigma_b_12', 'Redox_Avg(1)_sigma_f_12', 'Wave_period_0.5(1)', 'Wave_period_0.7(1)', 'Wave_period_0.9(1)']
['Wave_period_1.1(1)', 'Wave_period_1.5(1)', 'Wave_period_1.9(1)', 'Wave_period_2.5(1)', 'Wave_period_3.3(1)']
['Wave_period_4.4(1)']


### Sensor 2

In [8]:
# List the feature columns kept for sensor 2.
# NOTE(review): this rebinds the notebook-level `train_X` assigned by earlier cells.
folder = f'../../Data/Training/wavelet_pickle_data/2022_sensors/X_train_sensor_{2}.pkl'
train_X = get_data_2022_sensors(folder, 2)
print_features(list(train_X.columns.array))

['Redox_Avg(2)', 'EC_Avg(2)', 'Matric_potential_Avg(2)', 'Temp_T12_Avg(2)', 'Water_level_Avg']
['Temp_ottpls_Avg', 'BatterymV_Min', 'WC2', 'Redox_Avg(2)_sigma_b_24', 'Redox_Avg(2)_sigma_f_24']
['Redox_Avg(2)_sigma_b_12', 'Redox_Avg(2)_sigma_f_12', 'Wave_period_0.5(2)', 'Wave_period_0.7(2)', 'Wave_period_0.9(2)']
['Wave_period_1.1(2)', 'Wave_period_1.5(2)', 'Wave_period_1.9(2)', 'Wave_period_2.5(2)', 'Wave_period_3.3(2)']
['Wave_period_4.4(2)']


### Sensor 3

In [9]:
# List the feature columns kept for sensor 3.
# NOTE(review): this rebinds the notebook-level `train_X` assigned by earlier cells.
folder = f'../../Data/Training/wavelet_pickle_data/2022_sensors/X_train_sensor_{3}.pkl'
train_X = get_data_2022_sensors(folder, 3)
print_features(list(train_X.columns.array))

['Redox_Avg(3)', 'EC_Avg(3)', 'Matric_potential_Avg(3)', 'Temp_T12_Avg(3)', 'Water_level_Avg']
['Temp_ottpls_Avg', 'BatterymV_Min', 'WC3', 'Redox_Avg(3)_sigma_b_24', 'Redox_Avg(3)_sigma_f_24']
['Redox_Avg(3)_sigma_b_12', 'Redox_Avg(3)_sigma_f_12', 'Wave_period_0.5(3)', 'Wave_period_0.7(3)', 'Wave_period_0.9(3)']
['Wave_period_1.1(3)', 'Wave_period_1.5(3)', 'Wave_period_1.9(3)', 'Wave_period_2.5(3)', 'Wave_period_3.3(3)']
['Wave_period_4.4(3)']


### Sensor 4

In [10]:
# List the feature columns kept for sensor 4.
# NOTE(review): this rebinds the notebook-level `train_X` assigned by earlier cells.
folder = f'../../Data/Training/wavelet_pickle_data/2022_sensors/X_train_sensor_{4}.pkl'
train_X = get_data_2022_sensors(folder, 4)
print_features(list(train_X.columns.array))

['Redox_Avg(4)', 'EC_Avg(4)', 'Matric_potential_Avg(4)', 'Temp_T12_Avg(4)', 'Water_level_Avg']
['Temp_ottpls_Avg', 'BatterymV_Min', 'WC4', 'Redox_Avg(4)_sigma_b_24', 'Redox_Avg(4)_sigma_f_24']
['Redox_Avg(4)_sigma_b_12', 'Redox_Avg(4)_sigma_f_12', 'Wave_period_0.5(4)', 'Wave_period_0.7(4)', 'Wave_period_0.9(4)']
['Wave_period_1.1(4)', 'Wave_period_1.5(4)', 'Wave_period_1.9(4)', 'Wave_period_2.5(4)', 'Wave_period_3.3(4)']
['Wave_period_4.4(4)']


### Sensor 5

In [11]:
# List the feature columns kept for sensor 5.
# NOTE(review): this rebinds the notebook-level `train_X` assigned by earlier cells.
folder = f'../../Data/Training/wavelet_pickle_data/2022_sensors/X_train_sensor_{5}.pkl'
train_X = get_data_2022_sensors(folder, 5)
print_features(list(train_X.columns.array))

['Redox_Avg(5)', 'EC_Avg(5)', 'Matric_potential_Avg(5)', 'Temp_T12_Avg(5)', 'Water_level_Avg']
['Temp_ottpls_Avg', 'BatterymV_Min', 'WC5', 'Redox_Avg(5)_sigma_b_24', 'Redox_Avg(5)_sigma_f_24']
['Redox_Avg(5)_sigma_b_12', 'Redox_Avg(5)_sigma_f_12', 'Wave_period_0.5(5)', 'Wave_period_0.7(5)', 'Wave_period_0.9(5)']
['Wave_period_1.1(5)', 'Wave_period_1.5(5)', 'Wave_period_1.9(5)', 'Wave_period_2.5(5)', 'Wave_period_3.3(5)']
['Wave_period_4.4(5)']


## ____________________________________________________________
## Features for models **5**

### Sensor 1

In [12]:
print_features(get_sensor_top_10_columns(1))

['Water_level_Avg', 'Redox_Avg(1)_sigma_b_24', 'Redox_Avg(1)_sigma_f_24', 'Redox_Avg(1)_sigma_f_12', 'Wave_period_0.5(1)']
['Wave_period_0.7(1)', 'Wave_period_0.9(1)', 'Wave_period_1.1(1)', 'Wave_period_1.5(1)', 'Wave_period_1.9(1)']


### Sensor 2

In [13]:
print_features(get_sensor_top_10_columns(2))

['Redox_Avg(2)', 'Redox_Avg(2)_sigma_b_24', 'Redox_Avg(2)_sigma_f_24', 'Redox_Avg(2)_sigma_b_12', 'Redox_Avg(2)_sigma_f_12']
['Wave_period_0.5(2)', 'Wave_period_0.7(2)', 'Wave_period_0.9(2)', 'Wave_period_1.1(2)', 'Wave_period_1.5(2)']


### Sensor 3

In [14]:
print_features(get_sensor_top_10_columns(3))

['Redox_Avg(3)_sigma_b_24', 'Redox_Avg(3)_sigma_f_24', 'Redox_Avg(3)_sigma_b_12', 'Redox_Avg(3)_sigma_f_12', 'Wave_period_0.5(3)']
['Wave_period_0.7(3)', 'Wave_period_0.9(3)', 'Wave_period_1.1(3)', 'Wave_period_1.5(3)', 'Wave_period_1.9(3)']


### Sensor 4

In [15]:
print_features(get_sensor_top_10_columns(4))

['Redox_Avg(4)_sigma_b_24', 'Redox_Avg(4)_sigma_f_24', 'Redox_Avg(4)_sigma_b_12', 'Redox_Avg(4)_sigma_f_12', 'Wave_period_0.7(4)']
['Wave_period_0.9(4)', 'Wave_period_1.1(4)', 'Wave_period_1.5(4)', 'Wave_period_1.9(4)', 'Wave_period_2.5(4)']


### Sensor 5

In [16]:
print_features(get_sensor_top_10_columns(5))

['Redox_Avg(5)_sigma_b_24', 'Redox_Avg(5)_sigma_f_24', 'Redox_Avg(5)_sigma_b_12', 'Redox_Avg(5)_sigma_f_12', 'Wave_period_0.7(5)']
['Wave_period_0.9(5)', 'Wave_period_1.1(5)', 'Wave_period_1.5(5)', 'Wave_period_1.9(5)', 'Wave_period_2.5(5)']


## ____________________________________________________________
## Features for models **6**

### Sensor 1

In [17]:
print_features(get_sensor_wavelet_columns(1))

['Wave_period_0.5(1)', 'Wave_period_0.7(1)', 'Wave_period_0.9(1)', 'Wave_period_1.1(1)', 'Wave_period_1.5(1)']
['Wave_period_1.9(1)', 'Wave_period_2.5(1)', 'Wave_period_3.3(1)', 'Wave_period_4.4(1)']


### Sensor 2

In [18]:
print_features(get_sensor_wavelet_columns(2))

['Wave_period_0.5(2)', 'Wave_period_0.7(2)', 'Wave_period_0.9(2)', 'Wave_period_1.1(2)', 'Wave_period_1.5(2)']
['Wave_period_1.9(2)', 'Wave_period_2.5(2)', 'Wave_period_3.3(2)', 'Wave_period_4.4(2)']


### Sensor 3

In [19]:
print_features(get_sensor_wavelet_columns(3))

['Wave_period_0.5(3)', 'Wave_period_0.7(3)', 'Wave_period_0.9(3)', 'Wave_period_1.1(3)', 'Wave_period_1.5(3)']
['Wave_period_1.9(3)', 'Wave_period_2.5(3)', 'Wave_period_3.3(3)', 'Wave_period_4.4(3)']


### Sensor 4

In [20]:
print_features(get_sensor_wavelet_columns(4))

['Wave_period_0.5(4)', 'Wave_period_0.7(4)', 'Wave_period_0.9(4)', 'Wave_period_1.1(4)', 'Wave_period_1.5(4)']
['Wave_period_1.9(4)', 'Wave_period_2.5(4)', 'Wave_period_3.3(4)', 'Wave_period_4.4(4)']


### Sensor 5

In [21]:
print_features(get_sensor_wavelet_columns(5))

['Wave_period_0.5(5)', 'Wave_period_0.7(5)', 'Wave_period_0.9(5)', 'Wave_period_1.1(5)', 'Wave_period_1.5(5)']
['Wave_period_1.9(5)', 'Wave_period_2.5(5)', 'Wave_period_3.3(5)', 'Wave_period_4.4(5)']


## ============================================================

# Gridsearch results

In [22]:
def get_top_15_results(df):
    """Return the 15 best rows of a grid-search results table.

    Rows are ordered by ``rank_test_score`` and, within equal ranks, by
    ``mean_score_time`` (both ascending); only the summary columns are kept.

    Args:
        df: Results table containing at least the seven columns listed below.

    Returns:
        pandas.DataFrame: At most 15 rows, original index labels preserved.

    Raises:
        KeyError: If one of the summary columns is missing from ``df``.
    """
    summary_columns = [
        'rank_test_score',
        'mean_fit_time',
        'mean_score_time',
        'param_C',
        'param_degree',
        'mean_test_score',
        'mean_train_score',
    ]
    ordered = df[summary_columns].sort_values(by=['rank_test_score', 'mean_score_time'])
    return ordered.iloc[:15]

In [23]:
# Directories holding the saved result CSVs, one directory per feature set.
all_feature_folder = './results/all_features'
fs_folder = './results/feature_selection'
wavelet_feature_folder = './results/wavelet_features'

# All-feature results: the combined file plus one file per sensor (1-5).
all_features = pd.read_csv(f'{all_feature_folder}/Results.csv')
all_features_sensor_1 = pd.read_csv(f'{all_feature_folder}/Results_1.csv')
all_features_sensor_2 = pd.read_csv(f'{all_feature_folder}/Results_2.csv')
all_features_sensor_3 = pd.read_csv(f'{all_feature_folder}/Results_3.csv')
all_features_sensor_4 = pd.read_csv(f'{all_feature_folder}/Results_4.csv')
all_features_sensor_5 = pd.read_csv(f'{all_feature_folder}/Results_5.csv')

# Feature-selection ("fs") results: the combined file plus one file per sensor (1-5).
fs = pd.read_csv(f'{fs_folder}/Results_fs.csv')
fs_sensor_1 = pd.read_csv(f'{fs_folder}/Results_fs_1.csv')
fs_sensor_2 = pd.read_csv(f'{fs_folder}/Results_fs_2.csv')
fs_sensor_3 = pd.read_csv(f'{fs_folder}/Results_fs_3.csv')
fs_sensor_4 = pd.read_csv(f'{fs_folder}/Results_fs_4.csv')
fs_sensor_5 = pd.read_csv(f'{fs_folder}/Results_fs_5.csv')

# Wavelet-only results: only the combined file is loaded in this cell.
# NOTE(review): no per-sensor wavelet results are read here - confirm whether they exist.
wavelet_features = pd.read_csv(f'{wavelet_feature_folder}/Results.csv')

# Top 15 results with respect to rank and mean score time for all models

## Model 1

In [24]:
get_top_15_results(all_features)

Unnamed: 0,rank_test_score,mean_fit_time,mean_score_time,param_C,param_degree,mean_test_score,mean_train_score
38,1,123.104748,0.118831,7.0,6,0.99962,0.999809
24,1,51.964369,0.127437,4.0,7,0.99962,0.999809
39,1,91.337252,0.141765,7.0,7,0.99962,0.999809
34,1,136.261474,0.141926,6.0,7,0.99962,0.999809
33,1,38.086161,0.147192,6.0,6,0.99962,0.999809
37,1,79.804713,0.152276,7.0,5,0.99962,0.999809
19,1,97.623018,0.152327,3.0,7,0.99962,0.999809
28,1,236.45275,0.158126,5.0,6,0.99962,0.999809
29,1,6.220708,0.159427,5.0,7,0.99962,0.999809
14,1,113.081432,0.161844,2.0,7,0.99962,0.999809


## ____________________________________________________________
## Model 2

In [25]:
get_top_15_results(fs)

Unnamed: 0,rank_test_score,mean_fit_time,mean_score_time,param_C,param_degree,mean_test_score,mean_train_score
34,1,127.149871,0.10658,6.0,7,0.99962,0.999809
33,1,33.585202,0.116013,6.0,6,0.99962,0.999809
39,1,106.288663,0.116825,7.0,7,0.99962,0.999809
32,1,45.431989,0.135149,6.0,5,0.99962,0.999809
24,1,60.216903,0.139693,4.0,7,0.99962,0.999809
19,1,111.499485,0.140181,3.0,7,0.99962,0.999809
29,1,6.479079,0.14488,5.0,7,0.99962,0.999809
28,1,236.054998,0.147361,5.0,6,0.99962,0.999809
23,1,81.64827,0.156475,4.0,6,0.99962,0.999809
37,1,85.061224,0.164956,7.0,5,0.99962,0.999809


## ____________________________________________________________
## Model 3

In [26]:
get_top_15_results(wavelet_features)

Unnamed: 0,rank_test_score,mean_fit_time,mean_score_time,param_C,param_degree,mean_test_score,mean_train_score
35,1,114.341199,2.876744,7.0,3,0.908133,0.909304
30,2,119.147689,3.372667,6.0,3,0.903178,0.904836
36,3,125.534713,2.931069,7.0,4,0.901488,0.902952
31,4,128.681254,3.142704,6.0,4,0.898431,0.899976
25,5,112.41375,3.553409,5.0,3,0.8964,0.897647
26,6,124.362748,3.122371,5.0,4,0.894992,0.895995
37,7,165.830156,3.160644,7.0,5,0.890402,0.89302
20,8,124.686549,3.509432,4.0,3,0.889831,0.891071
21,9,130.392886,3.456385,4.0,4,0.888333,0.890001
15,10,124.905769,3.723545,3.0,3,0.884602,0.886136


## ____________________________________________________________
## Models 4

### Sensor 1

In [27]:
get_top_15_results(all_features_sensor_1)

Unnamed: 0,rank_test_score,mean_fit_time,mean_score_time,param_C,param_degree,mean_test_score,mean_train_score
29,1,4.21273,0.122558,5.0,7,0.998658,0.998878
24,1,4.317266,0.139929,4.0,7,0.998658,0.998878
38,1,5.075428,0.150097,7.0,6,0.998658,0.998878
19,1,5.048591,0.166005,3.0,7,0.998658,0.998878
33,1,4.885369,0.168176,6.0,6,0.998658,0.998878
28,1,5.065658,0.177451,5.0,6,0.998658,0.998878
14,1,5.513325,0.194381,2.0,7,0.998658,0.998846
23,1,5.717787,0.214163,4.0,6,0.998658,0.998846
39,9,3.93015,0.108372,7.0,7,0.998379,0.999095
34,9,4.425374,0.123098,6.0,7,0.998379,0.998878


### Sensor 2

In [28]:
get_top_15_results(all_features_sensor_2)

Unnamed: 0,rank_test_score,mean_fit_time,mean_score_time,param_C,param_degree,mean_test_score,mean_train_score
39,1,11.938227,0.422594,7.0,7,0.999054,0.999289
34,2,11.594128,0.517445,6.0,7,0.99886,0.999055
29,3,10.728299,0.470519,5.0,7,0.998188,0.998439
24,4,11.156204,0.505094,4.0,7,0.995038,0.995498
38,5,10.757798,0.459781,7.0,6,0.991222,0.991655
19,6,11.248387,0.496583,3.0,7,0.989701,0.989956
33,7,11.385592,0.526426,6.0,6,0.988843,0.98916
28,8,11.30788,0.492088,5.0,6,0.987399,0.987748
14,9,11.240071,0.535626,2.0,7,0.986837,0.986952
23,10,11.618448,0.539239,4.0,6,0.986362,0.986252


### Sensor 3

In [29]:
get_top_15_results(all_features_sensor_3)

Unnamed: 0,rank_test_score,mean_fit_time,mean_score_time,param_C,param_degree,mean_test_score,mean_train_score
39,1,12.736291,0.43636,7.0,7,0.994421,0.994556
34,2,13.840424,0.562304,6.0,7,0.992413,0.992319
29,3,13.500764,0.5684,5.0,7,0.987856,0.988592
24,4,13.390171,0.560674,4.0,7,0.985762,0.985809
38,5,14.656201,0.612222,7.0,6,0.984328,0.984295
19,6,39.54979,0.649568,3.0,7,0.984328,0.984274
14,7,15.015268,0.711681,2.0,7,0.983474,0.983408
23,7,18.32084,0.736331,4.0,6,0.983474,0.983355
18,9,19.963502,0.84996,3.0,6,0.983384,0.983355
33,10,15.536695,0.637195,6.0,6,0.983372,0.983744


### Sensor 4

In [30]:
get_top_15_results(all_features_sensor_4)

Unnamed: 0,rank_test_score,mean_fit_time,mean_score_time,param_C,param_degree,mean_test_score,mean_train_score
39,1,8.583789,0.375565,7.0,7,0.998766,0.998973
34,2,9.125592,0.422592,6.0,7,0.998292,0.998591
29,3,20.634217,0.386662,5.0,7,0.997355,0.997521
24,4,9.76928,0.511704,4.0,7,0.994531,0.994757
38,5,9.592971,0.506578,7.0,6,0.993395,0.993687
33,6,11.049505,0.548567,6.0,6,0.990728,0.990764
19,7,9.223914,0.497079,3.0,7,0.989767,0.990097
28,8,12.050399,0.644176,5.0,6,0.987674,0.988128
23,9,11.942999,0.59929,4.0,6,0.984618,0.985089
14,10,11.63198,0.601896,2.0,7,0.984333,0.984528


### Sensor 5

In [31]:
get_top_15_results(all_features_sensor_5)

Unnamed: 0,rank_test_score,mean_fit_time,mean_score_time,param_C,param_degree,mean_test_score,mean_train_score
29,1,8.550238,0.380329,5.0,7,0.999611,0.999788
39,2,8.300963,0.303149,7.0,7,0.999517,0.999799
34,2,8.96076,0.379499,6.0,7,0.999517,0.999767
24,4,7.562117,0.387641,4.0,7,0.999512,0.999777
38,5,7.7727,0.340622,7.0,6,0.999138,0.999597
19,6,9.148946,0.46777,3.0,7,0.998677,0.998876
33,7,7.450413,0.377065,6.0,6,0.998482,0.998686
28,8,8.069625,0.413956,5.0,6,0.996045,0.996334
14,9,9.730457,0.453536,2.0,7,0.992731,0.993094
23,10,9.829638,0.508425,4.0,6,0.992539,0.993083


## ____________________________________________________________
## Models 5

### Sensor 1

In [32]:
get_top_15_results(fs_sensor_1)

Unnamed: 0,rank_test_score,mean_fit_time,mean_score_time,param_C,param_degree,mean_test_score,mean_train_score
29,1,3.821746,0.121353,5.0,7,0.998658,0.998878
38,1,4.408928,0.14319,7.0,6,0.998658,0.998878
24,1,4.557851,0.146652,4.0,7,0.998658,0.998878
33,1,4.397096,0.14861,6.0,6,0.998658,0.998878
19,1,4.837046,0.155391,3.0,7,0.998658,0.998878
23,1,4.694936,0.161144,4.0,6,0.998658,0.998846
28,1,5.178317,0.179538,5.0,6,0.998658,0.998878
14,1,5.053034,0.187033,2.0,7,0.998658,0.998846
39,9,4.233595,0.10015,7.0,7,0.998379,0.999095
34,9,4.386043,0.132261,6.0,7,0.998379,0.998878


### Sensor 2

In [33]:
get_top_15_results(fs_sensor_2)

Unnamed: 0,rank_test_score,mean_fit_time,mean_score_time,param_C,param_degree,mean_test_score,mean_train_score
39,1,11.29352,0.437402,7.0,7,0.999054,0.999289
34,2,11.632374,0.523365,6.0,7,0.99886,0.999055
29,3,11.0496,0.491389,5.0,7,0.998188,0.998439
24,4,10.429001,0.471275,4.0,7,0.995038,0.995498
38,5,10.727471,0.448617,7.0,6,0.991222,0.991655
19,6,11.749794,0.530957,3.0,7,0.989701,0.989956
33,7,10.673042,0.481197,6.0,6,0.988843,0.98916
28,8,11.993374,0.504819,5.0,6,0.987399,0.987748
14,9,11.607737,0.55377,2.0,7,0.986837,0.986952
23,10,12.521834,0.529539,4.0,6,0.986362,0.986252


### Sensor 3

In [34]:
get_top_15_results(fs_sensor_3)

Unnamed: 0,rank_test_score,mean_fit_time,mean_score_time,param_C,param_degree,mean_test_score,mean_train_score
39,1,11.790994,0.405391,7.0,7,0.994421,0.994556
34,2,13.519435,0.556344,6.0,7,0.992413,0.992319
29,3,13.485222,0.535589,5.0,7,0.987856,0.988592
24,4,14.263181,0.596755,4.0,7,0.985762,0.985809
38,5,15.803521,0.680352,7.0,6,0.984328,0.984295
19,6,38.509166,0.678193,3.0,7,0.984328,0.984274
23,7,17.418677,0.717589,4.0,6,0.983474,0.983355
14,7,16.007563,0.797846,2.0,7,0.983474,0.983408
18,9,20.493532,0.929041,3.0,6,0.983384,0.983355
33,10,16.259736,0.723118,6.0,6,0.983372,0.983744


### Sensor 4

In [35]:
get_top_15_results(fs_sensor_4)

Unnamed: 0,rank_test_score,mean_fit_time,mean_score_time,param_C,param_degree,mean_test_score,mean_train_score
39,1,8.494506,0.368542,7.0,7,0.998766,0.998973
34,2,9.405278,0.454533,6.0,7,0.998292,0.998591
29,3,28.436437,0.418196,5.0,7,0.997355,0.997521
24,4,9.316304,0.478857,4.0,7,0.994531,0.994757
38,5,9.621225,0.500019,7.0,6,0.993395,0.993687
33,6,11.65594,0.580712,6.0,6,0.990728,0.990764
19,7,10.748304,0.532347,3.0,7,0.989767,0.990097
28,8,10.755406,0.547676,5.0,6,0.987674,0.988128
23,9,11.630484,0.583877,4.0,6,0.984618,0.985089
14,10,11.24166,0.618988,2.0,7,0.984333,0.984528


### Sensor 5

In [36]:
get_top_15_results(fs_sensor_5)

Unnamed: 0,rank_test_score,mean_fit_time,mean_score_time,param_C,param_degree,mean_test_score,mean_train_score
29,1,8.116917,0.354085,5.0,7,0.999611,0.999788
39,2,9.022486,0.326731,7.0,7,0.999517,0.999799
34,2,8.782249,0.370753,6.0,7,0.999517,0.999767
24,4,7.797127,0.370269,4.0,7,0.999512,0.999777
38,5,7.44376,0.343063,7.0,6,0.999138,0.999597
19,6,8.591073,0.453073,3.0,7,0.998677,0.998876
33,7,7.811319,0.365044,6.0,6,0.998482,0.998686
28,8,8.62925,0.430468,5.0,6,0.996045,0.996334
14,9,9.938772,0.540422,2.0,7,0.992731,0.993094
23,10,9.380794,0.467023,4.0,6,0.992539,0.993083


## ============================================================

# Best model parameters with respect to rank_test_score and mean_score_time

In [37]:
def get_best_model_parameters(df):
    """Describe the fastest-scoring parameter set among the rank-1 results.

    Keeps the rows whose ``rank_test_score`` is 1, orders them by
    ``mean_score_time`` (ascending) and reports the first row's polynomial
    degree and regularization parameter.

    Args:
        df: Results table with ``rank_test_score``, ``mean_score_time``,
            ``param_degree`` and ``param_C`` columns.

    Returns:
        str: Two lines, ``Degree: <param_degree>`` followed by ``C: <param_C>``.

    Raises:
        IndexError: If no row has ``rank_test_score == 1``.
        KeyError: If a required column is missing.
    """
    fastest_first = df.loc[df['rank_test_score'] == 1].sort_values(by=['mean_score_time'])
    # Read each value from its own column. Extracting the whole row first
    # would coerce every value to one common dtype, so an integer degree could
    # print as "6.0" depending on which other columns the table contains.
    degree = fastest_first['param_degree'].iloc[0]
    c = fastest_first['param_C'].iloc[0]
    return f'Degree: {degree}\nC: {c}'

## Model 1

In [38]:
print(get_best_model_parameters(all_features))

Degree: 6
C: 7.0


## ____________________________________________________________
## Model 2

In [39]:
print(get_best_model_parameters(fs))

Degree: 7
C: 6.0


## ____________________________________________________________
## Model 3

In [40]:
print(get_best_model_parameters(wavelet_features))

Degree: 3
C: 7.0


## ____________________________________________________________
## Models 4

### Sensor 1

In [41]:
print(get_best_model_parameters(all_features_sensor_1))

Degree: 7
C: 5.0


### Sensor 2

In [42]:
print(get_best_model_parameters(all_features_sensor_2))

Degree: 7
C: 7.0


### Sensor 3

In [43]:
print(get_best_model_parameters(all_features_sensor_3))

Degree: 7
C: 7.0


### Sensor 4

In [44]:
print(get_best_model_parameters(all_features_sensor_4))

Degree: 7
C: 7.0


### Sensor 5

In [45]:
print(get_best_model_parameters(all_features_sensor_5))

Degree: 7
C: 5.0


## ____________________________________________________________
## Models 5

### Sensor 1

In [46]:
print(get_best_model_parameters(fs_sensor_1))

Degree: 7
C: 5.0


### Sensor 2

In [47]:
print(get_best_model_parameters(fs_sensor_2))

Degree: 7
C: 7.0


### Sensor 3

In [48]:
print(get_best_model_parameters(fs_sensor_3))

Degree: 7
C: 7.0


### Sensor 4

In [49]:
print(get_best_model_parameters(fs_sensor_4))

Degree: 7
C: 7.0


### Sensor 5

In [50]:
print(get_best_model_parameters(fs_sensor_5))

Degree: 7
C: 5.0


## ____________________________________________________________
## Models 6

### Sensor 1

### Sensor 2

### Sensor 3

### Sensor 4

### Sensor 5