In [1]:
from sklearn.svm import SVC
import pandas as pd
import numpy as np
import pickle

In [2]:
# Root directory of the pickled wavelet training data, relative to this notebook
wavelet_data = '../../Data/Training/wavelet_pickle_data'
# Sensor identifiers 1 through 5, used to build per-sensor file names and dict keys
sensors = [*range(1, 6)]

# Load Full data

In [3]:
# Main folders
folder_2022 = f'{wavelet_data}/2022'
scaled_2022 = f'{folder_2022}/Scaled'

# 2022 file paths: the scaled feature frame is read from the Scaled/ subfolder,
# the target from the 2022 folder itself
X_train_scaled_2022_folder = f'{scaled_2022}/X_train_scaled.pkl'
y_train_2022_folder = f'{folder_2022}/y_train.pkl'

# 2022 data
# Hand pandas the path instead of an open() handle so the file is opened and
# closed by read_pickle itself; the earlier bare open(...) handles were never closed.
# NOTE(review): unpickling can execute arbitrary code - only load trusted files.
X_train_scaled_2022 = pd.read_pickle(X_train_scaled_2022_folder)
y_train_2022 = pd.read_pickle(y_train_2022_folder)

# Load sensor data

In [4]:
# Per-sensor training data, keyed 'sensor_<n>' -> {"X_train_scaled": ..., "y_train": ...}
sensor_data = dict()

# Main folders
folder_2022_sensors = f'{wavelet_data}/2022_sensors'
scaled_2022_sensors = f'{folder_2022_sensors}/Scaled'

for sensor in sensors:
    # 2022 sensors file paths
    X_train_scaled_2022_sensor_folder = f'{scaled_2022_sensors}/X_train_scaled_sensor_{sensor}.pkl'
    y_train_2022_sensor_folder = f'{folder_2022_sensors}/y_train_sensor_{sensor}.pkl'

    # 2022 sensors data
    # Pass the path so pandas opens and closes the file; the previous open(...)
    # calls leaked two file handles on every iteration.
    # NOTE(review): unpickling can execute arbitrary code - only load trusted files.
    X_train_scaled_2022_sensor = pd.read_pickle(X_train_scaled_2022_sensor_folder)
    y_train_2022_sensor = pd.read_pickle(y_train_2022_sensor_folder)

    sensor_data[f'sensor_{sensor}'] = {
        "X_train_scaled": X_train_scaled_2022_sensor,
        "y_train": y_train_2022_sensor,
    }

# Model training

### Training helper functions

In [5]:
# Columns for full data training

# Columns that are dropped before fitting the full-data model
removable_full_columns = ['Redox_error_flag', 'pit_number', 'TIMESTAMP']

# The ten columns used by the feature-selection model
top_10_features = [
    'Wave_period_1.5(5)',
    'Wave_period_1.9(5)',
    'Wave_period_2.5(5)',
    'Redox_Avg(2)_sigma_f_24',
    'Redox_Avg(3)_sigma_b_24',
    'Redox_Avg(3)_sigma_f_24',
    'Redox_Avg(4)_sigma_b_24',
    'Redox_Avg(4)_sigma_f_24',
    'Redox_Avg(5)_sigma_b_24',
    'Redox_Avg(5)_sigma_f_24',
]

# All wavelet columns: nine periods for each of sensors 1-5, listed sensor by sensor
full_wavelet_columns = [
    f'Wave_period_{period}({sensor_id})'
    for sensor_id in (1, 2, 3, 4, 5)
    for period in ('0.5', '0.7', '0.9', '1.1', '1.5', '1.9', '2.5', '3.3', '4.4')
]

# Columns for sensor data training
def get_removable_sensor_columns(sensor):
    """Return the column names to exclude when training on one sensor's data.

    Args:
        sensor: Sensor identifier; it is interpolated into the sensor-specific
            error-flag column name ``Redox_error_flag(<sensor>)``.

    Returns:
        A new list holding the sensor-specific error-flag column followed by
        the three shared columns 'Redox_error_flag', 'pit_number', 'TIMESTAMP'.
    """
    sensor_flag_column = 'Redox_error_flag({})'.format(sensor)
    shared_columns = ['Redox_error_flag', 'pit_number', 'TIMESTAMP']
    return [sensor_flag_column] + shared_columns

# Ten selected feature columns per sensor, keyed 'sensor_<n>'
top_10_sensor_features = {
    "sensor_1": [
        'Water_level_Avg',
        'Redox_Avg(1)_sigma_b_24',
        'Redox_Avg(1)_sigma_f_24',
        'Redox_Avg(1)_sigma_f_12',
        'Wave_period_0.5(1)',
        'Wave_period_0.7(1)',
        'Wave_period_0.9(1)',
        'Wave_period_1.1(1)',
        'Wave_period_1.5(1)',
        'Wave_period_1.9(1)',
    ],
    "sensor_2": [
        'Redox_Avg(2)',
        'Redox_Avg(2)_sigma_b_24',
        'Redox_Avg(2)_sigma_f_24',
        'Redox_Avg(2)_sigma_b_12',
        'Redox_Avg(2)_sigma_f_12',
        'Wave_period_0.5(2)',
        'Wave_period_0.7(2)',
        'Wave_period_0.9(2)',
        'Wave_period_1.1(2)',
        'Wave_period_1.5(2)',
    ],
    "sensor_3": [
        'Redox_Avg(3)_sigma_b_24',
        'Redox_Avg(3)_sigma_f_24',
        'Redox_Avg(3)_sigma_b_12',
        'Redox_Avg(3)_sigma_f_12',
        'Wave_period_0.5(3)',
        'Wave_period_0.7(3)',
        'Wave_period_0.9(3)',
        'Wave_period_1.1(3)',
        'Wave_period_1.5(3)',
        'Wave_period_1.9(3)',
    ],
    "sensor_4": [
        'Redox_Avg(4)_sigma_b_24',
        'Redox_Avg(4)_sigma_f_24',
        'Redox_Avg(4)_sigma_b_12',
        'Redox_Avg(4)_sigma_f_12',
        'Wave_period_0.7(4)',
        'Wave_period_0.9(4)',
        'Wave_period_1.1(4)',
        'Wave_period_1.5(4)',
        'Wave_period_1.9(4)',
        'Wave_period_2.5(4)',
    ],
    "sensor_5": [
        'Redox_Avg(5)_sigma_b_24',
        'Redox_Avg(5)_sigma_f_24',
        'Redox_Avg(5)_sigma_b_12',
        'Redox_Avg(5)_sigma_f_12',
        'Wave_period_0.7(5)',
        'Wave_period_0.9(5)',
        'Wave_period_1.1(5)',
        'Wave_period_1.5(5)',
        'Wave_period_1.9(5)',
        'Wave_period_2.5(5)',
    ],
}

# Random state
rs = 0

# Best model parameters, keyed by model name; each entry holds the `degree`
# and `C` values for that model
best_params = {
    '2022_full': {"degree": 6, "C": 7},
    '2022_fs': {"degree": 7, "C": 6},
    '2022_wavelet': {"degree": 3, "C": 7},
    # Per-sensor models trained on all remaining columns
    'sensor_1': {"degree": 7, "C": 5},
    'sensor_2': {"degree": 7, "C": 7},
    'sensor_3': {"degree": 7, "C": 7},
    'sensor_4': {"degree": 7, "C": 7},
    'sensor_5': {"degree": 7, "C": 5},
    # Per-sensor models trained on the selected features only
    'sensor_1_fs': {"degree": 7, "C": 5},
    'sensor_2_fs': {"degree": 7, "C": 7},
    'sensor_3_fs': {"degree": 7, "C": 7},
    'sensor_4_fs': {"degree": 7, "C": 7},
    'sensor_5_fs': {"degree": 7, "C": 5},
}

### 2022 full

In [6]:
# Polynomial-kernel SVC fitted on every 2022 column except those listed in
# removable_full_columns, using the stored '2022_full' degree and C.
svc_2022_full = SVC(
    kernel="poly",
    random_state=rs,
    degree=best_params['2022_full']['degree'],
    C=best_params['2022_full']['C'],
)
svc_2022_full.fit(
    X_train_scaled_2022.loc[:, ~X_train_scaled_2022.columns.isin(removable_full_columns)],
    np.ravel(y_train_2022),  # flatten the target to 1-D for fit()
)
# Use a context manager so the pickle is flushed and the handle closed
# deterministically; the earlier bare open(...) was never closed.
with open('./svc_2022_full.pkl', 'wb') as model_file:
    pickle.dump(svc_2022_full, model_file)

### 2022 feature selection

In [7]:
# Polynomial-kernel SVC fitted on the top_10_features columns only, using the
# stored '2022_fs' degree and C.
svc_2022_fs = SVC(
    kernel="poly",
    random_state=rs,
    degree=best_params['2022_fs']['degree'],
    C=best_params['2022_fs']['C'],
)
svc_2022_fs.fit(
    X_train_scaled_2022.loc[:, top_10_features],
    np.ravel(y_train_2022),  # flatten the target to 1-D for fit()
)
# Use a context manager so the pickle is flushed and the handle closed
# deterministically; the earlier bare open(...) was never closed.
with open('./svc_2022_fs.pkl', 'wb') as model_file:
    pickle.dump(svc_2022_fs, model_file)

### 2022 wavelet

In [8]:
# Polynomial-kernel SVC fitted on the full_wavelet_columns only, using the
# stored '2022_wavelet' degree and C.
svc_2022_wavelet = SVC(
    kernel="poly",
    random_state=rs,
    degree=best_params['2022_wavelet']['degree'],
    C=best_params['2022_wavelet']['C'],
)
svc_2022_wavelet.fit(
    X_train_scaled_2022.loc[:, full_wavelet_columns],
    np.ravel(y_train_2022),  # flatten the target to 1-D for fit()
)
# Use a context manager so the pickle is flushed and the handle closed
# deterministically; the earlier bare open(...) was never closed.
with open('./svc_2022_wavelet.pkl', 'wb') as model_file:
    pickle.dump(svc_2022_wavelet, model_file)

### 2022 sensor

In [9]:
# Train one polynomial-kernel SVC per sensor on all columns except the
# removable ones, and save each model as svc_sensor_<n>.pkl.
for sensor in sensors:
    scaled_training_data = sensor_data[f'sensor_{sensor}']['X_train_scaled']
    training_target_data = sensor_data[f'sensor_{sensor}']['y_train']
    removable_sensors_columns = get_removable_sensor_columns(sensor)

    svc_sensor = SVC(
        kernel="poly",
        random_state=rs,
        degree=best_params[f'sensor_{sensor}']['degree'],
        C=best_params[f'sensor_{sensor}']['C'],
    )
    svc_sensor.fit(
        scaled_training_data.loc[:, ~scaled_training_data.columns.isin(removable_sensors_columns)],
        np.ravel(training_target_data),  # flatten the target to 1-D for fit()
    )

    model_name = f'svc_sensor_{sensor}.pkl'
    # Close each output file before the next iteration; the earlier bare
    # open(...) left one unclosed handle per sensor.
    with open(model_name, 'wb') as model_file:
        pickle.dump(svc_sensor, model_file)

### 2022 sensor feature selection

In [10]:
# Train one polynomial-kernel SVC per sensor on that sensor's selected feature
# columns, and save each model as svc_fs_sensor_<n>.pkl.
for sensor in sensors:
    scaled_training_data = sensor_data[f'sensor_{sensor}']['X_train_scaled']
    training_target_data = sensor_data[f'sensor_{sensor}']['y_train']
    fs_columns = top_10_sensor_features[f'sensor_{sensor}']

    svc_fs = SVC(
        kernel="poly",
        random_state=rs,
        degree=best_params[f'sensor_{sensor}_fs']['degree'],
        C=best_params[f'sensor_{sensor}_fs']['C'],
    )
    svc_fs.fit(
        scaled_training_data.loc[:, fs_columns],
        np.ravel(training_target_data),  # flatten the target to 1-D for fit()
    )

    model_name = f'svc_fs_sensor_{sensor}.pkl'
    # Close each output file before the next iteration; the earlier bare
    # open(...) left one unclosed handle per sensor.
    with open(model_name, 'wb') as model_file:
        pickle.dump(svc_fs, model_file)