In [1]:
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix
import seaborn as sns
import pandas as pd
import numpy as np
import pickle
import matplotlib.pyplot as plt

In [2]:
# Relative path of the directory that holds the pickled wavelet datasets
wavelet_data = '../../Data/Training/wavelet_pickle_data'
# Sensor numbers 1..5; used to build per-sensor file and column names
sensors = [*range(1, 6)]

# Load Full data

In [3]:
# Main folders
folder_2022 = f'{wavelet_data}/2022'
scaled_2022 = f'{folder_2022}/Scaled'

# 2022 pickle paths (the "_folder" names hold file paths, not directories)
X_train_scaled_2022_folder = f'{scaled_2022}/X_train_scaled.pkl'
X_test_scaled_2022_folder = f'{scaled_2022}/X_test_scaled.pkl'
X_test_2022_folder = f'{folder_2022}/X_test.pkl'
y_train_2022_folder = f'{folder_2022}/y_train.pkl'
y_test_2022_folder = f'{folder_2022}/y_test.pkl'

# 2022 data
# read_pickle receives the path itself so pandas opens AND closes the file;
# passing open(path, 'rb') left every file handle unclosed.
# NOTE: unpickling can execute arbitrary code - only load files you trust.
X_train_scaled_2022 = pd.read_pickle(X_train_scaled_2022_folder)
X_test_scaled_2022 = pd.read_pickle(X_test_scaled_2022_folder)
X_test_2022 = pd.read_pickle(X_test_2022_folder)
y_train_2022 = pd.read_pickle(y_train_2022_folder)
y_test_2022 = pd.read_pickle(y_test_2022_folder)

# 2023 pickle paths
test_scaled_2023_folder = f'{wavelet_data}/2023/Scaled/test_scaled.pkl'
test_2023_folder = f'{wavelet_data}/2023/test.pkl'

# 2023 data
test_scaled_2023 = pd.read_pickle(test_scaled_2023_folder)
test_2023 = pd.read_pickle(test_2023_folder)

# Load sensor data

In [4]:
# Per-sensor datasets, keyed 'sensor_1' .. 'sensor_5'
sensor_data = dict()

# Main folders
folder_2022_sensors = f'{wavelet_data}/2022_sensors'
scaled_2022_sensors = f'{folder_2022_sensors}/Scaled'

for sensor in sensors:
    # read_pickle receives each path directly so pandas opens AND closes the
    # file; passing open(path, 'rb') leaked one handle per file.
    # NOTE: unpickling can execute arbitrary code - only load files you trust.
    sensor_data[f'sensor_{sensor}'] = {
        # 2022 sensors data
        "X_train_scaled": pd.read_pickle(f'{scaled_2022_sensors}/X_train_scaled_sensor_{sensor}.pkl'),
        "X_test_scaled": pd.read_pickle(f'{scaled_2022_sensors}/X_test_scaled_sensor_{sensor}.pkl'),
        "X_test": pd.read_pickle(f'{folder_2022_sensors}/X_test_sensor_{sensor}.pkl'),
        "y_train": pd.read_pickle(f'{folder_2022_sensors}/y_train_sensor_{sensor}.pkl'),
        "y_test": pd.read_pickle(f'{folder_2022_sensors}/y_test_sensor_{sensor}.pkl'),
        # 2023 sensors data
        "test_scaled_2023": pd.read_pickle(f'{wavelet_data}/2023_sensors/Scaled/test_scaled_sensor_{sensor}.pkl'),
        "test_2023": pd.read_pickle(f'{wavelet_data}/2023_sensors/test_sensor_{sensor}.pkl'),
    }

# Model training

### Training helper functions

In [None]:
# Columns for full data training
# Columns excluded from the model input when training on "all" columns
removable_full_columns = ['Redox_error_flag', 'pit_number', 'TIMESTAMP']
# The ten columns used by the feature-selection model
top_10_features = [
    'Wave_period_1.5(5)',
    'Wave_period_1.9(5)',
    'Wave_period_2.5(5)',
    'Redox_Avg(2)_sigma_f_24',
    'Redox_Avg(3)_sigma_b_24',
    'Redox_Avg(3)_sigma_f_24',
    'Redox_Avg(4)_sigma_b_24',
    'Redox_Avg(4)_sigma_f_24',
    'Redox_Avg(5)_sigma_b_24',
    'Redox_Avg(5)_sigma_f_24',
]
# All wavelet-period columns: nine periods for each of sensors 1-5, grouped by sensor
full_wavelet_columns = [
    f'Wave_period_{period}({sensor_number})'
    for sensor_number in range(1, 6)
    for period in ('0.5', '0.7', '0.9', '1.1', '1.5', '1.9', '2.5', '3.3', '4.4')
]

# Columns for sensor data training
def get_removable_sensor_columns(sensor):
    """Return the column names to exclude from the model input for `sensor`.

    The list holds the sensor-specific error flag, the generic error flag,
    'pit_number' and 'TIMESTAMP', in that order.
    """
    sensor_flag = f'Redox_error_flag({sensor})'
    return [sensor_flag, 'Redox_error_flag', 'pit_number', 'TIMESTAMP']
def get_wavelet_columns(sensor):
    """Return the nine 'Wave_period_<p>(<sensor>)' column names for `sensor`."""
    periods = ('0.5', '0.7', '0.9', '1.1', '1.5', '1.9', '2.5', '3.3', '4.4')
    return [f'Wave_period_{period}({sensor})' for period in periods]
# Ten selected feature columns per sensor, keyed 'sensor_<n>'
top_10_sensor_features = {
    "sensor_1": [
        'Water_level_Avg',
        'Redox_Avg(1)_sigma_b_24', 'Redox_Avg(1)_sigma_f_24', 'Redox_Avg(1)_sigma_f_12',
        'Wave_period_0.5(1)', 'Wave_period_0.7(1)', 'Wave_period_0.9(1)',
        'Wave_period_1.1(1)', 'Wave_period_1.5(1)', 'Wave_period_1.9(1)',
    ],
    "sensor_2": [
        'Redox_Avg(2)',
        'Redox_Avg(2)_sigma_b_24', 'Redox_Avg(2)_sigma_f_24',
        'Redox_Avg(2)_sigma_b_12', 'Redox_Avg(2)_sigma_f_12',
        'Wave_period_0.5(2)', 'Wave_period_0.7(2)', 'Wave_period_0.9(2)',
        'Wave_period_1.1(2)', 'Wave_period_1.5(2)',
    ],
    "sensor_3": [
        'Redox_Avg(3)_sigma_b_24', 'Redox_Avg(3)_sigma_f_24',
        'Redox_Avg(3)_sigma_b_12', 'Redox_Avg(3)_sigma_f_12',
        'Wave_period_0.5(3)', 'Wave_period_0.7(3)', 'Wave_period_0.9(3)',
        'Wave_period_1.1(3)', 'Wave_period_1.5(3)', 'Wave_period_1.9(3)',
    ],
    "sensor_4": [
        'Redox_Avg(4)_sigma_b_24', 'Redox_Avg(4)_sigma_f_24',
        'Redox_Avg(4)_sigma_b_12', 'Redox_Avg(4)_sigma_f_12',
        'Wave_period_0.7(4)', 'Wave_period_0.9(4)', 'Wave_period_1.1(4)',
        'Wave_period_1.5(4)', 'Wave_period_1.9(4)', 'Wave_period_2.5(4)',
    ],
    "sensor_5": [
        'Redox_Avg(5)_sigma_b_24', 'Redox_Avg(5)_sigma_f_24',
        'Redox_Avg(5)_sigma_b_12', 'Redox_Avg(5)_sigma_f_12',
        'Wave_period_0.7(5)', 'Wave_period_0.9(5)', 'Wave_period_1.1(5)',
        'Wave_period_1.5(5)', 'Wave_period_1.9(5)', 'Wave_period_2.5(5)',
    ],
}

# Random state
rs = 0

# Best model parameters
# One entry per model: polynomial degree and regularisation strength C for the SVC
best_params = {
    # Full-data models
    '2022_full': {"degree": 6, "C": 7},
    '2022_fs': {"degree": 7, "C": 6},
    '2022_wavelet': {"degree": 3, "C": 7},
    # Per-sensor models, all columns
    'sensor_1': {"degree": 7, "C": 5},
    'sensor_2': {"degree": 7, "C": 7},
    'sensor_3': {"degree": 7, "C": 7},
    'sensor_4': {"degree": 7, "C": 7},
    'sensor_5': {"degree": 7, "C": 5},
    # Per-sensor models, feature-selection columns
    'sensor_1_fs': {"degree": 7, "C": 5},
    'sensor_2_fs': {"degree": 7, "C": 7},
    'sensor_3_fs': {"degree": 7, "C": 7},
    'sensor_4_fs': {"degree": 7, "C": 7},
    'sensor_5_fs': {"degree": 7, "C": 5},
    # Per-sensor models, wavelet columns
    'sensor_1_wavelet': {"degree": 3, "C": 7},
    'sensor_2_wavelet': {"degree": 3, "C": 7},
    'sensor_3_wavelet': {"degree": 3, "C": 7},
    'sensor_4_wavelet': {"degree": 3, "C": 7},
    'sensor_5_wavelet': {"degree": 3, "C": 7},
}

# Sensor models
# Filled by the per-sensor training cells, keyed by model name
sensor_models = {}

### 2022 full

In [None]:
# Polynomial-kernel SVC trained on every 2022 column except the removable ones
svc_2022_full = SVC(
    kernel="poly",
    random_state=rs,
    degree=best_params['2022_full']['degree'],
    C=best_params['2022_full']['C'],
)
svc_2022_full.fit(
    X_train_scaled_2022.loc[:, ~X_train_scaled_2022.columns.isin(removable_full_columns)],
    np.ravel(y_train_2022),
)

### 2022 feature selection

In [None]:
# Polynomial-kernel SVC trained on the ten feature-selection columns only
svc_2022_fs = SVC(
    kernel="poly",
    random_state=rs,
    degree=best_params['2022_fs']['degree'],
    C=best_params['2022_fs']['C'],
)
svc_2022_fs.fit(
    X_train_scaled_2022.loc[:, top_10_features],
    np.ravel(y_train_2022),
)

### 2022 wavelet

In [None]:
# Polynomial-kernel SVC trained on the wavelet-period columns of all sensors
svc_2022_wavelet = SVC(
    kernel="poly",
    random_state=rs,
    degree=best_params['2022_wavelet']['degree'],
    C=best_params['2022_wavelet']['C'],
)
svc_2022_wavelet.fit(
    X_train_scaled_2022.loc[:, full_wavelet_columns],
    np.ravel(y_train_2022),
)

### 2022 sensor

In [None]:
# Train one all-columns SVC per sensor and register it as 'svc_sensor_<n>'
for sensor in sensors:
    sensor_frames = sensor_data[f'sensor_{sensor}']
    features = sensor_frames['X_train_scaled']
    sensor_params = best_params[f'sensor_{sensor}']

    # Keep every column that is not in the sensor's removable list
    keep_mask = ~features.columns.isin(get_removable_sensor_columns(sensor))

    svc_sensor = SVC(kernel="poly", random_state=rs, degree=sensor_params['degree'], C=sensor_params['C'])
    svc_sensor.fit(features.loc[:, keep_mask], np.ravel(sensor_frames['y_train']))

    sensor_models[f'svc_sensor_{sensor}'] = svc_sensor

### 2022 sensor feature selection

In [None]:
# Train one feature-selection SVC per sensor and register it as 'svc_fs_sensor_<n>'
for sensor in sensors:
    scaled_training_data = sensor_data[f'sensor_{sensor}']['X_train_scaled']
    training_target_data = sensor_data[f'sensor_{sensor}']['y_train']
    fs_columns = top_10_sensor_features[f'sensor_{sensor}']

    svc_fs = SVC(kernel="poly", random_state=rs, degree=best_params[f'sensor_{sensor}_fs']['degree'], C=best_params[f'sensor_{sensor}_fs']['C'])
    svc_fs.fit(scaled_training_data.loc[:, fs_columns], np.ravel(training_target_data))

    # Store the model fitted in this iteration. The original stored `svc_sensor`,
    # a leftover variable from the all-columns training cell, so every
    # 'svc_fs_sensor_<n>' entry pointed at the wrong estimator.
    sensor_models[f'svc_fs_sensor_{sensor}'] = svc_fs

### 2022 sensor wavelet

In [None]:
# Train one wavelet-columns SVC per sensor and register it as 'svc_wavelet_sensor_<n>'
for sensor in sensors:
    sensor_frames = sensor_data[f'sensor_{sensor}']
    wavelet_params = best_params[f'sensor_{sensor}_wavelet']

    svc_sensor_wavelet = SVC(kernel="poly", random_state=rs, degree=wavelet_params['degree'], C=wavelet_params['C'])
    svc_sensor_wavelet.fit(
        sensor_frames['X_train_scaled'].loc[:, get_wavelet_columns(sensor)],
        np.ravel(sensor_frames['y_train']),
    )

    sensor_models[f'svc_wavelet_sensor_{sensor}'] = svc_sensor_wavelet

# Get results

### Result helper functions

In [None]:
def get_results_df(test_X, test_y, pred, sensor=0):
    """Combine test features, true flags and predictions into one sorted frame.

    Parameters:
        test_X: feature frame; it is copied, not modified.
        test_y: frame holding the true error flag column.
        pred: predictions, one per row of `test_X`.
        sensor: when > 0 the flag column is 'Redox_error_flag(<sensor>)',
            otherwise 'Redox_error_flag'.

    Returns:
        A copy of `test_X` with the added columns 'pred', the flag column,
        'true_pos', 'false_pos' and 'false_neg', sorted by index.
    """
    flag_column = f'Redox_error_flag({sensor})' if sensor > 0 else 'Redox_error_flag'

    result = test_X.copy()
    result['pred'] = pred
    result[flag_column] = test_y[flag_column]

    outcome_rules = (
        # (new column, required flag value, required prediction value)
        ('true_pos', True, True),
        ('false_pos', False, True),
        ('false_neg', True, False),
    )
    for outcome, flag_value, pred_value in outcome_rules:
        matches = (result[flag_column] == flag_value) & (result['pred'] == pred_value)
        result[outcome] = np.where(matches, True, False)

    return result.sort_index()

def get_results_df_2023(test_data, pred, sensor=0):
    """Attach predictions to a copy of the 2023 data and sort it by index.

    Parameters:
        test_data: frame to copy; it is not modified.
        pred: predictions, one per row of `test_data`.
        sensor: accepted for signature parity with `get_results_df` and
            ignored, so a call passing a sensor number no longer raises
            TypeError. The 2023 frames get no flag columns added here.

    Returns:
        A copy of `test_data` with an added 'pred' column, sorted by index.
    """
    result = test_data.copy()
    result['pred'] = pred
    return result.sort_index()

In [None]:
# Each model predicts on the 2022 test split and on the 2023 data; only the
# 2022 split has true flags, so only it gets a confusion matrix.

# 2022 model results with all columns
predictions_2022_full, predictions_2023_full = (
    svc_2022_full.predict(frame.loc[:, ~frame.columns.isin(removable_full_columns)])
    for frame in (X_test_scaled_2022, test_scaled_2023)
)
cm_2022_full = confusion_matrix(y_test_2022['Redox_error_flag'], predictions_2022_full)

# 2022 model results with feature selection
predictions_2022_fs, predictions_2023_fs = (
    svc_2022_fs.predict(frame.loc[:, top_10_features])
    for frame in (X_test_scaled_2022, test_scaled_2023)
)
cm_2022_fs = confusion_matrix(y_test_2022['Redox_error_flag'], predictions_2022_fs)

# 2022 model results with wavelet columns
predictions_2022_wavelet, predictions_2023_wavelet = (
    svc_2022_wavelet.predict(frame.loc[:, full_wavelet_columns])
    for frame in (X_test_scaled_2022, test_scaled_2023)
)
cm_2022_wavelet = confusion_matrix(y_test_2022['Redox_error_flag'], predictions_2022_wavelet)

# Predictions and confusion matrices of the per-sensor models, keyed
# 'sensor_<n>_<year>[_fs|_wavelet]'
sensor_predictions = {}
sensor_cm = {}

# 2022 sensor model results
for sensor in sensors:
    key = f'sensor_{sensor}'
    frames = sensor_data[key]
    x_2022 = frames['X_test_scaled']
    x_2023 = frames['test_scaled_2023']
    y_2022 = frames['y_test']
    # NOTE(review): get_results_df reads the per-sensor truth from y_test as
    # 'Redox_error_flag(<sensor>)', while the confusion matrices below read
    # 'Redox_error_flag' - confirm the per-sensor y_test carries this column.

    # All columns
    drop_columns = get_removable_sensor_columns(sensor)
    all_columns_model = sensor_models[f'svc_sensor_{sensor}']
    sensor_predictions[f'{key}_2022'] = all_columns_model.predict(x_2022.loc[:, ~x_2022.columns.isin(drop_columns)])
    sensor_predictions[f'{key}_2023'] = all_columns_model.predict(x_2023.loc[:, ~x_2023.columns.isin(drop_columns)])
    sensor_cm[f'{key}_2022'] = confusion_matrix(y_2022['Redox_error_flag'], sensor_predictions[f'{key}_2022'])

    # Feature selection
    selected_columns = top_10_sensor_features[key]
    fs_model = sensor_models[f'svc_fs_sensor_{sensor}']
    sensor_predictions[f'{key}_2022_fs'] = fs_model.predict(x_2022.loc[:, selected_columns])
    sensor_predictions[f'{key}_2023_fs'] = fs_model.predict(x_2023.loc[:, selected_columns])
    sensor_cm[f'{key}_2022_fs'] = confusion_matrix(y_2022['Redox_error_flag'], sensor_predictions[f'{key}_2022_fs'])

    # Wavelet columns
    wavelet_columns = get_wavelet_columns(sensor)
    wavelet_model = sensor_models[f'svc_wavelet_sensor_{sensor}']
    sensor_predictions[f'{key}_2022_wavelet'] = wavelet_model.predict(x_2022.loc[:, wavelet_columns])
    sensor_predictions[f'{key}_2023_wavelet'] = wavelet_model.predict(x_2023.loc[:, wavelet_columns])
    sensor_cm[f'{key}_2022_wavelet'] = confusion_matrix(y_2022['Redox_error_flag'], sensor_predictions[f'{key}_2022_wavelet'])

# Print results

### Print functions

In [None]:
# Common print variables
# Subplot grid of the all-sensor figures: a single column, one row per sensor
ncols, nrows = 1, len(sensors)
# Alpha value passed to the scatter markers
opacity = 0.5
# plt.subplots_adjust(hspace=1.0)

def log_false_neg_and_pos_count(results):
    """Print how many rows of `results` are false positives and false negatives.

    `results` must have 'false_pos' and 'false_neg' columns; the colour names
    in the messages are the marker colours used by the plot functions.
    """
    false_pos_total = int((results['false_pos'] == True).sum())
    false_neg_total = int((results['false_neg'] == True).sum())
    print('False positive (cyan) count: ', false_pos_total)
    print('False negative (orange) count: ', false_neg_total)

def plot_confusion_matrix(cm):
    """Draw a 2x2 confusion matrix as an annotated heatmap and show it.

    `cm` is expected to come from sklearn's `confusion_matrix(y_true, y_pred)`:
    rows are the actual classes, columns the predicted classes, and classes are
    in sorted label order, so index 0 is False ("Not Error") and index 1 is
    True ("Error"). The previous labels swapped the axes and reversed the class
    order, which mislabelled the true-negative and true-positive cells.
    Assumes both classes occur, so that `cm` is 2x2.
    """
    class_labels = ['Not Error', 'Error']
    sns.heatmap(cm, annot=True, fmt='g',
            xticklabels=class_labels,
            yticklabels=class_labels)
    # heatmap puts matrix rows on the y axis and matrix columns on the x axis
    plt.ylabel('Actual',fontsize=13)
    plt.xlabel('Prediction',fontsize=13)
    plt.title('Confusion Matrix',fontsize=17)
    plt.show()

# Full data plot functions
def plot_results(results, title):
    """Plot the 2022 predictions of a full-data model, one subplot per sensor.

    Each subplot shows the sensor's 'Redox_Avg(<n>)' series in green and
    overlays the rows flagged as errors (red), true positives (blue), false
    positives (cyan) and false negatives (orange). The false positive/negative
    counts are printed first.
    """
    plt.figure(figsize=(35,nrows*13))
    plt.suptitle(title, fontsize=36, y=0.95)

    log_false_neg_and_pos_count(results)

    # (boolean column, colour, marker size, z-order), in drawing order
    overlays = (
        ('Redox_error_flag', 'r', 35, 5),
        ('true_pos', 'blue', 15, 10),
        ('false_pos', 'cyan', 25, 10),
        ('false_neg', 'orange', 25, 10),
    )

    for position, sensor in enumerate(sensors, start=1):
        redox_column = f'Redox_Avg({sensor})'
        ax = plt.subplot(nrows, ncols, position)
        ax.set_title(f'Predictions for sensor {sensor}', fontsize = 24)
        ax.plot(results.index.array, results[redox_column], c='g', zorder=0)
        for flag_column, colour, size, layer in overlays:
            flagged = results.loc[results[flag_column]==True]
            ax.scatter(flagged.index.array, flagged[redox_column], c=colour, s=size, zorder=layer, alpha=opacity)

def plot_results_2023(results, title):
    """Plot the 2023 predictions of a full-data model, one subplot per sensor.

    Each subplot shows the sensor's 'Redox_Avg(<n>)' series in green and marks
    the rows whose 'pred' is True in red.
    """
    plt.figure(figsize=(35,nrows*13))
    plt.suptitle(title, fontsize=36, y=0.95)

    # The predicted-error rows do not depend on the sensor being drawn
    predicted_errors = results.loc[results['pred']==True]

    for position, sensor in enumerate(sensors, start=1):
        redox_column = f'Redox_Avg({sensor})'
        ax = plt.subplot(nrows, ncols, position)
        ax.set_title(f'Predictions for sensor {sensor}', fontsize = 24)
        ax.plot(results.index.array, results[redox_column], c='g', zorder=0)
        ax.scatter(predicted_errors.index.array, predicted_errors[redox_column], c='r', s=35, zorder=5, alpha=opacity)

# Sensor data plot functions
def plot_sensor_results(results, sensor, title):
    """Plot the 2022 predictions of one sensor's model in a single figure.

    Shows the 'Redox_Avg(<sensor>)' series in green and overlays the rows
    flagged as errors (red, drawn fully opaque), true positives (blue), false
    positives (cyan) and false negatives (orange). The false positive/negative
    counts are printed first.
    """
    plt.figure(figsize=(35,13))
    plt.suptitle(title, fontsize=36, y=0.95)

    log_false_neg_and_pos_count(results)

    redox_column = f'Redox_Avg({sensor})'
    plt.plot(results.index.array, results[redox_column], c='g', zorder=0)

    # (boolean column, scatter keyword arguments), in drawing order; only the
    # true-flag layer is drawn without an alpha value
    overlays = (
        (f'Redox_error_flag({sensor})', dict(c='r', s=35, zorder=5)),
        ('true_pos', dict(c='blue', s=15, zorder=10, alpha=opacity)),
        ('false_pos', dict(c='cyan', s=25, zorder=10, alpha=opacity)),
        ('false_neg', dict(c='orange', s=25, zorder=10, alpha=opacity)),
    )
    for flag_column, scatter_kwargs in overlays:
        flagged = results.loc[results[flag_column]==True]
        plt.scatter(flagged.index.array, flagged[redox_column], **scatter_kwargs)

def plot_2023_sensor_results(results, sensor, title):
    """Plot the 2023 predictions of one sensor's model in a single figure.

    Shows the 'Redox_Avg(<sensor>)' series in green and marks the rows whose
    'pred' is True in red.
    """
    plt.figure(figsize=(35,13))
    plt.suptitle(title, fontsize=36, y=0.95)

    redox_column = f'Redox_Avg({sensor})'
    predicted_errors = results.loc[results['pred']==True]

    plt.plot(results.index.array, results[redox_column], c='g', zorder=0)
    plt.scatter(predicted_errors.index.array, predicted_errors[redox_column], c='r', s=35, zorder=5, alpha=opacity)

## 2022 full columns

In [None]:
# Build the result frames for the all-columns model, then plot them
full_2022_results = get_results_df(
    test_X=X_test_2022,
    test_y=y_test_2022,
    pred=predictions_2022_full,
)
full_2023_results = get_results_df_2023(test_data=test_2023, pred=predictions_2023_full)

plot_confusion_matrix(cm=cm_2022_full)
plot_results(results=full_2022_results, title="2022 all pits, sensors and columns")
plot_results_2023(results=full_2023_results, title="2023 all pits, sensors and columns")

## 2022 feature selection columns

In [None]:
# Build the result frames for the feature-selection model, then plot them
fs_2022_results = get_results_df(
    test_X=X_test_2022,
    test_y=y_test_2022,
    pred=predictions_2022_fs,
)
fs_2023_results = get_results_df_2023(test_data=test_2023, pred=predictions_2023_fs)

plot_confusion_matrix(cm=cm_2022_fs)
plot_results(results=fs_2022_results, title="2022 all pits, sensors and top 10 columns")
plot_results_2023(results=fs_2023_results, title="2023 all pits, sensors and top 10 columns")

## 2022 wavelet columns

In [None]:
# Build the result frames for the wavelet-columns model, then plot them
wavelet_2022_results = get_results_df(
    test_X=X_test_2022,
    test_y=y_test_2022,
    pred=predictions_2022_wavelet,
)
wavelet_2023_results = get_results_df_2023(test_data=test_2023, pred=predictions_2023_wavelet)

plot_confusion_matrix(cm=cm_2022_wavelet)
plot_results(results=wavelet_2022_results, title="2022 all pits, sensors with wavelet columns")
plot_results_2023(results=wavelet_2023_results, title="2023 all pits, sensors with wavelet columns")

## 2022 sensor full columns

In [None]:
# Show the all-columns results of every sensor model: confusion matrix,
# 2022 test-split plot and 2023 plot
for sensor in sensors:
    testing_data_X = sensor_data[f'sensor_{sensor}']['X_test']
    testing_data_y = sensor_data[f'sensor_{sensor}']['y_test']
    testing_data_2023 = sensor_data[f'sensor_{sensor}']['test_2023']

    sensor_results_2022 = get_results_df(testing_data_X, testing_data_y, sensor_predictions[f'sensor_{sensor}_2022'], sensor)
    # get_results_df_2023 is defined with (test_data, pred) only; the extra
    # `sensor` argument passed here before raised TypeError
    sensor_results_2023 = get_results_df_2023(testing_data_2023, sensor_predictions[f'sensor_{sensor}_2023'])

    plot_confusion_matrix(sensor_cm[f'sensor_{sensor}_2022'])
    plot_sensor_results(sensor_results_2022, sensor, f'2022 sensor {sensor} all pits and columns')
    plot_2023_sensor_results(sensor_results_2023, sensor, f'2023 sensor {sensor} all pits and columns')

## 2022 sensor feature selection columns

In [None]:
# Show the feature-selection results of every sensor model: confusion matrix,
# 2022 test-split plot and 2023 plot
for sensor in sensors:
    testing_data_X = sensor_data[f'sensor_{sensor}']['X_test']
    testing_data_y = sensor_data[f'sensor_{sensor}']['y_test']
    testing_data_2023 = sensor_data[f'sensor_{sensor}']['test_2023']

    sensor_results_2022_fs = get_results_df(testing_data_X, testing_data_y, sensor_predictions[f'sensor_{sensor}_2022_fs'], sensor)
    # get_results_df_2023 is defined with (test_data, pred) only; the extra
    # `sensor` argument passed here before raised TypeError
    sensor_results_2023_fs = get_results_df_2023(testing_data_2023, sensor_predictions[f'sensor_{sensor}_2023_fs'])

    plot_confusion_matrix(sensor_cm[f'sensor_{sensor}_2022_fs'])
    plot_sensor_results(sensor_results_2022_fs, sensor, f'2022 sensor {sensor} all pits with top 10 columns')
    plot_2023_sensor_results(sensor_results_2023_fs, sensor, f'2023 sensor {sensor} all pits with top 10 columns')

## 2022 sensor wavelet columns

In [None]:
# Show the wavelet-columns results of every sensor model: confusion matrix,
# 2022 test-split plot and 2023 plot
for sensor in sensors:
    testing_data_X = sensor_data[f'sensor_{sensor}']['X_test']
    testing_data_y = sensor_data[f'sensor_{sensor}']['y_test']
    testing_data_2023 = sensor_data[f'sensor_{sensor}']['test_2023']

    sensor_results_2022_wavelet = get_results_df(testing_data_X, testing_data_y, sensor_predictions[f'sensor_{sensor}_2022_wavelet'], sensor)
    # get_results_df_2023 is defined with (test_data, pred) only; the extra
    # `sensor` argument passed here before raised TypeError
    sensor_results_2023_wavelet = get_results_df_2023(testing_data_2023, sensor_predictions[f'sensor_{sensor}_2023_wavelet'])

    plot_confusion_matrix(sensor_cm[f'sensor_{sensor}_2022_wavelet'])
    plot_sensor_results(sensor_results_2022_wavelet, sensor, f'2022 sensor {sensor} all pits with wavelet columns')
    plot_2023_sensor_results(sensor_results_2023_wavelet, sensor, f'2023 sensor {sensor} all pits with wavelet columns')