In [24]:
import pickle
import numpy as np
import pandas as pd
from dataset import Dataset

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score
# from sklearn.metrics import f1_score, accuracy_score, confusion_matrix

from sklearn.naive_bayes import GaussianNB
from sklearn.tree import ExtraTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import BaggingClassifier, VotingClassifier

In [25]:
def _unpickle(path):
    """Return the object stored in the pickle file at `path`."""
    # pickle can execute arbitrary code while loading; only use trusted files.
    with open(path, "rb") as handle:
        return pickle.load(handle)


# The three pickled inputs that the Dataset constructor takes, in order.
sensor_data = _unpickle("lpf_sensor_data.pkl")
labels = _unpickle("sensor_labels.pkl")
interp_funcs = _unpickle("interpolation_functions.pkl")

dataset = Dataset(sensor_data, labels, interp_funcs)

In [30]:
class_subset = [0, 1, 2, 3, 4, 5, 6, 7]
random_state = 42

cv_folds = 5
# Sensors are evaluated in adjacent pairs: (0, 1), (2, 3), (4, 5), (6, 7).
sensor_pairs = [(s, s + 1) for s in range(0, 8, 2)]
clf_names = ["KNN", "GaussianNB", "BaggingTree", "MLP", "VotingClassifier"]

# Column-oriented scores: one list per classifier, one entry appended per
# (matrix, sensor pair) combination, in the order the loops below visit them.
df_dict = {name: [] for name in clf_names}

# Nested scores: results["mat_<m>"]["s_<l>_<r>"] is a list of
# {"clf": name, "accuracy": score} records for that combination.
results = {
    f"mat_{m}": {f"s_{s_l}_{s_r}": [] for s_l, s_r in sensor_pairs}
    for m in range(2)
}


def _build_base_classifiers(seed):
    """Return fresh, unfitted (name, estimator) pairs for the four base models.

    The list doubles as the `estimators` argument of the VotingClassifier, so
    every name must be unique and must be paired with its own estimator.
    """
    bagged_trees = BaggingClassifier(
        ExtraTreeClassifier(random_state=seed, criterion="entropy"),
        random_state=seed,
        n_estimators=15)
    # NOTE(review): per the scikit-learn docs, `learning_rate` is only used
    # when solver="sgd"; with the default solver it has no effect. Kept as-is.
    mlp = MLPClassifier(
        hidden_layer_sizes=(256,),
        activation="logistic",
        learning_rate="adaptive",
        max_iter=2000,
        random_state=seed)
    return [
        ("KNN", KNeighborsClassifier(n_neighbors=3)),
        ("GaussianNB", GaussianNB()),
        ("BaggingTree", bagged_trees),
        ("MLP", mlp),
    ]


def _mean_cv_accuracy(estimator, X, y):
    """Return the mean `cv_folds`-fold cross-validation score, rounded to 3 decimals.

    No `scoring` is passed, so cross_val_score uses the estimator's own
    `score` method (accuracy for these classifiers).
    """
    return np.round(np.mean(cross_val_score(estimator, X, y, cv=cv_folds)), 3)


for matrix in range(2):
    for s_l, s_r in sensor_pairs:
        print(f"Mat {matrix}, Sensors {s_l} & {s_r}")

        X, y, _, _ = dataset.get_sensor_pair_cls(matrix,
                                                 (s_l, s_r),
                                                 num_samples=100,
                                                 as_log=True,
                                                 as_mean=False,
                                                 class_subset=class_subset,
                                                 include_types=["gas"])

        # NOTE(review): the scaler is fit on all samples before
        # cross-validation, so held-out folds contribute to the scaling
        # statistics. Wrapping scaler + estimator in a Pipeline would avoid
        # that, but it would change the reported scores, so it is left as-is.
        X = StandardScaler().fit_transform(X)

        classifiers = _build_base_classifiers(random_state)
        # Soft-voting ensemble over the four base models. Previously the "KNN"
        # slot was filled with the GaussianNB instance (or raised NameError on
        # a fresh kernel), so KNN never took part in the vote.
        vclf = VotingClassifier(estimators=classifiers, voting="soft")

        pair_results = results[f"mat_{matrix}"][f"s_{s_l}_{s_r}"]
        for clf_name, clf in classifiers + [("VotingClassifier", vclf)]:
            accuracy = _mean_cv_accuracy(clf, X, y)
            pair_results.append({"clf": clf_name, "accuracy": accuracy})
            df_dict[clf_name].append(accuracy)

Mat 0, Sensors 0 & 1
Mat 0, Sensors 2 & 3
Mat 0, Sensors 4 & 5
Mat 0, Sensors 6 & 7
Mat 1, Sensors 0 & 1
Mat 1, Sensors 2 & 3
Mat 1, Sensors 4 & 5
Mat 1, Sensors 6 & 7


In [31]:
# One column per classifier; rows are in the order the scores were appended
# to df_dict.
df = pd.DataFrame(df_dict)

# Build the class suffix outside the f-string: reusing double quotes and
# breaking lines inside a replacement field only parses on Python >= 3.12.
class_tag = "_".join(str(x) for x in class_subset)
df.to_excel(f"model_accu_scores_{class_tag}.xlsx")

Testing model hyperparameters

In [None]:
# Quick comparison of GaussianNB and 3-NN on a reduced class subset, printed
# per sensor pair with a separator line after each matrix.
class_subset = [1, 2, 3, 4]
random_state = 42

for matrix in range(2):
    for s in range(0, 8, 2):
        s_l, s_r = s, s + 1
        print(f"S {s_l} & {s_r}")

        X, y, _, _ = dataset.get_sensor_pair_cls(
            matrix,
            (s_l, s_r),
            num_samples=100,
            as_log=True,
            as_mean=False,
            class_subset=class_subset,
            include_types=["gas"],
        )

        # Standardise the features before scoring either model.
        scaler = StandardScaler()
        X = scaler.fit_transform(X)

        gnb = GaussianNB()
        knn = KNeighborsClassifier(n_neighbors=3)

        # Same 5-fold scoring for both models; the label is the printed prefix.
        for tag, estimator in (("gnb", gnb), ("knn", knn)):
            accuracy = np.mean(cross_val_score(estimator, X, y, cv=5))
            print(f"{tag}: {accuracy:.3f}")
    print("-" * 10)

S 0 & 1
gnb: 0.768
knn: 0.897
S 2 & 3
gnb: 1.000
knn: 1.000
S 4 & 5
gnb: 1.000
knn: 0.993
S 6 & 7
gnb: 0.995
knn: 1.000
----------
S 0 & 1
gnb: 0.757
knn: 0.900
S 2 & 3
gnb: 0.997
knn: 1.000
S 4 & 5
gnb: 1.000
knn: 1.000
S 6 & 7
gnb: 0.847
knn: 0.950
----------
