## Dynamic Dataset

In [52]:
import os
import pandas as pd
from hmmlearn.hmm import GaussianHMM, CategoricalHMM
from sklearn.metrics import mutual_info_score,normalized_mutual_info_score


In [3]:
def load_dynamic_dataset(data_path="data/driftdataset"):
    dfs = []
    for filename in os.listdir(data_path):
        with open(os.path.join(data_path, filename), "r") as file:
            df_rows = []
            for line in file.readlines():
                curr_row = {}
                line = line.split(";")
                curr_row["gas_type"] = line[0]
                line = line[1].split(" ")
                curr_row["concentration"] = line[0]

                for sensor_value in line[1:]:
                    sensor_value = sensor_value.split(":")
                    if len(sensor_value) == 2:
                        curr_row[f"sensor_{sensor_value[0]}"] = float(sensor_value[1])
                df_rows.append(curr_row)
            dfs.append(pd.DataFrame(df_rows))
    return pd.concat(dfs, axis=0)


In [47]:
df = load_dynamic_dataset("../data/driftdataset")
external_vars = ["gas_type", "concentration"]
X = df.drop(external_vars, axis=1).values
y = df[external_vars]
y["concentration"] = y["concentration"].apply(lambda val: int(float(val)))

In [53]:
from sklearn.decomposition import PCA
dim_model = PCA(n_components=10)
new_data = dim_model.fit_transform(X)

In [54]:
import numpy as np
from sklearn.cluster import KMeans
model = KMeans(10, max_iter=1000)
labels = model.fit_predict(new_data)

In [108]:
model = CategoricalHMM(n_components=6).fit(labels.reshape(-1, 1))
hidden_states = model.predict(labels.reshape(-1, 1))

In [109]:
print(normalized_mutual_info_score(
    y["gas_type"].values,
    hidden_states
))

0.08069078590904835


In [57]:
model = CategoricalHMM(n_components=y["concentration"].nunique()).fit(labels.reshape(-1, 1))
hidden_states = model.predict(labels.reshape(-1, 1))

In [58]:
print(normalized_mutual_info_score(
    y["concentration"].values,
    hidden_states
))

0.24128904814698104
