In [None]:
import pandas as pd

def _load_indexed_csv(csv_path):
    """Read ``csv_path`` and index its rows by the parsed 'timestamp' column."""
    frame = pd.read_csv(csv_path)
    frame['timestamp'] = pd.to_datetime(frame['timestamp'])
    # Return a new frame instead of mutating in place so re-running the cell is safe.
    return frame.set_index('timestamp')


def _resample_filled(frame, rule="5s"):
    """Average ``frame`` over ``rule``-wide time bins and fill the empty bins.

    Empty bins are filled forward first, then backward (which only affects
    bins before the first observation).
    """
    # .ffill()/.bfill() replace fillna(method=...), which recent pandas deprecates.
    return frame.resample(rule).mean().ffill().bfill()


energy_data = _load_indexed_csv("Extra//energy.csv")
resampled_energy = _resample_filled(energy_data)

env_data = _load_indexed_csv("Extra//environment.csv")
resampled_env = _resample_filled(env_data)

# Each derived feature is the row-wise mean of the phase A/B/C columns
# whose names follow the given template.
phase_column_templates = {
    'reactive_power': "Reactive Power {} average [kVAr]",
    'thdi': "THDI {} average [%]",
    'thdu': "THDU {} average [%]",
    'current': "Current {} average [A]",
    'power_factor': "Power Factor {} average",
}
for feature_name, template in phase_column_templates.items():
    per_phase_columns = [template.format(phase) for phase in "ABC"]
    resampled_energy[feature_name] = resampled_energy[per_phase_columns].mean(axis=1)

# Combine both sources on the shared 5-second index and keep only the model features.
feature_columns = ["reactive_power", "thdi", "thdu", "current","power_factor", "Xacc","yaw","pitch"]
useful_data = resampled_energy.join(resampled_env)[feature_columns].dropna()

# Discard the last 10 rows plus the remainder modulo 10, so (for frames longer
# than that) the remaining row count is a multiple of 10.
# NOTE(review): the reason for dropping the extra 10 rows is not stated here — confirm.
n_trailing_rows = 10 + (useful_data.shape[0] % 10)
useful_data = useful_data.iloc[:-n_trailing_rows]
useful_data.head()

In [None]:
from sklearn.preprocessing import StandardScaler
# Standardise every feature column of useful_data; the fitted scaler is kept
# in `scaler` and the transformed array in `scaled_data`.
scaler = StandardScaler()
scaled_data = scaler.fit(useful_data).transform(useful_data)

In [None]:
# Model selection: find the number of hidden states that gives the best score.
from hmmlearn import hmm
import numpy as np
# Seed NumPy's global random generator before the HMMs are fitted.
# NOTE(review): presumably hmmlearn draws its random initialisation from this
# global generator when no random_state is passed — confirm.
np.random.seed(33)
import matplotlib.pyplot as plt

def mdl_score(model, data):
    """Return a complexity-penalised log-likelihood for a fitted HMM.

    The score is ``model.score(data) - n_params**2 * log(n_samples)``.

    Parameters
    ----------
    model : object exposing ``n_components`` and a ``score(data)`` method.
    data : object whose ``shape`` is ``(n_samples, n_features)``.

    Returns
    -------
    The penalised score as a float.
    """
    n_samples, n_features = data.shape[0], data.shape[1]
    k = model.n_components

    # NOTE(review): emissions are counted as one value per state and feature;
    # if the model also fits per-feature variances they are not included here,
    # and the penalty uses the *squared* parameter count — confirm both are intended.
    free_parameter_counts = (
        k * (k - 1),       # transition matrix
        k * n_features,    # emissions
        k - 1,             # initial state distribution
    )
    n_params = sum(free_parameter_counts)

    complexity_penalty = np.square(n_params) * np.log(n_samples)
    return model.score(data) - complexity_penalty

n_runs = 10
n_states_range = range(4, 9)
mdl_scores = []

# For every candidate state count, fit n_runs HMMs and record the mean score.
for n_states in n_states_range:
    run_scores = np.empty(n_runs)

    for run in range(n_runs):
        model = hmm.GaussianHMM(n_components=n_states, covariance_type='diag')
        model.fit(scaled_data)
        run_scores[run] = mdl_score(model, scaled_data)

    mean_score = run_scores.mean()
    print(n_states, mean_score, sep="\n")
    mdl_scores.append(mean_score)

# Mean score against the number of hidden states.
fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(n_states_range, mdl_scores, marker='o', linestyle='-', linewidth=2)
ax.set_xlabel("Number of hidden states")
ax.set_ylabel("Average MDL score")
ax.grid()
plt.show()

In [None]:
from hmmlearn import hmm
import numpy as np
np.random.seed(33)

# Refit one HMM using the state count with the highest average score,
# then label every sample with its predicted hidden state.
best_n_states = n_states_range[np.argmax(mdl_scores)]
model = hmm.GaussianHMM(n_components=best_n_states, covariance_type='diag')
model.fit(scaled_data)
hidden_states = model.predict(scaled_data)

from sklearn.preprocessing import MinMaxScaler
# NOTE: this rebinds `scaler`, which previously held the StandardScaler.
scaler = MinMaxScaler()
min_max_values = scaler.fit_transform(useful_data.values)
min_maxed = pd.DataFrame(
    min_max_values,
    columns=useful_data.columns,
    index=useful_data.index,
)

# Append the state labels as the last column and expose the index as a
# leading 'Date' column for plotting.
min_maxed['states'] = hidden_states
min_maxed.insert(0, 'Date', pd.to_datetime(min_maxed.index))

In [None]:
import plotly.express as px
import plotly.graph_objects as go

# Hand-assigned names for the hidden-state indices, and the background colour of each.
# NOTE(review): the model may have more than five states and state indices can
# change between fits — confirm this mapping matches the fitted model.
state_labels = {0:'On', 1: 'Mode-2', 2: 'Off', 3: 'Moving', 4:'Mode-1'}
color_map = {"Off": "black", "On": "white", "Mode-1" : "cyan", "Mode-2": "yellow", "Moving": "green"}
fallback_color = "lightgray"  # used for any state without an entry above

# .copy() so that adding the 'colors' column does not write into a slice of min_maxed.
new_df = min_maxed.loc['2022-11-08 14:30:00':'2022-11-08 16:00:00'].copy()
if new_df.empty:
    raise ValueError("No samples between 2022-11-08 14:30:00 and 2022-11-08 16:00:00")

# States without a hand-assigned name get a generic label instead of NaN,
# which would otherwise break the colour lookup below.
new_df['colors'] = new_df['states'].map(lambda state: state_labels.get(state, f"State-{state}"))

fig1 = px.line(new_df, x = 'Date', y = 'current')
fig1.update_traces(line=dict(color = 'black'))
# fig2 is built here but is not added to fig3 in this cell.
fig2 = px.scatter(new_df, x = 'Date', y = 'current', color='colors')
fig2.update_traces(marker=dict(size=4))
# The plotted 'current' column comes from min_maxed, so the axis is labelled as scaled.
fig3 = go.Figure(data=fig1.data).update_layout(xaxis_title="Time", yaxis_title="Current average (min-max scaled)")

#start background
# Walk the samples in order and shade each run of consecutive identical modes.
start_mode = str(new_df['colors'].iloc[0])
start_date = str(new_df['Date'].iloc[0])

for timestamp, current_mode in zip(new_df['Date'], new_df['colors']):
    if current_mode != start_mode:
        fig3.add_vrect(x0=start_date, x1=str(timestamp), fillcolor=color_map.get(start_mode, fallback_color), opacity=0.5)
        start_mode = current_mode
        start_date = str(timestamp)

# Close the final run at the last sample of the window.
fig3.add_vrect(x0=start_date, x1=str(new_df['Date'].iloc[-1]), fillcolor=color_map.get(start_mode, fallback_color), opacity=0.5)
fig3.update_layout(xaxis=dict(showgrid=False), yaxis=dict(showgrid=False))
#end background

fig3.show()

In [None]:
# Per-sample log-likelihoods from the fitted model (private hmmlearn method).
# NOTE(review): presumably shaped (n_samples, n_states) — confirm.
log_probability = model._compute_log_likelihood(scaled_data)

# log(sum over axis 1 of exp(log_probability)), computed in log space.
# Exponentiating first underflows to 0 for very unlikely samples, which makes
# the most extreme points tie at 0 and can hide them from the `<` test below.
log_likelihoods = np.logaddexp.reduce(log_probability, axis=1)

# np.percentile takes q on a 0-100 scale, so 0.01 selects the lowest 0.01 % of samples.
log_threshold = np.percentile(log_likelihoods, 0.01)
anomalies = np.where(log_likelihoods < log_threshold)[0]

# Linear-scale values kept under the original names for any later cell that reads them.
likelihoods = np.exp(log_likelihoods)
threshold = np.exp(log_threshold)
#to-do: compute the z-score in order to find which feature most likely causes each anomaly
display(anomalies)

In [None]:
import plotly.express as px
import plotly.graph_objects as go

# Hand-assigned names for the hidden-state indices, and the background colour of each.
# NOTE(review): the model may have more than five states — confirm this mapping
# matches the fitted model.
state_labels = {0:'On', 1: 'Mode-2', 2: 'Off', 3: 'Moving', 4:'Mode-1'}
color_map = {"Off": "black", "On": "white", "Mode-1" : "cyan", "Mode-2": "yellow", "Moving": "green"}
fallback_color = "lightgray"  # used for any state without an entry above

context_rows = 100                      # samples shown on each side of an anomaly
last_position = len(useful_data) - 1

for anomaly in anomalies:
    # Clamp the window to the data: a negative position would wrap around to the
    # end of the frame and a position past the end would raise IndexError.
    first_position = max(anomaly - context_rows, 0)
    final_position = min(anomaly + context_rows, last_position)
    window_start = str(useful_data.index[first_position])
    window_end = str(useful_data.index[final_position])

    # .copy() so that adding the 'colors' column does not write into a slice of min_maxed.
    anomaly_window = min_maxed.loc[window_start:window_end].copy()
    # States without a hand-assigned name get a generic label instead of NaN.
    anomaly_window['colors'] = anomaly_window['states'].map(
        lambda state: state_labels.get(state, f"State-{state}")
    )

    # One black trace per feature, hidden until selected in the legend.
    line_fig = px.line(anomaly_window, x = 'Date', y = useful_data.columns)
    line_fig.update_traces(line=dict(color = 'black'), visible='legendonly')
    fig = go.Figure(data=line_fig.data).update_layout(xaxis_title="Time", yaxis_title='MinMaxed Data')

    #start background
    # Shade each run of consecutive identical modes.
    start_mode = str(anomaly_window['colors'].iloc[0])
    start_date = str(anomaly_window['Date'].iloc[0])

    for timestamp, current_mode in zip(anomaly_window['Date'], anomaly_window['colors']):
        if current_mode != start_mode:
            fig.add_vrect(x0=start_date, x1=str(timestamp), fillcolor=color_map.get(start_mode, fallback_color), opacity=0.5)
            start_mode = current_mode
            start_date = str(timestamp)

    # Close the final run at the last sample of the window.
    fig.add_vrect(x0=start_date, x1=str(anomaly_window['Date'].iloc[-1]), fillcolor=color_map.get(start_mode, fallback_color), opacity=0.5)

    # Highlight the anomaly itself: one sample either side, clamped to the data.
    highlight_start = str(useful_data.index[max(anomaly - 1, 0)])
    highlight_end = str(useful_data.index[min(anomaly + 1, last_position)])
    fig.add_vrect(x0=highlight_start, x1=highlight_end, fillcolor="red", opacity=0.5)
    #end background

    fig.update_layout(xaxis=dict(showgrid=False), yaxis=dict(showgrid=False))
    fig.show()