In [None]:
import pandas as pd
# Load the raw energy and environment logs (paths are relative to the
# notebook's working directory).
energy_data = pd.read_csv("Extra//energy.csv")
env_data = pd.read_csv("Extra//environment.csv")

# Parse the timestamps and use them as the index so the frame can be
# resampled by time.
energy_data["timestamp"] = pd.to_datetime(energy_data["timestamp"], format='%Y-%m-%d %H:%M:%S')
energy_data.index = energy_data["timestamp"]

# Average the readings onto a regular 5-second grid.
# * `timestamp` is dropped first because it is already the index; carrying it
#   through the aggregation can leave a `timestamp` column in the result,
#   which would clash with the same column from the environment frame when
#   the two are joined without suffixes.
# * `numeric_only=True` makes the mean skip any non-numeric columns explicitly
#   instead of relying on the pandas-version-dependent default.
resampled_df = (
    energy_data
    .drop(columns="timestamp")
    .resample("5s")
    .mean(numeric_only=True)
)
resampled_df.head()

In [None]:
# Parse the environment timestamps and use them as the index so the frame can
# be resampled by time.
env_data["timestamp"] = pd.to_datetime(env_data["timestamp"], format='%Y-%m-%d %H:%M:%S')
env_data.index = env_data["timestamp"]

# Average the readings onto a regular 5-second grid.
# * `timestamp` is dropped first because it is already the index; carrying it
#   through the aggregation can leave a `timestamp` column in the result,
#   which would clash with the same column from the energy frame when the two
#   are joined without suffixes.
# * `numeric_only=True` makes the mean skip any non-numeric columns explicitly
#   instead of relying on the pandas-version-dependent default.
resampled_env = (
    env_data
    .drop(columns="timestamp")
    .resample("5s")
    .mean(numeric_only=True)
)
resampled_env.head()

In [None]:
# Collapse the three per-phase (A/B/C) readings of each quantity into a single
# column by averaging them row-wise.
# NOTE: `power` is the mean of the three *power factor* columns, not a power
# in kW; the name is kept because later cells select it by that name.
phase_columns = {
    "active_power": ["Active Power A average [kW]", "Active Power B average [kW]", "Active Power C average [kW]"],
    "thdi": ["THDI A average [%]", "THDI B average [%]", "THDI C average [%]"],
    "thdu": ["THDU A average [%]", "THDU B average [%]", "THDU C average [%]"],
    "current": ["Current A average [A]", "Current B average [A]", "Current C average [A]"],
    "power": ["Power Factor A average", "Power Factor B average", "Power Factor C average"],
}
# `assign` returns a new frame (overwriting same-named columns), so running
# this cell again simply recomputes the derived columns.
resampled_df = resampled_df.assign(
    **{name: resampled_df[columns].mean(axis=1) for name, columns in phase_columns.items()}
)

# Attach the environment readings on the shared time index. Only columns that
# are not already present are joined: a plain `join` raises on overlapping
# column names, which is exactly what happens when this cell is re-run after
# the environment columns have been added once.
new_env_columns = [column for column in resampled_env.columns if column not in resampled_df.columns]
resampled_df = resampled_df.join(resampled_env[new_env_columns])

# Keep only the modelling features and drop every time bin in which any of
# them is missing.
useful_data = resampled_df[["active_power", "thdi", "thdu", "current", "power", "Xacc", "pitch", "yaw"]].dropna()
# Alternative feature set (environment columns only):
#useful_data = resampled_df[["Xacc","pitch", "yaw"]]
useful_data.head()

In [None]:
from sklearn.preprocessing import MinMaxScaler, StandardScaler

# Standardised feature matrix (returned by the scaler as a plain array); this
# is what the model cells below are fitted on.
scaled_data = StandardScaler().fit_transform(useful_data)

# Min-max scaled version of the same features, rebuilt as a DataFrame so it
# keeps the original column names and time index. `scaler` stays bound to the
# fitted MinMaxScaler.
scaler = MinMaxScaler()
min_maxed = pd.DataFrame(
    data=scaler.fit_transform(useful_data.values),
    index=useful_data.index,
    columns=useful_data.columns,
)

In [None]:
"""
from sklearn.mixture import BayesianGaussianMixture
BGM = BayesianGaussianMixture(n_components = 5).fit(scaled_data)
clusters = BGM.predict(scaled_data)
min_maxed = useful_data.assign(states = clusters)
min_maxed.insert(loc=0,column='Date', value=pd.to_datetime(min_maxed.index))
"""

In [None]:
from hmmlearn import hmm
import numpy as np

# Seed NumPy's global random generator before fitting (presumably so the
# model's random initialisation, and therefore the state numbering, is
# repeatable between runs).
np.random.seed(33)

# Fit a 5-state Gaussian HMM on the standardised features and decode the most
# likely state for every sample.
model = hmm.GaussianHMM(n_components=5).fit(scaled_data)
hidden_states = model.predict(scaled_data)

# NOTE: despite its name, `min_maxed` is rebuilt here from the *unscaled*
# `useful_data`, with the decoded state appended as `states` and the time
# index duplicated into a leading `Date` column.
min_maxed = useful_data.assign(states=hidden_states)
min_maxed.insert(0, 'Date', pd.to_datetime(min_maxed.index))

In [None]:
import plotly.express as px
import plotly.graph_objects as go

# Time window to plot (inclusive label slice on the datetime index).
window_start = '2022-11-08 14:00:00'
window_end = '2022-11-08 16:00:00'

# `.copy()` gives an independent frame, so adding the `colors` column below
# does not write into a slice of `min_maxed` (chained assignment /
# SettingWithCopyWarning).
new_df = min_maxed.loc[window_start:window_end].copy()
if new_df.empty:
    raise ValueError(f"No samples between {window_start} and {window_end}; nothing to plot.")

# Map each numeric HMM state to a mode label (the column is called `colors`,
# but it holds the label; the actual colour comes from `color_map`).
new_df['colors'] = new_df['states'].map({0:'Off', 1: 'Mode-2', 2: 'Extra', 3: 'Mode-1', 4:'On', 5:'Moving',
                                         6: 'Extra1', 7: 'Extra2'})
color_map = {"Off": "blue", "Extra": "yellow", "On": "green", "Mode-1" : "magenta", "Mode-2": "orange", "Moving": "cyan",
             "Extra1": "black", "Extra2": "white"}

fig1 = px.line(new_df, x = 'Date', y = 'current')
fig1.update_traces(line=dict(color = 'black'))
# fig2 is built but never added to fig3 or shown in this cell; it is kept in
# case another cell displays it.
fig2 = px.scatter(new_df, x = 'Date', y = 'Xacc', color='colors')
fig2.update_traces(marker=dict(size=4))
fig3 = go.Figure(data=fig1.data).update_layout(xaxis_title="Time", yaxis_title="Current average [A]")

#start background
# A "run" is a maximal stretch of consecutive samples with the same mode.
# Each run is shaded from its first sample to the first sample of the next
# run; the final run extends to the last sample in the window.
mode_changed = new_df['colors'] != new_df['colors'].shift()
run_starts = new_df.loc[mode_changed, ['Date', 'colors']]
run_ends = run_starts['Date'].shift(-1).fillna(new_df['Date'].iloc[-1])

for run_start, run_end, mode in zip(run_starts['Date'], run_ends, run_starts['colors']):
    fig3.add_vrect(x0=str(run_start), x1=str(run_end), fillcolor=color_map[mode], opacity=0.5)

fig3.update_layout(xaxis=dict(showgrid=False), yaxis=dict(showgrid=False))
#end background

fig3.show()