In [None]:
import pandas as pd
# Load the raw energy readings from disk.
energy_data = pd.read_csv("Extra//energy.csv")

# Parse the timestamp strings once, then store them both as the column and as the
# index; a datetime index is what lets `resample` bucket the rows by time.
parsed_timestamps = pd.to_datetime(energy_data["timestamp"], format='%Y-%m-%d %H:%M:%S')
energy_data["timestamp"] = parsed_timestamps
energy_data.index = parsed_timestamps

# Average every reading that falls into the same 5-second bucket.
resampled_df = energy_data.resample("5s").mean()
resampled_df.head()

In [None]:
# Each derived column is the row-wise mean of the three per-phase (A/B/C) measurements.
# NOTE: 'power' holds the averaged *power factor*, not a power reading.
phase_columns = {
    'reactive_power': ["Reactive Power A average [kVAr]", "Reactive Power B average [kVAr]", "Reactive Power C average [kVAr]"],
    'active_power': ["Active Power A average [kW]", "Active Power B average [kW]", "Active Power C average [kW]"],
    'thdi': ["THDI A average [%]", "THDI B average [%]", "THDI C average [%]"],
    'thdu': ["THDU A average [%]", "THDU B average [%]", "THDU C average [%]"],
    'current': ["Current A average [A]", "Current B average [A]", "Current C average [A]"],
    'power': ["Power Factor A average", "Power Factor B average", "Power Factor C average"],
}
for derived_name, per_phase in phase_columns.items():
    resampled_df[derived_name] = resampled_df[per_phase].mean(axis=1)

# Keep only the derived columns and drop every row that has a missing value in any of them.
useful_data = resampled_df[list(phase_columns)].dropna()
useful_data

In [None]:
from sklearn.preprocessing import StandardScaler
# Standardise every column of `useful_data`; the result is a plain NumPy array
# whose columns are in the same order as the frame's columns.
scaler = StandardScaler()
scaled_data = scaler.fit_transform(useful_data)
scaled_data

In [None]:
from hmmlearn import hmm
import numpy as np
# Seed NumPy's global generator before fitting. Later plotting cells map the
# state numbers to fixed labels, so the fit has to be repeatable.
np.random.seed(33)

# Fit a 6-state Gaussian HMM on the standardised features and decode one state per row.
model = hmm.GaussianHMM(n_components=6)
model.fit(scaled_data)
hidden_states = model.predict(scaled_data)

# Attach the decoded state to the unscaled features and expose the index as a
# leading 'Date' column for plotting.
relevant_data = useful_data.assign(states=hidden_states)
relevant_data.insert(0, 'Date', pd.to_datetime(relevant_data.index))
relevant_data

In [None]:
import plotly.express as px
import plotly.graph_objects as go

# Label shown for each HMM state number, and the background colour used for each label.
state_labels = {0:'Extra', 1: 'Moving', 2: 'On', 3: 'Off', 4:'Mode-2', 5:'Mode-1'}
color_map = {"Off": "black", "Extra": "cyan", "On": "white", "Mode-1" : "red", "Mode-2": "yellow", "Moving": "green"}

# Take an explicit copy of the two-hour window: adding a column to a bare `.loc`
# slice of `relevant_data` triggers pandas' SettingWithCopyWarning.
new_df = relevant_data.loc['2022-11-08 14:00:00':'2022-11-08 16:00:00'].copy()
if new_df.empty:
    raise ValueError("relevant_data has no rows between 2022-11-08 14:00:00 and 2022-11-08 16:00:00")
new_df['colors'] = new_df['states'].map(state_labels)

fig1 = px.line(new_df, x = 'Date', y = 'current')
fig1.update_traces(line=dict(color = 'black'))
# fig2 is built but not added to fig3 in this cell; kept so the name stays available.
fig2 = px.scatter(new_df, x = 'Date', y = 'current', color='colors')
fig2.update_traces(marker=dict(size=4))
fig3 = go.Figure(data=fig1.data).update_layout(xaxis_title="Time", yaxis_title="Current average [A]")

#start background
# Walk the rows in time order and shade each run of consecutive identical labels:
# a run is closed (and drawn) at the first row whose label differs.
start_mode = str(new_df.iloc[0]["colors"])
start_date = str(new_df.iloc[0]["Date"])

for row_date, current_mode in zip(new_df["Date"], new_df["colors"]):
    if current_mode != start_mode:
        fig3.add_vrect(x0=start_date, x1=str(row_date), fillcolor=color_map[start_mode], opacity=0.5)
        start_mode = current_mode
        start_date = str(row_date)

# The last run has no following row to close it, so close it at the final timestamp.
fig3.add_vrect(x0=start_date, x1=str(new_df.iloc[-1]["Date"]), fillcolor=color_map[start_mode], opacity=0.5)
fig3.update_layout(xaxis=dict(showgrid=False), yaxis=dict(showgrid=False))
#end background

fig3.show()

In [None]:
from keras.layers import Input, LSTM, RepeatVector
from keras.models import Model
from sklearn.preprocessing import StandardScaler

# Number of consecutive 5-second samples fed to the LSTM as one sequence.
WINDOW_SIZE = 10


def _with_timestamp_index(frame):
    """Parse ``frame.timestamp`` as datetimes and use it as the index.

    The frame is modified in place and also returned, so the call can wrap
    ``pd.read_csv`` directly. The ``timestamp`` column itself is kept.
    """
    frame.timestamp = pd.to_datetime(frame.timestamp, format='%Y-%m-%d %H:%M:%S')
    frame.index = frame.timestamp
    return frame


#get data
energy_data = _with_timestamp_index(pd.read_csv("Extra//energy.csv"))
resampled_energy = energy_data.resample("5s").mean()

env_data = _with_timestamp_index(pd.read_csv("Extra//environment.csv"))
resampled_env = env_data.resample("5s").mean()

#preprocess data
# Each derived column is the row-wise mean of the three per-phase (A/B/C) measurements.
energy_phase_columns = {
    'reactive_power': ["Reactive Power A average [kVAr]", "Reactive Power B average [kVAr]", "Reactive Power C average [kVAr]"],
    'thdi': ["THDI A average [%]", "THDI B average [%]", "THDI C average [%]"],
    'thdu': ["THDU A average [%]", "THDU B average [%]", "THDU C average [%]"],
    'current': ["Current A average [A]", "Current B average [A]", "Current C average [A]"],
    'power_factor': ["Power Factor A average", "Power Factor B average", "Power Factor C average"],
}
for derived_name, per_phase in energy_phase_columns.items():
    resampled_energy[derived_name] = resampled_energy[per_phase].mean(axis=1)

# Align energy and environment readings on their shared 5-second index, keep the
# three model features, and drop rows where any of them is missing.
useful_data = resampled_energy.join(resampled_env)
useful_data = useful_data[["thdi","Xacc","pitch"]].dropna()

# Trim to a whole number of windows so the reshape below is exact. Slicing by the
# number of rows to keep (rather than a negative stop) avoids the `[:-0]` pitfall
# without throwing away an extra full window of valid data.
usable_rows = len(useful_data) - len(useful_data) % WINDOW_SIZE
useful_data = useful_data.iloc[:usable_rows]

#scale data
standard_scaler = StandardScaler()
scaled_data = standard_scaler.fit_transform(useful_data)
# (rows, features) -> (windows, WINDOW_SIZE, features)
reshaped_scaled_data = scaled_data.reshape(-1, WINDOW_SIZE, scaled_data.shape[1])

#autoencoder LSTM
input_shape = (reshaped_scaled_data.shape[1], reshaped_scaled_data.shape[2])
encoding_dim = 100
inputs = Input(shape=input_shape)
# Encoder: compress each window into a single vector of length `encoding_dim`.
encoded = LSTM(encoding_dim, activation='relu')(inputs)

# Decoder: repeat the encoding once per time step and unroll it back into a sequence.
# NOTE(review): with a relu activation this output layer looks unable to produce
# negative values, while the standardised inputs are centred on zero. A linear
# output layer would fit better; left unchanged because the anomaly threshold
# used downstream depends on this model's error scale.
decoded = RepeatVector(input_shape[0])(encoded)
decoded = LSTM(input_shape[1], activation='relu', return_sequences=True)(decoded)

autoencoder = Model(inputs, decoded)
autoencoder.compile(optimizer='adam', loss='mse')

# NOTE(review): no seed is set for Keras here, so weights and results vary between runs.
autoencoder.fit(reshaped_scaled_data, reshaped_scaled_data, epochs=16, batch_size=32)
predictions = autoencoder.predict(reshaped_scaled_data)
# Flatten the windows back to (rows, features) so rows line up with `scaled_data`.
predictions = predictions.reshape(-1, predictions.shape[2])

In [None]:
#get anomalies
# Despite its name, `mse` is the element-wise squared reconstruction error
# (one value per row and feature); no averaging is applied.
mse = np.square(scaled_data - predictions)

# Tuple of (row indices, column indices) for every entry whose squared error exceeds 500.
anomalies = np.nonzero(mse > 500)
anomalies

In [None]:
import plotly.express as px
import plotly.graph_objects as go

from sklearn.preprocessing import MinMaxScaler
# Rescale every feature to [0, 1] for plotting, keeping the original index and column names.
scaler = MinMaxScaler()
plot_df = pd.DataFrame(scaler.fit_transform(useful_data.values), columns=useful_data.columns, index=useful_data.index)

# Label shown for each HMM state number, and the background colour used for each label.
state_labels = {0:'Extra', 1: 'Moving', 2: 'On', 3: 'Off', 4:'Mode-2', 5:'Mode-1'}
color_map = {"Off": "black", "Extra": "cyan", "On": "white", "Mode-1" : "red", "Mode-2": "yellow", "Moving": "green"}

# Number of rows of context plotted on each side of an anomaly.
CONTEXT_ROWS = 100


def _row_timestamp(position):
    """Return the index label of a `useful_data` row as a string.

    `position` is clamped to the valid row range. Without clamping, a negative
    position would silently wrap around to the end of the frame and a position
    past the last row would raise IndexError.
    """
    clamped = min(max(int(position), 0), len(useful_data) - 1)
    return str(useful_data.index[clamped])


def _shade_state_runs(fig, state_df):
    """Shade `fig` with one translucent rectangle per run of identical labels.

    `state_df` needs 'Date' and 'colors' columns in time order. A run is closed
    at the first row whose label differs; the final run is closed at the last
    timestamp. Nothing is drawn when `state_df` is empty.
    """
    if state_df.empty:
        return
    run_mode = str(state_df.iloc[0]["colors"])
    run_start = str(state_df.iloc[0]["Date"])
    for row_date, row_mode in zip(state_df["Date"], state_df["colors"]):
        if row_mode != run_mode:
            fig.add_vrect(x0=run_start, x1=str(row_date), fillcolor=color_map[run_mode], opacity=0.5)
            run_mode = row_mode
            run_start = str(row_date)
    fig.add_vrect(x0=run_start, x1=str(state_df.iloc[-1]["Date"]), fillcolor=color_map[run_mode], opacity=0.5)


# Plot the first 30 (row, feature) anomaly hits, one figure each.
for anomaly, feature_pos in zip(anomalies[0][:30], anomalies[1][:30]):
    feature_name = useful_data.columns[feature_pos]
    window_start = _row_timestamp(anomaly - CONTEXT_ROWS)
    window_end = _row_timestamp(anomaly + CONTEXT_ROWS)

    # Copy the slices before adding columns so the source frames are never written
    # through a view (avoids SettingWithCopyWarning).
    bg_df = relevant_data.loc[window_start:window_end].copy()
    bg_df['colors'] = bg_df['states'].map(state_labels)
    new_df = plot_df.loc[window_start:window_end].copy()
    new_df.insert(loc=0, column='Date', value=pd.to_datetime(new_df.index))

    line_fig = px.line(new_df, x = 'Date', y = feature_name)
    line_fig.update_traces(line=dict(color = 'black'))
    fig = go.Figure(data=line_fig.data).update_layout(xaxis_title="Time", yaxis_title=feature_name)

    #start background
    _shade_state_runs(fig, bg_df)

    # Two opaque black bars bracket the anomalous sample on either side.
    fig.add_vrect(x0=_row_timestamp(anomaly - 5), x1=_row_timestamp(anomaly - 3), fillcolor="black", opacity=1)
    fig.add_vrect(x0=_row_timestamp(anomaly + 4), x1=_row_timestamp(anomaly + 6), fillcolor="black", opacity=1)
    #end background

    fig.update_layout(xaxis=dict(showgrid=False), yaxis=dict(showgrid=False))
    fig.show()