In [None]:
import pandas as pd

def _read_timestamped_csv(path):
    """Read the CSV at `path` and index it by its parsed 'timestamp' column."""
    frame = pd.read_csv(path)
    frame['timestamp'] = pd.to_datetime(frame['timestamp'])
    return frame.set_index('timestamp')


def _resample_and_fill(frame, rule="5s"):
    """Average `frame` into `rule`-wide time bins, then fill the empty bins.

    Empty bins are forward-filled from the previous bin; bins that are still
    empty at the start of the series are back-filled from the first populated
    bin.
    """
    # .ffill()/.bfill() replace fillna(method=...), which pandas has deprecated.
    return frame.resample(rule).mean().ffill().bfill()


energy_data = _read_timestamped_csv("Extra//energy.csv")
resampled_energy = _resample_and_fill(energy_data)

env_data = _read_timestamped_csv("Extra//environment.csv")
resampled_env = _resample_and_fill(env_data)

# Collapse the per-phase (A/B/C) columns into one row-wise mean per quantity.
phase_column_templates = {
    'reactive_power': "Reactive Power {} average [kVAr]",
    'thdi': "THDI {} average [%]",
    'thdu': "THDU {} average [%]",
    'current': "Current {} average [A]",
    'power_factor': "Power Factor {} average",
}
for feature_name, template in phase_column_templates.items():
    phase_columns = [template.format(phase) for phase in "ABC"]
    resampled_energy[feature_name] = resampled_energy[phase_columns].mean(axis=1)

# Left-join the environment readings onto the energy timestamps, keep only the
# modelling columns and drop rows where any of them is still missing.
useful_data = resampled_energy.join(resampled_env)
useful_data = useful_data[["reactive_power", "thdi", "thdu", "current","power_factor", "Xacc","yaw","pitch"]]
useful_data = useful_data.dropna()
# Drop the last 10 rows plus the remainder modulo 10, which leaves a row count
# that is a multiple of 10.
useful_data = useful_data.iloc[:-(10 + (useful_data.shape[0] % 10))]
useful_data

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler

# Standardised copy of every feature column; this is what the HMM is fitted on.
scaled_data = StandardScaler().fit_transform(useful_data)

# Min-max scaled copy that keeps the original column labels and timestamp
# index; `scaler` is left bound to the fitted MinMaxScaler.
scaler = MinMaxScaler()
min_maxed = pd.DataFrame(
    scaler.fit_transform(useful_data.to_numpy()),
    index=useful_data.index,
    columns=useful_data.columns,
)

In [None]:
from hmmlearn import hmm
import numpy as np
# Seed NumPy's global RNG before fitting. No random_state is passed to the
# model on purpose: the state numbers it produces are mapped to fixed labels
# further down, so the random draw sequence must stay exactly as it is.
np.random.seed(33)

# NOTE(review): `scaled_data` is re-bound by a later cell (autoencoder inputs);
# re-run the scaling cell first if this cell is executed out of order.
model = hmm.GaussianHMM(n_components = 5, covariance_type='diag')
model.fit(scaled_data)
hidden_states = model.predict(scaled_data)

# Remove any 'Date'/'states' columns left behind by a previous run of this
# cell; without this, insert() raises ValueError because 'Date' already exists.
min_maxed = (
    min_maxed
    .drop(columns=['Date', 'states'], errors='ignore')
    .assign(states = hidden_states)
)
# Expose the timestamp index as a leading 'Date' column for plotting.
min_maxed.insert(loc=0, column='Date', value=pd.to_datetime(min_maxed.index))

In [None]:
# .copy() yields an independent frame, so adding 'colors' below does not write
# into a slice of min_maxed (which triggers pandas' SettingWithCopyWarning).
new_df = min_maxed.loc['2022-11-08 14:00:00':'2022-11-08 16:00:00'].copy()
if new_df.empty:
    # Fail with a clear message instead of an IndexError from iloc[0] below.
    raise ValueError("min_maxed has no rows between 2022-11-08 14:00:00 and 2022-11-08 16:00:00")

# Human-readable label for each HMM state number. The numbering comes from the
# fitted model, so this mapping is only valid for that particular fit.
new_df['colors'] = new_df['states'].map({0:'On', 1: 'Mode-2', 2: 'Off', 3: 'Moving', 4:'Mode-1'})
color_map = {"Off": "black", "On": "white", "Mode-1" : "cyan", "Mode-2": "yellow", "Moving": "green"}

import plotly.express as px
import plotly.graph_objects as go

fig1 = px.line(new_df, x = 'Date', y = 'current')
fig1.update_traces(line=dict(color = 'black'))
# The plotted values come from min_maxed, i.e. they are min-max scaled rather
# than amperes, so the axis is labelled accordingly.
fig3 = go.Figure(data=fig1.data).update_layout(xaxis_title="Time", yaxis_title="Current average (min-max scaled)")

#start background
# Walk the rows once; every time the mode changes, shade the run that just
# ended from its first timestamp up to the timestamp where the new mode starts.
start_mode = str(new_df.iloc[0]["colors"])
start_date = str(new_df.iloc[0]["Date"])

for row_date, current_mode in zip(new_df["Date"], new_df["colors"]):
    if current_mode != start_mode:
        fig3.add_vrect(x0=start_date, x1=str(row_date), fillcolor=color_map[start_mode], opacity=0.5)
        start_mode = current_mode
        start_date = str(row_date)

# Shade the final run, which has no following mode change to close it.
fig3.add_vrect(x0=start_date, x1=str(new_df.iloc[-1]["Date"]), fillcolor=color_map[start_mode], opacity=0.5)
fig3.update_layout(xaxis=dict(showgrid=False), yaxis=dict(showgrid=False))
#end background

fig3.show()

In [None]:
from sklearn.preprocessing import StandardScaler

# Build the autoencoder inputs: three columns of the joined energy/environment
# frame, without rows where any of them is missing.
relevant_data = resampled_energy.join(resampled_env)[["thdi", "Xacc", "pitch"]].dropna()

# Cut the final 10 rows plus the remainder modulo 10 so that the number of
# rows divides evenly into windows of 10.
trailing_rows = 10 + relevant_data.shape[0] % 10
relevant_data = relevant_data.iloc[:-trailing_rows]

# Standardise each column.
standard_scaler = StandardScaler()
scaled_data = standard_scaler.fit_transform(relevant_data)

# Group consecutive rows into windows: (n_windows, 10, n_columns).
n_rows, n_cols = scaled_data.shape
reshaped_scaled_data = scaled_data.reshape(n_rows // 10, 10, n_cols)

In [None]:
#autoencoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Bidirectional, Dense, Input, LSTM, RepeatVector, TimeDistributed
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import set_random_seed

# Seed Python, NumPy and TensorFlow in one call so weight initialisation and
# batch shuffling start from the same state on every run; otherwise the
# anomaly list computed from the reconstruction error changes between runs.
# NOTE(review): set_random_seed needs a reasonably recent TensorFlow (2.7+).
set_random_seed(33)

timesteps = reshaped_scaled_data.shape[1]
n_features = reshaped_scaled_data.shape[2]

# Sequence-to-sequence autoencoder: the encoder compresses each window into a
# single vector, RepeatVector copies that vector once per timestep, and the
# decoder reconstructs the full window from it.
autoencoder = Sequential([
    # Explicit Input layer instead of the legacy `input_shape=` layer argument.
    Input(shape=(timesteps, n_features)),
    Bidirectional(LSTM(64, activation='tanh', return_sequences=True)),
    Bidirectional(LSTM(32, activation='tanh', return_sequences=False)),
    RepeatVector(timesteps),
    Bidirectional(LSTM(32, activation='tanh', return_sequences=True)),
    Bidirectional(LSTM(64, activation='tanh', return_sequences=True)),
    TimeDistributed(Dense(n_features))
])

# The model is trained to reproduce its own input (input == target).
autoencoder.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
autoencoder.fit(reshaped_scaled_data, reshaped_scaled_data, epochs=20, batch_size=32, verbose=1)

# Reconstruct every window, then flatten (n_windows, timesteps, n_features)
# back to one row per timestep so it lines up with scaled_data.
predictions = autoencoder.predict(reshaped_scaled_data)
predictions = predictions.reshape(-1, n_features)

In [None]:
# Squared reconstruction error for every (row, column) cell.
mse = np.square(scaled_data - predictions)

# Cut-off: the 99.999th percentile of all squared errors.
threshold = np.percentile(mse, 99.999)
print(mse.mean())
print(threshold)

# Pair of index arrays (row positions, column positions) of the cells whose
# error exceeds the cut-off.
anomalies = np.nonzero(mse > threshold)
anomalies

In [None]:
import plotly.express as px
import plotly.graph_objects as go

from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
plot_df = pd.DataFrame(scaler.fit_transform(relevant_data.values), columns=relevant_data.columns, index=relevant_data.index)

# Loop-invariant lookups: label per HMM state number and fill colour per label.
state_labels = {0:'On', 1: 'Mode-2', 2: 'Off', 3: 'Moving', 4:'Mode-1'}
color_map = {"Off": "black", "On": "white", "Mode-1" : "cyan", "Mode-2": "yellow", "Moving": "green"}

# `anomalies` holds (row, column) positions in scaled_data, which was built
# from relevant_data, so positions are turned into timestamps through
# relevant_data's index.
row_times = relevant_data.index
last_position = len(row_times) - 1


def _time_at(position):
    """Return the timestamp of row `position`, clamped to the valid row range.

    Clamping keeps windows around anomalies near either end of the data from
    wrapping around (negative positions) or running past the last row.
    """
    return row_times[min(max(position, 0), last_position)]


# Plot at most the first 30 anomalous cells.
for anomaly, feature in zip(anomalies[0][:30], anomalies[1][:30]):
    # Context window of up to 100 rows on either side of the anomaly.
    window_start = _time_at(anomaly - 100)
    window_end = _time_at(anomaly + 100)

    # .copy() so the column additions below do not write into slices of
    # min_maxed / plot_df (pandas SettingWithCopyWarning).
    bg_df = min_maxed.loc[window_start:window_end].copy()
    new_df = plot_df.loc[window_start:window_end].copy()
    new_df.insert(loc=0, column='Date', value=pd.to_datetime(new_df.index))

    line_fig = px.line(new_df, x = 'Date', y = relevant_data.columns)
    line_fig.update_traces(line=dict(color = 'black'))
    # The y-axis title names the column in which the anomaly was detected.
    fig = go.Figure(data=line_fig.data).update_layout(xaxis_title="Time", yaxis_title=relevant_data.columns[feature])

    #start background
    # Shade each run of consecutive identical HMM modes. Skipped when the HMM
    # frame has no rows inside this window.
    if not bg_df.empty:
        bg_df['colors'] = bg_df['states'].map(state_labels)
        start_mode = str(bg_df.iloc[0]["colors"])
        start_date = str(bg_df.iloc[0]["Date"])

        for row_date, current_mode in zip(bg_df["Date"], bg_df["colors"]):
            if current_mode != start_mode:
                fig.add_vrect(x0=start_date, x1=str(row_date), fillcolor=color_map[start_mode], opacity=0.5)
                start_mode = current_mode
                start_date = str(row_date)

        # Close the final run, which has no following mode change.
        fig.add_vrect(x0=start_date, x1=str(bg_df.iloc[-1]["Date"]), fillcolor=color_map[start_mode], opacity=0.5)

    # Highlight the anomaly itself: one row either side, in red.
    fig.add_vrect(x0=str(_time_at(anomaly - 1)), x1=str(_time_at(anomaly + 1)), fillcolor="red", opacity=0.5)
    #end background

    fig.update_layout(xaxis=dict(showgrid=False), yaxis=dict(showgrid=False))
    fig.show()