In [None]:
import pandas as pd
import numpy as np

# Load the raw energy readings and index them by their parsed timestamp.
energy_data = pd.read_csv("Extra//energy.csv")
energy_data['timestamp'] = pd.to_datetime(energy_data['timestamp'])
energy_data = energy_data.set_index('timestamp')

# Average the readings into 5-second bins. Bins without data come out as NaN,
# so fill them from the previous bin first and, for leading gaps that have no
# previous value, from the next bin.
# `.ffill()` / `.bfill()` replace the deprecated `fillna(method=...)` form.
resampled_energy = energy_data.resample("5s").mean().ffill().bfill()

# Load the raw environment readings and index them by their parsed timestamp.
env_data = pd.read_csv("Extra//environment.csv")
env_data['timestamp'] = pd.to_datetime(env_data['timestamp'])
env_data = env_data.set_index('timestamp')

# Average the readings into 5-second bins. Bins without data come out as NaN,
# so fill them from the previous bin first and, for leading gaps that have no
# previous value, from the next bin.
# `.ffill()` / `.bfill()` replace the deprecated `fillna(method=...)` form.
resampled_env = env_data.resample("5s").mean().ffill().bfill()

In [None]:
# Collapse the per-phase (A/B/C) readings into one column per quantity by
# averaging the three phase columns of each row.
phase_templates = {
    'reactive_power': "Reactive Power {phase} average [kVAr]",
    'thdi': "THDI {phase} average [%]",
    'thdu': "THDU {phase} average [%]",
    'current': "Current {phase} average [A]",
    'power_factor': "Power Factor {phase} average",
}
for feature, template in phase_templates.items():
    source_columns = [template.format(phase=phase) for phase in "ABC"]
    resampled_energy[feature] = resampled_energy[source_columns].mean(axis=1)

# Attach the environment readings to the energy rows (left join on the
# timestamp index) and keep only the columns used as model features.
# Note: `power_factor` is computed above but is not part of this selection.
useful_data = resampled_energy.join(resampled_env)
useful_data = useful_data[["reactive_power", "thdi", "thdu", "current", "Xacc", "pitch", "yaw", "sound"]]
useful_data = useful_data.dropna()

# Keep a whole number of 10-row windows so the data can later be reshaped into
# (windows, 10, features). Slicing by an explicit row count avoids the
# `iloc[:-0]` pitfall (which would return an empty frame) without discarding
# an extra 10 valid rows.
window_size = 10
usable_rows = len(useful_data) - len(useful_data) % window_size
useful_data = useful_data.iloc[:usable_rows]
useful_data

In [None]:
from sklearn.preprocessing import StandardScaler
# Standardise every feature column; the fitted scaler is kept so the same
# transformation can be reused later.
standard_scaler = StandardScaler()
scaled_data = standard_scaler.fit_transform(useful_data)

# Cut the 2-D (rows, features) array into consecutive, non-overlapping windows
# of 10 rows each: (rows // 10, 10, features).
n_rows, n_columns = scaled_data.shape
reshaped_scaled_data = scaled_data.reshape(n_rows // 10, 10, n_columns)
reshaped_scaled_data.shape

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, RepeatVector, TimeDistributed
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Bidirectional

# Window length and feature count are the last two axes of the windowed data.
timesteps, n_features = reshaped_scaled_data.shape[1:]

# Sequence autoencoder: two bidirectional LSTM layers reduce each window to a
# single vector, RepeatVector copies that vector once per timestep, and two
# further bidirectional LSTM layers plus a per-timestep Dense layer map it
# back to `n_features` values per timestep.
autoencoder = Sequential()
autoencoder.add(
    Bidirectional(
        LSTM(64, activation='tanh', return_sequences=True),
        input_shape=(timesteps, n_features),
    )
)
autoencoder.add(Bidirectional(LSTM(32, activation='tanh', return_sequences=False)))
autoencoder.add(RepeatVector(timesteps))
autoencoder.add(Bidirectional(LSTM(32, activation='tanh', return_sequences=True)))
autoencoder.add(Bidirectional(LSTM(64, activation='tanh', return_sequences=True)))
autoencoder.add(TimeDistributed(Dense(n_features)))

# The input is also the target: the model is trained to reconstruct its input.
autoencoder.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
autoencoder.fit(
    reshaped_scaled_data,
    reshaped_scaled_data,
    epochs=3,
    batch_size=32,
    verbose=1,
)

# Reconstruct every window, then flatten (windows, timesteps, features) back
# to (windows * timesteps, features).
window_predictions = autoencoder.predict(reshaped_scaled_data)
n_windows, n_steps, n_outputs = window_predictions.shape
predictions = window_predictions.reshape(n_windows * n_steps, n_outputs)

In [None]:
# Despite its name, `mse` holds the element-wise squared reconstruction error
# (one value per row and feature); it is only averaged for the printout below.
mse = (scaled_data - predictions) ** 2

# Anomaly cut-off: the 99.999th percentile over all squared errors.
threshold = np.percentile(mse, 99.999)

print(mse.mean())
print(threshold)

# Tuple of (row positions, column positions) whose error exceeds the cut-off.
anomalies = np.nonzero(mse > threshold)
anomalies

In [None]:
import plotly.express as px
import plotly.graph_objects as go

from sklearn.preprocessing import MinMaxScaler
# Rescale every feature with MinMaxScaler (default range [0, 1]) for plotting,
# keeping the original column names and timestamp index.
scaler = MinMaxScaler()
plot_df = pd.DataFrame(
    scaler.fit_transform(useful_data.values),
    columns=useful_data.columns,
    index=useful_data.index,
)

last_position = len(useful_data) - 1


def _label_at(position):
    """Return the index label at row `position` as a string.

    The position is clamped to the valid row range, so anomalies close to the
    start of the data no longer wrap around to the end (negative positions)
    and anomalies close to the end no longer raise IndexError.
    """
    clamped = min(max(int(position), 0), last_position)
    return str(useful_data.index[clamped])


# Plot at most the first 10 flagged (row, feature) pairs.
for anomaly, feature_idx in list(zip(anomalies[0], anomalies[1]))[:10]:
    feature_name = useful_data.columns[feature_idx]

    # Up to 100 rows of context on each side of the flagged row. Copy the
    # slice so that adding the 'Date' column does not touch `plot_df`.
    new_df = plot_df.loc[_label_at(anomaly - 100):_label_at(anomaly + 100)].copy()
    new_df.insert(loc=0, column='Date', value=pd.to_datetime(new_df.index))

    fig = px.line(new_df, x='Date', y=feature_name)
    fig = go.Figure(data=fig.data).update_layout(xaxis_title="Time", yaxis_title=feature_name)

    # Opaque one-row markers 5 rows before and 5 rows after the flagged row,
    # plus a translucent band over the flagged row itself.
    fig.add_vrect(x0=_label_at(anomaly - 5), x1=_label_at(anomaly - 4), fillcolor="black", opacity=1)
    fig.add_vrect(x0=_label_at(anomaly + 5), x1=_label_at(anomaly + 6), fillcolor="black", opacity=1)
    fig.add_vrect(x0=_label_at(anomaly), x1=_label_at(anomaly + 1), fillcolor="black", opacity=0.2)

    fig.show()