In [None]:
import pandas as pd
# Load the raw readings from disk.
energy_data = pd.read_csv("Extra//energy.csv")

# Parse the timestamp strings once, then store the parsed values both as the
# column and as the index so the frame can be resampled by time.
parsed_timestamps = pd.to_datetime(energy_data["timestamp"], format='%Y-%m-%d %H:%M:%S')
energy_data["timestamp"] = parsed_timestamps
energy_data.index = parsed_timestamps

# Average the readings inside consecutive 2-second bins.
resampled_df = energy_data.resample("2s").mean()
resampled_df.head()

In [None]:
# Each derived column is the row-wise mean of the three per-phase (A/B/C)
# source columns listed next to it. Note that 'power' is built from the
# "Power Factor" columns.
phase_columns = {
    "active_power": ["Active Power A average [kW]", "Active Power B average [kW]", "Active Power C average [kW]"],
    "thdi": ["THDI A average [%]", "THDI B average [%]", "THDI C average [%]"],
    "thdu": ["THDU A average [%]", "THDU B average [%]", "THDU C average [%]"],
    "current": ["Current A average [A]", "Current B average [A]", "Current C average [A]"],
    "power": ["Power Factor A average", "Power Factor B average", "Power Factor C average"],
}

for derived_name, per_phase in phase_columns.items():
    resampled_df[derived_name] = resampled_df[per_phase].mean(axis=1)

# Keep only the derived columns (in the order declared above) and discard
# every row that still contains a missing value.
useful_data = resampled_df[list(phase_columns)].dropna()
useful_data

In [None]:
from sklearn.preprocessing import StandardScaler
# Fit a StandardScaler on the selected features and transform them in one go.
scaler = StandardScaler()
standardized_values = scaler.fit_transform(useful_data.values)

# Wrap the scaled array back into a frame that keeps the original column
# labels and time index.
scaled_df = pd.DataFrame(
    standardized_values,
    index=useful_data.index,
    columns=useful_data.columns,
)
scaled_df

In [None]:
import numpy as np

# Number of consecutive rows that make up one training window.
sequence_length = 10

# Only complete, non-overlapping windows are used; trailing rows that do not
# fill a whole window are dropped by the integer division.
n_windows = scaled_df.shape[0] // sequence_length
n_window_features = scaled_df.shape[1]

# Build the (windows, timesteps, features) array with a single reshape instead
# of assigning DataFrame slices one window at a time. `copy=True` guarantees an
# independent, writable float array that does not alias scaled_df's data.
subsequences = (
    scaled_df.to_numpy(dtype=float, copy=True)[: n_windows * sequence_length]
    .reshape(n_windows, sequence_length, n_window_features)
)

subsequences.shape

In [None]:
from tensorflow import keras
from tensorflow.keras import layers

# Take the feature count from the training array instead of hard-coding it, so
# the model's input and output widths always match the data being fed in.
n_features = subsequences.shape[2]
input_shape = (sequence_length, n_features)
latent_dim = 8

# Encoder: two stacked LSTMs; the second returns only its final state, which
# acts as the compressed representation of the whole window.
encoder_inputs = keras.Input(shape=input_shape)
encoder = layers.LSTM(latent_dim, return_sequences=True)(encoder_inputs)
encoder = layers.LSTM(latent_dim)(encoder)

# Decoder: repeat the compressed vector once per timestep, run it through an
# LSTM and project every timestep back to the original feature width.
decoder_inputs = layers.RepeatVector(sequence_length)(encoder)
decoder = layers.LSTM(latent_dim, return_sequences=True)(decoder_inputs)
decoder_outputs = layers.TimeDistributed(layers.Dense(n_features))(decoder)

autoencoder = keras.Model(encoder_inputs, decoder_outputs)
autoencoder.compile(optimizer='adam', loss='mse')

# The network is trained to reproduce its own input.
# NOTE(review): no random seed is set, so results vary between runs.
autoencoder.fit(subsequences, subsequences, epochs=30, batch_size=100)

# Reconstruction error averaged over the time axis only (axis=1), which leaves
# one value per window and per feature.
predictions = autoencoder.predict(subsequences)
mse = np.mean(np.square(subsequences - predictions), axis=1)

In [None]:
# `mse` is computed with a mean over axis=1 of a 3-D array, so it holds one
# error per (window, feature). np.where therefore reports a window index once
# for every feature above the threshold; np.unique collapses those repeats so
# each anomalous window appears exactly once (sorted ascending).
anomalies = np.unique(np.where(mse > 5)[0])
anomalies

In [None]:
from sklearn.preprocessing import MinMaxScaler
# NOTE: this rebinds `scaler`, which an earlier cell assigns to a
# StandardScaler; the name is kept unchanged for compatibility.
scaler = MinMaxScaler()
plot_df = pd.DataFrame(scaler.fit_transform(useful_data.values), columns=useful_data.columns, index=useful_data.index)

# Copy the time slice so that inserting a column modifies an independent frame
# rather than a slice of plot_df.
new_df = plot_df.loc['2022-11-07 07:00:00':'2022-11-08 18:00:00'].copy()
new_df.insert(loc=0, column='Date', value=pd.to_datetime(new_df.index))

import plotly.express as px
import plotly.graph_objects as go

# Plot only the measurement columns against time ('Date' is the x axis and is
# deliberately not listed in y).
fig = px.line(new_df, x='Date', y=list(plot_df.columns))

# Axis titles go on the figure that is actually displayed below.
fig.update_layout(xaxis_title="Time", yaxis_title="Energy Data")

# Retained unchanged in case later cells reference `fig3`; it is not shown here.
fig3 = go.Figure(data=fig.data).update_layout(xaxis_title="Time", yaxis_title="Energy Data")

# Shade the time span of the first four anomalous windows. The end position is
# clamped to the last row because (i + 1) * sequence_length can equal
# len(useful_data) for the final window, which would be out of bounds.
last_row = len(useful_data) - 1
for i in anomalies[0:4]:
    window_start = i * sequence_length
    window_end = min((i + 1) * sequence_length, last_row)
    fig.add_vrect(x0=str(useful_data.index[window_start]),
                  x1=str(useful_data.index[window_end]), fillcolor="black", opacity=0.15)

fig.show()