In [None]:
import pandas as pd
import numpy as np

# Load the energy and environment CSV files.
energy_data = pd.read_csv("Extra//energy.csv")
env_data = pd.read_csv("Extra//environment.csv")

# Parse the timestamps and use them as the index so the frame can be resampled.
energy_data.timestamp = pd.to_datetime(energy_data.timestamp, format='%Y-%m-%d %H:%M:%S')
energy_data.index = energy_data.timestamp
# Average into 2-second bins. The `timestamp` column duplicates the index, so
# it is dropped first: since pandas 2.0 `mean()` no longer silently discards
# non-numeric columns, and an averaged `timestamp` column in both resampled
# frames makes the later `resampled_df.join(resampled_env)` fail on
# overlapping column names.
resampled_df = energy_data.drop(columns="timestamp").resample("2s").mean()

# Same treatment for the environment readings so both frames share a 2-second grid.
env_data.timestamp = pd.to_datetime(env_data.timestamp, format='%Y-%m-%d %H:%M:%S')
env_data.index = env_data.timestamp
resampled_env = env_data.drop(columns="timestamp").resample("2s").mean()

In [None]:
# Collapse the per-phase readings (A, B, C) of each quantity into a single
# column holding their row-wise mean. `{}` is replaced by the phase letter.
phase_column_templates = {
    'reactive_power': "Reactive Power {} average [kVAr]",
    'thdi': "THDI {} average [%]",
    'thdu': "THDU {} average [%]",
    'current': "Current {} average [A]",
    'power_factor': "Power Factor {} average",
}
for feature_name, template in phase_column_templates.items():
    phase_columns = [template.format(phase) for phase in "ABC"]
    resampled_df[feature_name] = resampled_df[phase_columns].mean(axis=1)

# Join only the environment columns that are not already present.
# `DataFrame.join` raises on overlapping column names, which would otherwise
# happen for any column the two frames share and for every environment column
# when this cell is run a second time.
new_env_columns = [col for col in resampled_env.columns if col not in resampled_df.columns]
resampled_df = resampled_df.join(resampled_env[new_env_columns])

# Model inputs: four electrical features plus two environment columns.
# (`thdu` is computed above but is not part of this selection.)
useful_data = resampled_df[["reactive_power", "thdi", "current","power_factor","Xacc", "pitch"]]
# Keep only the 2-second bins in which every selected feature has a value.
useful_data = useful_data.dropna()
useful_data

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardise the selected features; `fit_transform` hands back a plain
# 2-D array (rows x features), not a DataFrame.
scaler = StandardScaler()
useful_df = scaler.fit_transform(useful_data)

# Add a length-1 middle axis so each row becomes a (1, features) sequence,
# the 3-D layout the model's Input layer is built from.
scaled_df = useful_df[:, np.newaxis, :]

In [None]:
from keras.layers import Dense, Input, LSTM, RepeatVector
from keras.models import Model

# LSTM autoencoder over single-timestep sequences: two encoder LSTMs reduce
# each sample to an 80-unit vector, two decoder LSTMs expand it again, and a
# Dense layer maps the result back to one value per input feature.
n_features = scaled_df.shape[2]

inputs = Input(shape=(scaled_df.shape[1], n_features))
encoded = LSTM(80, activation='relu', return_sequences=True)(inputs)
encoded = LSTM(80, activation='relu')(encoded)
decoded = RepeatVector(1)(encoded)
decoded = LSTM(80, activation='relu', return_sequences=True)(decoded)
decoded = LSTM(80, activation='relu')(decoded)
# Linear output layer: the targets are StandardScaler outputs, which take
# negative values. An LSTM with a relu activation never emits a negative
# number, so it could not reconstruct them.
decoded = Dense(n_features)(decoded)
autoencoder = Model(inputs, decoded)
autoencoder.compile(optimizer='adam', loss='mse')

# Train against the 2-D `useful_df` so the target has the same shape as the
# model output, (samples, features). Using the 3-D `scaled_df` as the target
# leaves a rank mismatch between target and prediction, which either fails the
# shape check or is silently broadcast inside the loss.
autoencoder.fit(scaled_df, useful_df, epochs=15, batch_size=80)
predictions = autoencoder.predict(scaled_df)
# Per-element squared reconstruction error, same shape as `useful_df`
# (despite the name, it is not averaged over features).
mse = np.square(np.subtract(useful_df, predictions))

In [None]:
# Squared-error level above which a reading is treated as anomalous.
ANOMALY_THRESHOLD = 1000

# `mse` holds one row per sample and one column per feature, so
# np.where(...)[0] yields a row position for every element over the threshold
# and repeats a row that has several offending features. np.unique keeps each
# row position once, in ascending order.
anomalies = np.unique(np.where(mse > ANOMALY_THRESHOLD)[0])
anomalies

In [None]:
import plotly.express as px
import plotly.graph_objects as go
from sklearn.preprocessing import MinMaxScaler

# Rescale every feature with MinMaxScaler so all traces share one y-axis.
scaler = MinMaxScaler()
plot_df = pd.DataFrame(scaler.fit_transform(useful_data.values), columns=useful_data.columns, index=useful_data.index)

PLOT_WINDOW = 100  # rows shown on either side of each anomaly
last_row = len(useful_data) - 1


def _timestamp_at(position):
    """Return the index label of row `position` as a string.

    `position` is clamped to the valid row range, so a window around an
    anomaly near the start or end of the data is truncated instead of wrapping
    around (negative position) or raising IndexError (position past the end).
    """
    clamped = min(max(int(position), 0), last_row)
    return str(useful_data.index[clamped])


# Plot the neighbourhood of the first five anomalies.
for anomaly in anomalies[:5]:
    start = _timestamp_at(anomaly - PLOT_WINDOW)
    final = _timestamp_at(anomaly + PLOT_WINDOW)

    # Copy the slice so inserting the helper column never writes into plot_df.
    new_df = plot_df.loc[start:final].copy()
    new_df.insert(loc=0,column='Date', value=pd.to_datetime(new_df.index))

    fig = px.line(new_df, x = 'Date', y = new_df.columns)
    # Rebuild the figure from the traces only, then label the axes.
    fig = go.Figure(data=fig.data).update_layout(
        title=f"Anomaly at {_timestamp_at(anomaly)}",
        xaxis_title="Time",
        yaxis_title="Data",
    )

    # Opaque bars five rows before and after the anomaly bracket the region;
    # the faint bar marks the anomalous row itself.
    fig.add_vrect(x0=_timestamp_at(anomaly - 5), x1=_timestamp_at(anomaly - 4), fillcolor="black", opacity=1)
    fig.add_vrect(x0=_timestamp_at(anomaly + 5), x1=_timestamp_at(anomaly + 6), fillcolor="black", opacity=1)
    fig.add_vrect(x0=_timestamp_at(anomaly), x1=_timestamp_at(anomaly + 1), fillcolor="black", opacity=0.1)

    fig.show()