<a href="https://colab.research.google.com/github/dhanushka365/LSTM-AutoEncoders/blob/main/LSTM_AUTO_ENCODERv1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import sys
!pip install --prefix {sys.prefix} pandas tensorflow scikit-learn matplotlib

In [None]:
# Colab-only step: mount Google Drive at /content/drive so that files under
# /content/drive/MyDrive can be read by the cells below.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow import keras
from sklearn.preprocessing import StandardScaler
import numpy as np
import tensorflow as tf
import pandas as pd
import plotly.graph_objects as go
import matplotlib.pyplot as plt
!pip install tensorflow
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout, RepeatVector, TimeDistributed

In [None]:
# Read the AEP_hourly.csv sequence data from the mounted Drive folder into a DataFrame.
df = pd.read_csv(filepath_or_buffer='/content/drive/MyDrive/hourly_csv/AEP_hourly.csv')

In [None]:
# Preview the first five rows of the raw frame.
df.head(n=5)

In [None]:
# Derive calendar columns from the "Datetime" column.
# The column is parsed once and the parsed series is reused for every derived
# field, instead of re-parsing the whole column for each new column.
timestamps = pd.to_datetime(df["Datetime"])

# NOTE: `dataset` is the same object as `df` (an alias, not a copy), so the
# columns added here are also visible through `df` in the cells below.
dataset = df
dataset["Month"] = timestamps.dt.month
dataset["Year"] = timestamps.dt.year
dataset["Date"] = timestamps.dt.date
dataset["Time"] = timestamps.dt.time
dataset["Week"] = timestamps.dt.isocalendar().week
dataset["Day"] = timestamps.dt.day_name()


**Data Time Period**

In [None]:
# Earliest and latest calendar date present in the data.
date_col = df['Date']
date_col.min(), date_col.max()

# Visualize the data

In [None]:
# Plot the raw AEP_MW values against the calendar date.
usage_trace = go.Scatter(x=df['Date'], y=df['AEP_MW'], name='AEP_MW')
fig = go.Figure(data=[usage_trace])
fig.update_layout(showlegend=True, title='Electricity usage 2004-2020')
fig.show()

# Data Preprocessing

In [None]:
# Split by calendar date:
#   train = rows dated on or before `startdate`
#   test  = rows dated strictly after `enddate`
# Rows with startdate < Date <= enddate belong to neither split.
startdate = pd.to_datetime("2006-12-10").date()
enddate = pd.to_datetime("2010-6-10").date()

# Take explicit copies: later cells assign into train['AEP_MW'] / test['AEP_MW'],
# and writing into a bare `.loc` selection raises SettingWithCopyWarning.
train = df.loc[df['Date'] <= startdate].copy()
test = df.loc[df['Date'] > enddate].copy()
train.shape, test.shape

# Data Scaling

In [None]:
# Fit the scaler on the training values only, then apply that same fitted
# scaler to both splits (the test split is scaled with training statistics).
scaler = StandardScaler().fit(train[['AEP_MW']].to_numpy())

train['AEP_MW'] = scaler.transform(train[['AEP_MW']].to_numpy())
test['AEP_MW'] = scaler.transform(test[['AEP_MW']].to_numpy())

In [None]:
# Visualize scaled data
fig_scaled, ax_scaled = plt.subplots()
ax_scaled.plot(train['AEP_MW'], label='scaled')
ax_scaled.legend()
plt.show()

# Create sequences

In [None]:
TIME_STEPS = 30


def create_sequences(X, y, time_steps=TIME_STEPS):
    """Slice a series into overlapping fixed-length windows.

    For every start position ``s`` in ``range(len(X) - time_steps)`` the
    window ``X.iloc[s:s + time_steps]`` becomes one input sample and
    ``y.iloc[s + time_steps]`` (the value right after the window) becomes
    its target.

    Parameters
    ----------
    X : object supporting ``len`` and ``.iloc`` slicing with ``.values``
        Source of the input windows (called with a DataFrame in this notebook).
    y : object supporting ``.iloc`` indexing
        Source of the per-window target values.
    time_steps : int
        Number of consecutive rows in each window.

    Returns
    -------
    tuple of numpy.ndarray
        ``(windows, targets)``; both are empty when ``len(X) <= time_steps``.
    """
    starts = range(len(X) - time_steps)
    windows = [X.iloc[s:s + time_steps].values for s in starts]
    targets = [y.iloc[s + time_steps] for s in starts]
    return np.array(windows), np.array(targets)

# Window both splits on the scaled AEP_MW column: the inputs come from the
# one-column frame, the targets from the same column as a Series.
value_col = 'AEP_MW'
X_train, y_train = create_sequences(train[[value_col]], train[value_col])
X_test, y_test = create_sequences(test[[value_col]], test[value_col])
print("Training input shape: ", X_train.shape)
print("Testing input shape: ", X_test.shape)

In [None]:
# Seed both NumPy and TensorFlow with the same value so the sequence of
# random numbers is regenerated identically on each run.
SEED = 21
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Build a model

In [None]:
# Size of axis 1 of the training windows (rows per window).
print(np.shape(X_train)[1])

In [None]:
# Size of axis 2 of the training windows (columns per row).
print(np.shape(X_train)[2])

In [None]:
# LSTM autoencoder: the first LSTM encodes a window into a single vector,
# RepeatVector copies that vector once per time step, and the second LSTM plus
# a per-step Dense layer map it back to the window's (steps, features) shape.
n_steps, n_features = X_train.shape[1], X_train.shape[2]

model = Sequential([
    LSTM(128, activation='tanh', input_shape=(n_steps, n_features)),
    Dropout(rate=0.1),
    RepeatVector(n_steps),
    LSTM(128, activation='tanh', return_sequences=True),
    Dropout(rate=0.1),
    TimeDistributed(Dense(n_features)),
])
model.compile(loss="mean_squared_error", optimizer="adam", metrics=["mse"])
model.summary()

In [None]:
# Train the autoencoder to reconstruct its own input.
# The network outputs one value per time step and feature (RepeatVector +
# TimeDistributed(Dense)), and the cells below score anomalies as
# |model.predict(X) - X|, so the training target must be X_train itself.
# Fitting against y_train (a single next-step value per window) does not match
# the output shape and would not train a reconstruction model.
history = model.fit(
    X_train,
    X_train,
    epochs=4,
    batch_size=5000,
    validation_split=0.1,  # hold out a tenth of the training windows for validation loss
)

# Plot Training - Validation loss

In [None]:
# Draw the per-epoch training and validation loss recorded by model.fit.
for history_key, legend_label in (('loss', 'Training loss'), ('val_loss', 'Validation loss')):
    plt.plot(history.history[history_key], label=legend_label)
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend();

In [None]:
# Mean Absolute Error loss: absolute reconstruction error averaged over the
# time-step axis (axis=1) of every training window.
X_train_pred = model.predict(X_train)
train_abs_err = np.abs(X_train_pred - X_train)
train_mae_loss = train_abs_err.mean(axis=1)

plt.hist(train_mae_loss, bins=50)
plt.xlabel('Train MAE loss')
plt.ylabel('Number of Samples');

# Set reconstruction error threshold to the largest error seen on the training windows
threshold = train_mae_loss.max()

print('Reconstruction error threshold:',threshold)

# Predict Anomalies on test data using threshold

In [None]:
# Reconstruct the test windows and average the absolute error over the
# time-step axis (axis=1), mirroring the training-loss computation.
X_test_pred = model.predict(X_test, verbose=1)
test_abs_err = np.abs(X_test_pred - X_test)
test_mae_loss = test_abs_err.mean(axis=1)

plt.hist(test_mae_loss, bins=50)
plt.xlabel('Test MAE loss')
plt.ylabel('Number of samples')

In [None]:
# Drop the first TIME_STEPS test rows so the remaining rows line up one-to-one
# with the test windows, then attach each window's loss, the fixed threshold,
# and a boolean flag for losses above that threshold.
anomaly_df = test.iloc[TIME_STEPS:].copy()
anomaly_df['loss'] = np.ravel(test_mae_loss)  # flatten the per-window losses to one value per row
anomaly_df['threshold'] = threshold
anomaly_df['anomaly'] = anomaly_df['loss'].gt(anomaly_df['threshold'])

In [None]:
# Preview the first five scored test rows.
anomaly_df.head(n=5)

In [None]:
# Overlay the per-row test loss and the constant threshold on the same axes.
fig = go.Figure(data=[
    go.Scatter(x=anomaly_df['Date'], y=anomaly_df['loss'], name='Test loss'),
    go.Scatter(x=anomaly_df['Date'], y=anomaly_df['threshold'], name='Threshold'),
])
fig.update_layout(showlegend=True, title='Test loss vs. Threshold')
fig.show()

In [None]:
# Keep only the rows flagged as anomalous. The boolean column is used directly
# as the mask (no `== True`), and an explicit copy is taken because a later
# cell assigns into anomalies['AEP_MW']; writing into a bare `.loc` selection
# raises SettingWithCopyWarning.
anomalies = anomaly_df.loc[anomaly_df['anomaly']].copy()
anomalies.head()

In [None]:
# (rows, columns) of the flagged subset; the row count is the number of anomalies.
np.shape(anomalies)

In [None]:
# Undo the standard scaling so AEP_MW is back in its original units.
# NOTE: this overwrites the column in place, so run this cell only once.
anomaly_df['AEP_MW'] = scaler.inverse_transform(anomaly_df[['AEP_MW']].to_numpy())


In [None]:
# Undo the standard scaling on the flagged rows as well.
# NOTE: this overwrites the column in place, so run this cell only once.
anomalies['AEP_MW'] = scaler.inverse_transform(anomalies[['AEP_MW']].to_numpy())

In [None]:
# Plot the AEP_MW series for the scored test rows and mark the rows flagged as
# anomalies. The line trace is labelled 'AEP_MW' because that is the column
# being plotted (the previous 'Close price' label did not describe this data).
fig = go.Figure()
fig.add_trace(go.Scatter(x=anomaly_df['Date'], y=anomaly_df['AEP_MW'], name='AEP_MW'))
fig.add_trace(go.Scatter(x=anomalies['Date'], y=anomalies['AEP_MW'], mode='markers', name='Anomaly'))
fig.update_layout(showlegend=True, title='Detected anomalies')
fig.show()