# 1. Importing some Libraries

In [None]:
import numpy as np
import pandas as pd
from influxdb import InfluxDBClient
from datetime import datetime
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
import seaborn as sns
import plotly.graph_objects as go
from sklearn.ensemble import IsolationForest
import matplotlib.pyplot as plt

import holoviews as hv
from holoviews import opts
hv.extension('bokeh')

pd.set_option('display.max_rows', 15)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 1000)


# 2. Fetching Data: DE1Thing_HMD8310

In [None]:
# Connect to InfluxDB, pull one measurement for a fixed time window and load
# the returned points into a DataFrame.
client = InfluxDBClient(host='localhost', port=8086)
try:
    client.switch_database('ISS')

    # Query one measurement, restricted to an explicit [start, end) window.
    query1 = (
        'SELECT * FROM "DE1Thing_HMD8310" '
        "WHERE time >= '2022-08-29T23:28:00Z' AND time < '2023-01-26T08:00:00Z'"
    )
    results1 = client.query(query1)
    DE1Thing_HMD8310 = pd.DataFrame.from_records(results1.get_points())
finally:
    # Release the client's HTTP session even when the query fails.
    client.close()

print(DE1Thing_HMD8310.shape)
DE1Thing_HMD8310.head(2)

In [None]:
# Cache the raw query result on disk; a later cell reloads it from this CSV path.
DE1Thing_HMD8310.to_csv("Data/DE1Thing_HMD8310.csv")

In [None]:
# Function to convert ISO 8601 time to datetime
def ISO_8601_To_Datetime(s):
    """Parse a UTC timestamp such as ``2022-08-29T23:28:00Z`` into a naive datetime.

    Timestamps with fractional seconds (``...:00.123456789Z``) are accepted as
    well; digits beyond microsecond precision are truncated because
    ``datetime`` cannot represent them.

    Args:
        s: Timestamp string ending in ``Z``.

    Returns:
        A timezone-naive ``datetime``.

    Raises:
        ValueError: If ``s`` matches neither supported layout.
    """
    try:
        return datetime.strptime(s, '%Y-%m-%dT%H:%M:%SZ')
    except ValueError:
        # Fall back to the fractional-seconds layout; anything else is
        # re-raised unchanged so callers see the original error.
        head, dot, fraction = s[:-1].partition('.')
        if not (s.endswith('Z') and dot):
            raise
        return datetime.strptime(f'{head}.{fraction[:6]}', '%Y-%m-%dT%H:%M:%S.%f')

In [None]:
# Reload the cached CSV. Column 0 becomes the row index and column 1
# (presumably 'time' - confirm against the file) is parsed with
# ISO_8601_To_Datetime.
# NOTE(review): `date_parser` is deprecated in recent pandas releases; confirm
# the pandas version this notebook is pinned to.
df = pd.read_csv(
    'Data/DE1Thing_HMD8310.csv',
    index_col=0,
    parse_dates=[1],
    date_parser=ISO_8601_To_Datetime,
)
df.head(2)

In [None]:
df.info()

In [None]:
df.columns

In [None]:
# Keep the timestamp plus the four sensor channels used in the rest of the notebook.
selected_columns = [
    'time',
    'CFWInletPress',
    'CFWInletTemp',
    'CFWOutletTempA',
    'CFWOutletTempB',
]
df = df.loc[:, selected_columns]
df

In [None]:
df.isnull().sum()

In [None]:
# Drop, in place, every row that has a missing value in any remaining column.
# NOTE(review): surviving rows keep their original index labels, so the index
# may no longer be contiguous - check any later label-based slicing.
df.dropna(inplace=True)

In [None]:
df.isnull().sum()

In [None]:
df

In [None]:
# Checking for blank values and Data Types.
def overview(df: pd.DataFrame, timestamp_col: str = None) -> None:
    """Print null counts, column dtypes and, optionally, a column's time span.

    Args:
        df: Frame to summarise.
        timestamp_col: Name of a column whose minimum, maximum and their
            difference are printed. Nothing is printed for it when ``None``.
    """
    print('Null Count:\n', df.isnull().sum(), '\n')
    print('Data Types:\n', df.dtypes)

    if timestamp_col is not None:
        # Compute the bounds once and reuse them for the span.
        start = df[timestamp_col].min()
        end = df[timestamp_col].max()
        print('\nDate Range: \n\nStart:\t', start)
        print('End:\t', end)
        print('Days:\t', (end - start))

In [None]:
overview(df, timestamp_col='time')

In [None]:
# Inlet pressure on the left axis, inlet temperature on an overlaid right axis.
fig = go.Figure()

inlet_press_trace = go.Scatter(x=df["time"], y=df["CFWInletPress"], mode='lines', name='CFWInletPress')
inlet_temp_trace = go.Scatter(x=df["time"], y=df["CFWInletTemp"], mode='lines', name='CFWInletTemp', yaxis='y2')
fig.add_trace(inlet_press_trace)
fig.add_trace(inlet_temp_trace)

fig.update_layout(
    title_text="CFWInletPress vs CFWInletTemp",
    yaxis1=dict(title="CFWInletPress", side='left'),
    yaxis2=dict(title="CFWInletTemp", side='right', anchor="x", overlaying="y"),
)

fig.show()

In [None]:
# Inlet pressure on the left axis, outlet temperature A on an overlaid right axis.
fig = go.Figure()

fig.add_trace(go.Scatter(x=df["time"], y=df["CFWInletPress"], mode='lines', name='CFWInletPress'))

fig.add_trace(go.Scatter(x=df["time"], y=df["CFWOutletTempA"], mode='lines', name='CFWOutletTempA', yaxis='y2'))

# The title and right-axis label name the series that is actually plotted
# (CFWOutletTempA).
fig.update_layout(title_text="CFWInletPress vs CFWOutletTempA",
                  yaxis1=dict(title="CFWInletPress", side='left'),
                  yaxis2=dict(title="CFWOutletTempA", side='right', anchor="x", overlaying="y")
                  )

fig.show()

In [None]:
# Outlet temperature A on the left axis, outlet temperature B on an overlaid right axis.
fig = go.Figure()

fig.add_trace(go.Scatter(x=df["time"], y=df["CFWOutletTempA"], mode='lines', name='CFWOutletTempA'))

fig.add_trace(go.Scatter(x=df["time"], y=df["CFWOutletTempB"], mode='lines', name='CFWOutletTempB', yaxis='y2'))

fig.update_layout(title_text="CFWOutletTempA vs CFWOutletTempB",
                  yaxis1=dict(title="CFWOutletTempA", side='left'),
                  yaxis2=dict(title="CFWOutletTempB", side='right', anchor="x", overlaying="y")
                  )

# Render explicitly, like the other plotting cells, instead of relying on the
# notebook echoing the cell's last expression.
fig.show()

# LSTM Autoencoder

## Splitting the Data into Train and Test set

In [None]:
# Separate the timestamp column from the four sensor channels; both frames
# keep the index of `df`.
sensor_columns = ['CFWInletPress', 'CFWInletTemp', 'CFWOutletTempA', 'CFWOutletTempB']
df = df.loc[:, ['time'] + sensor_columns]
df_timestamp = df[['time']]
df_ = df[sensor_columns]
df.shape

In [None]:
# Chronological split: the first `train_prp` share of the rows is used for
# training, the remainder for testing.
# The split is positional (`iloc`) on purpose: `.loc` slices by label and
# includes both end points, so the boundary row could end up in both sets, and
# labels stop matching row positions once rows have been dropped.
train_prp = .6
split_pos = int(df_.shape[0] * train_prp)
train = df_.iloc[:split_pos]
test = df_.iloc[split_pos:]

In [None]:
train

In [None]:
# Standardise the features: the scaler is fitted on the training split only
# and then applied to both splits.
scaler = StandardScaler()
scaler.fit(train)
X_train = scaler.transform(train)
X_test = scaler.transform(test)

print("X train Shape:", X_train.shape)
print("X test Shape:", X_test.shape)

In [None]:
# Insert a length-1 middle axis so each row becomes a one-step sequence:
# (rows, features) -> (rows, 1, features).
X_train = np.expand_dims(X_train, axis=1)
X_test = np.expand_dims(X_test, axis=1)

print("X train Shape:", X_train.shape)
print("X test Shape:", X_test.shape)

In [None]:
from tensorflow.keras.layers import Input, Dropout, Dense, LSTM, TimeDistributed, RepeatVector
from tensorflow.keras.models import Model
from tensorflow.keras import regularizers
from tensorflow.keras.models import Sequential
from sklearn import metrics
import tensorflow as tf
import keras.backend as K
import matplotlib.pyplot as plt

In [None]:
def autoencoder_model(X):
    """Build an (uncompiled) LSTM autoencoder sized from ``X``.

    Args:
        X: Array whose ``shape[1]`` and ``shape[2]`` give the input's
            second and third dimensions; no other attribute is read.

    Returns:
        A Keras ``Model`` whose output has the same trailing shape as its input.
    """
    timesteps, n_features = X.shape[1], X.shape[2]

    # Encoder: 16-unit LSTM, then a 4-unit LSTM that emits a single vector.
    inputs = Input(shape=(timesteps, n_features))
    encoded_seq = LSTM(
        16,
        activation='relu',
        return_sequences=True,
        kernel_regularizer=regularizers.l2(0.00),
    )(inputs)
    bottleneck = LSTM(4, activation='relu', return_sequences=False)(encoded_seq)

    # Repeat the bottleneck vector once per time step for the decoder.
    repeated = RepeatVector(timesteps)(bottleneck)

    # Decoder: mirror of the encoder, followed by a per-step dense projection.
    decoded = LSTM(4, activation='relu', return_sequences=True)(repeated)
    decoded = LSTM(16, activation='relu', return_sequences=True)(decoded)
    reconstruction = TimeDistributed(Dense(n_features))(decoded)

    return Model(inputs=inputs, outputs=reconstruction)

In [None]:
# Build the autoencoder from the training tensor's shape, compile it with the
# Adam optimiser and mean-absolute-error loss, and print the layer summary.
model = autoencoder_model(X_train)
# NOTE(review): 'accuracy' is tracked although the model reconstructs
# continuous values; later cells read history['accuracy'], so it is left
# as-is - consider replacing it with a regression metric in both places.
model.compile(optimizer='adam', loss='mae', metrics=['accuracy'])
model.summary()

In [None]:
# Train the autoencoder to reproduce its own input; 20% of the training data
# is held out for validation.
epochs = 50
batch = 25
fit_result = model.fit(
    X_train,
    X_train,
    epochs=epochs,
    batch_size=batch,
    validation_split=.2,
    verbose=1,
)
history = fit_result.history

In [None]:
# Training and validation loss per epoch.
train_loss_curve = history['loss']
val_loss_curve = history['val_loss']

fig = go.Figure()
fig.add_trace(go.Scatter(x=list(range(len(train_loss_curve))), y=train_loss_curve, mode='lines', name='loss'))
fig.add_trace(go.Scatter(x=list(range(len(val_loss_curve))), y=val_loss_curve, mode='lines', name='validation loss'))

fig.update_layout(
    title="Autoencoder error loss over epochs",
    yaxis=dict(title="Loss"),
    xaxis=dict(title="Epoch"),
)

fig.show()

In [None]:
# Plot how the training and validation loss evolved over the epochs and save
# the figure to disk.
epoch_loss = history['loss']
epoch_val_loss = history['val_loss']

plt.figure(figsize=(8,5))
plt.plot(range(0,len(epoch_loss)), epoch_loss, 'b-', linewidth=2, label='Train Loss')
# 'val_loss' comes from the validation split of the training data, not from
# the test set, so it is labelled accordingly.
plt.plot(range(0,len(epoch_val_loss)), epoch_val_loss, 'r-', linewidth=2, label='Validation Loss')
plt.xlabel("Epochs")
plt.ylabel("Loss")

plt.legend(loc='best')
plt.savefig('Figure_Loss.jpeg')
plt.show()


In [None]:
# Plot the 'accuracy' metric recorded during training for the training and
# validation data and save the figure to disk.
# NOTE(review): accuracy is of limited meaning for a reconstruction model;
# it is plotted only because the model was compiled with that metric.
epoch_acc = history['accuracy']
epoch_val_acc = history['val_accuracy']

plt.figure(figsize=(8,5))

plt.plot(range(0,len(epoch_acc)), epoch_acc, 'b-', linewidth=2, label='Train Acc')
# 'val_accuracy' is measured on the validation split, not on the test set.
plt.plot(range(0,len(epoch_val_acc)), epoch_val_acc, 'r-', linewidth=2, label='Validation Acc')
plt.xlabel("Epochs")
plt.ylabel("Accuracy")
plt.legend(loc='lower right')

plt.savefig('Figure_Acc.jpeg')
plt.show()

In [None]:
# Reconstruct the training data and map it back to the original units.
X_pred = model.predict(X_train)
X_pred = X_pred.reshape(X_pred.shape[0], X_pred.shape[2])
X_pred = scaler.inverse_transform(X_pred)
# Reuse the index of `train` so later column-wise arithmetic against `train`
# aligns row by row even when the index is not 0..n-1.
X_pred = pd.DataFrame(X_pred, columns=train.columns, index=train.index)

# 1. Anomaly Detection for CFWInletPress with LSTM-AE

In [None]:
# Absolute reconstruction error of CFWInletPress on the training data.
scores = pd.DataFrame({'CFWInletPress_train': train['CFWInletPress']})
scores["CFWInletPress_predicted"] = X_pred["CFWInletPress"]
scores['loss_mae'] = scores['CFWInletPress_train'].sub(scores['CFWInletPress_predicted']).abs()

In [None]:
# Distribution of the absolute reconstruction error.
error_histogram = go.Histogram(x=scores['loss_mae'])
fig = go.Figure(data=[error_histogram])
fig.update_layout(
    title="Error distribution",
    xaxis=dict(title="Error delta between predicted and real data [CFWInletPress]"),
    yaxis=dict(title="Data point counts"),
)
fig.show()

In [None]:
# Reconstruct the test data, undo the scaling and index the result like `test`.
test_reconstruction = model.predict(X_test)
test_reconstruction = test_reconstruction.reshape(test_reconstruction.shape[0], test_reconstruction.shape[2])
X_pred = pd.DataFrame(
    scaler.inverse_transform(test_reconstruction),
    columns=train.columns,
    index=test.index,
)

In [None]:
# Per-row reconstruction error of CFWInletPress on the test data, flagged as
# an anomaly when it exceeds a fixed threshold.
# Work on a copy of the prediction columns so that X_pred itself is not
# modified by the bookkeeping columns added below.
scores = X_pred[train.columns].copy()
# Look the timestamps up by the rows' own index labels rather than by a
# hard-coded starting label.
scores['datetime'] = df_timestamp.loc[scores.index, 'time']
scores['real CFWInletPress'] = test['CFWInletPress']
scores["loss_mae"] = (scores['real CFWInletPress'] - scores['CFWInletPress']).abs()
scores['Threshold'] = 3
scores['Anomaly'] = np.where(scores["loss_mae"] > scores["Threshold"], 1, 0)

In [None]:
scores

In [None]:
# Reconstruction error over time together with the anomaly threshold.
loss_trace = go.Scatter(x=scores['datetime'], y=scores['loss_mae'], name="Loss")
threshold_trace = go.Scatter(x=scores['datetime'], y=scores['Threshold'], name="Threshold")

fig = go.Figure()
fig.add_trace(loss_trace)
fig.add_trace(threshold_trace)

fig.update_layout(
    title="Error Timeseries and Threshold",
    xaxis=dict(title="DateTime"),
    yaxis=dict(title="Loss"),
)
fig.show()

In [None]:
scores['Anomaly'].value_counts()

In [None]:
# Add an 'anomalies' column that carries the real value on flagged rows and
# NaN everywhere else.
flagged_rows = scores['Anomaly'] == 1
anomalies = scores.loc[flagged_rows, ['real CFWInletPress']].rename(
    columns={'real CFWInletPress': 'anomalies'}
)
scores = scores.merge(anomalies, how='left', left_index=True, right_index=True)

In [None]:
# Real CFWInletPress values of the test split over time.
inlet_press_line = go.Scatter(
    x=scores["datetime"],
    y=scores["real CFWInletPress"],
    mode='lines',
    name='CFWInletPress',
)

fig = go.Figure()
fig.add_trace(inlet_press_line)
fig.update_layout(title_text="Test Data")

fig.show()

In [None]:
# Test signal with the flagged points drawn as red markers on top.
anomaly_marker = dict(color="red", size=11, line=dict(color="red", width=2))

fig = go.Figure()
fig.add_trace(go.Scatter(x=scores["datetime"], y=scores["real CFWInletPress"], mode='lines', name='CFWInletPress'))
fig.add_trace(go.Scatter(x=scores["datetime"], y=scores["anomalies"], name='Anomaly', mode='markers', marker=anomaly_marker))

fig.update_layout(title_text="Anomalies Detected for CFWInletPress with LSTM-AE")

fig.show()

# 2. Anomaly Detection for CFWInletTemp with LSTM-AE

In [None]:
# Training-set reconstruction error distribution for CFWInletTemp.

# Reconstruct the training data here: at this point X_pred holds the test-set
# reconstruction (indexed like `test`), so subtracting it from `train` would
# not compare matching rows.
train_pred = model.predict(X_train)
train_pred = train_pred.reshape(train_pred.shape[0], train_pred.shape[2])
train_pred = pd.DataFrame(scaler.inverse_transform(train_pred), columns=train.columns, index=train.index)

scores_2 = pd.DataFrame()
scores_2['CFWInletTemp_train'] = train['CFWInletTemp']
scores_2["CFWInletTemp_predicted"] = train_pred["CFWInletTemp"]
scores_2['loss_mae'] = (scores_2['CFWInletTemp_train'] - scores_2['CFWInletTemp_predicted']).abs()

fig = go.Figure(data=[go.Histogram(x=scores_2['loss_mae'])])
fig.update_layout(title="Error distribution", 
                 xaxis=dict(title="Error delta between predicted and real data [CFWInletTemp]"),
                 yaxis=dict(title="Data point counts"))
fig.show()

In [None]:
# Per-row reconstruction error of CFWInletTemp on the test data, flagged as an
# anomaly when it exceeds a fixed threshold.
# Work on a copy of the prediction columns so that X_pred is not modified and
# no bookkeeping columns from other cells leak into this frame.
scores_2 = X_pred[train.columns].copy()
# Look the timestamps up by the rows' own index labels rather than by a
# hard-coded starting label.
scores_2['datetime'] = df_timestamp.loc[scores_2.index, 'time']
scores_2['real CFWInletTemp'] = test['CFWInletTemp']
scores_2["loss_mae"] = (scores_2['real CFWInletTemp'] - scores_2['CFWInletTemp']).abs()
scores_2['Threshold'] = 3
scores_2['Anomaly'] = np.where(scores_2["loss_mae"] > scores_2["Threshold"], 1, 0)
scores_2

In [None]:
# CFWInletTemp reconstruction error over time together with the threshold.
loss_trace_2 = go.Scatter(x=scores_2['datetime'], y=scores_2['loss_mae'], name="Loss")
threshold_trace_2 = go.Scatter(x=scores_2['datetime'], y=scores_2['Threshold'], name="Threshold")

fig = go.Figure()
fig.add_trace(loss_trace_2)
fig.add_trace(threshold_trace_2)

fig.update_layout(
    title="CFWInletTemp: Error Timeseries and Threshold",
    xaxis=dict(title="DateTime"),
    yaxis=dict(title="Loss"),
)
fig.show()

In [None]:
scores_2['Anomaly'].value_counts()

In [None]:
# Add an 'anomalies' column that carries the real value on flagged rows and
# NaN everywhere else.
flagged_rows_2 = scores_2['Anomaly'] == 1
anomalies_2 = scores_2.loc[flagged_rows_2, ['real CFWInletTemp']].rename(
    columns={'real CFWInletTemp': 'anomalies'}
)
scores_2 = scores_2.merge(anomalies_2, how='left', left_index=True, right_index=True)

In [None]:
# Real CFWInletTemp values of the test split over time.
inlet_temp_line = go.Scatter(
    x=scores_2["datetime"],
    y=scores_2["real CFWInletTemp"],
    mode='lines',
    name='CFWInletTemp',
)

fig = go.Figure()
fig.add_trace(inlet_temp_line)
fig.update_layout(title_text=" Test Data : CFWInletTemp")

fig.show()

In [None]:
# Test signal with the flagged CFWInletTemp points drawn as red markers.
fig = go.Figure()

fig.add_trace(go.Scatter(x=scores_2["datetime"], y=scores_2["real CFWInletTemp"], mode='lines', name='CFWInletTemp'))
fig.add_trace(go.Scatter(x=scores_2["datetime"], y=scores_2["anomalies"], name='CFWInletTemp Anomalies', mode='markers', marker=dict(color="red", size=11, line=dict(color="red", width=2))))
fig.update_layout(title_text="Anomalies Detected in CFWInletTemp with LSTM-AE")

# Render explicitly, like the other plotting cells, instead of relying on the
# notebook echoing the cell's last expression.
fig.show()

In [None]:
# Combined view: CFWInletTemp and CFWInletPress test signals with their
# respective flagged points (red for temperature, black for pressure).
fig = go.Figure()

fig.add_trace(go.Scatter(x=scores_2["datetime"], y=scores_2["anomalies"], name='CFWInletTemp Anomalies', mode='markers', marker=dict(color="red", size=11, line=dict(color="red", width=2))))

fig.add_trace(go.Scatter(x=scores["datetime"], y=scores["anomalies"], name='CFWInletPress Anomalies', mode='markers', marker=dict(color="black", size=11, line=dict(color="black", width=2))))

fig.add_trace(go.Scatter(x=scores_2["datetime"], y=scores_2["real CFWInletTemp"], mode='lines', name='CFWInletTemp'))

fig.add_trace(go.Scatter(x=scores["datetime"], y=scores["real CFWInletPress"], mode='lines', name='CFWInletPress'))

# The title names both signals shown in the figure.
fig.update_layout(title_text="Anomalies Detected in CFWInletTemp and CFWInletPress with LSTM-AE")

fig.show()

# 3. Anomaly Detection for CFWOutletTempA with LSTM-AE

In [None]:
# Training-set reconstruction error distribution for CFWOutletTempA.

# Reconstruct the training data here: at this point X_pred holds the test-set
# reconstruction (indexed like `test`), so subtracting it from `train` would
# not compare matching rows.
train_pred = model.predict(X_train)
train_pred = train_pred.reshape(train_pred.shape[0], train_pred.shape[2])
train_pred = pd.DataFrame(scaler.inverse_transform(train_pred), columns=train.columns, index=train.index)

scores_3 = pd.DataFrame()
scores_3['CFWOutletTempA_train'] = train['CFWOutletTempA']
scores_3["CFWOutletTempA_predicted"] = train_pred["CFWOutletTempA"]
scores_3['loss_mae'] = (scores_3['CFWOutletTempA_train'] - scores_3['CFWOutletTempA_predicted']).abs()

fig = go.Figure(data=[go.Histogram(x=scores_3['loss_mae'])])
fig.update_layout(title="Error distribution", 
                 xaxis=dict(title="Error delta between predicted and real data [CFWOutletTempA]"),
                 yaxis=dict(title="Data point counts"))
fig.show()

In [None]:
# Per-row reconstruction error of CFWOutletTempA on the test data, flagged as
# an anomaly when it exceeds a fixed threshold.
# Work on a copy of the prediction columns so that X_pred is not modified and
# no bookkeeping columns from other cells leak into this frame.
scores_3 = X_pred[train.columns].copy()
# Look the timestamps up by the rows' own index labels rather than by a
# hard-coded starting label.
scores_3['datetime'] = df_timestamp.loc[scores_3.index, 'time']
scores_3['real CFWOutletTempA'] = test['CFWOutletTempA']
scores_3["loss_mae"] = (scores_3['real CFWOutletTempA'] - scores_3['CFWOutletTempA']).abs()
scores_3['Threshold'] = 3
scores_3['Anomaly'] = np.where(scores_3["loss_mae"] > scores_3["Threshold"], 1, 0)
scores_3

In [None]:
# CFWOutletTempA reconstruction error over time together with the threshold.
loss_trace_3 = go.Scatter(x=scores_3['datetime'], y=scores_3['loss_mae'], name="Loss")
threshold_trace_3 = go.Scatter(x=scores_3['datetime'], y=scores_3['Threshold'], name="Threshold")

fig = go.Figure()
fig.add_trace(loss_trace_3)
fig.add_trace(threshold_trace_3)

fig.update_layout(
    title="CFWOutletTempA: Error Timeseries and Threshold",
    xaxis=dict(title="DateTime"),
    yaxis=dict(title="Loss"),
)
fig.show()

In [None]:
scores_3['Anomaly'].value_counts()

In [None]:
# Add an 'anomalies' column that carries the real value on flagged rows and
# NaN everywhere else.
flagged_rows_3 = scores_3['Anomaly'] == 1
anomalies_3 = scores_3.loc[flagged_rows_3, ['real CFWOutletTempA']].rename(
    columns={'real CFWOutletTempA': 'anomalies'}
)
scores_3 = scores_3.merge(anomalies_3, how='left', left_index=True, right_index=True)

In [None]:
# Real CFWOutletTempA values of the test split over time.
outlet_temp_a_line = go.Scatter(
    x=scores_3["datetime"],
    y=scores_3["real CFWOutletTempA"],
    mode='lines',
    name='CFWOutletTempA',
)

fig = go.Figure()
fig.add_trace(outlet_temp_a_line)
fig.update_layout(title_text=" Test Data : CFWOutletTempA")

fig.show()

In [None]:
# Test signal with the flagged CFWOutletTempA points drawn as red markers.
anomaly_marker_3 = dict(color="red", size=11, line=dict(color="red", width=2))

fig = go.Figure()
fig.add_trace(go.Scatter(x=scores_3["datetime"], y=scores_3["real CFWOutletTempA"], mode='lines', name='CFWOutletTempA'))
fig.add_trace(go.Scatter(x=scores_3["datetime"], y=scores_3["anomalies"], name='Anomaly', mode='markers', marker=anomaly_marker_3))

fig.update_layout(title_text=" Anomalies Detected in CFWOutletTempA with LSTM_AE")

fig.show()

# 4. Anomaly Detection for CFWOutletTempB with LSTM-AE

In [None]:
# Training-set reconstruction error distribution for CFWOutletTempB.

# Reconstruct the training data here: at this point X_pred holds the test-set
# reconstruction (indexed like `test`), so subtracting it from `train` would
# not compare matching rows.
train_pred = model.predict(X_train)
train_pred = train_pred.reshape(train_pred.shape[0], train_pred.shape[2])
train_pred = pd.DataFrame(scaler.inverse_transform(train_pred), columns=train.columns, index=train.index)

scores_4 = pd.DataFrame()
scores_4['CFWOutletTempB_train'] = train['CFWOutletTempB']
scores_4["CFWOutletTempB_predicted"] = train_pred["CFWOutletTempB"]
scores_4['loss_mae'] = (scores_4['CFWOutletTempB_train'] - scores_4['CFWOutletTempB_predicted']).abs()

fig = go.Figure(data=[go.Histogram(x=scores_4['loss_mae'])])
fig.update_layout(title="Error distribution", 
                 xaxis=dict(title="Error delta between predicted and real data [CFWOutletTempB]"),
                 yaxis=dict(title="Data point counts"))
# Render explicitly, like the other histogram cells.
fig.show()

In [None]:
# Per-row reconstruction error of CFWOutletTempB on the test data, flagged as
# an anomaly when it exceeds a fixed threshold.
# Work on a copy of the prediction columns so that X_pred is not modified and
# no bookkeeping columns from other cells leak into this frame.
scores_4 = X_pred[train.columns].copy()
# Look the timestamps up by the rows' own index labels rather than by a
# hard-coded starting label.
scores_4['datetime'] = df_timestamp.loc[scores_4.index, 'time']
scores_4['real CFWOutletTempB'] = test['CFWOutletTempB']
scores_4["loss_mae"] = (scores_4['real CFWOutletTempB'] - scores_4['CFWOutletTempB']).abs()
scores_4['Threshold'] = 3
scores_4['Anomaly'] = np.where(scores_4["loss_mae"] > scores_4["Threshold"], 1, 0)
scores_4

In [None]:
# CFWOutletTempB reconstruction error over time together with the threshold.
loss_trace_4 = go.Scatter(x=scores_4['datetime'], y=scores_4['loss_mae'], name="Loss")
threshold_trace_4 = go.Scatter(x=scores_4['datetime'], y=scores_4['Threshold'], name="Threshold")

fig = go.Figure()
fig.add_trace(loss_trace_4)
fig.add_trace(threshold_trace_4)

fig.update_layout(
    title="CFWOutletTempB: Error Timeseries and Threshold",
    xaxis=dict(title="DateTime"),
    yaxis=dict(title="Loss"),
)
fig.show()

In [None]:
scores_4['Anomaly'].value_counts()

In [None]:
# Add an 'anomalies' column that carries the real value on flagged rows and
# NaN everywhere else.
flagged_rows_4 = scores_4['Anomaly'] == 1
anomalies_4 = scores_4.loc[flagged_rows_4, ['real CFWOutletTempB']].rename(
    columns={'real CFWOutletTempB': 'anomalies'}
)
scores_4 = scores_4.merge(anomalies_4, how='left', left_index=True, right_index=True)

In [None]:
# Real CFWOutletTempB values of the test split over time.
outlet_temp_b_line = go.Scatter(
    x=scores_4["datetime"],
    y=scores_4["real CFWOutletTempB"],
    mode='lines',
    name='CFWOutletTempB',
)

fig = go.Figure()
fig.add_trace(outlet_temp_b_line)
fig.update_layout(title_text=" Test Data : CFWOutletTempB")

fig.show()

In [None]:
# Test signal with the flagged CFWOutletTempB points drawn as red markers.
anomaly_marker_4 = dict(color="red", size=11, line=dict(color="red", width=2))

fig = go.Figure()
fig.add_trace(go.Scatter(x=scores_4["datetime"], y=scores_4["real CFWOutletTempB"], mode='lines', name='CFWOutletTempB'))
fig.add_trace(go.Scatter(x=scores_4["datetime"], y=scores_4["anomalies"], name='Anomaly', mode='markers', marker=anomaly_marker_4))

fig.update_layout(title_text=" Anomalies Detected in CFWOutletTempB")

fig.show()

In [None]:
# Combined view: CFWOutletTempA and CFWOutletTempB test signals with their
# respective flagged points (blue for A, red for B).
fig = go.Figure()


fig.add_trace(go.Scatter(x=scores_3["datetime"], y=scores_3["real CFWOutletTempA"], mode='lines', name='CFWOutletTempA'))

# These markers come from scores_3, i.e. CFWOutletTempA, and are labelled so.
fig.add_trace(go.Scatter(x=scores_3["datetime"], y=scores_3["anomalies"], name='CFWOutletTempA Anomalies', mode='markers', marker=dict(color="blue", size=11, line=dict(color="blue", width=2))))


fig.add_trace(go.Scatter(x=scores_4["datetime"], y=scores_4["real CFWOutletTempB"], mode='lines', name='CFWOutletTempB'))

fig.add_trace(go.Scatter(x=scores_4["datetime"], y=scores_4["anomalies"], name='CFWOutletTempB Anomalies', mode='markers', marker=dict(color="red", size=11, line=dict(color="red", width=2))))


fig.update_layout(title_text=" Anomalies Detected in CFWOutletTempA and CFWOutletTempB")

fig.show()