# 1. Importing some Libraries

In [None]:
import numpy as np
import pandas as pd
from influxdb import InfluxDBClient
from datetime import datetime
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
import seaborn as sns
import plotly.graph_objects as go
from sklearn.ensemble import IsolationForest

# Notebook-wide pandas display settings: print at most 15 rows, never hide
# columns, and allow output lines up to 1000 characters wide.
pd.set_option(
    'display.max_rows', 15,
    'display.max_columns', None,
    'display.width', 1000,
)


# 2. Fetching Data: EP_Propulsion1.Thing_HMD8310

In [None]:
# Connect to the InfluxDB server on localhost:8086 and select the 'ISS' database
client = InfluxDBClient(host='localhost', port=8086)
client.switch_database('ISS')

# Fetch every field of every point stored in the
# "EP_Propulsion1.Thing_HMD8310" measurement.

query1 = 'SELECT * FROM "EP_Propulsion1.Thing_HMD8310"'
results1 = client.query(query1)
# One DataFrame row per returned point
EP_Propulsion1_Thing_HMD8310 = pd.DataFrame.from_records(results1.get_points())

# Show the (rows, columns) shape, then preview the first two rows
print(EP_Propulsion1_Thing_HMD8310.shape)
EP_Propulsion1_Thing_HMD8310.head(2)

In [None]:
# Cache the fetched measurement as CSV; the cell further down reloads it from this file.
# The DataFrame index is written as well (pandas default) and becomes the first CSV column.
EP_Propulsion1_Thing_HMD8310.to_csv("Data/EP_Propulsion1_Thing_HMD8310.csv")

In [None]:
# Function to convert a UTC ISO 8601 timestamp string to datetime
def ISO_8601_To_Datetime(s):
    """Parse a 'Z'-suffixed ISO 8601 timestamp string into a naive ``datetime``.

    Both whole-second timestamps (``2021-08-31T12:34:56Z``) and timestamps
    carrying a fractional part (``2021-08-31T12:34:56.123456789Z``) are
    accepted. Fractions longer than six digits are truncated to microseconds
    because ``strptime``'s ``%f`` directive accepts at most six digits.

    Parameters
    ----------
    s : str
        Timestamp of the form ``YYYY-MM-DDTHH:MM:SS[.fraction]Z``.

    Returns
    -------
    datetime.datetime
        Naive datetime (no tzinfo attached).

    Raises
    ------
    ValueError
        If ``s`` does not match the expected layout.
    """
    seconds_part, dot, fraction_part = s.partition('.')
    if not dot:
        # No fractional part: original whole-second format
        return datetime.strptime(s, '%Y-%m-%dT%H:%M:%SZ')
    if not fraction_part.endswith('Z'):
        raise ValueError("timestamp %r does not end with 'Z'" % (s,))
    # Drop the trailing 'Z', then keep at most microsecond precision
    microseconds = fraction_part[:-1][:6]
    return datetime.strptime(seconds_part + '.' + microseconds + 'Z',
                             '%Y-%m-%dT%H:%M:%S.%fZ')

In [None]:
# Read the cached CSV; its first column is the index that ``to_csv`` wrote.
# The 'time' column is parsed explicitly by name instead of through
# ``read_csv(parse_dates=[1], date_parser=...)``: the ``date_parser`` keyword is
# deprecated in pandas 2.x, and a positional column index silently targets the
# wrong column if the CSV layout ever changes.
df = pd.read_csv('Data/EP_Propulsion1_Thing_HMD8310.csv', index_col=0)
df['time'] = pd.to_datetime(df['time'].map(ISO_8601_To_Datetime))
df.head(2)

In [None]:
# Keep the timestamp plus the two signals analysed below: MotorRPM and MotorPower
df = df[['time', 'MotorRPM', 'MotorPower']]
df

In [None]:
# Missing values per column
df.isna().sum()

In [None]:
# Drop every row containing a missing value. The index is not reset, so the
# labels of the dropped rows are simply absent afterwards.
df = df.dropna()

In [None]:
# Re-count missing values per column; every count should now be 0
df.isna().sum()

In [None]:
# Plot both signals against time: MotorRPM on the left y-axis and MotorPower on
# a second y-axis drawn on the right and overlaid on the first.
fig = go.Figure(data=[
    go.Scatter(x=df["time"], y=df["MotorRPM"], mode='lines', name='Motor RPM'),
    go.Scatter(x=df["time"], y=df["MotorPower"], mode='lines', name='Motor Power', yaxis='y2'),
])

fig.update_layout(
    title_text="Motor Power vs Motor RPM",
    yaxis1={'title': "Motor RPM in rpm/min", 'side': 'left'},
    yaxis2={'title': "Motor Power", 'side': 'right', 'anchor': "x", 'overlaying': "y"},
)

fig.show()

Graph observations
On August 31st, September 15th, and September 20th the plot shows periods of abnormal production that could be considered anomalies.

# LSTM Autoencoder

## Splitting the Data into Train and Test set

In [None]:
# Separate the timestamp column from the two model features; both pieces keep
# the index of ``df`` so they can be matched up again later.
df = df.loc[:, ['time', 'MotorRPM', 'MotorPower']]
df_timestamp = df.loc[:, ['time']]
df_ = df.loc[:, ['MotorRPM', 'MotorPower']]
df.shape

In [None]:
# Chronological train/test split: the first `train_prp` share of the rows is
# used for training, the remainder for testing.
train_prp = .6

# Split by position rather than by label. `.loc` slicing is label-based and
# inclusive on both ends, so a boundary label present in the index would land
# in BOTH sets, and gaps in the index (rows removed by `dropna`) would skew the
# intended ratio.
split_at = int(round(len(df_) * train_prp))
train = df_.iloc[:split_at]
test = df_.iloc[split_at:]

# The two sets must partition the data with no overlap
assert len(train) + len(test) == len(df_)

In [None]:
# Display the training split (the MotorRPM and MotorPower columns)
train

In [None]:
# Standardize the data: the scaler is fitted on the training split only and
# the same fitted transform is then applied to both splits.
scaler = StandardScaler().fit(train)
X_train = scaler.transform(train)
X_test = scaler.transform(test)

print("X train Shape:", X_train.shape)
print("X test Shape:", X_test.shape)

In [None]:
# Insert a middle axis of length 1 so each row becomes a one-step sequence:
# (rows, features) -> (rows, 1, features), the 3-D layout fed to the LSTM model.
X_train = np.reshape(X_train, (X_train.shape[0], 1, X_train.shape[1]))
X_test = np.reshape(X_test, (X_test.shape[0], 1, X_test.shape[1]))

print("X train Shape:", X_train.shape)
print("X test Shape:", X_test.shape)

In [None]:
from tensorflow.keras.layers import Input, Dropout, Dense, LSTM, TimeDistributed, RepeatVector
from tensorflow.keras.models import Model
from tensorflow.keras import regularizers

In [None]:
def autoencoder_model(X):
    """Build an uncompiled LSTM autoencoder sized after ``X``.

    Only the shape of ``X`` is used: ``X.shape[1]`` and ``X.shape[2]`` define
    the model's input shape, ``X.shape[1]`` is the repeat count of the
    bottleneck vector, and ``X.shape[2]`` is the width of the output layer.

    Parameters
    ----------
    X : array-like with at least three dimensions
        Sample input batch; its values are not read.

    Returns
    -------
    tensorflow.keras.models.Model
        Encoder (LSTM 16 -> LSTM 4), RepeatVector, decoder (LSTM 4 -> LSTM 16)
        and a time-distributed Dense layer with ``X.shape[2]`` units.
    """
    steps, width = X.shape[1], X.shape[2]

    # Encoder: the second LSTM returns only its final output vector
    inputs = Input(shape=(steps, width))
    encoded = LSTM(
        16,
        activation='relu',
        return_sequences=True,
        kernel_regularizer=regularizers.l2(0.00),
    )(inputs)
    encoded = LSTM(4, activation='relu', return_sequences=False)(encoded)

    # Repeat the encoded vector once per step so the decoder receives a sequence
    repeated = RepeatVector(steps)(encoded)

    # Decoder: mirrors the encoder's layer sizes
    decoded = LSTM(4, activation='relu', return_sequences=True)(repeated)
    decoded = LSTM(16, activation='relu', return_sequences=True)(decoded)
    output = TimeDistributed(Dense(width))(decoded)

    return Model(inputs=inputs, outputs=output)

In [None]:
# Build the autoencoder from the shape of the training tensor
model = autoencoder_model(X_train)
# Train with the Adam optimizer, minimising mean absolute error
model.compile(optimizer='adam', loss='mae')
# Print the layer-by-layer summary of the model
model.summary()

In [None]:
# Training hyper-parameters
epochs = 50
batch = 25

# Input and target are the same tensor: the model learns to reconstruct its
# input. 20% of the training data is set aside for validation.
fit_result = model.fit(
    X_train,
    X_train,
    epochs=epochs,
    batch_size=batch,
    validation_split=.2,
    verbose=1,
)
# Keep only the per-epoch metrics dict ('loss', 'val_loss') used for plotting
history = fit_result.history

In [None]:
# Training vs. validation loss per epoch (x-axis is the zero-based epoch number)
fig = go.Figure(data=[
    go.Scatter(x=list(range(len(history['loss']))), y=history['loss'],
               mode='lines', name='loss'),
    go.Scatter(x=list(range(len(history['val_loss']))), y=history['val_loss'],
               mode='lines', name='validation loss'),
])

fig.update_layout(
    title="Autoencoder error loss over epochs",
    yaxis={'title': "Loss"},
    xaxis={'title': "Epoch"},
)

fig.show()

In [None]:
# Reconstruct the training data with the trained autoencoder
X_pred = model.predict(X_train)
# Drop the length-1 middle axis: (rows, 1, features) -> (rows, features)
X_pred = X_pred.reshape(X_pred.shape[0], X_pred.shape[2])
# Undo the standardization so the values are back in the original units
X_pred = scaler.inverse_transform(X_pred)
# Reuse train's index, as the test-set cell does with test.index. With the
# default RangeIndex, any index-aligned comparison against `train` would pair
# the wrong rows (or yield NaN) whenever train's labels are not exactly 0..n-1,
# e.g. after rows were removed by `dropna`.
X_pred = pd.DataFrame(X_pred, columns=train.columns, index=train.index)

In [None]:
# Per-row absolute reconstruction error of MotorRPM on the training data.
# The frame starts from train's MotorRPM (and therefore train's index); the
# predicted column is aligned to that index on assignment.
scores = train['MotorRPM'].to_frame(name='Motor_train')
scores['Motor_predicted'] = X_pred['MotorRPM']
scores['loss_mae'] = scores['Motor_train'].sub(scores['Motor_predicted']).abs()

In [None]:
# Histogram of the training reconstruction errors
fig = go.Figure()
fig.add_trace(go.Histogram(x=scores['loss_mae']))
fig.update_layout(
    title="Error distribution",
    xaxis={'title': "Error delta between predicted and real data [Motor RPM]"},
    yaxis={'title': "Data point counts"},
)
fig.show()

In [None]:
# Reconstruct the test data, flatten (rows, 1, features) -> (rows, features),
# undo the scaling, and label the result with test's index and train's column names.
reconstructed = model.predict(X_test)
reconstructed = reconstructed.reshape(reconstructed.shape[0], reconstructed.shape[2])
X_pred = pd.DataFrame(
    scaler.inverse_transform(reconstructed),
    columns=train.columns,
    index=test.index,
)

In [None]:
# NOTE: `scores` is the same object as `X_pred`; the columns added below
# therefore also appear on `X_pred`.
scores = X_pred
# Look the timestamps up through test's own index instead of a hard-coded start
# label (previously `.loc[1893:]`), which only matched one particular dataset
# size and split ratio.
scores['datetime'] = df_timestamp.loc[test.index, 'time']
scores['real MotorRPM'] = test['MotorRPM']
# Absolute reconstruction error, in the units of the MotorRPM column
scores["loss_mae"] = (scores['real MotorRPM'] - scores['MotorRPM']).abs()
# Fixed error cut-off above which a row is flagged.
# NOTE(review): presumably picked by eye from the training error histogram — confirm.
scores['Threshold'] = 8
# 1 = anomaly (error strictly above the threshold), 0 = normal
scores['Anomaly'] = np.where(scores["loss_mae"] > scores["Threshold"], 1, 0)

In [None]:
# Reconstruction error over time, with the constant threshold drawn as a second trace
fig = go.Figure(data=[
    go.Scatter(x=scores['datetime'], y=scores['loss_mae'], name="Loss"),
    go.Scatter(x=scores['datetime'], y=scores['Threshold'], name="Threshold"),
])

fig.update_layout(
    title="Error Timeseries and Threshold",
    xaxis={'title': "DateTime"},
    yaxis={'title': "Loss"},
)
fig.show()

In [None]:
# Number of test rows flagged as anomaly (1) versus not flagged (0)
scores['Anomaly'].value_counts()

In [None]:
# Real MotorRPM values of the rows flagged as anomalies, in a column named 'anomalies'
anomalies = (
    scores.loc[scores['Anomaly'] == 1, ['real MotorRPM']]
    .rename(columns={'real MotorRPM': 'anomalies'})
)
# Left-join on the index: non-anomalous rows get NaN in 'anomalies', so the
# marker trace in the next plot only draws the flagged points.
# Any 'anomalies' column left over from a previous run of this cell is dropped
# first; otherwise the merge would produce 'anomalies_x' / 'anomalies_y' and
# the plot's `scores["anomalies"]` lookup would fail on re-run.
scores = (
    scores.drop(columns='anomalies', errors='ignore')
    .merge(anomalies, left_index=True, right_index=True, how='left')
)

In [None]:
# Real MotorRPM over the test period as a line, with the flagged rows
# highlighted as red markers.
anomaly_marker = {'color': "red", 'size': 11, 'line': {'color': "red", 'width': 2}}

fig = go.Figure(data=[
    go.Scatter(x=scores["datetime"], y=scores["real MotorRPM"], mode='lines', name='Motor RPM'),
    go.Scatter(x=scores["datetime"], y=scores["anomalies"], name='Anomaly', mode='markers',
               marker=anomaly_marker),
])

fig.update_layout(title_text="Anomalies Detected LSTM Autoencoder")

fig.show()