# 1. Importing some Libraries

In [None]:
import numpy as np
import pandas as pd
from influxdb import InfluxDBClient
from datetime import datetime
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
import seaborn as sns
import plotly.graph_objects as go
from sklearn.ensemble import IsolationForest
import matplotlib.pyplot as plt

import holoviews as hv
from holoviews import opts
hv.extension('bokeh')


from tensorflow.keras.layers import Input, Dropout, Dense, LSTM, TimeDistributed, RepeatVector
from tensorflow.keras.models import Model
from tensorflow.keras import regularizers
from tensorflow.keras.models import Sequential
from sklearn import metrics
import tensorflow as tf
import keras.backend as K
import matplotlib.pyplot as plt

# Notebook-wide pandas display settings: show at most 15 rows, never hide
# columns, and allow output up to 1000 characters wide.
for option_name, option_value in (
    ('display.max_rows', 15),
    ('display.max_columns', None),
    ('display.width', 1000),
):
    pd.set_option(option_name, option_value)


# 2. Fetching Data: DE2Thing_HMD8310

In [None]:
# Connect to the local InfluxDB instance and select the 'ISS' database.
client = InfluxDBClient(host='localhost', port=8086)
client.switch_database('ISS')

# Measurement to load. It is named once here so that the query, the section
# heading and the DataFrame used by the following cells all agree.
# NOTE(review): this cell previously queried "DG2Thing_HMD8310" while the rest
# of the notebook refers to DE2Thing_HMD8310 -- confirm this is the measurement
# you want to analyse.
MEASUREMENT = 'DE2Thing_HMD8310'

# Query the database for every point of that one measurement.
# To restrict the time range, append a clause such as:
#   WHERE time >= '2022-08-29T23:28:00Z' AND time < '2023-04-26T08:00:00Z'
query1 = f'SELECT * FROM "{MEASUREMENT}"'
results1 = client.query(query1)

# One row per returned point. The frame is bound to the name the later cells
# read (column listing, CSV export); the previous name raised NameError there.
DE2Thing_HMD8310 = pd.DataFrame.from_records(results1.get_points())

print(DE2Thing_HMD8310.shape)
DE2Thing_HMD8310.head(2)

In [None]:
# List the name of every column in the fetched frame.
print(DE2Thing_HMD8310.columns.to_list())

In [None]:
# Cache the fetched data as CSV so later cells can reload it from disk
# instead of querying the database again.
DE2Thing_HMD8310.to_csv("../Data/DE2Thing_HMD8310.csv")

In [None]:
# Function to convert an ISO 8601 UTC timestamp string to datetime
def ISO_8601_To_Datetime(s):
    """Parse a ``YYYY-MM-DDTHH:MM:SS[.fraction]Z`` string into a naive datetime.

    The optional fractional-second part may have any number of digits;
    digits beyond microsecond precision are truncated, because ``datetime``
    cannot store more than six.

    Args:
        s: Timestamp string ending in ``Z``.

    Returns:
        A timezone-naive ``datetime`` holding the parsed wall-clock value.

    Raises:
        TypeError: If *s* is not a string.
        ValueError: If *s* does not match the expected layout.
    """
    if not isinstance(s, str):
        raise TypeError(
            f"ISO_8601_To_Datetime() argument must be str, not {type(s).__name__}"
        )
    if not s.endswith('Z'):
        raise ValueError(f"time data {s!r} does not end with 'Z'")

    # Split off the fractional seconds so that strptime only sees the part
    # whose layout is fixed.
    whole, dot, fraction = s[:-1].partition('.')
    parsed = datetime.strptime(whole, '%Y-%m-%dT%H:%M:%S')

    if dot:
        if not fraction.isdigit():
            raise ValueError(f"time data {s!r} has an invalid fractional part")
        # Pad or truncate to exactly six digits (microseconds).
        microseconds = int(fraction[:6].ljust(6, '0'))
        parsed = parsed.replace(microsecond=microseconds)

    return parsed

In [None]:
# Reload the cached CSV. Column 0 is used as the index and column 1 is parsed
# as dates with ISO_8601_To_Datetime.
# NOTE(review): `date_parser` is deprecated in pandas >= 2.0 -- confirm the
# installed pandas version still accepts it.
csv_path = '../Data/DE2Thing_HMD8310.csv'
df = pd.read_csv(
    csv_path,
    index_col=0,
    parse_dates=[1],
    date_parser=ISO_8601_To_Datetime,
)
df.head()

In [None]:
# Count the missing values in each column of the raw frame.
df.isnull().sum()

In [None]:
# Print a summary of the raw frame's columns.
df.info()

In [None]:
# Show the available column labels before selecting a subset.
df.columns

In [None]:
# Reduce the raw frame to the timestamp, the CyNExhGasOutletTemp and
# CylN_Pmax columns of cylinders 1-9, and the Load and Power columns.
exhaust_temp_columns = [
    'Cy1ExhGasOutletTemp',
    'Cy2ExhGasOutletTemp',
    'Cy3ExhGasOutletTemp',
    'Cy4ExhGasOutletTemp',
    'Cy5ExhGasOutletTemp',
    'Cy6ExhGasOutletTemp',
    'Cy7ExhGasOutletTemp',
    'Cy8ExhGasOutletTemp',
    'Cy9ExhGasOutletTemp',
]
pmax_columns = [
    'Cyl1_Pmax',
    'Cyl2_Pmax',
    'Cyl3_Pmax',
    'Cyl4_Pmax',
    'Cyl5_Pmax',
    'Cyl6_Pmax',
    'Cyl7_Pmax',
    'Cyl8_Pmax',
    'Cyl9_Pmax',
]
selected_columns = ['time'] + exhaust_temp_columns + pmax_columns + ['Load', 'Power']

df_CyExGas_CyPmax = df.loc[:, selected_columns]
print(df_CyExGas_CyPmax.shape)
df_CyExGas_CyPmax.head(2)

In [None]:
#df_CyExGas_CyPmax.to_csv("Data/df_CyExGas_CyPmax.csv")

In [None]:
# Count the missing values per selected column before any rows are dropped.
df_CyExGas_CyPmax.isnull().sum()

In [None]:
# Drop every row that has a missing value in any selected column, then
# display the remaining frame.
df_CyExGas_CyPmax = df_CyExGas_CyPmax.dropna()
df_CyExGas_CyPmax

In [None]:
# Re-count missing values per column to check the result of dropna().
df_CyExGas_CyPmax.isnull().sum()

In [None]:
#df_CyExhGasOutletTemp = df_CyExhGasOutletTemp.fillna(df_CyExhGasOutletTemp.mean())

In [None]:
# Display the cleaned subset.
df_CyExGas_CyPmax

In [None]:
# Checking for blank values and Data Types.
def overview(df_CyExGas_CyPmax: pd.DataFrame, timestamp_col: str = None) -> None:
    """Print a quick data-quality summary of a DataFrame.

    Always prints the per-column null counts and dtypes. When
    *timestamp_col* is given, also prints the smallest and largest value of
    that column and the difference between them.

    Args:
        df_CyExGas_CyPmax: Frame to summarise.
        timestamp_col: Name of the column to report the range of, or
            ``None`` (the default) to skip the range report.

    Returns:
        None. The summary is written to standard output.
    """
    print('Null Count:\n', df_CyExGas_CyPmax.isnull().sum(), '\n')
    # The label used to end in a stray ':' after the newline.
    print('Data Types:\n', df_CyExGas_CyPmax.dtypes)

    if timestamp_col is None:
        return

    # Scan the column once per bound instead of recomputing min/max for the
    # difference.
    timestamps = df_CyExGas_CyPmax[timestamp_col]
    start = timestamps.min()
    end = timestamps.max()

    print('\nDate Range: \n\nStart:\t', start)
    print('End:\t', end)
    print('Days:\t', end - start)

In [None]:
# Summarise nulls, dtypes and the range of the 'time' column for the subset.
overview(df_CyExGas_CyPmax, timestamp_col='time')

In [None]:
#df_CyExGas_CyPmax

In [None]:
# Overlay the Cy1 and Cy2 exhaust-gas outlet temperature columns on a shared
# time axis; Cy1 uses the left y-axis, Cy2 the right one.
time_axis = df_CyExGas_CyPmax["time"]

cy1_trace = go.Scatter(
    x=time_axis,
    y=df_CyExGas_CyPmax["Cy1ExhGasOutletTemp"],
    mode='lines',
    name='Cy1ExhGasOutletTemp',
)
cy2_trace = go.Scatter(
    x=time_axis,
    y=df_CyExGas_CyPmax["Cy2ExhGasOutletTemp"],
    mode='lines',
    name='Cy2ExhGasOutletTemp',
    yaxis='y2',
)

fig = go.Figure()
fig.add_trace(cy1_trace)
fig.add_trace(cy2_trace)
fig.update_layout(
    title_text="Cy1ExhGasOutletTemp vs Cy2ExhGasOutletTemp",
    yaxis1=dict(title="Cy1ExhGasOutletTemp", side='left'),
    yaxis2=dict(title="Cy2ExhGasOutletTemp", side='right', anchor="x", overlaying="y"),
)
fig.show()

In [None]:
# Print a summary of the cleaned subset's columns.
df_CyExGas_CyPmax.info()

### Delete rows with 0 values in a pandas DataFrame

In [None]:
# Row/column count before the zero-value rows are removed.
print(df_CyExGas_CyPmax.shape)

In [None]:
# Flag each row in which at least one column equals 0 ...
zero_mask = df_CyExGas_CyPmax == 0
rows_with_zero = zero_mask.any(axis=1)

# ... keep only the unflagged rows and renumber the index from 0.
df_CyExGas_CyPmax = df_CyExGas_CyPmax[~rows_with_zero].reset_index(drop=True)

# Display the modified DataFrame
print("Dataframe without 0 in rows")
df_CyExGas_CyPmax

# LSTM Autoencoder

## Split The Data into Train and Test Set

In [None]:
#df = df.loc[:2000,:]

# Model inputs: CyNExhGasOutletTemp and CylN_Pmax of cylinders 1-9 plus the
# Load and Power columns. The list is declared once and reused for both
# selections below.
exhaust_temp_columns = [
    'Cy1ExhGasOutletTemp',
    'Cy2ExhGasOutletTemp',
    'Cy3ExhGasOutletTemp',
    'Cy4ExhGasOutletTemp',
    'Cy5ExhGasOutletTemp',
    'Cy6ExhGasOutletTemp',
    'Cy7ExhGasOutletTemp',
    'Cy8ExhGasOutletTemp',
    'Cy9ExhGasOutletTemp',
]
pmax_columns = [
    'Cyl1_Pmax',
    'Cyl2_Pmax',
    'Cyl3_Pmax',
    'Cyl4_Pmax',
    'Cyl5_Pmax',
    'Cyl6_Pmax',
    'Cyl7_Pmax',
    'Cyl8_Pmax',
    'Cyl9_Pmax',
]
feature_columns = exhaust_temp_columns + pmax_columns + ['Load', 'Power']

# Keep the timestamp together with the features in the working frame ...
df_CyExGas_CyPmax = df_CyExGas_CyPmax.loc[:, ['time'] + feature_columns]

# ... then split it into a timestamp-only frame and a features-only frame.
df_timestamp = df_CyExGas_CyPmax[['time']]
df_ = df_CyExGas_CyPmax[feature_columns]

df_CyExGas_CyPmax.shape


In [None]:
#df_CyExGas_CyPmax = df_CyExGas_CyPmax[df_CyExGas_CyPmax > 0]
# Display the working frame (timestamp + feature columns).
df_CyExGas_CyPmax

In [None]:
# Positional hold-out split: the first `train_prp` share of the rows goes to
# the training set and the remaining rows to the test set.
# (Rows are presumably in time order -- TODO confirm.)
train_prp = .98

# Split by integer position with `iloc`. Slicing `.loc` with a float label
# bound is label-based and inclusive at both ends, so the boundary row ended
# up in both sets whenever `n * train_prp` was a whole number, and the split
# only worked for a 0..n-1 index.
split_at = int(df_.shape[0] * train_prp)
train = df_.iloc[:split_at]
test = df_.iloc[split_at:]

In [None]:
# Display the training partition.
train

In [None]:
# Training rows in which at least one (unscaled) feature is greater than 1.
exceeds_one = (train > 1).any(axis=1)
filtered_df = train[exceeds_one]
# Display the filtered DataFrame
print(filtered_df)

In [None]:
# Display the test partition.
test

## Feature Scaling

In [None]:
# Standardize The Data: the scaler is fitted on the training rows only and
# the same fitted scaler is then applied to the test rows.
scaler = StandardScaler()
scaler.fit(train)
X_train = scaler.transform(train)
X_test = scaler.transform(test)

print("X train Shape:", X_train.shape)
print("X test Shape:", X_test.shape)

In [None]:
# Display the scaled training array.
X_train

In [None]:
# Reshape the Train and Test set for the LSTM Model: insert a middle axis of
# length 1, giving arrays of shape (rows, 1, columns).
train_rows, train_cols = X_train.shape[0], X_train.shape[1]
test_rows, test_cols = X_test.shape[0], X_test.shape[1]

X_train = X_train.reshape((train_rows, 1, train_cols))
X_test = X_test.reshape((test_rows, 1, test_cols))

print("X train Shape:", X_train.shape)
print("X test Shape:", X_test.shape)

In [None]:
def autoencoder_model(X):
    """Build an uncompiled LSTM autoencoder whose input shape matches *X*.

    The encoder stacks LSTM layers of 16 and 4 units, the 4-unit output is
    repeated once per time step, and the decoder mirrors the encoder (4 then
    16 units) before a time-distributed dense layer restores the original
    feature count.

    Args:
        X: 3-D array; only ``X.shape[1]`` and ``X.shape[2]`` are read.

    Returns:
        A ``Model`` mapping the input to its reconstruction; the caller is
        responsible for compiling it.
    """
    timesteps, n_features = X.shape[1], X.shape[2]

    # The Encoder
    inputs = Input(shape=(timesteps, n_features))
    encoded = LSTM(
        16,
        activation='relu',
        return_sequences=True,
        # An L2 factor of 0.00 is passed, i.e. no penalty is applied.
        kernel_regularizer=regularizers.l2(0.00),
    )(inputs)
    encoded = LSTM(4, activation='relu', return_sequences=False)(encoded)

    # Repeat the encoded vector once per time step to feed the decoder.
    repeated = RepeatVector(timesteps)(encoded)

    # The Decoder
    decoded = LSTM(4, activation='relu', return_sequences=True)(repeated)
    decoded = LSTM(16, activation='relu', return_sequences=True)(decoded)
    output = TimeDistributed(Dense(n_features))(decoded)

    return Model(inputs=inputs, outputs=output)

In [None]:
# Build the autoencoder for the training array's shape and compile it with
# the Adam optimizer and a mean-absolute-error loss.
# NOTE(review): 'accuracy' is normally a classification metric; it is kept
# because the later plotting cells read history['accuracy'] -- consider
# whether it is meaningful for reconstruction.
model = autoencoder_model(X_train)
model.compile(
    loss='mae',
    optimizer='adam',
    metrics=['accuracy'],
)
model.summary()

In [None]:
# Train the autoencoder to reproduce its own input, holding back 20 % of the
# training array for validation, and keep the per-epoch metric dict.
epochs = 75
batch = 25
training_run = model.fit(
    X_train,
    X_train,
    epochs=epochs,
    batch_size=batch,
    validation_split=.2,
    verbose=1,
)
history = training_run.history

In [None]:
# Plot training and validation loss against the epoch index.
fig = go.Figure()

for history_key, trace_name in (('loss', 'loss'), ('val_loss', 'validation loss')):
    values = history[history_key]
    fig.add_trace(
        go.Scatter(x=list(range(len(values))), y=values, mode='lines', name=trace_name)
    )

fig.update_layout(
    title="LSTM AE Error Loss Over Epochs",
    yaxis=dict(title="Loss"),
    xaxis=dict(title="Epoch"),
)

fig.show()

In [None]:
# Plot the training and validation 'accuracy' metric against the epoch index.
fig = go.Figure()

train_accuracy = history['accuracy']
fig.add_trace(go.Scatter(x=list(range(len(train_accuracy))), y=train_accuracy, mode='lines', name='accuracy'))

validation_accuracy = history['val_accuracy']
fig.add_trace(go.Scatter(x=list(range(len(validation_accuracy))), y=validation_accuracy, mode='lines', name='validation accuracy'))

# The y-axis shows accuracy; it was previously titled "Loss".
fig.update_layout(title="LSTM AE Accuracy Over Epochs", yaxis=dict(title="Accuracy"), xaxis=dict(title="Epoch"))

fig.show()

In [None]:
# Plot how the training and validation loss evolved over the epochs and save
# the figure to disk.
epoch_loss = history['loss']
epoch_val_loss = history['val_loss']

plt.figure(figsize=(8,5))
plt.plot(range(len(epoch_loss)), epoch_loss, 'b-', linewidth=2, label='Train Loss')
# 'val_loss' comes from the validation split held back during fit(), not from
# the separate test set, so it is labelled as validation loss.
plt.plot(range(len(epoch_val_loss)), epoch_val_loss, 'r-', linewidth=2, label='Validation Loss')
plt.xlabel("Epochs")
plt.ylabel("Loss")

plt.legend(loc='best')
# Save before show(); the file is written to the current working directory.
plt.savefig('Figure_Loss_LSTM_AE_Cyl.jpeg')
plt.show()

In [None]:
# Plot how the training and validation 'accuracy' metric evolved over the
# epochs and save the figure to disk.
epoch_acc = history['accuracy']
epoch_val_acc = history['val_accuracy']

plt.figure(figsize=(8,5))

plt.plot(range(len(epoch_acc)), epoch_acc, 'b-', linewidth=2, label='Train Acc')
# 'val_accuracy' comes from the validation split held back during fit(), not
# from the separate test set, so it is labelled as validation accuracy.
plt.plot(range(len(epoch_val_acc)), epoch_val_acc, 'r-', linewidth=2, label='Validation Acc')
plt.xlabel("Epochs")
plt.ylabel("Accuracy")
plt.legend(loc='lower right')

# Save before show(); the file is written to the current working directory.
plt.savefig('Figure_Acc_LSTM_AE_Cyl.jpeg')
plt.show()

# Saving The Model

In [None]:
# Persist the trained model to a single .h5 file in the current working
# directory.
model_path = 'CyExGas_CyPmax_Model.h5'
model.save(model_path)
print("Model Saved to a Disk")