In [26]:
import pandas as pd
import numpy as np
import os
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import LSTM, Dense
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split

# Load and preprocess data
def load_and_preprocess_data(folder_path):
    csv_files = [os.path.join(folder_path, file) for file in os.listdir(folder_path) if file.endswith('.csv')]
    dfs = [pd.read_csv(file) for file in csv_files]
    data = pd.concat(dfs, ignore_index=True)

    # Convert 'First Occurred On' to datetime and filter data before '2024-07-31'
    data['First Occurred On'] = pd.to_datetime(data['First Occurred On'], errors='coerce')
    data = data[data['First Occurred On'] < '2024-07-31']

    # Calculate the relative day index
    reference_time = pd.to_datetime('2024-07-31 08:00:00')
    data['Relative Day Index'] = ((data['First Occurred On'] - reference_time).dt.total_seconds() // 86400).fillna(-9999).astype(int)
    data['Relative Day Index'] = data['Relative Day Index'].apply(lambda x: x if x < 0 else x)

    return data

# Encode and scale the data
def preprocess_data(data):
    # Encode categorical variables
    le_site = LabelEncoder()
    le_device = LabelEncoder()
    le_alarm = LabelEncoder()
    le_domain = LabelEncoder()

    data['Site ID Encoded'] = le_site.fit_transform(data['Site ID'])
    data['Device Type Encoded'] = le_device.fit_transform(data['Device Type'])
    data['Alarm Name Encoded'] = le_alarm.fit_transform(data['Alarm Name'])
    data['Domain Encoded'] = le_domain.fit_transform(data['Domain'])

    # Scale the data
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(data[['Site ID Encoded', 'Device Type Encoded', 'Alarm Name Encoded', 'Relative Day Index', 'Domain Encoded']])

    return scaled_data, le_site, le_device, le_alarm, le_domain, scaler

# Prepare the dataset for LSTM
def create_lstm_dataset(data, time_step=7):
    X, y = [], []
    for i in range(len(data) - time_step):
        X.append(data[i:(i + time_step), :-1])  # Features up to the last column
        y.append(data[i + time_step, -1])  # Target is the next alarm occurrence
    return np.array(X), np.array(y)

# Load data
folder_path = '/content/drive/MyDrive/Colab Notebooks/2024_08_09/'
data = load_and_preprocess_data(folder_path)


In [28]:

# Filter relevant alarms
alarms_of_interest = ['Mains Failure Alarm', 'Battery Deep Discharge Alarm',
                      'NE Is Disconnected', 'Heartbeat Failure',
                      'Cell Out of Service', 'Cell Unavailable']
data_filtered = data[data['Alarm Name'].isin(alarms_of_interest)]

# Preprocess the data
scaled_data, le_site, le_device, le_alarm, le_domain, scaler = preprocess_data(data_filtered)

# Create LSTM dataset
time_step = 7
X, y = create_lstm_dataset(scaled_data, time_step)
X = X.reshape((X.shape[0], time_step, X.shape[2]))

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Build and train the LSTM model
model = Sequential()
model.add(LSTM(50, return_sequences=True, input_shape=(time_step, X.shape[2])))
model.add(LSTM(50, return_sequences=False))
model.add(Dense(25))
model.add(Dense(1))  # Predict the encoded Alarm Name

model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(X_train, y_train, batch_size=512, epochs=5, verbose=1)

# Save the trained model
model_save_path = '/content/drive/MyDrive/Colab Notebooks/2024_08_09/lstm_alarm_model.keras'
model.save(model_save_path)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Site ID Encoded'] = le_site.fit_transform(data['Site ID'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Device Type Encoded'] = le_device.fit_transform(data['Device Type'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Alarm Name Encoded'] = le_alarm.fit_transform(data['Alarm N

Epoch 1/5


  super().__init__(**kwargs)


[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 42ms/step - loss: 0.2313
Epoch 2/5
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 56ms/step - loss: 0.1755
Epoch 3/5
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 45ms/step - loss: 0.1740
Epoch 4/5
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 45ms/step - loss: 0.1733
Epoch 5/5
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 52ms/step - loss: 0.1729


In [None]:
def predict_future_alarms(model, data, time_step=7, days=7):
    scaled_data, _, _, _, _, scaler = preprocess_data(data)

    if len(scaled_data) == 0:
        raise ValueError("No data available for prediction. Check your dataset.")

    X = create_lstm_dataset(scaled_data, time_step)[0]
    if len(X) == 0:
        raise ValueError("No sequences available for prediction. Check your data and time step.")

    X = X.reshape((X.shape[0], time_step, X.shape[2]))

    predictions = []
    for site_id in data['Site ID'].unique():
        site_data = data[data['Site ID'] == site_id]
        if site_data.empty:
            continue

        site_scaled_data, _, _, _, _, _ = preprocess_data(site_data)
        if len(site_scaled_data) == 0:
            continue

        site_X = create_lstm_dataset(site_scaled_data, time_step)[0]
        if len(site_X) == 0:
            continue

        site_X = site_X.reshape((site_X.shape[0], time_step, site_X.shape[2]))
        site_predictions = []

        for i in range(len(site_X)):
            prediction = model.predict(site_X[i:i+1])
            site_predictions.append(prediction)

        site_predictions = np.concatenate(site_predictions)
        mean_prediction = np.mean(site_predictions)
        predictions.append((site_id, mean_prediction))

    if len(predictions) == 0:
        raise ValueError("No predictions were made. Check your data and model.")

    return predictions

# Predict future alarms
try:
    site_predictions = predict_future_alarms(model, data)

    # Find the site with the highest probability of the target alarm
    most_probable_site = max(site_predictions, key=lambda x: x[1])
    print(f"Site with highest probability of target alarm: Site ID {most_probable_site[0]} with probability {most_probable_site[1]}")
except ValueError as e:
    print(f"Error: {e}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Site ID Encoded'] = le_site.fit_transform(data['Site ID'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Device Type Encoded'] = le_device.fit_transform(data['Device Type'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Alarm Name Encoded'] = le_alarm.fit_transform(data['Alarm N

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Site ID Encoded'] = le_site.fit_transform(data['Site ID'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Device Type Encoded'] = le_device.fit_transform(data['Device Type'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Alarm Name Encoded'] = le_alarm.fit_transform(data['Alarm N

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Site ID Encoded'] = le_site.fit_transform(data['Site ID'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Device Type Encoded'] = le_device.fit_transform(data['Device Type'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Alarm Name Encoded'] = le_alarm.fit_transform(data['Alarm N

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Site ID Encoded'] = le_site.fit_transform(data['Site ID'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Device Type Encoded'] = le_device.fit_transform(data['Device Type'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Alarm Name Encoded'] = le_alarm.fit_transform(data['Alarm N

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Site ID Encoded'] = le_site.fit_transform(data['Site ID'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Device Type Encoded'] = le_device.fit_transform(data['Device Type'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Alarm Name Encoded'] = le_alarm.fit_transform(data['Alarm N

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 69ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step

In [None]:

# Predict alarms for the next 7 days
def predict_next_days(model, X_test, le_alarm, days=7):
    predictions = []
    X_input = X_test[-1:]  # Start with the last input sequence

    for day in range(1, days + 1):
        day_prediction = model.predict(X_input)
        day_prediction = np.round(day_prediction).astype(int)
        decoded_prediction = le_alarm.inverse_transform(day_prediction.flatten())

        predictions.append((X_input[0, -1, -1] + day, decoded_prediction[0]))  # Adjust Relative Day Index and append

        # Update X_input with the new prediction for the next iteration
        new_input = np.hstack((X_input[:, 1:, :], np.expand_dims(day_prediction, axis=2)))
        X_input = new_input

    return predictions

# Prepare X_test for prediction
X_test = create_lstm_dataset(preprocess_data(data)[0], time_step)[0]
X_test = X_test.reshape((X_test.shape[0], time_step, X_test.shape[2]))

# Predict alarms for the next 7 days
upcoming_predictions = predict_next_days(model, X_test.copy(), le_alarm)

# Print the predictions
for day_index, alarm in upcoming_predictions:
    print(f"Day Index: {day_index}, Alarm: {alarm}")

    # Add back Site ID and Device Type decoding for a more complete output
    print(f"Site ID: {le_site.inverse_transform([X_test[0, 0, 0]])[0]}, Device Type: {le_device.inverse_transform([X_test[0, 0, 1]])[0]}")