In [29]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from google.colab import drive
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# Mount Google Drive
drive.mount('/content/drive')

# Path of folder
folder_path = '/content/drive/MyDrive/Colab Notebooks/2024_08_05/'

# Get list of all CSV files in the folder
csv_files = [os.path.join(folder_path, file) for file in os.listdir(folder_path) if file.endswith('.csv')]

# Initialize an empty list to hold the dataframes
dfs = []

# Loop through the CSV files and read each one into a dataframe
for file in csv_files:
    df = pd.read_csv(file)
    dfs.append(df)

# Concatenate all dataframes into a single dataframe
data = pd.concat(dfs, ignore_index=True)

# Sort by 'First Occurred On' in descending order (latest alarms first)
data = data.sort_values(by='First Occurred On', ascending=False)

# Convert 'First Occurred On' to datetime
data['First Occurred On'] = pd.to_datetime(data['First Occurred On'], errors='coerce')

# Reference time
reference_time = pd.to_datetime('2024-07-31 08:00:00')

# Calculate the relative day index
data['Relative Day Index'] = ((data['First Occurred On'] - reference_time).dt.total_seconds() // 86400).fillna(-9999).astype(int)

# Adjust the index to set the 08:00 to 08:00 of the next day as 0, and previous days as -1, -2, -3, etc.
data['Relative Day Index'] = data['Relative Day Index'].apply(lambda x: x if x < 0 else x)

# Filter data to include only rows before the 0 Relative Day Index
data_before_0 = data[data['Relative Day Index'] < 0]

# Keep only relevant columns
columns_to_keep = ['Site ID', 'Alarm Name', 'Device Type', 'Relative Day Index']
data_before_0 = data_before_0[columns_to_keep]

# Filter the dataset to focus on specific alarms of interest
alarms_of_interest = ['Mains Failure Alarm', 'Battery Deep Discharge Alarm',
                      'NE Is Disconnected', 'Heartbeat Failure',
                      'Cell Out of Service', 'Cell Unavailable']

filtered_data = data_before_0[data_before_0['Alarm Name'].isin(alarms_of_interest)]

# Encode categorical variables
le_site = LabelEncoder()
le_device = LabelEncoder()
le_alarm = LabelEncoder()

filtered_data['Site ID Encoded'] = le_site.fit_transform(filtered_data['Site ID'])
filtered_data['Device Type Encoded'] = le_device.fit_transform(filtered_data['Device Type'])
filtered_data['Alarm Name Encoded'] = le_alarm.fit_transform(filtered_data['Alarm Name'])

# Scale the data
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(filtered_data[['Site ID Encoded', 'Device Type Encoded', 'Alarm Name Encoded', 'Relative Day Index']])

# Prepare the data for LSTM (reshaping to 3D for LSTM)
def create_lstm_dataset(data, time_step=1):
    X, y = [], []
    for i in range(len(data) - time_step):
        X.append(data[i:(i + time_step), :-1])  # Features up to the last column
        y.append(data[i + time_step, -1])  # Target is the next alarm name
    return np.array(X), np.array(y)

time_step = 7  # Number of past days to consider for prediction
X, y = create_lstm_dataset(scaled_data, time_step)

# Reshape X to be [samples, time steps, features]
X = X.reshape((X.shape[0], time_step, X.shape[2]))

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Build the LSTM model
model = Sequential()
model.add(LSTM(50, return_sequences=True, input_shape=(time_step, X.shape[2])))
model.add(LSTM(50, return_sequences=False))
model.add(Dense(25))
model.add(Dense(1))  # Predict the encoded Alarm Name

model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
model.fit(X_train, y_train, batch_size=64, epochs=10, verbose=1)

# Predict on the test set
y_pred = model.predict(X_test)
y_pred = np.round(y_pred).astype(int)  # Round and convert to integer

# Inverse transform to get the original Alarm Names
y_pred_decoded = le_alarm.inverse_transform(y_pred.flatten())
y_test_decoded = le_alarm.inverse_transform(y_test.flatten())

# Evaluate the model
print("Predicted Alarm Names:\n", y_pred_decoded[:10])
print("True Alarm Names:\n", y_test_decoded[:10])

# Predict alarms for the next 7 days using the LSTM model
def predict_upcoming_alarms_lstm(model, X_test, le_alarm, days=7):
    predictions = []
    X_input = X_test[-1:]  # Start with the last input sequence

    for day in range(1, days + 1):
        day_prediction = model.predict(X_input)
        day_prediction = np.round(day_prediction).astype(int)
        decoded_prediction = le_alarm.inverse_transform(day_prediction.flatten())

        predictions.append((X_input[0, -1, -1] + day, decoded_prediction[0]))  # Adjust Relative Day Index and append

        # Update X_input with the new prediction for the next iteration
        new_input = np.hstack((X_input[:, 1:, :], np.expand_dims(day_prediction, axis=2)))
        X_input = new_input

    return predictions

# Use the trained model to predict alarms for the next 7 days
upcoming_predictions = predict_upcoming_alarms_lstm(model, X_test.copy(), le_alarm)

# Print predictions
for day_index, alarm in upcoming_predictions:
    print(f"Day Index: {day_index}, Alarm: {alarm}")

    # You can add back the Site ID and Device Type decoding for a more complete output
    print(f"Site ID: {le_site.inverse_transform([X_test[0, 0, 0]])[0]}, Device Type: {le_device.inverse_transform([X_test[0, 0, 1]])[0]}")



  and should_run_async(code)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


  df = pd.read_csv(file)
  df = pd.read_csv(file)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_data['Site ID Encoded'] = le_site.fit_transform(filtered_data['Site ID'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_data['Device Type Encoded'] = le_device.fit_transform(filtered_data['Device Type'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-

Epoch 1/10
[1m11595/11595[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m138s[0m 12ms/step - loss: 0.0802
Epoch 2/10
[1m11595/11595[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m135s[0m 12ms/step - loss: 0.0764
Epoch 3/10
[1m11595/11595[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m135s[0m 12ms/step - loss: 0.0730
Epoch 4/10
[1m11595/11595[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m146s[0m 12ms/step - loss: 0.0701
Epoch 5/10
[1m11595/11595[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m143s[0m 12ms/step - loss: 0.0691
Epoch 6/10
[1m11595/11595[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m143s[0m 12ms/step - loss: 0.0685
Epoch 7/10
[1m11595/11595[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m141s[0m 12ms/step - loss: 0.0679
Epoch 8/10
[1m11595/11595[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 12ms/step - loss: 0.0674
Epoch 9/10
[1m11595/11595[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m144s[0m 12ms/step - loss: 0.0669
Epoch 10/10
[1m11595/11595

ValueError: y contains previously unseen labels: [0.00568182 0.01136364 0.01704545 0.02272727 0.02840909 0.03409091
 0.03977273 0.04545455 0.05113636 0.05681818 0.0625     0.06818182
 0.07386364 0.07954545 0.08522727 0.09090909 0.09659091 0.10227273
 0.10795455 0.11363636 0.11931818 0.125      0.13068182 0.13636364
 0.14204545 0.14772727 0.15340909 0.15909091 0.16477273 0.17045455
 0.17613636 0.18181818 0.1875     0.19318182 0.19886364 0.20454545
 0.21022727 0.21590909 0.22159091 0.22727273 0.23295455 0.23863636
 0.24431818 0.25       0.25568182 0.26136364 0.26704545 0.27272727
 0.27840909 0.28409091 0.28977273 0.29545455 0.30113636 0.30681818
 0.3125     0.31818182 0.32386364 0.32954545 0.33522727 0.34090909
 0.34659091 0.35227273 0.35795455 0.36363636 0.36931818 0.375
 0.38068182 0.38636364 0.39204545 0.39772727 0.40340909 0.40909091
 0.41477273 0.42045455 0.42613636 0.43181818 0.4375     0.44318182
 0.44886364 0.45454545 0.46022727 0.46590909 0.47159091 0.47727273
 0.48295455 0.48863636 0.49431818 0.5        0.50568182 0.51136364
 0.51704545 0.52272727 0.52840909 0.53409091 0.53977273 0.54545455
 0.55113636 0.55681818 0.5625     0.56818182 0.57386364 0.57954545
 0.58522727 0.59090909 0.59659091 0.60227273 0.60795455 0.61363636
 0.61931818 0.625      0.63068182 0.63636364 0.64204545 0.64772727
 0.65340909 0.65909091 0.66477273 0.67045455 0.67613636 0.68181818
 0.6875     0.69318182 0.69886364 0.70454545 0.71022727 0.71590909
 0.72159091 0.72727273 0.73295455 0.73863636 0.74431818 0.75
 0.75568182 0.76136364 0.76704545 0.77272727 0.77840909 0.78409091
 0.78977273 0.79545455 0.80113636 0.80681818 0.8125     0.81818182
 0.82386364 0.82954545 0.83522727 0.84090909 0.84659091 0.85227273
 0.85795455 0.86363636 0.86931818 0.875      0.88068182 0.88636364
 0.89204545 0.89772727 0.90340909 0.90909091 0.91477273 0.92045455
 0.92613636 0.93181818 0.9375     0.94318182 0.94886364 0.95454545
 0.96022727 0.96590909 0.97159091 0.97727273 0.98295455 0.98863636
 0.99431818 1.        ]