In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# Mount Google Drive so the CSV files stored under MyDrive are readable.
from google.colab import drive
drive.mount('/content/drive')

# Folder that contains the CSV files to load
folder_path = '/content/drive/MyDrive/Colab Notebooks/new/'

# Collect every CSV in the folder. os.listdir() returns entries in arbitrary
# order, so sort the paths to make the concatenated row order reproducible
# from one run to the next.
csv_files = sorted(
    os.path.join(folder_path, file)
    for file in os.listdir(folder_path)
    if file.endswith('.csv')
)

# Fail with an actionable message instead of pandas' generic
# "No objects to concatenate" when the folder holds no CSV files.
if not csv_files:
    raise ValueError(f"No CSV files found in {folder_path!r}")

# Read each CSV into its own dataframe
dfs = [pd.read_csv(file) for file in csv_files]

# Stack all dataframes into a single dataframe with a fresh 0..n-1 index
data = pd.concat(dfs, ignore_index=True)

# Parse 'First Occurred On' BEFORE sorting; unparseable values become NaT.
# Sorting the raw strings would order them lexicographically, which is not
# chronological for most date formats.
first_occurred = pd.to_datetime(data['First Occurred On'], errors='coerce')

# Sort oldest -> newest so that consecutive rows move forward in time and the
# last rows of the frame are the most recent alarms (the sliding windows and
# the forecast seed built further down take rows in frame order). Rows whose
# timestamp could not be parsed are placed first so they never end up in the
# most-recent tail. `assign` returns a new frame, leaving `data` untouched.
df = data.assign(**{'First Occurred On': first_occurred}).sort_values(
    by='First Occurred On',
    ascending=True,
    na_position='first',
    kind='stable',
)

# Reference time: day index 0 is the 24-hour window starting at this instant
reference_time = pd.to_datetime('2024-07-20 11:45:00')

# Whole days elapsed since the reference time (floor division, so the 24 hours
# just before the reference are -1, the 24 hours before that -2, and so on).
# NaT timestamps yield NaN here and are mapped to the sentinel -9999.
df['Relative Day Index'] = (
    ((df['First Occurred On'] - reference_time).dt.total_seconds() // 86400)
    .fillna(-9999)
    .astype(int)
)

# NOTE(review): the original cell followed this with
# `.apply(lambda x: x if x < 0 else x)`, an identity mapping that changed
# nothing, under a comment about 08:00-to-08:00 day boundaries. It has been
# removed; day windows are anchored at `reference_time` (11:45), not 08:00.
# If 08:00 boundaries are required, change `reference_time` accordingly.

# Keep only the alarms raised by HUAWEI equipment
filtered_data = df[df['Vendor'] == 'HUAWEI']

# Retain only the columns used as model features
columns_to_keep = ['Site ID', 'Alarm Name', 'Vendor', 'Domain', 'Device Type', 'Relative Day Index']

# Take an explicit copy: without it `df_filtered` is a slice of
# `filtered_data`, and the column assignments in the loop below trigger
# pandas' SettingWithCopyWarning.
df_filtered = filtered_data[columns_to_keep].copy()

# Label-encode every object-typed column ('Relative Day Index' is integer and
# is therefore left as is). The fitted encoders are kept per column so that
# integer codes can be mapped back to the original labels later.
categorical_columns = df_filtered.select_dtypes(include=['object']).columns
label_encoders = {}
for column in categorical_columns:
    le = LabelEncoder()
    # Cast to str first so missing values are encoded as the label 'nan'
    # instead of making the encoder fail on mixed types.
    df_filtered[column] = le.fit_transform(df_filtered[column].astype(str))
    label_encoders[column] = le

# Prepare the dataset for LSTM
def create_dataset(data, time_steps=1):
    """Build sliding-window samples for next-step prediction.

    Sample ``i`` pairs the window ``data[i : i + time_steps]`` with the row
    that immediately follows it, ``data[i + time_steps]``.

    Args:
        data: Array-like whose first axis is the sequence axis, e.g. a
            ``(n_rows, n_features)`` array.
        time_steps: Number of consecutive rows in each input window.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays where ``X`` has shape
        ``(n_samples, time_steps, *data.shape[1:])`` and ``y`` has shape
        ``(n_samples, *data.shape[1:])``, with
        ``n_samples = max(len(data) - time_steps, 0)``.

    Raises:
        ValueError: If ``time_steps`` is negative.
    """
    if time_steps < 0:
        raise ValueError("time_steps must be non-negative")

    data = np.asarray(data)
    n_samples = len(data) - time_steps

    if n_samples <= 0:
        # Not enough rows for even one window. Return empty arrays that keep
        # the expected rank so callers can still read X.shape[1] / X.shape[2]
        # instead of receiving a rank-1 empty array.
        row_shape = data.shape[1:]
        return (
            np.empty((0, time_steps) + row_shape, dtype=data.dtype),
            np.empty((0,) + row_shape, dtype=data.dtype),
        )

    X = np.array([data[i:i + time_steps] for i in range(n_samples)])
    # The target of window i is row i + time_steps, i.e. the tail of `data`;
    # copy so the result does not alias the caller's array.
    y = data[time_steps:].copy()
    return X, y

# --- Configuration ----------------------------------------------------------
time_steps = 10            # rows per input window fed to the LSTM
forecast_days = 7          # number of future days to produce predictions for
predictions_per_day = 10   # target number of distinct alarms per forecast day
# Upper bound on model calls per day, so the loop below always terminates even
# if the model keeps emitting the same alarm.
max_attempts_per_day = 5 * predictions_per_day

feature_columns = ['Alarm Name', 'Site ID', 'Vendor', 'Domain', 'Device Type', 'Relative Day Index']
features = df_filtered[feature_columns].values

if len(features) <= time_steps:
    raise ValueError(
        f"Need more than {time_steps} rows to build training windows, "
        f"got {len(features)}"
    )

# Scale every feature to [0, 1]. The label codes and the day index live on
# very different numeric ranges, and the same fitted scaler is needed below
# to map the network's outputs back to label codes.
scaler = MinMaxScaler()
scaled_features = scaler.fit_transform(features)

# X: (samples, time_steps, n_features); y: (samples, n_features)
X, y = create_dataset(scaled_features, time_steps)
n_features = X.shape[2]

# Define the LSTM model: two stacked LSTM layers and a dense layer that
# predicts the full (scaled) feature vector of the next row.
model = Sequential()
model.add(LSTM(50, return_sequences=True, input_shape=(time_steps, n_features)))
model.add(LSTM(50))
model.add(Dense(n_features))
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
model.fit(X, y, epochs=10, batch_size=2048, verbose=1)

# Highest valid label code per feature column; columns without an encoder
# (the day index) are left unbounded.
max_codes = np.array([
    len(label_encoders[col].classes_) - 1 if col in label_encoders else np.inf
    for col in feature_columns
])
day_col = feature_columns.index('Relative Day Index')

# --- Autoregressive forecast ------------------------------------------------
# NOTE(review): the seed window is the LAST `time_steps` rows of df_filtered,
# so this is a forward forecast only if those rows are the most recent alarms
# in chronological order -- verify the upstream sort.
predictions = []   # rounded, clipped label codes, in generation order
day_indices = []   # forecast day (1..forecast_days) for each kept prediction
current_batch = scaled_features[-time_steps:].reshape((1, time_steps, n_features))

for day in range(1, forecast_days + 1):
    seen_today = set()  # decoded alarms already emitted for this day
    for _ in range(max_attempts_per_day):
        if len(seen_today) >= predictions_per_day:
            break

        # verbose=0: avoid one progress bar per single-sample prediction
        pred = model.predict(current_batch, verbose=0)[0]

        # Slide the window: drop the oldest row, append the new prediction
        current_batch = np.append(current_batch[:, 1:, :], [[pred]], axis=1)

        # Map back to label codes and snap to the nearest valid code so that
        # every prediction can be decoded by its LabelEncoder.
        codes = np.rint(scaler.inverse_transform(pred.reshape(1, -1))[0])
        codes = np.clip(codes, 0, max_codes)

        # Uniqueness is judged on the decoded alarm (all columns except the
        # day index), not on raw floats, which almost never compare equal.
        alarm_key = tuple(np.delete(codes, day_col).astype(int))
        if alarm_key in seen_today:
            continue
        seen_today.add(alarm_key)
        predictions.append(codes)
        day_indices.append(day)

# Create a DataFrame for predictions; the predicted day index is replaced by
# the forecast day the prediction was generated for.
pred_df = pd.DataFrame(predictions, columns=feature_columns)
pred_df['Relative Day Index'] = day_indices

# Convert label codes back to the original categorical values. Each column is
# replaced as a whole so integer and string values are never mixed in one
# column.
for column, encoder in label_encoders.items():
    pred_df[column] = encoder.inverse_transform(pred_df[column].astype(int))

# Display sample output
pred_df.head(70)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered[column] = le.fit_transform(df_filtered[column].astype(str))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered[column] = le.fit_transform(df_filtered[column].astype(str))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered[column] = le.fit_transform(df_filtered[column].astype(str))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


Unnamed: 0,Alarm Name,Site ID,Vendor,Domain,Device Type,Relative Day Index
0,OML Fault,AM0037,HUAWEI,Access,eGBTS,1
1,OML Fault,AM0037,HUAWEI,Access,eGBTS,1
2,Low Fuel Alarm,AM0037,HUAWEI,Access,eGBTS,1
3,No License Running in System,AM0037,HUAWEI,Access,eGBTS,1
4,Generator On Load Alarm,AM0037,HUAWEI,Access,eGBTS,1
...,...,...,...,...,...,...
65,CSL Fault,AM0037,HUAWEI,Access,eGBTS,7
66,CSL Fault,AM0037,HUAWEI,Access,eGBTS,7
67,CSL Fault,AM0037,HUAWEI,Access,eGBTS,7
68,CSL Fault,AM0037,HUAWEI,Access,eGBTS,7
