In [None]:

# Import necessary libraries
import os

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from google.colab import drive
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.cluster import KMeans
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

In [None]:
# Mount Google Drive
drive.mount('/content/drive')

# Path of folder
folder_path = '/content/drive/MyDrive/Colab Notebooks/2024_08_21/'

# Get list of all CSV files in the folder.
# os.listdir returns names in arbitrary order, so the paths are sorted to keep
# the row order of the combined frame identical from run to run.
csv_files = sorted(
    os.path.join(folder_path, file)
    for file in os.listdir(folder_path)
    if file.endswith('.csv')
)

# Fail with a clear message instead of pd.concat's "No objects to concatenate".
if not csv_files:
    raise ValueError(f"No CSV files found in '{folder_path}'.")

# Initialize an empty list to hold the dataframes
dfs = []

# Loop through the CSV files and read each one into a dataframe
for file in csv_files:
    df = pd.read_csv(file)
    dfs.append(df)

# Concatenate all dataframes into a single dataframe with a fresh 0..n-1 index
alarm_data = pd.concat(dfs, ignore_index=True)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Preview the first rows of the combined alarm table to sanity-check the loaded columns
alarm_data.head()

Unnamed: 0,Last Occurred On,Alarm Source,Alarm Severity,Alarm Name,Alarm ID,Alarm Location Info,Site Name,Cleared On,First Occurred On,Clearance Status,Ticket ID,Site ID,Vendor,Domain,Device Type
0,2024-06-23 02:50:47,AKBAR_UGW,Major,IPPM session fault,2620,"Local IP address=172.27.24.249, VPN=VPN_S1, Pe...",Dummy,2024-06-23 02:52:32,2024-06-23 02:50:47,Cleared,,VV0000,HUAWEI,Core,Unknown
1,2024-06-23 02:50:47,U2020ACC,Major,Parallel Alarm Exceeds the Limit,832,"RuleID=1980, Source Alarm Information=""Alarm N...",,2024-06-23 03:09:40,2024-06-23 02:50:47,Cleared,,,HUAWEI,Others,
2,2024-06-23 02:50:46,Poruwadanda-KL0127-L,Major,User Plane Fault,25954,Service Type=X2,Poruwadanda,2024-06-23 03:37:01,2024-06-23 02:50:46,Cleared,,KL0127,HUAWEI,Access,eNodeB
3,2024-06-23 02:50:45,Kananvila-KL0116-L,Critical,eNodeB S1 Control Plane Transmission Interruption,29213,"eNodeB Function Name=Kananvila-KL0116-L, CN Op...",Kananvila,2024-06-23 03:26:12,2024-06-23 02:50:45,Cleared,,KL0116,HUAWEI,Access,eNodeB
4,2024-06-23 02:50:45,PILI-AMF-01,Major,S1ap Link Down,80589,"Service Instance=LINK_VNFC_999, Mobile Country...",Piliyandala,2024-06-23 03:36:39,2024-06-23 02:50:45,Cleared,,CM0091,HUAWEI,Core,UNC


In [None]:
# Filter relevant columns (work on a copy so the raw `alarm_data` frame stays untouched)
columns_to_use = ['First Occurred On', 'Alarm Name', 'Site ID', 'Domain']
alarm_data_filtered = alarm_data[columns_to_use].copy()

# Convert date to datetime
alarm_data_filtered['First Occurred On'] = pd.to_datetime(alarm_data_filtered['First Occurred On'])

# Sort by site and date so that consecutive rows are consecutive alarms of one site
alarm_data_filtered = alarm_data_filtered.sort_values(by=['Site ID', 'First Occurred On'])

# Encode categorical features.
# Some rows have no value in these columns (e.g. an empty Site ID). A column
# mixing strings with NaN is not encoded consistently across scikit-learn
# versions, so missing entries get an explicit placeholder category first.
MISSING_LABEL = '<missing>'
label_encoders = {}
for col in ['Alarm Name', 'Site ID', 'Domain']:
    le = LabelEncoder()
    col_as_text = alarm_data_filtered[col].fillna(MISSING_LABEL).astype(str)
    alarm_data_filtered[col] = le.fit_transform(col_as_text)
    label_encoders[col] = le

# Alarm that the model should learn to predict
target_alarm = 'RF Unit TX Channel Gain Out of Range'

# Ensure that the target alarm exists in the label encoder's classes.
# Later cells need the 'Target' column, so stop here with a clear error rather
# than printing a message and failing further down with a KeyError.
if target_alarm not in label_encoders['Alarm Name'].classes_:
    raise ValueError(f"Error: '{target_alarm}' not found in the Alarm Name column.")

# Get the encoded value for the target alarm
target_alarm_encoded = label_encoders['Alarm Name'].transform([target_alarm])[0]

# Create the target column: 1 when the row itself is the target alarm, else 0.
# NOTE(review): this flags the current row only; it is not a "within the next
# 7 days" look-ahead label. Confirm which of the two is intended.
alarm_data_filtered['Target'] = (alarm_data_filtered['Alarm Name'] == target_alarm_encoded).astype(int)


In [None]:
# Create rolling window sequences
def create_sequences(data, seq_length):
    """Slice a frame into overlapping feature windows labelled by the next row.

    Window ``i`` holds rows ``i .. i + seq_length - 1`` of every column except
    ``'Target'``; its label is the ``'Target'`` value of row ``i + seq_length``,
    i.e. the row immediately after the window. Rows are taken in their current
    positional order.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame that contains a ``'Target'`` column plus the feature columns.
    seq_length : int
        Number of consecutive rows in each window; must be at least 1.

    Returns
    -------
    tuple of numpy.ndarray
        ``(sequences, targets)`` where ``sequences`` has shape
        ``(n_windows, seq_length, n_features)`` and ``targets`` has shape
        ``(n_windows,)``, with ``n_windows = max(len(data) - seq_length, 0)``.

    Raises
    ------
    ValueError
        If ``seq_length`` is smaller than 1.
    KeyError
        If ``data`` has no ``'Target'`` column.
    """
    if seq_length < 1:
        raise ValueError(f"seq_length must be >= 1, got {seq_length}")

    # Convert once up front instead of calling drop()/iloc on every iteration.
    features = data.drop(columns='Target').to_numpy()
    # Read the label from its own column so its dtype is kept; going through a
    # row Series would upcast it to the common dtype of all columns.
    labels = data['Target'].to_numpy()

    n_windows = max(len(data) - seq_length, 0)
    if n_windows == 0:
        # Not enough rows for a single window: return correctly shaped empties.
        empty_sequences = np.empty((0, seq_length, features.shape[1]), dtype=features.dtype)
        empty_targets = np.empty((0,), dtype=labels.dtype)
        return empty_sequences, empty_targets

    sequences = np.stack(
        [features[start:start + seq_length] for start in range(n_windows)]
    )
    # The label of window i is row i + seq_length, so the labels are the tail.
    targets = labels[seq_length:].copy()
    return sequences, targets

# Seed TensorFlow so weight initialisation is repeatable; the same value is
# reused as the random_state of the train/test split below.
RANDOM_STATE = 42
tf.random.set_seed(RANDOM_STATE)

if 'Target' not in alarm_data_filtered.columns:
    raise ValueError("alarm_data_filtered has no 'Target' column; run the preprocessing cell first.")

# Keras cannot convert pandas Timestamps to tensors, so the raw datetime column
# must not reach the feature matrix. It is replaced by a numeric feature: the
# log of the seconds elapsed since the previous alarm of the same site (0 for
# the first alarm of a site). Gaps are clipped at 0 in case rows are not in
# chronological order within a site.
seconds_since_previous = (
    alarm_data_filtered.groupby('Site ID')['First Occurred On']
    .diff()
    .dt.total_seconds()
    .fillna(0.0)
    .clip(lower=0.0)
)
model_frame = (
    alarm_data_filtered
    .drop(columns='First Occurred On')
    .assign(**{'Log Seconds Since Previous Alarm': np.log1p(seconds_since_previous)})
)

# Define sequence length: a count of consecutive alarm rows, not a number of days
seq_length = 7  # Use last 7 occurrences to predict the next alarm occurrence
# NOTE(review): windows are cut from the whole frame, so a window can span the
# boundary between two sites. Confirm whether per-site windows are wanted.
sequences, targets = create_sequences(model_frame, seq_length)
sequences = sequences.astype('float32')
targets = targets.astype('int32')

# Split the data into train and test sets
# NOTE(review): neighbouring windows overlap, so a random split places nearly
# identical windows on both sides; test scores are likely optimistic.
X_train, X_test, y_train, y_test = train_test_split(
    sequences, targets, test_size=0.2, random_state=RANDOM_STATE
)

# Scale every feature to [0, 1] using training data only, so no test-set
# statistics leak into training. The scaler expects 2-D input, hence the
# reshape to (rows, features) and back to (windows, steps, features).
n_features = X_train.shape[2]
feature_scaler = MinMaxScaler()
X_train = feature_scaler.fit_transform(X_train.reshape(-1, n_features)).reshape(X_train.shape)
X_test = feature_scaler.transform(X_test.reshape(-1, n_features)).reshape(X_test.shape)

# Build the LSTM model: one recurrent layer followed by a sigmoid unit that
# outputs the probability that the next alarm is the target alarm
model = Sequential()
model.add(LSTM(50, activation='relu', input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model (a further 20% of the training windows is held out for validation)
history = model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2)

# Evaluate the model on the test set, thresholding the probabilities at 0.5
y_pred = (model.predict(X_test) > 0.5).astype(int)

# Print classification report for performance metrics
print(classification_report(y_test, y_pred))

# Save the trained model for future predictions.
# NOTE(review): predictions on new data also need `feature_scaler` and
# `label_encoders`; they are not persisted here.
model.save('alarm_prediction_model.h5')


## Using an LSTM for predictive maintenance: the main idea