In [44]:
!pip install flaml



In [45]:
!pip install --upgrade flaml
!pip install --upgrade xgboost




In [52]:
# Import necessary libraries
import pandas as pd

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns


from google.colab import drive
drive.mount('/content/drive')

import os

# Path of folder
folder_path = '/content/drive/MyDrive/Colab Notebooks/new/'

# Get list of all CSV files in the folder
csv_files = [os.path.join(folder_path, file) for file in os.listdir(folder_path) if file.endswith('.csv')]

# Initialize an empty list to hold the dataframes
dfs = []

# Loop through the CSV files and read each one into a dataframe
for file in csv_files:
    df = pd.read_csv(file)
    dfs.append(df)

# Concatenate all dataframes into a single dataframe
data = pd.concat(dfs, ignore_index=True)





# Sort by 'First Occurred On' in descending order (latest alarms first)
df = data.sort_values(by='First Occurred On', ascending=False)

# Convert 'First Occurred On' to datetime, handle errors and fill NaT with a default date
df['First Occurred On'] = pd.to_datetime(df['First Occurred On'], errors='coerce')

# Reference time
reference_time = pd.to_datetime('2024-07-25 11:45:00')

# Calculate the relative day index, handle NaT by filling with a large negative number
df['Relative Day Index'] = ((df['First Occurred On'] - reference_time).dt.total_seconds() // 86400).fillna(-9999).astype(int)

# Adjust the index to set the 08:00 to 08:00 of the next day as 0, and previous days as -1, -2, -3, etc.
df['Relative Day Index'] = df['Relative Day Index'].apply(lambda x: x if x < 0 else x)


# Print the updated dataframe
df.head(50000)



Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Unnamed: 0,Last Occurred On,Alarm Source,Alarm Severity,Alarm Name,Alarm ID,Alarm Location Info,Site Name,Cleared On,First Occurred On,Clearance Status,Ticket ID,Site ID,Vendor,Domain,Device Type,Relative Day Index
0,2024-07-19 17:41:35,Pandiruppu-AM0037-GBHO,Major,Mains Failure Alarm,65085,"Cabinet No.=0, Subrack No.=0, Slot No.=19, Por...",Pandiruppu,2024-07-19 17:42:30,2024-07-19 17:41:35,Cleared,,AM0037,HUAWEI,Power,,-6
1,2024-07-19 09:02:15,Pandiruppu-AM0037-GBHO,Major,Mains Failure Alarm,65085,"Cabinet No.=0, Subrack No.=0, Slot No.=19, Por...",Pandiruppu,2024-07-19 09:06:07,2024-07-19 09:02:15,Cleared,,AM0037,HUAWEI,Power,,-7
2,2024-07-18 20:28:28,Pandiruppu-AM0037-L,Minor,RF Unit Temperature Unacceptable,26525,"Cabinet No.=0, Subrack No.=61, Slot No.=0, Boa...",Pandiruppu,2024-07-18 21:16:12,2024-07-18 20:28:28,Cleared,,AM0037,HUAWEI,Access,eNodeB,-7
3,2024-07-18 13:55:54,Pandiruppu-AM0037-GBHO,Major,Temperature Alarm,65091,"Cabinet No.=0, Subrack No.=0, Slot No.=19, Por...",Pandiruppu,2024-07-18 18:45:36,2024-07-18 13:55:54,Cleared,,AM0037,HUAWEI,Power,,-7
4,2024-07-17 17:54:45,Pandiruppu-AM0037-GBHO,Major,Mains Failure Alarm,65085,"Cabinet No.=0, Subrack No.=0, Slot No.=19, Por...",Pandiruppu,2024-07-17 18:21:11,2024-07-17 17:54:45,Cleared,,AM0037,HUAWEI,Power,,-8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
550,2024-04-23 09:26:32,Pandiruppu-AM0037-G,Critical,Cell Out of Service,21801,"Site Index=44, Cell Index=178, Alarm Cause=Oth...",Pandiruppu,2024-04-23 17:17:03,2024-04-23 09:26:32,Cleared,TT-20240423-00000127,AM0037,HUAWEI,Access,eGBTS,-94
549,2024-04-23 09:26:32,Pandiruppu-AM0037-G,Critical,Cell Out of Service,21801,"Site Index=44, Cell Index=180, Alarm Cause=Oth...",Pandiruppu,2024-04-23 17:14:08,2024-04-23 09:26:32,Cleared,TT-20240423-00000127,AM0037,HUAWEI,Access,eGBTS,-94
547,2024-04-23 09:26:32,Pandiruppu-AM0037-G,Critical,OML Fault,21807,"Site Index=44, BSC Subrack No.=0, BSC Slot No....",Pandiruppu,2024-04-23 13:17:15,2024-04-23 09:26:32,Cleared,,AM0037,HUAWEI,Access,eGBTS,-94
548,2024-04-23 09:26:32,Pandiruppu-AM0037-G,Major,ESL Link Fault,21805,"Site Index=44, Site Name=Pandiruppu-AM0037-G",Pandiruppu,2024-04-23 13:17:15,2024-04-23 09:26:32,Cleared,,AM0037,HUAWEI,Access,eGBTS,-94


In [47]:
from sklearn.preprocessing import MinMaxScaler,LabelEncoder

# Filter the data for HUAWEI and Access domain
filtered_data = df[(df['Vendor'] == 'HUAWEI')]
#(data['Domain'] == 'Access')


# Retain only the specified columns
columns_to_keep = ['Site ID', 'Alarm Name', 'Vendor', 'Domain', 'Device Type', 'Relative Day Index']
df_filtered = filtered_data[columns_to_keep]


df_filtered.head(50000)

Unnamed: 0,Site ID,Alarm Name,Vendor,Domain,Device Type,Relative Day Index
0,AM0037,Mains Failure Alarm,HUAWEI,Power,,-6
1,AM0037,Mains Failure Alarm,HUAWEI,Power,,-7
2,AM0037,RF Unit Temperature Unacceptable,HUAWEI,Access,eNodeB,-7
3,AM0037,Temperature Alarm,HUAWEI,Power,,-7
4,AM0037,Mains Failure Alarm,HUAWEI,Power,,-8
...,...,...,...,...,...,...
550,AM0037,Cell Out of Service,HUAWEI,Access,eGBTS,-94
549,AM0037,Cell Out of Service,HUAWEI,Access,eGBTS,-94
547,AM0037,OML Fault,HUAWEI,Access,eGBTS,-94
548,AM0037,ESL Link Fault,HUAWEI,Access,eGBTS,-94


In [48]:
# Filter data for specific Site ID (e.g., AM0037)
site_id = 'AM0037'
site_data = df_filtered[df_filtered['Site ID'] == site_id]

# Display the filtered data
site_data.head(50000)





Unnamed: 0,Site ID,Alarm Name,Vendor,Domain,Device Type,Relative Day Index
0,AM0037,Mains Failure Alarm,HUAWEI,Power,,-6
1,AM0037,Mains Failure Alarm,HUAWEI,Power,,-7
2,AM0037,RF Unit Temperature Unacceptable,HUAWEI,Access,eNodeB,-7
3,AM0037,Temperature Alarm,HUAWEI,Power,,-7
4,AM0037,Mains Failure Alarm,HUAWEI,Power,,-8
...,...,...,...,...,...,...
550,AM0037,Cell Out of Service,HUAWEI,Access,eGBTS,-94
549,AM0037,Cell Out of Service,HUAWEI,Access,eGBTS,-94
547,AM0037,OML Fault,HUAWEI,Access,eGBTS,-94
548,AM0037,ESL Link Fault,HUAWEI,Access,eGBTS,-94


In [49]:
from flaml import AutoML

# Handle missing values by filling with a default value (e.g., 'Unknown')
site_data.fillna('Unknown', inplace=True)

# Encode categorical variables using LabelEncoder
label_encoders = {}
for column in ['Alarm Name', 'Vendor', 'Domain', 'Device Type']:
    label_encoders[column] = LabelEncoder()
    site_data[column] = label_encoders[column].fit_transform(site_data[column])

# Create the target variable: whether the "RF Unit TX Channel Gain Out of Range" occurs in the next 7 days
target_alarm = label_encoders['Alarm Name'].transform(['RF Unit TX Channel Gain Out of Range'])[0]

# Shift the target variable to indicate future occurrences
site_data['Next_7_Days_Alarm'] = site_data['Alarm Name'].shift(-7).apply(lambda x: 1 if x == target_alarm else 0)

# Drop rows with NaN values after creating the target variable
site_data.dropna(inplace=True)

# Scale the features using MinMaxScaler
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(site_data[['Alarm Name', 'Relative Day Index']])

# Create a DataFrame with the scaled features and the target variable
features = pd.DataFrame(scaled_data, columns=['Scaled_Alarm_Name', 'Scaled_Relative_Day_Index'])
features['Next_7_Days_Alarm'] = site_data['Next_7_Days_Alarm'].values

In [50]:
# Import necessary libraries
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Assuming your data is already scaled and prepared in 'features'
X = features[['Scaled_Alarm_Name', 'Scaled_Relative_Day_Index']]
y = features['Next_7_Days_Alarm']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train the Random Forest classifier
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

# Evaluate the model
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

# Predict for the next 7 days based on the latest data
latest_data = features.iloc[-1, :-1].values.reshape(1, -1)  # Take the last row excluding the target
next_7_days_prediction = clf.predict(latest_data)
print(f"Will 'RF Unit TX Channel Gain Out of Range' occur in the next 7 days? {'Yes' if next_7_days_prediction[0] == 1 else 'No'}")


Accuracy: 1.00
Will 'RF Unit TX Channel Gain Out of Range' occur in the next 7 days? No




In [51]:
import joblib

# Assuming 'clf' is your trained model
joblib.dump(clf, '/content/drive/MyDrive/alarm_prediction_model.pkl')


['/content/drive/MyDrive/alarm_prediction_model.pkl']