In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from google.colab import drive
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.cluster import KMeans
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
import os

# Mount Google Drive
drive.mount('/content/drive')

# Path of folder
folder_path = '/content/drive/MyDrive/Colab Notebooks/2024_08_09/'

# Get list of all CSV files in the folder
csv_files = [os.path.join(folder_path, file) for file in os.listdir(folder_path) if file.endswith('.csv')]

# Initialize an empty list to hold the dataframes
dfs = []

# Loop through the CSV files and read each one into a dataframe
for file in csv_files:
    df = pd.read_csv(file)
    dfs.append(df)

# Concatenate all dataframes into a single dataframe
data = pd.concat(dfs, ignore_index=True)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


  df = pd.read_csv(file)


In [2]:
data.head()

Unnamed: 0,Last Occurred On,Alarm Source,Alarm Severity,Alarm Name,Alarm ID,Alarm Location Info,Site Name,Cleared On,First Occurred On,Clearance Status,Ticket ID,Site ID,Vendor,Domain,Device Type,Prediction Probability
0,2024-08-09 15:25:42,LOGGALOYA-MWI-CETR-VLL,Critical,Link Down,3,source=LOGGALOYA-MWI-CETR-VLL location=If Inde...,Loggaloya,2024-08-09 15:25:45,2024-08-09 15:25:42,Cleared,,BD0070,HUAWEI,IPCore,Router,
1,2024-08-09 15:25:28,LOGGALOYA-MWI-CETR-VLL,Critical,Link Down,3,source=LOGGALOYA-MWI-CETR-VLL location=If Inde...,Loggaloya,2024-08-09 15:25:30,2024-08-09 15:25:28,Cleared,,BD0070,HUAWEI,IPCore,Router,
2,2024-08-09 15:25:13,LOGGALOYA-MWI-CETR-VLL,Critical,Link Down,3,source=LOGGALOYA-MWI-CETR-VLL location=If Inde...,Loggaloya,2024-08-09 15:25:15,2024-08-09 15:25:13,Cleared,,BD0070,HUAWEI,IPCore,Router,
3,2024-08-09 15:24:57,LOGGALOYA-MWI-CETR-VLL,Critical,Link Down,3,source=LOGGALOYA-MWI-CETR-VLL location=If Inde...,Loggaloya,2024-08-09 15:25:00,2024-08-09 15:24:57,Cleared,,BD0070,HUAWEI,IPCore,Router,
4,2024-08-09 15:24:42,LOGGALOYA-MWI-CETR-VLL,Critical,Link Down,3,source=LOGGALOYA-MWI-CETR-VLL location=If Inde...,Loggaloya,2024-08-09 15:24:45,2024-08-09 15:24:42,Cleared,,BD0070,HUAWEI,IPCore,Router,


In [3]:
# Step 2: Keep only the necessary columns
columns_to_keep = ['Alarm Name', 'Site ID', 'Vendor', 'Domain', 'Device Type', 'First Occurred On']
data = data[columns_to_keep]


In [4]:
# Step 3: Convert 'First Occured On' to datetime format
data['First Occurred On'] = pd.to_datetime(data['First Occurred On'])

# Step 3.1: Calculate 'Relative Day Index' by subtracting the earliest date in the dataset
min_date = data['First Occurred On'].min()
data['Relative Day Index'] = (data['First Occurred On'] - min_date).dt.days


In [5]:
# Step 4: Handle missing values (if any)
data.dropna(inplace=True)  # Dropping rows with missing values, you can also choose to fill them

# Step 4.1: Encode categorical variables
label_encoders = {}
categorical_columns = ['Alarm Name', 'Site ID', 'Vendor', 'Domain', 'Device Type']

for col in categorical_columns:
    le = LabelEncoder()
    data[col] = le.fit_transform(data[col])
    label_encoders[col] = le  # Store label encoders for later use

# Step 4.2: Scale numerical features
scaler = MinMaxScaler()
data[['Relative Day Index']] = scaler.fit_transform(data[['Relative Day Index']])


In [6]:
data.head()

Unnamed: 0,Alarm Name,Site ID,Vendor,Domain,Device Type,First Occurred On,Relative Day Index
0,390,594,0,2,16,2024-08-09 15:25:42,1.0
1,390,594,0,2,16,2024-08-09 15:25:28,1.0
2,390,594,0,2,16,2024-08-09 15:25:13,1.0
3,390,594,0,2,16,2024-08-09 15:24:57,1.0
4,390,594,0,2,16,2024-08-09 15:24:42,1.0


In [None]:
# Step 5: Explore association rule mining
from mlxtend.frequent_patterns import apriori, association_rules

# Step 5.1: Create a pivot table with binary encoding for association rule mining
pivot_table = data.pivot_table(index='Relative Day Index', columns='Alarm Name', aggfunc='size', fill_value=0)
binary_encoded_df = pivot_table.applymap(lambda x: 1 if x > 0 else 0)

# Step 5.2: Apply the Apriori algorithm to find frequent itemsets
frequent_itemsets = apriori(binary_encoded_df, min_support=0.5, use_colnames=True)

# Step 5.3: Generate association rules from the frequent itemsets
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.5)


  binary_encoded_df = pivot_table.applymap(lambda x: 1 if x > 0 else 0)


In [None]:
# Step 6: Unsupervised learning using KMeans clustering
kmeans = KMeans(n_clusters=3, random_state=42)  # Change n_clusters based on your data
data['Cluster'] = kmeans.fit_predict(data[['Relative Day Index'] + categorical_columns])

# Step 6.1: Visualize the clusters (Optional)
sns.scatterplot(data=data, x='Relative Day Index', y='Alarm Name', hue='Cluster')
plt.show()


In [None]:
# Step 7: Prepare data for Random Forest Classifier
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# Step 7.1: Define the target and features
target = 'Alarm Name'  # Assuming you want to predict the 'Alarm Name' or modify as per your need
features = ['Site ID', 'Vendor', 'Domain', 'Device Type', 'Relative Day Index']

X = data[features]
y = data[target]

# Step 7.2: Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 7.3: Train the Random Forest Classifier
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

# Step 7.4: Make predictions and evaluate the model
y_pred = rf.predict(X_test)
print(classification_report(y_test, y_pred))
