In [1]:
!pip install neuralprophet

# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from google.colab import drive
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.cluster import KMeans
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
import os

# Mount Google Drive
drive.mount('/content/drive')

# Path of folder
folder_path = '/content/drive/MyDrive/Colab Notebooks/2024_08_21/'

# Get list of all CSV files in the folder
csv_files = [os.path.join(folder_path, file) for file in os.listdir(folder_path) if file.endswith('.csv')]

# Initialize an empty list to hold the dataframes
dfs = []

# Loop through the CSV files and read each one into a dataframe
for file in csv_files:
    df = pd.read_csv(file)
    dfs.append(df)

# Concatenate all dataframes into a single dataframe
data = pd.concat(dfs, ignore_index=True)

# Select relevant columns
data_selected = data[['Alarm Name', 'Alarm Location Info', 'First Occurred On', 'Site ID', 'Vendor', 'Domain', 'Device Type']]

# Get unique site IDs
site_ids = data_selected['Site ID'].unique()

# Initialize a dictionary to hold forecasts for each site
forecasts = {}



Collecting neuralprophet
  Downloading neuralprophet-0.9.0-py3-none-any.whl.metadata (9.0 kB)
Collecting captum>=0.6.0 (from neuralprophet)
  Downloading captum-0.7.0-py3-none-any.whl.metadata (26 kB)
Collecting kaleido==0.2.1 (from neuralprophet)
  Downloading kaleido-0.2.1-py2.py3-none-manylinux1_x86_64.whl.metadata (15 kB)
Collecting pytorch-lightning>=2.0.0 (from neuralprophet)
  Downloading pytorch_lightning-2.4.0-py3-none-any.whl.metadata (21 kB)
Collecting torchmetrics>=1.0.0 (from neuralprophet)
  Downloading torchmetrics-1.4.1-py3-none-any.whl.metadata (20 kB)
Collecting lightning-utilities>=0.10.0 (from pytorch-lightning>=2.0.0->neuralprophet)
  Downloading lightning_utilities-0.11.6-py3-none-any.whl.metadata (5.2 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=2.0.0->neuralprophet)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=2.0.0->neuralprophet)
  Usin

In [3]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from neuralprophet import NeuralProphet, set_log_level

# Ensure 'First Occurred On' is in datetime format
data_selected['First Occurred On'] = pd.to_datetime(data_selected['First Occurred On'])

# Select only the 5 most recent site IDs based on the latest occurrence in the dataset
recent_site_ids = data_selected.groupby('Site ID')['First Occurred On'].max().nlargest(5).index

# Iterate over each of the 5 recent site IDs to process and forecast alarms
for site_id in recent_site_ids:
    # Print the current Site ID
    print(f"Processing Site ID: {site_id}")

    # Get alarms for the current site ID
    site_alarms = data_selected[data_selected['Site ID'] == site_id]

    # Encode the Alarm Name column and rename that column as y
    le = LabelEncoder()
    site_alarms['y'] = le.fit_transform(site_alarms['Alarm Name'])
    site_alarms = site_alarms.drop('Alarm Name', axis=1)

    # Rename First Occurred On column as ds
    site_alarms = site_alarms.rename(columns={'First Occurred On': 'ds'})

    # Ensure 'ds' is datetime
    site_alarms['ds'] = pd.to_datetime(site_alarms['ds'])

    # Sort according to ds column
    site_alarms = site_alarms.sort_values(by='ds', ascending=False)

    # Drop rows where y is 'Unknown'
    site_alarms = site_alarms[site_alarms['y'] != 'Unknown']

    # Remove duplicates in ds
    site_alarms = site_alarms.drop_duplicates(subset=['ds'])

    # Keep only ds and y columns
    site_alarms = site_alarms[['ds', 'y']]

    # Disable logging messages unless there is an error
    set_log_level("ERROR")

    # Set the index and resample
    site_alarms = site_alarms.set_index('ds').resample('H').mean().reset_index()

    # Create a NeuralProphet model with default parameters
    m = NeuralProphet(
        learning_rate=0.01,          # Set the learning rate to 0.01
        batch_size=16,
        seasonality_mode='additive',  # Seasonality mode (choose between 'additive' or 'multiplicative')
    )

    # Use static plotly in notebooks
    m.set_plotting_backend("plotly-static")

    # Fit the model on the dataset
    metrics = m.fit(site_alarms)

    # Create a new dataframe reaching 24*7 into the future for our forecast
    df_future = m.make_future_dataframe(site_alarms, n_historic_predictions=True, periods=24*7)

    # Predict the future
    forecast = m.predict(df_future)

    # Round predictions to the nearest integer
    forecast['yhat1'] = forecast['yhat1'].round().astype(int)

    # Extract the date from the datetime
    forecast['date'] = forecast['ds'].dt.date

    # Store the forecast in the dictionary
    forecasts[site_id] = forecast

    # Print the current Site ID before plotting
    print(f"Plotting forecast for Site ID: {site_id}")

    # Visualize the forecast
    m.plot(forecast)



    # Uncomment this block to visualize the forecast for each site if needed
    # import matplotlib.pyplot as plt
    # plt.figure(figsize=(15, 7))  # Create a new figure with adjusted size
    # plt.scatter(forecast['ds'], forecast['yhat1'], color='red', label='yhat forecast')
    # plt.xlabel('Date')
    # plt.ylabel('yhat')
    # plt.title(f'Forecasted Alarms for Site {site_id}')
    # plt.legend()
    # plt.grid(True)
    # plt.xticks(rotation=45, ha='right')
    # plt.tight_layout()
    # plt.show()

# Example: print the forecast alarms around value 13 for one site
# To be adjusted if you want to print for all sites or specific ones

# Choose a site ID to filter and print (example: site_id='AM0053')
example_site_id = 'AM0053'
if example_site_id in forecasts:
    forecast = forecasts[example_site_id]
    filtered_forecast = forecast[(forecast['yhat1'] == 26)]
    last_date = site_alarms['ds'].max()
    filtered_forecast = filtered_forecast[filtered_forecast['ds'] > last_date]

    for index, row in filtered_forecast.iterrows():
        print(f"Date: {row['ds']}, Alarm (yhat1): {row['yhat1']:.2f}")




Processing Site ID: CM0091
Error processing Site ID CM0091: Dataframe has less than n_forecasts + n_lags rows. Forecasting not possible. Please either use a larger dataset, or adjust the model parameters.
Processing Site ID: PU0077


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  site_alarms['y'] = le.fit_transform(site_alarms['Alarm Name'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  site_alarms['y'] = le.fit_transform(site_alarms['Alarm Name'])



Error processing Site ID PU0077: Dataframe has less than n_forecasts + n_lags rows. Forecasting not possible. Please either use a larger dataset, or adjust the model parameters.
Processing Site ID: JA0112


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  site_alarms['y'] = le.fit_transform(site_alarms['Alarm Name'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  site_alarms['y'] = le.fit_transform(site_alarms['Alarm Name'])



Error processing Site ID JA0112: Inputs/targets with missing values detected. Please either adjust imputation parameters, or set 'drop_missing' to True to drop those samples.
Processing Site ID: CM0424


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  site_alarms['y'] = le.fit_transform(site_alarms['Alarm Name'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  site_alarms['y'] = le.fit_transform(site_alarms['Alarm Name'])



Error processing Site ID CM0424: Inputs/targets with missing values detected. Please either adjust imputation parameters, or set 'drop_missing' to True to drop those samples.


In [None]:
# List of specific alarm names to filter
alarm_names = [
    "Board Hardware Fault",
    "Radio Signaling Link Disconnected",
    "Cell Capability Degraded",
    "RF Unit CPRI Interface Error",
    "RF Unit VSWR Threshold Crossed",
    "RF Unit Maintenance Link Failure",
    "BBU CPRI Interface Error",
    "RF Unit External Power Supply Insufficient",
    "RF Unit TX Channel Gain Out of Range",
    "RF Unit Hardware Fault",
    "RF Unit DC Input Power Failure",
    "Monitoring Device Maintenance Link Failure",
    "Inter-Board Service Link Failure",
    "BBU Fan Stalled",
    "RF Unit Optical Module Fault",
    "Power Supply Insufficient for Multiple RF Units",
    "RF Unit AC Input Power Failure",
    "Cell RX Channel Interference Noise Power Unbalanced",
    "MAC Excessive Frame Error Rate",
    "Transmission Optical Interface Error",
    "BSL Fault",
    "GNSS Antenna Fault",
    "RF Unit RX Channel RTWP/RSSI Too Low",
    "RF Out of Service",
    "RF Unit Clock Problem",
    "Inter-System Communication Failure",
    "RF Unit Baseband Running Error",
    "BBU Board Maintenance Link Failure",
    "GNSS Locked Satellites Insufficient",
    "Board Temperature Unacceptable",
    "RF Unit Temperature Unacceptable",
    "RHUB CPRI Interface Error",
    "Inter-Board CANBUS Communication Failure",
    "Monitoring Device Power Supply Problem",
    "RF Unit Software Program Error",
    "RF Unit Input Power Out of Range",
    "RHUB-pRRU CPRI Interface Error",
    "RHUB Unit AC Input Power Failure",
    "NR DU Cell TRP Unavailable",
    "BBU CPRI Optical Module Fault",
    "Board Input Voltage Out of Range",
    "Board Powered Off",
    "Board Unavailable",
    "BBU CPRI Line Rate Negotiation Abnormal",
    "Transmission Optical Module Fault"
]

In [None]:
# Second loop: Decode the forecasted values, remove duplicates, and print the actual unique alarm names
for site_id in recent_site_ids:
    forecast = forecasts[site_id]

    # Filter the forecast to include only dates after 2024-08-21
    forecast_filtered = forecast[forecast['ds'] > pd.to_datetime('2024-08-21')]

    # Get the valid alarm codes that the LabelEncoder knows
    known_alarm_codes = le.transform(le.classes_)

    # Further filter the forecast to only include known alarm codes
    forecast_filtered = forecast_filtered[forecast_filtered['yhat1'].isin(known_alarm_codes)]

    # Decode the filtered 'yhat1' values back to their original alarm names
    forecast_filtered['Predicted Alarm Name'] = le.inverse_transform(forecast_filtered['yhat1'])

    # Remove duplicates based on 'ds' and 'Predicted Alarm Name'
    forecast_filtered = forecast_filtered.drop_duplicates(subset=['ds', 'Predicted Alarm Name'])

    # Print the forecast with actual unique alarm names for the current Site ID
    unique_alarm_names = forecast_filtered['Predicted Alarm Name'].unique()
    print(f"Forecasted Unique Alarm Names for Site ID {site_id} after 2024-08-21:")
    for alarm_name in unique_alarm_names:
        print(alarm_name)

    # Print the full forecast dataframe with duplicates removed for more details
    print(f"Full Forecast for Site ID {site_id} after 2024-08-21 with duplicates removed:")
    print(forecast_filtered[['ds', 'Predicted Alarm Name']])


Forecasted Unique Alarm Names for Site ID CM0091 after 2024-08-21:
SCCP Message Loop
SCCP Subsystem Fault
SCCP status is inconsistent between DCU module and SSU modules
STP Discarded Error Messages by the Self PC Alarm
Security Hardening Is Not Made
Service Not Ready
Slow Disk Response
Storage Device Drive Fault
System Used Resource will overrun the threshold of Connection Admission Control 
Signalling path disconnected
Service Center and Service Communication Failure
TCP Attack
The DFS peer status changed to Down
The DSG remote config is timeout
The Hard Disk Is Isolated
Full Forecast for Site ID CM0091 after 2024-08-21 with duplicates removed:
                      ds                               Predicted Alarm Name
2194 2024-08-21 01:00:00                                  SCCP Message Loop
2195 2024-08-21 02:00:00                                  SCCP Message Loop
2196 2024-08-21 03:00:00                               SCCP Subsystem Fault
2197 2024-08-21 04:00:00  SCCP status is i

In [None]:
# Second loop: Decode the forecasted values, filter by alarm names, and remove duplicates
for site_id in recent_site_ids:
    forecast = forecasts[site_id]

    # Filter the forecast to include only dates after 2024-08-21
    forecast_filtered = forecast[forecast['ds'] > pd.to_datetime('2024-08-21')]

    # Get the valid alarm codes that the LabelEncoder knows
    known_alarm_codes = le.transform(le.classes_)

    # Further filter the forecast to only include known alarm codes
    forecast_filtered = forecast_filtered[forecast_filtered['yhat1'].isin(known_alarm_codes)]

    # Decode the filtered 'yhat1' values back to their original alarm names
    forecast_filtered['Predicted Alarm Name'] = le.inverse_transform(forecast_filtered['yhat1'])

    # Filter the forecast to include only the specified alarm names
    forecast_filtered = forecast_filtered[forecast_filtered['Predicted Alarm Name'].isin(alarm_names)]

    # Remove duplicates based on 'ds' and 'Predicted Alarm Name'
    forecast_filtered = forecast_filtered.drop_duplicates(subset=['ds', 'Predicted Alarm Name'])

    # Print the forecast with actual unique alarm names for the current Site ID
    unique_alarm_names = forecast_filtered['Predicted Alarm Name'].unique()
    print(f"Forecasted Unique Alarm Names for Site ID {site_id} after 2024-08-21:")
    for alarm_name in unique_alarm_names:
        print(alarm_name)

    # Print the full forecast dataframe with duplicates removed for more details
    print(f"Full Forecast for Site ID {site_id} after 2024-08-21 with duplicates removed:")
    print(forecast_filtered[['ds', 'Predicted Alarm Name']])

Forecasted Unique Alarm Names for Site ID CM0091 after 2024-08-21:
Full Forecast for Site ID CM0091 after 2024-08-21 with duplicates removed:
Empty DataFrame
Columns: [ds, Predicted Alarm Name]
Index: []
Forecasted Unique Alarm Names for Site ID VV0000 after 2024-08-21:
RF Unit Hardware Fault
RF Unit Maintenance Link Failure
Full Forecast for Site ID VV0000 after 2024-08-21 with duplicates removed:
                      ds              Predicted Alarm Name
2203 2024-08-21 10:00:00            RF Unit Hardware Fault
2209 2024-08-21 16:00:00  RF Unit Maintenance Link Failure
2210 2024-08-21 17:00:00            RF Unit Hardware Fault
2237 2024-08-22 20:00:00            RF Unit Hardware Fault
2262 2024-08-23 21:00:00  RF Unit Maintenance Link Failure
2284 2024-08-24 19:00:00            RF Unit Hardware Fault
2285 2024-08-24 20:00:00            RF Unit Hardware Fault
Forecasted Unique Alarm Names for Site ID PU0077 after 2024-08-21:
BBU CPRI Interface Error
Full Forecast for Site ID PU0077 a