In [28]:
!pip install neuralprophet



In [29]:
from neuralprophet import NeuralProphet

In [30]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from google.colab import drive
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.cluster import KMeans
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
import os

In [31]:
# Mount Google Drive
drive.mount('/content/drive')

# Path of folder
folder_path = '/content/drive/MyDrive/Colab Notebooks/2024_08_28/'

# Get list of all CSV files in the folder
csv_files = [os.path.join(folder_path, file) for file in os.listdir(folder_path) if file.endswith('.csv')]

# Initialize an empty list to hold the dataframes
dfs = []

# Loop through the CSV files and read each one into a dataframe
for file in csv_files:
    df = pd.read_csv(file)
    dfs.append(df)

# Concatenate all dataframes into a single dataframe
data = pd.concat(dfs, ignore_index=True)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [32]:
data_selected = data[['Alarm Name', 'First Occurred On', 'Site ID', 'Vendor', 'Domain', 'Device Type']]
data_selected.head(10)

Unnamed: 0,Alarm Name,First Occurred On,Site ID,Vendor,Domain,Device Type
0,RF Unit TX Channel Gain Out of Range,2024-08-20 23:39:39,AM0053,HUAWEI,Access,eNodeB
1,Air Conditioner Frequent High Pressure,2024-08-20 23:22:45,AM0212,HUAWEI,Power,
2,RF Unit TX Channel Gain Out of Range,2024-08-20 23:03:16,AM0053,HUAWEI,Access,eNodeB
3,RF Unit TX Channel Gain Out of Range,2024-08-20 22:59:56,AM0053,HUAWEI,Access,eNodeB
4,RF Unit TX Channel Gain Out of Range,2024-08-20 22:34:32,AM0053,HUAWEI,Access,eNodeB
5,Air Conditioner Frequent High Pressure,2024-08-20 22:13:43,AM0212,HUAWEI,Power,
6,RF Unit TX Channel Gain Out of Range,2024-08-20 21:59:10,AM0053,HUAWEI,Access,eNodeB
7,RF Unit TX Channel Gain Out of Range,2024-08-20 21:12:22,AM0053,HUAWEI,Access,eNodeB
8,RF Unit TX Channel Gain Out of Range,2024-08-20 21:09:48,AM0053,HUAWEI,Access,eNodeB
9,Air Conditioner Frequent High Pressure,2024-08-20 21:03:50,AM0212,HUAWEI,Power,


In [33]:
# prompt: for each sites remove duplicates of same alarm for same 2 hour  range in a day, and give the head of dataset

# Convert 'ds' to datetime if it's not already
data_selected['ds'] = pd.to_datetime(data_selected['ds'])

# Function to remove duplicates within a 2-hour range for a site and day
def remove_duplicates_2hr(df):
  df['day'] = df['ds'].dt.date
  df['hour'] = df['ds'].dt.hour
  df = df.drop_duplicates(subset=['Site ID', 'y', 'day', 'hour'], keep='first')
  df = df.drop(['day', 'hour'], axis=1)
  return df

# Apply the function to each site
data_selected = data_selected.groupby('Site ID').apply(remove_duplicates_2hr).reset_index(drop=True)

# Display the head of the dataset
data_selected.head()

KeyError: 'ds'

In [None]:

# Keep only 'ds' and 'y' columns (preprocessing for NeuralProphet)
data_selected = data_selected[['ds', 'y', 'Site ID']]
data_selected.head(10)

In [None]:
# Encode categorical columns
le_alarm = LabelEncoder()
data_selected['y'] = le_alarm.fit_transform(data_selected['Alarm Name'])
data_selected = data_selected.drop('Alarm Name', axis=1)

le_site = LabelEncoder()
data_selected['Site ID'] = le_site.fit_transform(data_selected['Site ID'])

# Rename 'First Occurred On' to 'ds'
data_selected = data_selected.rename(columns={'First Occurred On': 'ds'})

# Convert 'ds' to datetime
data_selected['ds'] = pd.to_datetime(data_selected['ds'])

# Drop rows where 'y' is unknown (if applicable)
data_selected = data_selected[data_selected['y'] != 'Unknown']

# Resample data to a consistent frequency (e.g., daily)
data_selected = data_selected.set_index('ds').resample('H').mean().reset_index()

# Initialize and train NeuralProphet model
m = NeuralProphet(
    learning_rate=0.01,
    batch_size=16,
    seasonality_mode='additive',
)

# Set plotting backend
m.set_plotting_backend("plotly-static")

# Fit the model
metrics = m.fit(data_selected)


In [None]:
# Forecast the future
df_future = m.make_future_dataframe(data_selected, n_historic_predictions=True, periods=24*7)
forecast = m.predict(df_future)

# Visualize the forecast
m.plot(forecast)

# Plot 'yhat' as dots on top of the existing plot
forecast['yhat1'] = forecast['yhat1'].round().astype(int)
forecast['date'] = forecast['ds'].dt.date

plt.figure(figsize=(15, 7))
plt.scatter(forecast['ds'], forecast['yhat1'], color='red', label='yhat forecast')
plt.xlabel('Date')
plt.ylabel('yhat')
plt.title('Forecasted Alarms')
plt.legend()
plt.grid(True)
plt.xticks(rotation=45)
plt.yticks()
plt.tight_layout()
plt.show()

# Example: Checking for a specific alarm (e.g., 21) in the forecast
alarm_21_predicted = forecast[forecast['yhat1'] == 21]
if alarm_21_predicted.empty:
    print("Alarm 21 is NOT predicted to occur in the next 7 days.")
else:
    print("Alarm 21 IS predicted to occur in the next 7 days.")

# Filtering forecasted alarms and printing unique alarms per day
filtered_forecast = forecast[(forecast['yhat1'] == 21)]
filtered_forecast = filtered_forecast[filtered_forecast['ds'] > pd.to_datetime('2024-08-24 12:00:00')]

for index, row in filtered_forecast.iterrows():
    print(f"Date: {row['ds']}, Alarm (yhat1): {row['yhat1']:.2f}")



In [None]:
# prompt: encode the Alarm name column  and rename that column as y

le = LabelEncoder()
PU0146_alarms['y'] = le.fit_transform(PU0146_alarms['Alarm Name'])
PU0146_alarms = PU0146_alarms.drop('Alarm Name', axis=1)
PU0146_alarms.head(10)

In [None]:
# prompt: give mapping of encoded value to alarm name  print line by line

for i, alarm_name in enumerate(le.classes_):
  print(f'{i}: {alarm_name}')

In [None]:
# prompt: rename first occured column as ds

PU0146_alarms = PU0146_alarms.rename(columns={'First Occurred On': 'ds'})
PU0146_alarms.head(10)


In [None]:
import pandas as pd

# Load the dataset from the CSV file using pandas
# df = pd.read_csv("https://github.com/ourownstory/neuralprophet-data/raw/main/kaggle-energy/datasets/tutorial01.csv")
# Plot the dataset, showing price (y column) over time (ds column)
plt = PU0146_alarms.plot(x="ds", y="y", figsize=(15, 5))

In [None]:
# prompt: sort accoding to ds column latest alarms

PU0146_alarms['ds'] = pd.to_datetime(PU0146_alarms['ds'])
PU0146_alarms = PU0146_alarms.sort_values(by='ds', ascending=False)
PU0146_alarms.head(10)


In [None]:
# prompt: drop rows with y is unknown

PU0146_alarms = PU0146_alarms[PU0146_alarms['y'] != 'Unknown']

In [None]:
# prompt: remove duplicates in ds

PU0146_alarms = PU0146_alarms.drop_duplicates(subset=['ds'])
PU0146_alarms.head(10)


In [None]:
# prompt: keep only ds and y columns

PU0146_alarms = PU0146_alarms[['ds', 'y']]
PU0146_alarms.head(10)


In [None]:
# Import the NeuralProphet class
from neuralprophet import NeuralProphet, set_log_level

# Disable logging messages unless there is an error
set_log_level("ERROR")

PU0146_alarms['ds'] = pd.to_datetime(PU0146_alarms['ds'])  # Ensure 'ds' is datetime
PU0146_alarms = PU0146_alarms.set_index('ds').resample('H').mean().reset_index()

# Create a NeuralProphet model with default parameters
# Set the learning rate and define other hyperparameters directly
m = NeuralProphet(
    learning_rate=0.01,          # Set the learning rate to 0.005
    batch_size=32,
    seasonality_mode='additive',  # Seasonality mode (choose between 'additive' or 'multiplicative')
)

# Use static plotly in notebooks
m.set_plotting_backend("plotly-static")

# Fit the model on the dataset (this might take a bit)
metrics = m.fit(PU0146_alarms)

In [None]:
# Create a new dataframe reaching 24*7 into the future for our forecast, n_historic_predictions also shows historic data
df_future = m.make_future_dataframe(PU0146_alarms, n_historic_predictions=True, periods=24*7)

# Predict the future
forecast = m.predict(df_future)

# Visualize the forecast
m.plot(forecast)

In [None]:
# Create a new dataframe reaching 7 days into the future for our forecast
df_future = m.make_future_dataframe(PU0146_alarms, n_historic_predictions=True, periods=24*7)

# Predict the future
forecast = m.predict(df_future)

import matplotlib.pyplot as plt

# Round predictions to the nearest integer
forecast['yhat1'] = forecast['yhat1'].round().astype(int)

# Extract the date from the datetime
forecast['date'] = forecast['ds'].dt.date

# No limit applied, include all predictions
daily_predictions = forecast.reset_index(drop=True)

# Plot 'yhat' as dots on top of the existing plot
plt.figure(figsize=(15, 7))  # Create a new figure with adjusted size
plt.scatter(daily_predictions['ds'], daily_predictions['yhat1'], color='red', label='yhat forecast')

# Add labels to data points
for i, row in daily_predictions.iterrows():
    plt.text(row['ds'], row['yhat1'], f'{row["yhat1"]}', fontsize=8, ha='right', va='bottom')

plt.xlabel('Date')
plt.ylabel('yhat')
plt.title('Forecasted Alarms')
plt.legend()
plt.grid(True)  # Add grid lines for better readability

# Set x-axis ticks to show all dates, rotate for better readability
plt.xticks(rotation=45, ha='right')

# Automatically set y-axis ticks based on data
plt.yticks()

plt.tight_layout()  # Adjust layout to prevent clipping
plt.show()  # Display the plot

In [None]:
# prompt: in next 7 days the alarm 21 will come or not ?

# Check if alarm 21 is predicted in the next 7 days
alarm_21_predicted = daily_predictions[daily_predictions["yhat1"] == 21]

if alarm_21_predicted.empty:
  print("Alarm 21 is NOT predicted to occur in the next 7 days.")
else:
  print("Alarm 21 IS predicted to occur in the next 7 days.")



In [None]:
# prompt: print the forecast alarms around value 13  with the date, print only the data after the latest day of initial data

# Filter forecast for alarms around value 13
filtered_forecast = forecast[(forecast['yhat1'] == 21)]

# # Get the latest date in the original data
# last_date = PU0146_alarms['ds'].max()

# # Filter for dates after the last date in the original data
# filtered_forecast = filtered_forecast[filtered_forecast['ds'] > last_date]

# Filter for dates after 2024-08-24 12:00:00
filtered_forecast = filtered_forecast[filtered_forecast['ds'] > pd.to_datetime('2024-08-24 12:00:00')]

# Print the filtered forecast with date and yhat1
for index, row in filtered_forecast.iterrows():
  print(f"Date: {row['ds']}, Alarm (yhat1): {row['yhat1']:.2f}")


In [None]:
# prompt: for above output print unique alarm per day

# Group by date and get the unique alarm values
unique_alarms_per_day = filtered_forecast.groupby('date')['yhat1'].unique()

# Print the unique alarms for each day
for date, alarms in unique_alarms_per_day.items():
    print(f"Date: {date}, Unique Alarms: {alarms}")

In [None]:
# prompt: view the label encoders used

for i, alarm_name in enumerate(le.classes_):
  print(f'{i}: {alarm_name}')


In [None]:
# Import necessary libraries
import matplotlib.pyplot as plt

# Limit to 2 or 3 alarms per day
forecast['date'] = forecast['ds'].dt.date  # Extract the date from the datetime
daily_predictions = forecast.groupby('date').apply(lambda x: x.nlargest(10, 'yhat1')).reset_index(drop=True)

# Create a new figure with adjusted size
fig, ax = plt.subplots(figsize=(55, 25))

# Plot 'yhat1' as dots on top of the existing plot
ax.scatter(daily_predictions['ds'], daily_predictions['yhat1'], color='red', label='yhat1 forecast')

# Add labels to data points
for i, row in daily_predictions.iterrows():
    ax.text(row['ds'], row['yhat1'], f'{row["yhat1"]:.2f}', fontsize=9, ha='right', va='bottom')

ax.set_xlabel('Date')
ax.set_ylabel('yhat1')
ax.set_title('Top 2 or 3 Alarms per Day')
ax.legend()
ax.grid(True)  # Add grid lines for better readability

# Set x-axis ticks to show all dates
plt.xticks(daily_predictions['ds'], rotation=45, ha='right')

# Set y-axis ticks to show all yhat1 values
plt.yticks(daily_predictions['yhat1'])

plt.tight_layout()  # Adjust layout to prevent clipping
plt.show()  # Display the plot