In [None]:
import pandas as pd
import numpy as np
from datetime import timedelta
import csv
from remove_chattering import remove_nuisance_alarms 

In [None]:
# Identifier of the system to analyse; it is interpolated into the input and
# output CSV file names used by later cells.
# NOTE(review): the value here is a single space, presumably a placeholder —
# set it to a real system name before running.
system = " "

In [None]:
# Load the alarm CSV for the selected system; the path is relative to the
# directory the notebook is run from.
alarmSeries = pd.read_csv(f'../../data/{system}_alarms.csv')

In [None]:
# Columns needed by the rest of the analysis; everything else is dropped.
columnsToKeep = ["systemId", "deviceId", "alarmNumber", "level","description","startTimestamp", "endTimestamp"]

# Build the cleaned frame in a single chain. Using .assign on the column
# subset (instead of assigning columns onto it afterwards) avoids pandas'
# SettingWithCopyWarning while producing the same data.
alarmSeries = (
    alarmSeries[columnsToKeep]
    .assign(
        # Unparseable timestamps become NaT (errors='coerce') ...
        startTimestamp=lambda frame: pd.to_datetime(frame["startTimestamp"], errors='coerce'),
        endTimestamp=lambda frame: pd.to_datetime(frame["endTimestamp"], errors='coerce'),
    )
    # ... and rows with a missing start or end timestamp are then discarded.
    .dropna(subset=["startTimestamp", "endTimestamp"])
    # Chronological order, ties broken by alarm number.
    .sort_values(by=["startTimestamp", "alarmNumber"])
)

In [None]:

# NOTE(review): both lists look like placeholders; what they mean is defined by
# remove_nuisance_alarms (imported from remove_chattering) — confirm.
useless_alarms = [""]
messages = ["Message"]

# NOTE(review): the literal 10 is passed positionally; presumably a chattering
# time window — confirm its meaning and unit against remove_nuisance_alarms.
no_chattering_alarms = remove_nuisance_alarms(alarmSeries, 10, messages, useless_alarms)
# Replace the existing index labels with 0..n-1 (modifies the returned frame in place).
no_chattering_alarms.reset_index(drop=True, inplace=True)
# Order rows per device, then chronologically, ties broken by alarm number.
# sort_values keeps the index labels, so they are no longer sequential afterwards.
no_chattering_alarms = no_chattering_alarms.sort_values(by=["deviceId", "startTimestamp", "alarmNumber"])


In [None]:
def calculate_alarm_rate(df, time_column, window_size_minutes=10):
    """
    Compute a rolling event count for every minute spanned by the DataFrame.

    The frame is indexed by ``time_column``, resampled into 1-minute bins with
    ``count()`` (number of non-null values per column and bin), and the bins
    are then summed over a trailing time-based window of
    ``window_size_minutes`` minutes. The first remaining column of the result
    is renamed to ``'alarm_rate'``; any other columns are kept as they are.

    The input DataFrame is not modified, so the function can safely be called
    more than once on the same frame.

    :param df: A pandas DataFrame with at least one column besides ``time_column``.
    :param time_column: The name of the column in df which contains the datetime values.
    :param window_size_minutes: The size of the rolling window in minutes.
    :return: A DataFrame indexed by minute whose 'alarm_rate' column holds the
             rolling count of events per window.
    """
    # set_index returns a new frame; the caller's DataFrame keeps its time
    # column (an in-place set_index would break a second call on the same df).
    indexed = df.set_index(time_column)

    # Count events per 1-minute bin. 'min' is the minute alias that is not
    # deprecated in recent pandas releases (unlike 'T').
    per_minute_counts = indexed.resample('1min').count()

    # Trailing, time-based rolling sum over the requested window.
    rolling_counts = per_minute_counts.rolling(window=f'{window_size_minutes}min').sum()

    # Expose the first column's rolling count under the name 'alarm_rate'.
    return rolling_counts.rename(columns={per_minute_counts.columns[0]: 'alarm_rate'})

In [None]:
# Rolling alarm rate over a 10-minute window, computed separately per device.
alarms_per_device = no_chattering_alarms.groupby("deviceId")
alarm_rate_by_device = alarms_per_device.apply(
    calculate_alarm_rate,
    time_column="startTimestamp",
    window_size_minutes=10,
)

In [None]:
# Detect alarm floods.
# An alarm flood starts when the alarm rate rises to the configured threshold
# and ends when the alarm rate drops below half of that threshold.
# Reference threshold: 10 alarms per 10 minutes (from ANSI/ISA-18.2).
def find_event_sequences(series, T = 10, flood_threshold = 10, gamma = 0.5):
    """
    Detect alarm floods in a series of alarm rates.

    A flood begins at the first sample whose rate is >= ``flood_threshold``;
    the reported start is that sample's timestamp minus ``T`` minutes. The
    flood ends at the first later sample whose rate is strictly below
    ``flood_threshold * gamma``. A flood that is still open when the series
    ends is not reported.

    :param series: A pandas Series of alarm rates. Index entries are either
                   tuples whose second element is the timestamp (e.g. a
                   (deviceId, timestamp) MultiIndex) or plain timestamps.
    :param T: Size of time window in minutes; subtracted from the detected start.
    :param flood_threshold: rate of alarms needed to start alarm flood
    :param gamma: multiplier to the threshold to detect end of alarm flood
    :return: list of (start, end) tuples of pd.Timestamp, one per alarm flood
    """
    lookback = pd.Timedelta(minutes=T)
    end_level = flood_threshold * gamma

    sequences = []
    sequence_start = None

    # Series.items() is used because Series.iteritems() was removed in pandas 2.0.
    for idx, rate in series.items():
        # With a MultiIndex each label is a tuple and the timestamp is its
        # second element; with a flat index the label is the timestamp itself.
        timestamp = idx[1] if isinstance(idx, tuple) else idx

        if sequence_start is None:
            if rate >= flood_threshold:
                sequence_start = timestamp
        elif rate < end_level:
            sequences.append((sequence_start - lookback, timestamp))
            sequence_start = None
    return sequences

In [None]:
# Keep only the 'alarm_rate' column; the same name now refers to that single
# column instead of the full frame.
# NOTE(review): because the name is rebound to its own column, running this
# cell a second time without first re-running the cell that builds the frame
# will try to look up "alarm_rate" on the already-extracted column.
alarm_rate_by_device = alarm_rate_by_device["alarm_rate"]

In [None]:

# Detect floods per device with a 10-minute window and a start threshold of
# 3 alarms (gamma keeps the function's default).
device_alarm_floods = alarm_rate_by_device.groupby("deviceId").apply(
    find_event_sequences,
    T=10,
    flood_threshold=3,
)

In [None]:
# Flatten the per-device lists of (start, end) tuples into one row per flood.
flood_records = []
for device_id, floods in device_alarm_floods.items():
    for flood_start, flood_end in floods:
        flood_records.append((device_id, flood_start, flood_end))

df = pd.DataFrame(flood_records, columns=['deviceId', 'startTimestamp', 'endTimestamp'])

In [None]:
# Write all detected floods (one row per flood, all devices) to a single CSV
# named after the selected system; the index is not written.
df.to_csv(f'../../data/{system}_alarm_floods.csv', index=False)

In [None]:
# Write one CSV per device containing the start and end timestamp of each
# detected flood.
# Series.items() is used because Series.iteritems() was removed in pandas 2.0.
for device, flood in device_alarm_floods.items():
    with open(f"../../data/detected_floods/{device}_verylow_T_alarm_floods.csv", 'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(["startTimestamp", "endTimestamp"])
        for s, e in flood:
            # %z renders as an empty string when the timestamp carries no timezone.
            writer.writerow([s.strftime("%Y-%m-%d %H:%M:%S.%f%z"),e.strftime("%Y-%m-%d %H:%M:%S.%f%z")])