In [None]:
# Reads the file of alarms and the file that contains the start and end times of the alarm floods, then:
# 1. Creates new alarm ids using the alarm number and device id
# 2. Removes chattering alarms
# 3. Removes fields not used by the algorithms
# 4. Normalizes timestamps so that each flood starts at time 0 (differences in milliseconds)
# 5. Writes the preprocessed file as CSV

In [None]:
import pandas as pd
from remove_chattering import remove_nuisance_alarms 

In [None]:
# Load the alarm records; the path is resolved relative to the notebook's working directory.
alarms = pd.read_csv("../../data/CSD_alarms.csv")

In [None]:
# Load the table holding the start and end times of the alarm floods.
device_floods = pd.read_csv("../../data/CSD_alarm_floods.csv")

In [None]:
# Restrict the alarms to the listed columns, parse both timestamp columns
# (values that cannot be parsed become NaT because of errors='coerce'), drop the
# rows where either timestamp is missing, and sort the alarms by start time.
# .assign() builds a new frame, so nothing is written into a slice of the
# original DataFrame (assigning columns on `alarms[columnsToKeep]` directly
# raises SettingWithCopyWarning on pandas versions without copy-on-write).
columnsToKeep = ["systemId", "deviceId", "alarmNumber", "level", "description", "startTimestamp", "endTimestamp"]
alarms = (
    alarms[columnsToKeep]
    .assign(
        startTimestamp=lambda frame: pd.to_datetime(frame["startTimestamp"], errors="coerce"),
        endTimestamp=lambda frame: pd.to_datetime(frame["endTimestamp"], errors="coerce"),
    )
    .dropna(subset=["startTimestamp", "endTimestamp"])
    .sort_values(by="startTimestamp")
)

# Flood boundaries are parsed the same way; floods whose boundaries cannot be
# parsed are kept here and simply carry NaT.
device_floods["startTimestamp"] = pd.to_datetime(device_floods["startTimestamp"], errors="coerce")
device_floods["endTimestamp"] = pd.to_datetime(device_floods["endTimestamp"], errors="coerce")

In [None]:
def calculate_time_difference(group, start_time, negative_end_replacement_ms=int(1e8)):
    """Express alarm timestamps as integer millisecond offsets from ``start_time``.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows with ``startTimestamp`` and ``endTimestamp`` columns (datetimes, or
        values ``pd.to_datetime`` can parse). The passed frame is not modified.
    start_time : pandas.Timestamp
        Reference instant that maps to offset 0.
    negative_end_replacement_ms : int, optional
        Value written to ``endTimestamp`` wherever the end lies before
        ``start_time``. Defaults to ``int(1e8)``.
        NOTE(review): presumably a placeholder for alarms without a usable end
        time - confirm against the data.

    Returns
    -------
    pandas.DataFrame
        A copy of ``group`` whose two timestamp columns hold int64 millisecond
        offsets.

    Raises
    ------
    ValueError
        If a timestamp is missing (NaT) and therefore cannot be cast to an
        integer offset.
    """
    one_millisecond = pd.Timedelta(milliseconds=1)

    # Work on a copy: the frame is typically handed in by groupby and mutating
    # it in place changes the caller's data as a side effect.
    normalized = group.copy()

    for column in ("startTimestamp", "endTimestamp"):
        offset = pd.to_datetime(normalized[column]) - start_time
        # Integer floor division is exact, whereas total_seconds() * 1000
        # followed by truncation can lose a millisecond to float rounding.
        # The explicit int64 keeps the width identical on every platform.
        normalized[column] = (offset // one_millisecond).astype("int64")

    # An end before the reference instant yields a negative offset; replace it.
    ends_before_start = normalized["endTimestamp"] < 0
    normalized.loc[ends_before_start, "endTimestamp"] = negative_end_replacement_ms

    return normalized

In [None]:
# For now check only crane floods

# na=False treats floods without a deviceId as non-crane rows; without it a
# missing deviceId puts NaN into the mask and boolean indexing fails.
crane_floods = device_floods[device_floods["deviceId"].str.contains("Crane", na=False)].reset_index()
system_groupd = alarms.groupby("systemId")

useless_alarms = []
messages = []

# Collect the alarms of every flood and concatenate once after the loop;
# calling pd.concat inside the loop re-copies all earlier rows on each pass.
flood_frames = []
for i, row in crane_floods.iterrows():
    # The system id is the part of the flood's deviceId before the first underscore.
    systemId = row["deviceId"].split("_")[0]
    if systemId not in system_groupd.groups:
        # No alarms exist for this system; get_group would raise KeyError.
        continue
    system_alarms = system_groupd.get_group(systemId)

    # Alarms that start inside the flood window (both bounds inclusive).
    in_flood = system_alarms["startTimestamp"].between(row["startTimestamp"], row["endTimestamp"])
    # .assign returns a new frame, so the flood id is not written into a slice.
    flood_frames.append(system_alarms[in_flood].assign(flood_id=i))

if not flood_frames:
    raise ValueError("No crane alarm floods with alarms from a known system were found")
floods_to_label_df = pd.concat(flood_frames)

# Suffix each deviceId with its flood id before the chattering filter runs
# (presumably so the filter treats every flood separately - confirm in remove_chattering).
floods_to_label_df["deviceId"] = floods_to_label_df["deviceId"] + "_" + floods_to_label_df["flood_id"].map(str)
# The meaning of the argument 10 and of the two lists is defined by
# remove_nuisance_alarms in the remove_chattering module.
df = remove_nuisance_alarms(floods_to_label_df, 10, messages, useless_alarms)
df = df.sort_values(by=["flood_id", "startTimestamp"]).reset_index(drop=True)
# New alarm id: second underscore-separated token of deviceId + "_" + alarm number.
df["alarmNumber"] = df["deviceId"].str.split("_").str[1] + "_" + df["alarmNumber"]
df = df[["flood_id", "alarmNumber", "startTimestamp", "endTimestamp"]]

# Normalize each flood against its own start time. Iterating over the groups
# keeps the flood_id column in every group and avoids groupby.apply reading the
# grouping column inside the applied function, which pandas 2.2 deprecates.
normalized_groups = [
    calculate_time_difference(group, crane_floods.loc[flood_id, "startTimestamp"])
    for flood_id, group in df.groupby("flood_id")
]
# With no alarms left after filtering there is nothing to concatenate.
normalized_floods_df = pd.concat(normalized_groups, ignore_index=True) if normalized_groups else df.copy()


In [None]:
# Write the normalized crane floods to CSV; the DataFrame index is not written.
normalized_floods_df.to_csv("../../data/preprocessed/Crane_alarm_floods.csv", index=False)