In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import numpy as np
import seaborn as sns
from collections import Counter


In [None]:
# Load the alarm log and the detected alarm floods of one system and parse
# their timestamp columns.

system = "CSD"
columnsToKeep = ["deviceId", "alarmNumber", "level","description","startTimestamp", "endTimestamp"]

alarms = pd.read_csv(f'../../data/{system}_similar_alarms.csv')
alarm_floods = pd.read_csv(f'../../data/{system}_alarm_floods.csv')

# Alarm log: keep the relevant columns, parse timestamps (unparseable values
# become NaT), drop alarms without a valid start or end, order chronologically.
alarms = alarms[columnsToKeep]
for timestamp_column in ("startTimestamp", "endTimestamp"):
    alarms[timestamp_column] = pd.to_datetime(alarms[timestamp_column], errors='coerce')
alarms = alarms.dropna(subset=["startTimestamp", "endTimestamp"]).sort_values(by="startTimestamp")

# Flood table: timestamps are parsed the same way, but no rows are dropped.
for timestamp_column in ("startTimestamp", "endTimestamp"):
    alarm_floods[timestamp_column] = pd.to_datetime(alarm_floods[timestamp_column], errors='coerce')

In [None]:
# Keep only floods whose deviceId mentions "Crane". reset_index() renumbers the
# rows from 0 and keeps the previous index as an "index" column.
is_crane_flood = alarm_floods["deviceId"].str.contains("Crane")
crane_floods = alarm_floods[is_crane_flood].reset_index()

In [None]:
# Summary statistics collected by the cells below, keyed by statistic name.
results = dict()

In [None]:
# Headline statistics for the alarm log and the crane alarm floods.

# The system id is the part of deviceId before the first underscore.
alarms["systemId"] = alarms["deviceId"].str.split("_").str[0]
alarms["date"] = alarms["startTimestamp"].dt.strftime("%Y-%m-%d")

# Recorded time span per system (earliest alarm start to latest alarm end),
# summed over all systems. Computed once and reused for years and days.
alarms_by_system = alarms.groupby("systemId")
recorded_span = (alarms_by_system["endTimestamp"].max() - alarms_by_system["startTimestamp"].min()).sum()
recorded_days = recorded_span / pd.Timedelta(days=1)

# Non-null alarmNumber entries per system and calendar day.
daily_alarm_counts = alarms.groupby(["systemId", "date"])["alarmNumber"].count()

results["number of systems"] = alarms["systemId"].nunique()
results["number of alarm floods"] = len(crane_floods)
# np.timedelta64(1, 'Y') is not convertible to a pandas Timedelta on recent
# pandas versions, so years are derived from days. 365.2425 days is the year
# length numpy uses for its 'Y' unit, which keeps the value unchanged.
results["recorded years"] = recorded_days / 365.2425
results["highest daily alarm count"] = daily_alarm_counts.max()
results["mean daily alarm count"] = daily_alarm_counts.sum() / recorded_days

crane_floods["date"] = crane_floods["startTimestamp"].dt.strftime("%Y-%m-%d")
# Every distinct (device, day) pair with at least one flood counts once.
results["days with alarm flood"] = crane_floods.groupby(["deviceId", "date"]).ngroups

crane_floods["duration"] = crane_floods["endTimestamp"] - crane_floods["startTimestamp"]
results["mean alarm flood duration minutes"] = crane_floods["duration"].mean() / pd.Timedelta(minutes=1)

In [None]:
# For every crane flood, collect the alarms of the same system that started
# inside the flood's [startTimestamp, endTimestamp] window, and tag them with
# the flood's row index as "flood_id".
system_groupd = alarms.groupby("systemId")
flood_alarm_frames = []
for i, row in crane_floods.iterrows():
    systemId = row["deviceId"].split("_")[0]
    if systemId not in system_groupd.groups:
        # No alarms exist for this system, so the flood contributes no rows
        # (get_group would raise KeyError here).
        continue
    system_alarms = system_groupd.get_group(systemId)

    # between() is inclusive on both ends, matching the >= / <= comparison.
    in_flood = system_alarms["startTimestamp"].between(row["startTimestamp"], row["endTimestamp"])
    # assign() returns a new frame instead of writing into a slice, which
    # avoids SettingWithCopyWarning.
    flood_alarm_frames.append(system_alarms[in_flood].assign(flood_id=i))

# Concatenate once: growing a DataFrame inside the loop re-copies all
# previously collected rows on every iteration.
if flood_alarm_frames:
    floods_to_label_df = pd.concat(flood_alarm_frames)
else:
    floods_to_label_df = pd.DataFrame(columns=[*alarms.columns, "flood_id"])

In [None]:
# Make deviceId unique per flood by appending the flood id, then prefix
# alarmNumber with the second "_"-separated part of that deviceId.
# NOTE: this rewrites both columns in place, so running the cell twice appends
# the suffix and prefix a second time.
flood_suffix = floods_to_label_df["flood_id"].map(str)
floods_to_label_df["deviceId"] = floods_to_label_df["deviceId"] + "_" + flood_suffix

device_part = floods_to_label_df["deviceId"].str.split("_").str[1]
floods_to_label_df["alarmNumber"] = device_part + "_" + floods_to_label_df["alarmNumber"]

In [None]:
# Distinct device-prefixed alarm numbers that occur in any crane flood.
unique_alarm_variables = floods_to_label_df["alarmNumber"].nunique()
results["unique alarm variables"] = unique_alarm_variables

In [None]:
def calculate_alarm_rate(df, time_column, window_size_minutes=10):
    """
    Calculates the rate of alarms per window_size_minutes for every minute in the DataFrame.

    The input frame is left unmodified.

    :param df: A pandas DataFrame with one row per alarm.
    :param time_column: The name of the column in df which contains the datetime values.
    :param window_size_minutes: The size of the rolling window in minutes.
    :return: A DataFrame indexed by minute. Each column holds the rolling sum of
        that column's per-minute non-null counts; the first column is renamed
        to 'alarm_rate'.
    """
    # set_index returns a new frame. The previous inplace=True variant mutated
    # the caller's DataFrame (and the group object handed in by groupby.apply).
    indexed = df.set_index(time_column)

    # Count the events in every 1-minute bin. 'min' replaces the deprecated 'T' alias.
    per_minute_counts = indexed.resample('1min').count()

    # Sum the per-minute counts over a trailing time window.
    rolling_counts = per_minute_counts.rolling(window=f'{window_size_minutes}min').sum()

    # Expose the first column under the name 'alarm_rate'.
    return rolling_counts.rename(columns={per_minute_counts.columns[0]: 'alarm_rate'})

In [None]:
# Rolling 10-minute alarm counts, computed separately for every deviceId.
alarm_rate_by_system = alarms.groupby("deviceId").apply(
    calculate_alarm_rate,
    time_column="startTimestamp",
    window_size_minutes=10,
)

In [None]:
# Highest rolling alarm count observed for any device.
peak_alarm_rate = alarm_rate_by_system["alarm_rate"].max()
results["peak alarm rate / 10 min"] = int(peak_alarm_rate)

In [None]:
# Root-cause classification table; later cells look rows up by flood id.
labels_path = "../../data/classification/final_classes_v2.csv"
labels = pd.read_csv(labels_path)

In [None]:
# Number of distinct root-cause classes present in the label table.
root_cause_count = labels["class"].nunique()
results["root causes identified"] = root_cause_count

In [None]:
# Persist the collected statistics as a two-column, semicolon-separated CSV.
stat_rows = list(results.items())
df_results = pd.DataFrame(stat_rows, columns=["statistic", "value"])
df_results.to_csv("../../data/stats/general_stats.csv", sep=";", index=False)

In [None]:
# Visualize number of floods per root cause
sns.set_palette("deep")
sns.set_style("whitegrid")
sns.set_context("paper")

# size() counts the rows of every class directly. The previous
# groupby("class").apply(lambda ...) read the grouping column inside the
# applied function, which pandas deprecated in 2.2.
label_counts = labels.groupby("class").size()

ax = sns.barplot(x=label_counts.index, y=label_counts.values, edgecolor="black", linewidth=1.5)
ax.set_xlabel("Root cause", fontsize=12)
ax.set_ylabel("Number of floods", fontsize=12)
plt.show()

In [None]:
# visualize flood durations per root cause

In [None]:
# Attach the root-cause class to every flood (aligned on the row index) and
# express the flood duration in minutes.
crane_floods["label"] = labels["class"]
# Recompute the duration from the timestamps instead of dividing the existing
# "duration" column in place: the in-place version failed when the cell was
# run a second time, because the column was already numeric by then.
crane_floods["duration"] = (crane_floods["endTimestamp"] - crane_floods["startTimestamp"]) / np.timedelta64(1, 'm')

In [None]:
# Display the full label table for inspection.
labels

In [None]:
# Class assigned to the row with index label 0.
labels.loc[0, "class"]

In [None]:
# Attach the root-cause class of its flood to every alarm row. flood_id is
# looked up in the index of `labels`.
unknown_flood_ids = floods_to_label_df.loc[~floods_to_label_df["flood_id"].isin(labels.index), "flood_id"].unique()
if len(unknown_flood_ids):
    # Keep the failure mode of the per-row .loc lookup instead of silently
    # producing NaN labels.
    raise KeyError(f"flood ids without a label: {list(unknown_flood_ids)}")
# Vectorised index lookup; replaces one labels.loc[...] call per alarm row.
floods_to_label_df["label"] = floods_to_label_df["flood_id"].map(labels["class"])

In [None]:
# Number of alarm rows per flood, indexed by (label, flood_id).
# A single two-key groupby always yields a MultiIndex Series. The nested
# groupby.apply could return a wide DataFrame instead when every label group
# produced the same flood index (e.g. only one label present), which breaks
# the index.get_level_values(0) / .values usage in the boxplot cell.
counts_by_flood = floods_to_label_df.groupby(["label", "flood_id"]).size()

In [None]:
# Display the per-flood alarm counts for inspection.
counts_by_flood

In [None]:
# Box plot: distribution of alarm counts per flood, one box per root cause.
root_cause_per_flood = counts_by_flood.index.get_level_values(0)
alarms_per_flood = counts_by_flood.values

ax = sns.boxplot(x=root_cause_per_flood, y=alarms_per_flood)
ax.set_xlabel("Root cause", fontsize=12)
ax.set_ylabel("Alarm instances in flood", fontsize=12)
plt.show()

In [None]:
# Description of the first row labelled with class 7.
labels.loc[labels["class"] == 7, "class description"].iloc[0]

In [None]:
# Number of alarm instances per level.
# NOTE: level_counts is reassigned by a later cell (one count per unique
# variable_id) before it is displayed or plotted, so this value is not used.
level_counts = alarms["level"].value_counts()

In [None]:
# Reduce deviceId to its second "_"-separated part, dropping the leading part.
# Ids that contain no "_" are left unchanged, so re-running this cell on
# already-shortened ids no longer raises IndexError.
device_id_parts = alarms["deviceId"].str.split("_")
alarms["deviceId"] = device_id_parts.str[1].where(device_id_parts.str.len() > 1, alarms["deviceId"])

In [None]:

# An alarm variable is identified by its device part plus its alarm number.
device_part = alarms["deviceId"]
alarm_part = alarms["alarmNumber"]
alarms["variable_id"] = device_part + "_" + alarm_part

In [None]:
# Count alarm variables per level, taking for each variable the level of its
# first row in `alarms`.
def _first_level(variable_alarms):
    return variable_alarms["level"].iloc[0]

level_per_variable = alarms.groupby("variable_id").apply(_first_level)
level_counts = level_per_variable.value_counts()

In [None]:
# Number of distinct alarm variables in the alarm log.
alarms["variable_id"].nunique()

In [None]:
# Display the number of alarm variables per level.
level_counts

In [None]:
# Pie chart of alarm variables per level. Colours are assigned by position in
# level_counts, not by level name.
level_colors = ['#FF5F5A','#FFDF5A','#5ABEFF','#FFBE5A']

plt.figure(figsize=(8, 8))
plt.pie(
    level_counts,
    labels=level_counts.index,
    colors=level_colors,
    autopct='%1.1f%%',
    startangle=140,
)
plt.show()

In [None]:
# Export one description per root-cause class (the first non-null one found
# for each class) as a semicolon-separated CSV.
class_descriptions = labels[["class", "class description"]].sort_values(by="class")
first_description_per_class = class_descriptions.groupby("class").first()
first_description_per_class.to_csv("../../data/stats/root_cause_descriptions.csv", sep=";")