In [None]:
import pandas as pd
import pyarrow as pa

# Load the ARIMA-X forecast overview from its parquet snapshot.
arima_x_forecast = pd.read_parquet(
    path='../data/arima-data_week15/arima-x_forecast_overview_first15000_2021-07-27T11-18-42.parquet',
    engine='pyarrow',
)

In [None]:
# Work on an independent (deep) copy so the loaded frame stays untouched
# while additional score columns are added.
extended_df = arima_x_forecast.copy(deep=True)

In [None]:
# Threat score (TS), also called the critical success index (CSI):
#   TS = TP / (TP + FN + FP)
# True negatives do not enter the score.

ts_denominator = extended_df["TP"] + extended_df["FN"] + extended_df["FP"]
extended_df["TS"] = extended_df["TP"] / ts_denominator


In [None]:
import numpy as np
# Add the Matthews correlation coefficient (MCC), computed from rates:
#   MCC = sqrt(TPR * TNR * PPV * NPV) - sqrt(FNR * FPR * FOR * FDR)
#
# TPR, TNR, FNR and FPR are read from the existing columns of extended_df.
# The remaining four rates are derived from the confusion-matrix counts:
#   * PPV = TP / (TP + FP)  [positive predictive value]
#   * NPV = TN / (TN + FN)  [negative predictive value]
#   * FOR = FN / (FN + TN)  [false omission rate]
#   * FDR = FP / (FP + TP)  [false discovery rate]
#
# The intermediates are kept as local Series instead of being written into
# extended_df and dropped afterwards. That way a column that already carries
# one of the helper names is never overwritten or removed, and "MCC" is the
# only column this cell adds.
#
# NOTE(review): rows where one of the denominators is zero presumably end up
# as NaN in "MCC" — confirm that this is the intended handling.

mcc_tp = extended_df["TP"]
mcc_fn = extended_df["FN"]
mcc_fp = extended_df["FP"]
mcc_tn = extended_df["TN"]

mcc_ppv = mcc_tp / (mcc_tp + mcc_fp)
mcc_npv = mcc_tn / (mcc_tn + mcc_fn)
mcc_for = mcc_fn / (mcc_fn + mcc_tn)
mcc_fdr = mcc_fp / (mcc_fp + mcc_tp)

# Same multiplication order as the formula above, one square root per term.
mcc_first_part = np.sqrt(extended_df["TPR"] * extended_df["TNR"] * mcc_ppv * mcc_npv)
mcc_second_part = np.sqrt(extended_df["FNR"] * extended_df["FPR"] * mcc_for * mcc_fdr)

extended_df["MCC"] = mcc_first_part - mcc_second_part


In [None]:
# Weighted score adapted from the PhysioNet/CinC Challenge 2015:
#   https://physionet.org/content/challenge-2015/1.0.0/
#   Original: (TP + TN) / (TP + TN + FP + 5*FN)
#   Adapted:   TP       / (TP + FN + 5*FP)
# The adapted variant puts the factor 5 on FP instead of FN and leaves TN out.

weighted_denominator = extended_df["TP"] + extended_df["FN"] + (5 * extended_df["FP"])
extended_df["WEIGHTED_SCORE_AA"] = extended_df["TP"] / weighted_denominator

In [None]:
# Show the column labels currently present in the extended frame.
extended_df.columns

In [None]:
# Keep only the listed columns, in this order: run description first, then the
# confusion-matrix counts, the rate/score columns (including the newly added
# TS, MCC and WEIGHTED_SCORE_AA), and finally the chunk/iteration counters.
extended_df = extended_df[
    [
        'ID', 'PARAMETER', 'RUNTIME', 'MODEL', 'SCALED', 'LIBRARY',
        'ENDOGENOUS', 'EXOGENOUS', 'FIRST_FORECAST', 'ALARM_TYPE',
        'TP', 'FN', 'FP', 'TN',
        'FPR', 'TPR', 'FNR', 'TNR',
        'ACC', 'F1S', 'TS', 'MCC', 'WEIGHTED_SCORE_AA',
        'N_CHUNKS', 'N_ITERATIONS',
    ]
]

In [None]:
# Display the extended frame as the cell output for inspection.
extended_df

In [None]:
# Persist the extended frame as a new parquet file. The file name carries the
# current local time, so each run writes a separate file.
import datetime as dt

timestamp = dt.datetime.today().strftime('%Y-%m-%dT%H-%M-%S')
output_path = (
    '../data/arima-data_week15/arima-x_forecast_overview_first15000_'
    + timestamp
    + '_extended.parquet'
)

pd.DataFrame(extended_df).to_parquet(output_path, engine='pyarrow')
