In [None]:
import pandas as pd
import numpy as np

import methods.assam as assam
import methods.gmm as gmm
import methods.hmm as hmm
import methods.lstm as lstm
import methods.nw as nw
import methods.svm as svm
import methods.sw as sw
import methods.tfidf_lr as tfidf_lr
import methods.word2vec as word2vec
from methods.evaluate import print_metrics


In [None]:
# Name of the system whose preprocessed data and ground truth are loaded.
system = "CSD"

# Alarm data for both splits; the first CSV column is used as the index.
X_train = pd.read_csv(f"../data/preprocessed/{system}_train.csv", index_col=0)
X_test = pd.read_csv(f"../data/preprocessed/{system}_test.csv", index_col=0)

# Ground-truth labels: only the first remaining column is kept, as a Series.
Y_train = pd.read_csv(f"../data/ground_truth/{system}_train_labels.csv", index_col=0).iloc[:, 0]
Y_test = pd.read_csv(f"../data/ground_truth/{system}_test_labels.csv", index_col=0).iloc[:, 0]

# Train and test rows stacked together; used to build a vocabulary covering both splits.
X = pd.concat([X_train, X_test])

# vocab maps every distinct alarmNumber (in order of first appearance) to a
# contiguous integer index starting at 0.
unique_alarms = X["alarmNumber"].unique()
vocab = dict(zip(unique_alarms, range(len(unique_alarms))))

# NOTE(review): computed before the -1 ("unknown") labels are dropped in the
# next cell, which recomputes n_classes after filtering.
n_classes = Y_train.nunique()

In [None]:
# Unknown samples (labelled -1) are not used as training data.
# Only the training split is filtered in this cell; X_test / Y_test are not touched.
is_known = Y_train != -1
Y_train = Y_train[is_known]

# Keep only the training rows whose flood_id appears in the index of the remaining labels.
labelled_ids = Y_train.index
X_train = X_train.loc[X_train["flood_id"].isin(labelled_ids)]

# Number of distinct labels left after removing the unknown marker.
n_classes = Y_train.nunique()

In [None]:
# Skip this cell (and the CASTLE prediction cell below) if MultiRocket is not set up.
# NOTE(review): the import presumably lives here rather than in the import cell
# so the rest of the notebook can run without that optional dependency — confirm.
import methods.castle as castle
# Build the CASTLE classifier from the shared alarm vocabulary and fit it on the training split.
castle_model = castle.CASTLE_Classifier(vocab)
castle_model.fit(X_train, Y_train)

In [None]:
# Predict on the test split and report metrics against the test ground truth.
# NOTE(review): stored as `predictions`, whereas the other methods in this
# notebook use `<method>_predictions`.
predictions = castle_model.predict(X_test)
print("CASTLE metrics")
print_metrics(predictions, Y_test)

In [None]:
# Build the ASSAM classifier from the shared alarm vocabulary and fit it on the training split.
assam_model = assam.ASSAM_Classifier(vocab)
assam_model.fit(X_train, Y_train)

In [None]:
# Predict on the test split and report metrics against the test ground truth.
assam_predictions = assam_model.predict(X_test)
print("Assam metrics")
print_metrics(assam_predictions, Y_test)

In [None]:
# Build the GMM classifier from the shared alarm vocabulary and fit it on the training split.
gmm_model = gmm.GMM_Classifier(vocab)
gmm_model.fit(X_train, Y_train)

In [None]:
# Predict on the test split and report metrics against the test ground truth.
gmm_predictions = gmm_model.predict(X_test)
print("GMM metrics")
print_metrics(gmm_predictions, Y_test)

In [None]:
# Build the HMM classifier from the shared alarm vocabulary and fit it on the training split.
hmm_model = hmm.HMM_Classifier(vocab)
hmm_model.fit(X_train, Y_train)

In [None]:
# Predict on the test split and report metrics against the test ground truth.
hmm_predictions = hmm_model.predict(X_test)
print("HMM metrics")
print_metrics(hmm_predictions, Y_test)

In [None]:
# Build the LSTM classifier and fit it on the training split.
# NOTE(review): the positional 19 is presumably the number of classes; the
# notebook computes n_classes earlier but never uses it — confirm and consider
# passing n_classes instead of a hard-coded value.
# NOTE(review): embedding_dim=40 and slen=51 are hard-coded; their rationale is
# not visible in this notebook — TODO document or move to a config cell.
lstm_model = lstm.LSTM_Classifier(vocab,19, embedding_dim=40,slen=51)
lstm_model.fit(X_train, Y_train)

In [None]:
# Predict on the test split and report metrics against the test ground truth.
lstm_predictions = lstm_model.predict(X_test)
print("LSTM metrics")
print_metrics(lstm_predictions, Y_test)

In [None]:
# Build the Needleman-Wunsch classifier from the shared alarm vocabulary and fit it on the training split.
nw_model = nw.NW_Classifier(vocab)
nw_model.fit(X_train, Y_train)

In [None]:
# Predict on the test split and report metrics against the test ground truth.
nw_predictions = nw_model.predict(X_test)
print("Needleman-Wunsch metrics")
print_metrics(nw_predictions, Y_test)

In [None]:
# Additional diagnostics specific to the Needleman-Wunsch model (requires the
# fitted nw_model from the cells above): call its distinguishability plot, then
# print whatever non_significant_alarms() returns.
nw_model.plot_distinquishability()
print("Not significant alarms")
print(nw_model.non_significant_alarms())

In [None]:
# Build the SVM classifier (with max_lag=1) from the shared alarm vocabulary
# and fit it on the training split.
svm_model = svm.SVM_Classifier(vocab, max_lag=1)
svm_model.fit(X_train, Y_train)

In [None]:
# Predict on the test split and report metrics against the test ground truth.
svm_predictions = svm_model.predict(X_test)
print("SVM metrics")
print_metrics(svm_predictions, Y_test)

In [None]:
# Build the Smith-Waterman classifier from the shared alarm vocabulary and fit it on the training split.
sw_model = sw.SW_Classifier(vocab)
sw_model.fit(X_train, Y_train)

In [None]:
# Predict on the test split and report metrics against the test ground truth.
sw_predictions = sw_model.predict(X_test)
print("Smith-Waterman metrics")
print_metrics(sw_predictions, Y_test)

In [None]:
# Build the TF-IDF + LR classifier from the shared alarm vocabulary, with the
# use_confidence_thresholds option switched off, and fit it on the training split.
tfidf_lr_model = tfidf_lr.TFIDF_LR_Classifier(vocab, use_confidence_thresholds=False)
tfidf_lr_model.fit(X_train, Y_train)

In [None]:
# Predict on the test split and report metrics against the test ground truth.
tfidf_lr_predictions = tfidf_lr_model.predict(X_test)
print("TFIDF-LR metrics")
print_metrics(tfidf_lr_predictions, Y_test)

In [None]:
# The Word2Vec model can use alarm categories to improve its word embeddings.
# Here one category array is built: for every vocabulary index, the id of the
# device the alarm belongs to.


def _device_category(alarm) -> int:
    """Return the device category id (0-5) for a single alarm identifier.

    The device name is the text before the first underscore of the alarm
    identifier. Checks run in order, so a name containing "MC" is assigned
    category 2 before the broader "L" / "M" substring checks are reached.
    """
    device = str(alarm).split("_")[0]
    if device == "System Device":
        return 0
    if device == "Crane":
        return 1
    if "MC" in device:
        return 2
    if "L" in device:
        return 3
    if "M" in device:
        return 4
    return 5


# device_mapping[i] is the category id of the alarm whose vocabulary index is i.
# The category depends only on the alarm identifier, and vocab already holds
# every distinct alarm in X, so iterate the vocabulary once instead of
# revisiting every row of X.
device_mapping = np.zeros(len(vocab))
for alarm, index in vocab.items():
    device_mapping[index] = _device_category(alarm)


In [None]:
# Further category arrays can be created for the alarms and added to the list
# passed to fit(). This example uses only device categories, but alarm priority
# or the type of process variable tracked could be used as well.
# NOTE(review): the meaning of the positional argument 50 is not visible in
# this notebook — confirm and consider passing it by keyword.

word2vec_model = word2vec.Word2Vec_Classifier(vocab)
word2vec_model.fit(X_train, Y_train, 50, [device_mapping])

In [None]:
# Predict on the test split and report metrics against the test ground truth.
word2vec_predictions = word2vec_model.predict(X_test)
print("Word2Vec metrics")
print_metrics(word2vec_predictions, Y_test)

In [None]:
# Plot embeddings for the integers 0..19.
# NOTE(review): presumably these are vocabulary indices — confirm against plot_embeddings.
first_twenty = np.arange(20)
word2vec_model.plot_embeddings(first_twenty)