In [1]:
%reload_ext autoreload
%autoreload 2

import sys
sys.path.append('../artitect/')

In [2]:
import pickle
from itertools import repeat
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import torch
from artifact import Saw
from sliding_window_detector import SlidingWindowTransformerDetector, ConvolutionalSlidingWindowDetector, SlidingWindowLinearDetector
from mask_detector import WindowLinearDetector, WindowTransformerDetector, ConvolutionDetector

from data import RealisticArtifactDataset, CachedArtifactDataset, TestArtifactDataset, CenteredArtifactDataset

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
torch.set_grad_enabled(False)
seed = 42
torch.manual_seed(seed)
np.random.seed(seed)

In [6]:
test_width = 512

test_path = Path("../data/test_files/test_label_CinCECGTorso512.pkl")
test = CachedArtifactDataset(file=test_path)

val_path = Path("../data/val_files/val_SW_noCiECGT512.pkl")
val = CachedArtifactDataset(file=val_path)

In [8]:
paths_SW = "../models/SW_adaFCN_Trans.ckpt" # SW ada 1d CNN Transformer
SW_adaCNNTrans_detector = SlidingWindowTransformerDetector.load_from_checkpoint(paths_SW).cpu()

paths_SW = "../models/SW_adaFCN_Trans_rejectionSampling0_1.ckpt" # SW ada 1d CNN Transformer
SW_adaCNNTrans_detector_RS01 = SlidingWindowTransformerDetector.load_from_checkpoint(paths_SW).cpu()

paths_SW = "../models/SW_adaFCN_Trans_rejectionSampling0_2.ckpt" # SW ada 1d CNN Transformer
SW_adaCNNTrans_detector_RS02 = SlidingWindowTransformerDetector.load_from_checkpoint(paths_SW).cpu()

paths_SW = "../models/SW_adaFCN_Trans_rejectionSampling0_3.ckpt" # SW ada 1d CNN Transformer
SW_adaCNNTrans_detector_RS03 = SlidingWindowTransformerDetector.load_from_checkpoint(paths_SW).cpu()

SW_detectors = [SW_adaCNNTrans_detector.eval(), SW_adaCNNTrans_detector_RS01.eval(), SW_adaCNNTrans_detector_RS02.eval(), SW_adaCNNTrans_detector_RS03.eval()]

  rank_zero_warn(


In [9]:
def baseline_detector(input: torch.Tensor) -> int:   
    input.squeeze(0)
    prediction = 0

    center = int(input.shape[1]/2)
    # flag points with very high increment as artifact
    # Calculate increments by subtracting the tensor shifted by one from the original tensor
    increments = (input[0][1:] - input[0][:-1]).abs()
    mean_increment = torch.mean(increments)
    std_increment = torch.std(increments)

    if increments[center-1] > (mean_increment + 3*std_increment):
        prediction = 1
    
    return prediction

## Predictions on Validation set for threshold calculation with fbeta score

In [10]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, mean_squared_error, confusion_matrix
import pandas as pd

all_predictions_valSet = pd.DataFrame(columns=['Detector_id', 'predictions'])
index = 0
gt = list()

for detector in SW_detectors:

    preds = list()

    for sample in val:
        example = sample["data"]
        window  = detector.window
        length  = len(example)

        # add artifact to data
        example_data = torch.tensor(example + sample["artifact"])

        # set detector to evaluation mode
        detector.eval()
        # make prediction and insert into prediction
        prediction = detector(example_data.unsqueeze(0))

        # update count
        preds = preds + [prediction.numpy()]

        if index == 0 :
            gt = gt + [sample["label"]]
    

    new_row = pd.DataFrame([{
        'Detector_id': index +1,
        'predictions': preds
    }])

    all_predictions_valSet = pd.concat([all_predictions_valSet,new_row])
    index = index +1
    print(len(all_predictions_valSet))

all_predictions_valSet = pd.concat([
    all_predictions_valSet,
    pd.DataFrame([{
        'Detector_id': 0,
        'predictions': gt
    }])
    ])


1


  return F.conv1d(input, weight, bias, self.stride,


2
3
4


In [11]:
from sklearn.metrics import fbeta_score

gt = all_predictions_valSet.iloc[-1]["predictions"]
metrics = pd.DataFrame()
for index, row in all_predictions_valSet.iterrows():

    max_fbeta = 0

    for threshold in np.linspace(0,  1,  100):
        predictions = np.where(np.array(row['predictions']) > threshold, 1, 0)

        fbeta = fbeta_score(gt, predictions, average='macro', beta=0.5)

        if (fbeta > max_fbeta):
            max_fbeta = fbeta
            best_threshold_fbeta = threshold

    predictions = np.where(np.array(row['predictions']) > best_threshold_fbeta, 1, 0)

    tn, fp, fn, tp = confusion_matrix(gt, predictions, labels=[0, 1]).ravel()

    metric = pd.DataFrame([{
        'index': index,
        'detector': f"detector{row['Detector_id']}",
        'threshold': best_threshold_fbeta,
        'fbeta_score': fbeta_score(gt, predictions, beta=0.5),
        'accuracy': accuracy_score(gt, predictions),
        'precision': precision_score(gt, predictions),
        'recall': recall_score(gt, predictions),
        'mse': mean_squared_error(gt, predictions), 
        'tn': tn,
        'fp': fp, 
        'fn': fn, 
        'tp': tp
    }])

    metrics = pd.concat([metrics, metric])


In [12]:
thresholds = list(metrics["threshold"])

In [13]:
thresholds = [0.343, 0.414, 0.454, 0.454]

In [14]:
thresholds = [0] + thresholds

## Sliding Window on Industry Data

In [19]:
import pandas as pd

test_width = 512
real_data_francois = pd.read_pickle('../data/francois_artifacts/francois_normalized_dataset.pickle') 

In [10]:
real_data_df = pd.read_pickle('../data/real/normalized_deviation_updated_TEST.pickle') 
ground_truth = pd.read_csv('../data/gt_changes_only_relabeled_200K.csv')

In [11]:
ground_truth.drop(columns=["Unnamed: 0"], inplace=True)

In [12]:
def baseline_detector(input: torch.Tensor) -> int:   
    input.squeeze(0)
    prediction = 0

    center = int(input.shape[1]/2)
    # flag points with very high increment as artifact
    # Calculate increments by subtracting the tensor shifted by one from the original tensor
    increments = (input[0][1:] - input[0][:-1]).abs()
    mean_increment = torch.mean(increments)
    std_increment = torch.std(increments)

    if increments[center-1] > (mean_increment + 1*std_increment):
        prediction = 1
    
    return prediction

In [13]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, mean_squared_error, confusion_matrix
import pandas as pd

all_predictions_real = pd.DataFrame(columns=['Detector_id', 'predictions'])
dist = test_width // 2

index = 1
gt = list()
preds_baseline = list()

for index, row in ground_truth[:400].iterrows():
    example_data = torch.tensor(real_data_df[0][int(row["position"]-dist) : int(row["position"]+dist)])
    
    prediction_baseline = baseline_detector(example_data.unsqueeze(0))
    preds_baseline = preds_baseline + [prediction_baseline]

    gt = gt + [row["gt"]]

all_predictions_real = pd.concat([
    all_predictions_real,
    pd.DataFrame([{
        'Detector_id': index,
        'predictions': preds_baseline
    }])
    ], ignore_index=True)

index = 2

for detector in SW_detectors:

    preds = list()

    for index, row in ground_truth[:400].iterrows():
        example_data = torch.tensor(real_data_df[0][int(row["position"]-dist) : int(row["position"]+dist)])
        # make prediction and insert into prediction
        prediction = detector(example_data.unsqueeze(0))

        # update count
        preds = preds + [prediction.numpy()] 

    new_row = pd.DataFrame([{
        'Detector_id': index ,
        'predictions': preds
    }])

    all_predictions_real = pd.concat([all_predictions_real,new_row], ignore_index=True)
    index = index +1
    print(len(all_predictions_real))


all_predictions_real = pd.concat([
    all_predictions_real,
    pd.DataFrame([{
        'Detector_id': 0,
        'predictions': gt
    }])
    ], ignore_index=True)

2


  return F.conv1d(input, weight, bias, self.stride,


3
4
5


In [15]:
all_predictions_real.drop(all_predictions_real.tail(1).index,inplace=True)

NameError: name 'all_predictions_real' is not defined

In [22]:
all_predictions_real

Unnamed: 0,Detector_id,predictions
0,399,"[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, ..."
1,399,"[0.29026586, 0.45724502, 0.20606177, 0.1667097..."
2,399,"[0.43129468, 0.3768898, 0.34215453, 0.39736745..."
3,399,"[0.4509386, 0.43353567, 0.3983038, 0.4221497, ..."
4,399,"[0.5060227, 0.44110414, 0.40732878, 0.4215525,..."


In [23]:
from sklearn.metrics import fbeta_score

best_threshold_fbeta = 0.5

gt_real = ground_truth[:400]["gt"]
metrics_real = pd.DataFrame()

for index, row in all_predictions_real.iterrows():

    predictions = np.where(np.array(row['predictions']) > thresholds[index], 1, 0)
    indices_tp = [i for i, val in enumerate(predictions[gt_real==1] == 1) if val]
    indices_fn = [i for i, val in enumerate(predictions[gt_real==1] == 0) if val]

    tn, fp, fn, tp = confusion_matrix(gt_real, predictions, labels=[0, 1]).ravel()

    metric = pd.DataFrame([{
        'index': index,
        'detector': f"Detector{row['Detector_id']}",
        'threshold': thresholds[index],
        'fbeta_score': fbeta_score(gt_real, predictions, beta=0.5),
        'accuracy': accuracy_score(gt_real, predictions),
        'precision': precision_score(gt_real, predictions),
        'recall': recall_score(gt_real, predictions),
        'mse': mean_squared_error(gt_real, predictions), 
        'tn': tn,
        'fp': fp, 
        'fn': fn, 
        'tp': tp, 
        'indices_tp': indices_tp, 
        'indices_fn': indices_fn
    }])

    metrics_real = pd.concat([metrics_real, metric])


In [24]:
metrics_real

Unnamed: 0,index,detector,threshold,fbeta_score,accuracy,precision,recall,mse,tn,fp,fn,tp,indices_tp,indices_fn
0,0,Detector399,0.0,0.068807,0.5525,0.272727,0.017241,0.4475,218,8,171,3,"[3, 9, 81]","[0, 1, 2, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14, 1..."
0,1,Detector399,0.343,0.657439,0.6925,0.752475,0.436782,0.3075,201,25,98,76,"[2, 3, 6, 8, 9, 10, 12, 15, 16, 19, 20, 21, 22...","[0, 1, 4, 5, 7, 11, 13, 14, 17, 18, 25, 26, 27..."
0,2,Detector399,0.414,0.686813,0.7075,0.806452,0.431034,0.2925,208,18,99,75,"[0, 3, 8, 12, 15, 16, 19, 20, 21, 24, 26, 29, ...","[1, 2, 4, 5, 6, 7, 9, 10, 11, 13, 14, 17, 18, ..."
0,3,Detector399,0.454,0.62749,0.675,0.768293,0.362069,0.325,207,19,111,63,"[8, 15, 19, 20, 24, 26, 29, 30, 32, 35, 36, 42...","[0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 13, 14..."
0,4,Detector399,0.454,0.622318,0.6725,0.794521,0.333333,0.3275,211,15,116,58,"[0, 8, 15, 16, 19, 24, 26, 29, 30, 35, 36, 38,...","[1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 13, 14, 1..."


In [110]:
preds_FCNTrans = np.stack(all_predictions_real["predictions"][7])
gt = np.array(gt_real)

In [111]:
neg_mask = (gt == 0)
pos_mask = (gt == 1)

negative_gt_preds = preds_FCNTrans[neg_mask]
positive_gt_preds = preds_FCNTrans[pos_mask]

negmin_index = np.argmin(negative_gt_preds)
negmax_index = np.argmax(negative_gt_preds)
posmin_index = np.argmin(positive_gt_preds)
posmax_index = np.argmax(positive_gt_preds)

original_negmin = np.where(neg_mask)[0][negmin_index]
original_negmax = np.where(neg_mask)[0][negmax_index]
original_posmin = np.where(pos_mask)[0][posmin_index]
original_posmax = np.where(pos_mask)[0][posmax_index]

In [None]:
index = original_negmin

plt.figure(figsize=(10, 3))
example_data = torch.tensor(real_data_df[0][int(ground_truth["position"][index]-dist) : int(ground_truth["position"][index]+dist)])
plt.plot(example_data, label="window without artifact", linewidth=2, color='grey')
plt.axvline(x=256, c="red", linestyle='--', dashes=(4,4), label="center", linewidth = 1)
plt.rcParams.update({'font.size': 16})
plt.xlabel('#Time steps [a.u.]') 
plt.ylabel("[a.u.]")
plt.title(f"gt = {gt[index]}; output = {preds_FCNTrans[index]}; prediction = {int(preds_FCNTrans[index] > 0.252)}")
plt.legend()
plt.show()

In [None]:
index = original_negmax

plt.figure(figsize=(10, 3))
example_data = torch.tensor(real_data_df[0][int(ground_truth["position"][index]-dist) : int(ground_truth["position"][index]+dist)])
plt.plot(example_data, label="window without artifact", linewidth=2, color='grey')
plt.axvline(x=256, c="red", linestyle='--', dashes=(4,4), label="center", linewidth = 1)
plt.rcParams.update({'font.size': 16})
plt.xlabel('#Time steps [a.u.]') 
plt.ylabel("[a.u.]")
plt.title(f"gt = {gt[index]}; output = {preds_FCNTrans[index]}; prediction = {int(preds_FCNTrans[index] > 0.252)}")
plt.legend()
plt.show()

In [None]:
index = original_posmin

plt.figure(figsize=(10, 3))
example_data = torch.tensor(real_data_df[0][int(ground_truth["position"][index]-dist) : int(ground_truth["position"][index]+dist)])
plt.plot(example_data, label="window with artifact", linewidth=2, color='blue')
plt.axvline(x=256, c="red", linestyle='--', dashes=(4,4), label="position of artifact", linewidth = 1)
plt.rcParams.update({'font.size': 16})
plt.xlabel('#Time steps [a.u.]') 
plt.ylabel("[a.u.]")
plt.title(f"gt = {gt[index]}; output = {preds_FCNTrans[index]}; prediction = {int(preds_FCNTrans[index] > 0.252)}")
plt.legend()
plt.show()

In [None]:
index = original_posmax

plt.figure(figsize=(10, 3))
example_data = torch.tensor(real_data_df[0][int(ground_truth["position"][index]-dist) : int(ground_truth["position"][index]+dist)])
plt.plot(example_data, label="window with artifact", linewidth=2, color='blue')
plt.axvline(x=256, c="red", linestyle='--', dashes=(4,4), label="position of artifact", linewidth = 1)
plt.rcParams.update({'font.size': 16})
plt.xlabel('#Time steps [a.u.]') 
plt.ylabel("[a.u.]")
plt.title(f"gt = {int(gt[index])}; output = {preds_FCNTrans[index]}; prediction = {int(preds_FCNTrans[index] > 0.252)}")
plt.legend()
plt.show()