In [2]:
# Reload edited modules automatically before each cell is executed.
%reload_ext autoreload
%autoreload 2

import sys
# Add ../src_jobs/ to the module search path (resolved relative to the
# kernel's working directory).
sys.path.append('../src_jobs/')

In [3]:
import pickle
from itertools import repeat
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import torch
from artifact import Saw
from detector import WindowTransformerDetector

from data import RealisticArtifactDataset, CachedArtifactDataset, TestArtifactDataset

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Turn off gradient tracking for all subsequent torch operations in this kernel.
torch.set_grad_enabled(False)

<torch.autograd.grad_mode.set_grad_enabled at 0x7f73f40ae4a0>

In [5]:
# Length threshold read by load_series(): only series strictly longer than
# this value are kept.
test_width = 1024

# Validation file handed to CachedArtifactDataset below.
# NOTE(review): absolute, machine-specific path -- consider deriving it from a
# configurable data directory so the notebook runs on other machines.
london_test = Path("/workspaces/AICoE_Ramping_Artefacts/artifactory-master/data/validation512.london.pkl")

In [6]:
def load_series(names: list[str], split: str, min_length: "int | None" = None):
    """Load the series of several datasets together with per-series sampling weights.

    For every ``name`` the file ``../data/processed/{name}_{split}.pickle`` is
    unpickled and only the series strictly longer than ``min_length`` are kept.

    Args:
        names: Dataset names used to build the pickle file names.
        split: Split suffix used in the pickle file names.
        min_length: Series of this length or shorter are dropped. When omitted,
            the notebook-level ``test_width`` is used.

    Returns:
        A tuple ``(series, weights)``. ``series`` is a list of float32 numpy
        arrays. ``weights`` is a numpy array with one entry per series: each
        series gets ``1 / (number of kept series in its dataset)`` and the
        weights are then normalised to sum to 1. Datasets without any
        sufficiently long series contribute nothing; if no dataset contributes,
        both results are empty.
    """
    if min_length is None:
        min_length = test_width

    series = []
    weights = []
    for name in names:
        # NOTE: pickle.load can execute arbitrary code -- only read trusted files.
        with open(f"../data/processed/{name}_{split}.pickle", "rb") as f:
            raw = [a for a in pickle.load(f) if len(a) > min_length]
        if not raw:
            # No series survived the length filter: skip the dataset instead of
            # evaluating 1 / len(raw), which would raise ZeroDivisionError.
            continue
        series.extend(np.array(a).astype(np.float32) for a in raw)
        weights.extend(repeat(1 / len(raw), len(raw)))
    weights = np.array(weights)
    return series, weights / weights.sum()

In [7]:
# Validation samples; the evaluation cells below read each sample's "data",
# "artifact" and "mask" entries.
val = CachedArtifactDataset(file=london_test)

In [8]:
from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential

# Authenticate
credential = DefaultAzureCredential()

# Get a handle to the workspace described by the local config.json
ml_client = MLClient.from_config(
    credential=credential,
    path="config.json",
)

Found the config file in: config.json


In [None]:
import azure.ai.ml._artifacts._artifact_utilities as artifact_utils

# Directory the AzureML outputs are downloaded to AND loaded from. Using one
# location for both removes the dependency on a machine-specific absolute path.
CHECKPOINT_DIR = "./checkpoints_transformer/"


def fetch_detector(asset_name: str, checkpoint_name: str, version: str = "1"):
    """Download one AzureML data asset and restore a detector checkpoint from it.

    Args:
        asset_name: Name of the registered data asset.
        checkpoint_name: File name of the checkpoint inside ``CHECKPOINT_DIR``.
        version: Version of the data asset.

    Returns:
        The ``WindowTransformerDetector`` restored from the checkpoint, moved
        to the CPU and switched to evaluation mode.
    """
    data_asset = ml_client.data.get(asset_name, version=version)
    # NOTE(review): this helper lives in a private azure-ai-ml module
    # (leading underscores) and may change between SDK releases.
    artifact_utils.download_artifact_from_aml_uri(
        uri=data_asset.path,
        destination=CHECKPOINT_DIR,
        datastore_operation=ml_client.datastores,
    )
    # All assets share one download directory, so the checkpoint is loaded
    # right after its own download, before a later asset could replace a file
    # of the same name.
    detector = WindowTransformerDetector.load_from_checkpoint(CHECKPOINT_DIR + checkpoint_name)
    # eval() disables dropout and similar train-time behaviour; disabling
    # gradients alone does not do that.
    return detector.cpu().eval()


first_detector = WindowTransformerDetector.load_from_checkpoint("../models/model.ckpt").cpu().eval()

transformer_detector_full = fetch_detector("output_Train_GPU_full_train", "epoch=0-step=50000.ckpt")
transformer_detector_6 = fetch_detector("output_Train_GPU_noLondon", "epoch=0-step=17000.ckpt")
transformer_detector_5 = fetch_detector("output_Train_GPU_mask_5TrainDS", "epoch=0-step=8000.ckpt")
transformer_detector_4 = fetch_detector("output_Train_GPU_mask_4TrainDS", "epoch=0-step=29000-v1.ckpt")
transformer_detector_3 = fetch_detector("output_Train_GPU_mask_3TrainDS", "epoch=0-step=12000.ckpt")
transformer_detector_2 = fetch_detector("output_Train_GPU_mask_2TrainDS", "epoch=0-step=7000.ckpt")
transformer_detector_1 = fetch_detector("output_Train_GPU_mask_1TrainDS", "epoch=0-step=1000-v4.ckpt")

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, mean_squared_error, confusion_matrix
import pandas as pd

# --- configuration -----------------------------------------------------------
threshold = 0.5  # decision threshold applied to the averaged window scores
stride = 64      # step between the start positions of consecutive windows
# NOTE(review): the window length of detector 1 is applied to every detector,
# which assumes all checkpoints share it -- confirm.
window = transformer_detector_1.window

# Detectors to evaluate; the insertion order is the row order inside `metrics`.
detectors = {
    'transformer_detector_1': transformer_detector_1,
    'transformer_detector_2': transformer_detector_2,
    'transformer_detector_3': transformer_detector_3,
    'transformer_detector_4': transformer_detector_4,
    'transformer_detector_5': transformer_detector_5,
    'transformer_detector_6': transformer_detector_6,
    'transformer_detector_full': transformer_detector_full,
}

# Evaluation mode disables dropout and similar train-time behaviour, so the
# scores are deterministic.
for detector in detectors.values():
    detector.eval()

metric_columns = ['detector', 'accuracy', 'precision', 'recall', 'mse',
                  'index', 'tn', 'fp', 'fn', 'tp']

# --- accumulation ------------------------------------------------------------
records = []                                             # one dict per (sample, detector)
scores_by_detector = {name: [] for name in detectors}   # continuous scores, all samples
gt = []                                                  # ground-truth mask, all samples
index = 0                                                # running sample counter

for sample in val:
    example = sample["data"]
    target = sample["mask"]
    length = len(example)

    # add artifact to data
    example_data = torch.tensor(example + sample["artifact"])

    starts = range(0, length - window + 1, stride)
    n_windows = (length - window) // stride + 1

    # Number of windows covering each position.
    masks = torch.zeros(length)
    for j in starts:
        masks[j : j + window] += 1
    # Positions at the tail that no window reaches have a count of 0; clamping
    # gives them a score of 0 instead of 0/0 = NaN, which would otherwise break
    # precision_recall_curve later on. Their thresholded label is 0 either way.
    coverage = masks.clamp(min=1)

    for name, detector in detectors.items():
        # One row per window; each row is zero outside its own window.
        predictions = torch.zeros((n_windows, length))
        for i, j in enumerate(starts):
            s = example_data[j : j + window]
            predictions[i, j : j + window] = detector(s.unsqueeze(0))

        # Average the overlapping window predictions for every position.
        averaged = predictions.sum(axis=0) / coverage
        scores_by_detector[name].extend(averaged.tolist())
        binary = np.where(averaged.numpy() > threshold, 1, 0)

        tn, fp, fn, tp = confusion_matrix(target, binary, labels=[0, 1]).ravel()
        records.append({
            'index': index,
            'detector': name,
            'accuracy': accuracy_score(target, binary),
            # zero_division=0 keeps the previous value (0) for samples without
            # predicted / true positives but without a warning per sample.
            'precision': precision_score(target, binary, zero_division=0),
            'recall': recall_score(target, binary, zero_division=0),
            # NOTE(review): computed on the thresholded labels, not on the
            # continuous scores, so this equals 1 - accuracy.
            'mse': mean_squared_error(target, binary),
            'tn': tn,
            'fp': fp,
            'fn': fn,
            'tp': tp,
        })

    gt.extend(target.tolist())
    index = index + 1

# Build the frame once instead of concatenating inside the loop.
metrics = pd.DataFrame(records, columns=metric_columns)

# Names used by the threshold-selection cells further down.
preds_1 = scores_by_detector['transformer_detector_1']
preds_2 = scores_by_detector['transformer_detector_2']
preds_3 = scores_by_detector['transformer_detector_3']
preds_4 = scores_by_detector['transformer_detector_4']
preds_5 = scores_by_detector['transformer_detector_5']
preds_6 = scores_by_detector['transformer_detector_6']
preds_full = scores_by_detector['transformer_detector_full']


In [12]:
# Group the per-sample rows once and reuse the grouping for both summaries.
rows_by_detector = metrics.groupby('detector')
# Mean of the per-sample scores for each detector.
metrics_total = rows_by_detector[['accuracy', 'precision', 'recall', 'mse']].mean()
# Confusion-matrix counts summed over all samples for each detector.
conf_mat_values = rows_by_detector[['tn', 'fp', 'fn', 'tp']].sum()

In [None]:
# Per-detector means of the per-sample accuracy, precision, recall and mse.
metrics_total

In [None]:
# Let pandas pick the best available dtypes for the summed confusion-matrix
# counts, then display the table.
conf_mat_values = conf_mat_values.convert_dtypes()
conf_mat_values

In [15]:
from sklearn.metrics import precision_recall_curve

# Continuous scores of every detector, processed in the same order as before.
scores_by_tag = {
    "1": preds_1,
    "2": preds_2,
    "3": preds_3,
    "4": preds_4,
    "5": preds_5,
    "6": preds_6,
    "full": preds_full,
}

# For each detector, pick the threshold whose precision + recall is largest
# (np.argmax returns the first maximum).
best_thresholds = {}
for tag, detector_scores in scores_by_tag.items():
    precision, recall, thresholds = precision_recall_curve(gt, detector_scores)
    J = precision + recall
    ix = np.argmax(J)
    best_thresholds[tag] = thresholds[ix]

best_thresh_1 = best_thresholds["1"]
best_thresh_2 = best_thresholds["2"]
best_thresh_3 = best_thresholds["3"]
best_thresh_4 = best_thresholds["4"]
best_thresh_5 = best_thresholds["5"]
best_thresh_6 = best_thresholds["6"]
best_thresh_full = best_thresholds["full"]


In [16]:
def summarise_at_threshold(name, detector_scores, cutoff):
    """Score one detector on the pooled validation data at a fixed threshold.

    Args:
        name: Label written to the ``detector`` column.
        detector_scores: Continuous scores for every position of every sample,
            aligned with the notebook-level ground truth ``gt``.
        cutoff: Decision threshold; positions scoring at or above it are
            labelled 1.

    Returns:
        A dict with one summary row (threshold, accuracy, precision, recall,
        mse and the confusion-matrix counts).
    """
    # precision_recall_curve defines its thresholds for `score >= threshold`,
    # so the same comparison is used here; a strict `>` would drop the samples
    # tied with the threshold and evaluate a different operating point.
    binary = np.where(np.asarray(detector_scores) >= cutoff, 1, 0)
    tn, fp, fn, tp = confusion_matrix(gt, binary, labels=[0, 1]).ravel()
    return {
        # `index` is the sample counter left over from the evaluation loop,
        # i.e. the number of evaluated samples.
        'index': index,
        'detector': name,
        'threshold': cutoff,
        'accuracy': accuracy_score(gt, binary),
        'precision': precision_score(gt, binary),
        'recall': recall_score(gt, binary),
        # NOTE(review): computed on the thresholded labels, so this equals
        # 1 - accuracy.
        'mse': mean_squared_error(gt, binary),
        'tn': tn,
        'fp': fp,
        'fn': fn,
        'tp': tp,
    }


# One summary row per detector, each at its own selected threshold.
metrics = pd.DataFrame([
    summarise_at_threshold('transformer_detector_1', preds_1, best_thresh_1),
    summarise_at_threshold('transformer_detector_2', preds_2, best_thresh_2),
    summarise_at_threshold('transformer_detector_3', preds_3, best_thresh_3),
    summarise_at_threshold('transformer_detector_4', preds_4, best_thresh_4),
    summarise_at_threshold('transformer_detector_5', preds_5, best_thresh_5),
    summarise_at_threshold('transformer_detector_6', preds_6, best_thresh_6),
    summarise_at_threshold('transformer_detector_full', preds_full, best_thresh_full),
])

In [None]:
# Number the summary rows 1..N. Deriving N from the frame avoids the
# length-mismatch ValueError a hard-coded list raises when the number of
# detectors changes.
metrics.index = pd.RangeIndex(1, len(metrics) + 1)
metrics

In [44]:
# Write the per-detector summary table to metrics.csv in the working directory.
metrics.to_csv("metrics.csv")

In [None]:
import matplotlib.pyplot as plt

precision_by_row = metrics["precision"]

# Line: precision of every row except the last one, plotted against the row index.
# NOTE(review): the title mentions 30000 steps while the loaded checkpoint file
# names carry different step counts -- confirm the wording.
ax = precision_by_row.iloc[:-1].plot(
    label="precision for training on #dataset",
    xlabel="number of training datasets",
    ylabel="precision",
    title="Precision after training for 30000 steps",
)
# Red cross at x = 7: precision of the last row.
ax.plot(7, precision_by_row.iloc[-1], 'rx', label="precision training on all datasets including validationset")
ax.legend()

In [None]:
import azure.ai.ml._artifacts._artifact_utilities as artifact_utils

# Download directory, also used for loading, so the cell no longer depends on a
# machine-specific absolute path.
noSolar_checkpoint_dir = "./checkpoints_transformer/"

data_asset = ml_client.data.get("output_Train_GPU_mask_5TrainDS_noSolar", version="1")
# NOTE(review): this helper lives in a private azure-ai-ml module and may
# change between SDK releases.
artifact_utils.download_artifact_from_aml_uri(
    uri=data_asset.path,
    destination=noSolar_checkpoint_dir,
    datastore_operation=ml_client.datastores,
)
# eval() disables dropout and similar train-time behaviour; disabling
# gradients alone does not do that.
transformer_detector_5nS = (
    WindowTransformerDetector
    .load_from_checkpoint(noSolar_checkpoint_dir + "epoch=0-step=17000-v1.ckpt")
    .cpu()
    .eval()
)


In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, mean_squared_error, confusion_matrix
import pandas as pd

threshold = 0.5  # decision threshold applied to the averaged window scores
stride = 64      # step between the start positions of consecutive windows
window = transformer_detector_5nS.window

# Evaluation mode disables dropout and similar train-time behaviour.
transformer_detector_5nS.eval()

records = []        # one dict per sample
preds_noSolar = []  # continuous scores, all samples
gt = []             # ground-truth mask, all samples
index = 0           # running sample counter

for sample in val:
    example = sample["data"]
    target = sample["mask"]
    length = len(example)

    # add artifact to data
    example_data = torch.tensor(example + sample["artifact"])

    # One row per window; each row is zero outside its own window.
    predictions_noSolar = torch.zeros(((length - window) // stride + 1, length))

    # Number of windows covering each position.
    masks = torch.zeros(length)
    for i, j in enumerate(range(0, length - window + 1, stride)):
        s = example_data[j : j + window]
        predictions_noSolar[i, j : j + window] = transformer_detector_5nS(s.unsqueeze(0))
        masks[j : j + window] += 1

    # Positions that no window reaches have a count of 0; clamping gives them a
    # score of 0 instead of 0/0 = NaN, which would otherwise break
    # precision_recall_curve later on. Their thresholded label is 0 either way.
    predictions_noSolar = predictions_noSolar.sum(axis=0) / masks.clamp(min=1)
    preds_noSolar.extend(predictions_noSolar.tolist())
    predictions_noSolar = np.where(predictions_noSolar.numpy() > threshold, 1, 0)
    gt.extend(target.tolist())

    tn_noSolar, fp_noSolar, fn_noSolar, tp_noSolar = confusion_matrix(target, predictions_noSolar, labels=[0, 1]).ravel()

    records.append({
        'index': index,
        'detector': 'transformer_detector_5noSolar',
        'accuracy': accuracy_score(target, predictions_noSolar),
        # zero_division=0 keeps the previous value (0) for samples without
        # predicted / true positives but without a warning per sample.
        'precision': precision_score(target, predictions_noSolar, zero_division=0),
        'recall': recall_score(target, predictions_noSolar, zero_division=0),
        # NOTE(review): computed on the thresholded labels, so this equals
        # 1 - accuracy.
        'mse': mean_squared_error(target, predictions_noSolar),
        'tn': tn_noSolar,
        'fp': fp_noSolar,
        'fn': fn_noSolar,
        'tp': tp_noSolar,
    })
    index = index + 1

# Build the frame once instead of concatenating inside the loop.
metrics = pd.DataFrame(
    records,
    columns=['detector', 'accuracy', 'precision', 'recall', 'mse',
             'index', 'tn', 'fp', 'fn', 'tp'],
)

In [47]:
from sklearn.metrics import precision_recall_curve

# Precision and recall for every candidate threshold of the noSolar scores.
precision, recall, thresholds = precision_recall_curve(gt, preds_noSolar)
# Keep the threshold whose precision + recall is largest (first maximum wins).
J = np.add(precision, recall)
ix = J.argmax()
best_thresh_noSolar = thresholds[ix]

In [48]:
# precision_recall_curve defines its thresholds for `score >= threshold`, so
# the same comparison is used here; a strict `>` would drop the samples tied
# with the threshold and evaluate a different operating point.
predictions_noSolar = np.where(np.asarray(preds_noSolar) >= best_thresh_noSolar, 1, 0)

tn_noSolar, fp_noSolar, fn_noSolar, tp_noSolar = confusion_matrix(gt, predictions_noSolar, labels=[0, 1]).ravel()

# Single summary row for the noSolar detector on the pooled validation data.
metrics_noSolar = pd.DataFrame([{
    # `index` is the sample counter left over from the evaluation loop.
    'index': index,
    'detector': 'transformer_detector_noSolar',
    'threshold': best_thresh_noSolar,
    'accuracy': accuracy_score(gt, predictions_noSolar),
    'precision': precision_score(gt, predictions_noSolar),
    'recall': recall_score(gt, predictions_noSolar),
    # NOTE(review): computed on the thresholded labels, so this equals
    # 1 - accuracy.
    'mse': mean_squared_error(gt, predictions_noSolar),
    'tn': tn_noSolar,
    'fp': fp_noSolar,
    'fn': fn_noSolar,
    'tp': tp_noSolar,
}])

In [None]:
# Summary row of the noSolar detector at its selected threshold.
metrics_noSolar

In [50]:
# Write the noSolar summary to metrics_noSolar.csv in the working directory.
metrics_noSolar.to_csv("metrics_noSolar.csv")