In [1]:
%reload_ext autoreload
%autoreload 2   

import sys
# Extend the module search path with the sibling '../artitect/' directory
# (relative to the notebook's working directory). Presumably this is where the
# project modules imported in the next cell (artifact, data, ...) live — confirm.
sys.path.append('../artitect/')

In [3]:
import torch
import pickle
import warnings
import numpy as np
from torch.utils.data import DataLoader
from pathlib import Path
from itertools import repeat
from artifact import Saw, Saw_centered, Saw_centered_Francois
from data import ArtifactDataset, CachedArtifactDataset, RealisticArtifactDataset, CenteredArtifactDataset
from datetime import datetime
from sliding_window_detector import SlidingWindowTransformerDetector
from utilities import parameters_k
import pytz

# Global runtime settings for this notebook.
# Select torch's "high" float32 matmul precision mode (see the torch docs for
# the exact accuracy/speed trade-off of this setting).
torch.set_float32_matmul_precision("high")
# stop warnings: ignore every warning whose message matches the regex below.
warnings.filterwarnings("ignore", ".*does not have many workers.*")

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Pick the compute device: use CUDA when torch reports it as available,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
device

device(type='cpu')

In [5]:
# Hyperparameters are kept in one plain dictionary so that the whole
# configuration can be logged to W&B in a single call.
CONFIG = {
    # --- window ---
    "width": 64,  # width of window
    # --- convolution stack ---
    "convolution_features": [256, 128, 64, 32],
    "convolution_width": [5, 9, 17, 33],
    "convolution_dropout": 0.0,
    # --- transformer ---
    "transformer_heads": 2,
    "transformer_feedforward": 128,
    "transformer_layers": 2,
    "transformer_dropout": 0,
    # --- loss ---
    "loss": "label",
    "loss_boost_fp": 0,
    # --- artifact used when building the training dataset ---
    "artifact": Saw(),
    # Optimizer Parameter (none configured yet)
    # LearningRate Scheduler (none configured yet)
    # --- parameters for study ---
    "batch_size": 32,  # 'values': [32, 64, 128]
    # --- W&B bookkeeping ---
    "wandb_group_name": "test_setup",
    "wandb_project_name": "artifactory",
}

All settings.

In [6]:
# Location of the cached validation set; the window width from CONFIG is part
# of the file name.
val_file = Path(f"../data/val_allnoCinECGT{CONFIG['width']}.pkl")

# Names of the datasets that make up the validation set.
# 'CinCECGTorso' is deliberately left out of this list.
val_datasets = [
    "australian_electricity_demand_dataset",
    "electricity_hourly_dataset",
    "electricity_load_diagrams",
    "HouseholdPowerConsumption1",
    "london_smart_meters_dataset_without_missing_values",
    "solar_10_minutes_dataset",
    "wind_farms_minutely_dataset_without_missing_values",
    "ACSF1",
    "HouseTwenty",
    "Mallat",
    "MixedShapesRegularTrain",
    "Phoneme",
    "PigArtPressure",
    "PigCVP",
    "Rock",
    "SemgHandGenderCh2",
    "mitbih",
    "ptbdb",
    "etth",
    "ettm",
]

In [7]:
# Build the detector from CONFIG. The window size is stored under "width";
# every other constructor argument has the same name as its CONFIG key, so
# those are forwarded as keyword arguments.
model = SlidingWindowTransformerDetector(
    window=CONFIG["width"],
    **{
        key: CONFIG[key]
        for key in (
            "convolution_features",
            "convolution_width",
            "convolution_dropout",
            "transformer_heads",
            "transformer_feedforward",
            "transformer_layers",
            "transformer_dropout",
            "loss",
            "loss_boost_fp",
        )
    },
)
# Alternative model kept for reference (disabled):
# model = ConvolutionDetector(convolution_features=[128, 64, 32],
#                             convolution_width=[5, 9, 33],
#                             convolution_dilation=[1, 1, 1],
#                             convolution_dropout=0.0,
#                             convolution_activation="sigmoid")

# Run name: <model class>_<parameters_k(model)>_<Europe/Amsterdam timestamp>.
model_name = (
    f"{model.__class__.__name__}"
    f"_{parameters_k(model)}"
    f"_{datetime.now(pytz.timezone('Europe/Amsterdam')).strftime('%d-%m-%Y_%H:%M:%S')}"
)
CONFIG["wandb_run_name"] = model_name


# Names of the datasets used for training (the trailing numbers only count
# the entries).
train_datasets = [
    # "CinCECGTorso",  # do not train on this dataset for validation purposes
    "ETTm",  # 1
    "ETTh",  # 2
    "electricity_load_diagrams",  # 3
    "australian_electricity_demand_dataset",  # 4
    "Phoneme",  # 5
    "electricity_hourly_dataset",  # 6
    "HouseholdPowerConsumption1",  # 7
    "london_smart_meters_dataset_without_missing_values",  # 8
    "SemgHandGenderCh2",  # 9
    "PigCVP",  # 10
    "HouseTwenty",  # 11
    "wind_farms_minutely_dataset_without_missing_values",  # 12
    "ptbdb",  # 13
    "mitbih",  # 14
    "PigArtPressure",  # 15
    "solar_10_minutes_dataset",  # 16
    "Mallat",  # 17
    "MixedShapesRegularTrain",  # 18
    "Rock",  # 19
    "ACSF1",  # 20
]
print(model_name)

SlidingWindowTransformerDetector_528.96K_04-07-2024_13:30:37


  rank_zero_warn(


Loading data.

In [8]:
def load_series(
    names: list[str],
    split: str,
    path: str,
    width: "int | None" = None,
) -> tuple[list[np.ndarray], np.ndarray]:
    """Load the pickled series of several datasets and compute sampling weights.

    For every name the file ``{path}/{name}_{split}.pickle`` is unpickled.
    Series whose length is not strictly greater than ``width`` are dropped and
    the remaining ones are converted to ``float32`` arrays. Every kept series
    gets the raw weight ``1 / (number of kept series in its dataset)``, so each
    dataset contributes the same total weight; the weights are then normalised
    to sum to one.

    Loading is best-effort: a dataset that is missing, unreadable or has no
    usable series is reported on stdout and skipped.

    Args:
        names: Dataset names to load.
        split: Split suffix used in the file name (e.g. ``"TRAIN"``, ``"VAL"``).
        path: Directory containing the pickle files.
        width: Minimum length (exclusive) a series must have to be kept.
            Defaults to ``CONFIG['width']`` when omitted.

    Returns:
        A tuple ``(series, weights)``: the list of kept ``float32`` arrays and
        an array with one normalised weight per series (empty when nothing was
        loaded).
    """
    min_length = CONFIG['width'] if width is None else width
    series: list[np.ndarray] = []
    counts: list[float] = []
    for name in names:
        try:
            # SECURITY: pickle.load can execute arbitrary code; only point
            # this function at trusted files.
            with open(f"{path}/{name}_{split}.pickle", "rb") as f:
                raw = [a for a in pickle.load(f) if len(a) > min_length]
            # Convert everything before touching the result lists so that a
            # failure cannot leave `series` and `counts` out of step.
            arrays = [np.array(a).astype(np.float32) for a in raw]
        except FileNotFoundError:
            print(f"Dataset {name} not in input folder!")
            continue
        except Exception as error:
            # Still best-effort, but report what actually went wrong instead
            # of claiming the file is missing.
            print(f"Dataset {name} could not be loaded: {error!r}")
            continue
        if not arrays:
            # Avoids 1 / 0 for datasets whose series are all too short.
            print(f"Dataset {name} has no series longer than {min_length}!")
            continue
        series.extend(arrays)
        counts.extend(repeat(1 / len(arrays), len(arrays)))
    weights = np.array(counts)
    return series, np.divide(weights, np.sum(weights))

In [9]:
# train
# Training data: load the TRAIN split of every training dataset, wrap it in a
# CenteredArtifactDataset using the artifact configured in CONFIG, and batch
# it with a DataLoader.
train_data, train_weights = load_series(
    names=train_datasets,
    split="TRAIN",
    path="../data/processed",
)
train_dataset = CenteredArtifactDataset(
    train_data,
    width=CONFIG["width"],
    padding=64,
    artifact=CONFIG["artifact"],
    weight=train_weights,
)
train_loader = DataLoader(dataset=train_dataset, batch_size=CONFIG["batch_size"])

Dataset ETTm not in input folder!
Dataset ETTh not in input folder!
Dataset electricity_load_diagrams not in input folder!
Dataset australian_electricity_demand_dataset not in input folder!
Dataset Phoneme not in input folder!
Dataset electricity_hourly_dataset not in input folder!
Dataset HouseholdPowerConsumption1 not in input folder!
Dataset london_smart_meters_dataset_without_missing_values not in input folder!
Dataset HouseTwenty not in input folder!
Dataset wind_farms_minutely_dataset_without_missing_values not in input folder!
Dataset ptbdb not in input folder!
Dataset mitbih not in input folder!
Dataset PigArtPressure not in input folder!
Dataset solar_10_minutes_dataset not in input folder!
Dataset Mallat not in input folder!
Dataset MixedShapesRegularTrain not in input folder!
Dataset Rock not in input folder!
Dataset ACSF1 not in input folder!


In [13]:
# Validation data: load the VAL split of every validation dataset.
val_data, val_weights = load_series(
    names=val_datasets,
    split="VAL",
    path="../data/processed",
)


Dataset australian_electricity_demand_dataset not in input folder!
Dataset electricity_hourly_dataset not in input folder!
Dataset electricity_load_diagrams not in input folder!
Dataset HouseholdPowerConsumption1 not in input folder!
Dataset london_smart_meters_dataset_without_missing_values not in input folder!
Dataset solar_10_minutes_dataset not in input folder!
Dataset wind_farms_minutely_dataset_without_missing_values not in input folder!
Dataset ACSF1 not in input folder!
Dataset HouseTwenty not in input folder!
Dataset Mallat not in input folder!
Dataset MixedShapesRegularTrain not in input folder!
Dataset Phoneme not in input folder!
Dataset PigArtPressure not in input folder!
Dataset Rock not in input folder!
Dataset mitbih not in input folder!
Dataset ptbdb not in input folder!
Dataset etth not in input folder!
Dataset ettm not in input folder!


In [14]:
# Reload the TRAIN and VAL splits from the processed-data folder.
# NOTE(review): train_data is loaded from `val_datasets` here, which replaces
# the train_data/train_weights loaded from `train_datasets` earlier — confirm
# this is intended.
train_data, train_weights = load_series(val_datasets, "TRAIN", "../data/processed")
# Fixed the folder name typo ("peocessed"), which made every dataset fail to load.
val_data, val_weights = load_series(val_datasets, "VAL", "../data/processed")

Dataset australian_electricity_demand_dataset not in input folder!
Dataset electricity_hourly_dataset not in input folder!
Dataset electricity_load_diagrams not in input folder!
Dataset HouseholdPowerConsumption1 not in input folder!
Dataset london_smart_meters_dataset_without_missing_values not in input folder!
Dataset solar_10_minutes_dataset not in input folder!
Dataset wind_farms_minutely_dataset_without_missing_values not in input folder!
Dataset ACSF1 not in input folder!
Dataset HouseTwenty not in input folder!
Dataset Mallat not in input folder!
Dataset MixedShapesRegularTrain not in input folder!
Dataset Phoneme not in input folder!
Dataset PigArtPressure not in input folder!
Dataset Rock not in input folder!
Dataset mitbih not in input folder!
Dataset ptbdb not in input folder!
Dataset etth not in input folder!
Dataset ettm not in input folder!
Dataset australian_electricity_demand_dataset not in input folder!
Dataset electricity_hourly_dataset not in input folder!
Dataset el

In [18]:
input_path = "../data/processed"

# Reuse the cached validation set when its file already exists; otherwise
# generate 2048 samples from the VAL split and store them in `val_file`.
if val_file.exists():
    val = CachedArtifactDataset(file=val_file)
else:
    data, weights = load_series(val_datasets, split="VAL", path=input_path)
    val_gen = CenteredArtifactDataset(
        data,
        width=CONFIG["width"],
        padding=16,
        artifact=Saw_centered_Francois(),
        weight=weights,
    )
    val = CachedArtifactDataset.generate(val_gen, n=2048, to=val_file)

val_loader = DataLoader(val, batch_size=CONFIG["batch_size"])

Dataset australian_electricity_demand_dataset not in input folder!
Dataset electricity_hourly_dataset not in input folder!
Dataset electricity_load_diagrams not in input folder!
Dataset HouseholdPowerConsumption1 not in input folder!
Dataset london_smart_meters_dataset_without_missing_values not in input folder!
Dataset solar_10_minutes_dataset not in input folder!
Dataset wind_farms_minutely_dataset_without_missing_values not in input folder!
Dataset ACSF1 not in input folder!
Dataset HouseTwenty not in input folder!
Dataset Mallat not in input folder!
Dataset MixedShapesRegularTrain not in input folder!
Dataset Phoneme not in input folder!
Dataset PigArtPressure not in input folder!
Dataset Rock not in input folder!
Dataset mitbih not in input folder!
Dataset ptbdb not in input folder!
Dataset etth not in input folder!
Dataset ettm not in input folder!
