In [4]:
# (Re)load the autoreload extension and use mode 2 so edits to project
# modules are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2

import sys
# Add ../src_jobs/ to the import search path.
# NOTE(review): presumably this is where `artifact`, `detector`, `data` and
# `utilities` live — confirm. The path is relative to the kernel's working directory.
sys.path.append('../src_jobs/')

In [5]:
import pickle
from itertools import repeat
from pathlib import Path
import torch
import numpy as np
from artifact import Saw
from detector import WindowTransformerDetector

from data import RealisticArtifactDataset, CachedArtifactDataset, TestArtifactDataset

In [6]:
# Switch autograd off for the code that follows (function-call form of
# set_grad_enabled, so it is not limited to a `with` block).
# NOTE(review): the training cell further down relies on the Lightning Trainer
# turning gradients back on inside fit() — confirm for the installed version.
torch.set_grad_enabled(False)

<torch.autograd.grad_mode.set_grad_enabled at 0x7fb386692350>

In [7]:
# Length threshold read by load_series(): only series strictly longer than
# this are kept. Presumably equal to the detector window size — TODO confirm.
width = 512

# Pickle file with the cached London validation set.
# NOTE(review): hard-coded absolute path; consider deriving it from a
# configurable data directory.
london_test = Path("/workspaces/AICoE_Ramping_Artefacts/artifactory-master/data/validation512.london.pkl")

In [8]:
def load_series(
    names: list[str],
    split: str,
    path: str,
    min_length: "int | None" = None,
) -> tuple[list[np.ndarray], np.ndarray]:
    """Load pickled time series for several datasets and compute sampling weights.

    For every dataset name the file ``{path}/{name}_{split}.pickle`` is read.
    Series that are not strictly longer than the length threshold are dropped
    and the remaining ones are converted to ``float32`` arrays. Every kept
    series of a dataset receives the weight ``1 / (kept series in that
    dataset)``, so each contributing dataset carries the same total mass; the
    weights are finally normalised to sum to 1.

    Datasets without any usable series are skipped (previously this raised a
    ``ZeroDivisionError``).

    Args:
        names: Dataset names, used as file name prefixes.
        split: Split suffix of the file name, e.g. ``"TRAIN"``.
        path: Directory that contains the pickle files.
        min_length: Length threshold. ``None`` (default) falls back to the
            notebook-level ``width`` for backward compatibility.

    Returns:
        ``(series, weights)`` where ``series`` is a list of 1-D ``float32``
        arrays and ``weights`` is a ``float64`` array of the same length that
        sums to 1. Both are empty when no series qualifies.
    """
    threshold = width if min_length is None else min_length
    series: list[np.ndarray] = []
    weights: list[float] = []
    for name in names:
        # SECURITY: pickle.load can execute arbitrary code embedded in the
        # file — only point this at trusted, locally produced data.
        with open(f"{path}/{name}_{split}.pickle", "rb") as f:
            raw = [a for a in pickle.load(f) if len(a) > threshold]
        if not raw:
            # Nothing usable in this dataset; skipping avoids 1 / 0 below.
            continue
        series.extend(np.array(a).astype(np.float32) for a in raw)
        weights.extend(repeat(1 / len(raw), len(raw)))
    weights_arr = np.array(weights, dtype=np.float64)
    if weights_arr.size == 0:
        # No series at all: return the empty array instead of dividing by 0.
        return series, weights_arr
    return series, np.divide(weights_arr, np.sum(weights_arr))

In [9]:
# Load the cached London validation set from `london_test`.
# NOTE(review): `val` is assigned again in the training cell (from `val_path`),
# so this instance is not the one passed to the Trainer.
val = CachedArtifactDataset(file=london_test)

In [10]:
from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential

# Authenticate with the default Azure credential.
credential = DefaultAzureCredential()

# Get a handle to the workspace described by the local config.json
# (the path is relative to the kernel's working directory).
ml_client = MLClient.from_config(
    credential=credential,
    path="config.json",
)

Found the config file in: config.json


In [11]:
import azure.ai.ml._artifacts._artifact_utilities as artifact_utils

# Each entry pairs an Azure ML data asset with the checkpoint file that is
# loaded after the asset has been downloaded. Order matters: the assets are
# fetched and the checkpoints loaded one after the other, in this order.
_checkpoint_sources = (
    (
        "output_Train_GPU_full_train",
        "/workspaces/AICoE_Ramping_Artefacts/artifactory-master/notebooks/checkpoints_transformer/epoch=0-step=50000.ckpt",
    ),
    (
        "output_Train_GPU_noLondon",
        "/workspaces/AICoE_Ramping_Artefacts/artifactory-master/notebooks/checkpoints_transformer/epoch=0-step=17000.ckpt",
    ),
)

_loaded_detectors = []
for _asset_name, _checkpoint_file in _checkpoint_sources:
    # Resolve version 1 of the asset and download it next to the notebook.
    data_asset = ml_client.data.get(_asset_name, version="1")
    artifact_utils.download_artifact_from_aml_uri(
        uri=data_asset.path,
        destination="./checkpoints_transformer/",
        datastore_operation=ml_client.datastores,
    )
    # Restore the detector from the checkpoint and keep it on the CPU.
    _loaded_detectors.append(
        WindowTransformerDetector.load_from_checkpoint(_checkpoint_file).cpu()
    )

transformer_detector_full, transformer_detector_noLondon = _loaded_detectors

Your file exceeds 100 MB. If you experience low speeds, latency, or broken connections, we recommend using the AzCopyv10 tool for this file transfer.

Example: azcopy copy 'https://m3mlopssadev.blob.core.windows.net/azureml-blobstore-206414f2-5a5c-4209-8dbe-6d0e233cd920/artifactory/' './checkpoints_transformer/' 

See https://docs.microsoft.com/azure/storage/common/storage-use-azcopy-v10 for more information.
Your file exceeds 100 MB. If you experience low speeds, latency, or broken connections, we recommend using the AzCopyv10 tool for this file transfer.

Example: azcopy copy 'https://m3mlopssadev.blob.core.windows.net/azureml-blobstore-206414f2-5a5c-4209-8dbe-6d0e233cd920/artifactory/' './checkpoints_transformer/' 

See https://docs.microsoft.com/azure/storage/common/storage-use-azcopy-v10 for more information.
Your file exceeds 100 MB. If you experience low speeds, latency, or broken connections, we recommend using the AzCopyv10 tool for this file transfer.

Example: azcopy copy 'h

In [12]:
from pytorch_lightning import LightningModule
from detector import WindowTransformerDetector
from torch.nn import Linear, Sigmoid
from pytorch_lightning.utilities import grad_norm
from torch.nn.functional import binary_cross_entropy
from torch.optim import AdamW
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torchmetrics.classification import (
    BinaryAccuracy,
    BinaryF1Score,
)

class FineTunedSlidingWindowDetector(LightningModule):
    """Window-level binary classifier built on a pretrained ``WindowTransformerDetector``.

    The convolutions, positional encoding, dropout and transformer of the
    pretrained model are reused (the same module objects, not copies). Their
    per-token output is pooled to one feature vector per window, which a new
    linear layer and an activation turn into one score per window.
    """

    def __init__(
        self,
        pretrainedModel: WindowTransformerDetector, 
        pooling: str = 'avg',
        act_fct = Sigmoid()
    ):
        """
        Args:
            pretrainedModel: Detector whose feature extractor is reused.
            pooling: ``"cls"`` (first token), ``"max"`` or ``"avg"`` over the
                sequence axis. Any other value selects a learned linear
                projection over the sequence axis.
            act_fct: Activation applied to the final logit.
        """
        super().__init__()
        self.window = pretrainedModel.window
        # key of the batch entry that holds the training target
        self.loss = 'label'
        self.loss_boost_fp = pretrainedModel.loss_boost_fp
        # NOTE(review): this also captures the nn.Module arguments as
        # hyperparameters; kept as-is so existing checkpoints load unchanged.
        self.save_hyperparameters()
        # convolutional layers to extract features
        self.convolutions = pretrainedModel.convolutions
        # positional encoding for transformer
        self.position = pretrainedModel.position
        self.dropout = pretrainedModel.dropout
        # transformer block
        self.transformer = pretrainedModel.transformer
        # pooling strategy that collapses the sequence axis
        self.pooling = pooling
        if pooling in ("cls", "max", "avg"):
            self.pool_linear = None
        else:
            # Learned pooling. It has to be a registered submodule so that it
            # is trained, follows the module's device and is checkpointed.
            # Assumes the sequence length after the convolutions equals
            # `window`.
            self.pool_linear = Linear(self.window, 1)
        # final linear block to convert the pooled
        # features to a single value
        self.linear = Linear(pretrainedModel.convolutions[-2].out_channels, 1)
        self.act_fct = act_fct
        self.f1_score = BinaryF1Score(multidim_average="global")
        self.accuracy = BinaryAccuracy(multidim_average="global")

    def forward(self, x):
        """
        Input: (batch, window)
        Output: (batch,) — one activated score per window
        """
        x = x.unsqueeze(1)
        x = self.convolutions(x)
        # transpose to use convolution features
        # as datapoint embeddings
        x = x.transpose(1, 2)
        # apply position and transformer block
        x = self.position(x)
        # apply dropout before transformer
        x = self.dropout(x)
        x = self.transformer(x)
        # different options of pooling
        if self.pooling == "cls":
            x = x[:, 0, :]
        elif self.pooling == "max":
            x = x.max(1)[0]
        elif self.pooling == "avg":
            x = x.mean(1)
        else:
            # learned linear layer instead of pooling
            x = x.transpose(1, 2)
            x = self.pool_linear(x)
            # drop only the projected axis; a bare squeeze() would also drop
            # the batch axis for a batch of size 1
            x = x.squeeze(-1)
        # convert pooled features to a single logit per window
        x = self.linear(x)
        x = x.squeeze(-1)
        # activation function for binary classification
        x = self.act_fct(x)
        return x

    def training_step(self, batch, _):
        """Compute the BCE loss (optionally boosted on negatives) and log metrics."""
        y = self.forward(batch["data"] + batch["artifact"])
        m = batch[self.loss].float()
        loss = binary_cross_entropy(y, m)
        if 0 < self.loss_boost_fp <= 1:
            negatives = m == 0
            # BCE over an empty selection is NaN and would poison the loss,
            # so the boost is skipped for batches without negatives.
            if negatives.any():
                loss_fp = binary_cross_entropy(y[negatives], m[negatives])
                loss = loss + self.loss_boost_fp * loss_fp
                self.log("train_fp", loss_fp.item())
        accuracy = self.accuracy(y, m)
        f1_score = self.f1_score(y, m)
        self.log_dict(
            {
                "train_loss": loss.item(),
                "train_accuracy": accuracy,
                "train_f1_score": f1_score,
            },
            on_step=True,
            on_epoch=True,
            prog_bar=True,
        )
        self.log("train", loss.item())
        return loss

    def validation_step(self, batch, _):
        """Log the BCE loss on the batch and, separately, on its negative samples."""
        y = self.forward(batch["data"] + batch["artifact"])
        m = batch[self.loss].float()
        loss = binary_cross_entropy(y, m)
        self.log("validation", loss.item())
        negatives = m == 0
        # only log the negative-only loss when it is defined (see training_step)
        if negatives.any():
            loss_fp = binary_cross_entropy(y[negatives], m[negatives])
            self.log("validation_fp", loss_fp.item())
        return loss

    def configure_optimizers(self):
        """AdamW plus a plateau scheduler that monitors the ``validation`` metric."""
        optimizer = AdamW(self.parameters(), lr=1e-3)
        scheduler = ReduceLROnPlateau(
            optimizer,
            mode="min",
            factor=0.1,
            patience=1,
            min_lr=1e-6,
            verbose=True,
        )
        return {
            "optimizer": optimizer,
            "lr_scheduler": {
                "scheduler": scheduler,
                # step-based schedule with frequency 1000
                "interval": "step",
                "frequency": 1000,
                "name": "learning_rate",
                "strict": True,
                "monitor": "validation",
            },
        }

    def on_before_optimizer_step(self, _):
        """Log the 2-norm of the gradients before every optimizer step."""
        self.log_dict(grad_norm(self, norm_type=2))

In [13]:
from utilities import parameters_k
from datetime import datetime

# Wrap the detector trained without the London data in the fine-tuning head.
model = FineTunedSlidingWindowDetector(
    pretrainedModel=transformer_detector_noLondon,
    pooling="avg",
    act_fct=Sigmoid(),
)

# Run name: <class name>_<parameters_k(model)>_<creation timestamp>.
_class_tag = type(model).__name__
_size_tag = parameters_k(model)
_time_tag = datetime.now().strftime('%d-%m-%Y_%H:%M:%S')
model_name = "_".join(str(part) for part in (_class_tag, _size_tag, _time_tag))

  rank_zero_warn(
  rank_zero_warn(


In [14]:
# Pickle file opened by CachedArtifactDataset in the training cell.
val_path = "/workspaces/AICoE_Ramping_Artefacts/artifactory-master/data/validation_slidingWindow_512.pkl"
# Directory with the per-dataset pickles read by load_series().
input_path = "/workspaces/AICoE_Ramping_Artefacts/artifactory-master/data/processed"
# Directory handed to ModelCheckpoint as `dirpath`.
# NOTE(review): all three are hard-coded absolute paths; consider a configurable base directory.
output_path = "/workspaces/AICoE_Ramping_Artefacts/artifactory-master/data/output"

In [15]:
# Dataset name prefixes passed to load_series(); each resolves to
# "<input_path>/<name>_<split>.pickle". Commented-out entries are excluded
# from training (presumably the London data is held out because the
# fine-tuned base model is the "noLondon" checkpoint — TODO confirm).
train_datasets = [
    "australian_electricity_demand_dataset",
    "electricity_hourly_dataset",
    "electricity_load_diagrams",
    "HouseholdPowerConsumption1",
    #"HouseholdPowerConsumption2",
    #"london_smart_meters_dataset_without_missing_values",
    "solar_10_minutes_dataset",
    "wind_farms_minutely_dataset_without_missing_values",
]

In [16]:
import mlflow

def print_auto_logged_info(r):
    """Print what MLflow recorded for a run.

    Shows the run id, the artifact paths listed under ``model``, the logged
    params and metrics, and every tag whose key does not start with
    ``mlflow.``.

    Args:
        r: Run object exposing ``info.run_id`` and ``data.tags`` /
            ``data.params`` / ``data.metrics`` (e.g. from ``mlflow.get_run``).
    """
    # keep only user-defined tags; "mlflow."-prefixed ones are filtered out
    user_tags = {}
    for key, value in r.data.tags.items():
        if key.startswith("mlflow."):
            continue
        user_tags[key] = value

    # paths of everything stored below the run's "model" artifact directory
    model_files = []
    for entry in mlflow.MlflowClient().list_artifacts(r.info.run_id, "model"):
        model_files.append(entry.path)

    print(f"run_id: {r.info.run_id}")
    print(f"artifacts: {model_files}")
    print(f"params: {r.data.params}")
    print(f"metrics: {r.data.metrics}")
    print(f"tags: {user_tags}")

In [17]:
from artifact import Saw_centered
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import LearningRateMonitor, ModelCheckpoint
from pytorch_lightning.loggers import MLFlowLogger
from torch.utils.data import DataLoader
from data import CenteredArtifactDataset

batch_size=32

# validation: cached sliding-window validation set
val_file = Path(val_path)
val = CachedArtifactDataset(file=val_file)
val_loader = DataLoader(val, batch_size=batch_size)

# train: series are loaded from the processed pickles; artifacts come from Saw_centered
train_data, train_weights = load_series(train_datasets, "TRAIN", str(input_path))
train_dataset = CenteredArtifactDataset(
    train_data,
    width=model.window,
    padding=64,
    artifact=Saw_centered(),
    weight=train_weights,
)
train_loader = DataLoader(train_dataset, batch_size=batch_size)

# sanity check: pull one batch and show the shape of every entry
# (a bare expression in the middle of a cell displays nothing)
print("sanity Check: ")
batch = next(iter(train_loader))
print({key: tuple(value.shape) for key, value in batch.items() if hasattr(value, "shape")})

# initialize callbacks
checkpointcallback = ModelCheckpoint(
    monitor="validation",
    mode="min",
    save_top_k=1,
    dirpath=output_path,
)
lr_monitor = LearningRateMonitor(logging_interval="step")

# initialize logger
# NOTE(review): the logger is given the same run name as the
# mlflow.start_run() call below — check whether two runs are intended.
logger = MLFlowLogger(
    log_model="all",
    run_name=model_name,
    experiment_name="transformer_detector_iter_ds",
    tracking_uri=mlflow.get_tracking_uri(),
)
print("mlflow_uri: ", mlflow.get_tracking_uri())

# initialize trainer (short smoke run: 100 steps, validation every 20 steps)
trainer = Trainer(
    logger=logger,
    max_steps=100,
    val_check_interval=20,
    callbacks=[checkpointcallback, lr_monitor],
    # accelerator="gpu",
    # devices=1,
)

# Set mlflow_experiment:
mlflow.set_experiment("transformer_detector_iter_ds")
# Auto log all MLflow entities
mlflow.pytorch.autolog(log_every_n_step=10)

# Train the model. The context manager ends the run when the block exits,
# so no explicit mlflow.end_run() is needed afterwards.
with mlflow.start_run(
    # nested=False,
    run_name=model_name,
    # experiment_id="transformer_detector_iter_ds",
) as run:
    print("Starting training.")
    trainer.fit(model, train_dataloaders=train_loader, val_dataloaders=val_loader)

print("Training completed.")

# Fetch the auto logged parameters and metrics.
print_auto_logged_info(mlflow.get_run(run_id=run.info.run_id))

print("Job completed successfully.")


sanity Check: 
mlflow_uri:  file:///workspaces/AICoE_Ramping_Artefacts/artifactory-master/notebooks/mlruns


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")

  | Name         | Type                        | Params
-------------------------------------------------------------
0 | convolutions | Sequential                  | 503 K 
1 | position     | SinusoidalPositionEmbedding | 0     
2 | dropout      | Dropout                     | 0     
3 | transformer  | TransformerEncoder          | 25.4 K
4 | linear       | Linear                      | 33    
5 | act_fct      | Sigmoid                     | 0     
6 | f1_score     | BinaryF1Score               | 0     
7 | accuracy     | BinaryAccuracy              | 0     
-------------------------------------------------------------
528 K     Trainable params
0         Non-trainable params
528 K     Total params
2.116     Total estimated model params size (MB)


Starting training.
Sanity Checking DataLoader 0:   0%|          | 0/2 [00:00<?, ?it/s]

  rank_zero_warn(


                                                                           

  rank_zero_warn(
  rank_zero_warn(


Epoch 0: : 100it [03:18,  1.99s/it, v_num=9861, train_loss_step=0.357, train_accuracy_step=0.812, train_f1_score_step=0.750, train_loss_epoch=0.436, train_accuracy_epoch=0.796, train_f1_score_epoch=0.717]

`Trainer.fit` stopped: `max_steps=100` reached.


Epoch 0: : 100it [03:18,  1.99s/it, v_num=9861, train_loss_step=0.357, train_accuracy_step=0.812, train_f1_score_step=0.750, train_loss_epoch=0.436, train_accuracy_epoch=0.796, train_f1_score_epoch=0.717]


: 