In [63]:
%reload_ext autoreload
%autoreload 2   

import sys
sys.path.append('../src_jobs/')

In [64]:
import torch
import pickle
import warnings
import numpy as np
import mlflow
from lightning.pytorch import Trainer
from lightning.pytorch.loggers import MLFlowLogger
from lightning.pytorch.callbacks import ModelCheckpoint, LearningRateMonitor, RichProgressBar
from torch.utils.data import DataLoader
from pathlib import Path
from itertools import repeat
from artifact import Saw
from data import ArtifactDataset, CachedArtifactDataset
from detector import ConvolutionDetector
from utilities import parameters_k
from datetime import datetime
import pytz

# Global settings: set PyTorch's float32 matmul precision to "high" and
# suppress warnings whose message matches "does not have many workers".
torch.set_float32_matmul_precision("high")
warnings.filterwarnings("ignore", ".*does not have many workers.*")

In [65]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
device

device(type='cpu')

In [66]:
# Window width: passed as `width` to the datasets below; series that are not
# strictly longer than this are dropped when loading.
width = 512
# Hyperparameters kept for reference only: they are commented out and are not
# read by any cell in this notebook.
# convolution_features = [256, 128, 64, 32]
# convolution_width = [5, 9, 17, 33]
# convolution_dropout = 0.0
# transformer_heads = 2
# transformer_feedforward = 128
# transformer_layers = 2
# transformer_dropout = 0
# loss = "mask"
# loss_boost_fp = 0
# Artifact passed to the training and validation datasets.
artifact = Saw(min_width=4, max_width=32)
# Optimizer parameters: none configured here.
# Learning-rate scheduler: none configured here.
# Parameters for a study.
batch_size = 128  # batch size of both loaders; study candidates: [32, 64, 128]
# NOTE(review): group_name and project_name are not referenced by any other
# cell shown in this notebook - confirm whether they are still needed.
group_name = "test_setup"
project_name = "artifactory"

All settings.

In [67]:
# Model: a two-layer ConvolutionDetector (hyperparameters are set inline here,
# not taken from the settings cell).
model = ConvolutionDetector(convolution_features=[32, 16],
                            convolution_width=[3, 5],
                            convolution_dilation=[1, 1],
                            convolution_dropout=0.0,
                            convolution_activation="sigmoid")
# Name layout: <class name>_<parameters_k(model)>_<dd-mm-YYYY_HH:MM:SS>.
# datetime.now() is called without a timezone, so the stamp is naive local time.
model_name = f"{model.__class__.__name__}_{parameters_k(model)}_{datetime.now().strftime('%d-%m-%Y_%H:%M:%S')}"
# NOTE(review): run_name is not used by the mlflow.start_run call in this
# notebook, which passes a literal run name instead - confirm which is intended.
run_name = model_name

# Datasets whose "TEST" split feeds the validation set. Commented-out names
# are currently disabled.
val_datasets = [
    "australian_electricity_demand_dataset",
    # "electricity_hourly_dataset",
    # "electricity_load_diagrams",
    # "HouseholdPowerConsumption1",
    # "HouseholdPowerConsumption2",
    # "london_smart_meters_dataset_without_missing_values",
    # "solar_10_minutes_dataset",
    # "wind_farms_minutely_dataset_without_missing_values",
]
# Datasets whose "TRAIN" split feeds the training set. Commented-out names
# are currently disabled.
train_datasets = [
    "australian_electricity_demand_dataset",
    # "electricity_hourly_dataset",
    # "electricity_load_diagrams",
    # "HouseholdPowerConsumption1",
    # "HouseholdPowerConsumption2",
    # "london_smart_meters_dataset_without_missing_values",
    # "solar_10_minutes_dataset",
    # "wind_farms_minutely_dataset_without_missing_values",
]
print(model_name)

ConvolutionDetector_2.70K_12-01-2024_14:26:14


Loading data.

In [68]:
def load_series(names: list[str], split: str, min_width=None,
                data_dir="../data/processed"):
    """Load pickled time series together with normalised per-series weights.

    For every dataset in ``names`` the file
    ``{data_dir}/{name}_{split}.pickle`` is unpickled. Series whose length is
    not strictly greater than ``min_width`` are discarded and the remaining
    ones are converted to ``float32`` NumPy arrays.

    Every kept series of a dataset receives the weight ``1 / n_kept``, so each
    dataset that contributes at least one series carries the same total
    weight. The weights are finally normalised to sum to 1.

    Args:
        names: Dataset names (file name stems without the split suffix).
        split: Split suffix used in the file name, e.g. ``"TRAIN"``.
        min_width: Series must be strictly longer than this to be kept.
            ``None`` (default) falls back to the notebook-level ``width``.
        data_dir: Directory containing the pickle files.

    Returns:
        A tuple ``(series, weights)``: a list of 1 ``float32`` array per kept
        series, and a NumPy array with one weight per series (empty when no
        series was kept).
    """
    if min_width is None:
        # Preserve the original behaviour of filtering on the global window width.
        min_width = width

    series = []
    counts = []
    for name in names:
        # NOTE(security): pickle.load can execute arbitrary code; only point
        # this at files from a trusted source.
        with open(f"{data_dir}/{name}_{split}.pickle", "rb") as f:
            loaded = pickle.load(f)
        raw = [a for a in loaded if len(a) > min_width]
        if not raw:
            # No series is long enough in this dataset: skip it rather than
            # failing on 1 / 0 below.
            continue
        series.extend(np.array(a).astype(np.float32) for a in raw)
        counts.extend(repeat(1 / len(raw), len(raw)))
    counts = np.array(counts)
    if counts.size == 0:
        # Nothing was kept at all; avoid normalising by a zero sum.
        return series, counts
    return series, counts / counts.sum()

In [69]:
def check_accuracy(loader, model):
    """Return the fraction of samples in ``loader`` that ``model`` gets right.

    The model is switched to eval mode for the duration of the check and is
    put back into whatever mode (train or eval) it was in before the call,
    even if the forward pass raises.

    NOTE(review): this helper iterates ``(x, y)`` pairs, while the loaders
    built in this notebook yield dict batches (see ``batch["data"]``) - confirm
    it is still needed.

    Args:
        loader: Iterable yielding ``(x, y)`` pairs; ``x`` is flattened to
            ``(batch, -1)`` before the forward pass and ``y`` holds the
            expected class index per sample.
        model: Module whose output has one score per class along dimension 1.

    Returns:
        A scalar tensor with ``correct / total``; NaN if ``loader`` yielded
        no samples.
    """
    was_training = model.training
    num_correct = 0
    num_samples = 0
    model.eval()

    try:
        # Gradients are not needed for evaluation.
        with torch.no_grad():
            for x, y in loader:
                # Flatten everything except the batch dimension.
                x = x.reshape(x.shape[0], -1)

                # Predicted class = index of the highest score.
                scores = model(x)
                _, predictions = scores.max(1)

                num_correct += (predictions == y).sum()
                num_samples += predictions.size(0)
    finally:
        # Restore the caller's mode instead of unconditionally enabling training.
        model.train(was_training)

    if num_samples == 0:
        # Nothing was evaluated; avoid dividing by zero.
        return torch.tensor(float("nan"))
    return num_correct / num_samples


In [70]:
# train
# Training data: series from the "TRAIN" split plus their normalised weights,
# wrapped in an ArtifactDataset and batched by a DataLoader.
train_data, train_weights = load_series(train_datasets, "TRAIN")
train_dataset = ArtifactDataset(
    train_data,
    weight=train_weights,
    artifact=artifact,
    padding=64,
    width=width,
)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size)

In [71]:
# Validation set: load the cached file when it exists, otherwise build it via
# CachedArtifactDataset.generate (n=2048, to=val_file) from the "TEST" split.
# NOTE(review): the cache path depends only on `width` and hard-codes
# "australian"; changing val_datasets or the artifact does not change it.
val_file = Path(f"../data/validation{width}.australian.pkl")
if val_file.exists():
    val = CachedArtifactDataset(file=val_file)
else:
    val_data, val_weights = load_series(val_datasets, "TEST")
    val_gen = ArtifactDataset(
        val_data,
        weight=val_weights,
        artifact=artifact,
        padding=64,
        width=width,
    )
    val = CachedArtifactDataset.generate(val_gen, n=2048, to=val_file)
val_loader = DataLoader(dataset=val, batch_size=batch_size)

Sanity check.

In [72]:
# Pull one batch from the training loader and display its "data" entry.
batch = next(iter(train_loader))
batch["data"]

tensor([[0.5308, 0.5090, 0.5026,  ..., 0.4493, 0.4551, 0.4724],
        [0.3037, 0.3061, 0.3091,  ..., 0.1730, 0.1670, 0.1652],
        [0.2437, 0.2393, 0.2332,  ..., 0.1914, 0.1813, 0.1786],
        ...,
        [0.3147, 0.2959, 0.2694,  ..., 0.4088, 0.4269, 0.4550],
        [0.2275, 0.2498, 0.2953,  ..., 0.5478, 0.5325, 0.5079],
        [0.4945, 0.4959, 0.4924,  ..., 0.1746, 0.1923, 0.2314]])

In [73]:
# Point MLflow at the tracking server first: the experiment and the run below
# are created against whichever tracking URI is active when they are called,
# and the logger later reads the same URI via mlflow.get_tracking_uri().
mlflow.set_tracking_uri("http://127.0.0.1:5000")
mlflow.set_experiment("Training_mlFLow_tests")
mlflow.pytorch.autolog()
mlflow.start_run(run_name="baseline_1")



In [74]:
# Display the name of the experiment that the active MLflow run belongs to.
mlflow.get_experiment(mlflow.active_run().info.experiment_id).name

'Training_mlFLow_tests'

In [75]:

# from azureml.core.run import Run
# run = Run.get_context()
# mlflow_url = run.experiment.workspace.get_mlflow_tracking_uri()
# mlf_logger = MLFlowLogger(experiment_name=run.experiment.name, tracking_uri=mlflow_url)
# mlf_logger._run_id = run.id
# trainer.logger = mlf_logger

# run.complete()

Training!

In [76]:
# initialize callbacks
# Callbacks: log the learning rate at every step and keep only the single
# checkpoint with the lowest "validation" metric.
lr_monitor = LearningRateMonitor(logging_interval='step')
checkpointcallback = ModelCheckpoint(
    save_top_k=1,
    mode="min",
    monitor="validation",
)

# Logger bound to the MLflow run that is already active in this kernel.
logger = MLFlowLogger(
    experiment_name=mlflow.get_experiment(mlflow.active_run().info.experiment_id).name,
    run_id=mlflow.active_run().info.run_id,
    tracking_uri=mlflow.get_tracking_uri(),
    log_model="all",
)

# Trainer: capped at 1000 steps with a validation check interval of 500.
trainer = Trainer(
    callbacks=[
        checkpointcallback,
        lr_monitor,
        # RichProgressBar()
    ],
    val_check_interval=500,
    max_steps=1000,
    logger=logger,
)

# Fit the model on the training loader, validating on the cached set.
trainer.fit(
    model=model,
    train_dataloaders=train_loader,
    val_dataloaders=val_loader,
)

# Final validation pass on the model as it is after fitting; the returned
# metrics are shown as the cell output.
trainer.validate(model=model, dataloaders=val_loader)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name         | Type           | Params
------------------------------------------------
0 | convolutions | Sequential     | 2.7 K 
1 | f1_score     | BinaryF1Score  | 0     
2 | accuracy     | BinaryAccuracy | 0     
------------------------------------------------
2.7 K     Trainable params
0         Non-trainable params
2.7 K     Total params
0.011     Total estimated model params size (MB)


Epoch 0: : 1000it [00:36, 27.60it/s, v_num=d322, train_loss_step=0.0335, train_accuracy_step=0.964, train_f1_score_step=0.000, train_loss_epoch=0.0437, train_accuracy_epoch=0.953, train_f1_score_epoch=0.00156]

`Trainer.fit` stopped: `max_steps=1000` reached.


Epoch 0: : 1000it [00:36, 27.59it/s, v_num=d322, train_loss_step=0.0335, train_accuracy_step=0.964, train_f1_score_step=0.000, train_loss_epoch=0.0437, train_accuracy_epoch=0.953, train_f1_score_epoch=0.00156]




Validation DataLoader 0: 100%|██████████| 16/16 [00:00<00:00, 96.76it/s] 


[{'validation': 0.03300036862492561, 'validation_fp': 0.0033573145046830177}]

In [1]:
# End the active MLflow run. `mlflow` must already be imported in this
# kernel: run the imports cell first after a restart.
mlflow.end_run()

NameError: name 'mlflow' is not defined