In [1]:
import os

# The notebook imports `src.utils.data` and reads `data/raw/...` relative to the
# working directory, so it must run from the directory that contains `src/`.
# Only step up when `src/` is not visible yet: an unconditional chdir("..")
# would climb one level further on every re-execution of this cell.
if not os.path.isdir("src"):
    os.chdir("..")
print(f"Changed working directory to: {os.getcwd()}")

Changed working directory to: /home/jovyan/work/FlareSense


In [2]:
import torch
import mlflow
import dagshub
import torch.nn as nn
import torch.optim as optim
import src.utils.data as data
import pytorch_lightning as pl
import torchvision.models as models

from torchvision import transforms
from torchmetrics.classification import BinaryPrecision, BinaryRecall

# Global, one-time configuration for the whole notebook.
# Select the 'high' float32 matmul precision mode in PyTorch.
torch.set_float32_matmul_precision('high')
# Enable MLflow autologging for PyTorch training runs.
mlflow.pytorch.autolog()


* 'schema_extra' has been renamed to 'json_schema_extra'
2023-11-07 10:34:48.423837: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-11-07 10:34:48.489329: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-11-07 10:34:48.504256: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-11-07 10:34:48.856257: W tensorflow/stream_executor/platform/defaul

In [3]:
class ResNet50BinaryClassifier(pl.LightningModule):
    """ResNet-50 with a single sigmoid output unit for binary classification.

    Only the ``layer3``, ``layer4``, ``avgpool`` and ``fc`` children of the
    backbone are trainable; every other child is frozen. Each batch is
    expected to be ``(images, info)`` where ``info['label']`` is an iterable of
    label strings: ``"no_burst"`` maps to class 0, any other value to class 1.
    Images are expanded to three channels before being fed to the backbone
    (assumes single-channel input -- TODO confirm against the data module).

    Args:
        weights: Optional torchvision weights forwarded to
            ``models.resnet50``. With the default ``None`` the backbone is
            randomly initialised (the previous behaviour); note that the
            frozen early layers then stay at their random values, so pass
            pretrained weights when transfer learning is intended.
        threshold: Probability cut-off used to turn sigmoid outputs into
            class predictions, for the metrics and in validation.
        learning_rate: Learning rate of the Adam optimizer.
    """

    # Label string of the negative class; every other label counts as positive.
    NEGATIVE_LABEL = "no_burst"

    def __init__(self, weights=None, threshold=0.5, learning_rate=0.001):
        super().__init__()
        self.threshold = threshold
        self.learning_rate = learning_rate

        # Keep the exact original constructor call when no weights are given.
        if weights is None:
            self.resnet50 = models.resnet50()
        else:
            self.resnet50 = models.resnet50(weights=weights)
        num_features = self.resnet50.fc.in_features
        self.resnet50.fc = nn.Sequential(nn.Linear(num_features, 1), nn.Sigmoid())

        # Freeze everything except the layers from `layer3` onwards.
        # NOTE(review): requires_grad=False does not stop BatchNorm layers in
        # the frozen part from updating their running statistics in train mode.
        layers_to_train = {"layer3", "layer4", "avgpool", "fc"}
        for name, child in self.resnet50.named_children():
            child.requires_grad_(name in layers_to_train)

        # Metrics (shared between validation and test, reset after each use).
        self.precision = BinaryPrecision(threshold=threshold)
        self.recall = BinaryRecall(threshold=threshold)

        # Per-epoch buffers for labels and predictions.
        self.test_labels = []
        self.test_preds = []
        self.val_outputs = []  # currently unused; kept for backward compatibility
        self.val_labels = []
        self.val_preds = []

    def forward(self, x):
        """Return the sigmoid probabilities produced by the backbone for ``x``."""
        return self.resnet50(x)

    def __step(self, batch):
        """Run the model on one batch.

        Returns:
            Tuple ``(outputs, binary_labels)``: the model probabilities and the
            float 0/1 targets as a column tensor on the same device as the images.
        """
        images, info = batch

        binary_labels = torch.tensor(
            [0.0 if label == self.NEGATIVE_LABEL else 1.0 for label in info["label"]],
            dtype=torch.float32,
            device=images.device,
        ).unsqueeze(1)

        # Replicate the channel dimension to the 3 channels ResNet expects.
        images = images.expand(-1, 3, -1, -1)
        return self(images), binary_labels

    def _log_precision_recall(self, stage, preds, labels):
        """Log ``{stage}_precision`` and ``{stage}_recall`` for the given tensors."""
        self.log(f"{stage}_precision", self.precision(preds, labels))
        self.log(f"{stage}_recall", self.recall(preds, labels))
        # The same metric objects serve validation and test; drop their
        # accumulated state so the stages never mix.
        self.precision.reset()
        self.recall.reset()

    def training_step(self, batch, batch_idx):
        """Compute and log the binary cross-entropy training loss."""
        outputs, binary_labels = self.__step(batch)
        loss = nn.functional.binary_cross_entropy(outputs, binary_labels)

        self.log("train_loss", loss)
        return loss

    def test_step(self, batch, batch_idx):
        """Compute and log the test loss and buffer predictions for the epoch metrics."""
        outputs, binary_labels = self.__step(batch)
        loss = nn.functional.binary_cross_entropy(outputs, binary_labels)

        # Store labels (as int, like in validation) and raw probabilities;
        # the metrics apply the threshold themselves.
        self.test_labels.append(binary_labels.int())
        self.test_preds.append(outputs)

        self.log(
            "test_loss", loss, on_step=True, on_epoch=True, prog_bar=True, logger=True
        )
        return loss

    def on_test_epoch_end(self):
        """Log precision/recall over all buffered test batches and clear the buffers."""
        if not self.test_preds:
            return  # nothing collected; torch.cat would raise on an empty list

        test_labels = torch.cat(self.test_labels, dim=0)
        test_preds = torch.cat(self.test_preds, dim=0)
        self._log_precision_recall("test", test_preds, test_labels)

        # Clear the buffers for the next epoch.
        self.test_labels = []
        self.test_preds = []

    def validation_step(self, batch, batch_idx):
        """Log the validation loss and buffer thresholded predictions for the epoch metrics."""
        outputs, binary_labels = self.__step(batch)

        # Log the validation loss so it can be monitored alongside train_loss.
        loss = nn.functional.binary_cross_entropy(outputs, binary_labels)
        self.log("val_loss", loss)

        # Store labels and hard predictions for the epoch-level metrics.
        predictions = (outputs >= self.threshold).int()
        self.val_labels.append(binary_labels.int())
        self.val_preds.append(predictions)

    def on_validation_epoch_end(self):
        """Log precision/recall over all buffered validation batches and clear the buffers."""
        if not self.val_preds:
            return  # nothing collected; torch.cat would raise on an empty list

        val_labels = torch.cat(self.val_labels, dim=0)
        val_preds = torch.cat(self.val_preds, dim=0)
        self._log_precision_recall("val", val_preds, val_labels)

        # Clear the buffers for the next validation run.
        self.val_labels = []
        self.val_preds = []

    def configure_optimizers(self):
        """Return an Adam optimizer over the module parameters (no weight decay)."""
        return optim.Adam(self.parameters(), lr=self.learning_rate, weight_decay=0)

In [4]:
# Seed Python, NumPy and torch (and dataloader workers) so the run is reproducible.
pl.seed_everything(42, workers=True)

# Create the model instance.
model = ResNet50BinaryClassifier()
data_folder_path = "data/raw/burst_images/"

data_module = data.ECallistoDataModule(
    data_folder=data_folder_path,
    transform=transforms.Compose(
        [
            transforms.ToPILImage(),
            transforms.Resize((193, 240), antialias=True),
            transforms.ToTensor(),
        ]
    ),
    batch_size=64,
    num_workers=16,
    val_ratio=0.15,
    test_ratio=0.15,
    split_by_date=True,
    filter_instruments=["australia_assa_02"],
)
data_module.setup()

dagshub.init("FlareSense", "FlareSense", mlflow=True)

# Use the run as a context manager: the MLflow run is closed even when
# training raises, so re-executing this cell never hits a stale active run.
with mlflow.start_run():
    # Create the trainer.
    trainer = pl.Trainer(max_epochs=50)

    # Train.
    trainer.fit(
        model,
        train_dataloaders=data_module.train_dataloader(),
        val_dataloaders=data_module.val_dataloader(),
    )

    # Evaluate on the test split.
    trainer.test(model, dataloaders=data_module.test_dataloader())

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type            | Params
----------------------------------------------
0 | resnet50  | ResNet          | 23.5 M
1 | precision | BinaryPrecision | 0     
2 | recall    | BinaryRecall    | 0     
----------------------------------------------
22.1 M    Trainable params
1.4 M     Non-trainable params
23.5 M    Total params
94.040    Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)

