In [1]:
import os

os.chdir("..")
print(f"Changed working directory to: {os.getcwd()}")

Changed working directory to: /Users/patrickschuermann/Documents/GitHub/FlareSense


In [2]:
import torch
import mlflow
import dagshub
import torch.nn as nn
import torch.optim as optim
import src.utils.data as data
import pytorch_lightning as pl
import torchvision.models as models

from torchvision import transforms
from torchmetrics.classification import BinaryPrecision, BinaryRecall

mlflow.pytorch.autolog()
torch.set_float32_matmul_precision('high')

In [3]:
class ResNet50BinaryClassifier(pl.LightningModule):
    def __init__(self):
        super().__init__()
        self.resnet50 = models.resnet50()
        self.resnet50.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
        num_features = self.resnet50.fc.in_features
        self.resnet50.fc = nn.Linear(num_features, 1)

        # Deaktivieren der Gradienten für alle Layers außer dem letzten
        for param in self.resnet50.parameters():
            param.requires_grad = False
        for param in self.resnet50.fc.parameters():
            param.requires_grad = True


        # Initialisierung der Metriken
        self.precision = BinaryPrecision(threshold=0.5)
        self.recall = BinaryRecall(threshold=0.5)

        # Initialisieren der Listen
        self.test_labels = []
        self.test_preds = []
        self.val_outputs = []
        self.val_labels = []
        self.val_preds = []

    def forward(self, x):
        return self.resnet50(x)

    def __step(self, batch):
        images, info = batch

        binary_labels = [0 if label == "no_burst" else 1 for label in info['label']]
        binary_labels = torch.tensor(binary_labels).float().view(-1, 1)
        binary_labels = binary_labels.to(images.device)

        outputs = self(images)
        return outputs, binary_labels

    def training_step(self, batch, batch_idx):
        outputs, binary_labels = self.__step(batch)
        loss = nn.BCEWithLogitsLoss()(outputs, binary_labels)

        self.log("train_loss", loss)
        return loss

    def test_step(self, batch, batch_idx):
        outputs, binary_labels = self.__step(batch)
        loss = nn.BCEWithLogitsLoss()(outputs, binary_labels)

        # Labels und Vorhersagen für spätere Verwendung speichern
        self.test_labels.append(binary_labels)
        self.test_preds.append(outputs)

        # Berechnen und protokollieren des Verlusts
        self.log(
            "test_loss", loss, on_step=True, on_epoch=True, prog_bar=True, logger=True
        )
        return loss

    def on_test_epoch_end(self):
        # Konvertieren der gesammelten Daten in einzelne Tensoren
        test_labels = torch.cat(self.test_labels, dim=0)
        test_preds = torch.cat(self.test_preds, dim=0)

        # Berechnen der Metriken
        precision = self.precision(test_preds, test_labels)
        recall = self.recall(test_preds, test_labels)

        # Protokollieren der Metriken
        self.log("test_precision", precision)
        self.log("test_recall", recall)

        # Bereinigen der Listen für die nächste Epoche
        self.test_labels = []
        self.test_preds = []

    def validation_step(self, batch, batch_idx):
        outputs, binary_labels = self.__step(batch)

        # Labels und Vorhersagen für die spätere Verwendung speichern
        predictions = (outputs >= 0.5).int()
        self.val_labels.append(binary_labels.int())
        self.val_preds.append(predictions)

    def on_validation_epoch_end(self):
        # Alle Validierungsdaten wurden gesammelt, und wir sind am Ende der Epoche.
        val_labels = torch.cat(self.val_labels, dim=0)
        val_preds = torch.cat(self.val_preds, dim=0)

        # Berechnen der Metriken
        precision = self.precision(val_preds, val_labels)
        recall = self.recall(val_preds, val_labels)

        # Protokollieren der Metriken
        self.log("val_precision", precision)
        self.log("val_recall", recall)

        # Vergessen Sie nicht, die Listen für die nächste Validierungsrunde zu leeren
        self.val_labels = []
        self.val_preds = []

    def configure_optimizers(self):
        return optim.Adam(self.resnet50.fc.parameters(), lr=0.001, weight_decay=0)

In [5]:
# Erstellen Sie eine Instanz des Modells
model = ResNet50BinaryClassifier()
data_folder_path = "data/raw/burst_images/"

data_module = data.ECallistoDataModule(
    data_folder=data_folder_path,
    transform=transforms.Compose(
        [
            transforms.ToPILImage(),
            transforms.Resize((193, 240), antialias=True),
            transforms.ToTensor(),
        ]
    ),
    batch_size=64,
    num_workers=16,
    val_ratio=0.15,
    test_ratio=0.15,
    split_by_date=True,
    filter_instruments=["australia_assa_02"],
)
data_module.setup()

dagshub.init("FlareSense", "FlareSense", mlflow=True)
mlflow.start_run()
# Erstellen Sie einen Trainer für das Training
trainer = pl.Trainer(max_epochs=1)

# Starten Sie das Training
trainer.fit(
    model,
    train_dataloaders=data_module.train_dataloader(),
    val_dataloaders=data_module.val_dataloader(),
)

# Starten Sie die Tests
trainer.test(model, dataloaders=data_module.test_dataloader())

# Speichern Sie das Modell
torch.save(model.state_dict(), "models/ResNet50BinaryClassifier.pth")

# Beenden Sie die MLflow-Sitzung
mlflow.end_run()

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/Users/patrickschuermann/.local/share/virtualenvs/Deep_Learning-EdiEON_k/lib/python3.9/site-packages/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py:67: Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `pytorch_lightning` package, due to potential conflicts with other packages in the ML ecosystem. For this reason, `logger=True` will use `CSVLogger` as the default logger, unless the `tensorboard` or `tensorboardX` packages are found. Please `pip install lightning[extra]` or one of them to enable TensorBoard support by default

  | Name      | Type            | Params
----------------------------------------------
0 | resnet50  | ResNet          | 23.5 M
1 | precision | BinaryPrecision | 0     
2 | recall    | BinaryRecall    | 0     
----------------------------------------------
2.0 K

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/Users/patrickschuermann/.local/share/virtualenvs/Deep_Learning-EdiEON_k/lib/python3.9/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:436: Consider setting `persistent_workers=True` in 'val_dataloader' to speed up the dataloader worker initialization.
/Users/patrickschuermann/.local/share/virtualenvs/Deep_Learning-EdiEON_k/lib/python3.9/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:436: Consider setting `persistent_workers=True` in 'train_dataloader' to speed up the dataloader worker initialization.


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=1` reached.
/Users/patrickschuermann/.local/share/virtualenvs/Deep_Learning-EdiEON_k/lib/python3.9/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:436: Consider setting `persistent_workers=True` in 'test_dataloader' to speed up the dataloader worker initialization.


Testing: |          | 0/? [00:00<?, ?it/s]

/Users/patrickschuermann/.local/share/virtualenvs/Deep_Learning-EdiEON_k/lib/python3.9/site-packages/pytorch_lightning/utilities/data.py:77: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 64. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.
/Users/patrickschuermann/.local/share/virtualenvs/Deep_Learning-EdiEON_k/lib/python3.9/site-packages/pytorch_lightning/utilities/data.py:77: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 10. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.
