In [11]:
import pytorch_lightning as pl
import torch
import torch.nn as nn
import torchvision.models as models
import torch.optim as optim
from torchvision import transforms
from sklearn.metrics import precision_recall_fscore_support, classification_report
from torchmetrics.classification import BinaryPrecision, BinaryRecall

In [12]:
import torchmetrics
torchmetrics.__version__

'1.0.0'

In [13]:
import os

# go to root directory if needed
print(f"Current working directory: {os.getcwd()}")
if os.getcwd().split("/")[-1] == "notebooks":
    os.chdir("..")
    print(f"Changed working directory to: {os.getcwd()}")

Current working directory: /Users/patrickschuermann/Documents/GitHub/FlareSense


In [14]:
import src.utils.data as data


In [20]:
# Definieren Sie das PyTorch Lightning-Modell

class ResNet50BinaryClassifier(pl.LightningModule):
    def __init__(self):
        super().__init__()
        # Laden Sie das vordefinierte ResNet-50-Modell ohne den letzten Klassifikations-Layer
        self.resnet50 = models.resnet50(pretrained=False)
        self.resnet50.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
        num_features = self.resnet50.fc.in_features
        self.resnet50.fc = nn.Linear(num_features, 1) 

        # Initialisierung der Metriken
        self.precision = BinaryPrecision(threshold=0.5)
        self.recall = BinaryRecall(threshold=0.5)

        # Initialisieren Sie Listen, um Labels und Vorhersagen während des Tests zu sammeln
        self.test_labels = []
        self.test_preds = []
        self.val_outputs = []
        self.val_labels = []
        self.val_preds = []

    def forward(self, x):
        return self.resnet50(x)
    
    def __step(self, batch):
        images, _, labels_tuple, _ = batch

        binary_labels = [0 if label == 'no_burst' else 1 for label in labels_tuple]
        binary_labels = torch.tensor(binary_labels).float().view(-1, 1)
        binary_labels = binary_labels.to(images.device)

        outputs = self(images)
        return outputs, binary_labels


    def training_step(self, batch, batch_idx):
        outputs, binary_labels = self.__step(batch)
        loss = nn.BCEWithLogitsLoss()(outputs, binary_labels)
        
        self.log('train_loss', loss)
        return loss
    
    def test_step(self, batch, batch_idx):
        outputs, binary_labels = self.__step(batch)
        loss = nn.BCEWithLogitsLoss()(outputs, binary_labels)

        # Labels und Vorhersagen für die spätere Verwendung speichern
        # predictions = (outputs >= 0.5).int()
        self.test_labels.append(binary_labels)
        self.test_preds.append(outputs)

        # Berechnen und protokollieren des Verlusts
        self.log('test_loss', loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        return loss

    def on_test_epoch_end(self):
        # Konvertieren der gesammelten Daten in einzelne Tensoren
        test_labels = torch.cat(self.test_labels, dim=0)
        test_preds = torch.cat(self.test_preds, dim=0)

        # Umwandeln in Listen für sklearn
        # test_labels_list = test_labels.cpu().numpy().tolist()
        # test_preds_list = test_preds.cpu().numpy().tolist()
        
        # Berechnen von Precision, Recall und F1-Score
        # precision, recall, f1, _ = precision_recall_fscore_support(test_labels_list, test_preds_list, average='binary')
        # precision, recall = precision_recall(test_preds, test_labels, threshold = 0.5, average = "micro")
        
        precision = self.precision(test_preds, test_labels)
        recall = self.recall(test_preds, test_labels)

        # Protokollieren der Metriken
        self.log('test_precision', precision)
        self.log('test_recall', recall)
        # self.log('test_f1', torch.tensor(f1, dtype=torch.float32))
        
        # Bereinigen der Listen für die nächste Epoche
        self.test_labels = []
        self.test_preds = []

    def validation_step(self, batch, batch_idx):
        outputs, binary_labels = self.__step(batch)

        # Labels und Vorhersagen für die spätere Verwendung speichern
        predictions = (outputs >= 0.5).int()
        self.val_labels.append(binary_labels.int())
        self.val_preds.append(predictions)

    def on_validation_epoch_end(self):
        # Alle Validierungsdaten wurden gesammelt, und wir sind am Ende der Epoche.
        val_labels = torch.cat(self.val_labels, dim=0)
        val_preds = torch.cat(self.val_preds, dim=0)

        # Umwandeln in Listen für sklearn
        val_labels_list = val_labels.cpu().numpy().tolist()
        val_preds_list = val_preds.cpu().numpy().tolist()

        # Erstellen des Klassifikationsberichts
        report = classification_report(val_labels_list, val_preds_list, target_names=['no_burst', 'burst'])
        print("\nClassification Report:\n", report)

        # Vergessen Sie nicht, die Listen für die nächste Validierungsrunde zu leeren
        self.val_labels = []
        self.val_preds = []

    def configure_optimizers(self):
        optimizer = optim.Adam(self.parameters(), lr=0.001)
        return optimizer

# Erstellen Sie eine Instanz des Modells
model = ResNet50BinaryClassifier()
data_folder_path = "data/raw/burst_images/"

data_module = data.ECallistoDataModule(
    data_folder=data_folder_path,
    transform=transforms.Compose(
        [
            transforms.ToPILImage(),
            transforms.Resize((193, 240), antialias=True),
            transforms.ToTensor(),
        ]
    ),
    batch_size=64,
    num_workers=0,
    val_ratio=0.15,
    test_ratio=0.05,
)
data_module.setup()

# Erstellen Sie einen Trainer für das Training
trainer = pl.Trainer(max_epochs=1)

# Laden Sie den DataLoader für die Trainingsdaten
train_loader = data_module.train_dataloader()

# Starten Sie das Training
trainer.fit(model, train_loader)

test_loader = data_module.test_dataloader()
trainer.test(model, test_loader)

val_loader = data_module.val_dataloader()
trainer.validate(model, val_loader)


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/Users/patrickschuermann/.local/share/virtualenvs/Deep_Learning-EdiEON_k/lib/python3.9/site-packages/pytorch_lightning/trainer/configuration_validator.py:74: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.

  | Name      | Type            | Params
----------------------------------------------
0 | resnet50  | ResNet          | 23.5 M
1 | precision | BinaryPrecision | 0     
2 | recall    | BinaryRecall    | 0     
----------------------------------------------
23.5 M    Trainable params
0         Non-trainable params
23.5 M    Total params
94.015    Total estimated model params size (MB)
/Users/patrickschuermann/.local/share/virtualenvs/Deep_Learning-EdiEON_k/lib/python3.9/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may 

Epoch 0:   0%|          | 0/1081 [00:00<?, ?it/s] 

/Users/patrickschuermann/.local/share/virtualenvs/Deep_Learning-EdiEON_k/lib/python3.9/site-packages/pytorch_lightning/trainer/call.py:54: Detected KeyboardInterrupt, attempting graceful shutdown...
/Users/patrickschuermann/.local/share/virtualenvs/Deep_Learning-EdiEON_k/lib/python3.9/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


Testing DataLoader 0: 100%|██████████| 68/68 [00:23<00:00,  2.85it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
     test_loss_epoch        0.6744991540908813
     test_precision        0.006410256493836641
       test_recall          0.01785714365541935
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


/Users/patrickschuermann/.local/share/virtualenvs/Deep_Learning-EdiEON_k/lib/python3.9/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


Epoch 0:   0%|          | 0/1081 [07:45<?, ?it/s] [00:57<00:00,  3.55it/s]

Classification Report:
               precision    recall  f1-score   support

    no_burst       0.99      1.00      0.99     12792
       burst       0.00      0.00      0.00       175

    accuracy                           0.99     12967
   macro avg       0.49      0.50      0.50     12967
weighted avg       0.97      0.99      0.98     12967

Validation DataLoader 0: 100%|██████████| 203/203 [00:59<00:00,  3.42it/s]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


[{}]

In [None]:
data_module.test_dataloader().

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision.models import resnet50
from torchmetrics.classification import Precision, Recall, F1

class AutoEncoder(nn.Module):
    def __init__(self, model_params):
        super(AutoEncoder, self).__init__()
        self.encoder = nn.Sequential()
        k = model_params["encoder_kernel_size"]
        for _ in range(model_params["num_hidden_layers"]):
            self.encoder.add_module('conv', nn.Conv2d(in_channels=model_params["input_shape"][0], out_channels=model_params["encoder_filters"], kernel_size=k, padding='same'))
            self.encoder.add_module('bn', nn.BatchNorm2d(model_params["encoder_filters"]))
            self.encoder.add_module('relu', nn.ReLU(inplace=True))
            self.encoder.add_module('pool', nn.MaxPool2d(kernel_size=2, stride=2, padding='same'))
        self.encoded = nn.Sequential(
            nn.Conv2d(in_channels=model_params["encoder_filters"], out_channels=32, kernel_size=3, padding='same'),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        x = self.encoder(x)
        x = self.encoded(x)
        return x

class Classifier(nn.Module):
    def __init__(self, model_params):
        super(Classifier, self).__init__()
        self.flatten = nn.Flatten()
        self.layers = nn.Sequential()
        for _ in range(model_params["num_dense_layers"]):
            self.layers.add_module('dense', nn.Linear(in_features=np.product(model_params["input_shape"]), out_features=model_params["neurons_dense_layer"]))
            self.layers.add_module('bn', nn.BatchNorm1d(model_params["neurons_dense_layer"]))
            self.layers.add_module('relu', nn.ReLU(inplace=True))
            self.layers.add_module('drop', nn.Dropout(model_params["dropout"]))
        self.output = nn.Linear(in_features=model_params["neurons_dense_layer"], out_features=1)

    def forward(self, x):
        x = self.flatten(x)
        x = self.layers(x)
        x = self.output(x)
        return torch.sigmoid(x)

class CombinedModel(nn.Module):
    def __init__(self, model_params):
        super(CombinedModel, self).__init__()
        self.autoencoder = AutoEncoder(model_params)
        self.classifier = Classifier(model_params)

    def forward(self, x):
        x = self.autoencoder(x)
        x = self.classifier(x)
        return x