In [1]:
import os

# The `src` package imported further down (src.utils..., src.models...) and the relative
# data path are resolved against the current working directory. The notebook presumably
# starts one level below that directory, so step up once -- but only while `src` is not
# yet visible, otherwise re-running this cell would climb one directory higher each time.
if not os.path.isdir("src"):
    os.chdir("..")
print(f"Changed working directory to: {os.getcwd()}")

Changed working directory to: /mnt/nas05/clusterdata01/home2/p.schuermann/FlareSense


In [2]:
import itertools
import zipfile

import dagshub
import mlflow
import pytorch_lightning as pl
import torch
import torchmetrics
from huggingface_hub import snapshot_download
from torchvision import transforms
from tqdm.notebook import tqdm

import src.utils.data15min as data
from src.models.CustomCNN import CustomCNN

# Global, one-off configuration for the whole notebook.

# Select PyTorch's "high" float32 matmul precision setting.
torch.set_float32_matmul_precision("high")

# Turn on MLflow's PyTorch autologging for the runs started later.
mlflow.pytorch.autolog()



In [3]:
# Folder used as download target and as the data module's data folder.
DATA_FOLDER_PATH: str = "data/raw/exported/"

# Instruments whose archives are downloaded and extracted.
INSTRUMENTS: list[str] = [
    "Australia-ASSA_02",
    "Australia-ASSA_62",
]

# Value passed to the data module as `num_workers`.
NUM_WORKERS: int = 7

In [4]:
# Download one zip archive per instrument plus metadata.csv into DATA_FOLDER_PATH.
snapshot_download(
    "StellarMilk/ecallisto-bursts",
    repo_type="dataset",
    allow_patterns=[f"{instrument}.zip" for instrument in INSTRUMENTS] + ["metadata.csv"],
    local_dir=DATA_FOLDER_PATH,
    revision="main",
)

for instrument in INSTRUMENTS:
    extracted_dir = os.path.join(DATA_FOLDER_PATH, instrument)

    # An existing folder is taken to mean the archive was already extracted.
    if os.path.exists(extracted_dir):
        print(f"Skipping {instrument}")
        continue

    # Extract with the standard library instead of shelling out to `unzip`:
    # no external binary is required and paths are never interpolated into a shell command.
    print(f"Unzipping {instrument}")
    archive_path = os.path.join(DATA_FOLDER_PATH, f"{instrument}.zip")
    with zipfile.ZipFile(archive_path) as archive:
        archive.extractall(DATA_FOLDER_PATH)

Fetching 3 files:   0%|          | 0/3 [00:00<?, ?it/s]

Skipping Australia-ASSA_02
Skipping Australia-ASSA_62


In [5]:
# Hyper-parameter grid: every combination is trained and tested as a separate MLflow run.
lst_lr = [1e-3, 3e-4, 1e-4, 3e-5]
lst_weight_decay = [3e-3, 1e-3, 3e-4, 1e-4]
lst_dropout_p = [0.5]

# The data module is built and set up once, then reused by every run of the sweep.
data_module = data.ECallistoDataModule(
    data_folder=DATA_FOLDER_PATH,
    batch_size=64,
    num_workers=NUM_WORKERS,
    val_ratio=0.15,
    test_ratio=0.15,
    img_size=(193, 225),
    use_augmented_data=True,
    filter_instruments=INSTRUMENTS,  # reuse the constant instead of repeating the literal list
    seed=0,
)
data_module.setup()

# This call does not depend on any loop variable, so it is made once for the whole sweep.
dagshub.init("FlareSense", "FlareSense", mlflow=True)

for current_lr, current_weight_decay, current_dropout_p in itertools.product(lst_lr, lst_weight_decay, lst_dropout_p):
    model = CustomCNN(lr=current_lr, weight_decay=current_weight_decay, dropout_p=current_dropout_p)

    # The context manager ends the run even when fit/test raises, so a failed combination
    # cannot leave an active run behind that would make the next start_run() fail.
    with mlflow.start_run() as run:
        mlflow.log_params({
            "model": "CustomCNN",
            "dropout_p": current_dropout_p,
            "batch_size": data_module.batch_size,
            "val_ratio": data_module.val_ratio,
            "test_ratio": data_module.test_ratio,
            "use_data_augmentation": data_module.use_augmented_data,
            "filter_instruments": data_module.filter_instruments,
        })

        run_id = run.info.run_id
        print(f"Run ID: {run_id}")
        print(f"Link: https://dagshub.com/FlareSense/FlareSense/experiments/#/experiment/m_{run_id}")

        # A fresh trainer per combination.
        trainer = pl.Trainer(max_epochs=30, log_every_n_steps=1)

        trainer.fit(
            model,
            train_dataloaders=data_module.train_dataloader(),
            val_dataloaders=data_module.val_dataloader(),
        )

        trainer.test(model, dataloaders=data_module.test_dataloader())

Run ID: 6f99727e19384922a408401b1e9e56a1
Link: https://dagshub.com/FlareSense/FlareSense/experiments/#/experiment/m_6f99727e19384922a408401b1e9e56a1


/home2/p.schuermann/.local/lib/python3.10/site-packages/lightning_fabric/plugins/environments/slurm.py:191: The `srun` command is available on your system but is not used. HINT: If your intention is to run Lightning on SLURM, prepend your python command with `srun` like so: srun python3 /home2/p.schuermann/.local/lib/python3.10/site-pack ...


GPU available: True (cuda), used: True


TPU available: False, using: 0 TPU cores


IPU available: False, using: 0 IPUs


HPU available: False, using: 0 HPUs


/home2/p.schuermann/.local/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py:67: Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `pytorch_lightning` package, due to potential conflicts with other packages in the ML ecosystem. For this reason, `logger=True` will use `CSVLogger` as the default logger, unless the `tensorboard` or `tensorboardX` packages are found. Please `pip install lightning[extra]` or one of them to enable TensorBoard support by default


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]



  | Name      | Type            | Params
----------------------------------------------
0 | precision | BinaryPrecision | 0     
1 | recall    | BinaryRecall    | 0     
2 | conv1     | Conv2d          | 640   
3 | conv2     | Conv2d          | 73.9 K
4 | conv3     | Conv2d          | 295 K 
5 | conv4     | Conv2d          | 1.2 M 
6 | fc1       | Linear          | 5.5 M 
7 | fc2       | Linear          | 65    
8 | pool      | MaxPool2d       | 0     
9 | dropout   | Dropout         | 0     
----------------------------------------------
7.1 M     Trainable params
0         Non-trainable params
7.1 M     Total params
28.220    Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]