In [8]:
import os

import albumentations as A
import lightning as L
import lightning.pytorch as pl
from albumentations.pytorch import ToTensorV2
from einops import rearrange
from lightning.pytorch.callbacks import ModelCheckpoint
from lightning.pytorch.loggers import TensorBoardLogger
from lightning.pytorch.loggers import WandbLogger
from pytorch_eo.datasets.cyclone.TropicalCycloneDataset import WindData
from pytorch_eo.tasks.wind_speed.wind_speed_detection import WindSpeedDetection
from torchvision import transforms

########################################################
## Train a model for wind speed estimation with the Tropical Cyclone Dataset.
## When the data is downloaded via EOTDL, the metadata files training_set_features.csv and training_set_labels.csv are missing.
# Use azcopy and Source Cooperative to download the metadata:
# /fastdata/azcopy sync https://radiantearth.blob.core.windows.net/mlhub/nasa-tropical-storm-challenge . --recursive=false
# Code from: https://drivendata.co/blog/predict-wind-speeds-benchmark/
# Organize the content downloaded via EOTDL into train and test folders: tropical-cyclone-dataset/v1/train and tropical-cyclone-dataset/v1/test.
# Untar the folder nasa_tropical_storm_competition_train_source inside the train folder and the folder nasa_tropical_storm_competition_test_source inside the test folder.
# Now you can start training.


# Hyperparameters for the run. The whole dict is unpacked into
# WindSpeedDetection(**hparams) and individual entries are reused for the data
# module, logger, checkpoint callback and trainer.
# NOTE: because every key is forwarded to the model constructor, do not add
# keys here unless the model accepts them.
hparams = {
    "lr": 2e-4,
    "embedding_dim": 100,
    "dropout": 0.1,
    "max_epochs": 10,
    # batch_size / num_workers are the single source of truth for the data
    # module. They are set to the values the loaders were already running with
    # (32 / 8); previously this dict declared 10 / 0 while WindData was given
    # hard-coded 32 / 8, so the values seen by the model did not match reality.
    "batch_size": 32,
    "num_workers": 8,
    "gradient_clip_val": 1,
    "val_sanity_checks": 0,
    "output_path": "model-outputs",
    "log_path": "logs",
}

# Preprocessing shared by the train/val/test splits: resize to 224x224,
# convert to a tensor, then normalize the three channels.
# NOTE(review): mean/std look like the usual ImageNet statistics — confirm
# they match the backbone used by WindSpeedDetection.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Instantiate the data module.
# The dataset root is resolved against the current user's home directory
# rather than one developer's absolute path, so the script runs unchanged on
# other machines that use the same EOTDL cache layout.
ds = WindData(
    data_path=os.path.expanduser(
        "~/.cache/eotdl/datasets/tropical-cyclone-dataset/v1"
    ),  # path to downloaded data
    batch_size=hparams["batch_size"],
    num_workers=hparams["num_workers"],
    train_transform=transform,
    val_transform=transform,
    test_transform=transform,
)

# Build the model; every entry of hparams is forwarded as a keyword argument.
model = WindSpeedDetection(**hparams)

# TensorBoard logger rooted at hparams["log_path"], run name "benchmark_model".
logger = TensorBoardLogger(name="benchmark_model", save_dir=hparams["log_path"])

# Checkpoint callback: watches "avg_epoch_val_loss" (lower is better) and
# writes checkpoints into hparams["output_path"].
# NOTE(review): presumably WindSpeedDetection logs a metric with exactly this
# name — confirm against the task implementation.
checkpoint_callback = ModelCheckpoint(
    monitor="avg_epoch_val_loss",
    mode="min",
    dirpath=hparams["output_path"],
    verbose=True,
)

# Gather the trainer options in one table before constructing the Trainer.
trainer_kwargs = {
    "logger": logger,
    "callbacks": [checkpoint_callback],
    "max_epochs": hparams["max_epochs"],
    "gradient_clip_val": hparams["gradient_clip_val"],
    "num_sanity_val_steps": hparams["val_sanity_checks"],
    "default_root_dir": hparams["output_path"],
}
trainer = pl.Trainer(**trainer_kwargs)

# Kick off training with the model and the data module.
trainer.fit(model, ds)


Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
