## CBAM ResNet Classifier
----


In [1]:
## libraries
import numpy as np
import yaml
import logging
import traceback
import wandb

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

import pytorch_lightning as pl
from pytorch_lightning.loggers import WandbLogger
from pytorch_lightning.callbacks import ModelCheckpoint, LearningRateMonitor, EarlyStopping


from models.data import LandsatDataModule
from models.nn import CBAMResNet
from models.losses import CombinedLoss
from models.trainers import FeatureAwareTrainer

In [2]:
# Logging: timestamped, INFO-level messages emitted through the root logger
LOG_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)
logger = logging.getLogger()




In [3]:
# Config: load the experiment settings (data module, model, losses, optimizer,
# scheduler sections are read from this dict by the cells below).
# NOTE(review): this is an absolute, machine-specific path; the notebook only
# runs on a machine where exactly this location exists.
CONFIG_FILE = '/teamspace/studios/this_studio/geointelligence/inegi-zindi/examples/configs/cbam_resnet_config.yaml'

# Decode explicitly as UTF-8 so non-ASCII text in the YAML does not depend on
# the platform's default encoding.
with open(CONFIG_FILE, 'r', encoding='utf-8') as file:
    # SECURITY NOTE(review): yaml.load is only appropriate for trusted files.
    # If this config can come from an untrusted source, or only uses plain
    # YAML types, prefer yaml.safe_load(file).
    config = yaml.load(file, Loader=yaml.FullLoader)

# Logged after the file handle has been closed.
logger.info("Config loaded successfully")

2024-09-28 05:34:57,147 - INFO - Config loaded successfully


In [4]:

# Build the LandsatDataModule from the 'data_module' section of the config
data_module_config = config['data_module']
data_module = LandsatDataModule.from_config(data_module_config)

logger.info("DataModule created successfully")

2024-09-28 05:35:44,227 - INFO - DataModule created successfully


In [6]:
# --- Model: built from the 'model' section of the config ---
model_config = config['model']
model = CBAMResNet.from_config(model_config)
logger.info("Model created successfully")

# --- Loss: built from the 'loss_functions' section of the config ---
# The model config is the source of truth for the embedding width: the center
# loss's 'feat_dim' is overwritten (in place) with the model's 'embedding_size'
# before the combined loss is created.
embedding_size = model_config['embedding_size']
loss_config = config['loss_functions']
loss_config['center']['params']['feat_dim'] = embedding_size
loss = CombinedLoss.from_config(loss_config)
logger.info("Loss function created successfully")

# --- Training module: wraps model and loss with optimizer/scheduler settings ---
optimizer_config = config['optimizer']
scheduler_config = config['scheduler']
trainer_module = FeatureAwareTrainer(
    model,
    loss,
    optimizer_config,
    scheduler_config,
)
logger.info("Feature Aware Trainer module created successfully")

# --- Weights & Biases run: named after the model class and embedding size ---
run_name = f"{model.get_class_name()}_embed{model_config['embedding_size']}"
wandb.init(
    project="INEGI",
    entity="geo-dl",
    config=config,
    name=run_name,
)

2024-09-28 05:37:42,781 - INFO - Model created successfully
2024-09-28 05:37:42,783 - INFO - Loss function created successfully
2024-09-28 05:37:42,790 - INFO - Feature Aware Trainer module created successfully


[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mjpoolcen[0m ([33mgeo-dl[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [7]:
# Setup wandb logger.
# NOTE: wandb.init() has already started a run earlier in this notebook, so
# this logger attaches to that active run instead of creating a new one
# (Lightning emits a warning about the reuse).
wandb_logger = WandbLogger(project="INEGI", entity="geo-dl")

model_name = f'inegi-{model.get_class_name()}'

# Keep the 3 best checkpoints, ranked by validation AUROC (higher is better).
# NOTE(review): the file name embeds 'val_loss' while the ranking metric is
# 'val_aucroc' -- confirm this mismatch is intentional.
checkpoint_callback = ModelCheckpoint(
    dirpath='checkpoints',
    filename=f'{model_name}'+'-{epoch:02d}-{val_loss:.2f}',
    save_top_k=3,
    monitor='val_aucroc',
    mode='max'
)

# Stop training once validation AUROC has stopped improving.
early_stop_callback = EarlyStopping(
    monitor='val_aucroc',  # Metric to monitor
    patience=10,           # Number of epochs with no improvement before stopping training
    verbose=True,          # To display messages during training
    mode='max',            # 'min' to reduce the metric, 'max' to maximize it
    min_delta=0.0          # Minimum improvement considered significant
)

# Learning rate monitor (logged once per epoch)
lr_monitor = LearningRateMonitor(logging_interval='epoch')

# Setup trainer: single device, GPU when available and CPU otherwise
trainer = pl.Trainer(
    max_epochs=30,
    logger=wandb_logger,
    log_every_n_steps=5,
    callbacks=[checkpoint_callback, lr_monitor, early_stop_callback],
    accumulate_grad_batches=1,
    devices=1,
    accelerator='gpu' if torch.cuda.is_available() else 'cpu'
)
logger.info("Lightning Trainer created successfully")

# Train the model. The wandb run is always closed, even when training raises,
# so a crashed fit does not leave an open run behind that later WandbLogger
# instances would silently reuse.
fit_succeeded = False
try:
    trainer.fit(trainer_module, data_module)
    fit_succeeded = True
finally:
    # A non-zero exit code marks the run as failed in wandb.
    wandb.finish(exit_code=0 if fit_succeeded else 1)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
2024-09-28 05:40:13,393 - INFO - Lightning Trainer created successfully
/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/pytorch_lightning/loggers/wandb.py:390: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiating `WandbLogger`.

  | Name      | Type            | Params
----------------------------------------------
0 | model     | CBAMResNet      | 5.1 M 
1 | loss      | CombinedLoss    | 256   
2 | accuracy  | BinaryAccuracy  | 0     
3 | precision | BinaryPrecision | 0     
4 | recall    | BinaryRecall    | 0     
5 | f1        | BinaryF1Score   | 0     
6 | aucroc    | BinaryAUROC     | 0     
----------------------------------------------
5.1 M     Trainable params
0         Non-trainable params
5.1 

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

  self.pid = os.fork()


TypeError: Caught TypeError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "h5py/h5t.pyx", line 1455, in h5py.h5t._c_int
KeyError: 0

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/torch/utils/data/_utils/worker.py", line 308, in _worker_loop
    data = fetcher.fetch(index)
  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 49, in fetch
    data = self.dataset.__getitems__(possibly_batched_index)
  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/torch/utils/data/dataset.py", line 364, in __getitems__
    return [self.dataset[self.indices[idx]] for idx in indices]
  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/torch/utils/data/dataset.py", line 364, in <listcomp>
    return [self.dataset[self.indices[idx]] for idx in indices]
  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/INEGI_Zindi-0.1.0-py3.10.egg/models/data/dataset.py", line 25, in __getitem__
    image = np.array(self.hdf['images'][idx], dtype=self.dtype)  # Shape: (16, 16, 6) unsigned int
  File "h5py/_objects.pyx", line 54, in h5py._objects.with_phil.wrapper
  File "h5py/_objects.pyx", line 55, in h5py._objects.with_phil.wrapper
  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/h5py/_hl/dataset.py", line 781, in __getitem__
    mtype = h5t.py_create(new_dtype)
  File "h5py/h5t.pyx", line 1663, in h5py.h5t.py_create
  File "h5py/h5t.pyx", line 1687, in h5py.h5t.py_create
  File "h5py/h5t.pyx", line 1705, in h5py.h5t.py_create
  File "h5py/h5t.pyx", line 1459, in h5py.h5t._c_int
TypeError: Unsupported integer size (0)
