In [None]:
# Import libraries
import datetime as dt
import os
from argparse import ArgumentParser

import pytorch_lightning as pl
import torch
from pytorch_lightning.loggers import TensorBoardLogger

from sslcd import SEN12MSDataModule, CloudDetectionDataModule
from sslcd import DeepCluster, MoCo, ResNet
from sslcd import seed_all
from sslcd import DelayedUnfreeze
# Relax float32 matmul precision globally for every cell below.
# NOTE(review): per the PyTorch docs 'medium' trades precision for speed (may use
# bfloat16 internally) - confirm this is acceptable for these pretraining runs.
torch.set_float32_matmul_precision('medium')

In [3]:
# Root directory that contains one sub-folder per dataset (e.g. "SEN12MS",
# "WHUS2CD", "CloudSen12"). The cluster path below stays the default; set the
# SSLCD_DATASET_PATH environment variable to run the notebook on another
# machine without editing this cell.
dataset_path = os.environ.get(
    "SSLCD_DATASET_PATH",
    "/projects/sampeo/RepreSentCCN/represent_uc3_cloud_detection/Datasets",
)

## MoCo: Pretraining on SEN12MS

In [None]:
# MoCo pretraining on SEN12MS: values here are written on top of the argparse
# defaults collected below.
config = {
    # DataModule Settings
    "data_dir": dataset_path + "/SEN12MS",
    "seed": 42,
    "batch_size": 64,
    "num_workers": 8,
    "patch_size": 256,

    # Trainer Settings
    # NOTE(review): Lightning reports `gpus` as deprecated since v1.7 in favour
    # of `devices`; left as-is because the same namespace is also fed to MoCo.
    "gpus": 1,
    "accelerator": "gpu",

    # Model Parameters
    "input_ch": 13,
    "band_set": "s2-all",
    "max_epochs": 100,
}

# Register Trainer, model and datamodule options on a single parser
# (same registration order as before: Trainer -> MoCo -> SEN12MSDataModule).
parser = SEN12MSDataModule.add_model_specific_args(
    MoCo.add_model_specific_args(
        pl.Trainer.add_argparse_args(ArgumentParser())
    )
)

# Parse an empty command line so that only defaults are produced, then apply
# every entry of `config` as an attribute of the resulting namespace.
args, arg_strings = parser.parse_known_args(args=[], namespace=None)
vars(args).update(config)

# Seed before any datamodule / model object is created.
seed_all(config["seed"])

datamodule = SEN12MSDataModule.from_argparse_args(args)
model = MoCo(**vars(args))

# Checkpoint callback tracking `val_loss`; the last checkpoint is kept as well.
checkpointer = pl.callbacks.ModelCheckpoint(
    monitor="val_loss",
    dirpath="./experiments/sen12ms/moco/pretraining",
    filename="{epoch}-{val_loss:.2f}",
    save_last=True,
)
callbacks = [checkpointer]

In [None]:
# Timestamp (YYYYMMDD-HHMMSS) that gives this run its own TensorBoard version.
current_datetime = f"{dt.datetime.now():%Y%m%d-%H%M%S}"

logger = TensorBoardLogger(
    save_dir="experiments/logs",
    name="tensorboard/",
    version=f"sen12ms_moco_pretraining_{current_datetime}",
)

# Build the Trainer from the merged namespace; logger and callbacks are passed
# explicitly alongside it.
trainer = pl.Trainer.from_argparse_args(
    args,
    callbacks=callbacks,
    logger=logger,
    enable_checkpointing=True,
)

# Launch MoCo pretraining with the model and datamodule built in the cell above.
trainer.fit(model, datamodule=datamodule)

## DeepCluster: Pretraining on WHUS2-CD+/CloudSEN12

In [None]:
# Folder key (used to build `data_dir`) -> dataset identifier handed to the
# datamodule through the `dataset` option.
dataset_name = dict(WHUS2CD="WHUS2-CD+", CloudSen12="CloudSEN12")

# Active folder key; the alternative choice is "CloudSen12".
dataset = "WHUS2CD"

In [None]:
# DeepCluster pretraining on the dataset selected by `dataset`
# (WHUS2-CD+ or CloudSEN12). Values here are written on top of the argparse
# defaults collected below.
config = dict(
    # DataModule Settings
    data_dir = dataset_path+f"/{dataset}",
    seed = 42,
    batch_size = 128,
    num_workers = 8,
    training_set_fraction = 0.80, # Used only for WHUS2-CD+ to split into train/val
    patch_size = 256,
    dataset = dataset_name[dataset],
    pretraining = True,  # the trailing comma was missing here, which made the whole cell a SyntaxError

    # Trainer Settings
    # NOTE(review): Lightning reports `gpus` as deprecated since v1.7 in favour
    # of `devices`; left as-is because the same namespace is also fed to the model.
    gpus = 1,
    accelerator="gpu",

    # Model Parameters
    use_mlp = False,
    input_ch = 13,
    num_classes = 21,
    backbone = "resnet18",
    proj_hidden_dim = 2048,
    proj_output_dim = 128,
    temperature = 0.1,
    kmeans_iters = 10,

    # Optimizer Parameters
    # NOTE(review): learning_rate = 0.6 looks high for Adam - confirm it is intended.
    optimizer = "Adam",
    scheduler = "CosineAnnealingLR",
    momentum = 0.9,
    max_epochs = 100,
    learning_rate = 0.6,
    classifier_lr = 0.1,
)


# Register Trainer, model and datamodule options on a single parser.
parser = ArgumentParser()
parser = pl.Trainer.add_argparse_args(parser)
parser = DeepCluster.add_model_specific_args(parser)
parser = CloudDetectionDataModule.add_model_specific_args(parser)

# Parse an empty command line so that only defaults are produced, then apply
# every entry of `config` as an attribute of the resulting namespace.
args, arg_strings = parser.parse_known_args([], None)
for key, value in config.items():
    setattr(args, key, value)

# Seed before any datamodule / model object is created.
seed_all(config['seed'])

datamodule = CloudDetectionDataModule.from_argparse_args(args)

model = DeepCluster(**args.__dict__)

# Checkpoints go to a folder named after the selected dataset key
# ("whus2cd" for the default, "cloudsen12" for CloudSen12), so switching
# `dataset` no longer writes into the WHUS2-CD+ experiment directory.
checkpointer = pl.callbacks.ModelCheckpoint(
    dirpath=f'./experiments/{dataset.lower()}/deepcluster/pretraining',
    filename="{epoch}-{val_acc1:.2f}",
    save_last=True
)

callbacks = [checkpointer]

In [4]:
# Timestamp (YYYYMMDD-HHMMSS) that gives this run its own TensorBoard version.
current_datetime = dt.datetime.now().strftime("%Y%m%d-%H%M%S")

# The version label follows the selected dataset key ("whus2cd" for the
# default, "cloudsen12" for CloudSen12) instead of always saying "whus2cd".
logger = TensorBoardLogger(
                    save_dir="./experiments/logs",
                    name = "tensorboard/",
                    version=f"{dataset.lower()}_deepcluster_pretraining_{current_datetime}"
                    )

# Build the Trainer from the merged namespace; logger and callbacks are passed
# explicitly alongside it.
trainer = pl.Trainer.from_argparse_args(args, enable_checkpointing=True, 
                                        logger=logger, 
                                        callbacks=callbacks, 
                                        auto_lr_find=False)

# Launch DeepCluster pretraining with the model and datamodule built above.
trainer.fit(model, datamodule=datamodule)

/home/jgjean/anaconda3/envs/represent_ccn2/lib/python3.12/site-packages/pytorch_lightning/trainer/connectors/accelerator_connector.py:478: Setting `Trainer(gpus=1)` is deprecated in v1.7 and will be removed in v2.0. Please use `Trainer(accelerator='gpu', devices=1)` instead.
/home/jgjean/anaconda3/envs/represent_ccn2/lib/python3.12/site-packages/lightning_fabric/plugins/environments/slurm.py:165: The `srun` command is available on your system but is not used. HINT: If your intention is to run Lightning on SLURM, prepend your python command with `srun` like so: srun python /home/jgjean/anaconda3/envs/represent_ccn2/lib/pytho ...
INFO: GPU available: True (cuda), used: True
INFO:lightning.pytorch.utilities.rank_zero:GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
INFO:lightning.pytorch.utilities.rank_zero:IPU available: False,

18306
18306
14644 3662
<class 'sslcd.tools.utils.dataset_with_index.<locals>.DatasetWithIndex'>


  "scheduler": LinearWarmupCosineAnnealingLR(

  | Name       | Type       | Params
------------------------------------------
0 | backbone   | ResNet     | 11.2 M
1 | classifier | Linear     | 10.8 K
2 | projector  | Sequential | 1.3 M 
3 | prototypes | ModuleList | 1.2 M 
------------------------------------------
12.5 M    Trainable params
1.2 M     Non-trainable params
13.7 M    Total params
54.751    Total estimated model params size (MB)


[[tensor([[[[1.5040e-01, 1.5040e-01, 1.5040e-01,  ..., 1.6430e-01,
           1.6430e-01, 1.6430e-01],
          [1.5040e-01, 1.5040e-01, 1.5040e-01,  ..., 1.6430e-01,
           1.6430e-01, 1.6430e-01],
          [1.5040e-01, 1.5040e-01, 1.5040e-01,  ..., 1.6430e-01,
           1.6430e-01, 1.6430e-01],
          ...,
          [1.3670e-01, 1.3670e-01, 1.3670e-01,  ..., 1.6100e-01,
           1.6100e-01, 1.6100e-01],
          [1.3670e-01, 1.3670e-01, 1.3670e-01,  ..., 1.6100e-01,
           1.6100e-01, 1.6100e-01],
          [1.3670e-01, 1.3670e-01, 1.3670e-01,  ..., 1.6100e-01,
           1.6100e-01, 1.6100e-01]],

         [[1.4910e-01, 1.5080e-01, 1.5040e-01,  ..., 1.5960e-01,
           1.5930e-01, 1.6010e-01],
          [1.4540e-01, 1.4170e-01, 1.4010e-01,  ..., 1.5880e-01,
           1.5890e-01, 1.5910e-01],
          [1.3100e-01, 1.3220e-01, 1.3730e-01,  ..., 1.5860e-01,
           1.5990e-01, 1.5880e-01],
          ...,
          [1.2070e-01, 1.2220e-01, 1.2170e-01,  ..., 1.43

Sanity Checking: 0it [00:00, ?it/s]

<class 'sslcd.tools.utils.dataset_with_index.<locals>.DatasetWithIndex'>
[[tensor([[[[0.1305, 0.1305, 0.1305,  ..., 0.1106, 0.1106, 0.1106],
          [0.1305, 0.1305, 0.1305,  ..., 0.1106, 0.1106, 0.1106],
          [0.1305, 0.1305, 0.1305,  ..., 0.1106, 0.1106, 0.1106],
          ...,
          [0.1181, 0.1181, 0.1181,  ..., 0.1137, 0.1137, 0.1137],
          [0.1181, 0.1181, 0.1181,  ..., 0.1137, 0.1137, 0.1137],
          [0.1181, 0.1181, 0.1181,  ..., 0.1137, 0.1137, 0.1137]],

         [[0.1168, 0.0985, 0.1125,  ..., 0.0820, 0.0804, 0.0808],
          [0.1242, 0.1127, 0.1088,  ..., 0.0828, 0.0806, 0.0806],
          [0.1022, 0.1008, 0.0970,  ..., 0.0809, 0.0808, 0.0808],
          ...,
          [0.0998, 0.0973, 0.0923,  ..., 0.0842, 0.0841, 0.0859],
          [0.0843, 0.0883, 0.0927,  ..., 0.0846, 0.0850, 0.0857],
          [0.0836, 0.0866, 0.0911,  ..., 0.0855, 0.0850, 0.0852]],

         [[0.1063, 0.0950, 0.1078,  ..., 0.0728, 0.0706, 0.0658],
          [0.1151, 0.1114, 0.1039

Training: 0it [00:00, ?it/s]

ValueError: not enough values to unpack (expected 3, got 2)

### Fine-tuning DeepCluster on WHUS2-CD+

### Pretraining DeepCluster on CloudSEN12

In [None]:
# DeepCluster pretraining on CloudSEN12: values here are written on top of the
# argparse defaults collected below.
config = {
    # DataModule Settings
    "data_dir": dataset_path + "/CloudSen12",
    "seed": 42,
    "batch_size": 128,
    "num_workers": 8,
    "patch_size": 512,
    "pretraining": True,
    "dataset": "CloudSEN12",

    # Trainer Settings
    # NOTE(review): Lightning reports `gpus` as deprecated since v1.7 in favour
    # of `devices`; left as-is because the same namespace is also fed to the model.
    "gpus": 1,
    "accelerator": "gpu",

    # Model Parameters
    "use_mlp": False,
    "input_ch": 13,
    "num_classes": 21,
    "backbone": "resnet18",
    "proj_hidden_dim": 2048,
    "num_prototypes": [5000, 5000, 5000],
    "proj_output_dim": 128,
    "temperature": 0.1,
    "kmeans_iters": 10,

    # Optimizer Parameters
    # NOTE(review): learning_rate = 0.6 looks high for Adam - confirm it is intended.
    "optimizer": "Adam",
    "scheduler": "CosineAnnealingLR",
    "momentum": 0.9,
    "max_epochs": 100,
    "learning_rate": 0.6,
    "classifier_lr": 0.1,
}

# Register Trainer, model and datamodule options on a single parser
# (same order as before: Trainer -> DeepCluster -> CloudDetectionDataModule).
parser = CloudDetectionDataModule.add_model_specific_args(
    DeepCluster.add_model_specific_args(
        pl.Trainer.add_argparse_args(ArgumentParser())
    )
)

# Parse an empty command line so that only defaults are produced, then apply
# every entry of `config` as an attribute of the resulting namespace.
args, arg_strings = parser.parse_known_args(args=[], namespace=None)
vars(args).update(config)

# Seed before any datamodule / model object is created.
seed_all(config["seed"])

datamodule = CloudDetectionDataModule.from_argparse_args(args)
model = DeepCluster(**vars(args))

# Checkpoint callback for this run; the last checkpoint is kept as well.
checkpointer = pl.callbacks.ModelCheckpoint(
    dirpath="./experiments/cloudsen12/deepcluster/pretraining",
    filename="{epoch}-{val_acc1:.2f}",
    save_last=True,
)
callbacks = [checkpointer]

[rank: 0] Global seed set to 42
  self.backbone = template_model(num_classes=2, return_all_feature_maps=False)
  return _resnet("resnet18", BasicBlock, [2, 2, 2, 2], pretrained, progress, **kwargs)
  model = ResNet(block, layers, **kwargs)
  block(
  self.conv1 = conv3x3(inplanes, planes, stride)
  conv1x1(self.inplanes, planes * block.expansion, stride),


In [4]:
# Timestamp (YYYYMMDD-HHMMSS) that gives this run its own TensorBoard version.
current_datetime = f"{dt.datetime.now():%Y%m%d-%H%M%S}"

logger = TensorBoardLogger(
    save_dir="./experiments/logs",
    name="tensorboard/",
    version=f"cloudsen12_deepcluster_pretraining_{current_datetime}",
)

# Build the Trainer from the merged namespace; logger and callbacks are passed
# explicitly alongside it.
trainer = pl.Trainer.from_argparse_args(
    args,
    callbacks=callbacks,
    logger=logger,
    enable_checkpointing=True,
    auto_lr_find=False,
)

# Launch DeepCluster pretraining on CloudSEN12.
trainer.fit(model, datamodule=datamodule)

/home/jgjean/anaconda3/envs/represent_ccn2/lib/python3.12/site-packages/pytorch_lightning/trainer/connectors/accelerator_connector.py:478: Setting `Trainer(gpus=1)` is deprecated in v1.7 and will be removed in v2.0. Please use `Trainer(accelerator='gpu', devices=1)` instead.
/home/jgjean/anaconda3/envs/represent_ccn2/lib/python3.12/site-packages/lightning_fabric/plugins/environments/slurm.py:165: The `srun` command is available on your system but is not used. HINT: If your intention is to run Lightning on SLURM, prepend your python command with `srun` like so: srun python /home/jgjean/anaconda3/envs/represent_ccn2/lib/pytho ...
INFO: GPU available: True (cuda), used: True
INFO:lightning.pytorch.utilities.rank_zero:GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
INFO:lightning.pytorch.utilities.rank_zero:IPU available: False,

8490 535
<class 'sslcd.tools.utils.dataset_with_index.<locals>.DatasetWithIndex'>


  "scheduler": LinearWarmupCosineAnnealingLR(

  | Name       | Type       | Params
------------------------------------------
0 | backbone   | ResNet     | 11.2 M
1 | classifier | Linear     | 10.8 K
2 | projector  | Sequential | 1.3 M 
3 | prototypes | ModuleList | 1.9 M 
------------------------------------------
12.5 M    Trainable params
1.9 M     Non-trainable params
14.5 M    Total params
57.823    Total estimated model params size (MB)


[tensor([7154, 1938, 2298, 3616, 2908, 8125, 5973, 3108, 3507, 6935, 1491, 3215,
        6418, 7249, 6141, 8089, 4946, 7839, 3579, 1883, 4506, 7329,  777, 8484,
        6984, 7117, 6051, 1288, 5402, 2302, 3189, 5905, 4343, 1739, 5392, 3223,
        7280,  423, 6643, 3730, 7128, 5609, 5016, 8027, 3489, 7066, 1961, 7489,
        2580,   23, 3590, 6541, 1105, 2351,  170, 8478, 5285,  469, 4032, 1480,
        3092, 4690, 2094, 5889, 5894, 4518, 1364, 7219, 2291,   39,  592, 5342,
        3801, 6065, 2369, 5997, 6969, 4273,  802, 5509, 5995, 5856, 4615, 2504,
        8211, 1919, 2903, 1624, 1171, 6828, 7202, 1692, 6885, 1486, 1792,  698,
        2277, 5256, 4835,   49, 8093, 2402, 7511, 3014, 1753,  411, 2317, 6789,
        5682, 8290, 5577, 1313, 8084, 8284, 1934, 7314, 1782, 5508, 7372,  563,
        1000, 5977, 1548, 1494,  971, 6259, 3581, 6481]), [tensor([[[[0.1297, 0.1297, 0.1297,  ..., 0.0000, 0.0000, 0.0000],
          [0.1297, 0.1297, 0.1297,  ..., 0.0000, 0.0000, 0.0000],
        

Sanity Checking: 0it [00:00, ?it/s]

<class 'sslcd.tools.utils.dataset_with_index.<locals>.DatasetWithIndex'>
[tensor([6704, 2945, 2444, 8085, 4300, 3437, 2024, 3612, 5436, 6244, 2569, 5270,
        7340, 4463, 2994, 1928, 2645, 2712, 2350, 5994, 3358, 6804, 1873, 8238,
        6071, 1997, 2538, 2345, 1648,  461, 5465,  861, 5231, 1699, 3526, 8394,
        5406, 5971, 7455, 6512, 3269, 7873, 6178, 3195, 8058,  931, 3506, 1341,
        8141, 5418,  136, 1445, 3908, 4733,  189, 8225, 6362, 1725, 7734, 5037,
        7574, 2585, 4233, 2781, 3071, 7341, 6066, 6550, 1366, 4666, 8006, 7280,
        3772, 4660,  254, 5459, 4318, 3797, 3749, 5918, 7807, 5155, 5838, 7953,
        5668, 3499, 2370, 1615, 5817,  862, 7278, 7307, 6377, 6851, 5934, 3610,
        1656, 8092, 3535, 2648, 3607, 7524, 1712,  179, 2418,  207, 6713,  490,
        1141, 7622,   70, 5138,  521, 7259, 2601,  919, 4230, 5861, 3939, 7848,
        8154, 4480, 4940, 3473, 1446, 3999,   82, 1609]), [tensor([[[[0.1852, 0.1852, 0.1893,  ..., 0.0000, 0.0000, 0.0000],
 

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

ValueError: Caught ValueError in DataLoader worker process 4.
Original Traceback (most recent call last):
  File "/home/jgjean/anaconda3/envs/represent_ccn2/lib/python3.12/site-packages/torch/utils/data/_utils/worker.py", line 351, in _worker_loop
    data = fetcher.fetch(index)  # type: ignore[possibly-undefined]
           ^^^^^^^^^^^^^^^^^^^^
  File "/home/jgjean/anaconda3/envs/represent_ccn2/lib/python3.12/site-packages/torch/utils/data/_utils/fetch.py", line 52, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
            ~~~~~~~~~~~~^^^^^
  File "/projects/sampeo/RepreSentCCN/self-supervised-cloud-detection/sslcd/datamodules/cd_datasets.py", line 175, in __getitem__
    samples_idx = samples_idx[samples_idx!=index]
       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "numpy/random/mtrand.pyx", line 974, in numpy.random.mtrand.RandomState.choice
ValueError: 'a' cannot be empty unless no samples are taken
