In [1]:
import torch

# Check if CUDA (NVIDIA GPU) is available
if torch.cuda.is_available():
    print("CUDA is available! You can use GPU for computation.")
else:
    print("CUDA is not available. You can use CPU for computation.")


CUDA is available! You can use GPU for computation.


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
print(torch.__version__)

1.8.1+cu111


In [3]:
import pytorch_lightning as pl
print(pl.__version__)

1.6.4


In [1]:
from src.model import DecoderDenoisingModel
from src.data import SSLDataModule
import lightning as pl
from lightning.pytorch import seed_everything, Trainer
from lightning.pytorch.tuner import Tuner
from lightning.pytorch.callbacks.early_stopping import EarlyStopping
from lightning.pytorch.callbacks import ModelCheckpoint

seed_everything(4, workers=True)
model_class = DecoderDenoisingModel
dm_class = SSLDataModule

# create callbacks
early_stop = EarlyStopping(monitor="train_loss", mode="min", patience=5)
best_val_save = ModelCheckpoint(save_top_k = 2, monitor = "val_loss", 
                            mode = "min", dirpath=r"D:\AKairys\DFUC2022\Examples\decoder-denoising\output\SSL\best")



model = model_class(arch="unet", encoder="resnet50", lr=0.0009)
dm = dm_class(root="D:/AKairys/DFUC2022/Examples/decoder-denoising/data/stranger-sections-2-unlabeled-data-patched",
              batch_size=14)
trainer = Trainer(max_epochs=200, default_root_dir = "D:\AKairys\DFUC2022\Examples\decoder-denoising\output\SSL",
                  callbacks=[early_stop, best_val_save])
# create tuner object
tuner = Tuner(trainer)
# optimal lr
# tuner.lr_find(model, dm) # lr of 0.0009 was found optimal
# optimal batch size
# tuner.scale_batch_size(model, dm, 'binsearch') #batch of 32 was last good
trainer.fit(model=model, datamodule=dm)

Seed set to 4
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Loaded 103680 images from D:/AKairys/DFUC2022/Examples/decoder-denoising/data/stranger-sections-2-unlabeled-data-patched



  | Name | Type | Params
------------------------------
0 | net  | Unet | 32.5 M
------------------------------
9.0 M     Trainable params
23.5 M    Non-trainable params
32.5 M    Total params
130.086   Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

c:\Users\User\.conda\envs\DFU2\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:436: Consider setting `persistent_workers=True` in 'val_dataloader' to speed up the dataloader worker initialization.


                                                                           

c:\Users\User\.conda\envs\DFU2\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:436: Consider setting `persistent_workers=True` in 'train_dataloader' to speed up the dataloader worker initialization.


Epoch 0:   0%|          | 2/7334 [00:01<1:02:20,  1.96it/s, v_num=6, lr=0.0009, train_loss=1.640]

  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


Epoch 18: 100%|██████████| 7334/7334 [24:09<00:00,  5.06it/s, v_num=6, lr=9e-6, train_loss=0.317, val_loss=0.286]  


Model finetuning

In [1]:
from src.model import FineTuningModel
from src.data import SupervisedDataModule
from lightning.pytorch import seed_everything, Trainer
from lightning.pytorch.tuner import Tuner
from lightning.pytorch.callbacks.early_stopping import EarlyStopping
from lightning.pytorch.callbacks import ModelCheckpoint
import torch
import os

seed_everything(4, workers=True)
model_class = FineTuningModel
dm_class = SupervisedDataModule

output_dir = r"D:\AKairys\DFUC2022\Examples\decoder-denoising\output\FineTune"

model = model_class(arch="unet", encoder="resnet50", lr=0.0009, loss_type = "ce")
# load pretrained weights
ckpt = torch.load(r"D:\AKairys\DFUC2022\Examples\decoder-denoising\output\SSL\best\epoch=13-step=102676.ckpt")
model.load_state_dict(ckpt['state_dict'], strict=True)
# Replace segmentation head
in_channels = model.net.segmentation_head[0].in_channels
num_classes = 4 #3 + 1 background
model.net.segmentation_head[0] = torch.nn.Conv2d(in_channels, num_classes, kernel_size=3, padding=1)


# create callbacks
early_stop = EarlyStopping(monitor="train_loss", mode="min", patience=15)
best_val_save = ModelCheckpoint(save_top_k = 2, monitor = "val_loss", 
                            mode = "min", dirpath=os.path.join(output_dir, "best", "pretr"))

dm = dm_class(root="D:/AKairys/DFUC2022/Examples/decoder-denoising/data/train-patched",
              batch_size=14, num_val=256)
trainer = Trainer(max_epochs=200, default_root_dir = output_dir,
                  callbacks=[early_stop, best_val_save])
# create tuner object
tuner = Tuner(trainer)
# optimal lr
tuner.lr_find(model, dm) # lr of 0.0009 was found optimal
# optimal batch size
# tuner.scale_batch_size(model, dm) #batch of 128 was last good, but it did not worked in training
trainer.fit(model=model, datamodule=dm)


Seed set to 4
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Loaded 1344 images from D:/AKairys/DFUC2022/Examples/decoder-denoising/data/train-patched


c:\Users\User\.conda\envs\DFU2\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:436: Consider setting `persistent_workers=True` in 'train_dataloader' to speed up the dataloader worker initialization.
c:\Users\User\.conda\envs\DFU2\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:436: Consider setting `persistent_workers=True` in 'val_dataloader' to speed up the dataloader worker initialization.
Finding best initial lr: 100%|██████████| 100/100 [00:58<00:00,  4.84it/s]`Trainer.fit` stopped: `max_steps=100` reached.
Finding best initial lr: 100%|██████████| 100/100 [00:58<00:00,  1.70it/s]
Learning rate set to 0.002754228703338169
Restoring states from the checkpoint path at D:\AKairys\DFUC2022\Examples\decoder-denoising\output\FineTune\.lr_find_32ed2df0-c9f0-4f95-98a7-82fe1ffa4f9a.ckpt
Restored all states from the checkpoint at D:\AKairys\DFUC2022\Examples\decoder-denoising\output\FineTune\.lr_find_32ed2df0-c9f0-4f95-98a7-82fe1ffa4f9a.ckpt

Loaded 1344 images from D:/AKairys/DFUC2022/Examples/decoder-denoising/data/train-patched
Epoch 63: 100%|██████████| 77/77 [00:54<00:00,  1.41it/s, v_num=27, lr=2.75e-9, train_loss=0.0204, val_loss=0.0514, val_jaccard=0.805]  


Inference

In [1]:
from src.model import FineTuningModel
from src.data import SupervisedDataModule
from lightning.pytorch import Trainer

output_dir = r"D:\AKairys\DFUC2022\Examples\decoder-denoising\output\FineTune"

# load checkpoint
checkpoint_path = r"D:\AKairys\DFUC2022\Examples\decoder-denoising\output\FineTune\best\epoch=34-step=2695.ckpt"
model = FineTuningModel.load_from_checkpoint(checkpoint_path)
# specify data loaders
dm_class = SupervisedDataModule
dm = dm_class(root=r"D:\AKairys\DFUC2022\Examples\decoder-denoising\data\test",
              batch_size=1)
# define trainer and predict
trainer = Trainer(default_root_dir = output_dir)
trainer.predict(model=model, datamodule=dm, return_predictions=False)


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Loaded 25 images from D:\AKairys\DFUC2022\Examples\decoder-denoising\data\test


c:\Users\User\.conda\envs\DFU2\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:436: Consider setting `persistent_workers=True` in 'predict_dataloader' to speed up the dataloader worker initialization.


Predicting DataLoader 0: 100%|██████████| 25/25 [00:06<00:00,  3.97it/s]
