In [2]:
import torch

# Check if CUDA (NVIDIA GPU) is available
if torch.cuda.is_available():
    print("CUDA is available! You can use GPU for computation.")
else:
    print("CUDA is not available. You can use CPU for computation.")


CUDA is available! You can use GPU for computation.


In [3]:
print(torch.__version__)

2.3.0+cu121


In [4]:
import pytorch_lightning as pl
print(pl.__version__)

2.2.4


In [1]:
from src.model import DecoderDenoisingModel
from src.data import SSLDataModule
import lightning as pl
from lightning.pytorch import seed_everything, Trainer
from lightning.pytorch.tuner import Tuner
from lightning.pytorch.callbacks.early_stopping import EarlyStopping
from lightning.pytorch.callbacks import ModelCheckpoint
import os

seed_everything(4, workers=True)
model_class = DecoderDenoisingModel
dm_class = SSLDataModule

root_dir = r"output\SSL\pret_whole"
# create callbacks
early_stop = EarlyStopping(monitor="train_loss", mode="min", patience=5)
best_val_save = ModelCheckpoint(save_top_k = 2, monitor = "val_loss", 
                            mode = "min", dirpath=os.path.join(root_dir, 'best'), every_n_epochs=1)



model = model_class(arch="unet", encoder="resnet50", lr=0.0009, num_class=3, mode="encoder+decoder")
dm = dm_class(root="data/stranger-sections-2-unlabeled-data",
              batch_size=8, crop = 512)
trainer = Trainer(max_epochs=200, default_root_dir = root_dir,
                  callbacks=[early_stop, best_val_save])
# create tuner object
tuner = Tuner(trainer)
# optimal lr
tuner.lr_find(model, dm) # lr of 0.0009 was found optimal
# optimal batch size
tuner.scale_batch_size(model, dm) #batch of 32 was last good
trainer.fit(model=model, datamodule=dm)

Seed set to 4
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Loaded 8640 images from D:/AKairys/DFUC2022/Examples/decoder-denoising/data/stranger-sections-2-unlabeled-data


c:\Users\User\.conda\envs\DFU2\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:436: Consider setting `persistent_workers=True` in 'train_dataloader' to speed up the dataloader worker initialization.
c:\Users\User\.conda\envs\DFU2\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:436: Consider setting `persistent_workers=True` in 'val_dataloader' to speed up the dataloader worker initialization.
  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
Finding best initial lr: 100%|██████████| 100/100 [00:21<00:00,  4.61it/s]`Trainer.fit` stopped: `max_steps=100` reached.
Finding best initial lr: 100%|██████████| 100/100 [00:22<00:00,  4.54it/s]
Learning rate set to 0.0009120108393559097
Restoring states from the checkpoint path at D:\AKairys\DFUC2022\Examples\decoder-denoising\output\SSL\pret_whole\.lr_find_5e771388-f837-406f-a600-896871c3a3d3.ckpt
Restored all states from the checkpoint at D:

Loaded 8640 images from D:/AKairys/DFUC2022/Examples/decoder-denoising/data/stranger-sections-2-unlabeled-data


`Trainer.fit` stopped: `max_steps=3` reached.
Batch size 2 succeeded, trying batch size 4
`Trainer.fit` stopped: `max_steps=3` reached.
Batch size 4 succeeded, trying batch size 8
`Trainer.fit` stopped: `max_steps=3` reached.
Batch size 8 succeeded, trying batch size 16
  return F.conv2d(input, weight, bias, self.stride,
Batch size 16 failed, trying batch size 8
Finished batch size finder, will continue with full run using batch size 8
Restoring states from the checkpoint path at D:\AKairys\DFUC2022\Examples\decoder-denoising\output\SSL\pret_whole\.scale_batch_size_6196a12d-3d4d-407f-ae66-074d7d9416ae.ckpt
Restored all states from the checkpoint at D:\AKairys\DFUC2022\Examples\decoder-denoising\output\SSL\pret_whole\.scale_batch_size_6196a12d-3d4d-407f-ae66-074d7d9416ae.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name | Type | Params
------------------------------
0 | net  | Unet | 32.5 M
------------------------------
32.5 M    Trainable params
0         Non-trainable params
3

Loaded 8640 images from D:/AKairys/DFUC2022/Examples/decoder-denoising/data/stranger-sections-2-unlabeled-data
Epoch 37: 100%|██████████| 955/955 [03:41<00:00,  4.32it/s, v_num=4, lr=9.12e-6, train_loss=0.0729, val_loss=0.0816] 


In [1]:
from src.model import SplitMaskModel
from src.data import SplitMaskDataModule
import lightning as pl
from lightning.pytorch import seed_everything, Trainer
from lightning.pytorch.tuner import Tuner
from lightning.pytorch.callbacks.early_stopping import EarlyStopping
from lightning.pytorch.callbacks import ModelCheckpoint
import os
from torchvision.transforms import (Compose, Lambda, RandomCrop,
                                    RandomHorizontalFlip, Resize, 
                                    ToTensor, Normalize, ColorJitter, 
                                    RandomApply, RandomGrayscale)

seed_everything(4, workers=True)
model_class = SplitMaskModel
dm_class = SplitMaskDataModule


root_dirs = [r"output\SSL\pret_whole_splitmask",
             r"output\SSL\pret_whole_splitmask-noaugm"]
augms = [[], 
         [RandomApply([ColorJitter(0.8, 0.8, 0.8, 0.2)], p=0.8), RandomGrayscale(p=0.2)]]
for i, root_dir in enumerate(root_dirs):
    
    
    # create callbacks
    early_stop = EarlyStopping(monitor="train_loss", mode="min", patience=10)
    best_val_save = ModelCheckpoint(save_top_k = 2, monitor = "val_loss", 
                                mode = "min", dirpath=os.path.join(root_dir, 'best'), every_n_epochs=1)
    
    model = model_class(arch="unet", encoder="resnet50", lr=0.0009, num_class=3, mode="encoder+decoder")
    dm = dm_class(root="data/stranger-sections-2-unlabeled-data",
                batch_size=4, crop = 512, augms=augms[i])
    trainer = Trainer(max_epochs=200, default_root_dir = root_dir,
                    callbacks=[early_stop, best_val_save])
    # create tuner object
    tuner = Tuner(trainer)
    # optimal lr
    tuner.lr_find(model, dm) # lr of 0.0009 was found optimal
    # optimal batch size
    tuner.scale_batch_size(model, dm) #batch of 32 was last good
    trainer.fit(model=model, datamodule=dm)
    

Seed set to 4
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
c:\Users\User\.conda\envs\DFU2\lib\site-packages\lightning\pytorch\callbacks\model_checkpoint.py:653: Checkpoint directory D:\AKairys\DFUC2022\Examples\decoder-denoising\output\SSL\pret_whole_splitmask\best exists and is not empty.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Loaded 8640 images from data/stranger-sections-2-unlabeled-data


c:\Users\User\.conda\envs\DFU2\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:436: Consider setting `persistent_workers=True` in 'train_dataloader' to speed up the dataloader worker initialization.
c:\Users\User\.conda\envs\DFU2\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:436: Consider setting `persistent_workers=True` in 'val_dataloader' to speed up the dataloader worker initialization.
  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
Finding best initial lr: 100%|██████████| 100/100 [00:22<00:00,  4.43it/s]`Trainer.fit` stopped: `max_steps=100` reached.
Finding best initial lr: 100%|██████████| 100/100 [00:22<00:00,  4.40it/s]
Learning rate set to 0.0009120108393559097
Restoring states from the checkpoint path at output\SSL\pret_whole_splitmask\.lr_find_5ceecc19-bad2-49ba-be05-6bb53bcf592a.ckpt
Restored all states from the checkpoint at output\SSL\pret_whole_splitmask\.lr_fin

Loaded 8640 images from data/stranger-sections-2-unlabeled-data


`Trainer.fit` stopped: `max_steps=3` reached.
Batch size 2 succeeded, trying batch size 4
`Trainer.fit` stopped: `max_steps=3` reached.
Batch size 4 succeeded, trying batch size 8
  return F.conv2d(input, weight, bias, self.stride,
Batch size 8 failed, trying batch size 4
Finished batch size finder, will continue with full run using batch size 4
Restoring states from the checkpoint path at output\SSL\pret_whole_splitmask\.scale_batch_size_ed324c04-221e-4d6a-b0a3-3b46b772d93c.ckpt
Restored all states from the checkpoint at output\SSL\pret_whole_splitmask\.scale_batch_size_ed324c04-221e-4d6a-b0a3-3b46b772d93c.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Loaded 8640 images from data/stranger-sections-2-unlabeled-data



  | Name   | Type                   | Params
--------------------------------------------------
0 | net    | Unet                   | 32.5 M
1 | acc_fn | MulticlassJaccardIndex | 0     
--------------------------------------------------
32.5 M    Trainable params
0         Non-trainable params
32.5 M    Total params
130.086   Total estimated model params size (MB)


Epoch 38: 100%|██████████| 1910/1910 [07:28<00:00,  4.26it/s, v_num=5, lr=5.7e-5, train_loss=0.000404, val_loss=0.00084]   


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Loaded 8640 images from data/stranger-sections-2-unlabeled-data


Finding best initial lr: 100%|██████████| 100/100 [00:20<00:00,  4.90it/s]`Trainer.fit` stopped: `max_steps=100` reached.
Finding best initial lr: 100%|██████████| 100/100 [00:20<00:00,  4.87it/s]
Learning rate set to 0.0009120108393559097
Restoring states from the checkpoint path at output\SSL\pret_whole_splitmask-noaugm\.lr_find_83ed8249-52bf-4364-9f30-7d525b4bbc80.ckpt
Restored all states from the checkpoint at output\SSL\pret_whole_splitmask-noaugm\.lr_find_83ed8249-52bf-4364-9f30-7d525b4bbc80.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Loaded 8640 images from data/stranger-sections-2-unlabeled-data


`Trainer.fit` stopped: `max_steps=3` reached.
Batch size 2 succeeded, trying batch size 4
`Trainer.fit` stopped: `max_steps=3` reached.
Batch size 4 succeeded, trying batch size 8
Batch size 8 failed, trying batch size 4
Finished batch size finder, will continue with full run using batch size 4
Restoring states from the checkpoint path at output\SSL\pret_whole_splitmask-noaugm\.scale_batch_size_84cc52e7-dcc5-42d0-a2fb-c4896e47f09a.ckpt
Restored all states from the checkpoint at output\SSL\pret_whole_splitmask-noaugm\.scale_batch_size_84cc52e7-dcc5-42d0-a2fb-c4896e47f09a.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name   | Type                   | Params
--------------------------------------------------
0 | net    | Unet                   | 32.5 M
1 | acc_fn | MulticlassJaccardIndex | 0     
--------------------------------------------------
32.5 M    Trainable params
0         Non-trainable params
32.5 M    Total params
130.086   Total estimated model params size (MB)


Loaded 8640 images from data/stranger-sections-2-unlabeled-data
Epoch 29: 100%|██████████| 1910/1910 [07:24<00:00,  4.30it/s, v_num=0, lr=5.7e-5, train_loss=0.000996, val_loss=0.00103]   


Model finetuning

In [3]:
from src.model import FineTuningModel
from src.data import SupervisedDataModule
from lightning.pytorch import seed_everything, Trainer
from lightning.pytorch.tuner import Tuner
from lightning.pytorch.callbacks.early_stopping import EarlyStopping
from lightning.pytorch.callbacks import ModelCheckpoint
import torch
import torch.nn.functional as F
import os

seed_everything(4, workers=True)
model_class = FineTuningModel
dm_class = SupervisedDataModule

output_dir = r"output\FineTune\whole_pret_splitm-naug"

# initialize model, should be the same as SSL, but different loss function
model = model_class(arch="unet", encoder="resnet50", lr=0.00005, loss_type = "ce", num_class=3, mode = "encoder+decoder")
# load pretrained weights

# load checkpoint
checkpoint_path = r"output\SSL\pret_whole_splitmask-noaugm\best\epoch=27-step=53480.ckpt"
# load pretrained weights
ckpt = torch.load(checkpoint_path)
model.load_state_dict(ckpt['state_dict'], strict=True)

# Replace segmentation head
in_channels = model.net.segmentation_head[0].in_channels
num_classes = 4 #3 + 1 background
model.net.segmentation_head[0] = torch.nn.Conv2d(in_channels, num_classes, kernel_size=3, padding=1)


# create callbacks
early_stop = EarlyStopping(monitor="train_loss", mode="min", patience=5)
best_val_save = ModelCheckpoint(save_top_k = 2, monitor = "val_jaccard", 
                            mode = "max", dirpath=os.path.join(output_dir, "best"), every_n_epochs=1)

dm = dm_class(root="data/train",
              batch_size=8, num_val=20, crop=512)
trainer = Trainer(max_epochs=200, default_root_dir = output_dir,
                  callbacks=[early_stop, best_val_save])
# create tuner object
tuner = Tuner(trainer)
# optimal lr
# tuner.lr_find(model, dm) # lr of 0.0009 was found optimal
# optimal batch size
# tuner.scale_batch_size(model, dm) #batch of 8 was last good
trainer.fit(model=model, datamodule=dm)


Seed set to 4
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: output\FineTune\whole_pret_splitm-naug\lightning_logs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name   | Type                   | Params
--------------------------------------------------
0 | net    | Unet                   | 32.5 M
1 | acc_fn | MulticlassJaccardIndex | 0     
--------------------------------------------------
32.5 M    Trainable params
0         Non-trainable params
32.5 M    Total params
130.086   Total estimated model params size (MB)


Loaded 112 images from data/train
Epoch 42: 100%|██████████| 11/11 [00:14<00:00,  0.74it/s, v_num=0, lr=2.5e-5, train_loss=0.427, val_loss=0.612, val_jaccard=0.108]


In [4]:
model.mode

AttributeError: 'FineTuningModel' object has no attribute 'mode'

Inference

In [1]:
from src.model import FineTuningModel
from src.data import SupervisedDataModule
from lightning.pytorch import Trainer
import torch

model_class = FineTuningModel

output_dir = r"output\FineTune"

model = model_class(arch="unet", encoder="resnet50", lr=0.00005, loss_type = "ce", num_class=4, mode = "encoder+decoder")
# load pretrained weights

# load checkpoint
checkpoint_path = r"output\FineTune\whole_pret_e_s\best\epoch=13-step=154.ckpt"
# load pretrained weights
ckpt = torch.load(checkpoint_path)
model.load_state_dict(ckpt['state_dict'], strict=True)

# specify data loaders
dm_class = SupervisedDataModule
dm = dm_class(root=r"data\test",
              batch_size=1)
# define trainer and predict
trainer = Trainer(default_root_dir = output_dir)
trainer.predict(model=model, datamodule=dm, return_predictions=False)


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Loaded 25 images from data\test


c:\Users\User\.conda\envs\DFU2\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:436: Consider setting `persistent_workers=True` in 'predict_dataloader' to speed up the dataloader worker initialization.


Predicting DataLoader 0: 100%|██████████| 25/25 [00:17<00:00,  1.39it/s]


TransUnet denoising pretraining

In [1]:
from src.model import DecoderDenoisingModel
from src.data import SSLDataModule
import lightning as pl
from lightning.pytorch import seed_everything, Trainer
from lightning.pytorch.tuner import Tuner
from lightning.pytorch.callbacks.early_stopping import EarlyStopping
from lightning.pytorch.callbacks import ModelCheckpoint, StochasticWeightAveraging
import os
from src.transunet.vit_seg_modeling import VisionTransformer as ViT_seg
from src.transunet.vit_seg_modeling import CONFIGS as CONFIGS_ViT_seg
import numpy as np

seed_everything(4, workers=True)
model_class = DecoderDenoisingModel
dm_class = SSLDataModule

root_dir = r"output\SSL\transUnetA_whole"
# create callbacks
early_stop = EarlyStopping(monitor="train_loss", mode="min", patience=5)
best_val_save = ModelCheckpoint(save_top_k = 2, monitor = "val_loss", 
                            mode = "min", dirpath=os.path.join(root_dir, 'best'), every_n_epochs=1)

# create net
config_vit = CONFIGS_ViT_seg['R50-ViT-B_16A']
config_vit.n_classes = 3
# config_vit.n_skip = 3
net = ViT_seg(config_vit, img_size=512, num_classes=config_vit.n_classes)

model = model_class(net=net, lr=0.0009, num_class=3, mode="encoder+decoder")
dm = dm_class(root="data/stranger-sections-2-unlabeled-data",
              batch_size=1, crop = 512)
trainer = Trainer(max_epochs=200, default_root_dir = root_dir,
                  callbacks=[early_stop, best_val_save, 
                            #  StochasticWeightAveraging(swa_lrs=0.001)#try without SWA
                             ])
# create tuner object
tuner = Tuner(trainer)
# optimal lr
tuner.lr_find(model, dm) # lr of 0.0009 was found optimal
# optimal batch size
# tuner.scale_batch_size(model, dm) #batch of 16 is too big
trainer.fit(model=model, datamodule=dm)

Seed set to 4
c:\Users\User\.conda\envs\DFU2\lib\site-packages\lightning\pytorch\utilities\parsing.py:199: Attribute 'net' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['net'])`.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Loaded 8640 images from data/stranger-sections-2-unlabeled-data


c:\Users\User\.conda\envs\DFU2\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:436: Consider setting `persistent_workers=True` in 'train_dataloader' to speed up the dataloader worker initialization.
c:\Users\User\.conda\envs\DFU2\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:436: Consider setting `persistent_workers=True` in 'val_dataloader' to speed up the dataloader worker initialization.


RuntimeError: Sizes of tensors must match except in dimension 1. Expected size 32 but got size 64 for tensor number 1 in the list.

In [6]:

from src.transunet.vit_seg_modeling import VisionTransformer as ViT_seg
from src.transunet.vit_seg_modeling import CONFIGS as CONFIGS_ViT_seg
import numpy as np
# create net
config_vit = CONFIGS_ViT_seg['R50-ViT-B_16']
config_vit.n_classes = 3
config_vit.n_skip = 3
net = ViT_seg(config_vit, img_size=512, num_classes=config_vit.n_classes)
# net.load_from(weights=np.load(config_vit.pretrained_path))

In [3]:
!pip install torchsummary

Collecting torchsummary
  Downloading torchsummary-1.5.1-py3-none-any.whl.metadata (296 bytes)
Downloading torchsummary-1.5.1-py3-none-any.whl (2.8 kB)
Installing collected packages: torchsummary
Successfully installed torchsummary-1.5.1



[notice] A new release of pip is available: 23.3.2 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [7]:
from torchsummary import summary
summary(net.to('cuda'), (3, 512, 512))

AttributeError: 'list' object has no attribute 'size'