In [1]:
# Configure the CUDA debugging flags before torch is imported and before any
# CUDA query runs: the CUDA runtime reads CUDA_LAUNCH_BLOCKING when it is
# initialised, so setting it after `torch.cuda.is_available()` may be too late.
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"
# NOTE(review): TORCH_USE_CUDA_DSA looks like a PyTorch build-time option;
# confirm that exporting it at runtime has any effect on a prebuilt wheel.
os.environ['TORCH_USE_CUDA_DSA'] = '1'

# Check Pytorch installation
import torch
import torchvision
print(torch.__version__, torch.cuda.is_available())

# Check MMSegmentation installation
import mmseg
print(mmseg.__version__)

import mmcv
import mmengine
from mmseg.registry import DATASETS
from mmseg.datasets import BaseSegDataset
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import os.path as osp
import numpy as np
from PIL import Image

2.1.1+cu121 True
1.2.1


In [2]:
# Root folder of the dataset; the annotation folder below is joined onto it
# when the label images are read, and both folders are handed to the dataset
# config as image / segmentation-map prefixes.
data_root = '/workspaces/YJ_661_final_project/data'
img_dir = 'img_for_inference_2d'
ann_dir = 'annotation_2d_binary'

# Class names registered as the dataset's METAINFO.
# Presumably label value 0 is 'background' and 1 is 'fluid' -- TODO confirm
# against the annotation images.
classes = ('background', 'fluid')

In [3]:
# Decide the class weights from how often each label occurs in the pixels of
# the training split. The rounded inverse frequencies printed at the end are
# the candidates for the loss `class_weight`.
split_file = osp.join(data_root, 'splits', 'train.txt')
with open(split_file, 'r') as f:
    # Skip blank lines so a trailing newline does not turn into an empty
    # sample name (which would later be opened as '.png').
    train_data_names = [line.strip() for line in f if line.strip()]

# count the number of labels in each class
num_class_0 = 0
num_class_1 = 0
# label value -> pixel count for anything that is neither 0 nor 1
unexpected_labels = {}

for name in train_data_names:
    ann_path = osp.join(data_root, ann_dir, name + '.png')
    # Close the image file as soon as its pixels are copied into an array.
    with Image.open(ann_path) as ann_img:
        ann = np.array(ann_img)
    values, counts = np.unique(ann, return_counts=True)
    # Look the counts up by label VALUE, not by position: np.unique only
    # returns the values that are present, so for an image containing just
    # label 1 (or an extra value such as 255) positional indexing would credit
    # the pixels to the wrong class.
    per_label = dict(zip(values.tolist(), counts.tolist()))
    num_class_0 += per_label.pop(0, 0)
    num_class_1 += per_label.pop(1, 0)
    for value, count in per_label.items():
        unexpected_labels[value] = unexpected_labels.get(value, 0) + count

if unexpected_labels:
    print('WARNING: pixels with labels other than 0/1 were not counted: ', unexpected_labels)

total_pixels = num_class_0 + num_class_1
if total_pixels == 0:
    # Fail with an explicit message instead of an opaque division error.
    raise ValueError(f'No pixels labelled 0 or 1 found for the samples listed in {split_file}')

print('num_class_0: ', num_class_0, 'percentage: ', num_class_0 / total_pixels)
print('num_class_1: ', num_class_1, 'percentage: ', num_class_1 / total_pixels)
# Inverse-frequency weighting: each class is weighted by the share of the other.
print('weight of class 0: ', round(num_class_1 / total_pixels, 2))
print('weight of class 1: ', round(num_class_0 / total_pixels, 2))

num_class_0:  33237795 percentage:  0.9915317747949505
num_class_1:  283869 percentage:  0.008468225205049487
weight of class 0:  0.01
weight of class 1:  0.99


In [4]:
@DATASETS.register_module()
class BOE_Chiu_Dataset(BaseSegDataset):
    """Segmentation dataset whose images and label maps are both ``.png`` files.

    The class is registered in ``DATASETS`` so a config can refer to it by
    name, and it exposes the notebook-level ``classes`` tuple as its class
    names.

    Args:
        dataset: Accepted but never used.
        times: Accepted but never used.
        **kwargs: Forwarded unchanged to ``BaseSegDataset.__init__``.

    NOTE(review): ``dataset`` and ``times`` are presumably swallowed so that
    leftover keys of a wrapper-dataset config do not reach the base class --
    confirm against the config that instantiates this dataset.
    """

    METAINFO = {'classes': classes}

    def __init__(self, dataset=None, times=None, **kwargs):
        # Both file kinds share the same extension in this dataset.
        png_suffixes = dict(img_suffix='.png', seg_map_suffix='.png')
        super().__init__(**png_suffixes, **kwargs)

In [5]:
# Download the config and checkpoint files (one-off step; uncomment the next line to run it)
# !mim download mmsegmentation --config unet-s5-d16_pspnet_4xb4-40k_hrf-256x256 --dest .

In [6]:
from mmengine import Config

# Read the base PSPNet-UNet (HRF, 256x256) config and show it before it is customised.
base_config_path = '/workspaces/YJ_661_final_project/src/unet-s5-d16_pspnet_4xb4-40k_hrf-256x256.py'
cfg = Config.fromfile(base_config_path)
print('Config:\n' + cfg.pretty_text)

Config:
crop_size = (
    256,
    256,
)
data_preprocessor = dict(
    bgr_to_rgb=True,
    mean=[
        123.675,
        116.28,
        103.53,
    ],
    pad_val=0,
    seg_pad_val=255,
    size=(
        256,
        256,
    ),
    std=[
        58.395,
        57.12,
        57.375,
    ],
    type='SegDataPreProcessor')
data_root = 'data/HRF'
dataset_type = 'HRFDataset'
default_hooks = dict(
    checkpoint=dict(by_epoch=False, interval=4000, type='CheckpointHook'),
    logger=dict(interval=50, log_metric_by_epoch=False, type='LoggerHook'),
    param_scheduler=dict(type='ParamSchedulerHook'),
    sampler_seed=dict(type='DistSamplerSeedHook'),
    timer=dict(type='IterTimerHook'),
    visualization=dict(type='SegVisualizationHook'))
default_scope = 'mmseg'
env_cfg = dict(
    cudnn_benchmark=True,
    dist_cfg=dict(backend='nccl'),
    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0))
img_ratios = [
    0.5,
    0.75,
    1.0,
    1.25,
    1.5,
    1.75,
]
img_scale =

In [7]:
# Customise the base config for the two-class fluid segmentation task:
# model heads, data pipelines, dataloaders, checkpoint and schedule.

# ---- Model -----------------------------------------------------------------
# Since we use only one GPU, BN is used instead of SyncBN
cfg.norm_cfg = dict(type='BN', requires_grad=True)
cfg.crop_size = (256, 256)
cfg.model.data_preprocessor.size = cfg.crop_size
cfg.model.backbone.norm_cfg = cfg.norm_cfg

# Rounded inverse class frequencies (background, fluid) as printed by the
# class-weight cell; they counter the heavy background/fluid imbalance.
weights = [0.01, 0.99]

# Both heads get the same normalisation, class count and weighted loss.
# NOTE(review): the auxiliary head is given loss_weight=1.0 like the main
# head -- confirm this is intended rather than a smaller auxiliary weight.
for head in (cfg.model.decode_head, cfg.model.auxiliary_head):
    head.norm_cfg = cfg.norm_cfg
    # modify num classes of the model in decode/auxiliary head
    head.num_classes = len(classes)
    head.loss_decode = dict(
        type='CrossEntropyLoss',
        use_sigmoid=False,
        loss_weight=1.0,
        class_weight=weights)

# ---- Data ------------------------------------------------------------------
# Modify dataset type and path
cfg.dataset_type = 'BOE_Chiu_Dataset'
cfg.data_root = data_root

cfg.train_dataloader.batch_size = 2

cfg.train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='Resize', scale=(512, 512), keep_ratio=True),
    dict(type='RandomCrop', crop_size=cfg.crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PackSegInputs')
]

cfg.test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='Resize', scale=(512, 512), keep_ratio=True),
    # add loading annotation after ``Resize`` because ground truth
    # does not need to do resize data transform
    dict(type='LoadAnnotations'),
    dict(type='PackSegInputs')
]


def _split_dataset_cfg(split_name, pipeline):
    """Return a complete dataset config for ``<data_root>/splits/<split_name>.txt``."""
    return dict(
        type=cfg.dataset_type,
        data_root=cfg.data_root,
        data_prefix=dict(img_path=img_dir, seg_map_path=ann_dir),
        ann_file=osp.join(cfg.data_root, 'splits', split_name + '.txt'),
        pipeline=pipeline)


# Replace the dataset entries wholesale instead of patching single fields of
# the inherited ones: patching only `type` keeps whatever other keys the base
# config defined for its own dataset (e.g. wrapper-specific `times` / nested
# `dataset` entries), which would then be passed to BOE_Chiu_Dataset.
cfg.train_dataloader.dataset = _split_dataset_cfg('train', cfg.train_pipeline)
cfg.val_dataloader.dataset = _split_dataset_cfg('val', cfg.test_pipeline)

# Testing reuses the validation split.
cfg.test_dataloader = cfg.val_dataloader

# ---- Checkpoint, output and schedule ----------------------------------------
# Load the pretrained weights
cfg.load_from = 'pspnet_unet_s5-d16_256x256_40k_hrf_20201227_181818-fdb7e29b.pth'

# Set up working dir to save files and logs.
cfg.work_dir = './work_dirs/unet_try'

# Short run: validate and checkpoint every 200 iterations, log every 10.
cfg.train_cfg.max_iters = 400
cfg.train_cfg.val_interval = 200
cfg.default_hooks.logger.interval = 10
cfg.default_hooks.checkpoint.interval = 200

# Set seed to facilitate reproducing the result
cfg['randomness'] = dict(seed=0)

# Let's have a look at the final config used for training
print(f'Config:\n{cfg.pretty_text}')

Config:
crop_size = (
    256,
    256,
)
data_preprocessor = dict(
    bgr_to_rgb=True,
    mean=[
        123.675,
        116.28,
        103.53,
    ],
    pad_val=0,
    seg_pad_val=255,
    size=(
        256,
        256,
    ),
    std=[
        58.395,
        57.12,
        57.375,
    ],
    type='SegDataPreProcessor')
data_root = '/workspaces/YJ_661_final_project/data'
dataset_type = 'BOE_Chiu_Dataset'
default_hooks = dict(
    checkpoint=dict(by_epoch=False, interval=200, type='CheckpointHook'),
    logger=dict(interval=10, log_metric_by_epoch=False, type='LoggerHook'),
    param_scheduler=dict(type='ParamSchedulerHook'),
    sampler_seed=dict(type='DistSamplerSeedHook'),
    timer=dict(type='IterTimerHook'),
    visualization=dict(type='SegVisualizationHook'))
default_scope = 'mmseg'
env_cfg = dict(
    cudnn_benchmark=True,
    dist_cfg=dict(backend='nccl'),
    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0))
img_ratios = [
    0.5,
    0.75,
    1.0,
    1.25

In [8]:
from mmengine.runner import Runner

# Build the runner from the customised config `cfg`; the captured output
# shows the environment summary and hook order being logged at this point.
runner = Runner.from_cfg(cfg)

12/06 03:27:04 - mmengine - [4m[97mINFO[0m - 
------------------------------------------------------------
System environment:
    sys.platform: linux
    Python: 3.10.13 (main, Sep 11 2023, 13:44:35) [GCC 11.2.0]
    CUDA available: True
    numpy_random_seed: 0
    GPU 0: Tesla V100-PCIE-16GB
    CUDA_HOME: None
    GCC: gcc (Ubuntu 9.4.0-1ubuntu1~20.04.2) 9.4.0
    PyTorch: 2.1.1+cu121
    PyTorch compiling details: PyTorch built with:
  - GCC 9.3
  - C++ Version: 201703
  - Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications
  - Intel(R) MKL-DNN v3.1.1 (Git Hash 64f6bcbcbab628e96f33a62c3e975f8535a7bde4)
  - OpenMP 201511 (a.k.a. OpenMP 4.5)
  - LAPACK is enabled (usually provided by MKL)
  - NNPACK is enabled
  - CPU capability usage: AVX2
  - CUDA Runtime 12.1
  - NVCC architecture flags: -gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_70,code=sm_70;-gencode;arch=comput



12/06 03:27:06 - mmengine - [4m[97mINFO[0m - Distributed training is not used, all SyncBatchNorm (SyncBN) layers in the model will be automatically reverted to BatchNormXd layers if they are used.
12/06 03:27:06 - mmengine - [4m[97mINFO[0m - Hooks will be executed in the following order:
before_run:
(VERY_HIGH   ) RuntimeInfoHook                    
(BELOW_NORMAL) LoggerHook                         
 -------------------- 
before_train:
(VERY_HIGH   ) RuntimeInfoHook                    
(NORMAL      ) IterTimerHook                      
(VERY_LOW    ) CheckpointHook                     
 -------------------- 
before_train_epoch:
(VERY_HIGH   ) RuntimeInfoHook                    
(NORMAL      ) IterTimerHook                      
(NORMAL      ) DistSamplerSeedHook                
 -------------------- 
before_train_iter:
(VERY_HIGH   ) RuntimeInfoHook                    
(NORMAL      ) IterTimerHook                      
 -------------------- 
after_train_iter:
(VERY_HIGH   ) Runti



In [9]:
# start training
# NOTE(review): the saved output of this cell ends with
# "ValueError: only one element tensors can be converted to Python scalars"
# right after the checkpoint is loaded. The traceback was not kept, so the
# failing call still has to be identified before the later cells can run.
runner.train()

Loads checkpoint by local backend from path: pspnet_unet_s5-d16_256x256_40k_hrf_20201227_181818-fdb7e29b.pth
12/06 03:27:07 - mmengine - [4m[97mINFO[0m - Load checkpoint from pspnet_unet_s5-d16_256x256_40k_hrf_20201227_181818-fdb7e29b.pth
12/06 03:27:07 - mmengine - [4m[97mINFO[0m - Checkpoints will be saved to /workspaces/YJ_661_final_project/src/work_dirs/unet_try.


ValueError: only one element tensors can be converted to Python scalars

In [None]:
from mmseg.apis import init_model, inference_model, show_result_pyplot

# Rebuild the network described by `cfg` and restore the weights that were
# checkpointed at iteration 200, placing the model on the first GPU.
checkpoint_path = './work_dirs/unet_try/iter_200.pth'
model = init_model(cfg, checkpoint=checkpoint_path, device='cuda:0')

In [None]:
# for i in range(110):
#     img = mmcv.imread(f'/workspaces/YJ_661_final_project/data/img_for_inference_2d/img_{i}.png')
#     result = inference_model(model, img)
#     plt.figure(figsize=(8, 6))
#     vis_result = show_result_pyplot(model, img, result)
#     plt.imshow(mmcv.bgr2rgb(vis_result))