In [None]:
import os

# Work from inside the mmsegmentation checkout; later cells use paths relative to it.
# NOTE: the chdir is relative to the current directory, so restart the kernel before re-running this cell.
os.chdir('mmsegmentation')

# exist_ok=True keeps the cell from raising FileExistsError when the folders already exist.
for folder in ('checkpoint', 'outputs', 'data'):
    os.makedirs(folder, exist_ok=True)

In [None]:
# Fetch the pretrained PSPNet R50-D8 Cityscapes checkpoint from the OpenMMLab download server; -P saves it under ./checkpoint.
!wget https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth -P checkpoint

In [None]:
import matplotlib
import matplotlib.pyplot as plt

# Select the SimHei font family so the Chinese text used in the plots below can render.
matplotlib.rcParams['font.family'] = 'SimHei'

In [None]:
# Smoke test: the Chinese title and axis labels should render with the configured font.
fig, ax = plt.subplots()
ax.plot([1, 2, 3], [100, 500, 300])
ax.set_title('matplotlib中文字体测试', fontsize=25)
ax.set_xlabel('X轴', fontsize=15)
ax.set_ylabel('Y轴', fontsize=15)
plt.show()

探索数据集

In [None]:
import os

import cv2
import numpy as np
from PIL import Image
from tqdm import tqdm

import matplotlib.pyplot as plt
%matplotlib inline
PATH_IMAGE = '../Watermelon87_Semantic_Seg_Mask/img_dir/train'
PATH_MASKS = '../Watermelon87_Semantic_Seg_Mask/ann_dir/train'

print('图像个数', len(os.listdir(PATH_IMAGE)))
print('标注个数', len(os.listdir(PATH_MASKS)))

# Inspect a single image together with its semantic-segmentation annotation.
# The image and its mask share the same stem and differ only in extension.
file_name1 = '04_35-2.jpg'
file_name2 = '04_35-2.png'
img_path = os.path.join(PATH_IMAGE, file_name1)
mask_path = os.path.join(PATH_MASKS, file_name2)

print('图像路径', img_path)
print('标注路径', mask_path)

img = cv2.imread(img_path)
mask = cv2.imread(mask_path)
# cv2.imread returns None instead of raising when a file is missing or unreadable;
# fail here with the offending path rather than with an AttributeError on .shape below.
if img is None:
    raise FileNotFoundError(img_path)
if mask is None:
    raise FileNotFoundError(mask_path)

print("图像的shape:",img.shape)
# OpenCV decodes images as BGR while matplotlib expects RGB, so reverse the
# channel axis for display; otherwise red and blue appear swapped.
plt.imshow(img[:, :, ::-1])
plt.show()

print("mask的shape:",mask.shape)
# List the label values present in the mask. A bare expression in the middle of
# a cell is not displayed, so print it explicitly.
# NOTE(review): the original note said 0 = background and 1 = glomeruli, but the
# data path points at a watermelon dataset; confirm the class indices.
print(np.unique(mask))

# Show the first channel of the mask with matplotlib's default colormap.
plt.imshow(mask[:,:,0])
plt.show()

# Scale the label values so non-zero classes become visible.
# NOTE(review): assumes labels are 0/1; larger labels may wrap around for 8-bit masks — confirm.
plt.imshow(mask*255)
plt.show()

训练

In [2]:
import numpy as np
from PIL import Image

import os.path as osp
from tqdm import tqdm

import mmcv
import mmengine
import matplotlib.pyplot as plt
%matplotlib inline

import os

# NOTE(review): CUDA_LAUNCH_BLOCKING=1 is generally a debugging aid that makes CUDA
# kernel launches synchronous and slows training — confirm it is still wanted.
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'

# Class names and the display colour for each class, in the same order.
# NOTE(review): 'glomeruili' looks like a typo for 'glomeruli', and neither name
# matches the Watermelon87 data path used in the exploration cell — confirm the class list.
classes = ('background', 'glomeruili')
palette = [[128, 128, 128], [151, 189, 8]]

from mmseg.registry import DATASETS
from mmseg.datasets import BaseSegDataset

# force=True lets this cell be re-run in the same kernel; without it the registry
# raises because the class name is already registered.
@DATASETS.register_module(force=True)
class StanfordBackgroundDataset(BaseSegDataset):
    """Segmentation dataset registered under its class name for use in configs.

    Images are matched by the '.jpg' suffix and segmentation maps by the '.png'
    suffix. Class names and palette come from the module-level ``classes`` and
    ``palette`` defined earlier in this cell.
    """

    METAINFO = dict(classes=classes, palette=palette)

    def __init__(self, **kwargs):
        """Forward all keyword arguments to BaseSegDataset with the fixed file suffixes."""
        super().__init__(img_suffix='.jpg', seg_map_suffix='.png', **kwargs)



In [3]:
from mmengine import Config
# Load the PSPNet experiment configuration; the path is relative to the current working directory.
cfg = Config.fromfile('./pspnet_config.py')

In [4]:
from mmengine.runner import Runner
from mmseg.utils import register_all_modules

# Register all mmseg modules into the registries so the names used in the config can be resolved.
# Do not init the default scope here because it will be initialized by the runner.
register_all_modules(init_default_scope=False)
# Build the runner from the loaded config; training is started in a later cell.
runner = Runner.from_cfg(cfg)

06/18 20:40:30 - mmengine - [4m[97mINFO[0m - 
------------------------------------------------------------
System environment:
    sys.platform: linux
    Python: 3.9.16 (main, Mar  8 2023, 14:00:05) [GCC 11.2.0]
    CUDA available: True
    numpy_random_seed: 0
    GPU 0: NVIDIA GeForce RTX 3090
    CUDA_HOME: /usr
    NVCC: Cuda compilation tools, release 11.5, V11.5.119
    GCC: gcc (Ubuntu 11.3.0-1ubuntu1~22.04.1) 11.3.0
    PyTorch: 1.10.1+cu113
    PyTorch compiling details: PyTorch built with:
  - GCC 7.3
  - C++ Version: 201402
  - Intel(R) Math Kernel Library Version 2020.0.0 Product Build 20191122 for Intel(R) 64 architecture applications
  - Intel(R) MKL-DNN v2.2.3 (Git Hash 7336ca9f055cf1bfa13efb658fe15dc9b41f0740)
  - OpenMP 201511 (a.k.a. OpenMP 4.5)
  - LAPACK is enabled (usually provided by MKL)
  - NNPACK is enabled
  - CPU capability usage: AVX2
  - CUDA Runtime 11.3
  - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_5



06/18 20:40:35 - mmengine - [4m[97mINFO[0m - Distributed training is not used, all SyncBatchNorm (SyncBN) layers in the model will be automatically reverted to BatchNormXd layers if they are used.
06/18 20:40:35 - mmengine - [4m[97mINFO[0m - Hooks will be executed in the following order:
before_run:
(VERY_HIGH   ) RuntimeInfoHook                    
(BELOW_NORMAL) LoggerHook                         
 -------------------- 
before_train:
(VERY_HIGH   ) RuntimeInfoHook                    
(NORMAL      ) IterTimerHook                      
(VERY_LOW    ) CheckpointHook                     
 -------------------- 
before_train_epoch:
(VERY_HIGH   ) RuntimeInfoHook                    
(NORMAL      ) IterTimerHook                      
(NORMAL      ) DistSamplerSeedHook                
 -------------------- 
before_train_iter:
(VERY_HIGH   ) RuntimeInfoHook                    
(NORMAL      ) IterTimerHook                      
 -------------------- 
after_train_iter:
(VERY_HIGH   ) Runti



In [None]:

# Start training with the runner built from the PSPNet config.
runner.train()

推理

In [None]:
from mmseg.apis import init_model, inference_model, show_result_pyplot
# Initialize the model from the training config and a saved checkpoint, on the first GPU.
# NOTE(review): the first cell already chdir'd into 'mmsegmentation', so this relative
# path resolves to a nested 'mmsegmentation' folder — confirm it matches the config's work directory.
checkpoint_path = './mmsegmentation/iter_800.pth'
model = init_model(cfg, checkpoint_path, 'cuda:0')
# Load the image
img = mmcv.imread('./pic.jpeg')
result = inference_model(model, img)
# Overlay the prediction on the image at 0.7 opacity and save it to pred.jpg.
# The returned value is kept in `visualization` but is not used later in this cell.
visualization = show_result_pyplot(model, img, result, opacity=0.7, out_file='pred.jpg')

In [None]:
import cv2
video='./video.mp4'
output_file='./video_output.mp4'
# 'mp4v' matches the .mp4 output container; the previous 'MJPG' tag is an
# AVI-oriented codec tag and does not match an MP4 file.
output_fourcc='mp4v'

# build input video
cap = cv2.VideoCapture(video)

# init output video
writer = None
output_height = None
output_width = None

# Everything after opening the capture runs under try/finally so the capture
# (and the writer, once created) is always released, even if setup fails.
try:
    # VideoCapture does not raise on a missing or unreadable file; without this
    # check the loop below would end immediately and produce no output video.
    if not cap.isOpened():
        raise FileNotFoundError(f'cannot open input video: {video}')

    input_height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
    input_width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
    input_fps = cap.get(cv2.CAP_PROP_FPS)

    if output_file is not None:
        fourcc = cv2.VideoWriter_fourcc(*output_fourcc)
        output_fps = input_fps
        # cap.get returns floats; VideoWriter needs an integer frame size
        output_height = int(input_height)
        output_width = int(input_width)
        writer = cv2.VideoWriter(output_file, fourcc, output_fps,
                                 (output_width, output_height), True)
        # A writer that failed to open would otherwise drop every frame silently.
        if not writer.isOpened():
            raise RuntimeError(f'cannot open output video for writing: {output_file}')

    # start looping
    while True:
        flag, frame = cap.read()
        if not flag:
            # no more frames (or a read error): stop
            break

        # test a single image
        result = inference_model(model, frame)

        # blend raw image and prediction
        # NOTE(review): this call may also try to display every frame if the
        # installed mmseg version shows results by default — confirm.
        draw_img = show_result_pyplot(model, frame, result)

        if writer is not None:
            # The writer was opened with a fixed frame size; resize any frame that differs.
            if (draw_img.shape[0] != output_height
                    or draw_img.shape[1] != output_width):
                draw_img = cv2.resize(draw_img,
                                      (output_width, output_height))
            # flipCode -1 flips around both axes (a 180-degree rotation).
            # NOTE(review): presumably compensates for the source video's orientation — confirm.
            draw_img = cv2.flip(draw_img,-1)
            writer.write(draw_img)
finally:
    if writer is not None:
        writer.release()
    cap.release()

选做

In [7]:
# Optional exercise: repeat the training run with a SegFormer config.
cfg_1 = Config.fromfile('./segformer_config.py')
# Same registration call as the PSPNet run; the runner initializes the default scope itself.
register_all_modules(init_default_scope=False)
# Build a separate runner for this config and start training immediately.
runner_1 = Runner.from_cfg(cfg_1)
runner_1.train()

06/18 20:46:04 - mmengine - [4m[97mINFO[0m - 
------------------------------------------------------------
System environment:
    sys.platform: linux
    Python: 3.9.16 (main, Mar  8 2023, 14:00:05) [GCC 11.2.0]
    CUDA available: True
    numpy_random_seed: 1629631404
    GPU 0: NVIDIA GeForce RTX 3090
    CUDA_HOME: /usr
    NVCC: Cuda compilation tools, release 11.5, V11.5.119
    GCC: gcc (Ubuntu 11.3.0-1ubuntu1~22.04.1) 11.3.0
    PyTorch: 1.10.1+cu113
    PyTorch compiling details: PyTorch built with:
  - GCC 7.3
  - C++ Version: 201402
  - Intel(R) Math Kernel Library Version 2020.0.0 Product Build 20191122 for Intel(R) 64 architecture applications
  - Intel(R) MKL-DNN v2.2.3 (Git Hash 7336ca9f055cf1bfa13efb658fe15dc9b41f0740)
  - OpenMP 201511 (a.k.a. OpenMP 4.5)
  - LAPACK is enabled (usually provided by MKL)
  - NNPACK is enabled
  - CPU capability usage: AVX2
  - CUDA Runtime 11.3
  - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,



06/18 20:46:36 - mmengine - [4m[97mINFO[0m - paramwise_options -- backbone.layers.0.0.norm.weight:lr=6e-05
06/18 20:46:36 - mmengine - [4m[97mINFO[0m - paramwise_options -- backbone.layers.0.0.norm.weight:weight_decay=0.0
06/18 20:46:36 - mmengine - [4m[97mINFO[0m - paramwise_options -- backbone.layers.0.0.norm.weight:decay_mult=0.0
06/18 20:46:36 - mmengine - [4m[97mINFO[0m - paramwise_options -- backbone.layers.0.0.norm.bias:lr=6e-05
06/18 20:46:36 - mmengine - [4m[97mINFO[0m - paramwise_options -- backbone.layers.0.0.norm.bias:weight_decay=0.0
06/18 20:46:36 - mmengine - [4m[97mINFO[0m - paramwise_options -- backbone.layers.0.0.norm.bias:decay_mult=0.0
06/18 20:46:36 - mmengine - [4m[97mINFO[0m - paramwise_options -- backbone.layers.0.1.0.norm1.weight:lr=6e-05
06/18 20:46:36 - mmengine - [4m[97mINFO[0m - paramwise_options -- backbone.layers.0.1.0.norm1.weight:weight_decay=0.0
06/18 20:46:36 - mmengine - [4m[97mINFO[0m - paramwise_options -- backbone.layers

EncoderDecoder(
  (data_preprocessor): SegDataPreProcessor()
  (backbone): MixVisionTransformer(
    (layers): ModuleList(
      (0): ModuleList(
        (0): PatchEmbed(
          (projection): Conv2d(3, 64, kernel_size=(7, 7), stride=(4, 4), padding=(3, 3))
          (norm): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
        )
        (1): ModuleList(
          (0): TransformerEncoderLayer(
            (norm1): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
            (attn): EfficientMultiheadAttention(
              (attn): MultiheadAttention(
                (out_proj): NonDynamicallyQuantizableLinear(in_features=64, out_features=64, bias=True)
              )
              (proj_drop): Dropout(p=0.0, inplace=False)
              (dropout_layer): DropPath()
              (sr): Conv2d(64, 64, kernel_size=(8, 8), stride=(8, 8))
              (norm): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
            )
            (norm2): LayerNorm((64,), eps=1e-06, el