In [1]:
!pip install imgaug -i https://mirror.baidu.com/pypi/simple
!pip install paddlex -i https://mirror.baidu.com/pypi/simple

Looking in indexes: https://mirror.baidu.com/pypi/simple
Looking in indexes: https://mirror.baidu.com/pypi/simple


In [2]:
# 导入包
import matplotlib
import os
import paddlex as pdx
import paddle.fluid as fluid
# 设置使用0号GPU卡（如无GPU，执行此代码后仍然会使用CPU训练模型）
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

In [3]:
from paddlex.seg import transforms  # 语义分割
import imgaug.augmenters as iaa
# 图像预处理和增强https://blog.csdn.net/weixin_43593330/article/details/107206239
train_transforms = transforms.Compose([
    transforms.RandomHorizontalFlip(),   # 以一定概率对图像进行水平翻转，参数prob（float）：随机水平翻转概率，默认0.5
    transforms.Resize(target_size=300),  # 调整图像大小
    transforms.RandomPaddingCrop(crop_size=256),  # 随机裁剪，当所需要的裁剪尺寸大于原图，则进行padding操作
    transforms.RandomBlur(prob=0.1),  # 以一定概率对图像进行高斯模糊，参数prob（float）：图像模糊概率，默认0.1
    transforms.RandomRotate(rotate_range=15),  # 对图像进行随机旋转，当存在标注图像时，同步进行，并对旋转后的图像进行padding
    transforms.RandomDistort(brightness_range=0.5),  # 以一定概率对图像进行随机像素内容变换，该方法必须在Normalize之前使用
    transforms.Normalize()  # 归一化
])

eval_transforms = transforms.Compose([
    transforms.Resize(256),
    transforms.Normalize()
])

In [4]:
# 数据集的解压
#!unzip data/data58325/img_testA.zip
#!unzip data/data58325/lab_train.zip
#!unzip data/data58325/img_train.zip

In [5]:
import numpy as np

datas = []
# 定义训练图片和标签
image_base = 'img_train'
annos_base = 'lab_train'
# os.listdir() 方法用于返回指定的文件夹包含的文件或文件夹的名字的列表。这个列表以字母顺序。 它不包括 '.' 和'..' 即使它在文件夹中
ids_ = [v.split('.')[0] for v in os.listdir(image_base)]

for id_ in ids_:
    img_pt0 = os.path.join(image_base, '{}.jpg'.format(id_))  # os.path.join() 方法把目录和文件名合成一个路径
    img_pt1 = os.path.join(annos_base, '{}.png'.format(id_))
    datas.append((img_pt0.replace('/home/aistudio/work/', ''), img_pt1.replace('/home/aistudio/work/', '')))  # 将 img_pt0和 img_pt1连接
    if os.path.exists(img_pt0) and os.path.exists(img_pt1):  # os.path.exists（）路径存在则返回True,路径损坏返回False
        pass
    else:
        raise "path invalid!"

print('total:', len(datas))
print(datas[0][0])
print(datas[0][1])

data_dir = '/home/aistudio/work/'

total: 145981
img_train/T118288.jpg
lab_train/T118288.png


In [6]:
import numpy as np
# 给图像标签赋予含义
labels = [
    '建筑', '耕地', '林地',
    '水体', '道路', '草地',
    '其他'
]
# 标签写入labels.txt文件
with open('labels.txt', 'w') as f:
    for v in labels:
        f.write(v+'\n')
# 将数据进行打乱
np.random.seed(5)
np.random.shuffle(datas)
# 将数据划分为训练集和测试集
split_num = int(0.4*len(datas))

datas = datas[:-split_num]
split_num = int(0.2*len(datas))

train_data = datas[:-split_num]
valid_data = datas[-split_num:]
# 将训练集写入train_list.txt
with open('train_list.txt', 'w') as f:
    for img, lbl in train_data:
        f.write(img + ' ' + lbl + '\n')
# 将测试集写入valid_list.txt
with open('valid_list.txt', 'w') as f:
    for img, lbl in valid_data:
        f.write(img + ' ' + lbl + '\n')

print('train:', len(train_data))
print('valid:', len(valid_data))

train: 70072
valid: 17517


In [7]:
data_dir = './'
# paddlex.datasets的说明文档百度：https://paddlex.readthedocs.io/zh_CN/develop/apis/datasets.html  
# paddlex.datasets.SegDataset()用于语义分割模型
train_dataset = pdx.datasets.SegDataset(
    data_dir=data_dir,  # 数据集路径
    file_list='train_list.txt',  # 描述数据集图片文件和对应标注文件的文件路径
    label_list='labels.txt',  # 描述数据集包含的类别信息文件路径
    transforms=train_transforms,  # 数据集中每个样本的预处理/增强算子
    shuffle=True)  #是否需要对数据集中样本打乱顺序。默认为False
    
eval_dataset = pdx.datasets.SegDataset(
    data_dir=data_dir,
    file_list='valid_list.txt',
    label_list='labels.txt',
    transforms=eval_transforms)

2020-11-27 18:33:59 [INFO]	70072 samples in file train_list.txt
2020-11-27 18:33:59 [INFO]	17517 samples in file valid_list.txt


In [None]:
num_classes = len(train_dataset.labels)
#regularizer_1=fluid.regularizer.L2DecayRegularizer(regularization_coeff=0.1)
# 构建DeepLabv3p分割器 链接：https://paddlex.readthedocs.io/zh_CN/develop/apis/models/semantic_segmentation.html
# 模型1
'''model = pdx.seg.DeepLabv3p(
    num_classes=num_classes,  # 类别数
    backbone='MobileNetV3_large_x1_0_ssld',   #  DeepLabv3+的backbone网络，实现特征图的计算，取值范围为[‘Xception65’, ‘Xception41’, ‘MobileNetV2_x0.25’, ‘MobileNetV2_x0.5’, ‘MobileNetV2_x1.0’, ‘MobileNetV2_x1.5’, ‘MobileNetV2_x2.0’, ‘MobileNetV3_large_x1_0_ssld’]，默认值为’MobileNetV2_x1.0’
)'''
# 模型2
# model = pdx.seg.FastSCNN(num_classes=num_classes)
# 模型3
model = pdx.seg.HRNet(num_classes=num_classes,width=40)
# 模型4
# model = pdx.seg.UNet(num_classes=num_classes,upsample_mode='bilinear')
model.train(
    num_epochs=15,  # 训练迭代轮数
    train_dataset=train_dataset,  # 训练数据读取器
    train_batch_size=16,  #训练数据batch大小，同时作为验证数据batch大小。默认2
    eval_dataset=eval_dataset,  # 评估数据读取器
    save_interval_epochs=1,  # 模型保存间隔，默认1
    save_dir='output1/deeplab',  # 模型保存路径
    log_interval_steps=200,  # 训练日志输出间隔，默认2
    pretrain_weights='COCO',
    #optimizer =fluid.optimizer.MomentumOptimizer(learning_rate=0.0028, momentum=0.9, use_nesterov=True, regularization=fluid.regularizer.L1DecayRegularizer(0.2)),
    learning_rate=0.00256,
    lr_decay_power=0.975,
    eval_metric_loss=0.012
    )  # 为路径时，则加载路径下预训练模型



This call to matplotlib.use() has no effect because the backend has already
been chosen; matplotlib.use() must be called *before* pylab, matplotlib.pyplot,
or matplotlib.backends is imported for the first time.

The backend was *originally* set to 'module://ipykernel.pylab.backend_inline' by the following code:
  File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/traitlets/config/application.py", line 664, in launch_instance
    app.start()
  File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/ipykernel/kernelapp.py", line 505, in start
    self.io

2020-11-27 18:34:22 [INFO]	Connecting PaddleHub server to get pretrain weights...


In [None]:
# DeepLabv3p模型评估接口。
model.evaluate(eval_dataset, batch_size=1, epoch_id=None, return_details=False)
# eval_dataset (paddlex.datasets): 评估数据读取器。
# batch_size (int): 评估时的batch大小。默认1。
# epoch_id (int): 当前评估模型所在的训练轮数。
# return_details (bool): 是否返回详细信息。默认False。

In [None]:
# 使用模型
model = pdx.load_model('./output1/deeplab/best_model')

In [None]:
from tqdm import tqdm  # tqdm 是一个快速，可扩展的Python进度条，可以在 Python 长循环中添加一个进度提示信息，用户只需要封装任意的迭代器 tqdm(iterator)。
import cv2

test_base = 'img_testA/'
out_base = 'ccf_baidu_remote_sense/results/'

if not os.path.exists(out_base):  # 判断out_base的路径是否存在
    os.makedirs(out_base)  # os.makedirs() 方法用于递归创建目录


for im in tqdm(os.listdir(test_base)):  # os.listdir()返回path指定的文件夹包含的文件或文件夹的名字的列表
    if not im.endswith('.jpg'):  # 判断文件名后缀
        continue
    pt = test_base + im
    #result = model.overlap_tile_predict(pt, tile_size=[512, 512], pad_size=[64, 64], batch_size=16)
    result = model.predict(pt)  # 使用训练好的模型
    cv2.imwrite(out_base+im.replace('jpg', 'png'), result['label_map'])  #  cv2.imwrite()保存图像

请点击[此处](https://ai.baidu.com/docs#/AIStudio_Project_Notebook/a38e5576)查看本环境基本用法.  <br>
Please click [here ](https://ai.baidu.com/docs#/AIStudio_Project_Notebook/a38e5576) for more detailed instructions. 