# 第1步 导入相关库

In [1]:
import numpy as np
from pathlib import Path
from tqdm import tqdm
import pdal
import json
from sklearn.neighbors import KDTree
import pickle
from prepare_opengf import normalize_attributes
import os

# 第2步 编写prepare_opengf 文件执行入口

### 2.1 配置预处理参数

In [2]:
# Preprocessing configuration consumed by the cells below.
data_dict = dict(
    # Root directory of the raw dataset.
    data_root="E:/OpenGF",
    # Number of annotated classes in the point cloud.
    num_classes=2,
    # Cell size of the downsampling grid; 1.0 means one point is kept per
    # 1 m * 1 m cell.
    grid_size=5.0,
    # Whether to use the intensity attribute. May be set to False even when
    # the dataset has it, meaning "do not use"; used by default.
    HasIntensity=True,
    # Whether to use the RGB attributes. May be set to False even when the
    # dataset has them, meaning "do not use".
    HasRGB=False,
)

# 第3步 laz文件预处理

## 3.1 创建预处理文件存放目录，并获取所有待处理的las/laz文件

In [3]:
# Record the absolute dataset root and derive the output directory, whose name
# encodes the grid size (e.g. "processed-5.00").
data_dict["root"] = os.path.abspath(data_dict["data_root"])
processed_dir = os.path.join(data_dict["data_root"], f'processed-{data_dict["grid_size"]:.2f}')
# makedirs(exist_ok=True) replaces the check-then-create sequence: it is not
# subject to a race between the check and the creation, and it also creates
# missing parent directories.
os.makedirs(processed_dir, exist_ok=True)
raw_root_dir = Path(os.path.join(data_dict["data_root"], 'raw'))
# Collect every .las / .laz file below raw/. The result is sorted because the
# iteration order of rglob depends on the file system; sorting keeps the
# processing order (and the split lists built from it) reproducible.
paths = sorted(
    file_path
    for file_path in raw_root_dir.rglob('*.la[sz]')
    if file_path.is_file()
)
print(paths)

[WindowsPath('E:/OpenGF/raw/Test/T1.laz'), WindowsPath('E:/OpenGF/raw/Test/T2.laz'), WindowsPath('E:/OpenGF/raw/Train/Metropolis/S1/S1_1.laz'), WindowsPath('E:/OpenGF/raw/Train/Metropolis/S1/S1_10.laz'), WindowsPath('E:/OpenGF/raw/Train/Metropolis/S1/S1_11.laz'), WindowsPath('E:/OpenGF/raw/Train/Metropolis/S1/S1_12.laz'), WindowsPath('E:/OpenGF/raw/Train/Metropolis/S1/S1_13.laz'), WindowsPath('E:/OpenGF/raw/Train/Metropolis/S1/S1_14.laz'), WindowsPath('E:/OpenGF/raw/Train/Metropolis/S1/S1_15.laz'), WindowsPath('E:/OpenGF/raw/Train/Metropolis/S1/S1_16.laz'), WindowsPath('E:/OpenGF/raw/Train/Metropolis/S1/S1_17.laz'), WindowsPath('E:/OpenGF/raw/Train/Metropolis/S1/S1_18.laz'), WindowsPath('E:/OpenGF/raw/Train/Metropolis/S1/S1_19.laz'), WindowsPath('E:/OpenGF/raw/Train/Metropolis/S1/S1_2.laz'), WindowsPath('E:/OpenGF/raw/Train/Metropolis/S1/S1_3.laz'), WindowsPath('E:/OpenGF/raw/Train/Metropolis/S1/S1_4.laz'), WindowsPath('E:/OpenGF/raw/Train/Metropolis/S1/S1_5.laz'), WindowsPath('E:/Open

## 3.2 将所有laz数据转为npy，并构建对应文件kdtree
1. 利用pdal库读取laz数据并进行降采样，然后将原本1:others、2:ground的标签转为0:others、1:ground
2. 根据配置项HasRGB和HasIntensity决定是否使用RGB和Intensity属性，启用时将相关特征存入npy文件

In [4]:
class_counts = None
train = []
test = []
val = []
# For every LAS/LAZ tile:
#   1. read it with PDAL, downsample, drop outliers and relabel the classes,
#   2. save the points as a structured .npy array,
#   3. pickle a KD-tree built on the shifted coordinates,
#   4. accumulate the number of points per class.
for laz_path in tqdm(paths, desc='Processing...'):
    laz_path = Path(laz_path)
    name = os.path.splitext(os.path.basename(str(laz_path)))[0]
    filename = name + '.npy'

    pth_path = os.path.join(processed_dir, filename)
    kdt_path = os.path.join(processed_dir, name + '_KDTree.pkl')

    # Decide the split from the directories *below* the raw root only. Matching
    # against the full absolute path would misassign every file as soon as the
    # data root itself (or the file name) contains "Train" / "Test".
    try:
        split_dirs = '/'.join(laz_path.relative_to(raw_root_dir).parts[:-1])
    except ValueError:
        # The file does not live under raw_root_dir; fall back to its
        # directory path so it can still be assigned to a split.
        split_dirs = laz_path.parent.as_posix()

    if 'Train' in split_dirs:
        train.append(name)
    elif 'Test' in split_dirs:
        test.append(name)
    elif 'Validation' in split_dirs:
        val.append(name)

    # The reader is given a forward-slash path.
    raw_path = laz_path.as_posix()

    pipeline = pdal.Pipeline()
    pipeline |= pdal.Reader.las(filename=raw_path)
    # The stats stage comes before the voxel filter in the pipeline, i.e. the
    # Intensity statistics are gathered ahead of downsampling.
    pipeline |= pdal.Filter.stats(dimensions='Intensity')
    pipeline |= pdal.Filter.voxelcenternearestneighbor(cell=data_dict["grid_size"])
    # Statistical outlier stage, intended to remove points that are abnormally
    # high or low in the scene.
    pipeline |= pdal.Filter.outlier(method='statistical',
                                    multiplier=3.0)
    # Keep only classes 1 and 2, then remap 1 -> 0 and 2 -> 1.
    pipeline |= pdal.Filter.range(limits='Classification[1:2]')
    pipeline |= pdal.Filter.assign(value=["Classification = 0 WHERE Classification == 1",
                                          "Classification = 1 WHERE Classification == 2"])

    pipeline.execute()

    arrays = pipeline.arrays[0]
    metadata = pipeline.metadata['metadata']

    # Shift the coordinates by the minimum corner reported in the reader
    # metadata before casting to float32.
    bounds = metadata['readers.las']
    minx = bounds['minx']
    miny = bounds['miny']
    minz = bounds['minz']

    pos = np.column_stack((
        arrays['X'] - minx,
        arrays['Y'] - miny,
        arrays['Z'] - minz,
    )).astype(np.float32)

    label = arrays['Classification']

    point_npy = np.zeros(pos.shape[0], dtype=[
        ('x', np.float32),  # shifted X coordinate
        ('y', np.float32),  # shifted Y coordinate
        ('z', np.float32),  # shifted Z coordinate
        ('intensity', np.float32),  # normalised intensity (0 when unused)
        ('r', np.uint8),  # red channel (0 when unused)
        ('g', np.uint8),  # green channel (0 when unused)
        ('b', np.uint8),  # blue channel (0 when unused)
        ('class', np.uint8)  # class label after remapping
    ])

    point_npy['x'] = pos[:, 0]
    point_npy['y'] = pos[:, 1]
    point_npy['z'] = pos[:, 2]
    point_npy['class'] = label

    if data_dict["HasRGB"]:
        # NOTE(review): LAS colour channels are commonly stored as 16-bit
        # values; assigning them to uint8 fields wraps anything above 255.
        # Confirm the dataset really holds 8-bit colours.
        point_npy['r'] = arrays['Red']
        point_npy['g'] = arrays['Green']
        point_npy['b'] = arrays['Blue']

    if data_dict["HasIntensity"]:
        # Only one dimension was requested from the stats stage, so the first
        # entry holds the Intensity statistics.
        stats = metadata['filters.stats']['statistic'][0]
        # normalize_attributes is defined in prepare_opengf; presumably it
        # rescales the intensity from the given min / max / mean / stddev.
        intensity = normalize_attributes(arrays['Intensity'], 0.0, float(stats['maximum']),
                                         stats['average'],
                                         stats['stddev'])
        point_npy['intensity'] = intensity.squeeze()

    np.save(pth_path, point_npy)

    kdtree = KDTree(pos)
    with open(kdt_path, 'wb') as f:
        pickle.dump(kdtree, f)

    # bincount with minlength always yields one slot per class. np.unique only
    # reports the classes present in the tile, so a tile with a single class
    # would give a length-1 array that broadcasts into every class when summed.
    counts = np.bincount(label.astype(np.int64), minlength=data_dict["num_classes"])
    class_counts = counts if class_counts is None else class_counts + counts

Processing...: 100%|██████████| 162/162 [03:59<00:00,  1.48s/it]


## 3.3 将train、validation、test目录下文件的名称以及样本类别数量写入metadata.json文件

In [5]:
# Gather the file names of each split together with the accumulated per-class
# point counts, echo them, and store them as metadata.json in the output
# directory.
json_path = os.path.join(processed_dir, 'metadata.json')
splits = dict(
    train=train,
    val=val,
    test=test,
    class_weights=class_counts.tolist(),
)
print(splits)
with open(json_path, 'w') as f:
    f.write(json.dumps(splits, indent=4))

{'train': ['S1_1', 'S1_10', 'S1_11', 'S1_12', 'S1_13', 'S1_14', 'S1_15', 'S1_16', 'S1_17', 'S1_18', 'S1_19', 'S1_2', 'S1_3', 'S1_4', 'S1_5', 'S1_6', 'S1_7', 'S1_8', 'S1_9', 'S2_1', 'S2_10', 'S2_11', 'S2_12', 'S2_13', 'S2_14', 'S2_15', 'S2_16', 'S2_17', 'S2_18', 'S2_19', 'S2_2', 'S2_3', 'S2_4', 'S2_5', 'S2_6', 'S2_7', 'S2_8', 'S2_9', 'S7_1', 'S7_2', 'S7_3', 'S7_4', 'S7_5', 'S7_6', 'S7_7', 'S7_8', 'S7_9', 'S8_1', 'S8_2', 'S8_3', 'S8_4', 'S8_5', 'S8_6', 'S8_7', 'S8_8', 'S8_9', 'S9_1', 'S9_10', 'S9_11', 'S9_12', 'S9_13', 'S9_14', 'S9_15', 'S9_16', 'S9_17', 'S9_18', 'S9_19', 'S9_2', 'S9_3', 'S9_4', 'S9_5', 'S9_6', 'S9_7', 'S9_8', 'S9_9', 'S3_1', 'S3_2', 'S3_3', 'S3_4', 'S3_5', 'S3_6', 'S3_7', 'S3_8', 'S3_9', 'S4_1', 'S4_10', 'S4_11', 'S4_12', 'S4_13', 'S4_14', 'S4_15', 'S4_16', 'S4_17', 'S4_18', 'S4_19', 'S4_2', 'S4_3', 'S4_4', 'S4_5', 'S4_6', 'S4_7', 'S4_8', 'S4_9', 'S5_1', 'S5_2', 'S5_3', 'S5_4', 'S5_5', 'S5_6', 'S5_7', 'S5_8', 'S5_9', 'S6_1', 'S6_10', 'S6_11', 'S6_12', 'S6_13', 'S6_14', 