In [None]:
import os

import numpy as np
import pandas as pd

from pointtorch.operations.numpy import make_labels_consecutive
from pointtorch import PointCloud, read


In [None]:
# Root directory of the project data; the processing code expects the input files under
# <base_dir>/Data/<dataset>/2_semantic_segmentation/.
base_dir = '<insert path>'

# IDs of the TreeML files. Every TreeML entry follows the same pattern, so the entries are generated from
# this list instead of being spelled out one by one. The order matches the processing order.
treeml_file_ids = [
    '2023-01-09_5_1_37',
    '2023-01-09_17_2_18',
    '2023-01-10_7_6',
    '2023-01-12_35_34',
    '2023-01-12_65_64',
    '2023-01-16_44',
    '2023-01-09_tum_campus',
    '2023-01-13_42',
    '2023-01-13_61',
    '2023-01-12_48',
    '2023-01-12_58',
    '2023-01-13_74',
    '2023-01-13_4',
    '2023-01-13_52',
    '2023-01-12_28',
    '2023-01-16_22',
    '2023-01-12_57',
    '2023-01-16_43',
    '2023-01-16_12',
    '2023-01-12_56',
]

# Per dataset: file ID -> file name of the CSV file, a street label, and a part label.
datasets = {
    'TreeML': {
        file_id: {
            'file_path': f'{file_id}.csv',
            # The street label is the file ID with every underscore preceded by a backslash
            # (presumably so the label renders correctly in LaTeX/Markdown output - confirm).
            # A raw string is used because '\_' is an invalid escape sequence in a normal string literal.
            'street': file_id.replace('_', r'\_'),
            'part': '',
        }
        for file_id in treeml_file_ids
    },
    'Essen': {
        'altendorfer_part_1': {
            'file_path': 'Altendorfer_p1_min_1.csv',
            'street': 'Altendorfer Straße',
            'part': 'part 1',
        },
        'altendorfer_part_2': {
            'file_path': 'Altendorfer_p2_min_1.csv',
            'street': 'Altendorfer Straße',
            'part': 'part 2'
        },
        'altenessener_part_4': {
            'file_path': 'Essen3_p2_min_1.csv',
            'street': 'Altenessener Straße',
            'part': 'part 4'
        },
        'altenessener_part_5': {
            'file_path': 'Essen3_p3_min_1.csv',
            'street': 'Altenessener Straße',
            'part': 'part 5'
        }
    },
    'Hamburg': {
        'armgart_straße_part_1': {
            'file_path': '000274_v2_min_1.csv',
            'street': 'Armgartstraße',
            'part': 'part 1'
        },
        'armgart_straße_part_2': {
            'file_path': '000275_000276_min_1.csv',
            'street': 'Armgartstraße',
            'part': 'part 2'
        }
    }
}

In [None]:
# Convert the semantic segmentation results of every configured file from CSV to LAZ:
# read <base_dir>/Data/<dataset>/2_semantic_segmentation/<file>.csv, derive unified target and prediction
# class columns, clean up the instance IDs, and write the result to
# <base_dir>/Data/<dataset>/3_semantic_segmentation_processed/<file>.laz.

# Maps (semantic class ID, specific class ID) pairs to the class IDs written to the output files.
# Defined once here because it does not depend on the dataset or the file being processed.
class_mapping = {
    (0, 0): 0,
    (1, 0): 0,
    (1, 1): 1,
    (1, 2): 2,
    (1, 3): 3,
    (2, 0): 4,
    (3, 0): 4,
    (4, 0): 4,
    (4, 1): 4
}

# For the TreeML dataset, the target labels are read from the single 'classification' column and mapped
# to output class IDs with this table.
target_class_mapping = {
    0: 0,
    1: 0,
    2: 2,
}

# Columns that every input file must provide.
required_columns = ['x', 'y', 'z', 'instance_id', 'semclassidpredicted', 'specificclassidpredicted']
# Columns that are kept only if the input file contains them. 'intensity' has to be listed here: if it is
# dropped at this stage, the export step below can never find it and it is silently left out of the output.
optional_columns = ['classification', 'semclassid', 'specificclassid', 'intensity']

for dataset in datasets:
    os.makedirs(os.path.join(base_dir, 'Data', dataset), exist_ok=True)
    for file_id, file_infos in datasets[dataset].items():
        print("Process", file_id)

        point_cloud = pd.read_csv(os.path.join(base_dir, 'Data', dataset, '2_semantic_segmentation', file_infos['file_path']))
        # Normalize the header: some files name the first column '//X'; all column names are lower-cased.
        point_cloud = point_cloud.rename({'//X': 'X'}, axis=1)
        point_cloud = point_cloud.rename(str.lower, axis=1)

        columns_to_keep = required_columns + [
            column for column in optional_columns if column in point_cloud.columns
        ]

        # Work on an explicit copy so that the column assignments below modify an independent data frame
        # and not a selection derived from the frame that was read from disk.
        point_cloud = point_cloud[columns_to_keep].copy()

        # Unmapped class IDs raise a KeyError on purpose instead of being silently relabeled.
        if dataset == "TreeML":
            point_cloud["classification_target"] = np.vectorize(lambda x: target_class_mapping[x])(
                point_cloud["classification"].to_numpy()
            )
        else:
            point_cloud["classification_target"] = np.vectorize(lambda x, y: class_mapping[(x, y)])(
                point_cloud["semclassid"].to_numpy(), point_cloud["specificclassid"].to_numpy()
            )
        point_cloud["classification_prediction"] = np.vectorize(lambda x, y: class_mapping[(x, y)])(
            point_cloud["semclassidpredicted"].to_numpy(), point_cloud["specificclassidpredicted"].to_numpy()
        )

        # Only points whose target class is 1, 2, or 3 keep an instance ID; all other points get -1.
        point_cloud.loc[~(point_cloud["classification_target"].isin([1, 2, 3])), "instance_id"] = -1
        # NOTE(review): presumably renumbers the remaining instance IDs consecutively while leaving the
        # ignore ID -1 untouched - confirm against the pointtorch documentation.
        point_cloud["instance_id"] = make_labels_consecutive(point_cloud["instance_id"].to_numpy(), ignore_id=-1, inplace=True)

        data_dir = os.path.join(base_dir, 'Data', dataset, '3_semantic_segmentation_processed')
        os.makedirs(data_dir, exist_ok=True)

        columns_to_keep = ["x", "y", "z", "instance_id", "classification_target", "classification_prediction"]
        if "intensity" in point_cloud.columns:
            columns_to_keep.append("intensity")

        # Swap only the file extension; a plain substring replace would also touch '.csv' inside the name.
        output_file_name = os.path.splitext(file_infos['file_path'])[0] + '.laz'
        point_cloud = PointCloud(point_cloud, x_max_resolution=1e-6, y_max_resolution=1e-6, z_max_resolution=1e-6)
        point_cloud.to(os.path.join(data_dir, output_file_name), columns=columns_to_keep)
