### Explore data

In [None]:
# -*- coding: utf-8 -*-

import laspy
import numpy as np
import matplotlib.pyplot as plt
import sys
sys.path.append("../src")
from preprocessing.summary_stats import summarize_las

In [None]:
# Read the raw point cloud from disk with laspy.
raw_las_path = r"../data/raw/raw_data.las"
las = laspy.read(raw_las_path)

# Hand the loaded laspy object to the project's summary helper.
summarize_las(las)

In [None]:
# Pull out the per-point attributes used by this cell and the outlier checks.
classification = las.classification
intensity = las.intensity
z = las.z

# Histogram: how many points carry each classification code.
unique_classes, counts = np.unique(classification, return_counts=True)
plt.bar(unique_classes, counts)
plt.title("Distribution of LiDAR points by class")
plt.xlabel("Classes")
plt.ylabel("Number of points")
plt.show()

In [None]:
# Quick sanity checks for outliers in elevation and intensity.

# Elevation: count points with Z below 0, then points with Z above 4000.
z_below_zero = z < 0
print("Z below 0:", z_below_zero.sum())
z_above_limit = z > 4000
print("Z above 4000m:", z_above_limit.sum())

# Intensity: count negative values.
# NOTE(review): LAS intensity is presumably stored unsigned, in which case
# this count is always 0 — confirm against the dtype of `las.intensity`.
negative_intensity = intensity < 0
print("Intensity < 0:", negative_intensity.sum())

### Preprocessing pipeline to clean data

In [None]:
from preprocessing.preprocess import run_pdal_pipeline

# Run the PDAL pipeline described by the downsampling config file.
pipeline = run_pdal_pipeline("../config/pdal_pipeline_downsample.json")

# A non-empty `arrays` attribute is treated as the sign of a successful run.
if pipeline.arrays:
    # Plain string: the message has no placeholders, so no f-prefix is needed.
    print("PDAL pipeline executed successfully!")
    # Only the first output array is counted here.
    print(f"Processed {len(pipeline.arrays[0])} points")
    print(f"Output metadata: {pipeline.metadata}")
else:
    print("PDAL pipeline execution failed")
    # NOTE(review): assumes the object returned by run_pdal_pipeline exposes
    # an `error` attribute — confirm, otherwise this line raises AttributeError.
    print(f"Error messages: {pipeline.error}")

In [None]:
# Read the processed "tscan 1m" file and pass it to the summary helper.
tscan_1m_path = r"../data/processed/raw_data_tscan_1m.las"
las = laspy.read(tscan_1m_path)
summarize_las(las)

### Create raw data from classified file

In [None]:
import json
import pdal

# Input and output locations.
raw_las = "../data/raw/raw_data.las"  # not referenced further in this cell
terr_las = "../data/ground_truth/classified.las"
out_las = "../data/raw/raw_data_tscan.las"   # comparable file

# Stage 1: read the classified file.
reader_stage = {"type": "readers.las", "filename": terr_las}

# Stage 2: assign 1 to the Classification dimension over the full range `[:]`.
reset_class_stage = {
    "type": "filters.assign",
    "assignment": "Classification[:]=1",
}

# Stage 3: write the result, forwarding all extra dimensions.
writer_stage = {
    "type": "writers.las",
    "filename": out_las,
    "extra_dims": "all",
}

# Assemble the stages in execution order.
pipeline_json = {"pipeline": [reader_stage, reset_class_stage, writer_stage]}

# PDAL takes the pipeline definition as a JSON string.
p = pdal.Pipeline(json.dumps(pipeline_json))
p.execute()
print(f"Wrote comparable file: {out_las}")

### Summary stats

In [None]:
# Read the downsampled file and pass it to the summary helper.
downsampled_1m_path = "../data/processed/raw_data_downsampled_1m.las"
las = laspy.read(downsampled_1m_path)
summarize_las(las)

In [None]:
# Read the cleaned "1m" file and pass it to the summary helper.
cleaned_1m_path = "../data/processed/raw_data_cleaned_1m.las"
las = laspy.read(cleaned_1m_path)
summarize_las(las)

In [None]:
# Read the cleaned "20m" file and pass it to the summary helper.
cleaned_20m_path = "../data/processed/raw_data_cleaned_20m.las"
las = laspy.read(cleaned_20m_path)
summarize_las(las)