# Setup

Ensure all dependencies are installed.


In [1]:
%pip install -U -r requirements.txt

Collecting numpy (from -r requirements.txt (line 5))
  Using cached numpy-2.2.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (62 kB)
Collecting mlflow (from -r requirements.txt (line 14))
  Using cached mlflow-2.20.0-py3-none-any.whl.metadata (30 kB)
Collecting mlflow-skinny==2.20.0 (from mlflow->-r requirements.txt (line 14))
  Using cached mlflow_skinny-2.20.0-py3-none-any.whl.metadata (31 kB)
Using cached mlflow-2.20.0-py3-none-any.whl (28.3 MB)
Using cached mlflow_skinny-2.20.0-py3-none-any.whl (6.0 MB)
[0mInstalling collected packages: mlflow-skinny, mlflow
  Attempting uninstall: mlflow
    Found existing installation: mlflow 2.19.0
    Uninstalling mlflow-2.19.0:
      Successfully uninstalled mlflow-2.19.0
[0mSuccessfully installed mlflow-2.20.0 mlflow-skinny-2.20.0
Note: you may need to restart the kernel to use updated packages.


# AI Model


## Settings

Our main settings are found in the config.yaml file, but you can also modify values here for convenience.


In [2]:
from pole_gen.models import UtilityPoleLabel
import numpy as np
import random
import warnings
import torch
from utils.logging import warning_format
from utils.config import *

torch.set_float32_matmul_precision("medium")

warnings.formatwarning = warning_format

random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(SEED)
print(f"Seed: {SEED}")

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.
Seed: 42


## Load Testing Data

This data is manually labeled, real-world laser scanned data. We will fetch this remotely if needed.


In [3]:
import os
from electrical_poles.data import download_data
from models.dataset import PointCloudDataset
from torch.utils.data import DataLoader

needs_validation = False
if not os.path.exists(TEST_DATA_PATH) or len(os.listdir(TEST_DATA_PATH)) == 0:
    print(
        "Testing data directory is empty or does not exist. New testing data will be downloaded."
    )
    download_data(out_dir=TEST_DATA_PATH, n_points=N_POINTS)
    needs_validation = True
else:
    print("Testing data directory found. Using existing testing data.")

test_dataset = PointCloudDataset(
    file_paths=[os.path.join(TEST_DATA_PATH, f) for f in os.listdir(TEST_DATA_PATH)],
    n_points=N_POINTS,
    n_classes=N_CLASSES,
)

if needs_validation:
    test_dataset.validate()

test_dataloader = DataLoader(
    test_dataset,
    batch_size=TEST_DATA_BATCH_SIZE,
    shuffle=False,
    num_workers=TEST_DATA_WORKERS,
    persistent_workers=True,
)

print(f"Testing dataset size: {len(test_dataset)}")

Testing data directory found. Using existing testing data.
Testing dataset size: 91


## Load our model

If you don't have a valid model ready, use train.py to train one!


In [4]:
import torch
from ai.pointnet_seg.model import PointNetSeg
from utils.config import *

segmenter = PointNetSeg.load_from_checkpoint(TRAIN_BEST_MODEL_PATH)
print("Model loaded")

Model loaded


## Testing

Test our data on our real-world dataset.


In [5]:
import pytorch_lightning as L

trainer = L.Trainer()
trainer.test(model=segmenter, dataloaders=test_dataloader)

Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/home/jovyan/research-project/.venv/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py:76: Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `pytorch_lightning` package, due to potential conflicts with other packages in the ML ecosystem. For this reason, `logger=True` will use `CSVLogger` as the default logger, unless the `tensorboard` or `tensorboardX` packages are found. Please `pip install lightning[extra]` or one of them to enable TensorBoard support by default
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Testing: |                                                                                             | 0/? […

OutOfMemoryError: CUDA out of memory. Tried to allocate 128.00 MiB. GPU 0 has a total capacity of 14.57 GiB of which 22.75 MiB is free. Process 1064099 has 14.12 GiB memory in use. Process 1117677 has 432.00 MiB memory in use. Of the allocated memory 175.15 MiB is allocated by PyTorch, and 128.85 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

# Demo

A little section here to show its labeling in action!


In [10]:
import numpy as np
import open3d as o3d
from utils.plot import point_cloud_figure
from utils.string import format_accuracy
from plotly.subplots import make_subplots

segmenter.eval()
with torch.no_grad():
    inputs, labels = test_dataset[np.random.randint(0, len(test_dataset))]
    inputs = inputs.float()
    outputs, __, __ = segmenter(inputs.unsqueeze(0))
    _, predicted = torch.max(outputs.data, 1)

# Predicted point cloud
pr_pc = o3d.t.geometry.PointCloud()
pr_pc.point.positions = o3d.core.Tensor(
    inputs.squeeze(0).numpy(), dtype=o3d.core.Dtype.Float32
)
pr_pc.point.labels = predicted.squeeze(0).numpy()

# Ground truth point cloud
gt_pc = o3d.t.geometry.PointCloud()
gt_pc.point.positions = o3d.core.Tensor(
    inputs.squeeze(0).numpy(), dtype=o3d.core.Dtype.Float32
)
gt_pc.point.labels = labels.squeeze(0).numpy()

accuracy = (predicted == labels).sum().item() / N_POINTS
print(f"Accuracy: {format_accuracy(accuracy)}")
pred_fig = point_cloud_figure(
    pr_pc,
    xaxis=[-1.0, 1.0],
    yaxis=[-1.0, 1.0],
    zaxis=[-1.0, 1.0],
    title="Predicted",
    cmax=N_CLASSES - 1,
)
gt_fig = point_cloud_figure(
    gt_pc,
    xaxis=[-1.0, 1.0],
    yaxis=[-1.0, 1.0],
    zaxis=[-1.0, 1.0],
    title="Ground Truth",
    cmax=N_CLASSES - 1,
)

fig = make_subplots(
    rows=1, cols=2, specs=[[{"type": "scatter3d"}, {"type": "scatter3d"}]]
)
for trace in pred_fig.data:
    fig.add_trace(trace, row=1, col=1)
for trace in gt_fig.data:
    fig.add_trace(trace, row=1, col=2)

fig.update_layout(
    template="plotly_dark",
    plot_bgcolor="rgba(0,0,0,0)",
    paper_bgcolor="rgba(0,0,0,0)",
)
fig.update_scenes(
    xaxis=dict(range=[-1.0, 1.0], autorange=False),
    yaxis=dict(range=[-1.0, 1.0], autorange=False),
    zaxis=dict(range=[-1.0, 1.0], autorange=False),
    aspectmode="cube",
)

fig.show()

Accuracy: [38;2;23;255;0m98.14%[0m
