# Setup

Ensure all dependencies are installed.


In [1]:
# Install (and upgrade, via -U) every package listed in requirements.txt.
# The %pip magic is used rather than !pip so the install targets this kernel's environment.
%pip install -U -r requirements.txt

Looking in links: https://download.pytorch.org/whl/torch/, https://download.pytorch.org/whl/torchvision/
Collecting numpy (from -r requirements.txt (line 10))
  Using cached numpy-2.2.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (62 kB)
Collecting torch-points3d (from -r requirements.txt (line 18))
  Using cached torch_points3d-1.3.0-py3-none-any.whl.metadata (26 kB)
Collecting gdown<4.0.0,>=3.12.0 (from torch-points3d->-r requirements.txt (line 18))
  Using cached gdown-3.15.0.tar.gz (10 kB)
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
[?25hCollecting hydra-core<0.12.0,>=0.11.2 (from torch-points3d->-r requirements.txt (line 18))
  Using cached hydra_core-0.11.3-py3-none-any.whl.metadata (2.2 kB)
Collecting numba<0.51.0,>=0.50.0 (from torch-points3d->-r requirements.txt (line 18))
  Using cached numba-0.50.1.tar.gz (2.0 MB)
  Preparing metad

Check if we have GPU support, and if not, warn the user.


In [None]:
import torch
import warnings

# Tell the user up front which device class is usable: CPU-only gets a warning,
# a usable CUDA device gets a plain confirmation message.
if not torch.cuda.is_available():
    warnings.warn("CUDA is not available. Running on CPU.")
else:
    print("CUDA is available!")

CUDA is available!


# Settings

All our settings are here for convenience.


In [1]:
from pole_gen.models import UtilityPoleLabel
import secrets
import numpy as np
import random
import torch
import warnings
from utils.logging import warning_format
import os

# ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤
# ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ No need to modify anything above here! ◢◤ ◢◤ ◢◤ ◢◤ ◢◤
# ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤

# When True, the setup code further down sets CUDA_LAUNCH_BLOCKING=1 and turns on
# autograd anomaly detection.
debug: bool = True
# Point count handed to both data generation and the dataset wrappers.
n_points: int = 1000
# Class names, taken from the members of the UtilityPoleLabel enumeration.
classes: list = [label.name for label in UtilityPoleLabel]
n_classes: int = len(classes)
# A fresh random 32-bit seed is drawn on every run; it is printed further down
# so the value can be noted and reused.
seed: int = secrets.randbits(32)
torch.set_float32_matmul_precision("medium")

# ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤
# ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ No need to modify anything below here! ◢◤ ◢◤ ◢◤ ◢◤ ◢◤
# ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤ ◢◤

# Debug aids: blocking CUDA launches plus autograd anomaly detection.
if debug:
    os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
    torch.autograd.set_detect_anomaly(True)

# Seed every RNG in use (Python, NumPy, PyTorch) with the same value, in that order.
for _seed_fn in (random.seed, np.random.seed, torch.manual_seed):
    _seed_fn(seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(seed)
print(f"Seed: {seed}")

# Route warnings through the project's own formatter.
warnings.formatwarning = warning_format

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.
Seed: 1626838268


# Preparing our Data


## Training & Evaluation Data

This data is procedurally generated.


In [2]:
import os
from pole_gen.data import generate_data
from models.dataset import PointCloudDataset

# Directory holding the procedurally generated samples.
out_path: str = os.path.join("data", "train")
# Fraction of the generated samples held out for validation.
eval_split: float = 0.2

# Generate a new dataset only when no local copy exists (directory missing or empty).
if not os.path.exists(out_path) or len(os.listdir(out_path)) == 0:
    print("Directory is empty or does not exist. New training data will be generated.")
    generate_data(
        n_samples=1000,
        n_points=n_points,
        out_dir=out_path,
        jitter=0.02,
    )
else:
    print("Data directory found. Using existing training data.")

# os.listdir() returns entries in arbitrary order. Sort them so the list handed to
# the dataset (and, in turn, the seeded random split below) does not depend on the
# filesystem the notebook happens to run on.
file_paths = [os.path.join(out_path, f) for f in sorted(os.listdir(out_path))]
generated_dataset = PointCloudDataset(
    file_paths=file_paths,
    n_points=n_points,
    n_classes=n_classes,
)

# Run the dataset's own validation pass before splitting.
generated_dataset.validate()

# Split the dataset into training and validation sets.
# random_split draws from torch's default generator unless one is passed explicitly.
print("Splitting dataset into training and validation sets...")
train_size = int((1 - eval_split) * len(generated_dataset))
val_size = len(generated_dataset) - train_size  # remainder, so the two sizes always sum to the total
train_dataset, eval_dataset = torch.utils.data.random_split(
    generated_dataset, [train_size, val_size]
)

# Only the two subsets are used from here on; drop the extra name.
del generated_dataset


print(f"Training dataset size: {len(train_dataset)}")
print(f"Validation dataset size: {len(eval_dataset)}")

Data directory found. Using existing training data.


Checking dataset...:   0%|          | 0/1000 [00:00<?, ?it/s]

Splitting dataset into training and validation sets...
Training dataset size: 800
Validation dataset size: 200


## Testing Data

This data is manually labeled, real-world laser-scanned data. It is downloaded from a remote source if it is not already present locally.


In [3]:
import os
from electrical_poles.data import download_data
from models.dataset import PointCloudDataset

# Directory holding the real-world scans used for testing.
test_data_path = os.path.join("data", "test")

# Reuse a non-empty local copy when there is one; otherwise download the data.
if os.path.exists(test_data_path) and len(os.listdir(test_data_path)) != 0:
    print("Testing data directory found. Using existing testing data.")
else:
    print(
        "Testing data directory is empty or does not exist. New testing data will be downloaded."
    )
    download_data(out_dir=test_data_path)

# Every entry of the directory becomes one dataset file path.
file_paths = [
    os.path.join(test_data_path, entry) for entry in os.listdir(test_data_path)
]
test_dataset = PointCloudDataset(
    file_paths=file_paths,
    n_points=n_points,
    n_classes=n_classes,
)

# Run the dataset's own validation pass before it is used.
test_dataset.validate()

print(f"Testing dataset size: {len(test_dataset)}")

Testing data directory found. Using existing testing data.


Checking dataset...:   0%|          | 0/91 [00:00<?, ?it/s]

Testing dataset size: 91


# Segmentation

Now we can train our segmenter with our data, or load a pre-existing one if needed.


## Training


In [None]:
from ai.pointnet_seg.train import train
from ai.pointnet_seg.model import PointNetSeg
from torch.utils.data import DataLoader

# Instantiate the segmenter for `n_classes` classes.
segmenter = PointNetSeg(classes=n_classes)

# Build the optimizer and both loaders first so the train() call stays short.
# Only the training loader shuffles; the evaluation loader keeps a fixed order.
adam = torch.optim.Adam(segmenter.parameters(), lr=0.001)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=4)
eval_loader = DataLoader(eval_dataset, batch_size=32, shuffle=False, num_workers=4)

train(
    pointnet=segmenter,
    optimizer=adam,
    train_data=train_loader,
    eval_data=eval_loader,
    out_dir="data/pointnet/checkpoints",  # presumably where train() writes checkpoints; confirm
)

[1,     1] loss: 0.223
[1,     2] loss: 0.210
[1,     3] loss: 0.195
[1,     4] loss: 0.185
[1,     5] loss: 0.171
[1,     6] loss: 0.171
[1,     7] loss: 0.161
[1,     8] loss: 0.158
[1,     9] loss: 0.155
[1,    10] loss: 0.141
[1,    11] loss: 0.140
[1,    12] loss: 0.132
[1,    13] loss: 0.132
[1,    14] loss: 0.125
[1,    15] loss: 0.119
[1,    16] loss: 0.118
[1,    17] loss: 0.116
[1,    18] loss: 0.113
[1,    19] loss: 0.104
[1,    20] loss: 0.107
[1,    21] loss: 0.107
[1,    22] loss: 0.106
[1,    23] loss: 0.107
[1,    24] loss: 0.108
[1,    25] loss: 0.096
predicted shape: torch.Size([32, 1000]), labels shape: torch.Size([32, 1000])
predicted shape: torch.Size([32, 1000]), labels shape: torch.Size([32, 1000])
predicted shape: torch.Size([32, 1000]), labels shape: torch.Size([32, 1000])
predicted shape: torch.Size([32, 1000]), labels shape: torch.Size([32, 1000])
predicted shape: torch.Size([32, 1000]), labels shape: torch.Size([32, 1000])
predicted shape: torch.Size([32, 10

## Testing

Test our trained model on our real-world dataset.


In [None]:
from ai.pointnet_seg.test import test
from torch.utils.data import DataLoader

# Evaluate the `segmenter` created in the training cell on the real-world test set.
# The loader does not shuffle, so samples are visited in dataset order.
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=4)
test(model=segmenter, test_data=test_loader)