# Setup

Ensure all dependencies are installed.


In [5]:
# Install (and upgrade, via -U) every package listed in requirements.txt.
# %pip (rather than !pip) targets the environment of the running kernel.
%pip install -U -r requirements.txt

Looking in links: https://download.pytorch.org/whl/torch/, https://download.pytorch.org/whl/torchvision/
Collecting pointnet (from -r requirements.txt (line 20))
  Cloning https://github.com/fxia22/pointnet.pytorch.git (to revision master) to /tmp/pip-install-2sa27mga/pointnet_232b62d7fb664c20b8ea6911589107f5
  Running command git clone --filter=blob:none --quiet https://github.com/fxia22/pointnet.pytorch.git /tmp/pip-install-2sa27mga/pointnet_232b62d7fb664c20b8ea6911589107f5
  Resolved https://github.com/fxia22/pointnet.pytorch.git to commit bafbf401e0af19be3262e448c59313fd2be0e421
  Preparing metadata (setup.py) ... [?25ldone
Collecting torch-tb-profiler (from -r requirements.txt (line 7))
  Downloading torch_tb_profiler-0.4.3-py3-none-any.whl.metadata (1.4 kB)
Collecting numpy (from -r requirements.txt (line 10))
  Using cached numpy-2.2.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (62 kB)
Collecting lightning (from -r requirements.txt (line 18))
  Using ca

Check if we have GPU support, and if not, warn the user.


In [6]:
import torch
import warnings

# Tell the reader up front which device this session can use.
if not torch.cuda.is_available():
    # No GPU visible to torch: surface it as a warning rather than failing.
    warnings.warn("CUDA is not available. Running on CPU.")
else:
    print("CUDA is available!")

CUDA is available!


# Settings

All our settings are here for convenience.


In [1]:
from pole_gen.models import UtilityPoleLabel
import secrets
import numpy as np
import random
import torch
import warnings
from utils.logging import warning_format

# --- Run configuration -------------------------------------------------------
# Expected number of points in every sample (checked against each dataset below).
n_points: int = 1000
# One class per member of the UtilityPoleLabel enumeration, in declaration order.
classes: list = [pole_label.name for pole_label in UtilityPoleLabel]
n_classes: int = len(classes)

# --- Randomness --------------------------------------------------------------
# A fresh 32-bit seed is drawn on every run and printed so the run can be
# identified afterwards.
seed: int = secrets.randbits(32)

# Seed every RNG source used by this notebook with the same value.
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(seed)

torch.set_float32_matmul_precision("medium")
print(f"Seed: {seed}")

# --- Warnings ----------------------------------------------------------------
# Route all warnings through the project's formatter.
warnings.formatwarning = warning_format

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.
Seed: 3594080470


# Preparing our Data


## Training & Evaluation Data

This data is procedurally generated.


In [2]:
import os
from pole_gen.data import generate_data
from models.dataset import PointCloudDataset
from tqdm.auto import tqdm
import warnings

out_path: str = os.path.join("data", "train")
eval_split: float = 0.2  # fraction of the generated samples held out for validation

# Generate the procedural training set only when nothing is cached on disk.
if not os.path.exists(out_path) or len(os.listdir(out_path)) == 0:
    print("Directory is empty or does not exist. New training data will be generated.")
    # exist_ok=True: this branch is also taken when the directory exists but is
    # empty, in which case a plain makedirs() would raise FileExistsError.
    os.makedirs(out_path, exist_ok=True)
    generate_data(
        n_samples=2000,
        n_points=n_points,
        out_dir=out_path,
        jitter=0.02,
    )
else:
    print("Data directory found. Using existing training data.")

generated_dataset = PointCloudDataset(
    # os.listdir() returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore the seeded split below) stable between runs.
    file_paths=[os.path.join(out_path, f) for f in sorted(os.listdir(out_path))],
    n_points=n_points,
)

# Sanity-check the samples before training; stop at the first problem found.
for sample in tqdm(generated_dataset, desc="Checking dataset..."):
    n = len(sample[0].numpy())
    if n != n_points:
        warnings.warn(
            f"A sample has {n} points (Expected {n_points}). Check if the data was generated correctly!"
        )
        break
    # Validate every label of the sample, not only the first one: a single
    # class id outside [0, n_classes) is enough to trip the loss kernel's
    # `t >= 0 && t < n_classes` assertion during training.
    labels = sample[1].numpy()
    invalid_labels = labels[(labels < 0) | (labels >= n_classes)]
    if invalid_labels.size > 0:
        label = invalid_labels[0]
        warnings.warn(
            f"Invalid label {label} found in the dataset. Check if the data was generated correctly!"
        )
        break

# Split the dataset into training and validation sets
print("Splitting dataset into training and validation sets...")
train_size = int((1 - eval_split) * len(generated_dataset))
val_size = len(generated_dataset) - train_size
train_dataset, eval_dataset = torch.utils.data.random_split(
    generated_dataset,
    [train_size, val_size],
    # Dedicated generator: re-running this cell yields the same split for a
    # given seed, independent of how much global RNG state was consumed.
    generator=torch.Generator().manual_seed(seed),
)

# The two subsets keep their own reference to the underlying dataset.
del generated_dataset


print(f"Training dataset size: {len(train_dataset)}")
print(f"Validation dataset size: {len(eval_dataset)}")

Data directory found. Using existing training data.


Checking dataset...:   0%|          | 0/2000 [00:00<?, ?it/s]

Splitting dataset into training and validation sets...
Training dataset size: 1600
Validation dataset size: 400


## Testing Data

This data is manually labeled, real-world, laser-scanned data. It is downloaded from a remote source if it is not already present locally.


In [3]:
import os
from electrical_poles.data import download_data
from models.dataset import PointCloudDataset
from tqdm.auto import tqdm

test_data_path = os.path.join("data", "test")

# Download the real-world test set only when nothing is cached on disk.
if not os.path.exists(test_data_path) or len(os.listdir(test_data_path)) == 0:
    print(
        "Testing data directory is empty or does not exist. New testing data will be downloaded."
    )
    # exist_ok=True: this branch is also taken when the directory exists but is
    # empty, in which case a plain makedirs() would raise FileExistsError.
    os.makedirs(test_data_path, exist_ok=True)
    download_data(out_dir=test_data_path)
else:
    print("Testing data directory found. Using existing testing data.")

test_dataset = PointCloudDataset(
    # os.listdir() returns entries in arbitrary order; sorting keeps the sample
    # order stable between runs.
    file_paths=[
        os.path.join(test_data_path, f) for f in sorted(os.listdir(test_data_path))
    ],
    n_points=n_points,
)

# Sanity-check the samples before they are used; stop at the first problem found.
for sample in tqdm(test_dataset, desc="Checking dataset..."):
    n = len(sample[0].numpy())
    if n != n_points:
        warnings.warn(
            f"A sample has {n} points (Expected {n_points}). Check if the data was downloaded correctly!"
        )
        break
    # Validate every label of the sample, not only the first one, so that any
    # class id outside [0, n_classes) is reported here instead of surfacing
    # later as a device-side assertion inside the loss.
    labels = sample[1].numpy()
    invalid_labels = labels[(labels < 0) | (labels >= n_classes)]
    if invalid_labels.size > 0:
        label = invalid_labels[0]
        warnings.warn(
            f"Invalid label {label} found in the dataset. Check if the data was downloaded correctly!"
        )
        break

print(f"Testing dataset size: {len(test_dataset)}")

Testing data directory found. Using existing testing data.


Checking dataset...:   0%|          | 0/91 [00:00<?, ?it/s]

Testing dataset size: 91


# Training Segmentation

Now we can train our segmenter with our data, or load a pre-existing one if needed.


In [4]:
from utils.ai import train
from models.pointnet import PointNetLightningModel

# Build the PointNet segmentation network with one output class per label,
# then hand it to the project's training helper together with all three splits.
segmenter = PointNetLightningModel(task="seg", num_classes=n_classes)

# NOTE(review): presumably `train` returns the fitted (or checkpoint-restored)
# model - confirm against utils.ai.train.
model = train(
    model=segmenter,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    test_dataset=test_dataset,
    checkpoint_path="data/pointnet/checkpoints",
    workers_per_dataloader=4,
)

Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name     | Type     | Params | Mode 
----------------------------------------------
0 | pointnet | PointNet | 1.7 M  | train
----------------------------------------------
1.7 M     Trainable params
0         Non-trainable params
1.7 M     Total params
6.683     Total estimated model params size (MB)


Sanity Checking: |                                                                                            …

Training: |                                                                                                   …

../aten/src/ATen/native/cuda/Loss.cu:240: nll_loss_forward_reduce_cuda_kernel_2d: block: [0,0,0], thread: [7,0,0] Assertion `t >= 0 && t < n_classes` failed.


RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.
