In [1]:
# !tar -xvzf dataset.tar.gz

In [2]:
!pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
!pip uninstall torch-scatter torch-sparse torch-cluster -y
!pip install -q torch-scatter -f https://pytorch-geometric.com/whl/torch-2.2.1+cu121.html --no-cache-dir
!pip install -q torch-sparse -f https://pytorch-geometric.com/whl/torch-2.2.1+cu121.html --no-cache-dir
!pip install -q torch-cluster -f https://pytorch-geometric.com/whl/torch-2.2.1+cu121.html --no-cache-dir
!pip install -q git+https://github.com/pyg-team/pytorch_geometric.git
!pip install matplotlib pyvis torchmetrics
!pip install laspy
!pip install hydra-core 


Looking in indexes: https://download.pytorch.org/whl/cu121
Collecting torchvision
  Downloading https://download.pytorch.org/whl/cu121/torchvision-0.17.2%2Bcu121-cp38-cp38-linux_x86_64.whl (7.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.0/7.0 MB[0m [31m45.2 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting torchaudio
  Downloading https://download.pytorch.org/whl/cu121/torchaudio-2.2.2%2Bcu121-cp38-cp38-linux_x86_64.whl (3.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.4/3.4 MB[0m [31m44.3 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting torch
  Downloading https://download.pytorch.org/whl/cu121/torch-2.2.2%2Bcu121-cp38-cp38-linux_x86_64.whl (757.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m757.3/757.3 MB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting pillow!=8.3.*,>=5.3.0 (from torchvision)
  Downloading https://download.pytorch.org/whl/pillow-10.2.0-

In [3]:
!nvidia-smi

Tue Apr 16 12:46:20 2024       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.116.04   Driver Version: 525.116.04   CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Quadro P4000        Off  | 00000000:00:05.0 Off |                  N/A |
| 46%   39C    P8     5W / 105W |      2MiB /  8192MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [4]:
from glob import glob
import os.path as osp
import os
import laspy as lp
import numpy as np
import hydra
from tqdm.auto import tqdm


import torch
import torch.nn.functional as F
from torchmetrics.functional import jaccard_index

import torch_geometric.transforms as T
from torch_geometric.datasets import ShapeNet
from torch_geometric.data import Data
from torch_geometric.loader import DataLoader
# from torch.utils.data import DataLoader

from torch_geometric.nn import MLP, PointNetConv, fps, global_max_pool, radius,knn_interpolate
from torch_geometric.typing import WITH_TORCH_CLUSTER
from torch_geometric.utils import scatter

os.environ['TORCH'] = torch.__version__
print(torch.__version__)

2.2.2+cu121


In [5]:
if not WITH_TORCH_CLUSTER:
    quit("This example requires 'torch-cluster'")

## Define PointNet++ Segmentation model

In [56]:
class SAModule(torch.nn.Module):
    def __init__(self, ratio, r, nn):
        super().__init__()
        self.ratio = ratio
        self.r = r
        self.conv = PointNetConv(nn, add_self_loops=False)

    def forward(self, x, pos, batch):
        idx = fps(pos, batch, ratio=self.ratio)
        
        row, col = radius(pos, pos[idx], self.r, batch, batch[idx],
                          max_num_neighbors=64)
        edge_index = torch.stack([col, row], dim=0)
        x_dst = None if x is None else x[idx]
        
        print(f"idx shape: {idx.shape}")
        print(f"x shape: {x.shape}")
        print(f"pos shape: {pos.shape}")
        print(f"x_dst shape: {x_dst.shape}")
        print(f"edge_index shape: {edge_index.shape}")
        
        x = self.conv((x, x_dst), (pos, pos[idx]), edge_index)
        pos, batch = pos[idx], batch[idx]
        return x, pos, batch


In [7]:
class GlobalSAModule(torch.nn.Module):
    def __init__(self, nn):
        super().__init__()
        self.nn = nn

    def forward(self, x, pos, batch):
        x = self.nn(torch.cat([x, pos], dim=1))
        x = global_max_pool(x, batch)
        pos = pos.new_zeros((x.size(0), 3))
        batch = torch.arange(x.size(0), device=batch.device)
        return x, pos, batch

In [8]:
class FPModule(torch.nn.Module):
    def __init__(self, k, nn):
        super().__init__()
        self.k = k
        self.nn = nn

    def forward(self, x, pos, batch, x_skip, pos_skip, batch_skip):
        x = knn_interpolate(x, pos, pos_skip, batch, batch_skip, k=self.k)
        if x_skip is not None:
            x = torch.cat([x, x_skip], dim=1)
        x = self.nn(x)
        return x, pos_skip, batch_skip

In [57]:
class Net(torch.nn.Module):
    def __init__(self, num_classes , num_features):
        super().__init__()

        # Input channels account for both `pos` and node features.
        # self.sa1_module = SAModule(0.2, 0.2, MLP([3 + 3, 64, 64, 128]))
        self.sa1_module = SAModule(0.2, 0.2, MLP([3 + num_features, 64, 64, 128]))
        self.sa2_module = SAModule(0.25, 0.4, MLP([128 + 3, 128, 128, 256]))
        self.sa3_module = GlobalSAModule(MLP([256 + 3, 256, 512, 1024]))

        self.fp3_module = FPModule(1, MLP([1024 + 256, 256, 256]))
        self.fp2_module = FPModule(3, MLP([256 + 128, 256, 128]))
        self.fp1_module = FPModule(3, MLP([128 + num_features, 128, 128, 128]))

        self.mlp = MLP([128, 128, 128, num_classes], dropout=0.5, norm=None)

        # self.lin1 = torch.nn.Linear(128, 128)
        # self.lin2 = torch.nn.Linear(128, 128)
        # self.lin3 = torch.nn.Linear(128, num_classes)

    def forward(self, data):
        sa0_out = (data.x, data.pos, data.batch)
        sa1_out = self.sa1_module(*sa0_out)
        sa2_out = self.sa2_module(*sa1_out)
        sa3_out = self.sa3_module(*sa2_out)

        fp3_out = self.fp3_module(*sa3_out, *sa2_out)
        fp2_out = self.fp2_module(*fp3_out, *sa1_out)
        x, _, _ = self.fp1_module(*fp2_out, *sa0_out)

        return self.mlp(x).log_softmax(dim=-1)

## Preprocessing data

In [10]:
#Get all las file for training 
TRAIN_DIR = "dales_las/train/" 
TEST_DIR = "dales_las/test/"

all_train_files = os.listdir(osp.abspath(TRAIN_DIR))
all_test_files  = os.listdir(osp.abspath(TEST_DIR))

In [11]:
pc = lp.read(os.path.join(TRAIN_DIR, all_train_files[0])) 
for dimension in pc.point_format.dimensions:
    print(dimension.name)

X
Y
Z
intensity
return_number
number_of_returns
scan_direction_flag
edge_of_flight_line
classification
synthetic
key_point
withheld
scan_angle_rank
user_data
point_source_id
gps_time


In [50]:
def convertData(pc):
    #Define preprocessing steps
    num_to_load = 32000
    transform = T.Compose([
        T.RandomJitter(0.01),
        T.RandomRotate(15, axis=0),
        T.RandomRotate(15, axis=1),
        T.RandomRotate(15, axis=2)
        ])
    pre_transform = T.NormalizeScale()
    
    #Get imformation from las file
    coords = np.vstack((pc.x, pc.y, pc.z)).transpose()    
    scales = pc.header.scales
    offsets= pc.header.offsets    
    scaled_coords = (coords * scales) + offsets
    
    labels = pc.classification.array
    features = pc.intensity if np.max(pc.intensity) > 0 else np.ones_like(pc.intensity, dtype = np.uint8)
    if len(features.shape) <2: 
        features = features[:, np.newaxis]
    num_classes = len(np.unique(labels))
    

    
    
    print(features.shape)
    print(coords.shape)
    print(scaled_coords.shape)
    print(labels.shape)
    print(np.unique(labels))
    return Data(x = torch.from_numpy(features[:num_to_load]), 
                pos = torch.from_numpy(scaled_coords[:num_to_load]).type(torch.FloatTensor), 
                y = torch.from_numpy(labels[:num_to_load]), 
                num_classes=num_classes,
                num_features = features.shape[-1],
               transform = transform, 
               pre_transform= pre_transform,
               batch = torch.from_numpy(np.zeros_like(features[:num_to_load],dtype = np.int64)).flatten()
               )

In [51]:
#EDA

pc = lp.read(os.path.join(TRAIN_DIR, all_train_files[0])) 
train_dataset = convertData(pc)
print(train_dataset)


(12954374, 1)
(12954374, 3)
(12954374, 3)
(12954374,)
[0 1 2 3 4 5 6 7 8]
Data(x=[32000, 1], y=[32000], pos=[32000, 3], num_classes=9, num_features=1, transform=Compose([
  RandomJitter(0.01),
  RandomRotate((-15, 15), axis=0),
  RandomRotate((-15, 15), axis=1),
  RandomRotate((-15, 15), axis=2)
]), pre_transform=NormalizeScale(), batch=[32000])


In [23]:
# train_dataset = ShapeNet(path, category, split='trainval', force_reload=True)
# test_dataset = ShapeNet(path, category, split='test',
#                         pre_transform=pre_transform)
train_loader = DataLoader(train_dataset, shuffle=True,  num_workers=6)
# test_loader = DataLoader(test_dataset, batch_size=12, shuffle=False,
#                          num_workers=6)

In [24]:
torch.cuda.empty_cache()

In [58]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Net(train_dataset.num_classes, train_dataset.num_features).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
# scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.5)

In [59]:
model.train()
data = train_dataset
total_loss = correct_nodes = total_nodes = 0
data = data.to(device)
optimizer.zero_grad()
out = model(data)
loss = F.nll_loss(out, data.y)
loss.backward()
optimizer.step()
total_loss += loss.item()
correct_nodes += out.argmax(dim=1).eq(data.y).sum().item()
total_nodes += data.num_nodes

idx shape: torch.Size([6400])
x shape: torch.Size([32000, 1])
pos shape: torch.Size([32000, 3])
x_dst shape: torch.Size([6400, 1])
edge_index shape: torch.Size([2, 409600])
idx shape: torch.Size([1600])
x shape: torch.Size([6400, 128])
pos shape: torch.Size([6400, 3])
x_dst shape: torch.Size([1600, 128])
edge_index shape: torch.Size([2, 102400])


tensor([[5.1098e+03, 5.4461e+04, 5.1390e-01],
        [5.1098e+03, 5.4461e+04, 5.1440e-01],
        [5.1098e+03, 5.4461e+04, 5.1410e-01],
        ...,
        [5.1098e+03, 5.4461e+04, 5.2100e-01],
        [5.1099e+03, 5.4462e+04, 5.2410e-01],
        [5.1098e+03, 5.4461e+04, 5.7960e-01]], device='cuda:0')


## Creat Pointnet++ for segmentation and pass it to GPU

In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Net(train_dataset.num_classes).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

## Load Data

In [None]:
category = 'Airplane'  # Pass in `None` to train on all categories.
path = osp.join(osp.dirname(os.getcwd()), '..', 'data', 'ShapeNet')
transform = T.Compose([
    T.RandomJitter(0.01),
    T.RandomRotate(15, axis=0),
    T.RandomRotate(15, axis=1),
    T.RandomRotate(15, axis=2)
])
pre_transform = T.NormalizeScale()

transform = None
pre_transforms = None 
# train_dataset = ShapeNet(path, category, split='trainval', transform=transform,
#                          pre_transform=pre_transform)
train_dataset = ShapeNet(path, category, split='trainval', force_reload=True)
test_dataset = ShapeNet(path, category, split='test',
                        pre_transform=pre_transform)
train_loader = DataLoader(train_dataset, batch_size=15, shuffle=True,
                          num_workers=6)
test_loader = DataLoader(test_dataset, batch_size=15, shuffle=False,
                         num_workers=6)

In [None]:
print(train_dataset.x.shape)
print(train_dataset.nodes.shape)
for data in train_loader:
    print(data.batch.shape)
    print(train_dataset.x.shape[0]// data.batch.shape[0])
    print(np.unique(data.batch, return_counts=True))
    break


## Define train step

In [None]:
def train():
    model.train()

    total_loss = correct_nodes = total_nodes = 0
    for i, data in enumerate(train_loader):
        data = data.to(device)
        optimizer.zero_grad()
        out = model(data)
        loss = F.nll_loss(out, data.y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        correct_nodes += out.argmax(dim=1).eq(data.y).sum().item()
        total_nodes += data.num_nodes

        if (i + 1) % 10 == 0:
            print(f'[{i+1}/{len(train_loader)}] Loss: {total_loss / 10:.4f} '
                  f'Train Acc: {correct_nodes / total_nodes:.4f}')
            total_loss = correct_nodes = total_nodes = 0


In [None]:
def test(loader):
    model.eval()

    correct = 0
    for data in loader:
        data = data.to(device)
        with torch.no_grad():
            pred = model(data).max(1)[1]
        correct += pred.eq(data.y).sum().item()
    return correct / len(loader.dataset)

In [None]:


# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# model = Net().to(device)
# optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# for epoch in range(1, 201):
#     train(epoch)
#     test_acc = test(test_loader)
#     print(f'Epoch: {epoch:03d}, Test: {test_acc:.4f}')

In [None]:
print(train_dataset)