### 1. Dynamic Graph CNN base model.

##### Imports

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install unrar
!unrar x /content/drive/MyDrive/Disertatie/data_slim.rar
!unrar x /content/drive/MyDrive/Disertatie/metadata.rar

In [3]:
!pip install plyfile

Collecting plyfile
  Downloading plyfile-1.0.3-py3-none-any.whl.metadata (2.1 kB)
Downloading plyfile-1.0.3-py3-none-any.whl (23 kB)
Installing collected packages: plyfile
Successfully installed plyfile-1.0.3


In [4]:
from __future__ import print_function
import sys
import copy
import math
import numpy as np
import glob
import h5py
import json
import cv2
import pickle
from torch.utils.data import Dataset
import torch
import torch.nn as nn
import torch.nn.init as init
import torch.nn.functional as F
import requests
import urllib.request as urllib
import zipfile
import os
import torch.optim as optim
from torch.optim.lr_scheduler import CosineAnnealingLR, StepLR
from torch.utils.data import DataLoader
import sklearn.metrics as metrics
from plyfile import PlyData, PlyElement

##### Utility functions, such as loss etc. & Global vars

In [5]:
def knn(x, k):
    """Return, for every point, the indices of its ``k`` nearest neighbours.

    Args:
        x: tensor indexed as (batch, feature_dim, num_points); the reduction
            over ``dim=1`` and the ``transpose(2, 1)`` below rely on that layout.
        k: number of neighbours to return per point.

    Returns:
        Index tensor of shape (batch, num_points, k), ordered from nearest to
        farthest. A point is at distance 0 from itself, so it normally
        appears among its own neighbours.
    """
    points_first = x.transpose(2, 1)
    sq_norms = (x ** 2).sum(dim=1, keepdim=True)
    neg_two_dot = -2 * torch.matmul(points_first, x)

    # Negated squared distance, -(|xi|^2 - 2 xi.xj + |xj|^2): the largest
    # values therefore belong to the closest points.
    neg_sq_dist = -sq_norms - neg_two_dot - sq_norms.transpose(2, 1)

    # (batch_size, num_points, k)
    _, idx = neg_sq_dist.topk(k=k, dim=-1)
    return idx


def cal_loss(pred, gold, smoothing=False, ignore_index=255):
    """Cross-entropy loss with optional label smoothing.

    Args:
        pred: raw class scores. The smoothing branch needs shape
            (num_samples, num_classes); the plain branch accepts whatever
            ``F.cross_entropy`` accepts.
        gold: integer class targets; flattened to 1-D before use.
        smoothing: when True, use smoothed one-hot targets (eps = 0.2).
        ignore_index: target value excluded from the loss. It is honoured by
            BOTH branches; previously the smoothing branch ignored it and a
            target equal to ``ignore_index`` made ``scatter`` index out of range.

    Returns:
        Scalar tensor: the mean loss over the non-ignored targets. If every
        target is ignored the mean is taken over zero elements and is NaN.
    """
    gold = gold.contiguous().view(-1)

    if smoothing:
        eps = 0.2
        n_class = pred.size(1)

        # Drop ignored targets before building the one-hot matrix so that an
        # out-of-range label such as 255 never reaches scatter().
        keep = gold != ignore_index
        pred = pred[keep]
        gold = gold[keep]

        one_hot = torch.zeros_like(pred).scatter(1, gold.view(-1, 1), 1)
        # True class gets 1 - eps; eps is shared evenly among the other classes.
        one_hot = one_hot * (1 - eps) + (1 - one_hot) * eps / (n_class - 1)
        log_prb = F.log_softmax(pred, dim=1)

        loss = -(one_hot * log_prb).sum(dim=1).mean()
    else:
        loss = F.cross_entropy(
            pred, gold, reduction='mean',
            ignore_index=ignore_index)

    return loss


def get_graph_feature(x, k=20, idx=None, dim9=False):
    """Build edge features for every point and each of its neighbours.

    Args:
        x: tensor whose first axis is the batch and whose third axis is the
            point axis; it is viewed as (batch, num_dims, num_points).
        k: number of neighbours. When ``idx`` is supplied, ``k`` is taken
            from ``idx``'s last axis instead.
        idx: optional precomputed neighbour indices of shape
            (batch, num_points, k). Computed with ``knn`` when omitted.
        dim9: when True, neighbours are searched using only channels 6 and
            onward of ``x``.

    Returns:
        Tensor of shape (batch, 2 * num_dims, num_points, k): the first
        ``num_dims`` channels hold ``neighbour - centre``, the remaining ones
        hold the centre point repeated for each neighbour.
    """
    batch_size = x.size(0)
    num_points = x.size(2)
    x = x.view(batch_size, -1, num_points)
    if idx is None:
        if not dim9:
            idx = knn(x, k=k)   # (batch_size, num_points, k)
        else:
            idx = knn(x[:, 6:], k=k)
    else:
        # Trust the supplied neighbourhood size rather than the default k.
        k = idx.size(-1)

    # Follow the input's device instead of assuming CUDA, so the function
    # also works on CPU and on non-default GPUs.
    idx_base = torch.arange(
        0, batch_size, device=x.device).view(-1, 1, 1)*num_points

    # Offset per-sample indices so they address the flattened
    # (batch_size*num_points, num_dims) point table built below.
    idx = idx + idx_base

    idx = idx.view(-1)

    _, num_dims, _ = x.size()

    # (batch_size, num_dims, num_points) -> (batch_size, num_points, num_dims)
    x = x.transpose(2, 1).contiguous()
    feature = x.view(batch_size*num_points, -1)[idx, :]
    feature = feature.view(batch_size, num_points, k, num_dims)
    x = x.view(batch_size, num_points, 1, num_dims).repeat(1, 1, k, 1)

    feature = torch.cat((feature-x, x), dim=3).permute(0, 3, 1, 2).contiguous()

    return feature


def calculate_semantic_mIoU(pred_np, true_np, visual=False, num_classes=13):
    """Per-class intersection-over-union accumulated over all samples.

    Args:
        pred_np: predicted labels, indexable by sample along the first axis.
        true_np: ground-truth labels with the same layout; its first axis
            decides how many samples are visited.
        visual: unused; kept so existing callers keep working.
        num_classes: number of semantic classes (labels 0..num_classes-1).
            Defaults to 13, the value that used to be hard-coded.

    Returns:
        Array of shape (num_classes,) holding intersection / union per class.
        A class that occurs in neither prediction nor ground truth has a zero
        union and therefore yields NaN (0/0), exactly as before; only the
        NumPy runtime warning is silenced.
    """
    intersections = np.zeros(num_classes)
    unions = np.zeros(num_classes)
    for sample_idx in range(true_np.shape[0]):
        pred_row = pred_np[sample_idx]
        true_row = true_np[sample_idx]
        for sem in range(num_classes):
            pred_hit = pred_row == sem
            true_hit = true_row == sem
            intersections[sem] += np.sum(np.logical_and(pred_hit, true_hit))
            unions[sem] += np.sum(np.logical_or(pred_hit, true_hit))
    with np.errstate(divide='ignore', invalid='ignore'):
        return intersections / unions

##### Experimental Area

In [6]:
def adaptive_knn_local_density(x, base_k=5, density_scale=1.0):
    """
    Compute k-nearest neighbors with adaptive neighborhood sizes based on local density.

    Args:
    - x (Tensor): The input tensor of shape (batch_size, num_dims, num_points).
    - base_k (int): The base number of neighbors used to estimate density.
    - density_scale (float): Scaling factor to adjust k based on density.

    Returns:
    - Tensor: Long tensor of shape (batch_size, num_points, max_k), where
      max_k is the largest per-point k. Row (b, i) holds the indices of the
      k_i nearest neighbours of point i (nearest first) followed by zeros.
      NOTE: the padding value 0 is indistinguishable from a genuine
      neighbour with index 0; callers must account for that.

    Notes:
    - The distance used to be computed as ``xx - inner + xx^T`` with
      ``inner = -2 x^T x``, which equals |xi + xj|^2 rather than
      |xi - xj|^2. The sign is fixed here.
    - Neighbours are now taken from one batched top-k instead of one top-k
      call per point. Results are the same whenever distances in a row are
      distinct; the order among exactly tied distances is not guaranteed.
    """
    _, _, num_points = x.size()

    # Step 1: squared Euclidean distances |xi|^2 - 2 xi.xj + |xj|^2
    inner = -2 * torch.matmul(x.transpose(2, 1), x)
    xx = torch.sum(x**2, dim=1, keepdim=True)
    pairwise_distance = xx + inner + xx.transpose(2, 1)

    # Get initial k neighbors to estimate density
    initial_k = min(base_k, num_points - 1)
    initial_idx = pairwise_distance.topk(k=initial_k, largest=False, dim=-1)[1]

    # Step 2: Estimate local density
    # Mean squared distance to the initial neighbours (the point itself is
    # among them at distance 0); a larger value means a sparser region.
    density = torch.mean(torch.gather(
        pairwise_distance, 2, initial_idx), dim=-1)
    # average density per batch
    avg_density = torch.mean(density, dim=-1, keepdim=True)
    relative_density = density*density_scale / avg_density

    # Step 3: Determine adaptive k for each point (more neighbours where the
    # mean neighbour distance is small).
    adaptive_k = torch.clamp(
        (base_k * torch.reciprocal(relative_density)).long(), min=2, max=num_points - 1)

    # Step 4: one sorted top-k with the largest k, then blank out the slots
    # beyond each point's own k.
    max_k = int(torch.max(adaptive_k).item())
    nearest = pairwise_distance.topk(k=max_k, largest=False, dim=-1)[1]
    slot = torch.arange(max_k, device=x.device).view(1, 1, -1)
    keep = slot < adaptive_k.unsqueeze(-1)
    idx_adaptive = nearest * keep.to(nearest.dtype)

    return idx_adaptive


def calculate_ktest(k_train, bias, n_train, n_test):
    """Scale the training neighbourhood size to a different point count.

    The part of ``k_train`` above ``bias`` is scaled by ``n_test / n_train``;
    ``bias`` is then added back and the result is truncated to an int.
    """
    scalable_part = k_train - bias
    size_ratio = n_test / n_train
    return int(scalable_part * size_ratio + bias)


def adaptive_knn(x, n_train, k_train=20, bias=5):
    """
    Compute k-nearest neighbors with dynamically adjusted neighborhood sizes based on input size.

    Args:
    - x (Tensor): The input tensor of shape (batch_size, num_dims, num_points).
    - n_train (int): The number of points in the training dataset.
    - k_train (int): The base number of neighbors used during training.
    - bias (int): The smallest size of the KNN model.

    Returns:
    - Tensor: Indices of the k-nearest neighbors, shape
      (batch_size, num_points, k_test), nearest first.

    Notes:
    - The distance used to be computed as ``xx - inner + xx^T`` with
      ``inner = -2 x^T x``, which equals |xi + xj|^2 rather than
      |xi - xj|^2. The sign is fixed here.
    - k_test is capped at the number of points, because ``topk`` raises when
      asked for more elements than the row contains.
    """
    _, _, n_test = x.size()
    k_test = min(calculate_ktest(k_train, bias, n_train, n_test), n_test)

    # Squared Euclidean distance |xi|^2 - 2 xi.xj + |xj|^2
    inner = -2 * torch.matmul(x.transpose(2, 1), x)
    xx = torch.sum(x**2, dim=1, keepdim=True)
    pairwise_distance = xx + inner + xx.transpose(2, 1)

    # Get the top k_test neighbors (smallest distances)
    # (batch_size, num_points, k_test)
    idx = pairwise_distance.topk(k=k_test, largest=False, dim=-1)[1]

    return idx

##### Model Implementation for Semantic Segmentation

We start with the <strong>PointNet</strong> implementation, which serves as our backbone; the EdgeConv layers are then added on top of it.

In [7]:
class PointNet(nn.Module):
    """PointNet-style classifier: shared per-point MLP, global max pool, FC head.

    Args:
        args: mapping; only ``args['embedded_dims']`` (width of the global
            feature) is read.
        out_channels: number of output scores per sample.

    ``forward`` takes a (batch, 3, num_points) tensor — ``conv_1`` expects 3
    input channels — and returns (batch, out_channels).
    """

    def __init__(self, args, out_channels=40):
        super(PointNet, self).__init__()
        self.conv_1 = nn.Conv1d(3, 64, kernel_size=1, bias=False)
        self.conv_2 = nn.Conv1d(64, 64, kernel_size=1, bias=False)
        self.conv_3 = nn.Conv1d(64, 64, kernel_size=1, bias=False)
        self.conv_4 = nn.Conv1d(64, 128, kernel_size=1, bias=False)
        self.conv_5 = nn.Conv1d(128, args['embedded_dims'], kernel_size=1, bias=False)
        self.batch_norm_1 = nn.BatchNorm1d(64)
        self.batch_norm_2 = nn.BatchNorm1d(64)
        self.batch_norm_3 = nn.BatchNorm1d(64)
        self.batch_norm_4 = nn.BatchNorm1d(128)
        self.batch_norm_5 = nn.BatchNorm1d(args['embedded_dims'])
        self.fc_1 = nn.Linear(args['embedded_dims'], 512, bias=False)
        self.batch_norm_6 = nn.BatchNorm1d(512)
        self.dropout = nn.Dropout()
        self.fc_2 = nn.Linear(512, out_channels)

    def forward(self, x):
        x = F.relu(self.batch_norm_1(self.conv_1(x)))
        x = F.relu(self.batch_norm_2(self.conv_2(x)))
        x = F.relu(self.batch_norm_3(self.conv_3(x)))
        x = F.relu(self.batch_norm_4(self.conv_4(x)))
        x = F.relu(self.batch_norm_5(self.conv_5(x)))
        # Squeeze only the pooled axis: a bare squeeze() would also drop the
        # batch axis when batch_size == 1 and break the layers that follow.
        x = F.adaptive_max_pool1d(x, 1).squeeze(-1)
        x = F.relu(self.batch_norm_6(self.fc_1(x)))
        x = self.dropout(x)
        x = self.fc_2(x)
        return x

Next, we implement the <strong>EdgeConv</strong> network, specifically tailored for semantic segmentation.

In [8]:
class DGCNN(nn.Module):
    """Dynamic Graph CNN for per-point semantic segmentation.

    Args:
        args: mapping providing ``'k'`` (neighbours per point),
            ``'embedded_dims'`` (width of the global feature) and
            ``'dropout'`` (dropout probability).
        num_clases: number of output classes per point (name kept as-is for
            backward compatibility).

    ``forward`` takes a (batch, 9, num_points) tensor — ``conv_1`` expects
    9*2 edge-feature channels and the first neighbour search uses channels
    6 onward — and returns (batch, num_clases, num_points) scores.
    """

    def __init__(self, args, num_clases=13):
        super(DGCNN, self).__init__()
        self.args = args
        self.k = args['k']
        self.batch_norm_1 = nn.BatchNorm2d(64)
        self.batch_norm_2 = nn.BatchNorm2d(64)
        self.batch_norm_3 = nn.BatchNorm2d(64)
        self.batch_norm_4 = nn.BatchNorm2d(64)
        self.batch_norm_5 = nn.BatchNorm2d(64)
        self.batch_norm_6 = nn.BatchNorm1d(args['embedded_dims'])
        self.batch_norm_7 = nn.BatchNorm1d(512)
        self.batch_norm_8 = nn.BatchNorm1d(256)

        self.conv_1 = nn.Sequential(nn.Conv2d(18, 64, kernel_size=1, bias=False),
                                    self.batch_norm_1,
                                    nn.LeakyReLU(negative_slope=0.2))
        self.conv_2 = nn.Sequential(nn.Conv2d(64, 64, kernel_size=1, bias=False),
                                    self.batch_norm_2,
                                    nn.LeakyReLU(negative_slope=0.2))
        self.conv_3 = nn.Sequential(nn.Conv2d(64*2, 64, kernel_size=1, bias=False),
                                    self.batch_norm_3,
                                    nn.LeakyReLU(negative_slope=0.2))
        self.conv_4 = nn.Sequential(nn.Conv2d(64, 64, kernel_size=1, bias=False),
                                    self.batch_norm_4,
                                    nn.LeakyReLU(negative_slope=0.2))
        self.conv_5 = nn.Sequential(nn.Conv2d(64*2, 64, kernel_size=1, bias=False),
                                    self.batch_norm_5,
                                    nn.LeakyReLU(negative_slope=0.2))
        self.conv_6 = nn.Sequential(nn.Conv1d(192, args['embedded_dims'], kernel_size=1, bias=False),
                                    self.batch_norm_6,
                                    nn.LeakyReLU(negative_slope=0.2))
        # Input = global feature (embedded_dims) + the three 64-channel local
        # features. This was hard-coded to 1216, which is only right when
        # embedded_dims == 1024.
        self.conv_7 = nn.Sequential(nn.Conv1d(args['embedded_dims'] + 64*3, 512, kernel_size=1, bias=False),
                                    self.batch_norm_7,
                                    nn.LeakyReLU(negative_slope=0.2))
        self.conv_8 = nn.Sequential(nn.Conv1d(512, 256, kernel_size=1, bias=False),
                                    self.batch_norm_8,
                                    nn.LeakyReLU(negative_slope=0.2))
        self.dropout = nn.Dropout(p=args['dropout'])
        self.conv_9 = nn.Conv1d(256, num_clases, kernel_size=1, bias=False)

    def forward(self, x):
        num_points = x.size(2)

        x = get_graph_feature(x, k=self.k, dim9=True)   # (batch_size, 9, num_points) -> (batch_size, 9*2, num_points, k)
        x = self.conv_1(x)                              # (batch_size, 9*2, num_points, k) -> (batch_size, 64, num_points, k)
        x = self.conv_2(x)                              # (batch_size, 64, num_points, k) -> (batch_size, 64, num_points, k)
        x1 = x.max(dim=-1, keepdim=False)[0]            # (batch_size, 64, num_points, k) -> (batch_size, 64, num_points)

        x = get_graph_feature(x1, k=self.k)             # (batch_size, 64, num_points) -> (batch_size, 64*2, num_points, k)
        x = self.conv_3(x)                              # (batch_size, 64*2, num_points, k) -> (batch_size, 64, num_points, k)
        x = self.conv_4(x)                              # (batch_size, 64, num_points, k) -> (batch_size, 64, num_points, k)
        x2 = x.max(dim=-1, keepdim=False)[0]            # (batch_size, 64, num_points, k) -> (batch_size, 64, num_points)

        x = get_graph_feature(x2, k=self.k)             # (batch_size, 64, num_points) -> (batch_size, 64*2, num_points, k)
        x = self.conv_5(x)                              # (batch_size, 64*2, num_points, k) -> (batch_size, 64, num_points, k)
        x3 = x.max(dim=-1, keepdim=False)[0]            # (batch_size, 64, num_points, k) -> (batch_size, 64, num_points)

        x = torch.cat((x1, x2, x3), dim=1)              # (batch_size, 64*3, num_points)

        x = self.conv_6(x)                              # (batch_size, 64*3, num_points) -> (batch_size, emb_dims, num_points)
        x = x.max(dim=-1, keepdim=True)[0]              # (batch_size, emb_dims, num_points) -> (batch_size, emb_dims, 1)

        x = x.repeat(1, 1, num_points)                  # (batch_size, emb_dims, num_points)
        x = torch.cat((x, x1, x2, x3), dim=1)           # (batch_size, emb_dims+64*3, num_points)

        x = self.conv_7(x)                              # (batch_size, emb_dims+64*3, num_points) -> (batch_size, 512, num_points)
        x = self.conv_8(x)                              # (batch_size, 512, num_points) -> (batch_size, 256, num_points)
        x = self.dropout(x)
        x = self.conv_9(x)                              # (batch_size, 256, num_points) -> (batch_size, num_clases, num_points)

        return x

### 2. Loading the dataset

##### Some constant vars for the s3dis dataset

In [9]:
# Class names, one per line; the line index is the integer label.
# Read inside a `with` block so the file handle is closed deterministically.
with open('metadata/class_names.txt') as class_names_file:
    g_classes = [line.rstrip() for line in class_names_file]
g_class2label = {cls: i for i, cls in enumerate(g_classes)}
# Display colour per class name, as three 0-255 integers.
g_class2color = {'ceiling':  [0, 255, 0],
                 'floor':    [0, 0, 255],
                 'wall':     [0, 255, 255],
                 'beam':     [255, 255, 0],
                 'column':   [255, 0, 255],
                 'window':   [100, 100, 255],
                 'door':     [200, 200, 100],
                 'table':    [170, 120, 200],
                 'chair':    [255, 0, 0],
                 'sofa':     [200, 100, 100],
                 'bookcase': [10, 200, 100],
                 'board':    [200, 200, 200],
                 'clutter':  [50, 50, 50]}
g_easy_view_labels = [7, 8, 9, 10, 11, 1]
g_label2color = {g_classes.index(cls): g_class2color[cls] for cls in g_classes}

# Running counter used by room2blocks() to number the raw block files it
# writes. (A module-level `global` statement is a no-op, so it is omitted.)
raw_data_index = 0

##### Utils for data handling

In [18]:
def sample_data(data, num_sample):
    """Resample ``data`` (N x ...) to exactly ``num_sample`` rows.

    - N == num_sample: the data is returned unchanged.
    - N >  num_sample: ``num_sample`` distinct rows are kept. The draw used
      to be made with replacement, so the same row could be kept twice
      while others were dropped.
    - N <  num_sample: all rows are kept and ``num_sample - N`` randomly
      chosen rows (with replacement) are appended as duplicates.

    Returns:
        (sampled_data, indices): ``indices`` is always a 1-D integer ndarray
        such that ``sampled_data`` equals ``data[indices]``.
    """
    N = data.shape[0]
    if N == num_sample:
        return data, np.arange(N)
    if N > num_sample:
        sample = np.random.choice(N, num_sample, replace=False)
        return data[sample, ...], sample
    extra = np.random.choice(N, num_sample - N)
    padded = np.concatenate([data, data[extra, ...]], 0)
    return padded, np.concatenate([np.arange(N), extra])

def sample_data_label(data, label, num_sample):
    """Resample ``data`` with ``sample_data`` and pick the matching labels.

    Returns:
        (new_data, new_label), where ``new_label`` is ``label`` indexed with
        the same indices that ``sample_data`` reported for ``new_data``.
    """
    resampled, picked = sample_data(data, num_sample)
    return resampled, label[picked]

def room2blocks(data_label_filename, data, label, num_point, block_size=1.0, stride=1.0,
                random_sample=False, sample_num=None, sample_aug=1):
    """ Prepare block training data.
    Args:
        data_label_filename: path of the room file; its basename is expected
            to look like ``Area_<d>_<room>.<ext>`` (character 5 is used as
            the area id, characters 7 onward as the room name).
        data: N x 6 numpy array, 012 are XYZ in meters, 345 are RGB in [0,1]
            assumes the data is shifted (min point is origin) and aligned
            (aligned with XYZ axis)
        label: N size uint8 numpy array from 0-12
        num_point: int, how many points to sample in each block
        block_size: float, physical size of the block in meters
        stride: float, stride for block sweeping
        random_sample: bool, if True, we will randomly sample blocks in the room
        sample_num: int, if random sample, how many blocks to sample
            [default: room area]
        sample_aug: if random sample, how much aug
    Returns:
        block_datas: K x num_point x 6 np array of XYZRGB, RGB is in [0,1]
        block_labels: K x num_point np array of uint8 labels
    Raises:
        ValueError: from ``np.concatenate`` when no block contains at least
            100 points (there is nothing to stack).
    Side effects:
        Prints the filename, appends the XYZ of every kept block to
        ``data/indoor3d_sem_seg_hdf5_data_test/raw_data3d/Area_<d>/<room>(<n>).txt``
        and advances the module-level ``raw_data_index`` counter ``n``.
    """
    assert (stride <= block_size)
    print(data_label_filename)
    limit = np.amax(data, 0)[0:3]

    # Get the corner location for our sampling blocks
    xbeg_list = []
    ybeg_list = []
    if not random_sample:
        num_block_x = int(np.ceil((limit[0] - block_size) / stride)) + 1
        num_block_y = int(np.ceil((limit[1] - block_size) / stride)) + 1
        for i in range(num_block_x):
            for j in range(num_block_y):
                xbeg_list.append(i*stride)
                ybeg_list.append(j*stride)
    else:
        num_block_x = int(np.ceil(limit[0] / block_size))
        num_block_y = int(np.ceil(limit[1] / block_size))
        if sample_num is None:
            sample_num = num_block_x * num_block_y * sample_aug
        for _ in range(sample_num):
            xbeg = np.random.uniform(-block_size, limit[0])
            ybeg = np.random.uniform(-block_size, limit[1])
            xbeg_list.append(xbeg)
            ybeg_list.append(ybeg)

    # Derive the area id and room name from the file's basename.
    data_label_filename = data_label_filename.replace('\\', '/')
    data_label_filename = data_label_filename[:-4].split('/')
    data_label_filename = data_label_filename[-1]
    test_area = data_label_filename[5]
    room_name = data_label_filename[7:]
    raw_dir = "data/indoor3d_sem_seg_hdf5_data_test/raw_data3d/Area_"+str(test_area)
    # exist_ok avoids the check-then-create race and creates parents too.
    os.makedirs(raw_dir, exist_ok=True)

    # Collect blocks
    block_data_list = []
    block_label_list = []
    global raw_data_index
    for xbeg, ybeg in zip(xbeg_list, ybeg_list):
        xcond = (data[:, 0] <= xbeg+block_size) & (data[:, 0] >= xbeg)
        ycond = (data[:, 1] <= ybeg+block_size) & (data[:, 1] >= ybeg)
        cond = xcond & ycond
        if np.sum(cond) < 100:  # discard block if there are less than 100 pts.
            continue

        block_data = data[cond, :]
        block_label = label[cond]

        # randomly subsample data
        block_data_sampled, block_label_sampled = \
            sample_data_label(block_data, block_label, num_point)
        block_data_list.append(np.expand_dims(block_data_sampled, 0))
        block_label_list.append(np.expand_dims(block_label_sampled, 0))

        # Close each raw-block file right away; the handle used to be leaked
        # once per block, which can exhaust the process's file descriptors.
        raw_path = ('data/indoor3d_sem_seg_hdf5_data_test/raw_data3d/Area_' +
                    str(test_area)+'/'+str(room_name)+'('+str(raw_data_index)+').txt')
        with open(raw_path, "a") as f:
            np.savetxt(f, block_data_sampled[:, 0:3], fmt='%s', delimiter=' ')
        raw_data_index = raw_data_index + 1
    return np.concatenate(block_data_list, 0), \
        np.concatenate(block_label_list, 0)


def room2blocks_plus(data_label, num_point, block_size, stride,
                     random_sample, sample_num, sample_aug,
                     data_label_filename=None):
    """ room2block with RGB preprocessing.

    Args:
        data_label: N x C array; columns 0-5 are XYZRGB (RGB in 0-255) and
            the last column is the label.
        num_point, block_size, stride, random_sample, sample_num, sample_aug:
            forwarded unchanged to ``room2blocks``.
        data_label_filename: path of the room file, required by
            ``room2blocks`` to name its raw-block output. It is a new
            trailing keyword so the existing positional signature is kept.

    Returns:
        Whatever ``room2blocks`` returns: (block_datas, block_labels).

    Raises:
        ValueError: if ``data_label_filename`` is not given. Previously the
            filename was never forwarded, so every argument reached
            ``room2blocks`` shifted by one position and the call could not
            work.
    """
    if data_label_filename is None:
        raise ValueError(
            'room2blocks_plus needs data_label_filename to name the raw block files.')
    # Work on a copy so the caller's array is not rescaled in place.
    data = data_label[:, 0:6].copy()
    data[:, 3:6] /= 255.0
    label = data_label[:, -1].astype(np.uint8)

    return room2blocks(data_label_filename, data, label, num_point, block_size, stride,
                       random_sample, sample_num, sample_aug)


def room2blocks_wrapper(data_label_filename, num_point, block_size=1.0, stride=1.0,
                        random_sample=False, sample_num=None, sample_aug=1):
    """Load a room file (``.txt`` or ``.npy``) and split it into blocks.

    Raises:
        ValueError: if the filename does not end in ``txt`` or ``npy``.
            (This replaces ``print`` + ``exit()``, which in a notebook only
            schedules a kernel shutdown and lets execution continue with
            ``data_label`` unbound.)
    """
    if data_label_filename[-3:] == 'txt':
        data_label = np.loadtxt(data_label_filename)
    elif data_label_filename[-3:] == 'npy':
        data_label = np.load(data_label_filename)
    else:
        raise ValueError('Unknown file type! exiting.')
    return room2blocks_plus(data_label, num_point, block_size, stride,
                            random_sample, sample_num, sample_aug,
                            data_label_filename=data_label_filename)


def room2blocks_plus_normalized(data_label_filename, data_label, num_point, block_size, stride,
                                random_sample, sample_num, sample_aug):
    """ room2block, with input filename and RGB preprocessing.
        for each block centralize XYZ, add normalized XYZ as 678 channels

    Args:
        data_label_filename: path of the room file, forwarded to ``room2blocks``.
        data_label: N x C array; columns 0-5 are XYZRGB (RGB in 0-255) and
            the last column is the label.
        Remaining arguments are forwarded unchanged to ``room2blocks``.

    Returns:
        new_data_batch: K x num_point x 9 array. Channels 0-5 are XYZRGB with
            X and Y shifted by ``block minimum + block_size / 2``; channels
            6-8 are the original XYZ divided by the room's maximum X/Y/Z.
        label_batch: the labels returned by ``room2blocks``.
    """
    # Work on a copy so the caller's array is not rescaled in place.
    data = data_label[:, 0:6].copy()
    data[:, 3:6] /= 255.0
    label = data_label[:, -1].astype(np.uint8)
    # NumPy reductions instead of the builtin max(), which walks the array
    # element by element in Python.
    max_room_x, max_room_y, max_room_z = np.max(data[:, 0:3], axis=0)
    data_batch, label_batch = room2blocks(data_label_filename, data, label, num_point, block_size, stride,
                                          random_sample, sample_num, sample_aug)
    new_data_batch = np.zeros((data_batch.shape[0], num_point, 9))
    # Room-normalised coordinates are taken BEFORE the blocks are centred.
    new_data_batch[:, :, 6] = data_batch[:, :, 0] / max_room_x
    new_data_batch[:, :, 7] = data_batch[:, :, 1] / max_room_y
    new_data_batch[:, :, 8] = data_batch[:, :, 2] / max_room_z
    # Per-block minimum of X and Y, shape (K, 1, 2), broadcast over the points.
    min_xy = data_batch[:, :, 0:2].min(axis=1, keepdims=True)
    data_batch[:, :, 0:2] -= (min_xy + block_size/2)
    new_data_batch[:, :, 0:6] = data_batch
    return new_data_batch, label_batch


def room2blocks_wrapper_normalized(data_label_filename, num_point, block_size=1.0, stride=1.0,
                                   random_sample=False, sample_num=None, sample_aug=1):
    """Load a room file (``.txt`` or ``.npy``) and return normalized blocks.

    The loaded array is passed to ``room2blocks_plus_normalized`` together
    with every other argument.

    Raises:
        ValueError: if the filename does not end in ``txt`` or ``npy``.
            (This replaces ``print`` + ``exit()``, which in a notebook only
            schedules a kernel shutdown and lets execution continue with
            ``data_label`` unbound.)
    """
    if data_label_filename[-3:] == 'txt':
        data_label = np.loadtxt(data_label_filename)
    elif data_label_filename[-3:] == 'npy':
        data_label = np.load(data_label_filename)
    else:
        raise ValueError('Unknown file type! exiting.')
    return room2blocks_plus_normalized(data_label_filename, data_label, num_point, block_size, stride,
                                       random_sample, sample_num, sample_aug)


def save_h5(h5_filename, data, label, data_dtype='uint8', label_dtype='uint8'):
    """Write ``data`` and ``label`` to a new HDF5 file.

    The file is created (or truncated) at ``h5_filename`` with two
    gzip-compressed datasets: ``'data'`` (compression level 4, dtype
    ``data_dtype``) and ``'label'`` (compression level 1, dtype
    ``label_dtype``). The context manager closes the file even when one of
    the writes raises.
    """
    with h5py.File(h5_filename, "w") as h5_fout:
        h5_fout.create_dataset(
            'data', data=data,
            compression='gzip', compression_opts=4,
            dtype=data_dtype)
        h5_fout.create_dataset(
            'label', data=label,
            compression='gzip', compression_opts=1,
            dtype=label_dtype)


def gen_indoor3d_h5():
    """Convert every room listed in ``metadata/all_data_label.txt`` to HDF5.

    Each room file under ``data/stanford_indoor3d`` is split into blocks of
    4096 points x 9 channels with ``room2blocks_wrapper_normalized``. Blocks
    are buffered and written to
    ``data/indoor3d_sem_seg_hdf5_data_test/ply_data_all_<i>.h5`` in chunks of
    up to 1000 blocks. Two index files are written next to them:
    ``room_filelist.txt`` (one room name per block) and ``all_files.txt``
    (one relative .h5 path per file written).
    """
    indoor3d_data_dir = 'data/stanford_indoor3d'
    NUM_POINT = 4096
    H5_BATCH_SIZE = 1000
    data_dim = [NUM_POINT, 9]
    label_dim = [NUM_POINT]
    data_dtype = 'float32'
    label_dtype = 'uint8'

    # Set paths
    filelist = 'metadata/all_data_label.txt'
    with open(filelist) as list_file:
        data_label_files = [os.path.join(indoor3d_data_dir, line.rstrip()) for line in list_file]
    output_dir = 'data/indoor3d_sem_seg_hdf5_data_test'
    # makedirs(exist_ok=True) also creates a missing parent directory.
    os.makedirs(output_dir, exist_ok=True)
    output_filename_prefix = os.path.join(output_dir, 'ply_data_all')
    output_room_filelist = os.path.join(output_dir, 'room_filelist.txt')
    output_all_file = os.path.join(output_dir, 'all_files.txt')

    # --------------------------------------
    # ----- BATCH WRITE TO HDF5 -----
    # --------------------------------------
    batch_data_dim = [H5_BATCH_SIZE] + data_dim
    batch_label_dim = [H5_BATCH_SIZE] + label_dim
    h5_batch_data = np.zeros(batch_data_dim, dtype = np.float32)
    h5_batch_label = np.zeros(batch_label_dim, dtype = np.uint8)
    buffer_size = 0  # state: record how many samples are currently in buffer
    h5_index = 0 # state: the next h5 file to save

    def insert_batch(data, label, last_batch=False):
        """Append blocks to the buffer, flushing to disk whenever it fills up.

        With ``last_batch`` set, whatever remains buffered is written too.
        """
        nonlocal h5_batch_data, h5_batch_label
        nonlocal buffer_size, h5_index
        data_size = data.shape[0]
        # If there is enough space, just insert
        if buffer_size + data_size <= h5_batch_data.shape[0]:
            h5_batch_data[buffer_size:buffer_size+data_size, ...] = data
            h5_batch_label[buffer_size:buffer_size+data_size] = label
            buffer_size += data_size
        else:  # not enough space
            capacity = h5_batch_data.shape[0] - buffer_size
            assert (capacity >= 0)
            if capacity > 0:
                h5_batch_data[buffer_size:buffer_size +
                                capacity, ...] = data[0:capacity, ...]
                h5_batch_label[buffer_size:buffer_size +
                                capacity, ...] = label[0:capacity, ...]
            # Save batch data and label to h5 file, reset buffer_size
            h5_filename = output_filename_prefix + '_' + str(h5_index) + '.h5'
            save_h5(h5_filename, h5_batch_data,
                                h5_batch_label, data_dtype, label_dtype)
            print('Stored {0} with size {1}'.format(h5_filename, h5_batch_data.shape[0]))
            h5_index += 1
            buffer_size = 0
            # recursive call with the part that did not fit
            insert_batch(data[capacity:, ...], label[capacity:, ...], last_batch)
        if last_batch and buffer_size > 0:
            h5_filename = output_filename_prefix + '_' + str(h5_index) + '.h5'
            save_h5(h5_filename, h5_batch_data[0:buffer_size, ...], h5_batch_label[0:buffer_size, ...], data_dtype, label_dtype)
            print('Stored {0} with size {1}'.format(h5_filename, buffer_size))
            h5_index += 1
            buffer_size = 0
        return

    sample_cnt = 0
    # Both index files are closed by the context manager, including when a
    # room fails to convert.
    with open(output_room_filelist, 'w') as fout_room, open(output_all_file, 'w') as all_file:
        for i, data_label_filename in enumerate(data_label_files):
            data, label = room2blocks_wrapper_normalized(data_label_filename, NUM_POINT, block_size=1.0, stride=1,
                                                        random_sample=False, sample_num=None)
            print('{0}, {1}'.format(data.shape, label.shape))
            for _ in range(data.shape[0]):
                fout_room.write(os.path.basename(data_label_filename)[0:-4]+'\n')

            sample_cnt += data.shape[0]
            insert_batch(data, label, i == len(data_label_files)-1)

        print("Total samples: {0}".format(sample_cnt))

        for i in range(h5_index):
            all_file.write(os.path.join('indoor3d_sem_seg_hdf5_data_test', 'ply_data_all_') + str(i) +'.h5\n')
    return


def collect_point_label(annotation_path, out_filename, file_format='txt'):
    """
    Convert original dataset files to data_label file (each line is XYZRGBL).
    We aggregated all the points from each instance in the room.
    Args:
        annotation_path: path to annotations. e.g. Area_1/office_2/Annotations/
        out_filename: path to save collected points and labels (each line is XYZRGBL)
        file_format: txt or numpy, determines what file format to save.
    Returns:
        None
    Raises:
        ValueError: if ``file_format`` is neither 'txt' nor 'numpy'. This is
            checked up front, before any file is read, and replaces the
            former ``print`` + ``exit()``. ``np.concatenate`` also raises
            ValueError when the directory holds no ``*.txt`` file.
    Note:
        the points are shifted before save, the most negative point is now at origin.
    """
    if file_format not in ('txt', 'numpy'):
        raise ValueError('ERROR!! Unknown file format: %s, please use txt or numpy.' %
                         (file_format))

    points_list = []
    for f in glob.glob(os.path.join(annotation_path, '*.txt')):
        # The class name is the part of the file name before the first '_'.
        cls = os.path.basename(f).split('_')[0]
        if cls not in g_classes: # note: some rooms contain classes outside the list (e.g. stairs)
            cls = 'clutter'
        points = np.loadtxt(f)
        labels = np.ones((points.shape[0],1)) * g_class2label[cls]
        points_list.append(np.concatenate([points, labels], 1)) # Nx7
    data_label = np.concatenate(points_list, 0)
    xyz_min = np.amin(data_label, axis=0)[0:3]
    data_label[:, 0:3] -= xyz_min
    if file_format=='txt':
        with open(out_filename, 'w') as fout:
            for i in range(data_label.shape[0]):
                fout.write('%f %f %f %d %d %d %d\n' % \
                              (data_label[i,0], data_label[i,1], data_label[i,2],
                               data_label[i,3], data_label[i,4], data_label[i,5],
                               data_label[i,6]))
    else:
        np.save(out_filename, data_label)

def collect_indoor3d_data():
    """Aggregate every annotated room into one ``.npy`` file.

    Annotation directories are read from ``metadata/annotation_paths.txt``
    (relative to ``data/Stanford3dDataset_v1.2_Aligned_Version``); one
    ``<Area>_<room>.npy`` file per directory is written to
    ``data/stanford_indoor3d`` via ``collect_point_label``.
    """
    with open('metadata/annotation_paths.txt') as paths_file:
        annotation_paths = [line.rstrip() for line in paths_file]
    annotation_paths = [os.path.join('data/Stanford3dDataset_v1.2_Aligned_Version', p) for p in annotation_paths]
    output_folder = 'data/stanford_indoor3d'
    # exist_ok avoids the check-then-create race and creates parents too.
    os.makedirs(output_folder, exist_ok=True)
    for anno_path in annotation_paths:
        elements = anno_path.split('/')
        out_filename = elements[-3].split("\\")[-1]+'_'+elements[-2]+'.npy' # Area_1_hallway_1.npy
        collect_point_label(anno_path, os.path.join(output_folder, out_filename), 'numpy')


def download_S3DIS():
    """Unpack the S3DIS archives under ``data/`` when they are not extracted yet.

    Despite the name nothing is downloaded: both zip files must already be
    present in ``data/``. An archive is extracted only when its target
    directory does not exist. Always returns None.
    """
    hdf5_dir = 'data/indoor3d_sem_seg_hdf5_data'
    if not os.path.exists(hdf5_dir):
        with zipfile.ZipFile('data/indoor3d_sem_seg_hdf5_data.zip', 'r') as archive:
            archive.extractall('data/')

    aligned_dir = 'data/Stanford3dDataset_v1.2_Aligned_Version'
    if not os.path.exists(aligned_dir):
        with zipfile.ZipFile('data/Stanford3dDataset_v1.2_Aligned_Version.zip', "r") as archive:
            archive.extractall('data/')
    return None


def prepare_test_data_semseg():
    """Build the derived test data, skipping stages whose output already exists.

    Stage 1 (``collect_indoor3d_data``) runs when ``data/stanford_indoor3d``
    is missing; stage 2 (``gen_indoor3d_h5``) runs when
    ``data/indoor3d_sem_seg_hdf5_data_test`` is missing.
    """
    stages = (
        ('data/stanford_indoor3d', collect_indoor3d_data),
        ('data/indoor3d_sem_seg_hdf5_data_test', gen_indoor3d_h5),
    )
    for output_path, build in stages:
        if not os.path.exists(output_path):
            build()

In [19]:
def load_data_semseg(partition, test_area):
    """Load the S3DIS blocks of one partition from the HDF5 files.

    Args:
        partition: ``'train'`` reads ``data/indoor3d_sem_seg_hdf5_data`` and
            keeps the blocks whose room is NOT in the test area; any other
            value reads ``data/indoor3d_sem_seg_hdf5_data_test`` and keeps
            the blocks whose room IS in the test area.
        test_area: area identifier; rooms whose name contains
            ``"Area_<test_area>"`` form the test split.

    Returns:
        (all_data, all_seg): the selected rows of the concatenated ``data``
        and ``label`` datasets.
    """
    # download_S3DIS()
    # prepare_test_data_semseg()
    if partition == 'train':
        data_dir = os.path.join('data/indoor3d_sem_seg_hdf5_data')
    else:
        data_dir = os.path.join('data/indoor3d_sem_seg_hdf5_data_test')
    with open(os.path.join(data_dir, "all_files.txt")) as f:
        all_files = [line.rstrip() for line in f]
    with open(os.path.join(data_dir, "room_filelist.txt")) as f:
        room_filelist = [line.rstrip() for line in f]
    data_batchlist, label_batchlist = [], []
    for h5_name in all_files:
        # Read-only is enough ('r+' needs write permission for no benefit)
        # and the context manager closes each file, which was never done before.
        with h5py.File(os.path.join('data/', h5_name), 'r') as h5_file:
            data_batchlist.append(h5_file["data"][:])
            label_batchlist.append(h5_file["label"][:])
    data_batches = np.concatenate(data_batchlist, 0)
    seg_batches = np.concatenate(label_batchlist, 0)
    test_area_name = "Area_" + str(test_area)
    # room_filelist has one entry per block, in the same order as the data.
    train_idxs, test_idxs = [], []
    for i, room_name in enumerate(room_filelist):
        if test_area_name in room_name:
            test_idxs.append(i)
        else:
            train_idxs.append(i)
    if partition == 'train':
        all_data = data_batches[train_idxs, ...]
        all_seg = seg_batches[train_idxs, ...]
    else:
        all_data = data_batches[test_idxs, ...]
        all_seg = seg_batches[test_idxs, ...]
    return all_data, all_seg


def load_color_semseg():
    """Load the class colours and render a colour legend image.

    Reads ``metadata/semseg_colors.txt`` (JSON: a list of objects with
    ``'color'`` and ``'label'`` keys), draws one colour swatch plus label per
    class for the first 13 entries and writes the legend to
    ``metadata/semseg_colors.png``.

    Returns:
        ``np.array`` of the colours in file order, returned as soon as the
        13th swatch has been drawn. (The swatches themselves are drawn with
        the channel order reversed.)
    """
    colors = []
    labels = []
    # Close the JSON file deterministically; it used to be left open.
    with open("metadata/semseg_colors.txt") as f:
        entries = json.load(f)
    for line in entries:
        colors.append(line['color'])
        labels.append(line['label'])
    semseg_colors = np.array(colors)
    semseg_colors = semseg_colors[:, [2, 1, 0]]
    partseg_labels = np.array(labels)
    font = cv2.FONT_HERSHEY_SIMPLEX
    img_size = 1500
    img = np.zeros((500, img_size, 3), dtype="uint8")
    # White background.
    cv2.rectangle(img, (0, 0), (img_size, 750), [255, 255, 255], thickness=-1)
    color_size = 64
    color_index = 0
    label_index = 0
    row_index = 16
    for _ in range(0, img_size):
        column_index = 32
        for _ in range(0, img_size):
            color = semseg_colors[color_index]
            label = partseg_labels[label_index]
            cv2.rectangle(img, (column_index, row_index), (column_index + color_size, row_index + color_size),
                          color=(int(color[0]), int(color[1]), int(color[2])), thickness=-1)
            img = cv2.putText(img, label, (column_index + int(color_size * 1.15), row_index + int(color_size / 2)),
                              font,
                              0.7, (0, 0, 0), 2)
            column_index = column_index + 200
            color_index = color_index + 1
            label_index = label_index + 1
            if color_index >= 13:
                cv2.imwrite("metadata/semseg_colors.png",
                            img, [cv2.IMWRITE_PNG_COMPRESSION, 0])
                return np.array(colors)
            elif (column_index >= 1280):
                # Row is full: continue on the next row.
                break
        row_index = row_index + int(color_size * 1.3)
        if (row_index >= img_size):
            break

##### Main class for data

In [20]:
class S3DIS(Dataset):
    """Dataset wrapper around the pre-batched S3DIS semantic-segmentation blocks.

    Each item is a ``(points, labels)`` pair truncated to ``num_points``
    entries; for the 'train' partition the points (and their labels) are
    returned in a freshly shuffled order on every access.
    """

    def __init__(self, num_points=4096, partition='train', test_area='1'):
        self.num_points = num_points
        self.partition = partition
        # Blocks and per-point labels for the requested partition / held-out area.
        self.data, self.seg = load_data_semseg(partition, test_area)
        # Class palette (loading it also regenerates the legend image).
        self.semseg_colors = load_color_semseg()

    def __getitem__(self, item):
        points = self.data[item][:self.num_points]
        labels = self.seg[item][:self.num_points]
        if self.partition != 'train':
            return points, torch.LongTensor(labels)
        # Training only: apply one random permutation to points and labels alike.
        order = list(range(points.shape[0]))
        np.random.shuffle(order)
        return points[order], torch.LongTensor(labels[order])

    def __len__(self):
        num_blocks = self.data.shape[0]
        return num_blocks

### 3. Train test sequence

##### Arguments to be initialized

In [21]:
# Run configuration consumed by init_state(), train() and test(); the whole
# dict is also handed to the DGCNN constructor.
args = dict(
    # Experiment identity and data selection.
    exp_name="exp",        # sub-folder of outputs/ that receives checkpoints
    model='dgcnn',
    dataset='S3DIS',
    test_area=6,           # area used for the 'test' partition and in the checkpoint file name
    # Optimisation.
    batch_size=64,
    test_batch_size=16,
    epochs=30,
    use_sgd=True,          # True: SGD at lr * 100 with momentum; False: Adam at lr
    lr=0.001,
    momentum=0.9,
    scheduler='cos',       # 'cos' or 'step'
    # Runtime.
    cuda=True,
    seed=2024,
    eval=True,
    # Model / input size (presumably read by DGCNN -- confirm in the model cell).
    num_points=4096,
    dropout=0.5,
    embedded_dims=1024,
    k=20,
    # Evaluation and visualisation.
    model_root='',
    visu='',
    visu_format='ply',
)

##### Init function

In [22]:
def init_state():
    """Create the output directory tree for the current experiment.

    Ensures that ``outputs/<exp_name>/models`` exists, where ``exp_name`` is
    read from the module-level ``args`` dict. Calling it again when the
    directories already exist is a no-op.
    """
    # makedirs also creates the intermediate 'outputs' and 'outputs/<exp_name>'
    # levels; exist_ok avoids the separate exists-check and the race between
    # checking and creating.
    os.makedirs('outputs/' + args['exp_name'] + '/' + 'models', exist_ok=True)

##### Train Function

In [23]:
def train(args):
    """Train the segmentation model on S3DIS and checkpoint the best epoch.

    Every epoch runs one pass over the training split followed by one pass
    over the test split, printing loss, accuracy, balanced accuracy and mean
    IoU for each. Whenever the test mean IoU is at least as good as the best
    seen so far, the model state dict is written to
    ``outputs/<exp_name>/models/model_<test_area>.t7``.

    Args:
        args (dict): run configuration. Keys read here: 'num_points',
            'test_area', 'batch_size', 'test_batch_size', 'cuda', 'model',
            'use_sgd', 'lr', 'momentum', 'scheduler', 'epochs', 'exp_name'.
            The dict is also passed to the DGCNN constructor.

    Raises:
        Exception: if ``args['model']`` is not 'dgcnn'.
    """
    train_loader = DataLoader(S3DIS(partition='train', num_points=args['num_points'], test_area=args['test_area']),
                              num_workers=8, batch_size=args['batch_size'], shuffle=True, drop_last=True)
    print("Loaded Train data")
    test_loader = DataLoader(S3DIS(partition='test', num_points=args['num_points'], test_area=args['test_area']),
                             num_workers=8, batch_size=args['test_batch_size'], shuffle=True, drop_last=False)
    print("Loaded Test data")

    device = torch.device("cuda" if args['cuda'] else "cpu")

    if args['model'] == 'dgcnn':
        model = DGCNN(args).to(device)
    else:
        raise Exception("Not implemented")
    print(str(model))
    model = nn.DataParallel(model)
    print("We are using ", torch.cuda.device_count(), " GPUs!")

    if args['use_sgd']:
        print("Use SGD")
        # SGD runs at 100x the configured learning rate.
        opt = optim.SGD(model.parameters(),
                        lr=args['lr']*100,
                        momentum=args['momentum'],
                        weight_decay=1e-4)
    else:
        print("Use Adam")
        opt = optim.Adam(model.parameters(), lr=args['lr'], weight_decay=1e-4)

    if args['scheduler'] == 'cos':
        scheduler = CosineAnnealingLR(opt, args['epochs'], eta_min=1e-3)
    elif args['scheduler'] == 'step':
        # Halve the learning rate every 20 epochs. The epoch count must not be
        # passed positionally: StepLR's fourth positional parameter is
        # last_epoch, and a non-default value makes PyTorch treat the run as a
        # resume and fail because 'initial_lr' is missing from the optimizer.
        scheduler = StepLR(opt, step_size=20, gamma=0.5)

    criterion = cal_loss

    best_test_iou = 0

    for epoch in range(args['epochs']):
        # Train
        train_loss = 0.0
        count = 0.0
        model.train()
        train_true_cls = []
        train_pred_cls = []
        train_true_seg = []
        train_pred_seg = []
        for data, seg in train_loader:
            data, seg = data.to(device), seg.to(device)
            # Swap the last two axes before the forward pass (presumably
            # points-last layout expected by the model -- confirm in DGCNN).
            data = data.permute(0, 2, 1)
            batch_size = data.size()[0]
            opt.zero_grad()
            seg_pred = model(data)
            seg_pred = seg_pred.permute(0, 2, 1).contiguous()
            # Class count is taken from the prediction itself instead of a literal.
            num_classes = seg_pred.size(-1)
            loss = criterion(seg_pred.view(-1, num_classes), seg.view(-1))
            loss.backward()
            opt.step()
            pred = seg_pred.max(dim=2)[1]            # (batch_size, num_points)
            count += batch_size
            train_loss += loss.item() * batch_size
            seg_np = seg.cpu().numpy()                  # (batch_size, num_points)
            pred_np = pred.detach().cpu().numpy()       # (batch_size, num_points)
            train_true_cls.append(seg_np.reshape(-1))   # (batch_size * num_points)
            train_pred_cls.append(pred_np.reshape(-1))  # (batch_size * num_points)
            train_true_seg.append(seg_np)
            train_pred_seg.append(pred_np)
        if args['scheduler'] == 'cos':
            scheduler.step()
        elif args['scheduler'] == 'step':
            # Step decay is floored at a learning rate of 1e-5.
            if opt.param_groups[0]['lr'] > 1e-5:
                scheduler.step()
            if opt.param_groups[0]['lr'] < 1e-5:
                for param_group in opt.param_groups:
                    param_group['lr'] = 1e-5
        train_true_cls = np.concatenate(train_true_cls)
        train_pred_cls = np.concatenate(train_pred_cls)
        train_acc = metrics.accuracy_score(train_true_cls, train_pred_cls)
        avg_per_class_acc = metrics.balanced_accuracy_score(train_true_cls, train_pred_cls)
        train_true_seg = np.concatenate(train_true_seg, axis=0)
        train_pred_seg = np.concatenate(train_pred_seg, axis=0)
        train_ious = calculate_semantic_mIoU(train_pred_seg, train_true_seg)
        outstr = 'Train %d, loss: %.6f, train acc: %.6f, train avg acc: %.6f, train iou: %.6f' % (epoch,
                                                                                                  train_loss*1.0/count,
                                                                                                  train_acc,
                                                                                                  avg_per_class_acc,
                                                                                                  np.mean(train_ious))
        print(outstr)

        # Eval
        test_loss = 0.0
        count = 0.0
        model.eval()
        test_true_cls = []
        test_pred_cls = []
        test_true_seg = []
        test_pred_seg = []
        # No gradients are needed for evaluation; skipping autograd bookkeeping
        # saves memory and time without changing the predictions.
        with torch.no_grad():
            for data, seg in test_loader:
                data, seg = data.to(device), seg.to(device)
                data = data.permute(0, 2, 1)
                batch_size = data.size()[0]
                seg_pred = model(data)
                seg_pred = seg_pred.permute(0, 2, 1).contiguous()
                num_classes = seg_pred.size(-1)
                loss = criterion(seg_pred.view(-1, num_classes), seg.view(-1))
                pred = seg_pred.max(dim=2)[1]
                count += batch_size
                test_loss += loss.item() * batch_size
                seg_np = seg.cpu().numpy()
                pred_np = pred.cpu().numpy()
                test_true_cls.append(seg_np.reshape(-1))
                test_pred_cls.append(pred_np.reshape(-1))
                test_true_seg.append(seg_np)
                test_pred_seg.append(pred_np)
        test_true_cls = np.concatenate(test_true_cls)
        test_pred_cls = np.concatenate(test_pred_cls)
        test_acc = metrics.accuracy_score(test_true_cls, test_pred_cls)
        avg_per_class_acc = metrics.balanced_accuracy_score(
            test_true_cls, test_pred_cls)
        test_true_seg = np.concatenate(test_true_seg, axis=0)
        test_pred_seg = np.concatenate(test_pred_seg, axis=0)
        test_ious = calculate_semantic_mIoU(test_pred_seg, test_true_seg)
        test_miou = np.mean(test_ious)
        outstr = 'Test %d, loss: %.6f, test acc: %.6f, test avg acc: %.6f, test iou: %.6f' % (epoch,
                                                                                              test_loss*1.0/count,
                                                                                              test_acc,
                                                                                              avg_per_class_acc,
                                                                                              test_miou)
        print(outstr)
        # Keep the checkpoint of the best (or equally good, most recent) epoch.
        if test_miou >= best_test_iou:
            best_test_iou = test_miou
            torch.save(model.state_dict(), 'outputs/%s/models/model_%s.t7' %
                       (args['exp_name'], args['test_area']))

##### Test Function

In [None]:
def test(args, io):
    """Evaluate saved checkpoints on one S3DIS area, or on all six.

    For every selected area the checkpoint ``model_<area>.t7`` is loaded from
    ``model_root``, the area's test split is run through the model, and
    accuracy, balanced accuracy and mean IoU are reported through ``io``.
    When the configured test area is 'all', overall metrics across the six
    areas are reported as well.

    Args:
        args: run configuration, either a dict (as used by ``train``) or an
            object exposing the same names as attributes. Names read here:
            'test_area', 'num_points', 'test_batch_size', 'cuda', 'model',
            'model_root', 'visu', 'visu_format'. It is also passed to the
            DGCNN constructor unchanged.
        io: logger object; only its ``cprint(str)`` method is used.

    Raises:
        Exception: if the configured model is not 'dgcnn'.
    """
    def get_arg(name):
        # The notebook keeps its configuration in a dict while script-style
        # callers pass an attribute namespace; accept both.
        return args[name] if isinstance(args, dict) else getattr(args, name)

    # Area ids are compared as strings below, so an int such as 6 must be
    # normalised to '6' or no area would ever match.
    selected_area = str(get_arg('test_area'))
    visu = get_arg('visu')
    visu_format = get_arg('visu_format')
    device = torch.device("cuda" if get_arg('cuda') else "cpu")
    room_list_path = "data/indoor3d_sem_seg_hdf5_data_test/room_filelist.txt"

    all_true_cls = []
    all_pred_cls = []
    all_true_seg = []
    all_pred_seg = []
    for test_area in range(1, 7):
        visual_file_index = 0
        test_area = str(test_area)
        if os.path.exists(room_list_path):
            with open(room_list_path) as f:
                # Count the lines that precede the first one whose 6th
                # character is this area's digit.
                for line in f:
                    if (line[5]) == test_area:
                        break
                    visual_file_index = visual_file_index + 1
        if (selected_area == 'all') or (test_area == selected_area):
            test_loader = DataLoader(S3DIS(partition='test', num_points=get_arg('num_points'), test_area=test_area),
                                     batch_size=get_arg('test_batch_size'), shuffle=False, drop_last=False)

            # Try to load models
            semseg_colors = test_loader.dataset.semseg_colors
            if get_arg('model') == 'dgcnn':
                model = DGCNN(args).to(device)
            else:
                raise Exception("Not implemented")

            model = nn.DataParallel(model)
            # map_location lets a checkpoint saved on GPU be loaded on a
            # CPU-only machine as well.
            model.load_state_dict(torch.load(os.path.join(
                get_arg('model_root'), 'model_%s.t7' % test_area), map_location=device))
            model = model.eval()
            test_true_cls = []
            test_pred_cls = []
            test_true_seg = []
            test_pred_seg = []
            # Inference only: disable autograd to save memory and time.
            with torch.no_grad():
                for data, seg in test_loader:
                    data, seg = data.to(device), seg.to(device)
                    data = data.permute(0, 2, 1)
                    seg_pred = model(data)
                    seg_pred = seg_pred.permute(0, 2, 1).contiguous()
                    pred = seg_pred.max(dim=2)[1]
                    seg_np = seg.cpu().numpy()
                    pred_np = pred.cpu().numpy()
                    test_true_cls.append(seg_np.reshape(-1))
                    test_pred_cls.append(pred_np.reshape(-1))
                    test_true_seg.append(seg_np)
                    test_pred_seg.append(pred_np)
                    # visualization
                    visualization(visu, visu_format, selected_area,
                                  data, seg, pred, visual_file_index, semseg_colors)
                    visual_file_index = visual_file_index + data.shape[0]
            # NOTE(review): visual_warning is not defined in this function; it
            # is expected to be a module-level flag set elsewhere -- confirm.
            if visual_warning and visu != '':
                print(
                    'Visualization Failed: You can only choose a room to visualize within the scope of the test area')
            test_true_cls = np.concatenate(test_true_cls)
            test_pred_cls = np.concatenate(test_pred_cls)
            test_acc = metrics.accuracy_score(test_true_cls, test_pred_cls)
            avg_per_class_acc = metrics.balanced_accuracy_score(
                test_true_cls, test_pred_cls)
            test_true_seg = np.concatenate(test_true_seg, axis=0)
            test_pred_seg = np.concatenate(test_pred_seg, axis=0)
            test_ious = calculate_semantic_mIoU(test_pred_seg, test_true_seg)
            outstr = 'Test :: test area: %s, test acc: %.6f, test avg acc: %.6f, test iou: %.6f' % (test_area,
                                                                                                    test_acc,
                                                                                                    avg_per_class_acc,
                                                                                                    np.mean(test_ious))
            io.cprint(outstr)
            all_true_cls.append(test_true_cls)
            all_pred_cls.append(test_pred_cls)
            all_true_seg.append(test_true_seg)
            all_pred_seg.append(test_pred_seg)

    if selected_area == 'all':
        all_true_cls = np.concatenate(all_true_cls)
        all_pred_cls = np.concatenate(all_pred_cls)
        all_acc = metrics.accuracy_score(all_true_cls, all_pred_cls)
        avg_per_class_acc = metrics.balanced_accuracy_score(
            all_true_cls, all_pred_cls)
        all_true_seg = np.concatenate(all_true_seg, axis=0)
        all_pred_seg = np.concatenate(all_pred_seg, axis=0)
        all_ious = calculate_semantic_mIoU(all_pred_seg, all_true_seg)
        outstr = 'Overall Test :: test acc: %.6f, test avg acc: %.6f, test iou: %.6f' % (all_acc,
                                                                                         avg_per_class_acc,
                                                                                         np.mean(all_ious))
        io.cprint(outstr)

##### Main

In [27]:
# Apply the configured seed before any random work so that runs are as
# repeatable as the hardware allows. NumPy is seeded as well because the
# training dataset shuffles points with np.random.
np.random.seed(args['seed'])
torch.manual_seed(args['seed'])

init_state()
train(args)

Loaded Train data
Loaded Test data
DGCNN(
  (batch_norm_1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (batch_norm_2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (batch_norm_3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (batch_norm_4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (batch_norm_5): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (batch_norm_6): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (batch_norm_7): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (batch_norm_8): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv_1): Sequential(
    (0): Conv2d(18, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_runnin

KeyboardInterrupt: 