Explaining Lukas' PointNet++ Regression Main Script

In [None]:
#Import libraries
import os
import numpy as np
import torch
import torch.nn.functional as F
from torch_geometric.loader import DataLoader

#Import classes defined in additional scripts
from pn2_regressor import Net
from pointcloud_dataset import PointCloudsInFiles

In [None]:

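#write_las saves points plus optional per-point attributes to a LAS/LAZ file; test() below uses it to export reference and predicted values so they can be inspected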
def write_las(outpoints, outfilepath, attribute_dict=None):
    """
    Write a point cloud, plus optional per-point attributes, to a LAS/LAZ file.

    :param outpoints: array of 3D points, one row per point; columns 0, 1 and 2 are written as x, y and z
    :param outfilepath: specification of output file (format: las or laz)
    :param attribute_dict: dictionary of attributes (key: name of attribute; value: 1D array of attribute values in order of points in 'outpoints'); if None (the default), nothing is added
    :return: None
    """
    # Imported here rather than at module level, so laspy is only required
    # when this function is actually called.
    import laspy

    # A fresh dict per call: a mutable default argument would be shared
    # between calls.
    if attribute_dict is None:
        attribute_dict = {}

    hdr = laspy.LasHeader(version="1.4", point_format=6)
    hdr.x_scale = 0.00025
    hdr.y_scale = 0.00025
    hdr.z_scale = 0.00025
    # The offsets are the per-axis mean of the coordinates, truncated to int.
    mean_extent = np.mean(outpoints, axis=0)
    hdr.x_offset = int(mean_extent[0])
    hdr.y_offset = int(mean_extent[1])
    hdr.z_offset = int(mean_extent[2])

    las = laspy.LasData(hdr)

    las.x = outpoints[:, 0]
    las.y = outpoints[:, 1]
    las.z = outpoints[:, 2]
    for key, vals in attribute_dict.items():
        try:
            las[key] = vals
        except Exception:
            # The assignment failed, presumably because `key` is not a
            # dimension of the point format. Register it as an extra
            # dimension typed after the first value, then retry.
            # `except Exception` (not a bare `except`) so that
            # KeyboardInterrupt / SystemExit are not swallowed.
            las.add_extra_dim(laspy.ExtraBytesParams(
                name=key,
                type=type(vals[0])
            ))
            las[key] = vals

    las.write(outfilepath)

In [None]:
if __name__ == '__main__':
    # __name__ equals '__main__' only when this file is executed directly.
    # When the file is imported from another script, __name__ holds the module's
    # name instead, so nothing under this guard runs on import.

    # Extra per-point columns handed to the dataset as additional input features;
    # their count also sets the model's num_features below.
    use_columns = ['scan_angle_rank']

    # Settings shared by both datasets: the maximum number of points allowed in
    # each cloud and the additional input columns.
    shared_dataset_kwargs = {'max_points': 4_000, 'use_columns': use_columns}

    # Each dataset is given the folder holding the LAZ files, a file pattern and
    # the column name 'NormalizedZ' (presumably the regression target -- confirm
    # in pointcloud_dataset.py).
    train_dataset = PointCloudsInFiles(
        r'C:\Users\hseely\OneDrive - UBC\Documents\Jupyter_Lab_Workspace\Lukas_DL_Regression_Example\train',
        '*.laz',
        'NormalizedZ',
        **shared_dataset_kwargs,
    )
    test_dataset = PointCloudsInFiles(
        r'C:\Users\hseely\OneDrive - UBC\Documents\Jupyter_Lab_Workspace\Lukas_DL_Regression_Example\test',
        '*.laz',
        'NormalizedZ',
        **shared_dataset_kwargs,
    )

    # torch_geometric DataLoaders batch the samples. Only the training data is
    # shuffled; num_workers=0 was chosen for the author's machine.
    loader_kwargs = {'batch_size': 32, 'num_workers': 0}
    train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
    test_loader = DataLoader(test_dataset, shuffle=False, **loader_kwargs)

    # Run on the GPU when CUDA is available, otherwise fall back to the CPU.
    if torch.cuda.is_available():
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')
    print(f"Using {device} device.")

    # Net is defined in pn2_regressor.py.
    model = Net(num_features=len(use_columns)).to(device)

    # model.parameters() hands the optimizer the model's learnable tensors;
    # these are the values optimizer.step() updates during training.
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

    def train(log_every=1):
        """
        Run one training epoch over ``train_loader``.

        The function takes no data arguments: it reads ``model``, ``optimizer``,
        ``train_loader`` and ``device`` from the enclosing script scope and
        updates the model's parameters in place.

        :param log_every: print the batch loss every ``log_every`` batches (default 1: every batch)
        :return: None
        """
        model.train()  # put the model in training mode
        n_batches = len(train_loader)

        # enumerate(..., start=1) yields a 1-based batch counter alongside each batch.
        for step, data in enumerate(train_loader, start=1):
            data = data.to(device)
            optimizer.zero_grad()  # clear gradients left over from the previous batch
            out = model(data)
            loss = F.mse_loss(out, data.y)  # mean squared error between prediction and target
            loss.backward()  # compute d(loss)/d(parameter) into each parameter's .grad
            optimizer.step()  # single optimization step (parameter update)
            if step % log_every == 0:
                print(f'[{step}/{n_batches}] MSE Loss: {loss.item():.4f} ')

    #torch.no_grad(): Context manager that disables gradient calculation. Useful for inference, when you are sure that you will not call Tensor.backward(). It reduces memory consumption for computations that would otherwise have requires_grad=True.
    @torch.no_grad()  # decorator form: the whole function body runs with gradient tracking disabled
    def test(loader, ep_id):
        """
        Evaluate ``model`` on every batch of ``loader`` and export one sample.

        For the first batch only, the points of the first sample (batch index 0)
        are written to a LAZ file together with their reference ('ref') and
        predicted ('pred') values.

        :param loader: data loader yielding the batches to evaluate
        :param ep_id: epoch identifier, used in the name of the exported LAZ file
        :return: mean of the per-batch MSE losses, as a float
        """
        # Evaluation mode: layers such as dropout and batch norm behave
        # differently here than during training.
        model.eval()
        batch_losses = []
        for batch_idx, data in enumerate(loader):
            data = data.to(device)
            preds = model(data)
            batch_losses.append(float(F.mse_loss(preds, data.y).to("cpu")))

            # Only the first batch is exported.
            if batch_idx != 0:
                continue

            # Boolean mask selecting the rows that belong to the first sample.
            # The same mask is applied to pos, y and the model output, so y and
            # the output are assumed to have one row per point -- TODO confirm.
            in_first_sample = data.batch.to('cpu').numpy() == 0
            xyz = data.pos.to('cpu').numpy()[in_first_sample, :]
            reference = data.y.to('cpu').numpy()[in_first_sample, 0]
            predicted = preds.to('cpu').numpy()[in_first_sample, 0]
            out_file = rf'C:\Users\hseely\OneDrive - UBC\Documents\Jupyter_Lab_Workspace\Lukas_DL_Regression_Example\predicted\ep{ep_id}_{batch_idx}.laz'
            write_las(xyz, out_file, {'ref': reference, 'pred': predicted})

        return float(np.mean(batch_losses))

#Final loop to iterate through batch for each epoch, training, and then testing data to get change in MSE
    # Checkpoint file: written at the end of every epoch and used to resume a
    # previous run.
    model_path = r'C:\Users\hseely\OneDrive - UBC\Documents\Jupyter_Lab_Workspace\Lukas_DL_Regression_Example\predicted\latest.model'

    # Resume once, before training starts. The weights are copied INTO the
    # existing model instead of rebinding `model` to the loaded object: the
    # optimizer holds references to the existing model's parameters, so swapping
    # in a new object would leave optimizer.step() updating tensors that are no
    # longer the ones being trained and saved.
    if os.path.exists(model_path):
        # NOTE: torch.load unpickles the file; only load checkpoints you created.
        # NOTE(review): recent PyTorch releases may need weights_only=False to
        # load a whole pickled module -- confirm for the installed version.
        checkpoint = torch.load(model_path, map_location=device)
        # Checkpoints are saved below as whole modules; a bare state_dict is
        # accepted as well.
        if isinstance(checkpoint, torch.nn.Module):
            checkpoint = checkpoint.state_dict()
        model.load_state_dict(checkpoint)

    for epoch in range(1, 501):
        train()  # one pass over train_loader; reads model/optimizer from the script scope
        mse = test(test_loader, epoch)
        torch.save(model, model_path)  # save the whole model after every epoch
        print(f'Epoch: {epoch:02d}, Mean test MSE: {mse:.4f}')
