# Mini challenge - NPM3D

## Imports

In [1]:
from datetime import datetime

import numpy as np
from lightgbm import LGBMClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import jaccard_score, ConfusionMatrixDisplay, accuracy_score
from xgboost import XGBClassifier

from src import FeaturesExtractor, timeit, checkpoint

In [2]:
# Locations of the training and test data, given relative to the notebook's
# working directory.
training_path = "./data/training"
test_path = "./data/test"

In [3]:
# Single extractor instance shared by the notebook: save_prediction and the
# experiment cells below read it as a module-level global.
features_extractor = FeaturesExtractor()

## Pipelines

In [4]:
@timeit
def save_prediction(
    clf,
    test_path: str,
    file_path: "str | None" = None,
    *,
    expected_count: "int | None" = 3079187,
):
    """Predict labels for the test data and write them to a text file.

    Args:
        clf: Classifier exposing ``predict``.
        test_path: Path handed to
            ``features_extractor.extract_features_no_label``.
        file_path: Destination file. When omitted, a timestamped
            ``submissions/feat-<YYYY_MM_DD-HH_MM>.txt`` name is built at call
            time, so it reflects when the prediction was made rather than
            when this function was defined.
        expected_count: Required number of predictions; ``None`` disables the
            check. Defaults to 3079187.

    Raises:
        AssertionError: If the number of predictions differs from
            ``expected_count``.
    """
    from pathlib import Path

    # A default f-string in the signature would be evaluated once, at
    # definition time; compute the timestamp per call instead.
    if file_path is None:
        file_path = f"submissions/feat-{datetime.now().strftime('%Y_%m_%d-%H_%M')}.txt"

    timer = checkpoint()
    test_features = features_extractor.extract_features_no_label(test_path)
    timer("Time spent computing test features")

    predictions = clf.predict(test_features)
    timer("Time spent on test prediction")

    # Explicit raise (instead of `assert`) so the check survives `python -O`;
    # the exception type is kept for backward compatibility.
    n_predictions = predictions.shape[0]
    if expected_count is not None and n_predictions != expected_count:
        raise AssertionError(
            "Incorrect number of predictions: "
            f"expected {expected_count}, got {n_predictions}"
        )

    # Make sure the output directory exists so the write cannot fail after the
    # expensive feature extraction and prediction steps.
    if isinstance(file_path, (str, Path)):
        Path(file_path).parent.mkdir(parents=True, exist_ok=True)

    np.savetxt(
        file_path,
        predictions,
        fmt="%d",
    )
    timer(f"Time spent writing results on {file_path}")

In [5]:
@timeit
def test(clf, test_features: np.ndarray, test_labels: np.ndarray) -> None:
    """Print accuracy and micro-averaged Jaccard score, then plot the confusion matrix.

    Args:
        clf: Classifier exposing ``predict``.
        test_features: Features passed to ``clf.predict``.
        test_labels: Ground-truth labels compared against the predictions.
    """
    prediction = clf.predict(test_features)

    accuracy = accuracy_score(test_labels, prediction)
    jaccard = jaccard_score(test_labels, prediction, average="micro")

    # The format spec must sit inside the braces; previously ":.4f" was
    # printed literally after the unrounded Jaccard score.
    print(f"Accuracy:      {accuracy:.4f}")
    print(f"Jaccard score: {jaccard:.4f}")

    ConfusionMatrixDisplay.from_predictions(test_labels, prediction)

## Model selection

In [6]:
def choose_model(model_name: str):
    """Return a new, default-configured classifier for ``model_name``.

    Recognised names are ``"sklearn"`` (RandomForestClassifier), ``"lgbm"``
    (LGBMClassifier) and ``"xgboost"`` (XGBClassifier).

    Raises:
        ValueError: If ``model_name`` is none of the recognised names.
    """
    # Each entry pairs a name with a zero-argument factory; nothing is
    # instantiated until a name matches.
    factories = (
        ("sklearn", lambda: RandomForestClassifier()),
        ("lgbm", lambda: LGBMClassifier()),
        ("xgboost", lambda: XGBClassifier()),
    )
    for known_name, build in factories:
        if model_name == known_name:
            return build()
    raise ValueError("Incorrect model name passed")

In [7]:
# Name of the classifier to build. choose_model accepts "sklearn", "lgbm" or
# "xgboost" and raises ValueError for anything else.
model = "lgbm"  # @param
clf = choose_model(model)

## Experiments

In [8]:
%%time
# Compute the training and evaluation features/labels in a single call; the
# four results are unpacked in the order the extractor returns them.
# NOTE(review): "MiniLille1.ply" is presumably the file held out for
# evaluation — confirm against FeaturesExtractor.extract_features.
(
    train_features,
    train_labels,
    test_features,
    test_labels,
) = features_extractor.extract_features(training_path, "MiniLille1.ply")

Function subsample_point_cloud took 73.49 seconds
1210261 elements available for class Ground
1071599 elements available for class Building
24787 elements available for class Poles
32396 elements available for class Pedestrians
50468 elements available for class Cars
1455156 elements available for class Vegetation
Function compute_features took 5.42 seconds
Function subsample_point_cloud took 29.77 seconds
767722 elements available for class Ground
864939 elements available for class Building
5532 elements available for class Poles
0 elements available for class Pedestrians
114237 elements available for class Cars
104182 elements available for class Vegetation
Function compute_features took 2.50 seconds
Function subsample_point_cloud took 41.09 seconds
1074040 elements available for class Ground
1000935 elements available for class Building
13094 elements available for class Poles
0 elements available for class Pedestrians
96132 elements available for class Cars
261365 elements availab

In [9]:
%%time
# Fit the selected classifier on the training features and labels.
clf.fit(train_features, train_labels)

CPU times: user 5.33 s, sys: 1.11 s, total: 6.44 s
Wall time: 3.02 s


In [None]:
# Evaluate the fitted classifier on the held-out split; test() takes the
# classifier as its first argument.
test(clf, test_features, test_labels)

In [None]:
# Predict on the unlabeled test data and write the result file, using the
# default timestamped path under submissions/.
save_prediction(clf, test_path)

Function subsample_point_cloud took 50.61 seconds


Remove the ground, then perform region growing followed by a majority vote (the per-class importance in the vote can be learned).