## Imports

In [2]:
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

## Load Dataset

In [5]:
# Location of the point-cloud file; it is parsed below as space-delimited text.
dataset = "3DML_urban_point_cloud.xyz"

# Read the file into a DataFrame and discard every row containing a missing value.
pcd = pd.read_csv(dataset, sep=' ').dropna()

## Labels and Features

In [7]:
# Model inputs: the six columns X, Y, Z, R, G, B
# (presumably point coordinates and RGB colour -- confirm against the data source).
feature_columns = ['X', 'Y', 'Z', 'R', 'G', 'B']
features = pcd.loc[:, feature_columns]

# Prediction target: the 'Classification' column.
labels = pcd['Classification']

## Min-Max scaler
`MinMaxScaler` rescales each feature independently, shifting and scaling it into a given range (by default, between zero and one). If your features are approximately normally distributed, `StandardScaler` (zero mean, unit variance) may be a better choice.

In [8]:
from sklearn.preprocessing import MinMaxScaler

# Learn each column's min/max from `features`, then rescale those same rows.
# NOTE(review): the scaler is fitted on every row here, before any train/test
# split that happens later in the notebook -- confirm this is acceptable.
scaler = MinMaxScaler()
scaler.fit(features)
features_scaled = scaler.transform(features)

## Training setup

In [9]:
# Hold out 40% of the points for evaluation. A fixed random_state makes the
# shuffle reproducible, so a "Restart & Run All" yields the same train/test
# partition (and therefore comparable metrics and plots) on every run.
X_train, X_test, y_train, y_test = train_test_split(
    features_scaled, labels, test_size=0.4, random_state=42
)

## Classifier

In [None]:
# Train a random forest on the training split and predict the held-out split.
# random_state pins the forest's bootstrap sampling and feature selection so
# the fitted model is reproducible for a given training set.
rf_classifier = RandomForestClassifier(random_state=42)
rf_classifier.fit(X_train, y_train)
rf_predictions = rf_classifier.predict(X_test)

# Per-class precision / recall / F1 on the held-out split, giving a
# quantitative summary of the predictions.
print(classification_report(y_test, rf_predictions))

## Visualize

In [None]:
# Side-by-side X/Y scatter plots of ground truth, predictions and their difference.
#
# X_test is the NumPy array produced by splitting the scaled feature matrix, so
# its columns must be addressed by position rather than by name:
# column 0 is 'X' and column 1 is 'Y' (the order of the feature column list).
x_coords = X_test[:, 0]
y_coords = X_test[:, 1]

# Signed label difference; it is 0 wherever the prediction is correct.
label_diff = y_test - rf_predictions

fig, axs = plt.subplots(1, 3, figsize=(20,5))
axs[0].scatter(x_coords, y_coords, c=y_test, s=0.05)
axs[0].set_title('3D Point Cloud Ground Truth')
axs[1].scatter(x_coords, y_coords, c=rf_predictions, s=0.05)
axs[1].set_title('3D Point Cloud Predictions')
# Marker area must be non-negative, so size by the magnitude of the difference:
# correctly classified points get size 0 and only the errors are drawn.
axs[2].scatter(x_coords, y_coords, c=label_diff, cmap=plt.cm.rainbow, s=0.5 * abs(label_diff))
axs[2].set_title('Differences')

for ax in axs:
    ax.set_xlabel('X (scaled)')
    ax.set_ylabel('Y (scaled)')

plt.show()