## Create ML model using RANDOM FOREST algorithm for self-driving car with LIDAR data

In [1]:
import pandas as pd
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_classif
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from micromlgen import port

In [2]:
# read data from csv
data = pd.read_csv('./all.txt', header=None)
data.shape

(2388, 361)

In [3]:
data.rename(columns={data.columns[-1]: 'Label'}, inplace=True)
data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,351,352,353,354,355,356,357,358,359,Label
0,0.00,615.50,591.50,0.00,569.25,549.75,0.00,532.00,516.00,0.00,...,800.50,0.00,784.25,0.00,753.00,710.50,0.00,675.75,643.00,0.00
1,0.00,680.25,0.00,648.75,619.25,0.00,595.25,572.25,0.00,552.25,...,932.00,0.00,906.00,805.00,791.00,0.00,775.00,759.50,716.50,-1.00
2,0.00,879.75,0.00,857.75,836.75,0.00,818.50,802.00,783.25,0.00,...,1163.75,1134.25,0.00,1015.50,0.00,0.00,956.00,927.00,904.25,-1.00
3,1016.75,0.00,1002.50,975.50,0.00,944.25,915.50,0.00,890.00,866.25,...,1109.75,1082.50,1109.50,0.00,0.00,0.00,0.00,1051.25,1037.25,0.38
4,0.00,898.25,0.00,870.00,842.50,0.00,819.25,796.50,0.00,776.50,...,0.00,940.50,1009.00,990.25,0.00,974.75,958.25,945.75,929.75,1.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2383,0.00,446.75,461.00,477.25,0.00,492.50,511.25,531.25,554.25,0.00,...,0.00,370.00,378.50,388.50,0.00,399.25,410.25,421.75,434.50,0.00
2384,446.50,0.00,461.00,477.00,0.00,492.50,511.00,531.00,554.25,0.00,...,369.75,0.00,378.50,388.50,399.25,0.00,410.25,421.75,433.75,0.00
2385,0.00,445.50,459.75,475.00,491.25,0.00,509.25,529.50,552.00,575.00,...,0.00,370.00,379.75,388.50,398.50,0.00,409.25,420.50,432.25,0.00
2386,0.00,449.75,463.75,479.25,0.00,497.00,515.00,535.50,559.00,0.00,...,364.50,373.75,383.25,0.00,393.75,401.75,411.75,423.50,436.75,0.00


In [4]:
X = data.iloc[:, :-1]
X

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,350,351,352,353,354,355,356,357,358,359
0,0.00,615.50,591.50,0.00,569.25,549.75,0.00,532.00,516.00,0.00,...,816.25,800.50,0.00,784.25,0.00,753.00,710.50,0.00,675.75,643.00
1,0.00,680.25,0.00,648.75,619.25,0.00,595.25,572.25,0.00,552.25,...,956.75,932.00,0.00,906.00,805.00,791.00,0.00,775.00,759.50,716.50
2,0.00,879.75,0.00,857.75,836.75,0.00,818.50,802.00,783.25,0.00,...,0.00,1163.75,1134.25,0.00,1015.50,0.00,0.00,956.00,927.00,904.25
3,1016.75,0.00,1002.50,975.50,0.00,944.25,915.50,0.00,890.00,866.25,...,0.00,1109.75,1082.50,1109.50,0.00,0.00,0.00,0.00,1051.25,1037.25
4,0.00,898.25,0.00,870.00,842.50,0.00,819.25,796.50,0.00,776.50,...,955.25,0.00,940.50,1009.00,990.25,0.00,974.75,958.25,945.75,929.75
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2383,0.00,446.75,461.00,477.25,0.00,492.50,511.25,531.25,554.25,0.00,...,361.25,0.00,370.00,378.50,388.50,0.00,399.25,410.25,421.75,434.50
2384,446.50,0.00,461.00,477.00,0.00,492.50,511.00,531.00,554.25,0.00,...,361.25,369.75,0.00,378.50,388.50,399.25,0.00,410.25,421.75,433.75
2385,0.00,445.50,459.75,475.00,491.25,0.00,509.25,529.50,552.00,575.00,...,361.25,0.00,370.00,379.75,388.50,398.50,0.00,409.25,420.50,432.25
2386,0.00,449.75,463.75,479.25,0.00,497.00,515.00,535.50,559.00,0.00,...,356.00,364.50,373.75,383.25,0.00,393.75,401.75,411.75,423.50,436.75


In [5]:
# Assign target y as the last column 'Label'
y = data.iloc[:, -1]
y

0       0.00
1      -1.00
2      -1.00
3       0.38
4       1.00
        ... 
2383    0.00
2384    0.00
2385    0.00
2386    0.00
2387    0.00
Name: Label, Length: 2388, dtype: float64

## Begin training

In [6]:
# Split the training data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=40)

In [32]:
k = 30
k_best = SelectKBest(score_func=f_classif, k=k)
k_best.fit(X_train, y_train)

selected_feature_indices = k_best.get_support(indices=True)
print("selected_feature_indices: ", selected_feature_indices)

selected_feature_indices:  [ 24  30  31  32  33  34  35  36  37  38  39  40  42  43  44  46  50  51
 150 156 202 206 212 213 324 330 333 335 337 359]


In [33]:
# Create a random forest regressor
rf = RandomForestRegressor(n_estimators=200, random_state=42)

In [34]:
# Train the model on the training data
rf.fit(X_train.iloc[:, selected_feature_indices], y_train)

In [42]:
X_test.iloc[:, selected_feature_indices].shape

(597, 30)

In [35]:
# Make predictions on the testing data
y_pred = rf.predict(X_test.iloc[:, selected_feature_indices])

In [36]:
# Calculate the mean squared error of the predictions
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)

Mean Squared Error: 0.24230621551926293


In [37]:
# Calculate the R-squared value of the predictions
r2 = r2_score(y_test, y_pred)
print("R-squared:", r2)

R-squared: 0.43084815362831685


In [39]:
import pickle
# Save the model to disk
filename = 'self_driving_model_0.1.pkl'
with open(filename, 'wb') as file:
    pickle.dump(rf, file)