In [1]:
import os
import pcl
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import altair as alt
import numpy as np

In [2]:
def pcd_to_df(pointcloud):
    """Convert a point cloud into a DataFrame with one row per point.

    The cloud is iterated once; every item it yields becomes one row, so the
    columns are whatever components each point carries.
    """
    return pd.DataFrame([point for point in pointcloud])

def pcd_to_features(pointcloud):
    """Summarise a point cloud as a flat feature vector for KNN training.

    The cloud is converted to a DataFrame and reduced column by column. The
    returned list holds every column mean followed by every column standard
    deviation (pandas' default ``DataFrame.std``). For an x/y/z cloud that is
    <x_mean, y_mean, z_mean, x_std, y_std, z_std>.
    """
    datapoints = pcd_to_df(pointcloud)
    # Means first, then standard deviations, each in column order.
    return [*datapoints.mean(), *datapoints.std()]

# Directory containing all PCD files.
# The data can be downloaded from https://drive.google.com/drive/u/1/folders/1t7AoBg8FAmqkkpKS5tjvDUs1zPHx0GIH
directory = 'out_100_transformed'

# Vehicle class keyword -> integer label. Order matters: the first keyword found
# in the lower-cased file name wins.
class_ids = (
    ('sedan', 0),
    ('jeep', 1),
    ('suv', 2),
    ('hatchback', 3),
    ('truck', 4),
)

collect = []
labels = []

# Build the KNN dataset: one feature vector and one label per PCD file.
# NOTE: os.listdir returns entries in arbitrary order, so the sample order (and
# therefore any seeded split made from it) can differ between machines.
for filename in os.listdir(directory):
    lowered = filename.lower()
    label = next((class_id for keyword, class_id in class_ids if keyword in lowered), None)
    if label is None:
        # No known class in the name: skip the file before loading it, so that
        # `collect` and `labels` always stay the same length.
        continue
    pointcloud = pcl.load(os.path.join(directory, filename))
    collect.append(pcd_to_features(pointcloud))
    labels.append(label)

In [3]:

# Hold out 33% of the samples for testing; the fixed seed keeps the split repeatable
# for a given sample order.
collect_train, collect_test, labels_train, labels_test = train_test_split(
    collect,
    labels,
    test_size=0.33,
    random_state=42,
)

# Fit a 3-nearest-neighbour classifier on the training part and predict the held-out part.
neigh = KNeighborsClassifier(n_neighbors=3).fit(collect_train, labels_train)
y_pred = neigh.predict(collect_test)


In [4]:
# Test accuracy of the KNN classifier: share of held-out samples predicted correctly.
accuracy_score(y_true=labels_test, y_pred=y_pred)

0.9217687074829932

In [5]:
# Group feature vectors by vehicle class for plotting: x[i] holds the feature
# vectors of every file whose lower-cased name contains labels[i].
# NOTE(review): this rebinds `labels`, which previously held the numeric training
# labels, so the train/test split cell cannot be re-run after this one without
# rebuilding the dataset first. The name is kept in case other cells rely on it.
labels = ['sedan', 'jeep', 'hatchback', 'suv']
features_by_class = {label: [] for label in labels}

# Single pass over the directory: each file is loaded at most once, and only if
# its name matches a class that is plotted (e.g. truck files are skipped).
for filename in os.listdir(directory):
    lowered = filename.lower()
    matching = [label for label in labels if label in lowered]
    if not matching:
        continue
    features = pcd_to_features(pcl.load(os.path.join(directory, filename)))
    for label in matching:
        features_by_class[label].append(features)

x = [features_by_class[label] for label in labels]

# PLOTTING HISTOGRAMS FOR ANALYSIS

In [6]:
# Sedan: overlaid step histograms of the per-cloud means (top) and standard
# deviations (bottom).
s = pd.DataFrame(x[0])

mean_fields = ['sedan_mean_x', 'sedan_mean_y', 'sedan_mean_z']
std_fields = ['sedan_std_x', 'sedan_std_y', 'sedan_std_z']

# Feature-vector columns 0-2 feed the upper chart, columns 3-5 the lower one.
source_upper = pd.DataFrame(
    {name: np.array(s[col]) for col, name in enumerate(mean_fields)}
)
source_lower = pd.DataFrame(
    {name: np.array(s[col]) for col, name in enumerate(std_fields, start=3)}
)

# Both charts share the same recipe: fold the three columns into long form,
# then draw one semi-transparent binned count histogram per folded column.
upper, lower = [
    alt.Chart(source)
    .transform_fold(fields, as_=['Experiment', 'Measurement'])
    .mark_area(opacity=0.3, interpolate='step')
    .encode(
        alt.X('Measurement:Q', bin=alt.Bin(maxbins=100)),
        alt.Y('count()', stack=None),
        alt.Color('Experiment:N'),
    )
    .properties(width=600, height=200)
    for source, fields in ((source_upper, mean_fields), (source_lower, std_fields))
]

alt.vconcat(upper, lower)

In [7]:
# Jeep: overlaid step histograms of the per-cloud means (top) and standard
# deviations (bottom).
s = pd.DataFrame(x[1])

mean_fields = ['jeep_mean_x', 'jeep_mean_y', 'jeep_mean_z']
std_fields = ['jeep_std_x', 'jeep_std_y', 'jeep_std_z']

# Feature-vector columns 0-2 feed the upper chart, columns 3-5 the lower one.
source_upper = pd.DataFrame(
    {name: np.array(s[col]) for col, name in enumerate(mean_fields)}
)
source_lower = pd.DataFrame(
    {name: np.array(s[col]) for col, name in enumerate(std_fields, start=3)}
)

# Both charts share the same recipe: fold the three columns into long form,
# then draw one semi-transparent binned count histogram per folded column.
upper, lower = [
    alt.Chart(source)
    .transform_fold(fields, as_=['Experiment', 'Measurement'])
    .mark_area(opacity=0.3, interpolate='step')
    .encode(
        alt.X('Measurement:Q', bin=alt.Bin(maxbins=100)),
        alt.Y('count()', stack=None),
        alt.Color('Experiment:N'),
    )
    .properties(width=600, height=200)
    for source, fields in ((source_upper, mean_fields), (source_lower, std_fields))
]

alt.vconcat(upper, lower)

In [8]:
# Hatchback: overlaid step histograms of the per-cloud means (top) and standard
# deviations (bottom).
s = pd.DataFrame(x[2])

mean_fields = ['hatchback_mean_x', 'hatchback_mean_y', 'hatchback_mean_z']
std_fields = ['hatchback_std_x', 'hatchback_std_y', 'hatchback_std_z']

# Feature-vector columns 0-2 feed the upper chart, columns 3-5 the lower one.
source_upper = pd.DataFrame(
    {name: np.array(s[col]) for col, name in enumerate(mean_fields)}
)
source_lower = pd.DataFrame(
    {name: np.array(s[col]) for col, name in enumerate(std_fields, start=3)}
)

# Both charts share the same recipe: fold the three columns into long form,
# then draw one semi-transparent binned count histogram per folded column.
upper, lower = [
    alt.Chart(source)
    .transform_fold(fields, as_=['Experiment', 'Measurement'])
    .mark_area(opacity=0.3, interpolate='step')
    .encode(
        alt.X('Measurement:Q', bin=alt.Bin(maxbins=100)),
        alt.Y('count()', stack=None),
        alt.Color('Experiment:N'),
    )
    .properties(width=600, height=200)
    for source, fields in ((source_upper, mean_fields), (source_lower, std_fields))
]

alt.vconcat(upper, lower)

In [9]:
# SUV: overlaid step histograms of the per-cloud means (top) and standard
# deviations (bottom).
s = pd.DataFrame(x[3])

mean_fields = ['suv_mean_x', 'suv_mean_y', 'suv_mean_z']
std_fields = ['suv_std_x', 'suv_std_y', 'suv_std_z']

# Feature-vector columns 0-2 feed the upper chart, columns 3-5 the lower one.
source_upper = pd.DataFrame(
    {name: np.array(s[col]) for col, name in enumerate(mean_fields)}
)
source_lower = pd.DataFrame(
    {name: np.array(s[col]) for col, name in enumerate(std_fields, start=3)}
)

# Both charts share the same recipe: fold the three columns into long form,
# then draw one semi-transparent binned count histogram per folded column.
upper, lower = [
    alt.Chart(source)
    .transform_fold(fields, as_=['Experiment', 'Measurement'])
    .mark_area(opacity=0.3, interpolate='step')
    .encode(
        alt.X('Measurement:Q', bin=alt.Bin(maxbins=100)),
        alt.Y('count()', stack=None),
        alt.Color('Experiment:N'),
    )
    .properties(width=600, height=200)
    for source, fields in ((source_upper, mean_fields), (source_lower, std_fields))
]

alt.vconcat(upper, lower)