In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
import numpy as np
import math
from sklearn.preprocessing import MinMaxScaler, StandardScaler

def _five_decimals(value):
    """Format a float with exactly five digits after the decimal point."""
    return '%.5f' % value


# Show floats in fixed-point notation instead of pandas' default repr.
pd.set_option('display.float_format', _five_decimals)

# Locations of the feature tables and of the mesh directories they were
# computed from, all relative to the notebook's working directory.
ORIGINAL_DB = Path("./psb_orig.csv")
PROCESSED_DB = Path("./psb_proc.csv")
ORIGINAL_MODEL_DIR = Path("./models")
PROCESSED_MODEL_DIR = Path("./processed-models")

# One data frame per table; both are read with pandas' default CSV settings.
df_orig = pd.read_csv(ORIGINAL_DB)
df_proc = pd.read_csv(PROCESSED_DB)

# Columns whose CSV cells hold the text form of a list and must be turned
# back into numpy arrays before they can be used numerically.
LIST_COLUMNS = ["Bounding box", "Barycenter", "A3", "D1", "D2", "D3", "D4"]


def _largest_extent(bounding_box):
    """Return the largest absolute per-component difference between entry 0 and entry 1.

    Presumably the two entries are opposite corners of the box - TODO confirm.
    """
    first_entry, second_entry = bounding_box[0], bounding_box[1]
    return max(np.abs(first_entry - second_entry))


# The original and the processed table get exactly the same treatment.
for frame in (df_orig, df_proc):
    for column in LIST_COLUMNS:
        # "nan" is not a name eval() can resolve, so it is replaced by 0 first.
        cell_text = frame[column].str.replace("nan", "0")

        # NOTE(review): eval() executes whatever text the CSV contains. That is
        # only acceptable for files we generated ourselves; ast.literal_eval
        # would be the safe choice if these tables can come from anywhere else.
        frame[column] = cell_text.apply(eval).apply(np.array)

    frame["Max bound"] = frame["Bounding box"].apply(_largest_extent)
    frame["Distance from origin"] = frame["Barycenter"].apply(np.linalg.norm)

# Make every value finite before scaling: +inf becomes the largest float64 and
# NaN becomes 0. A new frame is assigned instead of mutating in place.
# NOTE(review): if a feature really contains +inf, mapping it to the float64
# maximum makes min-max scaling push every other value of that feature towards
# 0. Clipping to the largest finite value may be preferable. -inf is not handled.
df_proc = df_proc.replace(np.inf, np.finfo(np.float64).max).replace(np.nan, 0)

# Columns that contribute a single value each, followed by the columns that
# contribute every element of their array. The order defines the layout of the
# feature vector and must match the vector built for the query model.
SINGLE_VALUE_FEATURE_COLUMNS = ["Surface", "Compactness", "Bounding box volume", "Diameter", "Eccentricity"]
ARRAY_FEATURE_COLUMNS = ["A3", "D1", "D2", "D3", "D4"]


def _build_feature_vector(row):
    """Concatenate one row's descriptors into a single flat numpy array."""
    values = [row[column] for column in SINGLE_VALUE_FEATURE_COLUMNS]
    for column in ARRAY_FEATURE_COLUMNS:
        values.extend(row[column])
    return np.array(values)


# Calculate the feature vector for every entry in the dataset
df_proc["Feature Vector"] = df_proc.apply(_build_feature_vector, axis=1)

# Features are normalised to [0, 1] with min-max scaling.
# TODO: Evaluate standardization (StandardScaler) as an alternative
# TODO: Weigh feature somehow
scaler = MinMaxScaler()

X = list(df_proc["Feature Vector"].map(list))

scaler.fit(X)

# Scale the whole matrix in one call instead of calling transform once per row.
scaled_rows = scaler.transform(X)

# Store one array per row. Each entry keeps the (1, n_features) shape that a
# single-row transform produces, so the column holds the same data as before.
# The object array is filled element by element so that numpy cannot merge the
# rows back into one multi-dimensional array.
scaled_column = np.empty(len(scaled_rows), dtype=object)
for row_pos, scaled_row in enumerate(scaled_rows):
    scaled_column[row_pos] = scaled_row.reshape(1, -1)

df_proc["Feature Vector"] = scaled_column

In [None]:
from object import Object
from viewer import Viewer

# Select the number of the model to query here
NO_TO_QUERY = 1

# Set to True to open the viewer on the query mesh after loading it.
view_object = False

# Search every sub-directory for the mesh file. Sorting makes the choice
# deterministic should the same file name exist in more than one directory,
# because glob() itself yields matches in no guaranteed order.
query_mesh_paths = sorted(PROCESSED_MODEL_DIR.glob(f"**/m{NO_TO_QUERY}.off"))

# Fail with an explicit message instead of an IndexError on an empty list.
if not query_mesh_paths:
    raise FileNotFoundError(
        f"No mesh named m{NO_TO_QUERY}.off found under {PROCESSED_MODEL_DIR}"
    )

obj = Object.load_mesh(query_mesh_paths[0])

if view_object:
    viewer = Viewer(obj)
    viewer.mainLoop()

In [None]:
# Compute the descriptors from the loaded mesh itself instead of looking them
# up in the table, so that models outside of the dataset can be queried too.
(
    model_num,
    label,
    num_vertices,
    num_faces,
    num_edges,
    type_faces,
    bounding_box,
    barycenter,
    diagonal,
    surface,
    bounding_box_volume,
    volume,
    compactness,
    diameter,
    eccentricity,
    A3,
    D1,
    D2,
    D3,
    D4,
) = obj.get_info()

# Same layout as the "Feature Vector" column of df_proc: five single values
# first, then every element of A3, D1, D2, D3 and D4.
single_value_part = [surface, compactness, bounding_box_volume, diameter, eccentricity]
array_part = [*A3, *D1, *D2, *D3, *D4]
feature_vector = np.array(single_value_part + array_part)

# Apply the scaler that was fitted on the dataset. transform() works on a
# batch, so the vector is wrapped in a list and the single result row unwrapped.
feature_vector = scaler.transform([feature_vector])[0]

In [None]:
def _distance_to_query(stored_vector):
    """Euclidean distance between one stored feature vector and the query vector."""
    return np.linalg.norm(stored_vector - feature_vector)


# Distance of every object in the shape database to the query model
df_proc["Distance"] = df_proc["Feature Vector"].apply(_distance_to_query)

# Show the five closest entries together with all of their features
df_proc.nsmallest(5, "Distance")

In [None]:
# Leave-one-out k-nearest-neighbour evaluation: every model in the dataset is
# used as the query once and compared against all other models.
#   match_counts[i] - how many of the k nearest neighbours share the query's label
#   matches[i]      - whether the majority label of those neighbours equals it
k = 5

# All feature vectors as one (n_models, n_features) matrix. reshape(-1)
# flattens each stored vector, so both (n_features,) and (1, n_features)
# entries are accepted. Working on this matrix also leaves df_proc["Distance"]
# (the distances to the query model) untouched.
stored_vectors = [np.asarray(vector, dtype=float).reshape(-1) for vector in df_proc["Feature Vector"]]
feature_matrix = np.stack(stored_vectors) if stored_vectors else np.empty((0, 0))
all_labels = df_proc["Label"].to_numpy()
all_model_nums = df_proc["Model number"].to_numpy()

match_counts = []
matches = []

for query_pos in range(len(feature_matrix)):
    # Euclidean distance from the current query to every model, in one call.
    distances = np.linalg.norm(feature_matrix - feature_matrix[query_pos], axis=1)

    # Drop the query model *before* ranking. Taking k + 1 rows and removing the
    # query afterwards goes wrong when other models sit at distance 0 as well.
    candidate_pos = np.flatnonzero(all_model_nums != all_model_nums[query_pos])

    # A stable sort keeps the table order among equal distances.
    ranking = np.argsort(distances[candidate_pos], kind="stable")
    nearest_pos = candidate_pos[ranking[:k]]

    neighbour_labels = list(all_labels[nearest_pos])
    query_label = all_labels[query_pos]

    # Majority vote. max() runs over the distance-ordered list rather than a
    # set, so a tie goes to the label of the nearest neighbour instead of
    # depending on set iteration order. default=None covers a dataset with no
    # other model to compare against.
    predicted_label = max(neighbour_labels, key=neighbour_labels.count, default=None)

    matches.append(bool(predicted_label == query_label))

    match_counts.append(neighbour_labels.count(query_label))

In [None]:
# Summary of the evaluation: statistics over the per-query neighbour hits,
# then the majority-vote results as an absolute number and as a percentage.
total_queries = len(match_counts)
correct_votes = sum(matches)

print("Count: ", total_queries)
print("Max: ", max(match_counts))
print("Min: ", min(match_counts))
print("Avg: ", sum(match_counts) / total_queries)
print("Correct matches: ", correct_votes)
print(f"Correct matches: {correct_votes / total_queries * 100: .2f}%")