In [38]:
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
import numpy as np
import math
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from scipy.spatial import distance

pd.set_option('display.float_format', lambda x: '%.5f' % x)

PROCESSED_DB = Path("./psb_proc.csv")

df = pd.read_csv(PROCESSED_DB)

PROCESSED_MODEL_DIR = Path("./processed-models")

# List string to numpy array
for col in ["Bounding box", "Barycenter", "A3", "D1", "D2", "D3", "D4"]:
    df[col] = df[col].str.replace("nan", "0")
    df[col] = df[col].apply(eval).apply(np.array)

df.replace(np.inf, np.nan, inplace=True)
df.dropna(inplace=True)

# Scale each non-histogram feature seperately
scaler = StandardScaler()

# Standardize single-value features only
for col in ["Surface", "Bounding box volume", "Convex hull volume", "Compactness", "Diameter", "Eccentricity"]:
    X = df[col].values
    X = scaler.fit_transform(X.reshape(-1, 1))
    df[col] = X

In [39]:
def apply_weights(df, w = [1 / 11 for _ in range(11)]):
    # Normalize the weights
    w = np.array(w) / sum(w)

    # Calculate the feature vector for every entry in the dataset
    # df["Feature Vector"] = df.apply(lambda x: np.array([w[0] * x["Surface"], w[1] * x["Compactness"], w[2] * x["Bounding box volume"], w[3] * x["Convex hull volume"], w[4] * x["Diameter"], w[5] * x["Eccentricity"], *(w[6] * x["A3"]), *(w[7] * x["D1"]), *(w[8] * x["D2"]), *(w[9] * x["D3"]), *(w[10] * x["D4"])]), axis=1)
    df["Feature Vector"] = df.apply(lambda x: np.hstack(np.array([x["Surface"], x["Compactness"], x["Bounding box volume"], x["Convex hull volume"], x["Diameter"], x["Eccentricity"], x["A3"], x["D1"], x["D2"], x["D3"], x["D4"]]) * w), axis=1)
    return df

In [40]:
def acc(df):
    k = 5
    matches = []

    for i, row in df.iterrows():
        vec = row["Feature Vector"]
        label = row["Label"]
        model_num = row["Model number"]

        df["Cosine Distance"] = df.apply(lambda x: np.linalg.norm(x["Feature Vector"] - vec), axis=1)

        top_k = df.nsmallest(k + 1, "Cosine Distance")

        top_k = top_k[top_k["Model number"] != model_num]

        match = max(set(top_k["Label"]), key = list(top_k["Label"]).count) == label

        matches.append(match)

    return sum(matches)

In [43]:
w = [1 / 11 for _ in range(11)]

best_indices = []

for x in range(11):
    best_weights = w
    best_acc = 0
    best_index = -1

    for i in range(11):
        if i in best_indices:
            continue

        w_copy = w.copy()
        w_copy[i] = 1 - x/11

        df = apply_weights(df, w_copy)
        accuracy = acc(df)



        if accuracy > best_acc:
            best_acc = accuracy
            best_weights = w_copy
            best_index = i

    best_indices.append(best_index)
    w = best_weights
    print(f"Weights: {best_weights}")
    print(f"Accuracy: {best_acc}")


  df["Feature Vector"] = df.apply(lambda x: np.hstack(np.array([x["Surface"], x["Compactness"], x["Bounding box volume"], x["Convex hull volume"], x["Diameter"], x["Eccentricity"], x["A3"], x["D1"], x["D2"], x["D3"], x["D4"]]) * w), axis=1)


Weights: [0.09090909090909091, 0.09090909090909091, 0.09090909090909091, 0.09090909090909091, 0.09090909090909091, 1.0, 0.09090909090909091, 0.09090909090909091, 0.09090909090909091, 0.09090909090909091, 0.09090909090909091]
Accuracy: 898
Weights: [0.09090909090909091, 0.09090909090909091, 0.09090909090909091, 0.09090909090909091, 0.09090909090909091, 1.0, 0.9090909090909091, 0.09090909090909091, 0.09090909090909091, 0.09090909090909091, 0.09090909090909091]
Accuracy: 875
Weights: [0.09090909090909091, 0.09090909090909091, 0.09090909090909091, 0.09090909090909091, 0.09090909090909091, 1.0, 0.9090909090909091, 0.09090909090909091, 0.8181818181818181, 0.09090909090909091, 0.09090909090909091]
Accuracy: 883
Weights: [0.09090909090909091, 0.7272727272727273, 0.09090909090909091, 0.09090909090909091, 0.09090909090909091, 1.0, 0.9090909090909091, 0.09090909090909091, 0.8181818181818181, 0.09090909090909091, 0.09090909090909091]
Accuracy: 885
Weights: [0.09090909090909091, 0.7272727272727273,

In [44]:
df = apply_weights(df)
accuracy = acc(df)

print(accuracy)



  df["Feature Vector"] = df.apply(lambda x: np.hstack(np.array([x["Surface"], x["Compactness"], x["Bounding box volume"], x["Convex hull volume"], x["Diameter"], x["Eccentricity"], x["A3"], x["D1"], x["D2"], x["D3"], x["D4"]]) * w), axis=1)


905
