### Import

In [39]:
import os
from os.path import join
import random
import pickle
from time import time
from datetime import datetime
import shutil

from icecream import ic
import numpy as np
import open3d as o3d
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import ConfusionMatrixDisplay

from utils import get_angles
from feature_extractor import surflet_pairs_feature, get_histogram

### Utils

In [40]:
def plot_histogram(hists=None, **kwargs):
    """Draw every histogram as a grayscale image in its own 5x5 figure.

    Args:
        hists: Iterable of image-like arrays accepted by ``plt.imshow``.
            When None, the histograms come from ``get_histogram(**kwargs)``.
        **kwargs: Forwarded unchanged to ``get_histogram``; only used when
            ``hists`` is None.
    """
    images = get_histogram(**kwargs) if hists is None else hists
    for image in images:
        plt.figure(figsize=(5, 5))
        plt.imshow(image, cmap='gray')
    # A single show() call at the end displays all figures created above.
    plt.show()


# id = random.choice(list(range(1, 46001)))
# # plot_histogram(filepath=f'dataset/ply/training/pointCloud/pointCloud{id}.ply', normalize=False)
# plot_histogram(filepath=f'dataset/ply/training/pointCloud/pointCloud{id}.ply', normalize=True, max_ratio=0.8)

### Data

In [41]:
def select_ids(ids, labels, samples_per_class, ground_truth=None):
    """Randomly pick at most ``samples_per_class`` ids for each requested label.

    Args:
        ids: Iterable of sample ids to choose from.
        labels: Labels to sample for. The result is grouped in this order.
        samples_per_class: Upper bound on the number of ids drawn per label.
            If a label has fewer ids than this, all of them are returned
            (in random order) instead of raising.
        ground_truth: Mapping ``id -> {'label': ...}``. Defaults to the
            notebook-level ``gt``.

    Returns:
        A flat list of ids: the sampled ids of ``labels[0]``, then those of
        ``labels[1]``, and so on.

    Raises:
        ValueError: if ``samples_per_class`` is negative (from ``random.sample``).
    """
    if ground_truth is None:
        ground_truth = gt  # notebook-level ground truth, loaded in the data cell

    # Bucket only the requested labels; ids with any other label are ignored.
    label2ids = {label: [] for label in labels}
    for sample_id in ids:
        label = ground_truth[sample_id]['label']
        if label in label2ids:
            label2ids[label].append(sample_id)

    res = []
    for label in labels:
        pool = label2ids[label]
        # Cap the request: random.sample raises when asked for more than len(pool),
        # which the callers' default of 10**9 would always trigger.
        res += random.sample(pool, min(samples_per_class, len(pool)))
    return res


def get_train_data(ids, filepath, bins=10, max_ratio=0.8, labels=[1, 2, 3, 4, 5], samples_per_class=10**9):
    """Build a flat training set of histograms and their labels.

    Every histogram that ``get_histogram`` returns for a file becomes one
    training row carrying that file's label.

    Args:
        ids: Candidate sample ids; a subset is chosen with ``select_ids``.
        filepath: ``%``-format pattern turned into a file path with ``filepath % id``.
        bins: Forwarded to ``get_histogram``.
        max_ratio: Forwarded to ``get_histogram``.
        labels: Labels to include.
        samples_per_class: Maximum number of files used per label.

    Returns:
        ``(x_train, y_train)`` as numpy arrays built from the collected
        histograms and labels.
    """
    features, targets = [], []
    used = dict.fromkeys(labels, 0)  # files consumed so far, per label
    for sample_id in tqdm(select_ids(ids, labels, samples_per_class)):
        label = gt[sample_id]['label']
        if label not in labels or used[label] >= samples_per_class:
            continue
        hists = get_histogram(filepath=filepath % sample_id, bins=bins, normalize=True, flatten=True, max_ratio=max_ratio)
        features.extend(hists)
        targets.extend([label] * len(hists))
        used[label] += 1
    return np.array(features), np.array(targets)


def get_test_data(ids, filepath, bins=10, max_ratio=0.8, labels=[1, 2, 3, 4, 5], samples_per_class=10**9):
    """Build a per-file test set: one group of histograms per sample.

    Unlike ``get_train_data``, the histograms of a file are kept together so
    that a prediction can later be aggregated per file.

    Args:
        ids: Candidate sample ids; a subset is chosen with ``select_ids``.
        filepath: ``%``-format pattern turned into a file path with ``filepath % id``.
        bins: Forwarded to ``get_histogram``.
        max_ratio: Forwarded to ``get_histogram``.
        labels: Labels to include.
        samples_per_class: Maximum number of files used per label.

    Returns:
        ``(x_test, y_test, test_ids)``: three parallel lists holding, for each
        selected file, the array of its histograms, its label and its id.
    """
    groups, targets, kept_ids = [], [], []
    used = dict.fromkeys(labels, 0)  # files consumed so far, per label
    for sample_id in tqdm(select_ids(ids, labels, samples_per_class)):
        label = gt[sample_id]['label']
        if label not in labels or used[label] >= samples_per_class:
            continue
        hists = get_histogram(filepath=filepath % sample_id, bins=bins, normalize=True, flatten=True, max_ratio=max_ratio)
        groups.append(np.array(hists))
        targets.append(label)
        kept_ids.append(sample_id)
        used[label] += 1
    return groups, targets, kept_ids

In [42]:
def init_dataset(bins, max_ratio, labels=[1, 2, 3, 4, 5], samples_per_class=[10**9, 10**9], train_full_data=False):
    """Create the training data and, unless training on everything, the test data.

    Args:
        bins: Forwarded to the histogram extraction.
        max_ratio: Forwarded to the histogram extraction.
        labels: Labels to include.
        samples_per_class: Two-element sequence ``[train_limit, test_limit]``
            with the maximum number of files per label for each split.
        train_full_data: When True, ids 1..46000 are all used for training and
            no test split is produced.

    Returns:
        ``(x_train, y_train)`` when ``train_full_data`` is True, otherwise
        ``((x_train, y_train), (x_test, y_test, test_ids))``. Note the two
        shapes differ, so callers must unpack according to the flag they pass.
    """
    ply_pattern = './dataset/ply/training/pointCloud/pointCloud%d.ply'

    if train_full_data:
        train_ids = list(range(1, 46001))
        test_ids = []
    else:
        # The split file holds a (train_ids, test_ids) pair. It was presumably
        # produced by a per-label shuffled 90/10 split like the one below
        # (kept for reference; confirm before regenerating):
        # train_ids = []
        # test_ids = []
        # for label in range(1, 6):
        #     ids = list(label2ids[label])
        #     random.shuffle(ids)
        #     pos = int(len(ids) * 0.9)
        #     train_ids += ids[:pos]
        #     test_ids += ids[pos:]
        # NOTE: pickle can execute arbitrary code on load; only open trusted files.
        with open('./honv/train_test_ids.pkl', 'rb') as f:
            train_ids, test_ids = pickle.load(f)

    x_train, y_train = get_train_data(
        train_ids,
        ply_pattern,
        bins=bins,
        max_ratio=max_ratio,
        labels=labels,
        samples_per_class=samples_per_class[0],
    )
    if train_full_data:
        return x_train, y_train

    x_test, y_test, test_ids = get_test_data(
        test_ids,
        ply_pattern,
        bins=bins,
        max_ratio=max_ratio,
        labels=labels,
        samples_per_class=samples_per_class[1],
    )
    return (x_train, y_train), (x_test, y_test, test_ids)

### Eval utils

In [43]:
def predict(model, xs, labels, honv=True):
    """Predict one label per sample.

    With ``honv=True`` every element of ``xs`` is a group of feature rows
    belonging to one sample. The class probabilities of the rows are summed
    and the label with the largest total is chosen. With ``honv=False`` the
    call is a plain ``model.predict(xs)``.

    Args:
        model: Object exposing ``predict`` and ``predict_proba``.
        xs: Sequence of per-sample row groups (``honv=True``) or whatever
            ``model.predict`` accepts (``honv=False``).
        labels: Label for each probability column, in column order. For
            scikit-learn classifiers this must match ``model.classes_``
            (sorted class values); an unsorted ``labels`` would mislabel.
        honv: Selects the aggregation mode described above.

    Returns:
        A list with one label per element of ``xs`` when ``honv`` is True
        (empty list for empty ``xs``), otherwise the output of ``model.predict``.
    """
    if not honv:
        return model.predict(xs)
    if len(xs) == 0:
        # np.concatenate raises on an empty sequence; nothing to predict.
        return []

    counts = [len(x) for x in xs]
    # One predict_proba call over all rows, then slice back into groups.
    probs = model.predict_proba(np.concatenate(xs, axis=0))

    pred = []
    head = 0
    for count in counts:
        votes = probs[head:head + count, :].sum(axis=0)
        pred.append(labels[votes.argmax()])
        head += count
    return pred


def evaluate(model=None, x_test=None, y_test=None, test_ids=None, labels=None, y_pred=None, save_fig=True, names=None, save_con=False):
    """Build, plot and score a confusion matrix for a set of predictions.

    Rows of the matrix are true labels and columns are predicted labels, both
    in the order given by ``labels``. The function reads the notebook-level
    ``LOGGING_PATH`` and ``NAME`` when saving, and ``cf`` (if defined) for the
    figure title.

    Args:
        model: Classifier handed to ``predict``; only needed when ``y_pred`` is None.
        x_test: Per-sample feature groups; needed when ``y_pred`` is None or
            ``save_con`` is True.
        y_test: True label of every sample (required).
        test_ids: Sample ids aligned with ``y_test``; needed when ``save_con`` is True.
        labels: List of label values defining the matrix order (required).
        y_pred: Precomputed predictions; computed with ``predict`` when None.
        save_fig: When True, write the figure to ``LOGGING_PATH/NAME.jpg``.
        names: Mapping or sequence indexed by label value that gives display
            names. When None, ``str(label)`` is used.
        save_con: When True, pickle the ids and one randomly chosen feature row
            per sample, grouped by confusion-matrix cell, under ``LOGGING_PATH``.

    Returns:
        ``(acc, confusion_matrix)`` where ``acc`` is the fraction of samples on
        the diagonal and ``confusion_matrix`` is an int32 array of counts.

    Raises:
        ValueError: if a true or predicted label is not in ``labels``.
    """
    assert labels is not None and y_test is not None
    if save_con:
        # Fail early instead of crashing mid-loop on a None subscript.
        assert x_test is not None and test_ids is not None, 'save_con=True requires x_test and test_ids'

    n_labels = len(labels)
    confusion_matrix = np.zeros((n_labels, n_labels))
    if save_con:
        confusion_ids = [[[] for _ in range(n_labels)] for _ in range(n_labels)]
        confusion_hists = [[[] for _ in range(n_labels)] for _ in range(n_labels)]
    N = len(y_test)

    if y_pred is None:
        assert model is not None and x_test is not None
        y_pred = predict(model, x_test[:N], labels)

    for i in range(N):
        row, col = labels.index(y_test[i]), labels.index(y_pred[i])
        confusion_matrix[row, col] += 1
        if save_con:
            confusion_ids[row][col].append(test_ids[i])
            confusion_hists[row][col].append(random.choice(x_test[i]))

    if save_con:
        with open(join(LOGGING_PATH, 'confusion_ids.pkl'), 'wb') as f:
            pickle.dump(confusion_ids, f)
        with open(join(LOGGING_PATH, 'confusion_hists.pkl'), 'wb') as f:
            pickle.dump(confusion_hists, f)

    acc = np.trace(confusion_matrix) / confusion_matrix.sum()
    confusion_matrix = confusion_matrix.astype('int32')

    if names is None:
        display_labels = [str(label) for label in labels]
    else:
        display_labels = [names[label] for label in labels]
    disp = ConfusionMatrixDisplay(confusion_matrix=confusion_matrix, display_labels=display_labels)
    plt.rc('font', size=12)
    fig, ax = plt.subplots(figsize=(10, 10))

    title = f'accuracy = {acc:.4f}'
    try:
        title += f'\nK={cf.n_neighbors}\nmax_ratio={cf.max_ratio}\nbins={cf.bins}'
    except (NameError, AttributeError):
        # No global config, or one without these fields: keep the short title.
        pass
    # NOTE(review): the plotted matrix holds raw counts, so the trailing line of
    # the title looks outdated -- confirm whether rows were meant to be normalized.
    ax.set_title(title + '\n\nEach row has a sum of 1')
    disp.plot(cmap='Reds', ax=ax)
    if save_fig:
        # Save before show() so the file does not depend on backend behaviour.
        fig.savefig(join(LOGGING_PATH, f'{NAME}.jpg'))
    plt.show()

    print('Accuracy:', acc)
    return acc, confusion_matrix

### Run

In [44]:
class Config:
    """Simple attribute bag for experiment settings.

    Every keyword argument passed to the constructor becomes an instance
    attribute with the same name.
    """

    def __init__(self, **kwargs):
        for name, value in kwargs.items():
            setattr(self, name, value)

    def save(self, root):
        """Write all attributes to ``<root>/config.txt``, one ``name value`` per line."""
        lines = [f'{attr} {value}\n' for attr, value in vars(self).items()]
        with open(join(root, 'config.txt'), 'w') as f:
            f.write(''.join(lines))

#### Train

In [46]:
# Experiment settings: histogram resolution, histogram clipping ratio,
# K for the KNN classifier and the ground-truth labels to train on.
cf = Config(bins=25, max_ratio=0.8, n_neighbors=5, labels=[2, 4])

# All artefacts of this run (config, model, figures) go under LOGGING_PATH.
NAME = 'cyl-cone_25_08_05_full'
LOGGING_PATH = f'./honv/{NAME}/'
os.makedirs(LOGGING_PATH, exist_ok=True)
cf.save(LOGGING_PATH)

In [48]:
# Ground truth is indexed as gt[id]['label'] by the data helpers above.
# NOTE: pickle can execute arbitrary code on load; only open trusted files.
with open('./metadata/ground_truth.pkl', 'rb') as f:
    gt = pickle.load(f)
# Optional label merging used by earlier experiments (kept for reference):
# for id in range(1, 46001):
#     if gt[id]['label'] == 5:
#         gt[id]['label'] = 3
#     if gt[id]['label'] == 4:
#         gt[id]['label'] = 2
    # if gt[id]['label'] != 1:
    #     gt[id]['label'] = 2

# Exactly one of the three data sources below should be active.

# Train, test
# (x_train, y_train), (x_test, y_test, test_ids) = init_dataset(bins=cf.bins, max_ratio=cf.max_ratio, labels=cf.labels, samples_per_class=[8280*2, 920*2])
# pickle.dump(((x_train, y_train), (x_test, y_test, test_ids)), open(join(LOGGING_PATH, 'data.pkl'), 'wb'))

# Train only
# NOTE: this mode does not define x_test / y_test / test_ids, so the evaluation
# cells further down only work after the "Train, test" or "Reuse" option has run.
x_train, y_train = init_dataset(bins=cf.bins, max_ratio=cf.max_ratio, labels=cf.labels, samples_per_class=[9200, 0], train_full_data=True)
# pickle.dump((x_train, y_train), open(join(LOGGING_PATH, 'data.pkl'), 'wb'))

# Reuse
# (x_train, y_train), (x_test, y_test, test_ids) = pickle.load(open(f'./honv/2_class_25_08_05/data.pkl', 'rb'))

  0%|          | 0/18400 [00:00<?, ?it/s]

In [None]:
# Fit the KNN classifier on the flattened histograms and persist it.
model = KNeighborsClassifier(n_neighbors=cf.n_neighbors)
model.fit(x_train, y_train)
# Use a context manager so the file is flushed and closed before anyone reads it.
with open(join(LOGGING_PATH, 'model.pkl'), 'wb') as f:
    pickle.dump(model, f)

In [None]:
# Requires x_test / y_test / test_ids from a train/test split or a reused data.pkl.
# `names` must have an entry for every label in cf.labels ([2, 4] in this run);
# the merged-label names below belong to the label-merging experiments.
acc, confusion_matrix = evaluate(model, x_test, y_test, test_ids, cf.labels,
    # names={1:'plane', 2:'cylinder, cone', 3:'sphere, torus'}
    # names={2:'cylinder, cone', 3:'sphere, torus'},
    names={2:'cylinder', 4:'cone'},
    save_con=True
)

In [None]:
# Export up to 5 random samples per confusion-matrix cell: copy the point cloud
# and save one of its histograms as an image for manual inspection.
with open(join(LOGGING_PATH, 'confusion_ids.pkl'), 'rb') as f:
    confusion_ids = pickle.load(f)
with open(join(LOGGING_PATH, 'confusion_hists.pkl'), 'rb') as f:
    confusion_hists = pickle.load(f)

ply_dir = join(LOGGING_PATH, 'confusion/ply/')
hist_dir = join(LOGGING_PATH, 'confusion/hist/')
os.makedirs(ply_dir, exist_ok=True)
os.makedirs(hist_dir, exist_ok=True)

# Display names in the same order as the labels used to build the matrix.
names = ['cylinder', 'cone']
for i in range(len(names)):
    for j in range(len(names)):
        N = len(confusion_ids[i][j])
        indices = random.sample(range(N), min(N, 5))
        for index in indices:
            sample_id = confusion_ids[i][j][index]
            hist = confusion_hists[i][j][index]
            tag = f'{names[i]}_{names[j]}_{sample_id}'  # true_predicted_id

            src = f'./dataset/ply/training/pointCloud/pointCloud{sample_id}.ply'
            shutil.copy(src, join(ply_dir, f'{tag}.ply'))

            # Figures are left open on purpose so the notebook still shows them inline.
            fig = plt.figure(figsize=(8, 8))
            plt.title(tag)
            plt.axis('off')
            plt.imshow(hist.reshape((cf.bins, cf.bins)), cmap='gray')
            fig.savefig(join(hist_dir, f'{tag}.jpg'))

#### Test

In [None]:
# Stage 1: HONV model over labels [1, 2, 3, 4]. Stage 2: model that separates 3 from 5.
# NOTE: pickle can execute arbitrary code on load; only open trusted files.
root1 = './honv/4_class_25_08_05/'
root2 = './SP/35_30/'

cf1 = Config(bins=25, max_ratio=0.8, n_neighbors=5, labels=[1, 2, 3, 4])
with open(join(root1, 'model.pkl'), 'rb') as f:
    model1 = pickle.load(f)

cf2 = Config(n_neighbors=30, labels=[3, 5])
with open(join(root2, 'model.pkl'), 'rb') as f:
    model2 = pickle.load(f)

In [None]:
# Each data.pkl stores (train_part, (x_test, y_test, test_ids)); only the test
# part is needed here, so index it directly instead of binding the training
# data to `_` (which IPython also uses for the last cell output).
with open(join(root1, 'data.pkl'), 'rb') as f:
    x_test1, y_test1, test_ids1 = pickle.load(f)[1]
with open(join(root2, 'data.pkl'), 'rb') as f:
    x_test2, y_test2, test_ids2 = pickle.load(f)[1]

In [None]:
# Map every test id to the label predicted by each stage.
# Stage 1 aggregates per-sample histogram groups; stage 2 predicts directly.
y_pred1 = dict(zip(test_ids1, predict(model1, x_test1, cf1.labels, honv=True)))
y_pred2 = dict(zip(test_ids2, predict(model2, x_test2, cf2.labels, honv=False)))

In [None]:
# Combine the two stages: keep stage-1 predictions of 1, 2 or 4; for anything
# else defer to stage 2, falling back to label 1 when stage 2 has no prediction
# for that id.
y_combined = []
for sample_id in test_ids1:
    stage1_label = y_pred1[sample_id]
    if stage1_label in (1, 2, 4):
        y_combined.append(stage1_label)
    else:
        # dict.get replaces a bare `except:` that would also hide unrelated errors.
        y_combined.append(y_pred2.get(sample_id, 1))

In [None]:
# Score the combined two-stage predictions against the stage-1 ground truth.
# `names` is a list indexed by label value, hence the empty placeholder at 0.
acc, confusion_matrix = evaluate(
    y_test=y_test1,
    y_pred=y_combined,
    labels=[1, 2, 3, 4, 5],
    names=['', 'plane', 'cylinder', 'sphere', 'cone', 'torus'],
    save_fig=False,
    save_con=False,
)

### Combined infer

In [None]:
# cf1 = Config(bins=20, max_ratio=0.8, n_neighbors=3, labels=[1, 2, 3, 4, 5])
# model1 = pickle.load(open('./honv/full/12345_20_80_3/model.pkl', 'rb'))

# cf2 = Config(bins=10, max_ratio=0.7, n_neighbors=5, labels=[3, 5])
# model2 = pickle.load(open('./honv/full/35_10_70_5/model.pkl', 'rb'))

In [None]:
# x_test1, _ = get_test_data(list(range(1, 926)), './dataset/ply/test/pointCloud/pointCloud%d.ply', bins=20, max_ratio=0.8)
# x_test2, _ = get_test_data(list(range(1, 926)), './dataset/ply/test/pointCloud/pointCloud%d.ply', bins=10, max_ratio=0.7)

In [None]:
# y_pred1 = predict(model1, x_test1, [1, 2, 3, 4, 5])
# y_pred2 = predict(model2, x_test2, [3, 5])

In [None]:
# y_combined = []
# j = 0
# for i in range(len(x_test1)):
#     if y_pred1[i] in [1, 2, 4]:
#         y_combined.append(y_pred1[i])
#     else:
#         y_combined.append(y_pred1[i])
#         # y_combined.append(y_pred2[j])
#         # j += 1

In [None]:
# test_labels = {}
# for i, y in enumerate(y_combined):
#     test_labels[i + 1] = y
# pickle.dump(test_labels, open('test_labels_2.pkl', 'wb'))

In [None]:
# test_labels = pickle.load(open('./test_labels_2.pkl', 'rb'))
# print(test_labels[1]) # test_id: 1..925 --> label: 1..5

In [None]:
# def foo():
#     names = ['', 'plane', 'cylinder', 'sphere', 'cone','torus']
#     x = [names[test_labels[i]] for i in range(1, 926)]
#     with open('test_labels.txt', 'w') as f:
#         f.write('\n'.join(x))

# foo()