In [22]:
import torch
import numpy as np
from numpy.linalg import norm
from sklearn.cluster import DBSCAN
import torch.nn.functional as F
import json

In [2]:
def get_confusion_matrix(idx2category, labels_all, logits_all):
    # modelnet_dict = torch.load("src/eval_data/modelnet_dict.pt")

    cat_list = list(idx2category.values())
    
    print("labels_all: ", labels_all.shape)
    
    _, pred = logits_all.topk(1, 1, True, True)
    pred = pred.reshape(-1)
    
    true_pred = torch.argwhere(pred == labels_all).reshape(-1)
    false_pred = torch.argwhere(pred != labels_all).reshape(-1)
    
    res = torch.zeros(len(cat_list), len(cat_list))
    print("res: ", res.shape)
    for val in true_pred:
        idx = val.item()
        res[labels_all[idx], pred[idx]] += 1
    
    for val in false_pred:
        idx = val.item()
        res[labels_all[idx], pred[idx]] += 1
        # print(labels_all[idx], pred[idx])

    
    res_norm = F.softmax(res, dim=1)

    return res, res_norm

In [3]:
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
clip_feat_path = "../meta_data/lvis_cat_name_pt_feat.npy"
clip_cat_feat = np.load(clip_feat_path, allow_pickle=True)
objaverse_dict = torch.load("../src/eval_data/objaverse_dict.pt")
ov_category2idx = objaverse_dict["category2idx"]
ov_idx2category = objaverse_dict["idx2category"]
ov_category2idx = objaverse_dict["category2idx"]

ov_labels_all = objaverse_dict["labels_all"]
ov_logits_all = objaverse_dict["logits_all"]

_, pred = ov_logits_all.topk(1, 1, True, True)
ov_pred_all = pred.reshape(-1)

objaverse_dict = None
# print("clip_cat_feat: ", clip_cat_feat.shape) # (1156, 1280)
# clip_cat_feat = F.normalize(clip_cat_feat, dim=-1)
clip_cat_feat = clip_cat_feat / norm(clip_cat_feat, axis=1, keepdims=True)

eps = 0.61855
db_scan = DBSCAN(eps=eps, min_samples=2).fit(clip_cat_feat)
dbscan_labels = db_scan.labels_

In [4]:
unique_labels = np.unique(dbscan_labels)
cluster_dict = {}
for l in unique_labels:
    # if l == -1:
    #     continue
    idx = np.argwhere(dbscan_labels == l).reshape(-1)
    cluster_dict[l] = idx.tolist()


In [5]:
dbscan_labels.shape

(1156,)

In [6]:
# Lets create a new list where predictions inside the cluster = 1 and outside the cluster = 0

In [7]:
cluster_confusion = np.array([-1]*len(ov_labels_all))
cluster_confusion

array([-1, -1, -1, ..., -1, -1, -1])

In [8]:
ov_pred_all.shape

torch.Size([46205])

In [9]:
for idx, pred in enumerate(ov_pred_all):
    true_label = ov_labels_all[idx].item()
    if pred != true_label:
        cluster = dbscan_labels[true_label]
        if pred in cluster_dict[cluster]:
            cluster_confusion[idx] = 1
        else:
            cluster_confusion[idx] = 0


In [10]:
in_cluster_instance = np.argwhere(cluster_confusion == 1).reshape(-1)
out_cluster_instance = np.argwhere(cluster_confusion == 0).reshape(-1)

In [11]:
in_cluster_instance.shape, out_cluster_instance.shape

((11528,), (13123,))

In [12]:
print(f"Percentage of in-cluster confusion: {in_cluster_instance.shape[0]/len(ov_labels_all)*100:.2f}%")
print(f"Percentage of out-cluster confusion: {out_cluster_instance.shape[0]/len(ov_labels_all)*100:.2f}%")

Percentage of in-cluster confusion: 24.95%
Percentage of out-cluster confusion: 28.40%


In [13]:
ov_out_cluster_labels = ov_labels_all[out_cluster_instance]
ov_out_cluster_preds = ov_pred_all[out_cluster_instance]

In [14]:
ov_out_cluster_labels.shape

torch.Size([13123])

In [15]:
ov_out_cluster_preds

tensor([319, 329, 994,  ...,  50, 729, 377])

In [16]:
def get_confusion_matrix(idx2category, labels_all, preds_all):
    cat_list = list(idx2category.values())
    
    print("labels_all: ", labels_all.shape)
        
    true_pred = torch.argwhere(preds_all == labels_all).reshape(-1)
    false_pred = torch.argwhere(preds_all != labels_all).reshape(-1)
    
    res = torch.zeros(len(cat_list), len(cat_list))
    print("res: ", res.shape)
    for val in true_pred:
        idx = val.item()
        res[labels_all[idx], preds_all[idx]] += 1
    
    for val in false_pred:
        idx = val.item()
        res[labels_all[idx], preds_all[idx]] += 1
    
    res_norm = F.softmax(res, dim=1)

    return res, res_norm

In [17]:
confusion_mat, confusion_mat_norm = get_confusion_matrix(ov_idx2category, ov_labels_all, ov_pred_all)
out_cluster_confusion, out_cluster_confusion_norm = get_confusion_matrix(ov_idx2category, ov_out_cluster_labels, ov_out_cluster_preds)

labels_all:  torch.Size([46205])
res:  torch.Size([1156, 1156])


labels_all:  torch.Size([13123])
res:  torch.Size([1156, 1156])


In [18]:
def top_confusion(ov_idx2category, ov_confusion, ov_res_norm, dim=1, topx=40, topy=5, thresh=6):

    # mn_res_norm = torch.load("src/eval_data/mn_confusion_norm.pt")
    # ov_res_norm = torch.load("eval_data/ov_confusion_norm.pt")
    
    ov_res_norm = ov_res_norm.fill_diagonal_(0)
    row_sum = torch.sum(ov_res_norm, dim=dim)
    print("row_sum: ", row_sum.shape) # 1156 categories

    sorted_idx = torch.argsort(row_sum, descending=True)

    # print("row_sum: ", row_sum[sorted_idx[:200]] )
    print("sorted: ", sorted_idx.shape)
    sort_cat = [ov_idx2category[i.item()] for i in sorted_idx[:topx]]

    # print("sorted: ", sorted_idx[:topx], '\n')
    top_xy_dict = {}
    count = 0
    print("\n")

    target_cat_pairs = []
 
    for i in sorted_idx[:topx]:
        topy_cat=[]
        # sim_list = []
        confusion = []
        # top_xy_dict[i.item()] = {"cat_name":[], "count": [], "cats": []}
        topy_pred = torch.argsort(ov_res_norm[i], descending=True)[:topy]
        for cat_i in topy_pred:
            if ov_confusion[i][cat_i].item() > thresh:
                target_cat_pairs.append((i.item(), cat_i.item()))
                topy_cat.append(ov_idx2category[cat_i.item()])
                # sim_list.append(sim_mat[i][cat_i])
                confusion.append(ov_confusion[i][cat_i].item())
                count += 1
        
        
        # top_xy_dict[i.item()]["cats"] = topy_pred.tolist()
        # top_xy_dict[i.item()]["cat_name"] = topy_cat
        # top_xy_dict[i.item()]["count"] = confusion

        # topy_cat = [ov_idx2category[i.item()] for i in topy_pred]
        print(ov_idx2category[i.item()], ": ", topy_cat)
        # print(ov_idx2category[i.item()], ": ", sim_list)
        print(ov_idx2category[i.item()], ": ", confusion, '\n')
        # print(ov_idx2category[i.item()], ": ", ov_res_norm[i][topy_pred])
        
    print("Count: ", count)
    # print("top_xy_dict: ", top_xy_dict)
    # np.save("src/eval_data/top_xy_dict.npy", top_xy_dict)
    
    return target_cat_pairs

In [19]:
target_cat_pairs = top_confusion(ov_idx2category, out_cluster_confusion, out_cluster_confusion_norm, 1, 400, 5, 6)

row_sum:  torch.Size([1156])
sorted:  torch.Size([1156])


sherbert :  ['icecream']
sherbert :  [17.0] 

antenna :  ['mast', 'windmill', 'sword', 'stirrer']
antenna :  [17.0, 10.0, 9.0, 7.0] 

desk :  ['monitor_(computer_equipment) computer_monitor']
desk :  [17.0] 

tux :  ['penguin']
tux :  [17.0] 

fruit_juice :  ['Tabasco_sauce', 'soya_milk']
fruit_juice :  [18.0, 7.0] 

jewelry :  ['ring']
jewelry :  [17.0] 

ping-pong_ball :  ['egg']
ping-pong_ball :  [18.0] 

armor :  ['helmet', 'suit_(clothing)']
armor :  [18.0, 9.0] 

subwoofer :  ['speaker_(stero_equipment)']
subwoofer :  [17.0] 

bullet_train :  ['train_(railroad_vehicle)', 'passenger_car_(part_of_a_train)']
bullet_train :  [19.0, 17.0] 

tapestry :  ['checkerboard']
tapestry :  [21.0] 

runner_(carpet) :  ['bath_mat']
runner_(carpet) :  [25.0] 

patty_(food) :  ['hamburger']
patty_(food) :  [44.0] 

crabmeat :  ['crab_(animal)']
crabmeat :  [25.0] 

control :  ['joystick']
control :  [101.0] 

wine_bucket :  ['barrel']
wine

In [20]:
print(len(target_cat_pairs))
target_cat_pairs

193


[(898, 541),
 (20, 631),
 (20, 1138),
 (20, 999),
 (20, 969),
 (328, 653),
 (1083, 737),
 (438, 9),
 (438, 943),
 (556, 841),
 (758, 379),
 (29, 515),
 (29, 985),
 (982, 947),
 (153, 1065),
 (153, 720),
 (1014, 215),
 (852, 68),
 (724, 489),
 (292, 291),
 (270, 557),
 (1142, 57),
 (1131, 555),
 (37, 1073),
 (229, 57),
 (357, 132),
 (658, 552),
 (658, 1062),
 (658, 659),
 (958, 1106),
 (958, 528),
 (1115, 131),
 (787, 84),
 (787, 179),
 (787, 83),
 (136, 841),
 (628, 1021),
 (628, 1061),
 (628, 77),
 (1061, 1021),
 (1061, 77),
 (221, 1020),
 (221, 1018),
 (221, 259),
 (448, 605),
 (81, 489),
 (127, 197),
 (315, 119),
 (1101, 124),
 (1101, 1141),
 (314, 4),
 (314, 319),
 (94, 502),
 (771, 991),
 (901, 25),
 (918, 123),
 (192, 647),
 (192, 751),
 (192, 552),
 (192, 528),
 (553, 900),
 (1082, 548),
 (109, 677),
 (58, 497),
 (726, 21),
 (14, 83),
 (14, 1141),
 (1023, 718),
 (1023, 789),
 (239, 689),
 (239, 809),
 (933, 1102),
 (298, 1020),
 (548, 996),
 (106, 996),
 (476, 984),
 (25, 901),


In [24]:
# json.dump(target_cat_pairs, open("../meta_data/target_cat_pairs.json", "w"))

In [20]:
cat1 = "armor"
cat2 = "helmet"
ov_category2idx[cat1], ov_category2idx[cat2]

(29, 515)

In [28]:
confusion_mat[ov_category2idx[cat1], ov_category2idx[cat2]]

tensor(10.)

In [31]:
def get_index(true_cat_name, pred_cat_name, ov_category2idx, labels_all, pred_all):
    true_cat = ov_category2idx[true_cat_name]
    pred_cat = ov_category2idx[pred_cat_name]
    idx_true = torch.argwhere(labels_all == true_cat).reshape(-1)
    idx_pred = torch.argwhere(pred_all[idx_true] == pred_cat).reshape(-1)
    return idx_true[idx_pred]

In [32]:
get_index(cat1, cat2, ov_category2idx, ov_labels_all, ov_pred_all)

tensor([808, 811, 814, 840, 859, 884, 888, 893, 941, 947])

In [35]:
# ov_xyz = torch.load("../src/eval_data/ov_xyz.pt")
# eps = 0.61855
# db_scan_point_cloud = DBSCAN().fit(ov_xyz.reshape(46205, -1))
# dbscan_labels_pcloud = db_scan_point_cloud.labels_

In [81]:
row_sum = torch.sum(out_cluster_confusion, dim=1)
row_sum.sort(descending=True)

import plotly.express as px

fig = px.box(row_sum)
fig.update_layout(title="Out of cluster confusion", yaxis=dict(title='Value'), xaxis=dict(title='Box'))
fig.show()

In [45]:
filtered_pair_list = [("antenna", "windmill"), ('armor', 'helmet')]
gray_pair_list = [("antenna", "sword"), ("antenna", "stirrer"), ]

In [21]:
# baseline_labels_all = torch.load("../exp/finetune_1layer@20240129-151703/results/ov_labels_all_finetune_test.pt")
# baseline_logits_all = torch.load("../exp/finetune_1layer@20240129-151703/results/ov_logits_all_finetune_test.pt")
confusion_mat, confusion_mat_norm = get_confusion_matrix(ov_idx2category, ov_labels_all, ov_logits_all)

labels_all:  torch.Size([46205])
res:  torch.Size([1156, 1156])


In [22]:
confusion_mat[cluster_dict[c_no]].shape

torch.Size([12, 1156])

In [23]:
cluster_dict[c_no][:5]

[143, 248, 367, 480, 496]

In [24]:
def top_confusion(ov_idx2category, ov_confusion, ov_res_norm, dim=1, topx=40, topy=5):

    # mn_res_norm = torch.load("src/eval_data/mn_confusion_norm.pt")
    # ov_res_norm = torch.load("eval_data/ov_confusion_norm.pt")
    
    ov_res_norm = ov_res_norm.fill_diagonal_(0)
    row_sum = torch.sum(ov_res_norm, dim=dim)
    print("row_sum: ", row_sum.shape) # 1156 categories

    sorted_idx = torch.argsort(row_sum, descending=True)

    # print("row_sum: ", row_sum[sorted_idx[:200]] )
    print("sorted: ", sorted_idx.shape)
    sort_cat = [ov_idx2category[i.item()] for i in sorted_idx[:topx]]

    # print("sorted: ", sorted_idx[:topx], '\n')
    top_xy_dict = {}

    for i in sorted_idx[:topx]:
        topy_cat=[]
        # sim_list = []
        confusion = []
        # top_xy_dict[i.item()] = {"cat_name":[], "count": [], "cats": []}
        topy_pred = torch.argsort(ov_res_norm[i], descending=True)[:topy]
        for cat_i in topy_pred:
            topy_cat.append(ov_idx2category[cat_i.item()])
            # sim_list.append(sim_mat[i][cat_i])
            confusion.append(ov_confusion[i][cat_i].item())
        
        # top_xy_dict[i.item()]["cats"] = topy_pred.tolist()
        # top_xy_dict[i.item()]["cat_name"] = topy_cat
        # top_xy_dict[i.item()]["count"] = confusion

        # topy_cat = [ov_idx2category[i.item()] for i in topy_pred]
        print(ov_idx2category[i.item()], ": ", topy_cat)
        # print(ov_idx2category[i.item()], ": ", sim_list)
        print(ov_idx2category[i.item()], ": ", confusion, '\n')
        # print(ov_idx2category[i.item()], ": ", ov_res_norm[i][topy_pred])
    
    # print("top_xy_dict: ", top_xy_dict)
    # np.save("src/eval_data/top_xy_dict.npy", top_xy_dict)


In [25]:
def top_confusion_for_filtered_cat(ov_idx2category, ov_confusion, ov_res_norm, cat_idx_from_cluster, dim=1, topx=40, topy=5):

    # mn_res_norm = torch.load("src/eval_data/mn_confusion_norm.pt")
    # ov_res_norm = torch.load("eval_data/ov_confusion_norm.pt")

    ov_res_norm = ov_res_norm.fill_diagonal_(0)

    ov_confusion = ov_confusion[cat_idx_from_cluster]
    ov_res_norm = ov_res_norm[cat_idx_from_cluster]
    
    # print(f"ov_res_norm: {ov_res_norm.shape}")
    row_sum = torch.sum(ov_res_norm, dim=dim)
    # print("row_sum: ", row_sum.shape) # 1156 categories

    sorted_idx = torch.argsort(row_sum, descending=True)

    # print("row_sum: ", row_sum[sorted_idx[:200]] )
    # print("sorted: ", sorted_idx.shape)
    # sort_cat = [ov_idx2category[i.item()] for i in sorted_idx[:topx]]

    # print("sorted: ", sorted_idx[:topx], '\n')
    top_xy_dict = {}

    for i in sorted_idx[:topx]:
        topy_cat=[]
        # sim_list = []
        confusion = []
        # top_xy_dict[i.item()] = {"cat_name":[], "count": [], "cats": []}
        topy_pred = torch.argsort(ov_res_norm[i], descending=True)[:topy]
        for cat_i in topy_pred:
            topy_cat.append(ov_idx2category[cat_i.item()])
            # sim_list.append(sim_mat[i][cat_i])
            confusion.append(ov_confusion[i][cat_i].item())
        
        # top_xy_dict[i.item()]["cats"] = topy_pred.tolist()
        # top_xy_dict[i.item()]["cat_name"] = topy_cat
        # top_xy_dict[i.item()]["count"] = confusion

        # topy_cat = [ov_idx2category[i.item()] for i in topy_pred]
        cat_name = ov_idx2category[cat_idx_from_cluster[i.item()]]
        print(cat_name, ": ", topy_cat)
        # print(ov_idx2category[i.item()], ": ", sim_list)
        print(cat_name, ": ", confusion, '\n')
        # print(ov_idx2category[i.item()], ": ", ov_res_norm[i][topy_pred])
    
    # print("top_xy_dict: ", top_xy_dict)
    # np.save("src/eval_data/top_xy_dict.npy", top_xy_dict)


In [26]:
cat_idx_from_cluster = cluster_dict[c_no]
# filter_confusion = confusion_mat[cat_idx_from_cluster]
# filter_confusion_norm = confusion_mat_norm[cat_idx_from_cluster]
top_confusion_for_filtered_cat(ov_idx2category, confusion_mat, confusion_mat_norm, cat_idx_from_cluster, dim=1, topx=12, topy=5)

shopping_bag :  ['tote_bag', 'shoulder_bag', 'basket', 'handbag', 'clutch_bag']
shopping_bag :  [22.0, 7.0, 4.0, 3.0, 2.0] 

satchel :  ['shoulder_bag', 'backpack', 'clutch_bag', 'pitchfork', 'plume']
satchel :  [12.0, 6.0, 4.0, 0.0, 0.0] 

plastic_bag :  ['pouch', 'shopping_bag', 'beanbag', 'poster', 'ice_pack']
plastic_bag :  [3.0, 2.0, 2.0, 1.0, 1.0] 

clutch_bag :  ['backpack', 'shoulder_bag', 'satchel', 'suitcase', 'handbag']
clutch_bag :  [4.0, 4.0, 2.0, 2.0, 2.0] 

grocery_bag :  ['shopping_bag', 'tote_bag', 'packet', 'cube', 'shoulder_bag']
grocery_bag :  [6.0, 5.0, 2.0, 1.0, 1.0] 

pouch :  ['backpack', 'pottery', 'clutch_bag', 'satchel', 'awning']
pouch :  [4.0, 3.0, 2.0, 1.0, 1.0] 

duffel_bag :  ['backpack', 'handbag', 'shoulder_bag', 'egg', 'tote_bag']
duffel_bag :  [10.0, 4.0, 4.0, 1.0, 1.0] 

handbag :  ['shoulder_bag', 'tote_bag', 'clutch_bag', 'duffel_bag', 'headscarf']
handbag :  [13.0, 11.0, 3.0, 1.0, 1.0] 

tote_bag :  ['shoulder_bag', 'cowbell', 'packet', 'bucket',

In [28]:
cluster_dict[c_no]
arr = [ov_idx2category[i] for i in cluster_dict[c_no]]
print(arr)

['briefcase', 'clutch_bag', 'duffel_bag', 'grocery_bag', 'handbag', 'plastic_bag', 'pouch', 'satchel', 'shopping_bag', 'shoulder_bag', 'suitcase', 'tote_bag']


In [16]:
print(ov_category2idx["briefcase"])
print(ov_category2idx["suitcase"])

143
986


In [17]:
confusion_mat[143][986]

tensor(8.)

In [18]:
def get_index(true_cat_name, pred_cat_name, ov_category2idx, labels_all, pred_all):
    true_cat = ov_category2idx[true_cat_name]
    pred_cat = ov_category2idx[pred_cat_name]
    idx_true = torch.argwhere(labels_all == true_cat).reshape(-1)
    idx_pred = torch.argwhere(pred_all[idx_true] == pred_cat).reshape(-1)
    return idx_true[idx_pred]

In [19]:
get_index("briefcase", "suitcase", ov_category2idx, ov_labels_all, ov_pred_all)

tensor([5911, 5914, 5915, 5921, 5923, 5924, 5933, 5939])

In [20]:
get_index("suitcase", "briefcase", ov_category2idx, ov_labels_all, ov_pred_all)

tensor([39400, 39404, 39410, 39426, 39428, 39432, 39435, 39436])

In [49]:
for idx, val in enumerate(confusion_mat[986]):
    if val > 0:
        print(f"idx: {idx}, val: {val}")
        # print(val.item())


idx: 42, val: 1.0
idx: 143, val: 8.0
idx: 189, val: 2.0
idx: 230, val: 2.0
idx: 311, val: 1.0
idx: 367, val: 1.0
idx: 503, val: 1.0
idx: 609, val: 1.0
idx: 867, val: 1.0
idx: 906, val: 2.0
idx: 986, val: 24.0
idx: 1077, val: 1.0


In [28]:
filter_confusion = confusion_mat[cluster_dict[c_no]]
filter_confusion_norm = confusion_mat_norm[cluster_dict[c_no]]
top_confusion(ov_idx2category, filter_confusion, filter_confusion_norm, dim=1, topx=40, topy=5)

Sharpie :  ['tote_bag', 'shoulder_bag', 'basket', 'handbag', 'clutch_bag']
Sharpie :  [22.0, 7.0, 4.0, 4.0, 2.0] 

Rollerblade :  ['shoulder_bag', 'backpack', 'clutch_bag', 'handbag', 'ski_boot']
Rollerblade :  [11.0, 4.0, 4.0, 2.0, 1.0] 

Ferris_wheel :  ['pouch', 'shopping_bag', 'beanbag', 'chime', 'backpack']
Ferris_wheel :  [3.0, 2.0, 2.0, 1.0, 1.0] 

Bible :  ['backpack', 'shoulder_bag', 'satchel', 'suitcase', 'handbag']
Bible :  [4.0, 4.0, 2.0, 2.0, 2.0] 

Lego :  ['backpack', 'pottery', 'clutch_bag', 'vase', 'wall_socket']
Lego :  [4.0, 3.0, 2.0, 2.0, 1.0] 

Christmas_tree :  ['shopping_bag', 'tote_bag', 'packet', 'shoulder_bag', 'cube']
Christmas_tree :  [7.0, 4.0, 3.0, 2.0, 1.0] 

CD_player :  ['backpack', 'shoulder_bag', 'handbag', 'beanbag', 'tote_bag']
CD_player :  [9.0, 4.0, 3.0, 1.0, 1.0] 

Dixie_cup :  ['shoulder_bag', 'tote_bag', 'clutch_bag', 'hatbox', 'headscarf']
Dixie_cup :  [15.0, 9.0, 4.0, 1.0, 1.0] 

air_conditioner :  ['shoulder_bag', 'packet', 'bucket', 'duffel

In [13]:
for k, v in cluster_dict.items():
    print(f"Cluster {k}: ")
    print([ov_idx2category[i] for i in v])

Cluster 0: 
['Band_Aid', 'bandage']
Cluster 1: 
['Bible', 'book', 'booklet', 'diary', 'hardback_book', 'notebook', 'notepad', 'paperback_book']
Cluster 2: 
['CD_player', 'phonograph_record', 'record_player']
Cluster 3: 
['Ferris_wheel', 'steering_wheel', 'wagon_wheel', 'wheel']
Cluster 4: 
['Rollerblade', 'ice_skate', 'roller_skate', 'skateboard', 'ski', 'ski_boot', 'ski_pole', 'snowboard', 'water_ski']
Cluster 5: 
['Tabasco_sauce', 'hot_sauce']
Cluster 6: 
['aerosol_can', 'barrel', 'beer_bottle', 'beer_can', 'bottle', 'can', 'canister', 'chinaware', 'cooking_utensil', 'cream_pitcher', 'cylinder', 'flowerpot', 'frying_pan', 'jar', 'keg', 'pan_(for_cooking)', 'pan_(metal_container)', 'pitcher_(vessel_for_liquid)', 'pot', 'pottery', 'saucepan', 'thermos_bottle', 'urn', 'vase', 'water_bottle', 'water_jug', 'watering_can', 'wine_bottle']
Cluster 7: 
['airplane', 'fighter_jet', 'helicopter', 'jet_plane', 'seaplane']
Cluster 8: 
['alarm_clock', 'clock', 'clock_tower', 'pocket_watch', 'timer'

In [3]:
obj_idx = ov_category2idx["motor_vehicle"]
val = dbscan_labels[obj_idx]
idx = np.argwhere(dbscan_labels == val)

obj_list = []
for i in idx:
    obj_list.append(ov_idx2category[i.item()])

print("obj_list: ", len(obj_list))
print("obj_list: ", obj_list)


obj_list:  6
obj_list:  ['car_(automobile)', 'convertible_(automobile)', 'generator', 'motor', 'motor_vehicle', 'race_car']


In [8]:
top_xy_dict = np.load("src/eval_data/top_xy_dict.npy", allow_pickle=True).item()
# def get_cat_count(top_xy_dict):
#     cat = top_xy_dict["cats"]
#     count = top_xy_dict["count"]


In [5]:
# top_xy_dict

In [9]:
def get_cluster(idx, dbscan_labels):
    val = dbscan_labels[idx]
    idx = np.argwhere(dbscan_labels == val)

    obj_list = []
    for i in idx:
        obj_list.append(ov_idx2category[i.item()])

    print("obj_list: ", len(obj_list))
    print("obj_list: ", obj_list)

In [7]:
top_40 = [1101,  142,  114,  169,  898,  426, 1025,  312,  438,  950,  758,  554,
         826, 1083,  118, 1037,  122,  612,  386,  902,  189,  568,  270,  903,
         464,  618,  278,  372,  724,  619,  658,  414,  667,  210,  229,  410,
         678,  483,  106, 1019]

In [10]:
cat_idx = 1019
print(ov_idx2category[cat_idx])

teakettle


In [11]:
get_cluster(cat_idx, dbscan_labels)

obj_list:  9
obj_list:  ['coffee_maker', 'coffeepot', 'cup', 'kettle', 'mug', 'teacup', 'teakettle', 'teapot', 'trophy_cup']


In [12]:
top_xy_dict[cat_idx]

{'cat_name': ['teapot',
  'kettle',
  'plume',
  'pocketknife',
  'plow_(farm_equipment)'],
 'count': [59.0, 2.0, 0.0, 0.0, 0.0],
 'cats': [1020, 562, 775, 777, 774]}

In [None]:
[464, 409, 861]
[372, 510]
[106, 548]
[1019, 1020, 562]

1019, 106, 372, 464

In [None]:
[464, 409, 861]
[372, 510]
[106, 548]
[1019, 1020, 562]

arr = [464, 409, 861, 372, 510, 106, 548, 1019, 1020, 562]

In [None]:
# print("dbscan: ", db_scan.labels_)
print("eps: ", eps)
print("dbscan: ", np.unique(db_scan.labels_))