In [385]:
import torch
import torch.nn.functional as F
import json, copy
import h5py
import numpy as np
import math
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow
from PIL import Image, ImageDraw

In [386]:
from scipy.spatial import distance_matrix

In [387]:
from tqdm import tqdm

In [388]:
project_dir = '/home/miskai/デスクトップ/related-work/scene-graph-benchmark'
image_file = json.load(open(f'{project_dir}/datasets/vg/image_data.json'))
vocab_file = json.load(open(f'{project_dir}/datasets/vg/VG-SGG-dicts-with-attri.json'))
data_file = h5py.File(f'{project_dir}/datasets/vg/VG-SGG-with-attri.h5', 'r')
# remove invalid image
corrupted_ims = [1592, 1722, 4616, 4617]
tmp = []
for item in image_file:
    if int(item['image_id']) not in corrupted_ims:
        tmp.append(item)
image_file = tmp

In [389]:
idx2pred = vocab_file["idx_to_predicate"]
pred2idx = vocab_file["predicate_to_idx"]
pred_cnt = vocab_file["predicate_count"]

## Feature load

In [390]:
vis_info_path = "/home/miskai/デスクトップ/related-work/scene-graph-benchmark/SGG-TD2/datasets/vg/vis_concat.npy"
# vis_info_path = "/home/miskai/デスクトップ/related-work/scene-graph-benchmark/SGG-TD2/checkpoints/motifs-predcls-exmp/vis_record<20653/vis.npy"
# vis_info_path2 = "/home/miskai/デスクトップ/related-work/scene-graph-benchmark/SGG-TD2/checkpoints/motifs-predcls-exmp/vis_record>20653/vis.npy"

In [391]:
vis_info = np.load(vis_info_path, allow_pickle=True).item()
# vis_info2 = np.load(vis_info_path2, allow_pickle=True).item()

In [366]:
combined_vis_info = copy.deepcopy(vis_info)
combined_vis_info["feat_sum"] = (combined_vis_info["feat_sum"]+vis_info2["feat_sum"])[1:]
combined_vis_info["feat_sq"] = (combined_vis_info["feat_sq"]+vis_info2["feat_sq"])[1:]
combined_vis_info["freq_sum"] = (combined_vis_info["freq_sum"]+vis_info2["freq_sum"])[1:]
combined_vis_info["freq_sq"] = (combined_vis_info["freq_sq"]+vis_info2["freq_sq"])[1:]
combined_vis_info["cnt"] = (combined_vis_info["cnt"]+vis_info2["cnt"])[1:]

In [367]:
combined_vis_info["feat_ave"] = copy.deepcopy(combined_vis_info["feat_sum"])
combined_vis_info["freq_ave"] = copy.deepcopy(combined_vis_info["freq_sum"])
for i in range(50):
    combined_vis_info["feat_ave"][i] /= combined_vis_info["cnt"][i]
    combined_vis_info["freq_ave"][i] /= combined_vis_info["cnt"][i]

In [368]:
combined_vis_info["fused_ave"] = np.concatenate([combined_vis_info["feat_ave"], combined_vis_info["freq_ave"]], axis=1)

In [407]:
dist_mat = distance_matrix(vis_info["avg_feature"][1:], vis_info["avg_feature"][1:])
# dist_mat = distance_matrix(combined_vis_info["fused_ave"], combined_vis_info["fused_ave"])
dist_mat

array([[ 0.        ,  5.90079227,  5.84091756, ..., 10.3112903 ,
         8.72827768,  6.6189399 ],
       [ 5.90079227,  0.        ,  4.75385566, ...,  9.29334955,
         6.92033695,  5.80215259],
       [ 5.84091756,  4.75385566,  0.        , ...,  9.86153053,
         7.86744407,  6.763253  ],
       ...,
       [10.3112903 ,  9.29334955,  9.86153053, ...,  0.        ,
         4.00661882,  8.09135098],
       [ 8.72827768,  6.92033695,  7.86744407, ...,  4.00661882,
         0.        ,  6.32496914],
       [ 6.6189399 ,  5.80215259,  6.763253  , ...,  8.09135098,
         6.32496914,  0.        ]])

## clustering

In [395]:
def calculate_var(group, dist_mat, verbose=False):
    """
    Args
        group: array which indicates which group classes belong to (#label)
        dist_mat: distance matrix (#label, #label)
    """
    # intra variance
    intra_var = 0
    for n in range(num_groups):
        intra_dist = dist_mat[group==n][:, group==n]
        intra_var += np.mean(intra_dist)
    # inter variance
    inter_var = 0
    # for n in range(num_groups):
    #     for m in range(n+1, num_groups):
    #         inter_dist = dist_mat[group==n][:,group==m]
    #         inter_var += np.min(inter_dist)
    num_pair = 0
    for n in range(num_groups):
        for m in range(n+1, num_groups):
            mean1 = np.mean(combined_vis_info["fused_ave"][group==n], axis=0)
            mean2 = np.mean(combined_vis_info["fused_ave"][group==m], axis=0)
            inter_var += np.sqrt(((mean1-mean2)**2).sum())
            num_pair += 1
    var = 10*intra_var/num_groups+inter_var/num_pair
    if verbose:
        print(intra_var/num_groups, inter_var/num_pair)
    return var

In [396]:
def pick_label(i, group, dist_mat, prior=0):
    """
    Args
        i: group number
        group: array which indicates which group classes belong to (#label)
        dist_mat: distance matrix(#label, #label)
    """
    num = len(labels[group==i])
    if num<=prior:
        return -1
    intra_dist = dist_mat[group==i][:,group==i]
    independence = np.zeros(num)
    for n in range(num):
        independence[n] = intra_dist[n].sum()
    return np.argsort(independence)[::-1][prior]

In [397]:
def update_group(group, dist_mat, max_trial=10):
    """
    Args
        group: array which indicates which group classes belong to (#label)
        dist_mat: distance matrix (#label, #label)
        max_trial: the number of trial until the var increases
    """
    trial = 0
    while trial<max_trial:
        var = calculate_var(group, dist_mat)
        new_group = copy.deepcopy(group)
        updated = False
        priorities = np.random.randint(low=0, high=trial+1, size=num_groups)
        for n in range(num_groups):
            labels = np.where(group==n)[0]
            drop_label = labels[pick_label(n, group, dist_mat, prior=priorities[n])]
            if drop_label != -1:
                updated = True
                distination = np.arange(num_groups)
                distination = np.delete(distination, n)
                new_group[drop_label] = np.random.choice(distination)
        new_var = calculate_var(new_group, dist_mat)
        if new_var>var or (not updated):
            return new_group
        trial += 1
    return group

In [408]:
num_labels = 50
num_groups = 2

In [409]:
labels = np.array([idx2pred[str(i)] for i in range(1, num_labels+1)], dtype=object)
labels

array(['above', 'across', 'against', 'along', 'and', 'at', 'attached to',
       'behind', 'belonging to', 'between', 'carrying', 'covered in',
       'covering', 'eating', 'flying in', 'for', 'from', 'growing on',
       'hanging from', 'has', 'holding', 'in', 'in front of', 'laying on',
       'looking at', 'lying on', 'made of', 'mounted on', 'near', 'of',
       'on', 'on back of', 'over', 'painted on', 'parked on', 'part of',
       'playing', 'riding', 'says', 'sitting on', 'standing on', 'to',
       'under', 'using', 'walking in', 'walking on', 'watching',
       'wearing', 'wears', 'with'], dtype=object)

In [428]:
ini_group = np.random.uniform(low=0, high=num_groups, size=num_labels)
ini_group = np.array([math.floor(num) for num in ini_group])
ini_group

array([1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0,
       1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1,
       1, 1, 1, 0, 1, 0])

In [429]:
for n in range(num_groups):
    print(labels[ini_group==n])

['across' 'against' 'and' 'at' 'attached to' 'belonging to' 'carrying'
 'covering' 'flying in' 'has' 'holding' 'in' 'lying on' 'made of'
 'mounted on' 'on back of' 'over' 'painted on' 'part of' 'riding'
 'standing on' 'to' 'wearing' 'with']
['above' 'along' 'behind' 'between' 'covered in' 'eating' 'for' 'from'
 'growing on' 'hanging from' 'in front of' 'laying on' 'looking at' 'near'
 'of' 'on' 'parked on' 'playing' 'says' 'sitting on' 'under' 'using'
 'walking in' 'walking on' 'watching' 'wears']


In [411]:
calculate_var(ini_group, dist_mat, verbose=True)

7.2104178046577765 0.027772776322311968


72.13195082290008

In [412]:
update_group(ini_group, dist_mat)

array([0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1,
       1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1,
       0, 1, 0, 0, 0, 0])

In [438]:
best_group = -np.ones(num_labels)
biggest_var = 0
num_trial = 1
for i in range(num_trial):
    print(f"Trial {i}")
    # ini_group = np.random.randint(low=0, high=num_groups, size=num_labels) 
    ini_group = np.random.uniform(low=0, high=num_groups, size=num_labels)
    ini_group = np.array([math.floor(num) for num in ini_group])
    group = ini_group
    new_group = update_group(group, dist_mat, max_trial=15)
    epoch = 0
    while not (new_group == group).all():
        epoch += 1
        print(f"Epoch {epoch}")
        for n in range(num_groups):
            print(labels[new_group==n])
        group = new_group
        new_group = update_group(group, dist_mat, max_trial=15)
    var = calculate_var(group, dist_mat)
    if biggest_var < var:
        biggest_var = var
        best_group = group

Trial 0
Epoch 1
['above' 'across' 'against' 'and' 'at' 'attached to' 'behind'
 'belonging to' 'covering' 'holding' 'looking at' 'lying on' 'made of'
 'mounted on' 'near' 'of' 'on' 'on back of' 'over' 'painted on' 'part of'
 'playing' 'says' 'sitting on' 'standing on' 'to' 'walking on' 'wearing'
 'wears']
['along' 'between' 'carrying' 'covered in' 'eating' 'flying in' 'for'
 'from' 'growing on' 'hanging from' 'has' 'in' 'in front of' 'laying on'
 'parked on' 'riding' 'under' 'using' 'walking in' 'watching' 'with']
Epoch 2
['above' 'across' 'against' 'and' 'at' 'attached to' 'behind'
 'belonging to' 'covering' 'holding' 'looking at' 'lying on' 'made of'
 'mounted on' 'near' 'of' 'on' 'on back of' 'over' 'painted on' 'part of'
 'playing' 'says' 'sitting on' 'standing on' 'to' 'walking in' 'wearing'
 'wears']
['along' 'between' 'carrying' 'covered in' 'eating' 'flying in' 'for'
 'from' 'growing on' 'hanging from' 'has' 'in' 'in front of' 'laying on'
 'parked on' 'riding' 'under' 'using' 'w

In [439]:
for n in range(num_groups):
    print(labels[best_group==n])
print(biggest_var)

['above' 'across' 'against' 'and' 'at' 'attached to' 'behind'
 'belonging to' 'carrying' 'covering' 'holding' 'in' 'lying on' 'made of'
 'mounted on' 'near' 'of' 'on' 'over' 'painted on' 'parked on' 'part of'
 'playing' 'sitting on' 'standing on' 'to' 'under' 'using' 'wears']
['along' 'between' 'covered in' 'eating' 'flying in' 'for' 'from'
 'growing on' 'hanging from' 'has' 'in front of' 'laying on' 'looking at'
 'on back of' 'riding' 'says' 'walking in' 'walking on' 'watching'
 'wearing' 'with']
73.31523377152595


In [358]:
group = "'above' 'against' 'along' 'and' 'attached to' 'behind' 'belonging to' 'between' 'carrying' 'covered in' 'eating' 'for' 'hanging from' 'holding' 'in' 'in front of' 'looking at' 'made of' 'near' 'of' 'on' 'over' 'painted on' 'part of' 'playing' 'riding' 'says' 'sitting on' 'standing on' 'to' 'under' 'walking in' 'walking on' 'with'"
",".join(group.split())

"'above','against','along','and','attached,to','behind','belonging,to','between','carrying','covered,in','eating','for','hanging,from','holding','in','in,front,of','looking,at','made,of','near','of','on','over','painted,on','part,of','playing','riding','says','sitting,on','standing,on','to','under','walking,in','walking,on','with'"

In [422]:
calculate_var(best_group, dist_mat, verbose=True)

7.409734520132674 0.02801711922959411


74.12536232055633

## Calulation of variance from json groups

In [415]:
num_labels = 50

In [444]:
group_json = open("/home/miskai/デスクトップ/related-work/scene-graph-benchmark/SGG-TD2/datasets/vg/cat-base-2cluster.json")
group_json = json.load(group_json)

In [445]:
groups = -np.ones(num_labels+1, dtype=int)
idx = 0
for par in group_json["children"]:
    if par == "root":
        continue
    else:
        for ch in group_json["children"][par]:
            groups[pred2idx[ch]] = idx
        idx += 1
groups = groups[1:]
num_groups = idx

In [446]:
groups

array([0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0,
       1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1,
       0, 0, 1, 1, 1, 1])

In [447]:
calculate_var(groups, dist_mat, verbose=True)

6.605348381742426 0.106501923168834


66.1599857405931