# Prepare a set of nouns for open-vocabulary inference

In [1]:
import json
import pickle
import numpy as np

In [2]:
import torch

In [3]:
from maskrcnn_benchmark.config import cfg
from maskrcnn_benchmark.modeling.language_backbone.transformers import BERT

In [5]:
import csv

# Read the display name (second CSV column) of every row in the OpenImages
# boxable-class description file.
# csv.reader replaces a bare split(',') so that a quoted name containing a
# comma stays in one piece, and the encoding is pinned so decoding does not
# depend on the platform's default locale.
with open('../../data/OpenImages/class-descriptions-boxable.csv', 'r',
          encoding='utf-8', newline='') as fin:
    # Rows with fewer than two columns (e.g. blank lines) carry no name and
    # are skipped instead of raising IndexError.
    class_list = [row[1].strip() for row in csv.reader(fin) if len(row) > 1]

In [8]:
# Keep only the text in front of the first ' (' in each class name, which
# drops a trailing parenthesised qualifier when there is one.
class_list = [name.partition(' (')[0] for name in class_list]

In [10]:
# Lower-case every class name; the GloVe lookup further down matches words
# by exact string equality.
class_list = list(map(str.lower, class_list))

In [11]:
# Flatten the class names into one list of whitespace-separated words.
# Duplicates are still present at this point.
class_words = [word for name in class_list for word in name.split()]

In [12]:
# Deduplicate the words; the set is used for membership tests while scanning
# the GloVe file.
class_words = set(class_words)

In [13]:
# Map every word that occurs in a class name to its GloVe vector (stored as a
# list of Python floats). Each line of the GloVe text file is expected to be
# "<token> <v1> <v2> ...".
class_name_to_emb = {}
# The encoding is given explicitly so that decoding the file does not depend
# on the platform's default locale.
with open('../datasets/coco/zero-shot/glove.6B.300d.txt', 'r', encoding='utf-8') as fin:
    for row in fin:
        # Split off only the token first; the vector components are parsed
        # just for the rows whose token is actually needed. Blank or
        # value-less lines yield fewer than two fields and are skipped.
        fields = row.split(None, 1)
        if len(fields) == 2 and fields[0] in class_words:
            class_name_to_emb[fields[0]] = [float(num) for num in fields[1].split()]


In [15]:
# Sanity check: the two counts are equal only if every class word was found
# in the GloVe file (the dict only ever receives keys taken from class_words).
len(class_name_to_emb), len(class_words)

(644, 644)

In [17]:
def _average_word_vectors(phrase):
    """Return the float32 mean of the GloVe vectors of the words in *phrase*.

    Raises KeyError if a word has no entry in ``class_name_to_emb``.
    """
    tokens = phrase.split()
    total = np.zeros((300,), dtype=np.float32)
    for token in tokens:
        # In-place add keeps the accumulator in float32.
        total += class_name_to_emb[token]
    total /= len(tokens)
    return total


# One averaged vector per class name, in the same order as class_list.
embeddings = [_average_word_vectors(name) for name in class_list]

In [18]:
# Turn the list of per-class vectors into a single array (one row per class).
embeddings = np.asarray(embeddings)

In [20]:
# Sanity check: the number of embedding rows should equal the number of
# class names.
embeddings.shape, len(class_list)

((601, 300), 601)

In [24]:
# Persist the class names together with their averaged GloVe vectors as one
# pickled (names, array) tuple.
glove_payload = (class_list, embeddings)
with open('../datasets/coco/zero-shot/oi_nouns.pkl', mode='wb') as pkl_file:
    pickle.dump(glove_payload, pkl_file)

In [25]:
# Instantiate the project's BERT language backbone from `cfg` exactly as
# imported (this notebook applies no config overrides before this point).
bert = BERT(cfg)

In [26]:
# Move the model to the GPU (requires a CUDA device). The return value is
# bound to `_` so the notebook does not echo it as cell output.
_ = bert.to('cuda')

In [27]:
# Run BERT strictly for inference: eval() puts the module in evaluation mode
# (disabling train-only behaviour such as dropout) and no_grad() avoids
# recording an autograd graph for the whole batch of class names.
bert.eval()
with torch.no_grad():
    encoded_class_list = bert(class_list)

In [28]:
# Invert 'special_tokens_mask' and cast to float32 so it can be used as a
# multiplicative weight.
# NOTE(review): assumes the mask holds 0/1 values with 1 marking special
# tokens - confirm against the BERT wrapper.
special = encoded_class_list['special_tokens_mask']
mask = (1 - special).float()

In [29]:
# Masked mean over dim 1: weight every position by `mask`, sum, then divide
# by the per-row sum of the mask.
token_emb = encoded_class_list['input_embeddings']
masked_sum = (token_emb * mask.unsqueeze(2)).sum(dim=1)
token_count = mask.sum(dim=1, keepdim=True)
bertembeddings = masked_sum / token_count

In [30]:
# Copy the pooled embeddings to host memory and convert them to a NumPy
# array so they can be pickled without a torch dependency on the tensor.
bertembeddings = bertembeddings.cpu().numpy()

In [31]:
# Sanity check: expect one row per class name.
bertembeddings.shape

(601, 768)

In [32]:
# Persist the class names together with their pooled BERT embeddings as one
# pickled (names, array) tuple.
bert_payload = (class_list, bertembeddings)
with open('../datasets/coco/zero-shot/oi_nouns_bertemb.pkl', mode='wb') as pkl_file:
    pickle.dump(bert_payload, pkl_file)