In [45]:
import json
from collections import OrderedDict, defaultdict
from operator import itemgetter

In [46]:
# Add BILOU and BIO representation.


In [47]:
# Run configuration.
# bilou_or_bio selects the tagging scheme used by word_label_to_tags;
# building_name and token_type pick which metadata files are read below.
bilou_or_bio = 'bio'
building_name = 'ebu3b'
#building_name = 'pouya'
token_type = 'justseparate'

# Word-level inputs. Both files are named after the building and the
# tokenisation variant.
sentence_path = 'metadata/{0}_sentence_dict_{1}.json'.format(building_name, token_type)
label_path = 'metadata/{0}_label_dict_{1}.json'.format(building_name, token_type)

with open(sentence_path, 'r') as sentence_file:
    word_sentence_dict = json.load(sentence_file)

with open(label_path, 'r') as label_file:
    word_label_dict = json.load(label_file)

In [48]:
# This part could be automated with Brick.
# Maps a coarse category name to the fine-grained labels that belong to it.
# get_key() returns the category *key*, so each key is emitted verbatim in
# the category tags (e.g. 'B_identifier').
# NOTE: the 'identifier' key used to be misspelled 'idenfitier'; files
# generated before this fix contain the old spelling and need regenerating.
category_dict = {
    'equipment': ['vav', 'chilled_water_pump', 'hot_water_pump', 'ahu', 'supply_fan', 'return_fan', 'exhaust_fan'],
    'network_adapter': ['vnd', 'network_adapter'],
    'identifier': ['identifier'],
    'location': ['room', 'floor', 'building', 'server'],
    'none': ['none']
}

In [49]:
def get_key(d, v):
    """Return the first key of ``d`` whose value contains ``v``.

    Args:
        d: Mapping from a key to a container of members.
        v: The member to look for.

    Returns:
        The first key (in the mapping's iteration order) whose container
        holds ``v``, or ``'pointtype'`` when no container does.
    """
    matching_keys = (key for key, members in d.items() if v in members)
    return next(matching_keys, 'pointtype')

In [50]:
def word_label_to_bilou(word, label):
    """Tag every character of ``word`` using the BILOU scheme.

    Two parallel lists of ``(char, tag)`` pairs are produced: one tagged with
    ``label`` itself, the other with the category that ``get_key`` finds for
    ``label`` in ``category_dict``.

    Tags:
        * ``'O'`` for every character when ``label`` is ``'none'``.
        * ``'U_<name>'`` for a single-character word.
        * ``'B_<name>'``, ``'I_<name>'`` ..., ``'L_<name>'`` otherwise.

    Args:
        word: The token whose characters are tagged.
        label: The label assigned to the whole token.

    Returns:
        A ``(char_label_list, char_category_list)`` tuple. Both lists are
        empty when ``word`` is empty.
    """
    # An empty word has no characters to tag. Without this guard a
    # non-'none' label would index word[0] and raise IndexError, whereas the
    # 'none' path already produced empty lists.
    if len(word) == 0:
        return [], []

    if label == 'none':
        outside = [(char, 'O') for char in word]
        # Return two independent lists, as callers receive separate objects.
        return outside, list(outside)

    category = get_key(category_dict, label)
    if len(word) == 1:
        prefixes = ['U_']
    else:
        # First char begins, last char ends, everything between is inside.
        prefixes = ['B_'] + ['I_'] * (len(word) - 2) + ['L_']

    char_label_list = [(char, prefix + label) for char, prefix in zip(word, prefixes)]
    char_category_list = [(char, prefix + category) for char, prefix in zip(word, prefixes)]
    return char_label_list, char_category_list


def word_label_to_bio(word, label):
    """Tag every character of ``word`` using the BIO scheme.

    Two parallel lists of ``(char, tag)`` pairs are produced: one tagged with
    ``label`` itself, the other with the category that ``get_key`` finds for
    ``label`` in ``category_dict``.

    Tags:
        * ``'O'`` for every character when ``label`` is ``'none'``.
        * ``'B_<name>'`` for the first character and ``'I_<name>'`` for all
          remaining characters otherwise.

    Args:
        word: The token whose characters are tagged.
        label: The label assigned to the whole token.

    Returns:
        A ``(char_label_list, char_category_list)`` tuple. Both lists are
        empty when ``word`` is empty.
    """
    # An empty word has no characters to tag. Without this guard a
    # non-'none' label would index word[0] and raise IndexError, whereas the
    # 'none' path already produced empty lists.
    if len(word) == 0:
        return [], []

    if label == 'none':
        outside = [(char, 'O') for char in word]
        # Return two independent lists, as callers receive separate objects.
        return outside, list(outside)

    category = get_key(category_dict, label)
    # One 'B_' followed by 'I_' for every remaining character (none for a
    # single-character word).
    prefixes = ['B_'] + ['I_'] * (len(word) - 1)

    char_label_list = [(char, prefix + label) for char, prefix in zip(word, prefixes)]
    char_category_list = [(char, prefix + category) for char, prefix in zip(word, prefixes)]
    return char_label_list, char_category_list


def word_label_to_tags(word, label):
    """Tag the characters of ``word`` with the globally selected scheme.

    The scheme is read from the module-level ``bilou_or_bio`` setting at call
    time and the work is delegated to ``word_label_to_bilou`` or
    ``word_label_to_bio``.

    Args:
        word: The token whose characters are tagged.
        label: The label assigned to the whole token.

    Returns:
        Whatever the selected tagger returns: a
        ``(char_label_list, char_category_list)`` tuple.

    Raises:
        Exception: If ``bilou_or_bio`` is neither ``'bilou'`` nor ``'bio'``.
    """
    scheme = bilou_or_bio
    if scheme not in ('bilou', 'bio'):
        raise Exception('select between "bilou" or "bio"')
    tagger = word_label_to_bilou if scheme == 'bilou' else word_label_to_bio
    return tagger(word, label)

In [51]:
# Expand each sentence from word-level labels to character-level tags.
# Both result dicts are keyed by srcid and hold one flat list of
# (char, tag) pairs per sentence: one tagged by label, one by category.
char_sentence_category_dict = dict()
char_sentence_label_dict = dict()
for srcid, word_labels in word_label_dict.items():
    char_sentence_category = list()
    char_sentence_label = list()
    word_sentence = word_sentence_dict[srcid]
    # zip() pairs each word with its label and stops at the shorter of the
    # two sequences.
    for word, label in zip(word_sentence, word_labels):
        char_label_list, char_category_list = word_label_to_tags(word, label)
        char_sentence_label.extend(char_label_list)
        char_sentence_category.extend(char_category_list)
    char_sentence_category_dict[srcid] = char_sentence_category
    char_sentence_label_dict[srcid] = char_sentence_label

In [52]:
# Tag every Brick tag with itself as the label and store the
# character-level label lists (the category lists are discarded).
with open('metadata/brick_tags.json', 'r') as tags_file:
    tag_list = json.load(tags_file)

# Index [0] keeps only the label list of each (labels, categories) pair.
brick_tag_labels = [word_label_to_tags(tag, tag)[0] for tag in tag_list]

with open('metadata/brick_tags_labels.json', 'w') as labels_file:
    json.dump(brick_tag_labels, labels_file, indent=2)

In [53]:
# Write the character-level category and label dictionaries for the
# selected building, in that order.
for path_template, payload in (
    ('metadata/{0}_char_category_dict.json', char_sentence_category_dict),
    ('metadata/{0}_char_label_dict.json', char_sentence_label_dict),
):
    with open(path_template.format(building_name), 'w') as out_file:
        json.dump(payload, out_file, indent=2)