In [62]:
import itertools

import numpy as np
from scipy.io.matlab import loadmat

def load_dataset(dataset):
    """Load a relational dataset from its MATLAB file as a dense tensor.

    Parameters
    ----------
    dataset : str
        Either ``'umls'`` or ``'nation'``. The name is also the
        sub-directory of ``../../almc/data`` the ``.mat`` file is read from.

    Returns
    -------
    T : numpy.ndarray
        float32 array whose axes are reordered to
        [relation, entity, entity]; NaN entries are replaced by 0.
    rnames : list
        Relation names read from the file.
    enames : list
        Entity names read from the file.
    gnames : list
        Names read from the ``'gnames'`` field for ``'umls'``. For
        ``'nation'`` this is the very same list object as ``enames``.

    Raises
    ------
    ValueError
        If ``dataset`` is not one of the supported names.
    """
    if dataset == 'umls':
        mat = loadmat('../../almc/data/%s/uml.mat' % (dataset))
        T = np.array(mat['Rs'], np.float32)
        # Each name comes back wrapped in nested arrays; index down to the value.
        rnames = [name[0][0] for name in mat['relnames']]
        enames = [name[0][0] for name in mat['names']]
        gnames = [name[0] for name in mat['gnames'][0]]
    elif dataset == 'nation':
        mat = loadmat('../../almc/data/%s/dnations.mat' % (dataset))
        T = np.array(mat['R'], np.float32)
        rnames = [name[0] for name in mat['relnnames'][0]]
        enames = [name[0] for name in mat['countrynames'][0]]
        # No separate grouping is read for this dataset; reuse the entity list.
        gnames = enames
    else:
        # Without this branch an unsupported name would only fail later with
        # an UnboundLocalError on `T`, which hides the real cause.
        raise ValueError(
            "unknown dataset %r: expected 'umls' or 'nation'" % (dataset,))

    # Bring the last axis to the front: [a, b, r] -> [r, a, b], i.e.
    # [relation, entity, entity]. Same result as swapaxes(1, 2) followed by
    # swapaxes(0, 1).
    T = np.transpose(T, (2, 0, 1))
    T[np.isnan(T)] = 0

    return T, rnames, enames, gnames

In [63]:
# Dataset to export; load_dataset has branches for 'umls' and 'nation'.
dataset = 'umls'
# T is the [relation, entity, entity] tensor; the three lists hold the
# relation, entity and group names returned alongside it.
T, relnames, entity_names, group_names = load_dataset(dataset)

In [64]:
# Write the index -> name lookup tables, one "<index>\t<name>" line per entry.
with open('../%s/entities.txt' % dataset, 'w') as ef:
    for ei, entity in enumerate(entity_names):
        ef.write('%d\t%s\n' % (ei, entity.strip()))
with open('../%s/relations.txt' % dataset, 'w') as rf:
    for ri, relation in enumerate(relnames):
        rf.write('%d\t%s\n' % (ri, relation.strip()))

# load_dataset returns the same list object for entities and groups when a
# dataset has no separate grouping, so only write the file when they differ.
if entity_names is not group_names:
    with open('../%s/entity_category.txt' % dataset, 'w') as gf:
        for gi, group in enumerate(group_names):
            gf.write('%d\t%s\n' % (gi, group))

n_relation, n_entity, _ = T.shape

# np.argwhere returns the [relation, entity, entity] index of every cell equal
# to 1 in row-major order -- the same order a nested loop over the three axes
# would visit them -- without a Python-level test for each cell of the tensor.
with open('../%s/triples.txt' % dataset, 'w') as tf:
    for ri, ei, ej in np.argwhere(T == 1):
        tf.write('%d\t%d\t%d\n' % (ri, ei, ej))