In [None]:
import pandas as pd
import polars as pl
import numpy as np
import json
import os
from os.path import exists
import time
import sys
# Make the local framework directory importable. Inserting at index 1 leaves
# sys.path[0] (the notebook/script directory) as the first lookup location.
sys.path.insert(1, '../../framework')
# NOTE(review): the star import brings every public racetrack name into the
# notebook namespace; the cells visible here only use RACETrack (through `rt`).
from racetrack import *
# Single RACETrack instance shared by all cells below.
rt = RACETrack()

In [None]:
# Load mc1.json into a plain Python structure. The context manager closes the
# file handle as soon as parsing finishes, and the encoding is pinned to UTF-8
# (the encoding JSON interchange files use) instead of depending on the
# platform's locale default.
with open('../../../data/2024_vast/MC1/mc1.json', encoding='utf-8') as _mc1_file_:
    _data_ = json.load(_mc1_file_)

In [None]:
# Wrap the raw JSON in RACETrack's JSON representation, then ask it for the
# three values unpacked below.
# NOTE(review): presumably a dataframe, its relationship definitions and node
# labels (going by the variable names) -- confirm against the framework.
_json_repr_ = rt.jsonRepr(_data_)
_df_, _relates_, _labels_ = _json_repr_.starPathGraphDataFrame()
# Build a graph from the dataframe + relationships and compute a layout for it.
g   = rt.createNetworkXGraph(_df_, _relates_)
pos = rt.hyperTreeLayout(g)
# Disabled interactive view; it is the only visible consumer of `pos` and `_labels_`.
#rt.interactiveGraphLayout(_df_, ln_params={'relationships':_relates_, 'node_labels':_labels_, 'pos':pos}, w=800, h=600)

In [None]:
def _key_signature_(record):
    """Return the record's keys, sorted and joined with '-'.

    Two records with the same set of keys produce the same string, so the
    signature identifies which fields a node or link record carries.
    """
    return '-'.join(sorted(record.keys()))


def _signature_histogram_svg_(signatures, column, w, h=64):
    """Render a histogram of the signatures in `signatures[column]` as SVG text.

    signatures -- dict with a single entry: column name -> list of signature strings
    column     -- that column name; used for both binning and coloring
    w, h       -- width and height passed through to rt.histogram

    The categorical colors are optimized for this column's distinct values
    immediately before rendering, and the SVG is produced right away, so a
    later call for another column cannot affect this histogram's output.
    """
    rt.co_mgr.optimizeCategoricalColors(set(signatures[column]))
    return rt.histogram(pd.DataFrame(signatures), bin_by=column, color_by=column, w=w, h=h)._repr_svg_()


# Which combinations of fields occur on nodes?
_lu_ = {'sort': [_key_signature_(_node_) for _node_ in _data_['nodes']]}
svgs = [_signature_histogram_svg_(_lu_, 'sort', w=256)]

# ... and on links?  (Wider tile than the node histogram.)
_lu_ = {'links': [_key_signature_(_link_) for _link_ in _data_['links']]}
svgs.append(_signature_histogram_svg_(_lu_, 'links', w=600))

# Last expression of the cell: show both histograms side by side.
rt.tile(svgs)

In [None]:
ofi = rt.ontologyFrameworkInstance()

#
# nodes
#
def _buffer_node_(ofi, node):
    """Register one node record with the ontology and buffer its property triples.

    The record shape is decided by which keys are present:
      * 'dob' present     -> country, birth date and node id are resolved (in
                             that order), then hasCitizenship and hasBirthDate
                             triples are buffered for the node
      * 'country' present -> country and node id are resolved, then an
                             incorporatedIn triple is buffered
      * neither           -> only the node id is resolved and registered

    The node id is always resolved with the record's own 'type' as its
    ontology type and with the 'sbj,obj' role string.
    """
    resolve = ofi.resolveUniqIdAndUpdateLookups

    if 'dob' in node:
        country, dob, node_id, node_type = node['country'], node['dob'], node['id'], node['type']
        country_uid = resolve(country, 'obj:Country', 'uniq', 'obj')
        dob_uid     = resolve(dob,     'xsd:date',    'date', 'obj')
        node_uid    = resolve(node_id, node_type,     'uniq', 'sbj,obj')
        ofi.bufferTripleToAddLater(node_uid, 'Property.hasCitizenship', country_uid)
        ofi.bufferTripleToAddLater(node_uid, 'Property.hasBirthDate',   dob_uid)
        return

    if 'country' in node:
        country, node_id, node_type = node['country'], node['id'], node['type']
        country_uid = resolve(country, 'obj:Country', 'uniq', 'obj')
        node_uid    = resolve(node_id, node_type,     'uniq', 'sbj,obj')
        ofi.bufferTripleToAddLater(node_uid, 'Property.incorporatedIn', country_uid)
        return

    # No extra properties: resolving the id is done purely for its lookup side effect.
    node_id, node_type = node['id'], node['type']
    resolve(node_id, node_type, 'uniq', 'sbj,obj')


for _obj_ in _data_['nodes']:
    _buffer_node_(ofi, _obj_)

#
# links
#
# One row per link attribute that becomes a triple hanging off the link itself:
#   (json key, triple verb, ontology type, resolution kind, provenance-only?)
# Row order is significant: values are resolved, and their triples buffered,
# in exactly this order.
_LINK_ATTRIBUTES_ = (
    ('_algorithm',      'Source.fromAlgorithm',  'obj:Algorithm', 'uniq', True),
    ('_articleid',      'Source.fromArticle',    'obj:Article',   'uniq', True),
    ('_date_added',     'Property.dateAdded',    'xsd:date',      'date', False),
    ('_last_edited_by', 'Property.lastEditedBy', 'Entity.Person', 'uniq', True),
    ('key',             'Property.hasKey',       'xsd:integer',   'valu', False),
)


def _buffer_link_(ofi, link):
    """Buffer the triple for one link record plus the triples describing that link.

    Every link yields a (source, type, target) triple, with source and target
    looked up via ofi.resolveUniqId.  The value returned by
    bufferTripleToAddLater for that triple is then used as the subject of the
    attribute triples listed in _LINK_ATTRIBUTES_.  Rows flagged
    provenance-only are applied only when the record has an '_algorithm' key;
    records without it get just the date-added and key attributes.

    Records with '_algorithm' also carry a '_raw_source' field in the data;
    it is not recorded in the ontology, so it is not read here.

    Raises KeyError if a field required for the record's shape is missing.
    """
    has_provenance = '_algorithm' in link
    attributes = [(key, verb, type_name, kind)
                  for key, verb, type_name, kind, provenance_only in _LINK_ATTRIBUTES_
                  if has_provenance or not provenance_only]

    # Read every field before touching the ontology, so a malformed record
    # fails before any lookups or buffered triples have been modified.
    values = [link[key] for key, _, _, _ in attributes]
    source, target, relation = link['source'], link['target'], link['type']

    attribute_uids = [ofi.resolveUniqIdAndUpdateLookups(value, type_name, kind, 'obj')
                      for value, (_, _, type_name, kind) in zip(values, attributes)]
    source_uid = ofi.resolveUniqId(source)
    target_uid = ofi.resolveUniqId(target)

    link_uid = ofi.bufferTripleToAddLater(source_uid, relation, target_uid)
    for (_, verb, _, _), attribute_uid in zip(attributes, attribute_uids):
        ofi.bufferTripleToAddLater(link_uid, verb, attribute_uid)


for _obj_ in _data_['links']:
    _buffer_link_(ofi, _obj_)

# Flush everything buffered so far into the ontology instance.
ofi.appendBufferedTriplesAndClearBuffer()

In [None]:
# Histogram of the ontology's triples binned by their 'vrb' column
# (presumably the verb/predicate of each triple -- confirm against the framework).
# Left as the cell's last expression so the notebook renders it.
rt.histogram(ofi.df_triples, bin_by='vrb', w=256, h=384)

In [None]:
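# Optional persistence step, intentionally left disabled.
# NOTE(review): presumably to_files() writes the ontology under the given path
# prefix and fm_files() loads it back into a fresh instance -- confirm against
# the framework before enabling.
#ofi.to_files('../../../data/2024_vast/MC1/2024_rt_ontology')
#rt.ontologyFrameworkInstance().fm_files('../../../data/2024_vast/MC1/2024_rt_ontology')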