In [1]:
%load_ext autoreload
%autoreload 2

# --- Run configuration (read by the setup calls and by every query below) ---
# Target environment name, passed to db.connect_geovistory().
env = 'prod'
# Project primary key: passed to db.connect_geovistory() and used to filter
# projects.info_proj_rel.fk_project in the SQL queries of this notebook.
pk_project = 6857901
# Passed to db.connect_geovistory().
# NOTE(review): presumably controls whether write statements are really
# executed - confirm against geovpylib.database.
execute = False
# Suffix appended to the date-based 'import-id' metadata value.
metadata_str = ''
# Insert strategy handed to db.set_insert_manner(); the alternative value
# noted by the author is 'batch'.
import_manner = 'one-shot' # 'batch'

import os
import pandas as pd
import numpy as np
from datetime import datetime
import duckdb
import plotly.express as px

import geovpylib.analysis as a
import geovpylib.database as db
import geovpylib.queries as q
import geovpylib.pks as pks
import geovpylib.sparql as sparql
import geovpylib.utils as u

# Helper object from geovpylib.utils; not referenced by the cells shown in
# this notebook but kept as part of the standard setup.
eta = u.Eta()

# Alternative connection (external database), left disabled by the author:
# db.connect_external(os.getenv(''))

# Connect to the Geovistory database for the configured environment/project.
db.connect_geovistory(env, pk_project, execute)

# Tag the run with an import id of the form '<YYYYMMDD>-<metadata_str>'.
db.set_metadata({
    'import-id': '-'.join((datetime.today().strftime('%Y%m%d'), metadata_str)),
})

# Select the insert strategy chosen in the configuration above.
db.set_insert_manner(import_manner)

[DB] Environment not connected.


# Solve Favorite names for BHP entities

## Fetch data from BHP

In [2]:
# BHP actor names: keep only the standard names and build a '<fk_actor>-<name>'
# key for each of them, so they can be compared with the Geovistory side.
# NOTE(review): the saved output of this cell is a FileNotFoundError for this
# path, and the collective-actor file is named with hyphens
# ('collective-actor-name.csv') - confirm the expected file name.
actr_names = (
    u.read_df('../../data/bhp/actor_name.csv')
    [['fk_actor', 'is_standard_name', 'concat_name']]
    .assign(key=lambda names: names['fk_actor'].astype(pd.StringDtype()) + '-' + names['concat_name'])
    .loc[lambda names: names['is_standard_name']]
)

# Set of the standard-name keys.
actr_std_names_set = set(actr_names['key'])

FileNotFoundError: [Errno 2] No such file or directory: '../../data/bhp/actor_name.csv'

In [None]:
# BHP collective-actor names: keep only the standard names and build a
# '<fk_collective_actor>-<name>' key for each of them, so they can be compared
# with the Geovistory side.
coac_names = (
    u.read_df('../../data/bhp/collective-actor-name.csv')
    [['fk_collective_actor', 'is_standard_name', 'name']]
    .assign(key=lambda names: names['fk_collective_actor'].astype(pd.StringDtype()) + '-' + names['name'])
    .loc[lambda names: names['is_standard_name']]
)

# Set of the standard-name keys.
coac_std_names_set = set(coac_names['key'])

## Fetch data from Geovistory (GV)

In [None]:
# Fetch every (person, name, URI) combination that is in the project:
#   r1      : resources of class `person`
#   s2/ipr2 : "is appellation for language of" statement pointing at the person;
#             its project relation is returned as `pk_ipr`
#   s3/a4   : "refers to name" statement of the same subject -> name string
#   s5..a7  : "same as URI" statement of the person -> URI string
# One row is produced per name/URI combination, so `pk_gv` and `pk_ipr` may repeat.
persons = db.query(f"""
    select
        r1.pk_entity as pk_gv, r1.fk_class, a4.string as name, ipr2.pk_entity as pk_ipr, a7.string as uri
    from information.resource r1
    inner join projects.info_proj_rel ipr1 on ipr1.fk_entity = r1.pk_entity and ipr1.fk_project = {pk_project} and ipr1.is_in_project = true
    inner join information.statement s2 on s2.fk_object_info = r1.pk_entity and s2.fk_property = {pks.properties.aial_isAppelationForLanguageOf_entity}
    inner join projects.info_proj_rel ipr2 on ipr2.fk_entity = s2.pk_entity and ipr2.fk_project = {pk_project} and ipr2.is_in_project = true
    inner join information.statement s3 on s3.fk_subject_info = s2.fk_subject_info and s3.fk_property = {pks.properties.aial_refersToName_appellation}
    inner join projects.info_proj_rel ipr3 on ipr3.fk_entity = s3.pk_entity and ipr3.fk_project = {pk_project} and ipr3.is_in_project = true
    inner join information.appellation a4 on a4.pk_entity = s3.fk_object_info
    inner join information.statement s5 on s5.fk_subject_info = r1.pk_entity and s5.fk_property = {pks.properties.entity_sameAsURI_URI}
    inner join projects.info_proj_rel ipr5 on ipr5.fk_entity = s5.pk_entity and ipr5.fk_project = {pk_project} and ipr5.is_in_project = true
    inner join information.statement s6 on s6.fk_subject_info = s5.fk_object_info and s6.fk_property = {pks.properties.appe_hasValue_string}
    inner join projects.info_proj_rel ipr6 on ipr6.fk_entity = s6.pk_entity and ipr6.fk_project = {pk_project} and ipr6.is_in_project = true
    inner join information.appellation a7 on a7.pk_entity = s6.fk_object_info
    where r1.fk_class = {pks.classes.person}
""").sort_values('pk_gv')

# Keep only the rows whose URI points to symogih.org (BHP).
# regex=False: match the text literally ('.' must not act as a wildcard).
# na=False: a missing URI counts as "no match" instead of breaking the mask.
# .copy(): the column assignments below work on an independent frame.
persons = persons[persons['uri'].str.contains('http://symogih.org', regex=False, na=False)].copy()

# BHP identifier = URI without its 'Actr' resource prefix (literal replacement).
persons['pk_bhp'] = persons['uri'].str.replace('http://symogih.org/resource/Actr', '', regex=False)

# Same '<bhp id>-<name>' key format as the one built for the BHP actor names.
persons['key'] = persons['pk_bhp'] + '-' + persons['name']

In [7]:
# Fetch every (group, name, URI) combination that is in the project:
#   r1      : resources of class `group`
#   s2/ipr2 : "is appellation for language of" statement pointing at the group;
#             its project relation is returned as `pk_ipr`
#   s3/a4   : "refers to name" statement of the same subject -> name string
#   s5..a7  : "same as URI" statement of the group -> URI string
# One row is produced per name/URI combination, so `pk_gv` and `pk_ipr` may repeat.
groups = db.query(f"""
    select
        r1.pk_entity as pk_gv, r1.fk_class, a4.string as name, ipr2.pk_entity as pk_ipr, a7.string as uri
    from information.resource r1
    inner join projects.info_proj_rel ipr1 on ipr1.fk_entity = r1.pk_entity and ipr1.fk_project = {pk_project} and ipr1.is_in_project = true
    inner join information.statement s2 on s2.fk_object_info = r1.pk_entity and s2.fk_property = {pks.properties.aial_isAppelationForLanguageOf_entity}
    inner join projects.info_proj_rel ipr2 on ipr2.fk_entity = s2.pk_entity and ipr2.fk_project = {pk_project} and ipr2.is_in_project = true
    inner join information.statement s3 on s3.fk_subject_info = s2.fk_subject_info and s3.fk_property = {pks.properties.aial_refersToName_appellation}
    inner join projects.info_proj_rel ipr3 on ipr3.fk_entity = s3.pk_entity and ipr3.fk_project = {pk_project} and ipr3.is_in_project = true
    inner join information.appellation a4 on a4.pk_entity = s3.fk_object_info
    inner join information.statement s5 on s5.fk_subject_info = r1.pk_entity and s5.fk_property = {pks.properties.entity_sameAsURI_URI}
    inner join projects.info_proj_rel ipr5 on ipr5.fk_entity = s5.pk_entity and ipr5.fk_project = {pk_project} and ipr5.is_in_project = true
    inner join information.statement s6 on s6.fk_subject_info = s5.fk_object_info and s6.fk_property = {pks.properties.appe_hasValue_string}
    inner join projects.info_proj_rel ipr6 on ipr6.fk_entity = s6.pk_entity and ipr6.fk_project = {pk_project} and ipr6.is_in_project = true
    inner join information.appellation a7 on a7.pk_entity = s6.fk_object_info
    where r1.fk_class = {pks.classes.group}
""").sort_values('pk_gv')

# Keep only the rows whose URI points to symogih.org (BHP).
# regex=False: match the text literally ('.' must not act as a wildcard).
# na=False: a missing URI counts as "no match" instead of breaking the mask.
# .copy(): the column assignments below work on an independent frame.
groups = groups[groups['uri'].str.contains('http://symogih.org', regex=False, na=False)].copy()

# BHP identifier = URI without its 'CoAc' resource prefix (literal replacement).
groups['pk_bhp'] = groups['uri'].str.replace('http://symogih.org/resource/CoAc', '', regex=False)

# Same '<bhp id>-<name>' key format as the one built for the BHP collective-actor names.
groups['key'] = groups['pk_bhp'] + '-' + groups['name']


### Join data

In [10]:
# Keys ('<bhp id>-<name>') of the BHP standard actor names.
std_names_actr = actr_names[actr_names['is_standard_name']]['key'].tolist()

# Flag each Geovistory person name that is the BHP standard name.
# Vectorised membership test against a set: one hash lookup per row instead of
# scanning the whole list for every row inside an `iterrows()` loop.
persons['is_standard'] = persons['key'].isin(set(std_names_actr))

# (the previous row-by-row list lookup took ~30s)

In [11]:
# Keys ('<bhp id>-<name>') of the BHP standard collective-actor names.
std_names_coac = coac_names[coac_names['is_standard_name']]['key'].tolist()

# Flag each Geovistory group name that is the BHP standard name.
# Vectorised membership test against a set: one hash lookup per row instead of
# scanning the whole list for every row inside an `iterrows()` loop.
groups['is_standard'] = groups['key'].isin(set(std_names_coac))

# (the previous row-by-row list lookup took ~5s)

### Clean IPR (reset `ord_num_of_domain` on the project relations)

In [16]:
# Reset the ordering of all person-name project relations: every
# projects.info_proj_rel row listed in persons['pk_ipr'] gets
# ord_num_of_domain = NULL.
# NOTE(review): u.get_sql_ready_str presumably renders the values as a
# parenthesised SQL list usable after `in` - confirm, including for an empty
# selection.
ipr_list =  u.get_sql_ready_str(persons['pk_ipr'])

db.execute(f"""
    update projects.info_proj_rel
        set ord_num_of_domain = NULL
    where pk_entity in {ipr_list};
""")

# 1m17s

In [17]:
# Reset the ordering of all group-name project relations: every
# projects.info_proj_rel row listed in groups['pk_ipr'] gets
# ord_num_of_domain = NULL.
# NOTE(review): u.get_sql_ready_str presumably renders the values as a
# parenthesised SQL list usable after `in` - confirm, including for an empty
# selection.
ipr_list =  u.get_sql_ready_str(groups['pk_ipr'])

db.execute(f"""
    update projects.info_proj_rel
        set ord_num_of_domain = NULL
    where pk_entity in {ipr_list};
""")

# 23s

### Create favorites

In [18]:
# Mark the standard names of persons as favorites: the project relations of
# the rows flagged `is_standard` get ord_num_of_domain = 1.
persons_std = persons[persons['is_standard']]
# NOTE(review): u.get_sql_ready_str presumably renders the values as a
# parenthesised SQL list usable after `in` - confirm, including for an empty
# selection.
ipr_list =  u.get_sql_ready_str(persons_std['pk_ipr'])

db.execute(f"""
    update projects.info_proj_rel
        set ord_num_of_domain = 1
    where pk_entity in {ipr_list};
""")

# 1m8s

In [19]:
# Mark the standard names of groups as favorites: the project relations of
# the rows flagged `is_standard` get ord_num_of_domain = 1.
groups_std = groups[groups['is_standard']]
# NOTE(review): u.get_sql_ready_str presumably renders the values as a
# parenthesised SQL list usable after `in` - confirm, including for an empty
# selection.
ipr_list =  u.get_sql_ready_str(groups_std['pk_ipr'])

db.execute(f"""
    update projects.info_proj_rel
        set ord_num_of_domain = 1
    where pk_entity in {ipr_list};
""")

# 24s

---