In [8]:
%load_ext autoreload
%autoreload 2

env = 'prod'
pk_project = 6857901
execute = True
metadata_str = 'notes-complement-definition'
import_manner = 'one-shot'

import os
import pandas as pd
import numpy as np
from datetime import datetime
import duckdb
import plotly.express as px

import geovpylib.analysis as a
import geovpylib.database as db
import geovpylib.graphs as graphs
import geovpylib.pks as pks
import geovpylib.recordlinkage as rl
import geovpylib.sparql as sparql
import geovpylib.utils as u

eta = u.Eta()

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Add [notes] and [compléments] to the right definitions

### Fetch actor text properties from right ones

In [9]:
text_prop = u.read_df('../../data/bhp/actor_text_property.csv')
definitions = text_prop[['fk_actor', 'property_type', 'text', 'notes']].copy()

In [10]:
added = []

definitions['def'] = pd.NA
for i, row in definitions.iterrows():
    the_str = ''
    if row['property_type'] == 'complément': the_str += '[Complément] '
    if pd.notna(row['text']) and row['text'] != 'None': the_str += row['text']
    if pd.notna(row['notes']) and row['notes'] != 'None': the_str += ' [Notes] ' + row['notes']
    definitions.at[i, 'def'] = the_str

    if '[Notes]' in the_str or '[Complément]' in the_str: 
        added.append(i)

definitions = definitions[['fk_actor', 'text', 'def']]
definitions.columns = ['pk_bhp', 'previous_def', 'new_def']

definitions = definitions.loc[added]
definitions.reset_index(inplace=True, drop=True)

### Fetch actors from Geovistory, and their definitions

In [11]:
db.connect_geovistory(env, pk_project, execute, skip_protection=True)

persons = db.query(f"""
    select distinct
        r.pk_entity as pk_gv,
        a3.string as uri,
        a5.pk_entity as pk_appe,
        a5.string as definition
    from information.resource r
    inner join projects.info_proj_rel ipr on ipr.fk_entity = r.pk_entity and ipr.fk_project = {pk_project} and ipr.is_in_project = true
    -- URI
    inner join information.statement s1 on s1.fk_subject_info = r.pk_entity and s1.fk_property = {pks.properties.entity_sameAsURI_URI}
    inner join projects.info_proj_rel ipr1 on ipr1.fk_entity = s1.pk_entity and ipr1.fk_project = {pk_project} and ipr1.is_in_project = true
    inner join information.statement s2 on s2.fk_subject_info = s1.fk_object_info and s2.fk_property = {pks.properties.appe_hasValue_string}
    inner join projects.info_proj_rel ipr2 on ipr2.fk_entity = s2.pk_entity and ipr2.fk_project = {pk_project} and ipr2.is_in_project = true
    inner join information.appellation a3 on a3.pk_entity = s2.fk_object_info
    inner join projects.info_proj_rel ipr3 on ipr3.fk_entity = a3.pk_entity and ipr3.fk_project = {pk_project} and ipr3.is_in_project = true
    -- Definition
    inner join information.statement s4 on s4.fk_subject_info = r.pk_entity and s4.fk_property = {pks.properties.entity_hasDefinition_text}
    inner join projects.info_proj_rel ipr4 on ipr4.fk_entity = s4.pk_entity and ipr4.fk_project = {pk_project} and ipr4.is_in_project = true
    inner join information.statement s5 on s5.fk_subject_info = s4.fk_object_info and s5.fk_property = {pks.properties.text_hasValueVersion_string}
    inner join information.appellation a5 on a5.pk_entity = s5.fk_object_info
    where r.fk_class = {pks.classes.person}
""")
persons = persons[persons.uri.str.contains('symogih.org')]
persons['pk_bhp'] = persons.uri.str.replace('http://symogih.org/resource/Actr', '', regex=False).astype(int)
persons.drop(columns=['uri'], inplace=True)

persons.sort_values('pk_bhp', inplace=True)
persons.drop_duplicates(inplace=True)
persons.reset_index(inplace=True, drop=True)
# persons = persons[['pk_bhp', 'pk_gv', 'pk_birth',  'pk_death']].drop_duplicates()

db.disconnect()

# 18s

[DB] Connecting to PRODUCTION Database ... Connected!
[DB] Database correctly disconnected.


### Put everything together

In [12]:
themerge = definitions.merge(persons, how='left')
themerge = themerge[themerge['previous_def'] == themerge['definition']]
themerge['pk_appe'] = themerge['pk_appe'].astype(pd.Int64Dtype())
themerge['pk_gv'] = themerge['pk_gv'].astype(pd.Int64Dtype())

themerge = themerge[themerge['new_def'] != themerge['definition']]
themerge.shape

(11711, 6)

### Update appellations

In [13]:
db.connect_geovistory(env, pk_project, execute)

sql = ''
eta.begin(len(themerge), 'Updating appellations')
for i, row in themerge.iterrows():

    if row['previous_def'] == ['definition']: continue
    else: 
        sql += f"""
            update information.appellation
                set string = '{row['new_def'].replace("'", "''").replace('%', '%%')}'
            where pk_entity = {row['pk_appe']};
        """
    
    if i % 100 == 0:
        db.execute(sql)
        sql = ''

    # db.execute(sql)
    # sql = ''
    
    eta.iter()
eta.end()

db.execute(sql)

[DB] Connecting to PRODUCTION Database ... Connected!
Updating appellations is done - Elapsed: [00h03'18]                   


<sqlalchemy.engine.cursor.LegacyCursorResult at 0x7f2cbfaf3e50>

In [14]:
themerge

Unnamed: 0,pk_bhp,previous_def,new_def,pk_gv,pk_appe,definition
44701,56865,Les bases fondamentales du droit civil en face...,[Complément] Les bases fondamentales du droit ...,6532801,7267597,Les bases fondamentales du droit civil en face...
44703,56835,"Annales de l'Université de Grenoble, Bulletin ...",[Complément] Annales de l'Université de Grenob...,6532800,7267582,"Annales de l'Université de Grenoble, Bulletin ..."
44722,56872,"""La révision du régime hypothécaire établi par...","[Complément] ""La révision du régime hypothécai...",6503488,7236946,"""La révision du régime hypothécaire établi par..."
44746,56847,Delpech n'a publié aucun article; on trouve de...,[Complément] Delpech n'a publié aucun article;...,6506651,7242948,Delpech n'a publié aucun article; on trouve de...
44760,56874,Revue générale de droit international public; ...,[Complément] Revue générale de droit internati...,6506294,7242198,Revue générale de droit international public; ...
...,...,...,...,...,...,...
110650,2380,Enseigne la discipline de Scotisme auprès de U...,Enseigne la discipline de Scotisme auprès de U...,6541824,7278115,Enseigne la discipline de Scotisme auprès de U...
110655,2380,Enseigne la discipline de Logique auprès de Un...,Enseigne la discipline de Logique auprès de Un...,6541824,7278116,Enseigne la discipline de Logique auprès de Un...
110656,1659,Enseigne la discipline de Théologie auprès de ...,Enseigne la discipline de Théologie auprès de ...,6536283,7272634,Enseigne la discipline de Théologie auprès de ...
110657,2503,Enseigne la discipline de Théologie dogmatique...,Enseigne la discipline de Théologie dogmatique...,6541834,7278126,Enseigne la discipline de Théologie dogmatique...
