In [1]:
%load_ext autoreload
%autoreload 2

# --- Run configuration -------------------------------------------------------
# `env` and `pk_project` are passed to db.connect_geovistory() in the cells below.
env = 'prod'
pk_project = 6857901
# Passed as the third argument of db.connect_geovistory() in the creation cell.
# NOTE(review): presumably False means "dry run, write nothing" - confirm in geovpylib.
execute = True
# NOTE(review): `metadata_str` and `import_manner` are not referenced in the visible
# cells; presumably they are read by geovpylib or by later cells - confirm.
metadata_str = 'add-comment-to-birth-and-death'
import_manner = 'one-shot' # alternative value noted by the author: 'batch'

# Standard library and third-party packages.
import os
import pandas as pd
import numpy as np
from datetime import datetime
import duckdb
import plotly.express as px

# Project helper library (database access, pk constants, analysis helpers, ...).
import geovpylib.analysis as a
import geovpylib.database as db
import geovpylib.graphs as graphs
import geovpylib.pks as pks
import geovpylib.recordlinkage as rl
import geovpylib.sparql as sparql
import geovpylib.utils as u

# NOTE(review): `eta` is not used in the visible cells - confirm it is still needed.
eta = u.Eta()

# Add comment to birth and death

### Fetch information from BHP

In [2]:
# Fetch, from the BHP database, the free-text fields (complement, notes,
# date_hour_label) stored on the dates of informations whose role type is 40 or 45
# (presumably the birth and death roles - confirm against the BHP role types).
# Result: `bhp`, one row per date having at least one non-empty text field, with
# columns fk_info, complement, notes, date_hour_label, uri.
db.connect_external(os.environ.get('YELLOW_BHP'))
try:
    bhp = db.query("""
        select
            ir.fk_associated_object as pk_bhp,
            ir.fk_information as fk_info,
            id.complement as complement,
            id.notes as notes,
            id.date_hour_label
        from bhp.information_role ir
        inner join bhp.information_date id on ir.fk_information = id.fk_information
        where ir.fk_type_role = 40 or ir.fk_type_role = 45
    """)

    text_columns = ['complement', 'notes', 'date_hour_label']

    # Keep only rows whose associated object key contains 'Actr'.
    bhp = bhp.dropna(subset=['pk_bhp'])
    bhp = bhp[bhp['pk_bhp'].str.contains('Actr')].copy()

    # Normalise the text BEFORE filtering: strip the <p> wrappers, then turn missing
    # values into ''. Filtering first (as the previous version did) let rows through
    # whenever any field was NaN, because NaN != '' evaluates to True, and kept
    # values such as '<p></p>' that only become empty once the tags are removed.
    for column in ['complement', 'notes']:
        bhp[column] = (
            bhp[column]
            .str.replace('<p>', '', regex=False)
            .str.replace('</p>', '', regex=False)
            .str.strip()
        )
    bhp[text_columns] = bhp[text_columns].fillna('')

    # Keep rows where at least one of the three fields is non-empty. The previous
    # condition tested `notes` twice and never looked at `date_hour_label`.
    bhp = bhp[(bhp[text_columns] != '').any(axis=1)].copy()

    # URI under which the information is referenced; used to find the Geovistory entity.
    bhp['uri'] = ['http://symogih.org/resource/Info' + str(fk_info) for fk_info in bhp['fk_info']]
    bhp = bhp.drop(columns=['pk_bhp'])

    a.infos(bhp)
finally:
    # Release the connection even when the query or the clean-up above fails.
    db.disconnect()

[DB] Connecting to PGSQL Database ... Connected!
Shape:  (50, 5) - extract:


Unnamed: 0,fk_info,complement,notes,date_hour_label,uri
326,15059,Date à supprimer,,,http://symogih.org/resource/Info15059
479,15847,,,premier ventôse an quatre,http://symogih.org/resource/Info15847
498,16059,,,13 floréal an trois,http://symogih.org/resource/Info16059
524,17521,,,3 complémentaire an six,http://symogih.org/resource/Info17521
526,17523,,,30 frimaire an IX,http://symogih.org/resource/Info17523


[DB] Database correctly disconnected.


### Fetch the equivalent Geovistory entities

In [3]:
# Look up, in the Geovistory project, the entities linked to the BHP URIs.
# The third positional argument is False here (the creation cell passes `execute`
# in that position), so this connection is presumably read-only - confirm.
db.connect_geovistory(env, pk_project, False, skip_protection=True)

# SQL tuple literal listing every BHP URI: ('uri1','uri2',...)
quoted_uris = "','".join(list(bhp['uri']))
uris = f"('{quoted_uris}')"

# appellation (string) <- URI resource (pk_uri) <- entity (pk_entity)
gv_query = f"""
    select
        s2.fk_subject_info as pk_entity,
        s1.fk_subject_info as pk_uri,
        a.string as uri
    from information.appellation a
    inner join information.statement s1 on s1.fk_object_info = a.pk_entity and s1.fk_property = {pks.properties.appe_hasValue_string}
    inner join information.statement s2 on s2.fk_object_info = s1.fk_subject_info and s2.fk_property = {pks.properties.entity_sameAsURI_URI}
    where a.string in {uris}      
"""
gv = db.query(gv_query)

db.disconnect()

[DB] Connecting to PRODUCTION Database ... Connected!
[DB] Database correctly disconnected.


### Merge data

In [None]:
# Pair every BHP text row with its Geovistory entity. `uri` is the only column the
# two frames share, so it is the join key; the inner join drops BHP rows that have
# no Geovistory match. Only pk_entity and the three text columns are kept.
bhp_with_entities = pd.merge(bhp, gv, how='inner', on='uri')
merged = bhp_with_entities.drop(columns=['fk_info', 'uri', 'pk_uri'])

### Create the comments in Geovistory

In [None]:
def _create_comments(rows, text_column, pk_comment_type):
    """Create one comment per row of `rows` whose `text_column` is not empty.

    For the selected rows, in this order:
      1. create one comment resource per row (`pk_comment`),
      2. create one appellation per text value (`pk_appellation`),
      3. create three statements: entity -> comment, comment -> comment type,
         comment -> appellation.

    Returns the selected rows with the `pk_comment` and `pk_appellation` columns added.
    """
    selected = rows[rows[text_column] != ''].copy()
    selected['pk_comment'] = db.resources.create(pks.classes.comment, len(selected))
    selected['pk_appellation'] = db.appellations.create(selected[text_column])

    db.statements.create(selected['pk_entity'], pks.properties.entity_hasComment_text, selected['pk_comment'])
    db.statements.create(selected['pk_comment'], pks.properties.comment_hasCommentType_CommentType, pk_comment_type)
    db.statements.create(selected['pk_comment'], pks.properties.text_hasValueVersion_string, selected['pk_appellation'])
    return selected


db.connect_geovistory(env, pk_project, execute)

# The last argument is the pk used as object of the "has comment type" statement.
# NOTE(review): date_hour_label uses the same comment-type pk as complements
# (8065621) while notes use 8065632 - confirm this is intended and not a copy-paste slip.
complements = _create_comments(merged, 'complement', 8065621)
notes = _create_comments(merged, 'notes', 8065632)
date_hour_labels = _create_comments(merged, 'date_hour_label', 8065621)
