In [1]:
%load_ext autoreload
%autoreload 2

# --- Run configuration -------------------------------------------------------
# Target environment, handed to db.connect_geovistory(); with 'prod' the
# recorded outputs show a connection to the PRODUCTION database.
env = 'prod'
# Geovistory project id: passed to db.connect_geovistory() and used to filter
# projects.info_proj_rel rows in the queries below.
pk_project = 6857901
# Passed to db.connect_geovistory(); presumably toggles whether write
# statements are really executed -- TODO confirm in geovpylib.database.
execute = True
# Not referenced by the cells visible in this notebook; presumably metadata
# consumed by geovpylib tooling -- TODO confirm or remove.
metadata_str = 'comment-correction'
import_manner = 'one-shot'

import os
import pandas as pd
import numpy as np
from datetime import datetime
import duckdb
import plotly.express as px

import geovpylib.analysis as a
import geovpylib.database as db
import geovpylib.graphs as graphs
import geovpylib.pks as pks
import geovpylib.recordlinkage as rl
import geovpylib.sparql as sparql
import geovpylib.utils as u

# Eta helper instance from geovpylib.utils; not referenced by the cells visible
# in this notebook (presumably a progress/ETA reporter -- TODO confirm or drop).
eta = u.Eta()

# Comment correction

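According to [this GitHub issue comment](https://github.com/geovistory/symogih/issues/6#issuecomment-1600581989), two corrections are needed in the project: the statements that were recorded with the "text has text type" property must use the "comment has comment type" property instead, and the affected resources must then be re-classed as comments.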

### Correct properties

In [2]:
# Collect the primary keys of the statements to correct: statements that are
# in the project (info_proj_rel.is_in_project = true), whose property is
# pks.properties.text_hasTextType_textType and whose subject resource has
# class pks.classes.text.
# The SQL text is built before connecting so that the connection is only held
# for the duration of the query itself.
text_type_statements_sql = f"""
    select 
        s.pk_entity, s.fk_subject_info, fk_property, fk_object_info, r1.fk_class as subject_class, r2.fk_class as object_class
    from information.statement s
    inner join projects.info_proj_rel ipr on ipr.fk_entity = s.pk_entity and ipr.fk_project = {pk_project} and ipr.is_in_project = true
    inner join information.resource r1 on r1.pk_entity = s.fk_subject_info 
    inner join information.resource r2 on r2.pk_entity = s.fk_object_info 
    where s.fk_property = {pks.properties.text_hasTextType_textType} and r1.fk_class = {pks.classes.text}
"""

db.connect_geovistory(env, pk_project, execute, skip_protection=True)
try:
    # Only the statement keys are kept; the other columns are selected for
    # inspection purposes.
    wrong_statement = db.query(text_type_statements_sql)['pk_entity'].tolist()
finally:
    # Always release the database connection, even when the query fails.
    db.disconnect()

[DB] Connecting to PRODUCTION Database ... Connected!
[DB] Database correctly disconnected.


In [4]:
# Re-point the statements collected in `wrong_statement` to the
# pks.properties.comment_hasCommentType_CommentType property.

# Drop duplicate keys (order preserved); `in (...)` does not need them.
statement_pks = list(dict.fromkeys(wrong_statement))
values = '(' + ','.join(str(pk) for pk in statement_pks) + ')'

update_statement_sql = f"""
    update information.statement
           set fk_property = {pks.properties.comment_hasCommentType_CommentType}
    where pk_entity in {values};
"""

if statement_pks:
    db.connect_geovistory(env, pk_project, execute)
    try:
        db.execute(update_statement_sql)
    finally:
        # Always release the database connection, even when the update fails.
        db.disconnect()
else:
    # An empty key list would render `in ()`, which is a SQL syntax error:
    # there is nothing to correct, so the database is not touched at all.
    print('No statement to correct: update skipped.')

[DB] Connecting to PRODUCTION Database ... Connected!
[DB] Database correctly disconnected.


### Correct classes

In [5]:
# Collect the resources to re-class: the objects (fk_object_info) of the
# project's statements whose property is 1763 and whose object resource has
# class 785.
# The project filter uses `pk_project` (instead of a hard-coded id) so that the
# query always targets the same project as the connection.
# NOTE(review): 1763 and 785 are raw keys; presumably they correspond to
# pks.properties.comment_hasCommentType_CommentType and pks.classes.text --
# confirm and replace them with the named constants.
misclassed_objects_sql = f"""
    select
        s.pk_entity, s.fk_subject_info, fk_property, fk_object_info, r1.fk_class as subject_class, r2.fk_class as object_class
    from information.statement s
    inner join projects.info_proj_rel ipr on ipr.fk_entity = s.pk_entity and ipr.fk_project = {pk_project} and ipr.is_in_project = true
    inner join information.resource r1 on r1.pk_entity = s.fk_subject_info 
    inner join information.resource r2 on r2.pk_entity = s.fk_object_info 
    where s.fk_property = 1763 and r2.fk_class = 785
"""

db.connect_geovistory(env, pk_project, execute, skip_protection=True)
try:
    # Only the object keys are kept; the other columns are selected for
    # inspection purposes.
    wrong_instances = db.query(misclassed_objects_sql)['fk_object_info'].tolist()
finally:
    # Always release the database connection, even when the query fails.
    db.disconnect()

[DB] Connecting to PRODUCTION Database ... Connected!
[DB] Database correctly disconnected.


In [6]:
# Set the class of the resources collected in `wrong_instances` to
# pks.classes.comment.

# The same resource can be the object of several statements: drop duplicate
# keys (order preserved) before building the `in (...)` list.
resource_pks = list(dict.fromkeys(wrong_instances))
values = '(' + ','.join(str(pk) for pk in resource_pks) + ')'

update_resource_sql = f"""
    update information.resource
           set fk_class = {pks.classes.comment}
    where pk_entity in {values}; 
"""

if resource_pks:
    db.connect_geovistory(env, pk_project, execute)
    try:
        db.execute(update_resource_sql)
    finally:
        # Always release the database connection, even when the update fails.
        db.disconnect()
else:
    # An empty key list would render `in ()`, which is a SQL syntax error:
    # there is nothing to correct, so the database is not touched at all.
    print('No resource to correct: update skipped.')

[DB] Connecting to PRODUCTION Database ... Connected!
[DB] Database correctly disconnected.
