In [1]:
# General lib
import pandas as pd

# geovdata package
import geovdata.sparql as sparql
import geovdata.kit as kit
from geovdata.ipython import infos

# geovpylib package
import geovpylib.database as db
import geovpylib.pks as pks


# Run configuration shared by the cells below.
# `env`, `pk_project` and `execute` are all handed to db.connect_geovistory();
# `pk_project` also selects the SPARQL endpoint via sparql.connect_geovistory().
# NOTE(review): `execute = True` presumably makes the DB shortcuts perform real
# writes against the environment named by `env` - confirm before re-running.
env = 'prod'
pk_project = pks.projects.switzerland_and_beyond
execute = True

## Data from IDREF

In [2]:
# Load the IDREF <-> DHS alignment export (tab-separated).
data = pd.read_csv('./export_idref_dhs.csv', sep='\t')

# `DHS_` is the join key used against the Geovistory URIs:
#   - a single trailing '/' is removed, because Geovistory stores the DHS
#     article URIs without it;
#   - 'https://' is rewritten to 'http://', the same scheme normalisation the
#     Geovistory URIs receive, so both sides of the join agree.
# The vectorised `.str` operations keep missing values as NaN and accept empty
# strings, where indexing `uri[-1]` on each value would raise.
data['DHS_'] = (
    data['DHS']
    .str.replace(r'/$', '', regex=True)
    .str.replace('https://', 'http://', regex=False)
)

infos(data)

Shape:  (6996, 3) - extract:


Unnamed: 0,IDREF,DHS,DHS_
0,http://www.idref.fr/256737665/id,http://hls-dhs-dss.ch/articles/028802/,http://hls-dhs-dss.ch/articles/028802
1,http://www.idref.fr/200153501/id,http://hls-dhs-dss.ch/articles/045091/,http://hls-dhs-dss.ch/articles/045091
2,http://www.idref.fr/237172658/id,http://hls-dhs-dss.ch/articles/046283/,http://hls-dhs-dss.ch/articles/046283
3,http://www.idref.fr/234967145/id,http://hls-dhs-dss.ch/articles/028346/,http://hls-dhs-dss.ch/articles/028346
4,http://www.idref.fr/192092146/id,http://hls-dhs-dss.ch/articles/007322/,http://hls-dhs-dss.ch/articles/007322


## Data from Geovistory

In [3]:
sparql.connect_geovistory(pk_project)

# Fetch every (entity, uri) pair linked through the property path
# ontome:p1842 / ontome:p1843 in the connected project.
table = sparql.query("""
    select *
    where {
        ?entity ontome:p1842/ontome:p1843 ?uri
    }
""")

# The numeric primary key is the entity URI without its resource prefix.
# The cast fails loudly if an entity URI does not follow that pattern.
table['pk_entity'] = (
    table['entity']
    .str.replace('http://geovistory.org/resource/i', '', regex=False)
    .astype('int64')
)

# Normalise the URI the same way as the IDREF-side join key (`DHS_`):
# http scheme and no trailing '/'. Without the second step a URI stored with a
# trailing slash would silently fail to match during the merge.
# `regex` is passed explicitly because its default differs across pandas versions.
table['uri'] = (
    table['uri']
    .str.replace('https://', 'http://', regex=False)
    .str.replace(r'/$', '', regex=True)
)

infos(table)

>> SPARQL endpoint of Geovistory project 153 set.
Shape:  (23980, 3) - extract:


Unnamed: 0,entity,uri,pk_entity
0,http://geovistory.org/resource/i10315216,http://hls-dhs-dss.ch/articles/028377,10315216
1,http://geovistory.org/resource/i10315217,http://hls-dhs-dss.ch/articles/044295,10315217
2,http://geovistory.org/resource/i10315218,http://hls-dhs-dss.ch/articles/028424,10315218
3,http://geovistory.org/resource/i10315219,http://hls-dhs-dss.ch/articles/011946,10315219
4,http://geovistory.org/resource/i10315220,http://hls-dhs-dss.ch/articles/012028,10315220


## Merging IDREF and Geovistory

In [4]:
# Left-join the IDREF rows with the Geovistory entities on the normalised DHS URI.
#
# `data` is reassigned here, so the inputs are cleaned first to keep the cell
# safe to re-run:
#   - columns coming from a previous merge are dropped (a second merge would
#     otherwise create `_x` / `_y` suffixed columns and lose `pk_entity`);
#   - exact duplicate rows are collapsed on both sides, which undoes the row
#     fan-out of a previous merge and avoids multiplying identical pairs.
# Rows that differ in any column are all kept, so a DHS URI linked to several
# entities still yields one row per entity.
idref_rows = data.drop(columns=table.columns, errors='ignore').drop_duplicates()
geovistory_rows = table.drop_duplicates()

data = idref_rows.merge(geovistory_rows, left_on='DHS_', right_on='uri', how='left')

# Unmatched rows have no entity: use the nullable integer dtype so the matched
# keys stay integers instead of being displayed as floats.
data['pk_entity'] = data['pk_entity'].astype(pd.Int64Dtype())

infos(data)

Shape:  (7012, 6) - extract:


Unnamed: 0,IDREF,DHS,DHS_,entity,uri,pk_entity
0,http://www.idref.fr/256737665/id,http://hls-dhs-dss.ch/articles/028802/,http://hls-dhs-dss.ch/articles/028802,http://geovistory.org/resource/i10323059,http://hls-dhs-dss.ch/articles/028802,10323059
1,http://www.idref.fr/200153501/id,http://hls-dhs-dss.ch/articles/045091/,http://hls-dhs-dss.ch/articles/045091,http://geovistory.org/resource/i10316225,http://hls-dhs-dss.ch/articles/045091,10316225
2,http://www.idref.fr/237172658/id,http://hls-dhs-dss.ch/articles/046283/,http://hls-dhs-dss.ch/articles/046283,http://geovistory.org/resource/i10330969,http://hls-dhs-dss.ch/articles/046283,10330969
3,http://www.idref.fr/234967145/id,http://hls-dhs-dss.ch/articles/028346/,http://hls-dhs-dss.ch/articles/028346,http://geovistory.org/resource/i10329182,http://hls-dhs-dss.ch/articles/028346,10329182
4,http://www.idref.fr/192092146/id,http://hls-dhs-dss.ch/articles/007322/,http://hls-dhs-dss.ch/articles/007322,http://geovistory.org/resource/i10319423,http://hls-dhs-dss.ch/articles/007322,10319423


## Add the IDREF URIs to Geovistory

In [5]:
# Connect geovpylib's `db` module to the Geovistory database, using the
# environment, project and execute flag defined in the configuration cell.
db.connect_geovistory(env, pk_project, execute)

[DB] Connecting to PRODUCTION Database ... Connected!


In [6]:
# Keep the rows whose DHS article was found in Geovistory, one row per
# (IDREF, entity) pair.
has_entity = data['pk_entity'].notna()
selection = data.loc[has_entity].drop_duplicates(subset=['IDREF', 'pk_entity'])

# Attach each IDREF URI to its matched entity.
# NOTE(review): pairs where one IDREF maps to several entities, or one entity
# to several IDREF records, are not filtered out here - confirm that is intended.
# NOTE(review): presumably a second run creates the same URIs again - verify
# before re-executing against the database.
db.shortcuts.add_uris(selection['pk_entity'], selection['IDREF'])

[DB] Creating 6921 resources of class [967] ... Done in 0000y00m00d-00h00m02s
[DB] Creating info_proj_rel of 6921 entities with project <153> ... Done in 0000y00m00d-00h00m01s
[DB] Creating 6921 appellations ... Done in 0000y00m00d-00h00m03s
[DB] Creating 6921 statements ... Updating metadata ... Done in 0000y00m00d-00h00m04s
[DB] Creating info_proj_rel of 6921 entities with project <153> ... Done in 0000y00m00d-00h00m02s
[DB] Creating 6921 statements ... Updating metadata ... Done in 0000y00m00d-00h00m03s
[DB] Creating info_proj_rel of 6921 entities with project <153> ... Done in 0000y00m00d-00h00m02s


## Export table for IDREF

In [7]:
# Table returned to IDREF: the original IDREF / DHS URIs plus the matched
# Geovistory entity URI (empty when no entity was found), without repeated rows.
to_export = (
    data[['IDREF', 'DHS', 'entity']]
    .drop_duplicates()
    .rename(columns={'entity': 'uri_geovistory'})
)

to_export.to_csv('./export_idref_dhs_with_geovistory.csv', sep='\t', index=False)

In [8]:
# Exported rows whose IDREF URI appears more than once, grouped by IDREF.
idref_repeated = to_export['IDREF'].duplicated(keep=False)
to_export.loc[idref_repeated].sort_values(by='IDREF')

Unnamed: 0,IDREF,DHS,uri_geovistory
4576,http://www.idref.fr/027161625/id,http://hls-dhs-dss.ch/articles/018734/,http://geovistory.org/resource/i2188083
4577,http://www.idref.fr/027161625/id,http://hls-dhs-dss.ch/articles/018734/,http://geovistory.org/resource/i2188346
6723,http://www.idref.fr/027894134/id,http://hls-dhs-dss.ch/articles/014666/,http://geovistory.org/resource/i27061
6724,http://www.idref.fr/027894134/id,http://hls-dhs-dss.ch/articles/014666/,http://geovistory.org/resource/i786979
6204,http://www.idref.fr/076991954/id,http://hls-dhs-dss.ch/articles/014707/,http://geovistory.org/resource/i25941
6205,http://www.idref.fr/076991954/id,http://hls-dhs-dss.ch/articles/014707/,http://geovistory.org/resource/i6516379
1155,http://www.idref.fr/081090994/id,http://hls-dhs-dss.ch/articles/031239/,http://geovistory.org/resource/i26222
1156,http://www.idref.fr/081090994/id,http://hls-dhs-dss.ch/articles/031239/,http://geovistory.org/resource/i6508101


In [9]:
# Exported rows whose Geovistory entity appears more than once, grouped by entity.
# Rows containing any missing value (e.g. unmatched rows) are removed at the end.
entity_repeated = to_export['uri_geovistory'].duplicated(keep=False)
to_export.loc[entity_repeated].sort_values(by='uri_geovistory').dropna()

Unnamed: 0,IDREF,DHS,uri_geovistory
69,http://www.idref.fr/027204936/id,http://hls-dhs-dss.ch/articles/048826/,http://geovistory.org/resource/i26164
102,http://www.idref.fr/033556091/id,http://hls-dhs-dss.ch/articles/044657/,http://geovistory.org/resource/i26164
1023,http://www.idref.fr/148830145/id,http://hls-dhs-dss.ch/articles/031350/,http://geovistory.org/resource/i26164
2018,http://www.idref.fr/033566836/id,http://hls-dhs-dss.ch/articles/044400/,http://geovistory.org/resource/i26164
2475,http://www.idref.fr/070506744/id,http://hls-dhs-dss.ch/articles/015224/,http://geovistory.org/resource/i26164
2759,http://www.idref.fr/084370599/id,http://hls-dhs-dss.ch/articles/022484/,http://geovistory.org/resource/i26164
3239,http://www.idref.fr/150814011/id,http://hls-dhs-dss.ch/articles/028514/,http://geovistory.org/resource/i26164
3250,http://www.idref.fr/028864697/id,http://hls-dhs-dss.ch/articles/043138/,http://geovistory.org/resource/i26164
3264,http://www.idref.fr/073318469/id,http://hls-dhs-dss.ch/articles/027029/,http://geovistory.org/resource/i26164
4052,http://www.idref.fr/026904667/id,http://hls-dhs-dss.ch/articles/042218/,http://geovistory.org/resource/i26164


In [11]:
# Exported rows for which no Geovistory entity was found.
entity_missing = to_export['uri_geovistory'].isna()
to_export.loc[entity_missing]

Unnamed: 0,IDREF,DHS,uri_geovistory
78,http://www.idref.fr/061092118/id,http://hls-dhs-dss.ch/articles/049051/,
93,http://www.idref.fr/029499283/id,http://hls-dhs-dss.ch/articles/043510/,
335,http://www.idref.fr/243940130/id,http://hls-dhs-dss.ch/articles/044663/,
488,http://www.idref.fr/135783429/id,http://hls-dhs-dss.ch/articles/014717/,
531,http://www.idref.fr/238306488/id,http://hls-dhs-dss.ch/articles/020019/,
...,...,...,...
6710,http://www.idref.fr/274970775/id,http://hls-dhs-dss.ch/articles/035522/,
6791,http://www.idref.fr/261834517/id,http://hls-dhs-dss.ch/articles/007842/,
6794,http://www.idref.fr/267672403/id,http://hls-dhs-dss.ch/articles/004606/,
6824,http://www.idref.fr/271205563/id,http://hls-dhs-dss.ch/articles/015645/,
