# Existing births should have a place

For more info, see the related ticket: https://github.com/geovistory/switzerland-and-beyond/issues/1

In [1]:
# Target environment and Geovistory project used by every cell of this notebook.
env = 'staging'
pk_project = 153

import pandas as pd
import duckdb

import geopy.distance

import geovpylib.database as db
import geovpylib.utils as u
import geovpylib.sparql as sparql
import geovpylib.pks as pks
import geovpylib.graphs as graphs

# execute=False is a dry run: the recorded output reports "Requests will not be executed"
# and every creation call further down logs "Not executed".
db.connect(env, pk_project, execute=False)

Requests will not be executed
=== Setting STAGING environment ===
>> Connecting to PGSQL Database ... Connected!


## Fetch data

This data describes persons that already exist on Geovistory. The goal is to enrich them by adding a birth place (sometimes with geo coordinates) and URIs.

In [2]:
sparql.init(f"https://sparql.geovistory.org/api_v1_project_{pk_project}")

# Project entities of class ontome:c21 that have an owl:sameAs DBpedia URI and a linked
# ontome:p86 entity (?pk_birth). DBpedia is queried through a federated SERVICE for the
# birth place, its types, its coordinates and the Wikidata URI.
data = sparql.query("""
    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    PREFIX owl: <http://www.w3.org/2002/07/owl#>
    PREFIX ontome: <https://ontome.net/ontology/>
    PREFIX dbo: <http://dbpedia.org/ontology/>
    PREFIX dbp: <http://dbpedia.org/property/>
    PREFIX geo: <http://www.w3.org/2003/01/geo/wgs84_pos#>


    SELECT 
        ?pk_person ?dbpedia_person_uri ?wikidata_person_uri ?pk_birth ?birthPlace ?birthPlace_type1 ?birthPlace_type2 ?lat ?lng
    WHERE {
        ?pk_person a ontome:c21 .   
        ?pk_person owl:sameAs ?dbpedia_person_uri .
        ?pk_birth ontome:p86 ?pk_person
        { 
            SERVICE <https://dbpedia.org/sparql/> { 
                ?dbpedia_person_uri dbo:birthPlace ?birthPlace .  
                optional {?birthPlace dbp:type ?birthPlace_type1 .}
                optional {?birthPlace rdf:type ?birthPlace_type2 .}
                ?birthPlace geo:lat ?lat .
                ?birthPlace geo:long ?lng .
      			?dbpedia_person_uri owl:sameAs ?wikidata_person_uri .
                FILTER (CONTAINS(STR(?birthPlace_type2), 'http://dbpedia.org/ontology/'))
    			FILTER (CONTAINS(STR(?wikidata_person_uri), 'wikidata'))
            } 

        }
    }
""")


def _after_last_slash(uri, offset):
    """Return the end of `uri`, starting `offset` characters after its last '/'."""
    return uri[uri.rindex('/') + offset:]


def _after_last_slash_if_uri(value, offset):
    """Like `_after_last_slash`, but missing values and values without '/' are returned as they are."""
    if pd.notna(value) and '/' in value:
        return _after_last_slash(value, offset)
    return value


def _join_distinct(values):
    """Join the distinct, non-missing entries of a Series with ', '."""
    return ', '.join(values.dropna().drop_duplicates())


# Shorten the URIs to their last path segment. With an offset of 2 the character that
# directly follows the last '/' is skipped as well.
data['pk_person'] = [_after_last_slash(uri, 2) for uri in data['pk_person']]
data['birthPlace'] = [_after_last_slash(uri, 1) for uri in data['birthPlace']]
for column, offset in (('birthPlace_type1', 1), ('birthPlace_type2', 1), ('pk_birth', 2)):
    data[column] = [_after_last_slash_if_uri(value, offset) for value in data[column]]

# Collect, for each (person, birth place), the distinct values of both type columns
group_keys = ['pk_person', 'birthPlace']
grouped = data.groupby(group_keys)
birthplaces_types = grouped['birthPlace_type1'].apply(_join_distinct).reset_index().merge(
    grouped['birthPlace_type2'].apply(_join_distinct).reset_index(),
    on=group_keys,
)
combined = birthplaces_types['birthPlace_type1'] + ', ' + birthplaces_types['birthPlace_type2']
# When the first list is empty the concatenation starts with the separator: remove it
birthplaces_types['birthplace_types'] = [
    joined[2:] if joined.startswith(', ') else joined for joined in combined
]

# Bring the aggregated types back next to every row
data = data.merge(birthplaces_types, on=group_keys)

# Keep the columns of interest and remove identical rows. Several rows may remain for
# one (person, birth place) when the other columns (e.g. coordinates) differ.
kept_columns = ['pk_person', 'dbpedia_person_uri', 'wikidata_person_uri', 'pk_birth', 'birthPlace', 'birthplace_types', 'lat', 'lng']
data = data[kept_columns].drop_duplicates()

u.infos(data, random=True)

Shape:  (1799, 8)


Unnamed: 0,pk_person,dbpedia_person_uri,wikidata_person_uri,pk_birth,birthPlace,birthplace_types,lat,lng
1604,27271,http://dbpedia.org/resource/Franz_Lindt,http://www.wikidata.org/entity/Q4993327,69752,Switzerland,"MusicalArtist, Location, PopulatedPlace, Count...",46.95,7.45
1371,26568,http://dbpedia.org/resource/Hélène_Boschi,http://www.wikidata.org/entity/Q432871,69674,Lausanne,"City, Location, Place, PopulatedPlace, Settlement",46.5198,6.6335
922,26557,http://dbpedia.org/resource/Auguste_Arthur_de_...,http://www.wikidata.org/entity/Q124894,69081,Geneva,"Location, Settlement, Place, PopulatedPlace",46.2017,6.14694
1431,26594,http://dbpedia.org/resource/Ferdinand_Cattini,http://www.wikidata.org/entity/Q675189,69203,"Grono,_Switzerland","Settlement, Location, Place, PopulatedPlace",46.25,9.15
324,26521,http://dbpedia.org/resource/Marie-Anne_Desmarest,http://www.wikidata.org/entity/Q3291517,68390,France,"Country, Location, Place, PopulatedPlace",47.0,2.35


## Get information about geographical places

### List them

In [3]:
def _merge_type_lists(type_lists):
    """Union of the ', '-separated types of all `type_lists`, sorted and joined with ','."""
    return ','.join(sorted(set(', '.join(type_lists).split(', '))))


# Birth places only, with shorter column names
geo_places = data[['birthPlace', 'birthplace_types', 'lat', 'lng']].rename(
    columns={'birthPlace': 'name', 'birthplace_types': 'types'}
)

# All the types known for a place name, whatever the row they come from
geo_places_types = geo_places.groupby('name')['types'].apply(_merge_type_lists).reset_index()

# Replace the per-row types by the aggregated ones
geo_places = geo_places.drop(columns=['types']).merge(geo_places_types, on='name')

# Identical rows are useless
geo_places.drop_duplicates(inplace=True)

# Check that each place name now appears on a single row
unicity = geo_places['name'].is_unique
print(f'Unicity: {unicity}, shape now: {geo_places.shape}')

if not unicity:
    rows_per_name = geo_places.groupby('name')['types'].count()
    names = rows_per_name[rows_per_name != 1].index.tolist()
    print('Remaining duplicates:')
    display(geo_places[geo_places['name'].isin(names)])

print('Now that filter has been done, we observe that there is only duplicates because some geographical places have multiple geo coordinates couples. So we just arbitrarily take one.')

# Keep the first row of each name (the first coordinate couple met)
geo_places.drop_duplicates(subset=['name'], inplace=True)

u.infos(geo_places)

Unicity: False, shape now: (486, 4)
Remaining duplicates:


Unnamed: 0,name,lat,lng,types
424,Italy,41.9,12.4833,"Country,Location,Person,Place,PopulatedPlace"
425,Italy,41.9,12.0,"Country,Location,Person,Place,PopulatedPlace"
426,Italy,43.0,12.4833,"Country,Location,Person,Place,PopulatedPlace"
427,Italy,43.0,12.0,"Country,Location,Person,Place,PopulatedPlace"
655,France,47.0,2.35,"Country,Location,Place,PopulatedPlace"
656,France,47.0,2.0,"Country,Location,Place,PopulatedPlace"
657,France,48.85,2.35,"Country,Location,Place,PopulatedPlace"
658,France,48.85,2.0,"Country,Location,Place,PopulatedPlace"
1360,Austria,48.2,13.3333,"Country,Location,Person,Place,PopulatedPlace"
1361,Austria,48.2,16.35,"Country,Location,Person,Place,PopulatedPlace"


Now that filter has been done, we observe that there is only duplicates because some geographical places have multiple geo coordinates couples. So we just arbitrarily take one.
Shape:  (474, 4)


Unnamed: 0,name,lat,lng,types
0,Geneva,46.2017,6.14694,"Location,Place,PopulatedPlace,Settlement"
80,German_Empire,52.5167,13.4,"Country,Location,Place,PopulatedPlace"
98,Mulhouse,47.75,7.34,"City,Location,Place,PopulatedPlace,Settlement"
112,Brescia,45.5417,10.2167,"City,Location,Place,PopulatedPlace,Settlement"
116,Corteno_Golgi,46.1669,10.2444,"Location,Place,PopulatedPlace,Settlement,Village"


### Find already existing geographical places on geovistory

In [4]:
sparql.init('https://sparql.geovistory.org/api_v1_community_data')

# Every entity of class ontome:c363 of the community data, with (when available) its
# label, the label of its type and the point of its presence.
geov_geo_places = sparql.query("""
    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX geov: <http://geovistory.org/resource/>
    PREFIX ontome: <https://ontome.net/ontology/>

    SELECT
        ?pk_place ?name ?type ?point
    WHERE {
        ?pk_place a ontome:c363 .
        optional {?pk_place rdfs:label ?name }
        optional {?pk_place ontome:p1110 ?typeent . ?typeent rdfs:label ?type}
        optional {?presence ontome:p147 ?pk_place . ?presence ontome:p148 ?point}
    }
""")


def _inside_last_parenthesis(point):
    """Return what is between the last '(' and the final character; values without '>' or missing are kept."""
    if pd.notna(point) and '>' in point:
        return point[point.rindex('(') + 1:-1]
    return point


def _space_separated_part(point, position):
    """Return the `position`-th space-separated part of `point`; missing values are kept."""
    return point.split(' ')[position] if pd.notna(point) else point


# Keep the end of the place URI (the character following the last '/' is skipped too)
geov_geo_places['pk_place'] = [uri[uri.rindex('/') + 2:] for uri in geov_geo_places['pk_place']]

# The point is reduced to its two numbers: the first one is read as the longitude,
# the second one as the latitude
geov_geo_places['point'] = [_inside_last_parenthesis(point) for point in geov_geo_places['point']]
geov_geo_places['lat'] = [_space_separated_part(point, 1) for point in geov_geo_places['point']]
geov_geo_places['lng'] = [_space_separated_part(point, 0) for point in geov_geo_places['point']]

# Columns of interest
geov_geo_places = geov_geo_places[['pk_place', 'name', 'lat', 'lng', 'type']]

# All distinct type labels of a place, on one line
place_type = geov_geo_places.groupby('pk_place')['type'].apply(
    lambda labels: ', '.join(labels.dropna().drop_duplicates())
).reset_index()

# Replace the per-row type by the aggregated one; empty strings become missing values
geov_geo_places = (
    geov_geo_places.drop(columns=['type'])
    .merge(place_type, on='pk_place')
    .replace('', pd.NA)
)

# Identical rows are useless
geov_geo_places.drop_duplicates(inplace=True)

# Check that each name appears on a single row
unicity = geov_geo_places['name'].is_unique
print(f'Unicity: {unicity}, shape now: {geov_geo_places.shape}')

if not unicity:
    places_per_name = geov_geo_places.groupby('name')['pk_place'].count()
    names = places_per_name[places_per_name != 1].index.tolist()
    print('Remaining duplicates:')
    display(geov_geo_places[geov_geo_places['name'].isin(names)])

# NOTE(review): the recorded output lists different pk_place values sharing one name
# (e.g. '(no label)'), so the message below does not describe every duplicate, and the
# de-duplication on 'name' keeps only the first place of each name — confirm this is wanted.
print('Now that filter has been done, we observe that there is only duplicates because some geographical places have multiple geo coordinates couples. So we just arbitrarily take one.')

geov_geo_places.drop_duplicates(subset=['name'], inplace=True)

u.infos(geov_geo_places)

Unicity: False, shape now: (8527, 5)
Remaining duplicates:


Unnamed: 0,pk_place,name,lat,lng,type
6,748659,(no label),,,
7,748663,(no label),,,
12,771400,(no label),,,
23,1675432,(no label),,,
27,1283709,(no label),,,
...,...,...,...,...,...
8381,6184674,(no label),,,
8388,6188695,Crespano del Grappa,,,
8433,6236084,Toledo,,,
8486,6239308,Mandello del Lario,,,


Now that filter has been done, we observe that there is only duplicates because some geographical places have multiple geo coordinates couples. So we just arbitrarily take one.
Shape:  (8306, 5)


Unnamed: 0,pk_place,name,lat,lng,type
0,207282,Hoogveld NL,51.65833,5.66806,
1,207922,Lübbertsfehn DE,53.38892,7.50997,
2,209341,Stapel FR,50.74907,2.45426,
3,209298,Sopron HU,47.68501,16.59049,
4,209615,Upper Alsace FR,48.07871,7.3554,Area geografica


## Matching geographical places

The geographical places found below already exist on Geovistory: we do not need to create them, only to complete them.

In [5]:
# Look for same names in both tables.
# The new names are the last segment of DBpedia resource URIs, where spaces are written
# as underscores (e.g. 'Corteno_Golgi'), while Geovistory labels use plain spaces
# (e.g. 'Crespano del Grappa'). Underscores are therefore turned into spaces for the
# comparison only; `new_name` keeps the DBpedia form, which later cells rely on to
# rebuild the DBpedia URI and to join back on `birthPlace`.
matchings = duckdb.query("""
    select
        t2.pk_place,
        t1.name as new_name, t2.name as geov_name,
        t1.lat as new_lat, t2.lat as geov_lat,
        t1.lng as new_lng, t2.lng as geov_lng,
        t1.types as new_types, t2.type as geov_types
    from
        geo_places t1
        join geov_geo_places t2 on replace(t1.name, '_', ' ') = t2.name
""").to_df()

u.infos(matchings, nb=50)

Shape:  (32, 9)


Unnamed: 0,pk_place,new_name,geov_name,new_lat,geov_lat,new_lng,geov_lng,new_types,geov_types
0,3195535,Philadelphia,Philadelphia,39.9528,,-75.1636,,"City,Location,Place,PopulatedPlace,Settlement",
1,2215326,Kesswil,Kesswil,47.5833,47.591606,9.31667,9.308147,"Location,Place,PopulatedPlace,Settlement",Orașul
2,1897745,Zug,Zug,47.1681,,8.51694,,"Location,Place,PopulatedPlace,Settlement",
3,1044823,Bormio,Bormio,46.4667,46.466667,10.3667,10.366667,"Location,Place,PopulatedPlace,Settlement",Town
4,1788495,Sennwald,Sennwald,47.2667,,9.5,,"Location,Place,PopulatedPlace,Settlement",Commune
5,1082004,Füssen,Füssen,47.5667,47.566667,10.7,10.7,"Location,Place,PopulatedPlace,Settlement,Stadt...",Town
6,1506600,Vertova,Vertova,45.8167,45.816667,9.85,9.85,"Location,Place,PopulatedPlace,Settlement",Town
7,999686,Bergamo,Bergamo,45.695,45.695,9.67,9.67,"City,Location,Place,PopulatedPlace,Settlement",Town
8,2215345,Küsnacht,Küsnacht,47.3167,47.32018,8.58333,8.58553,"Location,Place,PopulatedPlace,Settlement",Orașul
9,3150818,Niederweningen,Niederweningen,47.5,47.51088010061455,8.38333,8.406931624625164,"Location,Place,PopulatedPlace,Settlement",Orașul


Now that we know which ones already exist, we need to make sure that each piece of information we are about to import does not already exist individually.

We prepare the table for the import

In [6]:
### URIs ###

# By default, every matched place receives its DBpedia URI, rebuilt from the DBpedia name
matchings['dbpedia_uri'] = 'http://dbpedia.org/resource/' + matchings['new_name']
matchings = matchings.drop(columns=['geov_name']).rename(columns={'new_name': 'name'})

# Look at geographical places: do they already have such URIs?
same_as = db.statements.get(fk_subjects=matchings['pk_place'].astype(int).tolist(), fk_properties=pks.properties.entity_sameAsURI_URI)
if len(same_as) == 0:
    print('None of them already has URIs')
else:
    # NOTE(review): assumes `db.statements.get` returns a DataFrame with 'fk_subject' and
    # 'fk_object' columns, plus a 'string' column for value statements — confirm in geovpylib.
    # Both columns are kept: the subject (the place) is needed to flag rows of `matchings`.
    same_as = same_as[['fk_subject', 'fk_object']].astype(int).rename(columns={'fk_subject': 'pk_place'})
    uri_statements = db.statements.get(fk_subjects=same_as['fk_object'].tolist(), fk_properties=pks.properties.text_hasValueVersion_string)
    uri_values = (
        uri_statements[['fk_subject', 'string']]
        .rename(columns={'fk_subject': 'fk_object', 'string': 'uri'})
        .astype({'fk_object': int})
    )
    uris = same_as.merge(uri_values, on='fk_object')[['pk_place', 'uri']]
    # Only DBpedia URIs matter here (plain substring search, missing strings ignored)
    uris = uris[uris['uri'].str.contains('http://dbpedia.org/resource/', regex=False, na=False)]
    # `pk_place` holds strings in `matchings`: compare as integers
    already_linked = matchings['pk_place'].astype(int).isin(uris['pk_place'])
    matchings.loc[already_linked, 'dbpedia_uri'] = pd.NA



### lats & lngs  ###

# Coordinates identical to the ones already on Geovistory do not have to be imported.
# They are compared as numbers, so '10.7' and '10.70' are seen as equal; values that are
# missing or not numeric never compare equal.
coordinates = matchings[['new_lat', 'geov_lat', 'new_lng', 'geov_lng']].apply(pd.to_numeric, errors='coerce')
same_point = coordinates['new_lat'].eq(coordinates['geov_lat']) & coordinates['new_lng'].eq(coordinates['geov_lng'])
matchings.loc[same_point, ['new_lat', 'new_lng']] = pd.NA



### Type ###

def _type_to_import(geov_types, new_types):
    """Return the type to write for a matched place, or pd.NA when nothing has to be written.

    Nothing is written when Geovistory already has a type. Otherwise the first keyword
    found in `new_types` wins, in the same order as the one used for new places
    (Country, then Departments_of_France, then City).
    """
    if pd.notna(geov_types):
        return pd.NA
    for candidate in ('Country', 'Departments_of_France', 'City'):
        if candidate in new_types:
            return candidate
    return pd.NA

# Built in one go so that the column exists even when no place gets a type
matchings['type'] = [
    _type_to_import(geov_types, new_types)
    for geov_types, new_types in zip(matchings['geov_types'], matchings['new_types'])
]

# Drop the comparison columns and keep the final names
matchings = (
    matchings
    .drop(columns=['geov_lat', 'geov_lng', 'new_types', 'geov_types'])
    .rename(columns={'new_lat': 'lat', 'new_lng': 'lng'})
)

u.infos(matchings)

None of them already has URIs
Shape:  (32, 6)


Unnamed: 0,pk_place,name,lat,lng,dbpedia_uri,type
0,3195535,Philadelphia,39.9528,-75.1636,http://dbpedia.org/resource/Philadelphia,City
1,2215326,Kesswil,47.5833,9.31667,http://dbpedia.org/resource/Kesswil,
2,1897745,Zug,47.1681,8.51694,http://dbpedia.org/resource/Zug,
3,1044823,Bormio,46.4667,10.3667,http://dbpedia.org/resource/Bormio,
4,1788495,Sennwald,47.2667,9.5,http://dbpedia.org/resource/Sennwald,


## Update existing geographical places

### Update type

In [7]:
# Matched places for which a type has to be written (rows with a missing value are left out)
selection = matchings.dropna(subset=['pk_place', 'type'])[['pk_place', 'type']]

u.infos(selection)

Shape:  (4, 2)


Unnamed: 0,pk_place,type
0,3195535,City
11,742522,City
14,3230970,City
25,3184085,Country


In [8]:
def get_geo_place_type(text):
    """Return the pk (from `pks.entities`) of the geographical place type named `text`.

    Supported names are 'City', 'Country', 'Town' and 'Village', i.e. the types this
    notebook assigns to places and for which a pk is referenced in the notebook.

    Raises:
        ValueError: if `text` is any other value.
    """
    if text == 'City':
        return pks.entities.pk_geo_place_city
    if text == 'Country':
        return pks.entities.pk_geo_place_country
    if text == 'Town':
        return pks.entities.pk_geo_place_town
    if text == 'Village':
        return pks.entities.pk_geo_place_village

    raise ValueError(f'Unknown type: {text}')

# One statement per selected place: place -> has identifying geo place type -> type.
# Arguments are evaluated before the call, so an unknown type name raises before
# anything is sent.
z = db.statements.create(
    selection['pk_place'].astype(int).tolist(),
    pks.properties.geoPlace_hasIdentifyingGeoPlaceType_geoPlaceType,
    [get_geo_place_type(type_name) for type_name in selection['type']],
)

Creating 4 statements ... Not executed (executed option is false) Done in [00h00'00]
Creating info_proj_rel of 4 entities with project <153> ... Not executed (executed option is false) Done in [00h00'00]


### Update URIs

In [9]:
# Matched places for which a DBpedia URI has to be written
selection = matchings.dropna(subset=['pk_place', 'dbpedia_uri'])[['pk_place', 'dbpedia_uri']]

u.infos(selection)

Shape:  (32, 2)


Unnamed: 0,pk_place,dbpedia_uri
0,3195535,http://dbpedia.org/resource/Philadelphia
1,2215326,http://dbpedia.org/resource/Kesswil
2,1897745,http://dbpedia.org/resource/Zug
3,1044823,http://dbpedia.org/resource/Bormio
4,1788495,http://dbpedia.org/resource/Sennwald


In [10]:
# Create entities: one resource of class `pks.classes.uri` per selected place, and one
# appellation per DBpedia URI string.
# NOTE(review): both results are used below as statement subjects/objects, so they are
# presumably lists of pks aligned with the rows of `selection` — confirm in geovpylib.
uris = db.resources.create(pks.classes.uri, len(selection))
appellations = db.appellations.create(selection['dbpedia_uri'].tolist())

# Entity same as URI - Prepare (subject: the existing place, object: the new URI resource)
fk_subjects = selection['pk_place'].astype(int).tolist()
fk_properties = pks.properties.entity_sameAsURI_URI
fk_objects = uris

# Entity same as URI - Request (`z` only holds the returned value, it is not read afterwards)
z = db.statements.create(fk_subjects, fk_properties, fk_objects)


# URI has value appellation - Prepare (subject: the new URI resource, object: its string)
fk_subjects = uris
fk_properties = pks.properties.appe_hasValue_string
fk_objects = appellations

# URI has value appellation - Request
z = db.statements.create(fk_subjects, fk_properties, fk_objects)


Creating 32 resources of class [967] ... Not executed (executed option is false) Done in [00h00'00]
Creating info_proj_rel of 32 entities with project <153> ... Not executed (executed option is false) Done in [00h00'00]
Creating 32 appellations ... Not executed (executed option is false) Done in [00h00'00]
Creating info_proj_rel of 32 entities with project <153> ... Not executed (executed option is false) Done in [00h00'00]
Creating 32 statements ... Not executed (executed option is false) Done in [00h00'00]
Creating info_proj_rel of 32 entities with project <153> ... Not executed (executed option is false) Done in [00h00'00]
Creating 32 statements ... Not executed (executed option is false) Done in [00h00'00]
Creating info_proj_rel of 32 entities with project <153> ... Not executed (executed option is false) Done in [00h00'00]


### Update Geocoordinates

In [11]:
# Matched places for which coordinates have to be written (both values must be present)
selection = matchings.dropna(subset=['pk_place', 'lat', 'lng'])[['pk_place', 'lat', 'lng']]

u.infos(selection)

Shape:  (29, 3)


Unnamed: 0,pk_place,lat,lng
0,3195535,39.9528,-75.1636
1,2215326,47.5833,9.31667
2,1897745,47.1681,8.51694
3,1044823,46.4667,10.3667
4,1788495,47.2667,9.5


In [12]:
# Create entities: one resource of class `pks.classes.presence` per selected place, and
# one place value per (lat, lng) couple.
# NOTE(review): both results are used below as statement subjects/objects, so they are
# presumably lists of pks aligned with the rows of `selection` — confirm in geovpylib.
presences = db.resources.create(pks.classes.presence, len(selection))
places = db.places.create(selection['lat'].astype(float).tolist(), selection['lng'].astype(float).tolist())

# Presence was presence of Geographical place - Prepare (object: the existing place)
fk_subjects = presences
fk_properties = pks.properties.presence_wasPresenceOf_spacetimeVolume
fk_objects = selection['pk_place'].astype(int).tolist()

# Presence was presence of Geographical place - Request (`z` is not read afterwards)
z = db.statements.create(fk_subjects, fk_properties, fk_objects)


# Presence was at - Prepare (object: the coordinates created above)
fk_subjects = presences
fk_properties = pks.properties.presence_wasAt_place
fk_objects = places

# Presence was at - Request
z = db.statements.create(fk_subjects, fk_properties, fk_objects)

Creating 29 resources of class [84] ... Not executed (executed option is false) Done in [00h00'00]
Creating info_proj_rel of 29 entities with project <153> ... Not executed (executed option is false) Done in [00h00'00]
Creating 29 places ... Not executed (executed option is false) Done in [00h00'00]
Creating info_proj_rel of 29 entities with project <153> ... Not executed (executed option is false) Done in [00h00'00]
Creating 29 statements ... Not executed (executed option is false) Done in [00h00'00]
Creating info_proj_rel of 29 entities with project <153> ... Not executed (executed option is false) Done in [00h00'00]
Creating 29 statements ... Not executed (executed option is false) Done in [00h00'00]
Creating info_proj_rel of 29 entities with project <153> ... Not executed (executed option is false) Done in [00h00'00]


## Import other geographical places

### Create all information to be imported

In [13]:
### Create URIs

# The name is the last segment of the DBpedia resource URI: put the prefix back
geo_places['dbpedia_uri'] = 'http://dbpedia.org/resource/' + geo_places['name']


### Create type

def _main_type(types):
    """Return the first keyword found in `types`, by priority, or NaN when none is found.

    The priority is Country, Departments_of_France, City, Town, Village. NaN (and not
    pd.NA) is returned so that a later `== 'Country'` test on the value stays a plain
    boolean.
    """
    for candidate in ('Country', 'Departments_of_France', 'City', 'Town', 'Village'):
        if candidate in types:
            return candidate
    return float('nan')

# Built in one go: the 'type' column exists even when no place has a known type
geo_places['type'] = [_main_type(types) for types in geo_places['types']]

# Drop columns
geo_places.drop(columns=['types'], inplace=True)

u.infos(geo_places)

Shape:  (474, 5)


Unnamed: 0,name,lat,lng,dbpedia_uri,type
0,Geneva,46.2017,6.14694,http://dbpedia.org/resource/Geneva,
80,German_Empire,52.5167,13.4,http://dbpedia.org/resource/German_Empire,Country
98,Mulhouse,47.75,7.34,http://dbpedia.org/resource/Mulhouse,City
112,Brescia,45.5417,10.2167,http://dbpedia.org/resource/Brescia,City
116,Corteno_Golgi,46.1669,10.2444,http://dbpedia.org/resource/Corteno_Golgi,Village


### Filter out already updated geographical places

In [14]:
# Places matched with an existing Geovistory place have been completed above and must not
# be created again. The filter is done on the name: `matchings['dbpedia_uri']` is blanked
# for places that already had a DBpedia URI, so filtering on it would let those places
# through and create them a second time.
# .copy() makes the result an independent frame, since columns are added to it afterwards.
geo_places = geo_places[~geo_places['name'].isin(matchings['name'])].copy()

### Create new geographical places

In [15]:
# Create one geographical place resource per remaining row and keep what the creation
# returns in the 'pk_entity' column
geo_places = geo_places.assign(
    pk_entity=db.resources.create(pks.classes.geoPlace, len(geo_places))
)

Creating 442 resources of class [363] ... Not executed (executed option is false) Done in [00h00'00]
Creating info_proj_rel of 442 entities with project <153> ... Not executed (executed option is false) Done in [00h00'00]


In [16]:
selection = geo_places[pd.notna(geo_places['name'])]

# Add Geographical place name
pk_entities = selection['pk_entity'].tolist()
# The names come from DBpedia resource URIs, where spaces are written as underscores
# (e.g. 'German_Empire'): restore the spaces so that the imported label is readable.
# Only the label is changed; the 'name' column keeps the DBpedia form.
names = selection['name'].str.replace('_', ' ', regex=False).tolist()
languages = pks.languages.english

graphs.add_names(pk_entities, names, languages)

Creating 442 resources of class [365] ... Not executed (executed option is false) Done in [00h00'00]
Creating info_proj_rel of 442 entities with project <153> ... Not executed (executed option is false) Done in [00h00'00]
Creating 442 appellations ... Not executed (executed option is false) Done in [00h00'00]
Creating info_proj_rel of 442 entities with project <153> ... Not executed (executed option is false) Done in [00h00'00]
Creating 442 statements ... Not executed (executed option is false) Done in [00h00'00]
Creating info_proj_rel of 442 entities with project <153> ... Not executed (executed option is false) Done in [00h00'00]
Creating 442 statements ... Not executed (executed option is false) Done in [00h00'00]
Creating info_proj_rel of 442 entities with project <153> ... Not executed (executed option is false) Done in [00h00'00]
Creating 442 statements ... Not executed (executed option is false) Done in [00h00'00]
Creating info_proj_rel of 442 entities with project <153> ... Not

In [17]:
# Add same as URI, for every new place having a DBpedia URI
selection = geo_places.dropna(subset=['dbpedia_uri'])

graphs.add_uris(
    selection['pk_entity'].tolist(),
    selection['dbpedia_uri'].tolist(),
)

Creating 442 resources of class [967] ... Not executed (executed option is false) Done in [00h00'00]
Creating info_proj_rel of 442 entities with project <153> ... Not executed (executed option is false) Done in [00h00'00]
Creating 442 appellations ... Not executed (executed option is false) Done in [00h00'00]
Creating info_proj_rel of 442 entities with project <153> ... Not executed (executed option is false) Done in [00h00'00]
Creating 442 statements ... Not executed (executed option is false) Done in [00h00'00]
Creating info_proj_rel of 442 entities with project <153> ... Not executed (executed option is false) Done in [00h00'00]
Creating 442 statements ... Not executed (executed option is false) Done in [00h00'00]
Creating info_proj_rel of 442 entities with project <153> ... Not executed (executed option is false) Done in [00h00'00]


In [18]:
# Add Presence at place, only for the new places having both a latitude and a longitude
selection = geo_places.dropna(subset=['lat', 'lng'])
coordinates = selection[['lat', 'lng']].astype(float)

graphs.add_geo_coordinates(
    selection['pk_entity'].tolist(),
    coordinates['lat'].tolist(),
    coordinates['lng'].tolist(),
)

Creating 442 places ... Not executed (executed option is false) Done in [00h00'00]
Creating info_proj_rel of 442 entities with project <153> ... Not executed (executed option is false) Done in [00h00'00]
Creating 442 resources of class [84] ... Not executed (executed option is false) Done in [00h00'00]
Creating info_proj_rel of 442 entities with project <153> ... Not executed (executed option is false) Done in [00h00'00]
Creating 442 statements ... Not executed (executed option is false) Done in [00h00'00]
Creating info_proj_rel of 442 entities with project <153> ... Not executed (executed option is false) Done in [00h00'00]
Creating 442 statements ... Not executed (executed option is false) Done in [00h00'00]
Creating info_proj_rel of 442 entities with project <153> ... Not executed (executed option is false) Done in [00h00'00]


In [19]:
# Add type
# pk of the Geovistory type for each type name. 'Departments_of_France' has no entry,
# so places of that type get no type statement.
pk_by_type = {
    'Country': pks.entities.pk_geo_place_country,
    'City': pks.entities.pk_geo_place_city,
    'Village': pks.entities.pk_geo_place_village,
    'Town': pks.entities.pk_geo_place_town,
}

# .map() always creates the column (missing value when the type is absent or unmapped),
# so the selection below works even when no place has a mapped type.
geo_places = geo_places.assign(pk_type=geo_places['type'].map(pk_by_type))

selection = geo_places.dropna(subset=['pk_type'])

pk_geoplaces = selection['pk_entity'].tolist()
# The column contains missing values and is therefore stored as floats: cast back to int
# so that the pks are not sent as floats.
pk_types = selection['pk_type'].astype(int).tolist()

z = db.statements.create(pk_geoplaces, pks.properties.geoPlace_hasIdentifyingGeoPlaceType_geoPlaceType, pk_types)

Creating 85 statements ... Not executed (executed option is false) Done in [00h00'00]
Creating info_proj_rel of 85 entities with project <153> ... Not executed (executed option is false) Done in [00h00'00]


## Add data to persons

### Update the working data

In [20]:
# The place details are no longer needed on the person rows: only the link to the place is
data = data.drop(columns=['birthplace_types', 'lat', 'lng'])

# Name -> pk of the birth place, for the newly created places and for the matched ones
geo_places = geo_places[['name', 'pk_entity']].rename(columns={'pk_entity':'pk_birth_place', 'name':'birthPlace'})
matchings = matchings[['name', 'pk_place']].rename(columns={'pk_place':'pk_birth_place', 'name':'birthPlace'})
geo_places = pd.concat([geo_places, matchings], ignore_index=True)
# The pks of matched places are strings (cut out of URIs) while the other column holds
# what the resource creation returned: use integers everywhere, as the other cells do
# before sending pks to the database.
geo_places['pk_birth_place'] = geo_places['pk_birth_place'].astype(int)

# Inner join: a person row whose birth place is in neither table is dropped
data = data.merge(geo_places, on='birthPlace', how='inner').drop(columns=['birthPlace'])
u.infos(data, random=True)

Shape:  (1799, 6)


Unnamed: 0,pk_person,dbpedia_person_uri,wikidata_person_uri,pk_birth,birthPlace,pk_birth_place
763,26307,http://dbpedia.org/resource/André_Maschinot,http://www.wikidata.org/entity/Q521700,68774,France,-1
44,26850,http://dbpedia.org/resource/Jacques-Louis_Soret,http://www.wikidata.org/entity/Q124083,68456,Geneva,-1
1218,25931,http://dbpedia.org/resource/Piero_Ballerini,http://www.wikidata.org/entity/Q1615033,68738,Lombardy,-1
154,25959,http://dbpedia.org/resource/René_Auberjonois_(...,http://www.wikidata.org/entity/Q683105,68799,Switzerland,3184085
973,26262,http://dbpedia.org/resource/Émile_Écuyer,http://www.wikidata.org/entity/Q3588821,69161,Corveissiat,-1


### Create information

In [None]:
# `data` holds one row per (person, birth place, ...) combination, so the same
# (person, URI) or (birth, place) couple can appear on several rows. Each selection is
# de-duplicated so that every URI and every statement is created once.
# pk_person and pk_birth are strings cut out of URIs: they are cast to int, as done for
# the other pks sent to the database in this notebook.

### Add DBpedia uris
# NOTE(review): the DBpedia URI was read from the project itself (owl:sameAs in the first
# query), so it may already exist on Geovistory — confirm that re-adding it is intended.
selection = data[['pk_person', 'dbpedia_person_uri']].dropna().drop_duplicates()
graphs.add_uris(selection['pk_person'].astype(int).tolist(), selection['dbpedia_person_uri'].tolist())

### Add Wikidata uris
selection = data[['pk_person', 'wikidata_person_uri']].dropna().drop_duplicates()
graphs.add_uris(selection['pk_person'].astype(int).tolist(), selection['wikidata_person_uri'].tolist())

### Add birth place to births
selection = data[['pk_birth', 'pk_birth_place']].dropna().drop_duplicates()
z = db.statements.create(
    selection['pk_birth'].astype(int).tolist(),
    pks.properties.period_tookPlaceOnOrWithin_phyThing,
    selection['pk_birth_place'].astype(int).tolist(),
)