In [None]:
!pip install rdflib SPARQLWrapper ipyleaflet

### In this Lesson
* Use Wikidata SPARQL endpoint to retriev long/lat for places
* Use HistoGIS Rest endpoint to retrieve administrative divisions of this long/lat
* Use Lobid/GND to enrich MPR Data

#### Lets start of with (the beginning) of an ÖBL-Biography

*Kob, Anton* - https://apis.acdh.oeaw.ac.at/person/70582

Arbeitete zuerst als Müllergeselle bei seinem Vater. Nach erster Lehrzeit beim Vergolder A. Hanny in Kaltern, zog er 1852 nach München und von dort als Vergoldergehilfe nach Augsburg, kehrte aber noch im selben Jahre zurück und nahm von nun an Unterricht bei Joh. Bapt. Pendl, dessen Schwiegersohn er später wurde. 1859 übersiedelte er nach Gries b. Bozen und 1863 nach Bozen. 

In [121]:
bio = "Arbeitete zuerst als Müllergeselle bei seinem Vater. Nach erster Lehrzeit beim Vergolder A. Hanny in Kaltern, zog er 1852 nach München und von dort als Vergoldergehilfe nach Augsburg, kehrte aber noch im selben Jahre zurück und nahm von nun an Unterricht bei Joh. Bapt. Pendl, dessen Schwiegersohn er später wurde. 1859 übersiedelte er nach Gries b. Bozen und 1863 nach Bozen."

#### And now we use the Regex of yesterdays NLP session to extract PlaceNames and Years

In [152]:
import re
year_place = []
for year in re.finditer(r'([\d\–]{2,7}).*?\snach\s.*?([A-Z]\w+)', bio):
    print('year: {} / place: {}'.format(year.group(1), year.group(2)))
    year_place.append((year.group(1), year.group(2)))
year_place

year: 1852 / place: München
year: 1859 / place: Gries
year: 1863 / place: Bozen


[('1852', 'München'), ('1859', 'Gries'), ('1863', 'Bozen')]

In [153]:
from IPython.display import Image
from IPython.core.display import HTML 

In [154]:
import sys
from rdflib import Graph,URIRef
import pandas as pd
pd.set_option("display.width",100)
pd.set_option("display.max_colwidth",80)

In [155]:
from SPARQLWrapper import SPARQLWrapper, JSON
import requests

In [156]:
sparql = SPARQLWrapper("https://query.wikidata.org/sparql")

In [157]:
city = year_place[-1][1]
when = int(year_place[-1][0])
print(city, when)

Bozen 1863


In [158]:
sparql.setQuery(f"""
SELECT DISTINCT
  ?city ?cityLabel ?population ?gnd ?lat_long
WHERE {{
  ?city wdt:P31 wd:Q1549591 .
  ?city rdfs:label "{city}"@de .
  OPTIONAL {{ ?city wdt:P1082 ?population }}
  OPTIONAL {{ ?city wdt:P3529 ?medianIncome }}
  OPTIONAL {{ ?city wdt:P227 ?gnd }}
  OPTIONAL {{ ?city wdt:P625 ?lat_long }}
  SERVICE wikibase:label {{ bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }}
}}
""")
sparql.setReturnFormat(JSON)
results = sparql.query().convert()

In [159]:
results_df = pd.io.json.json_normalize(results['results']['bindings'])
results_df[['city.value', 'cityLabel.value', 'lat_long.value', 'population.value']]

  """Entry point for launching an IPython kernel.


Unnamed: 0,city.value,cityLabel.value,lat_long.value,population.value
0,http://www.wikidata.org/entity/Q6526,Bolzano,Point(11.35 46.5),107317


In [160]:
import re
for idx, row in results_df.iterrows():
    print(row['lat_long.value'], type(row['lat_long.value']))

Point(11.35 46.5) <class 'str'>


In [161]:
for idx, row in results_df.iterrows():
    lat_long = re.match('Point\(([0-9\.]+)\s([0-9\.]+)\)', row['lat_long.value'])
    if lat_long:
        long = lat_long.group(1)
        lat = lat_long.group(2)
        print(lat, long)
        results_df.at[idx, 'lat'] = lat
        results_df.at[idx, 'long'] = long

46.5 11.35


In [167]:
results_df[['city.value', 'cityLabel.value', 'lat_long.value', 'lat', 'long']]

Unnamed: 0,city.value,cityLabel.value,lat_long.value,lat,long
0,http://www.wikidata.org/entity/Q6526,Bolzano,Point(11.35 46.5),46.5,11.35


In [163]:
from ipyleaflet import Map, basemaps, basemap_to_tiles, Marker, GeoJSON
watercolor = basemap_to_tiles(basemaps.Stamen.Watercolor)
center = (results_df.at[0, 'lat'], results_df.at[0, 'long'])
m = Map(layers=(watercolor, ), center=center, zoom=6)
marker = Marker(location=center, draggable=False)
m.add_layer(marker)
for idx, row in results_df.loc[1:,:].iterrows():
    center = (row['lat'], row['long'])
    marker = Marker(location=center, draggable=False)
    m.add_layer(marker)
m

Map(center=['46.5', '11.35'], controls=(ZoomControl(options=['position', 'zoom_in_text', 'zoom_in_title', 'zoo…

In [164]:
url = 'https://histogis.acdh-dev.oeaw.ac.at/api/where-was'
res_geo = []
for idx, row in results_df.iterrows():
    params = {'lat': row['lat'], 'lng': row['long'], 'when': str(when)}
    res = requests.get(url, params=params)
    df_geo = pd.io.json.json_normalize(res.json()['features'])
    df_geo.at[:,'wikidata'] = row['city.value']
    res_geo.append(df_geo)

  


In [165]:
res_geo[0][['@id', 'names', 'properties.adm_name', 'properties.start_date', 'properties.end_date']]

Unnamed: 0,@id,names,properties.adm_name,properties.start_date,properties.end_date
0,https://histogis.acdh-dev.oeaw.ac.at/shapes/permalink/7df020e1e93fe1a1b35989...,[{'toponym': 'Gefürstete Grafschaft Tirol'}],Princely County,1860-10-21,1867-12-21


In [168]:
res_geo[0]

Unnamed: 0,type,bbox,names,types,descriptions,links,@id,geometry.type,geometry.coordinates,properties.id,...,properties.adm_name,properties.start_date,properties.end_date,properties.date_accuracy,properties.spatial_extent,properties.spatial_extent_sqm,properties.parents,properties.slugged_name,when.timespans,wikidata
0,Feature,"[10.09828922682921, 45.66093004860678, 12.9660288954249, 47.74448869233967]",[{'toponym': 'Gefürstete Grafschaft Tirol'}],"[{'identifier': 'https://histogis.acdh-dev.oeaw.ac.at/vocabs/207', 'label': ...","[{'value': '', 'lang': 'en'}]","[{'type': 'skos:closeMatch', 'identifier': 'Q16543832'}]",https://histogis.acdh-dev.oeaw.ac.at/shapes/permalink/7df020e1e93fe1a1b35989...,MultiPolygon,"[[[[10.49224305074131, 45.93308544114245], [10.49206013069406, 45.9417823441...",10629,...,Princely County,1860-10-21,1867-12-21,D,3.150296,26732.89251,"[{'id': 11128, 'start_date': '1866-10-04', 'end_date': '1867-12-21', 'name':...",gefurstete-grafschaft-tirol__1860-10-21_1867-12-21,"[{'start': {'in': '1860-10-21'}, 'end': {'in': '1867-12-21'}}]",http://www.wikidata.org/entity/Q6526


In [169]:
res_geo[0].at[0, 'properties.id']

10629

In [170]:
count = 0
colors = ['red', 'green', 'blue', 'yellow']
for p in res_geo[0].at[0, 'properties.parents']:
    if when > int(p['start_date'][:4]) and when < int(p['end_date'][:4]):
        geo_json = GeoJSON(
        data=requests.get(f"https://histogis.acdh-dev.oeaw.ac.at/api/tempspatial/{p['id']}/?format=json").json(),
        style={
            'opacity': 1, 'dashArray': '9', 'fillOpacity': 0.3, 'weight': 1, 'color': colors[count]
        },
        hover_style={
            'color': 'white', 'dashArray': '0', 'fillOpacity': 0.5
        },
        )
        m.add_layer(geo_json)
        count += 1
        print(f'adding: {p["name"]} / {p["start_date"]} - {p["end_date"]}')

adding: Österreich / 1859-10-12 - 1866-10-03


In [171]:
m

Map(bottom=5996.0, center=['46.5', '11.35'], controls=(ZoomControl(options=['position', 'zoom_in_text', 'zoom_…

In [172]:
res_geo[0].at[0, 'properties.parents']

[{'id': 11128,
  'start_date': '1866-10-04',
  'end_date': '1867-12-21',
  'name': 'Österreich',
  'permalink': '/shapes/permalink/51f8fe06bde59ddd2b40ff778c7b695a/'},
 {'id': 11115,
  'start_date': '1859-10-12',
  'end_date': '1866-10-03',
  'name': 'Österreich',
  'permalink': '/shapes/permalink/95eaa76b5e732d4b2855741a42db93fb/'}]

### Using Lobid/GND to pull in data on Persons (and other entities)

In [173]:
headers = {'accept': 'application/json'}
params = {'q': 'preferredName:Kreisky, Bruno', 'filter': 'type:Person'}
url = 'https://lobid.org/gnd/search'
res = requests.get(url, params=params, headers=headers)

In [174]:
res

<Response [200]>

In [175]:
res_dict = res.json()
res_df = pd.io.json.json_normalize(res_dict['member'])

  


In [176]:
res_df[['@context', 'preferredName', 'describedBy.id', 'biographicalOrHistoricalInformation', 'dateOfBirth', 'dateOfDeath']]

Unnamed: 0,@context,preferredName,describedBy.id,biographicalOrHistoricalInformation,dateOfBirth,dateOfDeath
0,http://lobid.org/gnd/context.jsonld,"Kreisky, Bruno",https://d-nb.info/gnd/118566512/about,"[Bundeskanzler 1970-1983, Politiker, Oesterreich]",[1911-01-22],[1990-07-29]
1,http://lobid.org/gnd/context.jsonld,"Kreisky, Peter",https://d-nb.info/gnd/170686299/about,[seit 1973 Mitarbeiter der Wirtschaftswissenschaftlichen Abteilung der Arbei...,[1944],[2010]
2,http://lobid.org/gnd/context.jsonld,"Kreisky, Eva",https://d-nb.info/gnd/121036073/about,[Österr. Prof. für Politikwissenschaft mit Schwerpunkt politische Theorie un...,[1944-08-08],
3,http://lobid.org/gnd/context.jsonld,"Kreisky, Bruno",https://d-nb.info/gnd/176237682/about,,,


#### Lets get some data from MPR and query Lobid for it

In [178]:
url_mpr = 'https://mpr.acdh.oeaw.ac.at/apis/api/entities/person/'
params_mpr = {'start_date__isnull': False}
res_mpr = requests.get(url_mpr, params=params_mpr, headers=headers)

In [179]:
res_dict = res_mpr.json()

In [180]:
res_dict

{'next': 'http://mpr.sisyphos.arz.oeaw.ac.at/apis/api/entities/person/?limit=50&offset=50&start_date__isnull=False',
 'previous': None,
 'count': 215,
 'limit': 50,
 'offset': 0,
 'results': [{'url': 'http://mpr.sisyphos.arz.oeaw.ac.at/apis/api/entities/person/1405/',
   'id': 1405,
   'name': 'Ács, Karl',
   'review': False,
   'start_date': '1983-08-14',
   'start_start_date': None,
   'start_end_date': None,
   'end_date': None,
   'end_start_date': None,
   'end_end_date': None,
   'start_date_written': '14.08.1983',
   'end_date_written': None,
   'status': '',
   'references': '',
   'notes': '',
   'published': False,
   'first_name': None,
   'gender': '',
   'source': None,
   'text': [{'id': 131,
     'url': 'http://mpr.sisyphos.arz.oeaw.ac.at/apis/api/metainfo/text/131/',
     'type': 'http://mpr.sisyphos.arz.oeaw.ac.at/apis/api/vocabularies/texttype/168/',
     'label': 'ID: 131'}],
   'collection': [{'id': 3,
     'url': 'http://mpr.sisyphos.arz.oeaw.ac.at/apis/api/metainf

In [189]:
for r in res_dict['results']:
    params = {'q': f"preferredName:{r['name']} OR preferredName:{r['name']}, {r['first_name']} AND dateOfBirth:{r['start_date']}", "filter": "type:Person"}
    res = requests.get(url, params=params, headers=headers)
    res_dict_1 = res.json()
    if int(res_dict_1['totalItems']) > 0:
        print(f"name: {r['name']}, {r['first_name']}, GND results: #{res_dict_1['totalItems']}, GND ID: {res_dict_1['member'][0]['id']}, MPR: https://mpr.acdh.oeaw.ac.at/apis/entities/entity/person/{r['id']}/detail")
    else:
        print(f"name: {r['name']}, {r['first_name']}, GND results: #{res_dict_1['totalItems']}")

name: Ács, Karl, None, GND results: #0
name: Aichinger, Johann, GND results: #0
name: Aigner, Joseph Matthäus, GND results: #1, GND ID: https://d-nb.info/gnd/116274344, MPR: https://mpr.acdh.oeaw.ac.at/apis/entities/entity/person/1495/detail
name: Albini, Giuseppe, GND results: #0
name: Altieri, Ludovico, GND results: #0
name: Amon, Franz, GND results: #1, GND ID: https://d-nb.info/gnd/117471437, MPR: https://mpr.acdh.oeaw.ac.at/apis/entities/entity/person/1583/detail
name: Antonelli, Giacomo, GND results: #0
name: Arndts von Arnesberg, Karl Ludwig, GND results: #1, GND ID: https://d-nb.info/gnd/100416993, MPR: https://mpr.acdh.oeaw.ac.at/apis/entities/entity/person/1820/detail
name: Laurentini, Giovanni, GND results: #0
name: Barbarigo, Ida, GND results: #0
name: Mühlbach, Luise, GND results: #1, GND ID: https://d-nb.info/gnd/118585754, MPR: https://mpr.acdh.oeaw.ac.at/apis/entities/entity/person/2017/detail
name: Bakunin, Michael Aleksandrovič, GND results: #1, GND ID: https://d-nb.i