# Explorando las localizaciones geográficas de BNB Linked Data Platform.

La [plataforma BNB Linked Data](https://bnb.data.bl.uk/) proporciona acceso a la [British National Bibliography (BNB)](http://www.bl.uk/bibliographic/natbib.html) a través de un punto de acceso SPARQL. 

Este notebook muestra cómo obtener obras y los lugares de publicación para crear un mapa interactivo.

### Importamos las librerías de código

In [1]:
import requests
import pandas as pd
import json
import csv
from pandas.io.json import json_normalize  
import folium

### Vamos a identificar el total de registros que se encuentran enlazados a GeoNames.

In [2]:
# SPARQL endpoint of the BNB Linked Data platform.
url = 'https://bnb.data.bl.uk/sparql'

# Count the works whose publication event has a place that points to GeoNames.
# ?lugar is an IRI, so it is converted with str() before being handed to
# regex(), which SPARQL 1.1 only defines for string literals.
sentencia = """
PREFIX blt: <http://www.bl.uk/schemas/bibliographic/blterms#>
PREFIX dct: <http://purl.org/dc/terms/>
PREFIX c4dm: <http://purl.org/NET/c4dm/event.owl#>

SELECT (count(?recurso) as ?total_obras)  WHERE {
   ?recurso dct:title ?titulo .
   ?recurso blt:publication ?publicacion .
   ?publicacion c4dm:place ?lugar .
   FILTER regex(str(?lugar), "geonames", "i")
}
"""

# Request the results as SPARQL JSON.
cabeceras = {'Accept': 'application/sparql-results+json'}
r = requests.get(
    url,
    params={'format': 'application/sparql-results+json', 'query': sentencia},
    headers=cabeceras,
    timeout=120,  # never wait forever on an unresponsive endpoint
)
# Stop here on an HTTP error instead of printing an error page as if it were data.
r.raise_for_status()

print(r.text)


{ "head": { "link": [], "vars": ["total_obras"] },
  "results": { "distinct": false, "ordered": true, "bindings": [
    { "total_obras": { "type": "typed-literal", "datatype": "http://www.w3.org/2001/XMLSchema#integer", "value": "4164098" }} ] } }


### Si eliminamos la restricción de que los registros se encuentren enlazados a GeoNames, recuperamos el total de registros

In [3]:
# SPARQL endpoint of the BNB Linked Data platform.
url = 'https://bnb.data.bl.uk/sparql'

# Count every work that has a title and a publication place, whatever the
# place points to (no GeoNames restriction).
sentencia = """
PREFIX blt: <http://www.bl.uk/schemas/bibliographic/blterms#>
PREFIX dct: <http://purl.org/dc/terms/>
PREFIX c4dm: <http://purl.org/NET/c4dm/event.owl#>

SELECT (count(?recurso) as ?total_obras)  WHERE {
   ?recurso dct:title ?titulo .
   ?recurso blt:publication ?publicacion .
   ?publicacion c4dm:place ?lugar .
}
"""

# Request the results as SPARQL JSON.
cabeceras = {'Accept': 'application/sparql-results+json'}
r = requests.get(
    url,
    params={'format': 'application/sparql-results+json', 'query': sentencia},
    headers=cabeceras,
    timeout=120,  # never wait forever on an unresponsive endpoint
)
# Stop here on an HTTP error instead of printing an error page as if it were data.
r.raise_for_status()

print(r.text)


{ "head": { "link": [], "vars": ["total_obras"] },
  "results": { "distinct": false, "ordered": true, "bindings": [
    { "total_obras": { "type": "typed-literal", "datatype": "http://www.w3.org/2001/XMLSchema#integer", "value": "8565183" }} ] } }


### Podemos comprobar que aproximadamente la mitad de los registros (unos 4,2 millones de los 8,6 millones totales) se encuentran enlazados a GeoNames.

### A continuación, vamos a recuperar el número de registros por localización geográfica en una sentencia SPARQL

In [4]:
# SPARQL endpoint of the BNB Linked Data platform.
url = 'https://bnb.data.bl.uk/sparql'

# Number of works per publication place, restricted to places that point to
# GeoNames. ?lugar is an IRI, so it is converted with str() before being
# handed to regex(), which SPARQL 1.1 only defines for string literals.
sentencia = """
PREFIX blt: <http://www.bl.uk/schemas/bibliographic/blterms#>
PREFIX dct: <http://purl.org/dc/terms/>
PREFIX c4dm: <http://purl.org/NET/c4dm/event.owl#>

SELECT ?lugar (count(?recurso) as ?total_obras)  WHERE {
   ?recurso dct:title ?titulo .
   ?recurso blt:publication ?publicacion .
   ?publicacion c4dm:place ?lugar .
   FILTER regex(str(?lugar), "geonames", "i")
}
GROUP BY ?lugar
"""

# Request the results as SPARQL JSON.
cabeceras = {'Accept': 'application/sparql-results+json'}
r = requests.get(
    url,
    params={'format': 'application/sparql-results+json', 'query': sentencia},
    headers=cabeceras,
    timeout=120,  # never wait forever on an unresponsive endpoint
)
# Stop here on an HTTP error: the response body is parsed as JSON afterwards.
r.raise_for_status()

print(r.text)


{ "head": { "link": [], "vars": ["lugar", "total_obras"] },
  "results": { "distinct": false, "ordered": true, "bindings": [
    { "lugar": { "type": "uri", "value": "http://sws.geonames.org/2635167/" }	, "total_obras": { "type": "typed-literal", "datatype": "http://www.w3.org/2001/XMLSchema#integer", "value": "362170" }},
    { "lugar": { "type": "uri", "value": "http://sws.geonames.org/6251999/" }	, "total_obras": { "type": "typed-literal", "datatype": "http://www.w3.org/2001/XMLSchema#integer", "value": "14102" }},
    { "lugar": { "type": "uri", "value": "http://sws.geonames.org/2802361/" }	, "total_obras": { "type": "typed-literal", "datatype": "http://www.w3.org/2001/XMLSchema#integer", "value": "2931" }},
    { "lugar": { "type": "uri", "value": "http://sws.geonames.org/3017382/" }	, "total_obras": { "type": "typed-literal", "datatype": "http://www.w3.org/2001/XMLSchema#integer", "value": "3750" }},
    { "lugar": { "type": "uri", "value": "http://sws.geonames.org/1269750/" }	,

### Guardamos el resultado en un fichero CSV

In [5]:
# Parse the SPARQL JSON response obtained by the previous request.
bnbdatos = json.loads(r.text)

# newline='' leaves line-ending handling to the csv module; the encoding is
# stated explicitly so the file does not depend on the platform default.
with open('bnb_registros_total.csv', 'w', newline='', encoding='utf-8') as fichero:
    csv_salida = csv.writer(fichero, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)

    # Header row.
    csv_salida.writerow(['id_geonames', 'total'])

    # One row per place: its GeoNames URI and the number of works counted for it.
    csv_salida.writerows(
        (fila['lugar']['value'], fila['total_obras']['value'])
        for fila in bnbdatos['results']['bindings']
    )

### Explorando los datos

In [6]:
# Load the per-place totals from the CSV file into a DataFrame.
df = pd.read_csv('bnb_registros_total.csv')

In [7]:
# Preview the first rows of the table.
df.head()

Unnamed: 0,id_geonames,total
0,http://sws.geonames.org/2635167/,362170
1,http://sws.geonames.org/6251999/,14102
2,http://sws.geonames.org/2802361/,2931
3,http://sws.geonames.org/3017382/,3750
4,http://sws.geonames.org/1269750/,10207


In [8]:
# Add up the per-place totals to get the overall number of counted works.
df['total'].sum()

4164098

In [9]:
# Number of non-null values in each column.
df.count()

id_geonames    174
total          174
dtype: int64

## Explorando los lugares

In [10]:
# Distinct GeoNames URIs present in the table.
lugares = pd.unique(df['id_geonames']).tolist()

# Print every URI in sorted order and collect its bare identifier, wrapped in
# double quotes and preceded by a space, for later use in a SPARQL query.
fragmentos = []
for uri in sorted(lugares):
    print(uri)
    identificador = uri.replace("http://sws.geonames.org/", "").replace("/", "")
    fragmentos.append(' "' + identificador + '"')

cadena_lugares = ''.join(fragmentos)

http://sws.geonames.org/102358/
http://sws.geonames.org/1036973/
http://sws.geonames.org/1062947/
http://sws.geonames.org/1149361/
http://sws.geonames.org/1168579/
http://sws.geonames.org/1210997/
http://sws.geonames.org/1220409/
http://sws.geonames.org/1227603/
http://sws.geonames.org/1252634/
http://sws.geonames.org/1269750/
http://sws.geonames.org/1282588/
http://sws.geonames.org/1282988/
http://sws.geonames.org/130758/
http://sws.geonames.org/1327865/
http://sws.geonames.org/142551/
http://sws.geonames.org/145495/
http://sws.geonames.org/146669/
http://sws.geonames.org/1475047/
http://sws.geonames.org/149590/
http://sws.geonames.org/1562822/
http://sws.geonames.org/1605651/
http://sws.geonames.org/163843/
http://sws.geonames.org/1643084/
http://sws.geonames.org/1655842/
http://sws.geonames.org/1668284/
http://sws.geonames.org/1694008/
http://sws.geonames.org/1733045/
http://sws.geonames.org/1814991/
http://sws.geonames.org/1819730/
http://sws.geonames.org/1831722/
http://sws.geonam

## Recogemos la información de Wikidata

In [11]:
# SPARQL endpoint of the Wikidata Query Service.
url = 'https://query.wikidata.org/sparql'

# Template of the query: for each GeoNames identifier, fetch the Wikidata item
# (?x), its label and the latitude/longitude of its coordinate statement.
# Braces are doubled because the text goes through str.format(); {0} is
# replaced by the space-separated list of quoted identifiers.
sentencia = """
PREFIX bibo: <http://purl.org/ontology/bibo/>
SELECT ?idgeonames ?lat ?lon ?x ?xLabel 
WHERE {{ 
  values ?idgeonames {{ {0} }} 
  ?x wdt:P1566 ?idgeonames ; 
   p:P625 [
     psv:P625 [
       wikibase:geoLatitude ?lat ;
       wikibase:geoLongitude ?lon ;
       wikibase:geoGlobe ?globe ;
     ];
     ps:P625 ?coord
   ]
   SERVICE wikibase:label {{ bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }}
}}
"""

sentencia = sentencia.format(cadena_lugares)
print(sentencia)

# Wikimedia asks clients to identify themselves with a descriptive User-Agent;
# replace this value with your own project and contact details.
cabeceras = {'User-Agent': 'bnb-geonames-notebook/1.0 (Jupyter; python-requests)'}
r = requests.get(
    url,
    params={'format': 'json', 'query': sentencia},
    headers=cabeceras,
    timeout=60,  # never wait forever on an unresponsive endpoint
)
# Stop here on an HTTP error instead of failing later while decoding JSON.
r.raise_for_status()
puntos_geo = r.json()


PREFIX bibo: <http://purl.org/ontology/bibo/>
SELECT ?idgeonames ?lat ?lon ?x ?xLabel 
WHERE { 
  values ?idgeonames {  "102358" "1036973" "1062947" "1149361" "1168579" "1210997" "1220409" "1227603" "1252634" "1269750" "1282588" "1282988" "130758" "1327865" "142551" "145495" "146669" "1475047" "149590" "1562822" "1605651" "163843" "1643084" "1655842" "1668284" "1694008" "1733045" "1814991" "1819730" "1831722" "1835841" "1861060" "1880251" "1899402" "192950" "1966436" "2017370" "2029969" "203312" "2077456" "2088628" "2139685" "2186224" "2205218" "2233387" "2245662" "226074" "2264397" "2287781" "2300660" "2309096" "2328926" "239880" "2400553" "2403846" "2410758" "2411586" "241170" "2413451" "2461445" "2464461" "248816" "2510769" "2542007" "2562770" "2623032" "2629691" "2634895" "2635167" "2638360" "2641364" "2658434" "2661886" "272103" "2750405" "2782113" "2802361" "285153" "285570" "286963" "289688" "290291" "290557" "2921044" "294640" "2960313" "2963597" "298795" "2993457" "3017382" "

## Vamos a mostrar un mapa con todas las localizaciones

In [12]:
# World map on which one circle per publication place is drawn.
mapa_circulos = folium.Map(location=[20, 0], tiles="OpenStreetMap", zoom_start=2)

for geo in puntos_geo['results']['bindings']:
    idwikidata = geo['x']['value']
    # The JSON results carry every value as text; make the coordinates numeric.
    lat = float(geo['lat']['value'])
    lon = float(geo['lon']['value'])
    idgeonames = geo['idgeonames']['value']
    etiqueta = geo['xLabel']['value']

    # Rebuild the GeoNames URI used in the table to look up the total for this place.
    total = df.loc[df['id_geonames'] == "http://sws.geonames.org/" + idgeonames + "/", ["total"]].values[0][0]

    # Popup text linking to the Wikidata item (the attribute must be "href"
    # for the anchor to be a working link).
    texto_popup = str(total) + " registros publicados en <a href='" + idwikidata + "'>" + etiqueta + "</a>"

    # Circle whose radius grows with the number of works for the place.
    folium.Circle(
        location=[lat, lon],
        popup=texto_popup,
        radius=float(total) / 10,
        color='crimson',
        fill=True,
        fill_color='crimson',
    ).add_to(mapa_circulos)

# Last expression of the cell: displays the map in the notebook.
mapa_circulos

## References

https://plotly.com/python/bubble-maps/