In [1]:
from __future__ import unicode_literals

from itertools import cycle
from geopy.distance import vincenty
import sparql
import simplekml
import numpy as np
import pandas as pd
import requests
import urllib
from random import random

In [2]:
# Optional (disabled): override the client's user-agent string before querying.
# sparql.USER_AGENT = 'fnielsenbot, faan@dtu.dk'
# Handle for the Wikidata SPARQL endpoint; the query cells below go through it.
service = sparql.Service("https://query.wikidata.org/bigdata/namespace/wdq/sparql")

In [3]:
# SPARQL query for the literary works. Each result row combines a work
# (instance of wd:Q386724 or of one of its subclasses), one of its authors
# (P50) and one P840 location statement of that work.
# Required: a Danish ("da") label on the work and on the location, and a
# location with wdt:P17 wd:Q35 and a wdt:P625 value (?geo).
# Optional: Danish author label, genre (P136) with Danish label, a P1683
# value taken from the reference of the location statement (?citat), and the
# Danish label of the P1433 target (?published_in_label).
# NOTE(review): wd:Q35 is presumably Denmark -- confirm against Wikidata.
statement = """
prefix pr: <http://www.wikidata.org/prop/reference/>
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX wikibase: <http://wikiba.se/ontology#>
PREFIX p: <http://www.wikidata.org/prop/>
PREFIX v: <http://www.wikidata.org/prop/statement/>
PREFIX q: <http://www.wikidata.org/prop/qualifier/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX prov: <http://www.w3.org/ns/prov#>

SELECT ?work ?work_label ?author ?author_label ?genre ?genre_label ?location ?location_label ?geo ?citat ?published_in_label WHERE {
  ?work wdt:P31/wdt:P279* wd:Q386724 . 
  ?work wdt:P50 ?author .
  ?work p:P840 ?location_statement .
  ?work rdfs:label ?work_label filter (lang(?work_label) = "da") .
  OPTIONAL { ?author rdfs:label ?author_label filter (lang(?author_label) = "da") . }
  ?location_statement v:P840 ?location .
  ?location wdt:P17 wd:Q35 .
  ?location wdt:P625 ?geo . 
  ?location rdfs:label ?location_label filter (lang(?location_label) = "da") .
  OPTIONAL { 
    ?work wdt:P136 ?genre .
    ?genre rdfs:label ?genre_label filter (lang(?genre_label) = "da") . 
  }
  OPTIONAL {
    ?location_statement prov:wasDerivedFrom ?ref .
    ?ref pr:P1683 ?citat .
  }
  OPTIONAL {
    ?work wdt:P1433 ?published_in .
    ?published_in rdfs:label ?published_in_label filter (lang(?published_in_label) = "da") .
  }  
}
"""

In [4]:
# SPARQL query for the libraries: items that are an instance of wd:Q2326815
# (or of one of its subclasses), have wdt:P17 wd:Q35 and a wdt:P625 value
# (?geo). The P969 value (?address) and P856 value (?url) are optional.
# ?libraryLabel is filled in by the label service using languages "da,en".
# NOTE(review): the `bd:` prefix used in the SERVICE clause is not declared
# in this string; presumably the endpoint predefines it -- confirm.
statement_libraries = """
prefix pr: <http://www.wikidata.org/prop/reference/>
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX wikibase: <http://wikiba.se/ontology#>
PREFIX p: <http://www.wikidata.org/prop/>
PREFIX v: <http://www.wikidata.org/prop/statement/>
PREFIX q: <http://www.wikidata.org/prop/qualifier/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX prov: <http://www.w3.org/ns/prov#>

SELECT ?library ?libraryLabel ?geo ?url ?address WHERE {
  ?library wdt:P31/wdt:P279* wd:Q2326815 .
  ?library wdt:P17 wd:Q35 .
  ?library wdt:P625 ?geo .
  OPTIONAL { ?library wdt:P969 ?address . }
  OPTIONAL { ?library wdt:P856 ?url . }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "da,en" . }
}
"""

In [5]:
# Run the works query (passing timeout=60) and load every result row into a
# DataFrame whose columns are named after the query's result variables.
result = service.query(statement, timeout=60)
df = pd.DataFrame(result.fetchall(), columns=result.variables)

In [6]:
# Run the libraries query (no explicit timeout is passed here) and load every
# result row into a DataFrame with one column per result variable.
# Note: this rebinds `result`, which previously held the works query result.
result = service.query(statement_libraries)
libraries = pd.DataFrame(result.fetchall(), columns=result.variables)

In [7]:
# (rows, columns) of the works query result.
df.shape

(664, 11)

In [8]:
# Preview the first rows of the works query result.
df.head()

Unnamed: 0,work,work_label,author,author_label,genre,genre_label,location,location_label,geo,citat,published_in_label
0,http://www.wikidata.org/entity/Q21821244,Revyen,http://www.wikidata.org/entity/Q12324854,Lone Hørslev,http://www.wikidata.org/entity/Q49084,novelle,http://www.wikidata.org/entity/Q12326563,Mejdal,Point(8.65833 56.3514),"Nu sagde jeg til damen i bussen, at jeg kom fr...",Midtjyske fortællinger
1,http://www.wikidata.org/entity/Q21821251,Lovprisning i fuglelandet,http://www.wikidata.org/entity/Q4569333,Henning Mortensen,http://www.wikidata.org/entity/Q49084,novelle,http://www.wikidata.org/entity/Q21821282,Sondrup,Point(10.06884 55.89767),"Fra mit sted i verden som hedder Sondrup, hvor...",Midtjyske fortællinger
2,http://www.wikidata.org/entity/Q21821251,Lovprisning i fuglelandet,http://www.wikidata.org/entity/Q4569333,Henning Mortensen,http://www.wikidata.org/entity/Q49084,novelle,http://www.wikidata.org/entity/Q21821299,Banegårdsgade,Point(10.14937 55.97094),Vi kommer ind i rundkørslen og viere gennem by...,Midtjyske fortællinger
3,http://www.wikidata.org/entity/Q21821251,Lovprisning i fuglelandet,http://www.wikidata.org/entity/Q4569333,Henning Mortensen,http://www.wikidata.org/entity/Q49084,novelle,http://www.wikidata.org/entity/Q2350872,Hundslund Sogn,Point(10.0517 55.9153),"Det er søndag formiddag, og jeg kan svagt høre...",Midtjyske fortællinger
4,http://www.wikidata.org/entity/Q21821379,Omveje,http://www.wikidata.org/entity/Q21821375,Anita Lillevang,http://www.wikidata.org/entity/Q49084,novelle,http://www.wikidata.org/entity/Q2224124,Korning,Point(9.6994 55.8511),Korning Kro! Her har jeg været til fest engang...,Midtjyske fortællinger


In [9]:
# Keep an untouched copy of the query result so the cleaning cells below can
# be re-run without querying the endpoint again.
df_backup = df.copy()

In [10]:
# (Re)start from the untouched copy; re-run this cell to undo the in-place
# clean-up applied to `df` further down.
df = df_backup.copy()

In [11]:
def stringify(literal):
    """Reduce a query result cell to a plain value.

    Falsy input (``None``, ``''``, ``0``, ...) becomes the empty string.
    Otherwise the object's ``value`` attribute is returned when it has one,
    and the object itself is returned unchanged when it does not.
    """
    if not literal:
        return ''
    # Objects without a `.value` attribute are passed through as they are.
    return getattr(literal, 'value', literal)

In [12]:
# Fix values in elements
iri_columns = ['work', 'work_label', 'author', 'author_label', 'genre', 'genre_label', 'location', 'location_label', 'citat']
for column in df.columns:
    df.ix[:, column] = [stringify(element) for element in df.ix[:, column]]

In [13]:
# Replace every cell of `libraries` by its plain value (see `stringify`).
for column in libraries.columns:
    # Plain column access instead of the `.ix` indexer, which newer pandas
    # releases no longer provide.
    libraries[column] = [stringify(element) for element in libraries[column]]

In [14]:
# HTML fragment describing one work inside a location's placemark.
# Placeholders filled via str.format: {citat}, {author}, {title} and
# {title_escaped} (the value put into the bibliotek.dk search link).
# The values are inserted as-is; nothing here HTML-escapes them.
description_template = u"""
<p>{citat}</p>
<p><i>{author}: {title}</i></p>
<a href="https://bibliotek.dk/da/search/work?search_block_form={title_escaped}">Søg på bibliotek.dk efter bogen</a>
"""

# Placemark description for one library: {address} and {url} paragraphs,
# wrapped in a CDATA section.
libraries_template = u"""
<![CDATA[
<p>{address}</p>
<p>{url}</p>
]]>
"""

In [15]:
# One group per distinct value of the 'location' column of `df`.
grouped = df.groupby('location')

In [16]:
# Spot check: materialise the groups and display the one at position 6.
# The index 6 is an arbitrary sample; `location` is rebound to that group's key.
location, df_location = list(grouped)[6]
df_location

Unnamed: 0,work,work_label,author,author_label,genre,genre_label,location,location_label,geo,citat,published_in_label
105,http://www.wikidata.org/entity/Q21707912,Biografi & Testamente,http://www.wikidata.org/entity/Q1159183,Dan Turèll,http://www.wikidata.org/entity/Q482,poesi,http://www.wikidata.org/entity/Q110289,Tivoli,Point(12.568333333 55.673611111),... og sét Pierrot og Columbine på Pantomimen ...,All time greatest hits


In [17]:
# Raw KML <Style> snippet: an icon style with its hot spot at x=0.5, y=0
# (both in "fraction" units).
# NOTE(review): `style` is not referenced by any other cell visible here, and
# the style id ("orangecy") differs from the icon file name ("orangebrown")
# -- confirm whether this snippet is still needed.
style = """
  <Style id="orangecy">
    <IconStyle>
      <Icon><href>kml/marker-icon-orangebrown.png</href></Icon>
      <hotSpot x="0.5" y="0" xunits="fraction" yunits="fraction"/>
    </IconStyle>
  </Style>
"""

In [18]:
def _marker_style(icon_href):
    """Return a simplekml Style showing the icon at `icon_href`.

    Every marker uses the same hot spot: x=0.5, y=0, both in fraction units.
    """
    return simplekml.Style(iconstyle=simplekml.IconStyle(
        icon=simplekml.Icon(href=icon_href),
        hotspot=simplekml.HotSpot(x=0.5, y=0, xunits='fraction', yunits='fraction')))


# The KML document that the placemarks are added to.
kml = simplekml.Kml()

# One style per marker colour; which colour a placemark gets is decided where
# the placemarks are created.
dark_style = _marker_style("http://fnielsen.github.io/littar/kml/marker-icon-dark.png")
pinky_style = _marker_style("http://fnielsen.github.io/littar/kml/marker-icon-pinky.png")
orangecy_style = _marker_style("http://fnielsen.github.io/littar/kml/marker-icon-orangecy.png")
grey_style = _marker_style("http://fnielsen.github.io/littar/kml/marker-icon-grey.png")
yellow_style = _marker_style("http://fnielsen.github.io/littar/kml/marker-icon-yellow.png")
default_style = _marker_style("http://fnielsen.github.io/littar/kml/marker-icon.png")


# Add one placemark per location to `kml`. The description lists every work
# set at that location; the marker style encodes the genre (or "several works").
for location, works in grouped:
    # Defensive: normalise a single row handed over as a Series to a one-row frame.
    if isinstance(works, pd.Series):
        works = pd.DataFrame(works).T
    # From here on `location` is the human-readable label, not the group key.
    location = works.location_label.iloc[0]
    # These two labels get no marker (presumably too coarse for a single point).
    if location in [u'København', 'Danmark']:
        continue
    # `geo` looks like "Point(12.568333333 55.673611111)", i.e. longitude
    # first. KML coordinates (and therefore simplekml's `coords`) are also
    # (lon, lat), so the order must be kept; reversing it would swap the axes.
    coords = [float(coord) for coord in works.geo.iloc[0][6:-1].split()]
    grouped_works = works.groupby('work')
    descriptions = ""
    for work, work_data in grouped_works:
        if isinstance(work_data, pd.Series):
            work_data = pd.DataFrame(work_data).T
        # A work can have several rows (one per author/genre/...); collapse
        # the authors into one string. Sorted so the output is reproducible.
        author = ", ".join(sorted(set(work_data.author_label)))
        genres = set(work_data.genre_label)
        title = work_data.work_label.iloc[0]
        # Percent-encode the UTF-8 title for the search link (Python 2 urllib API).
        title_escaped = urllib.quote(title.encode('utf-8'))
        citat = work_data.citat.iloc[0]
        if citat:
            citat = u'»' + citat + u'«'
        description = description_template.format(
            author=author,
            title=title,
            title_escaped=title_escaped,
            citat=citat)
        descriptions += "<hr>" + description
    # Wrap the combined HTML (not just the last work's fragment) in CDATA so
    # the markup is carried through to the KML description verbatim.
    descriptions = "<![CDATA[" + descriptions + "]]>"
    pnt = kml.newpoint(
        name=location,
        coords=[coords],
        description=descriptions)
    # Style: several works at one spot win over genre; otherwise `genres`
    # holds the genres of the single work at this location.
    if len(grouped_works) > 1:
        pnt.style = dark_style
    elif genres.intersection(['roman', 'novelle', 'eventyr', 'historisk roman', 'kriminalroman']):
        pnt.style = default_style
    elif genres.intersection(['biografi', 'selvbiografi', 'erindringer']):
        pnt.style = orangecy_style
    elif genres.intersection(['poesi']):
        pnt.style = yellow_style
    elif genres.intersection(['faglitteratur', 'true crime']):
        pnt.style = pinky_style
    else:
        # Report genres that are not mapped to a colour yet.
        print(genres, title)
        pnt.style = grey_style

(set([u'faktion']), u'Den Hemmelige Socialdemokrat')
(set([u'']), u'Forf\xf8rerens Dagbog')
(set([u'']), u'Rejsen til Jordens Indre')
(set([u'']), u'Forf\xf8rerens Dagbog')
(set([u'faktion']), u'Den Hemmelige Socialdemokrat')
(set([u'faktion']), u'Den Hemmelige Socialdemokrat')
(set([u'']), u'Beton - historier fra skurvognen')
(set([u'']), u'Kamp og sejr')
(set([u'']), u'Lego - Den danske ledelseskanon, 3: Globaliseringen af den gode id\xe9')
(set([u'']), u'Rejsen til Jordens Indre')
(set([u'']), u'Dragerkustens brudenat')
(set([u'']), u'Kamp og sejr')
(set([u'']), u'Kamp og sejr')
(set([u'']), u'Kneveren fra Tj\xe6rby')
(set([u'']), u'\xd8jenvidner 1914-1918 - s\xf8nderjyske soldaters beretninger')
(set([u'faktion']), u'Den Hemmelige Socialdemokrat')
(set([u'']), u'S\xe5dan er danskerne!')
(set([u'faktion']), u'Den Hemmelige Socialdemokrat')
(set([u'faktion']), u'Den Hemmelige Socialdemokrat')
(set([u'faktion']), u'Den Hemmelige Socialdemokrat')
(set([u'faktion']), u'Den Hemmelige Soc

In [19]:
# Write the KML document to disk.
# NOTE(review): the target is a hard-coded relative path, and the library
# placemarks are added to `kml` in a later cell, so they only end up in the
# file if save() is called again after that cell -- confirm this is intended.
kml.save("../../../Dropbox/Public/littar.kml")

In [20]:
def to_coord(value):
    """Parse ``value.geo`` ("Point(<a> <b>)") into the float list ``[b, a]``.

    The first six characters ("Point(") and the closing parenthesis are cut
    off, the remainder is split on whitespace, every part is converted to
    float and the resulting list is returned in reversed order.
    """
    numbers = list(map(float, value.geo[6:-1].split()))
    numbers.reverse()
    return numbers

In [22]:
# Add one placemark per library to `kml`.
# Note: re-running this cell adds the same placemarks a second time.
for index, row in libraries.iterrows():
    name = row['libraryLabel']
    # `geo` looks like "Point(11.4294413 54.9511807)", i.e. longitude first.
    # KML coordinates (and simplekml's `coords`) are (lon, lat) as well, so
    # the two numbers are passed on in their original order.
    coords = row['geo'][6:-1].split()
    url = stringify(row['url'])
    address = stringify(row['address'])
    description = libraries_template.format(url=url, address=address)
    pnt = kml.newpoint(
            name=name,
            coords=[coords],
            description=description)

In [23]:
# Dry run over the location groups: produces no output, it only leaves
# `location`, `works` and `grouped_works` bound to the values of the last
# iterations (`grouped_works` is not touched for the two skipped labels).
for location, works in grouped:
    if type(works) is pd.Series:
        works = pd.DataFrame(works).T
    location = works.location_label.iloc[0]
    if location not in [u'København', 'Danmark']:
        grouped_works = works.groupby('work')

In [24]:
# One array row per DataFrame row, in row order: to_coord(row), i.e. the two
# numbers of the row's `geo` value in reversed order.
libraries_coords = np.array(
    [to_coord(library_row) for _position, library_row in libraries.iterrows()])
works_coords = np.array(
    [to_coord(work_row) for _position, work_row in df.iterrows()])

In [25]:
# For every library: print its coordinates, store the distance (in metres) to
# the closest work location in the 'Minimum distance to work' column, and
# print the (up to) three closest work rows with their distance in km.
for n in range(libraries_coords.shape[0]):
    library_coord = libraries_coords[n, :]
    # Row n of libraries_coords was built from the n-th row of `libraries`;
    # translate that position into the row's index label for `.loc`
    # (`.ix` is not available in newer pandas releases).
    library_label = libraries.index[n]
    print('{} - {}, {}'.format(libraries.loc[library_label, 'libraryLabel'], library_coord[0], library_coord[1]))
    distances = [vincenty(work_coord, library_coord).meters for work_coord in works_coords]
    # Positions into `works_coords` / rows of `df`, closest first.
    indices = np.argsort(distances)
    libraries.loc[library_label, 'Minimum distance to work'] = distances[indices[0]]
    # Slicing instead of range(3) so fewer than three works do not raise.
    for m in indices[:3]:
        a, b = df.loc[df.index[m], ['work_label', 'location_label']].values.tolist()
        print("  {} - {} - {:f} km".format(a, b, distances[m] / 1000))

Bibliotekshuset - 55.650359, 12.6132581
  Planen - Urbanplanen - 1.179287 km
  Drabschefen - nye sager - Polensgade - 1.598576 km
  Drabschefen - nye sager - Polensgade - 1.598576 km
Sydhavnens Bibliotek - 55.6468105, 12.529623
  spindoktor - Sydhavnen - 0.225328 km
  spindoktor - Sydhavnen - 0.225328 km
  The Hornet's sting - Harald Jensens Gade - 0.533911 km
Tingbjerg Bibliotek - 55.718864, 12.481163
  Drabschefen - nye sager - Tingbjerg - 0.317672 km
  Drabschefen - nye sager - Tingbjerg - 0.317672 km
  Koordinater - Mosesvinget - 1.905758 km
Valby Bibliotek - 55.665354, 12.513072
  Attentatet - Pelargonievej - 0.563470 km
  Attentatet - Pelargonievej - 0.563470 km
  Attentatet - Pelargonievej - 0.563470 km
Kulturstationen Vanløse. Biblioteket - 55.686532, 12.491274
  Han fik morderne til at tilstå - Katrinedals Skole - 0.612625 km
  Han fik morderne til at tilstå - Dybendalsvej - 1.001613 km
  Dobbeltmordet på Peter Bangs Vej - sagen genoptaget - Peter Bangs Vej - 1.320854 km
Veste

In [26]:
# The 20 libraries whose closest work location is farthest away
# (values are the distances stored in 'Minimum distance to work').
libraries.sort_values(by='Minimum distance to work', ascending=False).head(20)

Unnamed: 0,library,libraryLabel,geo,url,address,Minimum distance to work
416,http://www.wikidata.org/entity/Q21106842,LollandBibliotekerne. Fejø bibliotekspunkt: L...,Point(11.4294413 54.9511807),,Herredsvej 259,26567.241794
281,http://www.wikidata.org/entity/Q21106935,Vester Skerninge Bibliotek,Point(10.4492831 55.0732223),,Fåborgvej 53,25221.804263
431,http://www.wikidata.org/entity/Q21106865,Servicepunkt Gedser,Point(11.9305815 54.5758245),,,25062.988187
414,http://www.wikidata.org/entity/Q21106839,LollandBibliotekerne. Horslunde bibliotekspunk...,Point(11.2170846 54.9101833),,Hovedgaden 6,23614.112018
96,http://www.wikidata.org/entity/Q21107216,Tarm Bibliotek,Point(8.516476 55.907727),,Engvej 2,22736.246244
370,http://www.wikidata.org/entity/Q21098860,Sejerø Bibliotek,Point(11.1434117 55.8797104),,Mastrupvej,22382.795968
407,http://www.wikidata.org/entity/Q21106828,Nykøbing Sj. Bibliotek,Point(11.6654828 55.9225568),,Vesterbro Torv,22168.086244
97,http://www.wikidata.org/entity/Q21107217,Videbæk Bibliotek,Point(8.629023 56.090198),,Fischersvej 6,21547.743951
95,http://www.wikidata.org/entity/Q21107215,Skjern Bibliotek,Point(8.4964523 55.9479462),,Engtoften 1,21528.804714
428,http://www.wikidata.org/entity/Q21106861,Sakskøbing Bibliotek,Point(11.6353457 54.7977338),,Rådhusgade 6,21310.000197
