Runeberg
========

In [1]:
from everything import *
from dasem.runeberg import Runeberg

In [2]:
runeberg = Runeberg()

In [3]:
catalogue = runeberg.catalogue()

In [4]:
catalogue.ix[catalogue.language == 'dk', :].head(15)

Unnamed: 0,author,author_id,book_id,language,title,type,year
21,Alexander von Humboldt,humboale,eurasrus,dk,A. v. Humboldts Reiser i det Europæiske og Asi...,Book,1856
27,Hans Christian Ørsted,orstehan,aanden,dk,Aanden i Naturen,Book,1850
29,Karl Gjellerup,gjelleka,aanderotid,dk,Aander og Tider. Et Rekviem over Charles Darwin,Book,1882
30,Frederik Dreier,dreiefre,aandetroen,dk,Aandetroen og Den frie Tænkning,Book,1852
31,Emil Aarestrup,aarestru,aarestrups,dk,Aarestrups samlede digte,Book,1913
40,Adam Bremensis,adambrem,adamafbrem,dk,Adam af Bremen om Menigheden i Norden under Er...,Book,1862
41,Gustav Wied,wiedgust,adelgejs,dk,"Adel, Gejstlighed, Borger og Bonde",Book,1897
76,Christian Frederik Emil Horneman,hornechr,aladdinev,dk,Aladdin. Eventyr-Opera i 4e Akter,Music,1893
86,Jens Zetlitz,zetlijen,alkoran,dk,Alkoran d. e. Capitlernes Bog for 1806,Book,1806
99,Thomas Hansen Erslew,ersletho,almiforf,dk,Almindeligt Forfatter-Lexicon for Kongeriget D...,Book,1843-1868


In [5]:
query = """
SELECT ?author ?authorLabel ?runeberg WHERE {
  ?author wdt:P3154 ?runeberg .
  SERVICE wikibase:label { bd:serviceParam wikibase:language "da". }  
} 
"""
service = sparql.Service('https://query.wikidata.org/sparql', method="GET")
response = service.query(query)
wikidata = pd.DataFrame(response.fetchall(), columns=response.variables)

In [6]:
wikidata.head()

Unnamed: 0,author,authorLabel,runeberg
0,http://www.wikidata.org/entity/Q991,Fjodor Dostojevskij,dostofjo
1,http://www.wikidata.org/entity/Q5679,Lord Byron,byrongeo
2,http://www.wikidata.org/entity/Q211039,Jeppe Aakjær,aakjajep
3,http://www.wikidata.org/entity/Q347953,Steen Steensen Blicher,blicher
4,http://www.wikidata.org/entity/Q331893,N.F.S. Grundtvig,grundtvg


In [7]:
wikidata.runeberg = wikidata.runeberg.astype(str)
wikidata.author = wikidata.author.astype(str)
wikidata.authorLabel = wikidata.authorLabel.astype(unicode)
wikidata.rename(columns={'runeberg': 'author_id'}, inplace=True)
wikidata.head()

Unnamed: 0,author,authorLabel,author_id
0,http://www.wikidata.org/entity/Q991,Fjodor Dostojevskij,dostofjo
1,http://www.wikidata.org/entity/Q5679,Lord Byron,byrongeo
2,http://www.wikidata.org/entity/Q211039,Jeppe Aakjær,aakjajep
3,http://www.wikidata.org/entity/Q347953,Steen Steensen Blicher,blicher
4,http://www.wikidata.org/entity/Q331893,N.F.S. Grundtvig,grundtvg


In [8]:
def aggregator(elements):
    return u",".join(set(elements))

catalogue_authors = catalogue.groupby('author_id', as_index=False).agg(aggregator)

In [9]:
catalogue_authors.head()

Unnamed: 0,author_id,author,book_id,language,title,type,year
0,,,"trefinska,stud1840,stud1845,nordutid,univhus,v...","fr,dk,no,ee,is,de,p>,it,us,fi,se,fo","Vetenskapen och livet,Boken om Stockholm i ord...","Music,Book,Administrative,Periodical",",1853-1857,1903-1931,1857,1900-1913,1772-1773,..."
1,aakjajep,Jeppe Aakjær,regnebraet,dk,Mit Regnebræt En Selvbibliografi,Book,1919
2,aalljaco,Jacob Aall,norge-1815,no,Erindringer som Bidrag til Norges Historie fra...,Book,1844
3,aaloelev,Ülev Aaloe,svet1979,se,Svensk-estnisk ordbok : Rootsi-eesti sõnaraamat,Book,1979
4,aarestru,Emil Aarestrup,"efterladte,eenrum,aarestrups",dk,"Efterladte Digter,Naar jeg i eenrum færdes,Aar...",Book,",1913,1863"


In [10]:
merged = pd.merge(catalogue_authors, wikidata, on='author_id', how='outer', suffixes=('_runeberg', '_wikidata'))
merged.ix[:, ['author_id', 'author_runeberg', 'authorLabel', 'author_wikidata']]

Unnamed: 0,author_id,author_runeberg,authorLabel,author_wikidata
0,,,,
1,aakjajep,Jeppe Aakjær,Jeppe Aakjær,http://www.wikidata.org/entity/Q211039
2,aalljaco,Jacob Aall,,
3,aaloelev,Ülev Aaloe,,
4,aarestru,Emil Aarestrup,Emil Aarestrup,http://www.wikidata.org/entity/Q350408
5,aaseniva,Ivar Aasen,,
6,aasvehaa,Haakon Aasvejen,,
7,abergjon,Jon Olof Åberg,,
8,aberssim,Simon Aberstén,,
9,abrahsel,Selma Abrahamsson,,


In [11]:
# How many authors in Danish?
merged.ix[merged.language=='dk', :].shape[0]

120

In [12]:
danish_merged = merged.ix[merged.language=='dk', ['author_id', 'author_runeberg', 'authorLabel', 'author_wikidata']]
pd.set_option('display.max_rows', len(danish_merged))
danish_merged

Unnamed: 0,author_id,author_runeberg,authorLabel,author_wikidata
1,aakjajep,Jeppe Aakjær,Jeppe Aakjær,http://www.wikidata.org/entity/Q211039
4,aarestru,Emil Aarestrup,Emil Aarestrup,http://www.wikidata.org/entity/Q350408
11,adambrem,Adam Bremensis,Adam af Bremen,http://www.wikidata.org/entity/Q109244
19,aggerpau,Pauline Aggersborg,Pauline Kristine Aggersborg,http://www.wikidata.org/entity/Q28867605
60,anderant,Anton Andersen,Anton Andersen,http://www.wikidata.org/entity/Q26839026
79,ankjaer,Stefan Ankjær,Stefan Ankjær,http://www.wikidata.org/entity/Q12337029
109,bajerfre,Fredrik Bajer,Fredrik Bajer,http://www.wikidata.org/entity/Q191016
119,baudisop,Sophus Bauditz,Sophus Bauditz,http://www.wikidata.org/entity/Q5567266
120,becketyg,Tyge Becker,Tyge Becker,http://www.wikidata.org/entity/Q5568339
138,berggand,Andreas Peter Berggreen,Andreas Peter Berggreen,http://www.wikidata.org/entity/Q499921
