## Enriching the dataset with geographic locations based on Wikidata and GeoNames

In [24]:
import pandas as pd #for handling csv and csv contents
from rdflib import Graph, Literal, RDF, URIRef, Namespace #basic RDF handling

In [25]:
# Base URI under which every location resource is minted; the normalised
# location name (lower-cased, spaces removed) is appended to this prefix.
locationPattern = 'http://example.org/location/'

In [26]:
# Load the location table. The 'geonames' column is forced to str so the
# identifiers are kept as text instead of being parsed as numbers (a numeric
# column with missing values would otherwise become float).
df = pd.read_csv(
    'data/movingImageArchive/MovingImageArchiveGeoNames.csv',
    sep=",",
    quotechar='"',
    dtype={'geonames': str},
)
print(df)

             Location       lat     long   wikidata geonames
0            Aberdeen  57.14369 -2.09814     Q36405  2657832
1       Aberdeenshire  57.16667 -2.66667    Q189912  2657830
2               Angus  56.66667 -2.91667    Q202177  2657306
3         Argyllshire  56.25000 -5.25000    Q652539  2657088
4            Ayrshire  55.50000 -4.50000    Q793283  2656700
5               Banff  57.66477 -2.52964     Q54809  2656402
6        Berwickshire  55.75000 -2.50000    Q786649  2655820
7             Borders       NaN      NaN   Q9177476      NaN
8                Bute  55.83333 -5.10000   Q1147435  2654168
9           Caithness  58.41667 -3.50000    Q864668  2654041
10   Clackmannanshire  56.16667 -3.75000    Q207268  2652975
11      Dumfriesshire  55.16667 -3.50000   Q1247384  2650795
12     Dunbartonshire  56.12639 -4.42069  Q17582129  7280022
13             Dundee  56.46913 -2.97489    Q123709  2650752
14       East Lothian  55.91667 -2.75000    Q207257  2650386
15          Edinburgh  5

In [27]:
# Graph that will collect the location triples.
g = Graph()

# Vocabularies used (or available) when describing the locations.
owl = Namespace('http://www.w3.org/2002/07/owl#')
wgs84_pos = Namespace("http://www.w3.org/2003/01/geo/wgs84_pos#")
schema = Namespace("http://schema.org/")
edm = Namespace("http://www.europeana.eu/schemas/edm/")
rdfs = Namespace("http://www.w3.org/2000/01/rdf-schema#")
rdf = Namespace("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
dcterms = Namespace("http://purl.org/dc/terms/")
skos = Namespace("http://www.w3.org/2004/02/skos/core#")

# Register a readable prefix for each vocabulary so serialisations use
# e.g. `skos:prefLabel` instead of the full URI. Order matches declaration.
for prefix, namespace in (
    ("owl", owl),
    ("wgs84_pos", wgs84_pos),
    ("schema", schema),
    ("edm", edm),
    ("rdfs", rdfs),
    ("rdf", rdf),
    ("dcterms", dcterms),
    ("skos", skos),
):
    g.bind(prefix, namespace)

In [28]:
# Turn every row of the location table into RDF triples in `g`.
for index, row in df.iterrows():

    # Mint the subject URI once per row: base pattern + lower-cased location
    # name with spaces removed (e.g. "East Lothian" -> ".../eastlothian").
    place = URIRef(locationPattern + row['Location'].lower().replace(" ", ""))

    # add types
    g.add((place, URIRef(rdf + "type"), URIRef(edm + "Place")))
    g.add((place, URIRef(rdf + "type"), URIRef(schema + "Place")))

    # add prefLabel (English language tag)
    g.add((place, URIRef(skos + "prefLabel"), Literal(row["Location"], "en")))

    # add lat and long -- only when present. Rows without coordinates hold NaN,
    # and str(NaN) would otherwise be written out as the literal "nan".
    if not pd.isnull(row["lat"]):
        g.add((place, URIRef(wgs84_pos + "lat"), Literal(str(row["lat"]))))
    if not pd.isnull(row["long"]):
        g.add((place, URIRef(wgs84_pos + "long"), Literal(str(row["long"]))))

    # link to the external identifiers, skipping rows where they are missing
    if not pd.isnull(row['wikidata']):
        g.add((place, URIRef(owl + "sameAs"), URIRef('https://www.wikidata.org/wiki/' + str(row['wikidata']))))
    if not pd.isnull(row['geonames']):
        g.add((place, URIRef(owl + "sameAs"), URIRef('https://www.geonames.org/' + str(row['geonames']))))

In [29]:
# Preview the graph as Turtle for a quick visual check.
turtle_preview = g.serialize(format='turtle')
print(turtle_preview)

# Persist the location graph as RDF/XML.
g.serialize(destination='rdf/locations.rdf', format='xml')

@prefix edm: <http://www.europeana.eu/schemas/edm/> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix schema: <http://schema.org/> .
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
@prefix wgs84_pos: <http://www.w3.org/2003/01/geo/wgs84_pos#> .

<http://example.org/location/aberdeen> a schema:Place,
        edm:Place ;
    owl:sameAs <https://www.geonames.org/2657832>,
        <https://www.wikidata.org/wiki/Q36405> ;
    wgs84_pos:lat "57.14369" ;
    wgs84_pos:long "-2.09814" ;
    skos:prefLabel "Aberdeen"@en .

<http://example.org/location/aberdeenshire> a schema:Place,
        edm:Place ;
    owl:sameAs <https://www.geonames.org/2657830>,
        <https://www.wikidata.org/wiki/Q189912> ;
    wgs84_pos:lat "57.16667" ;
    wgs84_pos:long "-2.66667" ;
    skos:prefLabel "Aberdeenshire"@en .

<http://example.org/location/angus> a schema:Place,
        edm:Place ;
    owl:sameAs <https://www.geonames.org/2657306>,
        <https://www.wikidata.org/wiki/Q202177> ;
    wgs

<Graph identifier=N1f4ea7c06ac14540a064139b1e9e09c0 (<class 'rdflib.graph.Graph'>)>

In [30]:
# Start from an empty graph and load both RDF files into it, so the location
# triples and the moving-image-archive triples end up in a single graph.
g = Graph()
for rdf_path in ("rdf/locations.rdf", "rdf/movingImageArchive.rdf"):
    g.parse(rdf_path)

# Write the combined graph out as RDF/XML.
g.serialize(destination='rdf/movingImageArchiveEnriched.rdf', format='xml')

<Graph identifier=N5a83aafdf96f43bb85749582e3ddda70 (<class 'rdflib.graph.Graph'>)>