In [1]:
import re
import sys
import time
import urllib
import urllib.parse

import numpy as np
import osmium
import pandas as pd

In [2]:
from rdflib import Graph, Namespace, URIRef, BNode, Literal
from rdflib.namespace import RDF, FOAF, XSD

In [6]:
# Wall-clock start of the conversion; the elapsed seconds are printed by the
# last statement of this cell, after the graph has been serialized.
start = time.time()
class osm2rdf_handler(osmium.SimpleHandler):
    """Turn OSM nodes and their tags into RDF triples held in an rdflib graph.

    For every node the handler offers the latitude, the longitude, a WKT point
    and each tag to :meth:`printTriple`, which decides whether and how the
    value is added to ``self.g`` (also reachable as ``self.graph``).

    Args:
        ontology_path: Tab-separated file with ``key`` and ``value`` columns.
            A tag whose key/value pair appears here yields an ``rdf:type``
            triple instead of a plain property.
        key_list_path: Tab-separated file with a ``key`` column listing the
            tag keys that are emitted as literal-valued properties.
    """

    # Prefix -> namespace IRI. Each entry is bound on the graph and exposed as
    # an instance attribute of the same name (``self.wd``, ``self.geo``, ...).
    _NAMESPACES = (
        ("wd", "http://www.wikidata.org/wiki/"),
        ("wdt", "http://www.wikidata.org/prop/direct/"),
        ("wkg", "http://worldkg.l3s.uni-hannover.de/resource/"),
        ("wkgs", "http://worldkg.l3s.uni-hannover.de/schema/"),
        ("geo", "http://www.opengis.net/ont/geosparql#"),
        ("rdfs", "http://www.w3.org/2000/01/rdf-schema#"),
        ("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#"),
        ("ogc", "http://www.opengis.net/rdf#"),
        ("sf", "http://www.opengis.net/ont/sf#"),
    )

    # Base IRIs used to mint subjects, classes and properties.
    _RESOURCE_BASE = 'http://worldkg.l3s.uni-hannover.de/resource/'
    _SCHEMA_BASE = 'http://worldkg.l3s.uni-hannover.de/schema/'
    _WIKIDATA_BASE = 'http://www.wikidata.org/wiki/'

    # Fixed terms, built once instead of once per triple.
    _RDF_TYPE = URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type')
    _RDFS_LABEL = URIRef('http://www.w3.org/2000/01/rdf-schema#label')
    _SF_POINT = URIRef('http://www.opengis.net/ont/sf#Point')

    def __init__(self, ontology_path='OSM_Ontology_map_features.csv',
                 key_list_path='Key_List.csv'):
        super().__init__()
        # NOTE(review): nothing in this class increments `counts`; it is only
        # printed by `__close__`.
        self.counts = 0
        self.g = Graph()
        self.graph = self.g  # second name for the same graph object

        for prefix, iri in self._NAMESPACES:
            namespace = Namespace(iri)
            setattr(self, prefix, namespace)
            self.g.bind(prefix, namespace)

        self.supersub = pd.read_csv(
            ontology_path, sep='\t', encoding='utf-8'
        ).drop_duplicates()
        self.key_list = list(
            pd.read_csv(key_list_path, sep='\t', encoding='utf-8')['key']
        )
        # {tag key: [values that denote a class]}
        self.dict_class = (
            self.supersub.groupby('key')['value'].apply(list).to_dict()
        )

        # Set-based snapshots of the two tables above. printTriple runs once
        # per tag, so membership tests must not scan a list every time.
        self._key_set = set(self.key_list)
        self._class_values = {
            key: set(values) for key, values in self.dict_class.items()
        }

    def printTriple(self, s, p, o):
        """Add the triple(s) for one subject / key / value to the graph.

        Args:
            s: Identifier of the subject, appended to the resource base IRI.
            p: Tag key, or the pseudo-key ``"Point"`` for the node geometry.
            o: Value as a string.

        Keys listed in the ontology table only ever produce ``rdf:type``
        triples. Other keys are handled by name (``Point``, ``name``,
        ``wikidata``, ``wikipedia``) or, when they appear in the key list,
        emitted as a literal-valued property. Anything else is dropped.
        """
        subject = URIRef(self._RESOURCE_BASE + s)

        class_values = self._class_values.get(p)
        if class_values is not None:
            if o in class_values:
                # key=value names a class: the value becomes the type.
                self.g.add((subject, self._RDF_TYPE,
                            URIRef(self._SCHEMA_BASE + o)))
            if o == 'Yes':
                # key=Yes: the key itself becomes the type.
                self.g.add((subject, self._RDF_TYPE,
                            URIRef(self._SCHEMA_BASE + p)))
            return

        if p == 'Point':
            self.g.add((subject, self._RDF_TYPE, self._SF_POINT))
            self.g.add((subject, self.geo["asWKT"],
                        Literal(o, datatype=self.geo.wktLiteral)))
        elif p == 'name':
            self.g.add((subject, self._RDFS_LABEL, Literal(o)))
        elif p == 'wikidata':
            self.g.add((subject, URIRef(self._SCHEMA_BASE + p),
                        URIRef(self._WIKIDATA_BASE + o)))
        elif p == 'wikipedia':
            # Expected form is "<language>:<title>". Split on the first colon
            # only so that titles containing a colon are kept whole.
            language, colon, title = o.partition(':')
            if not colon:
                # No language prefix, so no URL can be built; skip this tag
                # rather than abort the whole conversion with an IndexError.
                return
            url = ('https://' + language + '.wikipedia.org/wiki/'
                   + language + ':' + urllib.parse.quote(title))
            self.g.add((subject, URIRef(self._SCHEMA_BASE + 'wikipedia'),
                        URIRef(url)))
        elif p in self._key_set:
            self.g.add((subject, URIRef(self._SCHEMA_BASE + p), Literal(o)))

    def __close__(self):
        """Serialize the graph to a fixed Turtle file and print ``counts``.

        NOTE(review): nothing in this file calls this method, and the name
        does not look like a standard Python or pyosmium hook. Confirm whether
        it is still needed.
        """
        self.g.serialize('TriplesNetherlandstest.ttl', format="turtle",
                         encoding="utf-8")
        print(str(self.counts))

    def node(self, n):
        """Handle one OSM node: emit its coordinates, geometry and tags.

        Args:
            n: The node object handed over by pyosmium; ``n.id``,
                ``n.location`` and ``n.tags`` are read.
        """
        node_id = str(n.id)
        lat = str(n.location.lat)
        lon = str(n.location.lon)

        self.printTriple(node_id, "lat", lat)
        self.printTriple(node_id, "long", lon)
        # WKT is "x y". A wktLiteral without an explicit CRS is read as CRS84,
        # i.e. longitude first, then latitude.
        self.printTriple(node_id, "Point", 'Point(' + lon + ' ' + lat + ')')

        for k, v in n.tags:
            # Only flatten line breaks. Backslashes and quotes are left
            # untouched: rdflib escapes them itself when serializing, so
            # escaping here would put literal backslashes into the data.
            value = str(v).replace('\n', " ")
            self.printTriple(node_id, k.replace(" ", ""), value)
# Build the handler (its constructor reads the two tab-separated lookup files),
# run it over the Bremen OSM extract, then write the collected graph as Turtle.
h = osm2rdf_handler()
h.apply_file('bremen-latest.osm.pbf')
h.graph.serialize('Triples1.ttl',format="turtle", encoding = "utf-8" )
# Elapsed wall-clock seconds since `start` was taken above the class definition.
end = time.time()
print(end - start)

574.8360550403595
