In [1]:
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')
%config InlineBackend.figure_format = 'retina'
%pylab inline

from __future__ import division
import rdflib
from rdflib.graph import Graph, ConjunctiveGraph
from rdflib import Graph, URIRef, BNode, Literal
from rdflib import RDF
from rdflib import Namespace
from rdflib.namespace import OWL, RDF, RDFS, FOAF, XSD
from SPARQLWrapper import SPARQLWrapper, JSON
import numpy as np
import matplotlib.pyplot as plt
from shapely.geometry.multipolygon import MultiPolygon
from shapely import wkt
from descartes import PolygonPatch
from shapely.geometry.polygon import LinearRing
import seaborn as sns
import pandas as pd
from matplotlib.collections import PatchCollection

from mpl_toolkits.axes_grid1 import make_axes_locatable
from mpl_toolkits.axes_grid1.inset_locator import inset_axes

Populating the interactive namespace from numpy and matplotlib


In [2]:
### Ontologies that already exists and we use them to define characteristcs such as population, name, etc
schema = rdflib.Namespace('http://schema.org/')  
time   = rdflib.Namespace('http://www.w3.org/2006/time#')
dbo    = rdflib.Namespace('http://dbpedia.org/ontology/')
dbp    = rdflib.Namespace('http://dbpedia.org/property/')
res    = rdflib.Namespace('http://dbpedia.org/resource/')
rdf    = rdflib.Namespace('http://www.w3.org/1999/02/22-rdf-syntax-ns#')
rdfs   = rdflib.Namespace('http://www.w3.org/2000/01/rdf-schema#')

# From .csv to .rdf

In [10]:
### Load Data in .csv
path = 'C:\Users\massaro\Google Drive\Herus all new\Projects\Ontology\Joao'
df =  pd.read_csv( path+'\data_sample.csv' )

In [4]:
## we use rdflib library to create our dataset in an RDF format

g = Graph()  
joao =  Namespace("http://www.semanticweb.org/joaoOntology#")
g.bind("joao", joao)
# Create the graph 

In [5]:
co2Emission = URIRef(joao["co2Emission"])
# Add the OWL data to the graph
g.add((co2Emission, RDF.type, OWL.Class))
g.add((co2Emission, RDFS.subClassOf, OWL.Thing))
g.add((co2Emission, OWL.equivalentClass, RDF.Property))

In [6]:
for i in df.index:
    tmp = 'city%d'%i
    tmp = URIRef(joao[tmp])
    g.add((tmp, RDF.type, dbo.city))
    
    name = df.city[i]
    #name = Literal(name)
    nameC = name.replace(" ", "_")
    nameC = nameC.replace("'", "_")
    nameC = nameC.replace(",", "")
    nameC = nameC.replace(".", "")
    
    nameC = Literal(nameC)  ## we transform our data in a 'linkable' format
    g.add((tmp,  FOAF.name, nameC ))  ## we use fried of fried (FOAF) ontology to add name to the our cities
    
    
    emission = df.co2[i]
    emission = Literal(emission)    
    
    g.add((tmp, joao.co2Emission, emission ))
    
    population = int(df.population[i])
    population = Literal(population)    
    g.add((tmp, dbo.populationTotal, population ))
    

In [117]:
g.serialize(destination='outputJoao.nt', format='nt')

# Get Data from the web

In [289]:
sparql = SPARQLWrapper("http://factforge.net/repositories/ff-news")
query = """
PREFIX onto: <http://www.ontotext.com/>
PREFIX gn: <http://www.geonames.org/ontology#>
PREFIX dbo: <http://dbpedia.org/ontology/>
PREFIX dbr: <http://dbpedia.org/resource/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX wgs84_pos: <http://www.w3.org/2003/01/geo/wgs84_pos#>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX geo: <http://www.opengis.net/ont/geosparql#>
PREFIX schema: <http://schema.org/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>


SELECT ?resource ?population ?code ?label ?country FROM onto:disable-sameAs

WHERE {
    #?resource a schema:Country.
    ?resource a dbo:Country.
    #?resource gn:featureClass  gn:P.
    #?resource gn:featureCode  gn:A.ADM3.
    #?resource gn:A.ADM1.
    ?resource dbo:populationTotal ?population.
    ?resource gn:featureCode ?code.
    OPTIONAL {
    ?resource geo:hasGeometry ?Geometry.
    ?resource geo:asWKT ?Coords.
    }
    ?resource rdfs:label ?label.
    #FILTER (?population > 10000000)
    FILTER (lang(?label) = 'en')
}
ORDER BY DESC(?population)
"""
sparql.setQuery(query)
sparql.setReturnFormat(JSON)
results = sparql.query().convert()

In [290]:
for result in results["results"]["bindings"]:
    print result["label"]["value"], result["population"]["value"], result["code"]["value"]

China 1373541278 http://www.geonames.org/ontology#A.PCLI
India 1326572000 http://www.geonames.org/ontology#A.PCLI
European Union 510056011 http://www.geonames.org/ontology#A.ZN
United States 325145963 http://www.geonames.org/ontology#A.PCLI
Dutch East Indies 263510000 http://www.geonames.org/ontology#A.PCLI
Indonesia 263510000 http://www.geonames.org/ontology#A.PCLI
Brazil 207350000 http://www.geonames.org/ontology#A.PCLI
Pakistan 201995540 http://www.geonames.org/ontology#A.PCLI
Nigeria 188462640 http://www.geonames.org/ontology#A.PCLI
Bangladesh 163187000 http://www.geonames.org/ontology#A.PCLI
Russia 144498215 http://www.geonames.org/ontology#A.PCLI
Mexico 119530753 http://www.geonames.org/ontology#A.PCLI
Ethiopia 99465819 http://www.geonames.org/ontology#A.PCLI
Vietnam 92700000 http://www.geonames.org/ontology#A.PCLI
Congo - Kinshasa 82243000 http://www.geonames.org/ontology#A.PCLI
Democratic Republic of the Congo 82243000 http://www.geonames.org/ontology#A.PCLI
Germany 82175700 ht

In [223]:
sparql = SPARQLWrapper("http://factforge.net/repositories/ff-news")
query = """
PREFIX onto: <http://www.ontotext.com/>
PREFIX gn: <http://www.geonames.org/ontology#>
PREFIX dbo: <http://dbpedia.org/ontology/>
PREFIX dbr: <http://dbpedia.org/resource/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX wgs84_pos: <http://www.w3.org/2003/01/geo/wgs84_pos#>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX geo: <http://www.opengis.net/ont/geosparql#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

SELECT ?geonameuri ?population ?citylabel ?country FROM onto:disable-sameAs
WHERE {
    #?loc gn:parentFeature dbr:Asia.
    ?loc gn:featureClass  gn:P.
    #?loc gn:featureCode gn:A.ADM3.
    ?loc gn:featureCode gn:P.PPL.
    ?loc owl:sameAs ?geonameuri.
    ?loc dbo:populationTotal ?population.
    ?loc dbo:country ?country .
    ?country a dbo:Country .
    ?loc rdfs:label ?citylabel.
    #?geonameuri wgs84_pos:lat ?latitude.
    FILTER(?population > 300000 )
    #FILTER (lang(?citylabel) = 'en')
    ?country skos:prefLabel ?country_name .  
} ORDER BY DESC(?population)

"""
sparql.setQuery(query)
sparql.setReturnFormat(JSON)
results = sparql.query().convert()

In [201]:
for result in results["results"]["bindings"]:
    print result["citylabel"]["value"], result["population"]["value"], result["country"]["value"]

Telkabad 427234598036 http://dbpedia.org/resource/Iran
Clonmany 492582018 http://dbpedia.org/resource/Republic_of_Ireland
Dher Umid Ali Shah 184000000 http://dbpedia.org/resource/Pakistan
Odagaon 36000000 http://dbpedia.org/resource/India
Baoding 11194379 http://dbpedia.org/resource/China
Nanyang, Henan 10263660 http://dbpedia.org/resource/China
Linyi 10039440 http://dbpedia.org/resource/China
Weifang 9086241 http://dbpedia.org/resource/China
New York City 8175133 http://dbpedia.org/resource/United_States
Shangqiu 7362975 http://dbpedia.org/resource/China
Xingtai 7104103 http://dbpedia.org/resource/China
Shaoyang 7071741 http://dbpedia.org/resource/China
Yantai 6968202 http://dbpedia.org/resource/China
Yulin, Guangxi 6910000 http://dbpedia.org/resource/China
Erp, Netherlands 6743189 http://dbpedia.org/resource/Netherlands
Luoyang 6549941 http://dbpedia.org/resource/China
Xinyang 6109106 http://dbpedia.org/resource/China
Borno State 5925668 http://dbpedia.org/resource/Nigeria
Qujing 585

Batu Pahat (town) 417458 http://dbpedia.org/resource/Malaysia
Cilegon 416464 http://dbpedia.org/resource/Indonesia
Haldwani 416169 http://dbpedia.org/resource/India
Karji 415727 http://dbpedia.org/resource/India
Zhonghe District 414356 http://dbpedia.org/resource/Taiwan
Malkajgiri 413571 http://dbpedia.org/resource/India
Xinzhuang District 413443 http://dbpedia.org/resource/Taiwan
Kotputli 413256 http://dbpedia.org/resource/India
Kashiwa 411602 http://dbpedia.org/resource/Japan
Thoothukudi 410760 http://dbpedia.org/resource/India
Bellary 410445 http://dbpedia.org/resource/India
Betim 408448 http://dbpedia.org/resource/Brazil
Shikohabad 405472 http://dbpedia.org/resource/India
Ciudad Obregón 405000 http://dbpedia.org/resource/Mexico
Qatif 404182 http://dbpedia.org/resource/Saudi_Arabia
Purnia 404061 http://dbpedia.org/resource/India
Bawshar 402023 http://dbpedia.org/resource/Oman
English Bazar 400295 http://dbpedia.org/resource/India
Malé Dvorníky 400069 http://dbpedia.org/resource/Slov

In [148]:
sparql = SPARQLWrapper("http://factforge.net/repositories/ff-news")
query = """
PREFIX onto: <http://www.ontotext.com/>
PREFIX gn: <http://www.geonames.org/ontology#>
PREFIX dbo: <http://dbpedia.org/ontology/>
PREFIX dbr: <http://dbpedia.org/resource/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX wgs84_pos: <http://www.w3.org/2003/01/geo/wgs84_pos#>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX geo: <http://www.opengis.net/ont/geosparql#>

SELECT ?citylabel ? population FROM onto:disable-sameAs
WHERE {
    ?loc gn:parentFeature dbr:Eastern_Europe ; 
        gn:featureClass  gn:P.
    ?loc dbo:populationTotal ?population ; 
        dbo:country ?country .
        
    ?loc rdfs:label ?citylabel.
    ?country a dbo:Country .
    FILTER(?population > 300000 )

    ?country skos:prefLabel ?country_name .  
} ORDER BY DESC(?population)
"""
sparql.setQuery(query)
sparql.setReturnFormat(JSON)
results = sparql.query().convert()

QueryBadFormed: QueryBadFormed: a bad request has been sent to the endpoint, probably the sparql query is bad formed. 

Response:
MALFORMED QUERY: Encountered " "?" "? "" at line 13, column 19.
Was expecting one of:
    "(" ...
    "{" ...
    "from" ...
    "where" ...
    <VAR1> ...
    <VAR2> ...
    

In [22]:
sparql = SPARQLWrapper("http://dbpedia.org/sparql")
sparql.setQuery("""
    PREFIX dbo:  <http://dbpedia.org/ontology/>
    PREFIX dbpedia: <http://dbpedia.org/resource/>
    PREFIX dbpprop: <http://dbpedia.org/property/>
    PREFIX geo: <http://www.w3.org/2003/01/geo/wgs84_pos#>
    SELECT DISTINCT  ?pop ?lat ?lon ?areaTotal ?citylabel
    WHERE{ 
       ?city rdf:type dbo:PopulatedPlace.
       ?city geo:lat ?lat.
       ?city geo:lat ?lon.
       ?city dbo:postalCode ?zipCode.
       ?city dbo:populationTotal ?pop.
       ?city dbo:areaTotal ?areaTotal.
       ?city rdfs:label ?citylabel.
       ?city dbo:country ?country.
       FILTER (lang(?citylabel) = 'en')
    } LIMIT 100 
""")
sparql.setReturnFormat(JSON)
results = sparql.query().convert()

In [23]:
for result in results["results"]["bindings"]:
    print result["citylabel"]["value"], result["pop"]["value"]

Alto, Piedmont 113
Alur, Osmanabad 7931
Bana, Hungary 1752
Bevern, Schleswig-Holstein 587
Bevern (Samtgemeinde) 6528
Buda, Buzău 3179
Castelvecchio di Rocca Barbena 180
Cervino, Campania 5137
Clam Lake, Wisconsin 37
Clam Lake, Wisconsin 37
Clam Lake, Wisconsin 37
Clam Lake, Wisconsin 37
Delchevo, Blagoevgrad Province 49
Fuente-Olmedo 45
Galloway, West Virginia 143
Hranice (České Budějovice District) 220
Lancang Lahu Autonomous County 460976
Leyden Township, Cook County, Illinois 92890
Markt Sankt Martin 1167
Marz, Austria 1974
Middle Lake, Saskatchewan 242
Middle Lake, Saskatchewan 242
Middle Lake, Saskatchewan 242
Middle Lake, Saskatchewan 242
Němčice (Blansko District) 416
Němčice (Domažlice District) 130
Němčice (Kroměříž District) 373
Němčice (Prachatice District) 187
Němčice (Strakonice District) 110
Oak Lake, Manitoba 359
Olmedo, Sardinia 3041
Podmokly (Klatovy District) 166
Podmokly (Rokycany District) 272
Saint-Cyprien, Bas-Saint-Laurent, Quebec 1163
Saint-Cyprien, Chaudière-Ap

In [11]:
### Geonames
g = Graph()
path = 'C:\Users\massaro\Desktop'
g.parse(path+"\all-geonames.rdf", format="nt")

URLError: <urlopen error unknown url type: c>

In [None]:
sparql = SPARQLWrapper("https://query.wikidata.org/bigdata/namespace/wdq/sparql")
query = """
PREFIX fn: <http://www.w3.org/2005/xpath-functions#>

SELECT ?Municipality ?Name ?WikidataURI ?GeoNamesURI WHERE {
    ?Municipality a <http://schema.org/AdministrativeArea> . #specify only the non-versioned entries.
    ?Municipality <http://schema.org/name> ?Name .
    ?Municipality a <http://www.geonames.org/ontology#A.ADM3> . #municipality only
    ?Municipality <https://ld.geo.admin.ch/def/bfsNumber> ?bfsNumber . #connect to the ?bfsNumber found in wikidata.
    {
    SELECT DISTINCT (xsd:integer(?bfs) AS ?bfsNumber) ?WikidataURI ?GeoNamesURI WHERE {
        SERVICE <https://query.wikidata.org/bigdata/namespace/wdq/sparql>
        {
            ?WikidataURI wdt:P771 ?bfs . #look for bfs number in WD
            ?WikidataURI wdt:P31 wd:Q70208 . #municipality of Switzerland
            ?WikidataURI wdt:P1566 ?GeoNamesId .        
        }
      BIND(IRI(CONCAT(STR("http://sws.geonames.org/"), ?GeoNamesId, "/")) AS ?GeoNamesURI)
    }
  }
}
ORDER BY ?Name
"""
sparql.setQuery(query)
sparql.setReturnFormat(JSON)
results = sparql.query().convert()