# Interrogation du point d'accès SPARQL de DBPedia (en anglais)

Dans cet exemple, après avoir interrogé le point d'accès SPARQL de DBPedia, on extrait des données ...


On crée également un document CSV exportable et analysable avec d'autres outils (un tableur par ex.).


## Importer les bibliothèques à utiliser

In [None]:
from SPARQLWrapper import SPARQLWrapper, SPARQLWrapper2, JSON, TURTLE, XML, RDFXML
import pprint
import csv
# from bs4 import BeautifulSoup
import matplotlib.pyplot as plt

from collections import Counter
from operator import itemgetter
import pandas as pd
import sqlite3 as sql


In [2]:
## fonction générique permettant de transformer en liste les résultats d'une requête


def sparql_result_to_list(result, limit, *variables):
    """Flatten a SPARQL JSON result into a list of rows.

    Args:
        result: Parsed SPARQL JSON response, i.e. a mapping exposing
            ``result['results']['bindings']`` as a sequence of bindings,
            each binding mapping a variable name to a dict that has a
            ``'value'`` key.
        limit: Maximum number of rows to return; zero or a negative value
            yields an empty list.
        *variables: The first extra positional argument is an iterable of
            variable names (e.g. ``('p', 'o')``) that fixes the column
            order of every row.

    Returns:
        A list of at most ``limit`` rows, each row being the list of the
        ``'value'`` entries of the requested variables, in the order given.

    Raises:
        KeyError: If a requested variable is absent from a binding.
    """
    rows = []
    for binding in result['results']['bindings']:
        # Stop as soon as the limit is reached so that no row beyond it is
        # emitted (and so that limit <= 0 simply returns an empty list).
        if len(rows) >= limit:
            break
        rows.append([binding[name]['value'] for name in variables[0]])
    return rows

## Explorer Wikipedia / DBPedia

DBPedia est une extraction de données semi-structurées et structurées à partir des Infobox et du résumé des pages de Wikipedia.

Les versions de DBPedia sont liées aux versions linguistiques de Wikipedia et dépendent de l'activité de 'chapitres' nationaux DBPedia. Il existe une communauté et une version francophones.

Nous explorerons ici la version anglaise, qui est la plus riche, mais les différentes versions sont connectées et on peut profiter de ces versions.

### Wikipedia: Les économistes, page "Economist"

*  https://en.wikipedia.org/wiki/Economist
*  https://en.wikipedia.org/wiki/Economics


### DBPedia : Economist

*  https://dbpedia.org/page/Economist
*  https://dbpedia.org/ontology/Economist

*  https://dbpedia.org/page/Economics


## DBPedia SPARQL Endpoint

https://dbpedia.org/sparql

Documentation : [SPARQL 1.1 Query Language](http://www.w3.org/TR/sparql11-query/)

### Keynes dans DBPedia

*  https://en.wikipedia.org/wiki/John_Maynard_Keynes
*  https://dbpedia.org/page/John_Maynard_Keynes



In [14]:
# Client object bound to the DBPedia SPARQL endpoint.
sparql = SPARQLWrapper("https://dbpedia.org/sparql") # alternative kept for reference: pass returnFormat=RDFXML here

In [33]:
# Query: every (predicate, object) pair whose subject is the
# John_Maynard_Keynes resource.
q_keynes = """

PREFIX dbpedia: <http://dbpedia.org/resource/>
SELECT * WHERE {
dbpedia:John_Maynard_Keynes ?p ?o
}

"""

In [34]:
# Submit the Keynes query, asking for a JSON response, and keep the
# converted result in `rc` (indexed below as rc['results']['bindings']).
sparql.setQuery(q_keynes)
sparql.setReturnFormat(JSON)
rc = sparql.queryAndConvert()
# Debug aid: type(rc)

In [35]:
# Number of rows in the result
len(rc['results']['bindings'])

937

In [36]:
# Inspecter les trois premières lignes
# Inspect the first three rows. Slicing stops after three bindings instead
# of walking the whole result with a hand-maintained counter.
for row in rc['results']['bindings'][:3]:
    print(row)

{'p': {'type': 'uri', 'value': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type'}, 'o': {'type': 'uri', 'value': 'http://www.w3.org/2002/07/owl#Thing'}}
{'p': {'type': 'uri', 'value': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type'}, 'o': {'type': 'uri', 'value': 'http://xmlns.com/foaf/0.1/Person'}}
{'p': {'type': 'uri', 'value': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type'}, 'o': {'type': 'uri', 'value': 'http://dbpedia.org/ontology/Person'}}


In [37]:
# Variable names to extract from each binding, in column order.
variables = ('p', 'o')

In [38]:
# Keep the flattened rows in `r` and print them with a plain loop: a list
# comprehension over print() would only collect None values.
r = sparql_result_to_list(rc, 1000, variables)
for row in r:
    print(row)

['http://www.w3.org/1999/02/22-rdf-syntax-ns#type', 'http://www.w3.org/2002/07/owl#Thing']
['http://www.w3.org/1999/02/22-rdf-syntax-ns#type', 'http://xmlns.com/foaf/0.1/Person']
['http://www.w3.org/1999/02/22-rdf-syntax-ns#type', 'http://dbpedia.org/ontology/Person']
['http://www.w3.org/1999/02/22-rdf-syntax-ns#type', 'http://schema.org/Person']
['http://www.w3.org/1999/02/22-rdf-syntax-ns#type', 'http://dbpedia.org/class/yago/Object100002684']
['http://www.w3.org/1999/02/22-rdf-syntax-ns#type', 'http://dbpedia.org/class/yago/Official110372373']
['http://www.w3.org/1999/02/22-rdf-syntax-ns#type', 'http://dbpedia.org/class/yago/Organism100004475']
['http://www.w3.org/1999/02/22-rdf-syntax-ns#type', 'http://dbpedia.org/class/yago/Peer109626238']
['http://www.w3.org/1999/02/22-rdf-syntax-ns#type', 'http://dbpedia.org/class/yago/Person100007846']
['http://www.w3.org/1999/02/22-rdf-syntax-ns#type', 'http://dbpedia.org/class/yago/Philanthropist110421956']
['http://www.w3.org/1999/02/22-rdf-

## Effectifs et propriétés de 'economist' et 'economics'

In [14]:
# Client object bound to the DBPedia SPARQL endpoint.
sparql = SPARQLWrapper("https://dbpedia.org/sparql") # alternative kept for reference: pass returnFormat=RDFXML here

In [4]:
# Query: properties attached to the ontology class dbo:Economist itself.
# Triples are tagged "out" when dbo:Economist is the subject and "in" when
# it is the object, then counted per (property, direction), most frequent
# first.
q_economist_1 = """
PREFIX  dbo:  <http://dbpedia.org/ontology/>
PREFIX  dbr:  <http://dbpedia.org/resource/>

SELECT DISTINCT ?p ?direction (count(*) AS ?freq)
WHERE
  {   { SELECT  ?p ("out" AS ?direction)
        WHERE
          { dbo:Economist
                      ?p  ?o
          }
      }
    UNION
      { SELECT  ?p ?direction
        WHERE
          { ?s  ?p  dbo:Economist
            BIND("in" AS ?direction)
          }
      }
  }
GROUP BY ?p ?direction
ORDER BY DESC(?freq)
"""

In [None]:
# Query: properties attached to the resource dbr:Economist. Triples are
# tagged "out" when dbr:Economist is the subject and "in" when it is the
# object, then counted per (property, direction), most frequent first.
q_economist_2 = """
PREFIX  dbo:  <http://dbpedia.org/ontology/>
PREFIX  dbr:  <http://dbpedia.org/resource/>

SELECT  ?p ?direction (count(*) AS ?freq)
WHERE
  {   { SELECT  ?p ("out" AS ?direction)
        WHERE
          { dbr:Economist
                      ?p  ?o
          }
      }
    UNION
      { SELECT  ?p ?direction
        WHERE
          { ?s  ?p  dbr:Economist
            BIND("in" AS ?direction)
          }
      }
  }
GROUP BY ?p ?direction
ORDER BY DESC(?freq)

"""

In [None]:
# Query: properties attached to the resource dbr:Economics. Triples are
# tagged 'out' when dbr:Economics is the subject and 'in' when it is the
# object, then counted per (property, direction), most frequent first.
q_economist_3 = """
PREFIX dbo: <http://dbpedia.org/ontology/>
PREFIX dbr: <http://dbpedia.org/resource/>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>

SELECT ?p ?direction (COUNT(*) AS ?freq)
WHERE {
{
SELECT ?p ('out' as ?direction)
WHERE {
       dbr:Economics ?p ?o .
}
}
UNION
{
SELECT ?p  ?direction
WHERE {
       ?s ?p dbr:Economics .
BIND ('in' as ?direction)
}

}
}
GROUP BY ?p ?direction
ORDER BY DESC(?freq)
"""

In [6]:
# Query: properties of the instances of dbo:Economist. Triples are tagged
# "out" when an instance is the subject and "in" when an instance is the
# object; the outer query counts them per (property, direction), most
# frequent first.
q_economist_4 = """
PREFIX  dbo:  <http://dbpedia.org/ontology/>
PREFIX  dbr:  <http://dbpedia.org/resource/>
PREFIX  foaf: <http://xmlns.com/foaf/0.1/>

SELECT DISTINCT ?p ?direction (count(*) AS ?freq)
WHERE
  {   { SELECT  ?p ?direction
        WHERE
          { ?person  a                     dbo:Economist ;
                     ?p                    ?o
            BIND("out" AS ?direction)
          }
      }
    UNION
      { SELECT  ?p ?direction
        WHERE
          { ?person  a                     dbo:Economist .
            ?s       ?p                    ?person
            BIND("in" AS ?direction)
          }
      }
  }
GROUP BY ?p ?direction
ORDER BY DESC(?freq)
"""

In [20]:
# Query: properties of the instances of dbo:Economist, tagged "out"
# (instance is the subject) or "in" (instance is the object). Here the
# count per (property, direction) is computed inside each UNION branch and
# the outer query only orders the rows by descending frequency.
q_economist_4_1 = """
PREFIX  dbo:  <http://dbpedia.org/ontology/>
PREFIX  dbr:  <http://dbpedia.org/resource/>
PREFIX  foaf: <http://xmlns.com/foaf/0.1/>

SELECT DISTINCT ?p ?direction ?freq
WHERE
  {   { SELECT  ?p ?direction (count(*) AS ?freq)
        WHERE
          { ?person  a                     dbo:Economist ;
                     ?p                    ?o
            BIND("out" AS ?direction)
          }
         GROUP BY ?p ?direction    
      }
    UNION
      { SELECT  ?p ?direction (count(*) AS ?freq)
        WHERE
          { ?person  a                     dbo:Economist .
            ?s       ?p                    ?person
            BIND("in" AS ?direction)
          }          
         GROUP BY ?p ?direction
      }
  }
ORDER BY DESC(?freq)       
"""

In [None]:
# Query: instances of dbo:Economist with their dbp:birthDate, ordered by
# birth date and capped at 100 rows.
# NOTE(review): the active FILTER compares ?birthDate with the integer 1800;
# if the birthDate values are date-typed literals this comparison may not
# select what is intended -- confirm against the endpoint's behaviour.
query_5 = """
PREFIX  dbo:  <http://dbpedia.org/ontology/>
PREFIX  dbr:  <http://dbpedia.org/resource/>
PREFIX dbp: <http://dbpedia.org/property/>
PREFIX  foaf: <http://xmlns.com/foaf/0.1/>

SELECT  ?person ?birthDate
WHERE
{
{ ?person  a                     dbo:Economist ;
   dbp:birthDate ?birthDate.    
# FILTER regex(?birthDate, '\\d{4}')       
FILTER (?birthDate > 1800)
      }

  }
ORDER BY ?birthDate
LIMIT 100
"""

In [24]:
# Submit q_economist_4_1, asking for a JSON response, and keep the
# converted result in `rc` (indexed below as rc['results']['bindings']).
sparql.setQuery(q_economist_4_1)
sparql.setReturnFormat(JSON)
rc = sparql.queryAndConvert()
# Debug aid: type(rc)

In [25]:
# Number of rows in the result
len(rc['results']['bindings'])

311

In [26]:
# Inspecter les trois premières lignes
# Inspect the first three rows. Slicing stops after three bindings instead
# of walking the whole result with a hand-maintained counter.
for row in rc['results']['bindings'][:3]:
    print(row)

{'p': {'type': 'uri', 'value': 'http://dbpedia.org/ontology/wikiPageWikiLink'}, 'direction': {'type': 'literal', 'value': 'out'}, 'freq': {'type': 'typed-literal', 'datatype': 'http://www.w3.org/2001/XMLSchema#integer', 'value': '67345'}}
{'p': {'type': 'uri', 'value': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type'}, 'direction': {'type': 'literal', 'value': 'out'}, 'freq': {'type': 'typed-literal', 'datatype': 'http://www.w3.org/2001/XMLSchema#integer', 'value': '45350'}}
{'p': {'type': 'uri', 'value': 'http://dbpedia.org/ontology/wikiPageWikiLink'}, 'direction': {'type': 'literal', 'value': 'in'}, 'freq': {'type': 'typed-literal', 'datatype': 'http://www.w3.org/2001/XMLSchema#integer', 'value': '35420'}}


In [27]:
# Variable names to extract from each binding, in column order.
variables = ('direction', 'p', 'freq')

In [30]:
# Keep the flattened rows in `r` and print them with a plain loop: a list
# comprehension over print() would only collect None values.
r = sparql_result_to_list(rc, 400, variables)
for row in r:
    print(row)

['out', 'http://dbpedia.org/ontology/wikiPageWikiLink', '67345']
['out', 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type', '45350']
['in', 'http://dbpedia.org/ontology/wikiPageWikiLink', '35420']
['out', 'http://www.w3.org/2002/07/owl#sameAs', '25691']
['out', 'http://purl.org/dc/terms/subject', '16596']
['out', 'http://dbpedia.org/property/wikiPageUsesTemplate', '13370']
['out', 'http://www.w3.org/2000/01/rdf-schema#label', '7992']
['out', 'http://www.w3.org/2000/01/rdf-schema#comment', '7944']
['out', 'http://dbpedia.org/ontology/abstract', '7944']
['out', 'http://dbpedia.org/ontology/wikiPageExternalLink', '7654']
['out', 'http://dbpedia.org/property/almaMater', '2426']
['out', 'http://dbpedia.org/ontology/almaMater', '2411']
['in', 'http://dbpedia.org/ontology/wikiPageRedirects', '2134']
['out', 'http://dbpedia.org/property/field', '1962']
['out', 'http://dbpedia.org/ontology/field', '1869']
['out', 'http://dbpedia.org/property/institution', '1813']
['out', 'http://dbpedia.org/ont

In [31]:
# Number of rows printed by the previous cell.
len(r)

311

### Résultats

Requêtes effectuées le 21 février 2021

#### Economist (ontology) : https://dbpedia.org/ontology/Economist
<code>
['in', 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type', '1570']
['out', 'http://www.w3.org/2000/01/rdf-schema#label', '18']
['out', 'http://www.w3.org/2000/01/rdf-schema#comment', '6']
['out', 'http://www.w3.org/2002/07/owl#equivalentClass', '2']
['out', 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type', '2']
['out', 'http://www.w3.org/ns/prov#wasDerivedFrom', '2']
['out', 'http://www.w3.org/2000/01/rdf-schema#subClassOf', '2']
['out', 'http://www.w3.org/2007/05/powder-s#describedby', '1']
['in', 'http://open.vocab.org/terms/defines', '1']
['out', 'http://www.w3.org/2000/01/rdf-schema#isDefinedBy', '1']
['in', 'http://open.vocab.org/terms/describes', '1']
</code>


#### Economist (resource): https://dbpedia.org/resource/Economist

<code>
['in', 'http://dbpedia.org/ontology/wikiPageWikiLink', '3667']
['in', 'http://dbpedia.org/ontology/occupation', '380']
['in', 'http://dbpedia.org/ontology/profession', '356']
['in', 'http://dbpedia.org/property/profession', '314']
['in', 'http://dbpedia.org/property/occupation', '248']
['out', 'http://dbpedia.org/ontology/wikiPageWikiLink', '91']
['out', 'http://www.w3.org/2002/07/owl#sameAs', '61']
['in', 'http://dbpedia.org/ontology/academicDiscipline', '23']
['out', 'http://www.w3.org/2000/01/rdf-schema#label', '21']
['out', 'http://www.w3.org/2000/01/rdf-schema#comment', '20']
['out', 'http://dbpedia.org/ontology/abstract', '20']
['in', 'http://dbpedia.org/property/field', '19']
['out', 'http://dbpedia.org/property/wikiPageUsesTemplate', '15']
['in', 'http://dbpedia.org/ontology/field', '9']
['in', 'http://dbpedia.org/property/fields', '8']
['in', 'http://dbpedia.org/ontology/knownFor', '5']
['in', 'http://dbpedia.org/property/discipline', '4']
['in', 'http://dbpedia.org/ontology/wikiPageRedirects', '3']
['out', 'http://purl.org/dc/terms/subject', '3']
['out', 'http://dbpedia.org/ontology/wikiPageExternalLink', '2']
['in', 'http://dbpedia.org/property/schoolTradition', '2']
['out', 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type', '2']
['in', 'http://dbpedia.org/ontology/education', '2']
['out', 'http://dbpedia.org/ontology/wikiPageRevisionID', '1']
['out', 'http://dbpedia.org/ontology/thumbnail', '1']
['out', 'http://dbpedia.org/ontology/wikiPageID', '1']
['out', 'http://purl.org/dc/terms/isPartOf', '1']
['in', 'http://dbpedia.org/property/office', '1']
['in', 'http://dbpedia.org/ontology/institution', '1']
['in', 'http://dbpedia.org/property/knownFor', '1']
['in', 'http://dbpedia.org/property/contributions', '1']
['out', 'http://xmlns.com/foaf/0.1/isPrimaryTopicOf', '1']
['in', 'http://dbpedia.org/property/otheroccupation', '1']
['in', 'http://dbpedia.org/ontology/influencedBy', '1']
['in', 'http://xmlns.com/foaf/0.1/primaryTopic', '1']
['in', 'http://dbpedia.org/ontology/wikiPageDisambiguates', '1']
['in', 'http://dbpedia.org/property/laterwork', '1']
['in', 'http://dbpedia.org/ontology/discipline', '1']
['out', 'http://www.w3.org/ns/prov#wasDerivedFrom', '1']
['in', 'http://dbpedia.org/property/education', '1']
['out', 'http://www.w3.org/2000/01/rdf-schema#seeAlso', '1']
['out', 'http://xmlns.com/foaf/0.1/depiction', '1']
['out', 'http://dbpedia.org/ontology/wikiPageLength', '1']
['out', 'http://www.w3.org/2004/02/skos/core#closeMatch', '1']

</code>    




#### Economics (https://dbpedia.org/resource/Economics)


<code>
['in', 'http://dbpedia.org/ontology/wikiPageWikiLink', '8925']
['out', 'http://dbpedia.org/ontology/wikiPageWikiLink', '507']
['in', 'http://dbpedia.org/ontology/academicDiscipline', '356']
['in', 'http://dbpedia.org/property/field', '230']
['in', 'http://dbpedia.org/property/discipline', '175']
['in', 'http://dbpedia.org/ontology/field', '128']
['out', 'http://www.w3.org/2002/07/owl#sameAs', '115']
['in', 'http://dbpedia.org/property/fields', '86']
['in', 'http://dbpedia.org/property/subject', '67']
['in', 'http://dbpedia.org/ontology/nonFictionSubject', '61']
['in', 'http://dbpedia.org/ontology/education', '53']
['out', 'http://dbpedia.org/property/wikiPageUsesTemplate', '44']
['in', 'http://dbpedia.org/ontology/wikiPageRedirects', '43']
['in', 'http://dbpedia.org/property/genre', '30']
['in', 'http://dbpedia.org/ontology/mainInterest', '29']
['in', 'http://dbpedia.org/property/mainInterests', '29']
['in', 'http://dbpedia.org/ontology/literaryGenre', '26']
['out', 'http://dbpedia.org/ontology/wikiPageExternalLink', '25']
['out', 'http://www.w3.org/2000/01/rdf-schema#comment', '22']
['out', 'http://dbpedia.org/ontology/abstract', '22']
['out', 'http://www.w3.org/2000/01/rdf-schema#label', '22']
['in', 'http://dbpedia.org/ontology/knownFor', '21']
['in', 'http://dbpedia.org/ontology/almaMater', '18']
['in', 'http://dbpedia.org/ontology/occupation', '17']
['in', 'http://dbpedia.org/property/education', '16']
['in', 'http://dbpedia.org/property/skills', '16']
['in', 'http://dbpedia.org/ontology/profession', '16']
['in', 'http://dbpedia.org/ontology/genre', '14']
['in', 'http://purl.org/dc/elements/1.1/subject', '12']
['in', 'http://dbpedia.org/property/profession', '11']
['in', 'http://dbpedia.org/property/knownFor', '10']
['in', 'http://dbpedia.org/property/category', '7']
['in', 'http://dbpedia.org/property/focus', '5']
['in', 'http://dbpedia.org/ontology/type', '5']
['in', 'http://dbpedia.org/property/industry', '5']
['in', 'http://www.w3.org/2000/01/rdf-schema#seeAlso', '4']
['in', 'http://dbpedia.org/property/type', '4']
['in', 'http://dbpedia.org/property/occupation', '3']
['in', 'http://dbpedia.org/property/researchField', '3']
['in', 'http://dbpedia.org/ontology/industry', '3']
['in', 'http://dbpedia.org/property/almaMater', '3']
['out', 'http://www.w3.org/2000/01/rdf-schema#seeAlso', '3']
['in', 'http://dbpedia.org/property/subjects', '3']
['out', 'http://www.w3.org/2004/02/skos/core#closeMatch', '3']
['in', 'http://dbpedia.org/property/schoolTradition', '2']
['in', 'http://dbpedia.org/ontology/award', '2']
['out', 'http://purl.org/dc/terms/subject', '2']
['in', 'http://dbpedia.org/ontology/wikiPageDisambiguates', '2']
['out', 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type', '2']
['in', 'http://www.w3.org/2002/07/owl#differentFrom', '2']
['out', 'http://dbpedia.org/ontology/wikiPageRevisionID', '1']
['out', 'http://dbpedia.org/ontology/thumbnail', '1']
['out', 'http://dbpedia.org/ontology/wikiPageID', '1']
['out', 'http://purl.org/dc/terms/isPartOf', '1']
['in', 'http://dbpedia.org/property/faculty', '1']
['in', 'http://dbpedia.org/property/group', '1']
['in', 'http://dbpedia.org/ontology/picture', '1']
['in', 'http://dbpedia.org/property/skillsTested', '1']
['out', 'http://xmlns.com/foaf/0.1/isPrimaryTopicOf', '1']
['out', 'http://dbpedia.org/property/onlinebooks', '1']
['in', 'http://dbpedia.org/property/scope', '1']
['in', 'http://dbpedia.org/property/topics', '1']
['out', 'http://dbpedia.org/property/label', '1']
['in', 'http://xmlns.com/foaf/0.1/primaryTopic', '1']
['in', 'http://dbpedia.org/property/purpose', '1']
['in', 'http://dbpedia.org/property/subDiscipline', '1']
['out', 'http://dbpedia.org/property/by', '1']
['out', 'http://www.w3.org/ns/prov#wasDerivedFrom', '1']
['in', 'http://dbpedia.org/property/activitySector', '1']
['in', 'http://dbpedia.org/property/classesOffered', '1']
['out', 'http://dbpedia.org/property/about', '1']
['out', 'http://xmlns.com/foaf/0.1/depiction', '1']
['out', 'http://dbpedia.org/ontology/wikiPageLength', '1']
['in', 'http://dbpedia.org/property/vp', '1']
['out', 'http://dbpedia.org/property/others', '1']
['in', 'http://dbpedia.org/ontology/service', '1']
</code>    