# PySPARQL CONSTRUCT example
### Query your SPARQL endpoint and manage the results inside Apache Spark

In [1]:
from PySPARQL.Wrapper import PySPARQLWrapper
from pyspark.sql import SparkSession

In [2]:
# Reuse the active Spark session if one exists; otherwise start a new one
# under the application name given below.
spark = (
    SparkSession.builder
    .appName("PySPARQL example")
    .getOrCreate()
)

### Use the DBpedia SPARQL endpoint

In [3]:
# URL of the SPARQL endpoint handed to the wrapper in the next cell.
sparql_endpoint = "https://dbpedia.org/sparql"

In [4]:
# Tie the Spark session and the endpoint URL together in one wrapper object;
# the query further down is issued through `wrapper.query(...)`.
wrapper = PySPARQLWrapper(spark, sparql_endpoint)

### Construct a graph of the European countries with their capitals

In [5]:
# CONSTRUCT query: builds a graph of European countries, their capitals and the
# English labels of both.
#
# Every prefix used in the query body is declared explicitly. `dct:` and `dbc:`
# were previously resolved only because the DBpedia endpoint pre-registers
# them; declaring them keeps the query valid on any SPARQL endpoint.
# (`a` is SPARQL shorthand for rdf:type.)
query = """
PREFIX dbr:    <http://dbpedia.org/resource/>
PREFIX dbo:    <http://dbpedia.org/ontology/>
PREFIX dbc:    <http://dbpedia.org/resource/Category:>
PREFIX dct:    <http://purl.org/dc/terms/>
PREFIX rdf:    <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs:   <http://www.w3.org/2000/01/rdf-schema#>

CONSTRUCT {
    ?city a dbo:City ;
        rdfs:label ?cityLabel .
    ?country a dbo:Country ;
        rdfs:label ?countryLabel ;
        dbo:capital ?city ;
        dct:subject dbc:Countries_in_Europe .
}
WHERE {
    ?city a dbo:City ;
        rdfs:label ?cityLabel .
    ?country a dbo:Country ;
        rdfs:label ?countryLabel ;
        dbo:capital ?city ;
        dct:subject dbc:Countries_in_Europe .
    FILTER (
        LANG(?cityLabel) = "en" &&
        LANG(?countryLabel) = "en"
    )
}
"""

In [6]:
# Run the CONSTRUCT query through the wrapper. The returned object is read
# below through its `.dataFrame` and `.graphFrame` attributes.
result = wrapper.query(query)

### Result as a DataFrame

In [7]:
# Triple view of the result: one row per triple, with the columns
# subject / predicate / object (see the output below).
countriesTripleDF = result.dataFrame
# Print a preview; long values are truncated in the displayed table.
countriesTripleDF.show()

+--------------------+--------------------+--------------------+
|             subject|           predicate|              object|
+--------------------+--------------------+--------------------+
|http://dbpedia.or...|http://www.w3.org...|              Latvia|
|http://dbpedia.or...|http://www.w3.org...|http://dbpedia.or...|
|http://dbpedia.or...|http://dbpedia.or...|http://dbpedia.or...|
|http://dbpedia.or...|http://www.w3.org...|             Vilnius|
|http://dbpedia.or...|http://www.w3.org...|http://dbpedia.or...|
|http://dbpedia.or...|http://www.w3.org...|Bosnia and Herzeg...|
|http://dbpedia.or...|http://dbpedia.or...|http://dbpedia.or...|
|http://dbpedia.or...|http://purl.org/d...|http://dbpedia.or...|
|http://dbpedia.or...|http://www.w3.org...|http://dbpedia.or...|
|http://dbpedia.or...|http://www.w3.org...|            Chișinău|
|http://dbpedia.or...|http://www.w3.org...|             Austria|
|http://dbpedia.or...|http://www.w3.org...|            Budapest|
|http://dbpedia.or...|htt

### Result as a GraphFrame

In [8]:
# Graph view of the same result; its `vertices` and `edges` are shown in the
# following cells.
countriesGF = result.graphFrame

In [9]:
# Vertices: an `id` column plus a column holding the rdfs:label value
# (the column name in the output appears to have dots replaced by underscores).
countriesGF.vertices.show()

+--------------------+------------------------------------------+
|                  id|http://www_w3_org/2000/01/rdf-schema#label|
+--------------------+------------------------------------------+
|http://dbpedia.or...|                                Kazakhstan|
|http://dbpedia.or...|                                     Minsk|
|http://dbpedia.or...|                                   Estonia|
|http://dbpedia.or...|                                    Astana|
|http://dbpedia.or...|                            Czech Republic|
|http://dbpedia.or...|                      Bosnia and Herzeg...|
|http://dbpedia.or...|                                      Riga|
|http://dbpedia.or...|                                  Pristina|
|http://dbpedia.or...|                                      Rome|
|http://dbpedia.or...|                                     Sofia|
|http://dbpedia.or...|                                 Bucharest|
|http://dbpedia.or...|                                 Ljubljana|
|http://db

In [10]:
# Edges: one row per link, with the columns src / relationship / dst.
countriesGF.edges.show()

+--------------------+--------------------+--------------------+
|                 src|        relationship|                 dst|
+--------------------+--------------------+--------------------+
|http://dbpedia.or...|http://www.w3.org...|http://dbpedia.or...|
|http://dbpedia.or...|http://dbpedia.or...|http://dbpedia.or...|
|http://dbpedia.or...|http://www.w3.org...|http://dbpedia.or...|
|http://dbpedia.or...|http://dbpedia.or...|http://dbpedia.or...|
|http://dbpedia.or...|http://purl.org/d...|http://dbpedia.or...|
|http://dbpedia.or...|http://www.w3.org...|http://dbpedia.or...|
|http://dbpedia.or...|http://www.w3.org...|http://dbpedia.or...|
|http://dbpedia.or...|http://dbpedia.or...|http://dbpedia.or...|
|http://dbpedia.or...|http://www.w3.org...|http://dbpedia.or...|
|http://dbpedia.or...|http://www.w3.org...|http://dbpedia.or...|
|http://dbpedia.or...|http://purl.org/d...|http://dbpedia.or...|
|http://dbpedia.or...|http://dbpedia.or...|http://dbpedia.or...|
|http://dbpedia.or...|htt

### Save the results as Spark tables

In [11]:
# Persist the triple, vertex and edge views as Spark tables so they can be
# queried with Spark SQL in the cells below.
# mode("overwrite") keeps this cell re-runnable: with the default save mode,
# saveAsTable raises an error when the table already exists (for example on a
# second "Run All"), whereas overwrite replaces the previous contents.
countriesTripleDF.write.mode("overwrite").saveAsTable("triples")
countriesGF.vertices.write.mode("overwrite").saveAsTable("vertices")
countriesGF.edges.write.mode("overwrite").saveAsTable("edges")

### Read the data back using SparkSQL

In [12]:
# Read the saved `triples` table back through Spark SQL and preview it.
# Note: `sqlDF` is rebound by each of the following read-back cells.
sqlDF = spark.sql("SELECT * FROM triples")
sqlDF.show()

+--------------------+--------------------+--------------------+
|             subject|           predicate|              object|
+--------------------+--------------------+--------------------+
|http://dbpedia.or...|http://www.w3.org...|              Latvia|
|http://dbpedia.or...|http://www.w3.org...|http://dbpedia.or...|
|http://dbpedia.or...|http://dbpedia.or...|http://dbpedia.or...|
|http://dbpedia.or...|http://www.w3.org...|             Vilnius|
|http://dbpedia.or...|http://www.w3.org...|http://dbpedia.or...|
|http://dbpedia.or...|http://www.w3.org...|Bosnia and Herzeg...|
|http://dbpedia.or...|http://dbpedia.or...|http://dbpedia.or...|
|http://dbpedia.or...|http://purl.org/d...|http://dbpedia.or...|
|http://dbpedia.or...|http://www.w3.org...|http://dbpedia.or...|
|http://dbpedia.or...|http://www.w3.org...|            Chișinău|
|http://dbpedia.or...|http://www.w3.org...|             Austria|
|http://dbpedia.or...|http://www.w3.org...|            Budapest|
|http://dbpedia.or...|htt

In [13]:
# Read the saved `vertices` table back through Spark SQL and preview it.
# This rebinds `sqlDF`, replacing the DataFrame from the previous cell.
sqlDF = spark.sql("SELECT * FROM vertices")
sqlDF.show()

+--------------------+------------------------------------------+
|                  id|http://www_w3_org/2000/01/rdf-schema#label|
+--------------------+------------------------------------------+
|http://dbpedia.or...|                      Bosnia and Herzeg...|
|http://dbpedia.or...|                      Republic of Maced...|
|http://dbpedia.or...|                                  Chișinău|
|http://dbpedia.or...|                                Montenegro|
|http://dbpedia.or...|                                Kazakhstan|
|http://dbpedia.or...|                                San Marino|
|http://dbpedia.or...|                                 Bucharest|
|http://dbpedia.or...|                                 Ljubljana|
|http://dbpedia.or...|                                 Lithuania|
|http://dbpedia.or...|                                  Sarajevo|
|http://dbpedia.or...|                                  Valletta|
|http://dbpedia.or...|                                  Slovenia|
|http://db

In [14]:
# Read the saved `edges` table back through Spark SQL and preview it.
# This rebinds `sqlDF`, replacing the DataFrame from the previous cell.
sqlDF = spark.sql("SELECT * FROM edges")
sqlDF.show()

+--------------------+--------------------+--------------------+
|                 src|        relationship|                 dst|
+--------------------+--------------------+--------------------+
|http://dbpedia.or...|http://www.w3.org...|http://dbpedia.or...|
|http://dbpedia.or...|http://dbpedia.or...|http://dbpedia.or...|
|http://dbpedia.or...|http://www.w3.org...|http://dbpedia.or...|
|http://dbpedia.or...|http://dbpedia.or...|http://dbpedia.or...|
|http://dbpedia.or...|http://purl.org/d...|http://dbpedia.or...|
|http://dbpedia.or...|http://www.w3.org...|http://dbpedia.or...|
|http://dbpedia.or...|http://www.w3.org...|http://dbpedia.or...|
|http://dbpedia.or...|http://dbpedia.or...|http://dbpedia.or...|
|http://dbpedia.or...|http://www.w3.org...|http://dbpedia.or...|
|http://dbpedia.or...|http://www.w3.org...|http://dbpedia.or...|
|http://dbpedia.or...|http://purl.org/d...|http://dbpedia.or...|
|http://dbpedia.or...|http://dbpedia.or...|http://dbpedia.or...|
|http://dbpedia.or...|htt