In [None]:
from pyspark import SparkContext, SparkConf
from pyspark.sql import SparkSession, HiveContext
from pyspark.sql.functions import *

In [None]:
from IPython.display import display
import pandas as pd

In [None]:
from PySPARQL.Wrapper import PySPARQLWrapper

In [None]:
import timeit
import warnings
# Silence every Python warning for the rest of the session so that warning
# text does not clutter the cell outputs below.
warnings.filterwarnings('ignore')

### Connection to Apache Spark

In [None]:
# Obtain a Hive-enabled Spark session. The metastore URI is supplied in code,
# so no external Hive configuration file is needed.
spark = (
    SparkSession.builder
    .appName("interfacing spark sql to hive metastore without configuration file")
    .config("hive.metastore.uris", "thrift://hive-metastore:9083")
    .enableHiveSupport()
    .getOrCreate()
)

### SPARQL query endpoint

In [None]:
# URL of the SPARQL endpoint handed to every PySPARQLWrapper created in this notebook.
sparql_endpoint = "http://ontop:8080/sparql"

### Timeit configuration

In [None]:
# Number of independent timing measurements taken per query (timeit's `repeat`).
repeat = 10
# Number of executions of the timed snippet inside one measurement (timeit's `number`).
loop = 1

def Average(lst):
    """Return the arithmetic mean of the numbers in ``lst``.

    Parameters
    ----------
    lst : iterable of numbers
        Values to average, e.g. the list of timings returned by
        ``timeit.repeat``. Any iterable is accepted, not only sequences.

    Returns
    -------
    number
        ``total / count`` of the supplied values.

    Raises
    ------
    ZeroDivisionError
        If ``lst`` yields no values.
    """
    # NOTE(review): the notebook star-imports pyspark.sql.functions, which
    # presumably rebinds names such as `sum`; accumulating by hand keeps this
    # helper independent of those names - confirm before switching to sum().
    total = 0
    count = 0
    for value in lst:
        total = total + value
        count += 1
    if count == 0:
        # Same exception type as the bare division would raise, but with a
        # message that says what went wrong.
        raise ZeroDivisionError("Average() requires at least one value")
    return total / count

### Q1

In [None]:
# Benchmark Q1. The snippet is handed to timeit as source text, so every
# execution builds a wrapper, submits the query and reads `dataFrame`.

code = '''
query = """
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        PREFIX foaf: <http://xmlns.com/foaf/0.1/>
        PREFIX schema: <http://schema.org/>
        PREFIX rev: <http://purl.org/stuff/rev#>
        PREFIX edm: <http://www.europeana.eu/schemas/edm/>
        PREFIX dc: <http://purl.org/dc/elements/1.1/>
        PREFIX gr: <http://purl.org/goodrelations/v1#>
        PREFIX dcterms: <http://purl.org/dc/terms/>
        PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
        PREFIX bsbm: <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/>

        SELECT DISTINCT ?label ?value
        WHERE {
            ?product rdfs:label ?label .
            ?product bsbm:productPropertyNumeric1 ?value .
            ?product rdf:type bsbm:Product .
            ?product bsbm:producer ?producer .
            ?producer rdf:type bsbm:Producer .
            ?producer foaf:homepage ?hp .
            ?review bsbm:reviewFor ?product .
            ?review rdf:type schema:Review .
            ?review rev:reviewer ?pers .
            ?pers foaf:name ?fn .
            ?pers edm:country ?cn .
            ?offer bsbm:product ?product .
            ?offer rdf:type schema:Offer .
            FILTER (?value > 102)
        }
        ORDER BY ?label
        LIMIT 10
"""

wrapper = PySPARQLWrapper(spark, sparql_endpoint)
result = wrapper.query(query)
resultDF = result.dataFrame
'''

# `repeat` measurements, each running the snippet `loop` time(s); the snippet
# resolves spark, sparql_endpoint and PySPARQLWrapper through globals().
result = timeit.repeat(stmt=code, number=loop, repeat=repeat, globals=globals())

# Per-measurement timings first, then their mean.
print("TIME", result, sep="\n")
print("\nAVG_TIME", Average(result), sep="\n")

In [None]:
# Execute Q1 once, outside the benchmark, to inspect the rows it returns.

wrapper = PySPARQLWrapper(spark, sparql_endpoint)

query = """
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        PREFIX foaf: <http://xmlns.com/foaf/0.1/>
        PREFIX schema: <http://schema.org/>
        PREFIX rev: <http://purl.org/stuff/rev#>
        PREFIX edm: <http://www.europeana.eu/schemas/edm/>
        PREFIX dc: <http://purl.org/dc/elements/1.1/>
        PREFIX gr: <http://purl.org/goodrelations/v1#>
        PREFIX dcterms: <http://purl.org/dc/terms/>
        PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
        PREFIX bsbm: <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/>

        SELECT DISTINCT ?label ?value
        WHERE {
            ?product rdfs:label ?label .
            ?product bsbm:productPropertyNumeric1 ?value .
            ?product rdf:type bsbm:Product .
            ?product bsbm:producer ?producer .
            ?producer rdf:type bsbm:Producer .
            ?producer foaf:homepage ?hp .
            ?review bsbm:reviewFor ?product .
            ?review rdf:type schema:Review .
            ?review rev:reviewer ?pers .
            ?pers foaf:name ?fn .
            ?pers edm:country ?cn .
            ?offer bsbm:product ?product .
            ?offer rdf:type schema:Offer .
            FILTER (?value > 102)
        }
        ORDER BY ?label
        LIMIT 10
"""

result = wrapper.query(query)
resultDF = result.dataFrame

# Convert the result to a pandas DataFrame and render it as a table.
pandasDF = resultDF.toPandas()
display(pandasDF)

### Q2

In [None]:
# Benchmark Q2. The snippet is handed to timeit as source text, so every
# execution builds a wrapper, submits the query and reads `dataFrame`.

code = '''
query = """
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        PREFIX foaf: <http://xmlns.com/foaf/0.1/>
        PREFIX schema: <http://schema.org/>
        PREFIX rev: <http://purl.org/stuff/rev#>
        PREFIX edm: <http://www.europeana.eu/schemas/edm/>
        PREFIX dc: <http://purl.org/dc/elements/1.1/>
        PREFIX gr: <http://purl.org/goodrelations/v1#>
        PREFIX dcterms: <http://purl.org/dc/terms/>
        PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
        PREFIX bsbm: <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/>

        SELECT ?label ?comment ?producer ?price
            ?propertyTextual1 ?propertyTextual2 ?propertyTextual3
            ?propertyNumeric1 ?propertyNumeric2 ?propertyTextual4
            ?propertyTextual5 ?propertyNumeric4
        WHERE {
            ?p rdfs:label ?label .
            ?p rdfs:comment ?comment .
            ?p bsbm:producer ?producer .
            ?p dc:publisher ?ps .
            ?offer bsbm:product ?p .
            ?offer bsbm:price ?price .
            ?p bsbm:productPropertyTextual1 ?propertyTextual1 .
            ?p bsbm:productPropertyTextual2 ?propertyTextual2 .
            ?p bsbm:productPropertyTextual3 ?propertyTextual3 .
            ?p bsbm:productPropertyNumeric1 ?propertyNumeric1 .
            ?p bsbm:productPropertyNumeric2 ?propertyNumeric2 .
            ?p bsbm:productPropertyTextual4 ?propertyTextual4 .
            ?p bsbm:productPropertyTextual5 ?propertyTextual5 .
            ?p bsbm:productPropertyNumeric4 ?propertyNumeric4 .
        }
"""

wrapper = PySPARQLWrapper(spark, sparql_endpoint)
result = wrapper.query(query)
resultDF = result.dataFrame
'''

# `repeat` measurements, each running the snippet `loop` time(s); the snippet
# resolves spark, sparql_endpoint and PySPARQLWrapper through globals().
result = timeit.repeat(stmt=code, number=loop, repeat=repeat, globals=globals())

# Per-measurement timings first, then their mean.
print("TIME", result, sep="\n")
print("\nAVG_TIME", Average(result), sep="\n")

In [None]:
# Execute Q2 once, outside the benchmark, to inspect the rows it returns.

wrapper = PySPARQLWrapper(spark, sparql_endpoint)

query = """
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        PREFIX foaf: <http://xmlns.com/foaf/0.1/>
        PREFIX schema: <http://schema.org/>
        PREFIX rev: <http://purl.org/stuff/rev#>
        PREFIX edm: <http://www.europeana.eu/schemas/edm/>
        PREFIX dc: <http://purl.org/dc/elements/1.1/>
        PREFIX gr: <http://purl.org/goodrelations/v1#>
        PREFIX dcterms: <http://purl.org/dc/terms/>
        PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
        PREFIX bsbm: <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/>

        SELECT ?label ?comment ?producer ?price
            ?propertyTextual1 ?propertyTextual2 ?propertyTextual3
            ?propertyNumeric1 ?propertyNumeric2 ?propertyTextual4
            ?propertyTextual5 ?propertyNumeric4
        WHERE {
            ?p rdfs:label ?label .
            ?p rdfs:comment ?comment .
            ?p bsbm:producer ?producer .
            ?p dc:publisher ?ps .
            ?offer bsbm:product ?p .
            ?offer bsbm:price ?price .
            ?p bsbm:productPropertyTextual1 ?propertyTextual1 .
            ?p bsbm:productPropertyTextual2 ?propertyTextual2 .
            ?p bsbm:productPropertyTextual3 ?propertyTextual3 .
            ?p bsbm:productPropertyNumeric1 ?propertyNumeric1 .
            ?p bsbm:productPropertyNumeric2 ?propertyNumeric2 .
            ?p bsbm:productPropertyTextual4 ?propertyTextual4 .
            ?p bsbm:productPropertyTextual5 ?propertyTextual5 .
            ?p bsbm:productPropertyNumeric4 ?propertyNumeric4 .
        }
"""

result = wrapper.query(query)
resultDF = result.dataFrame

# Convert the result to a pandas DataFrame and render it as a table.
pandasDF = resultDF.toPandas()
display(pandasDF)

### Q3

In [None]:
# Benchmark Q3. The snippet is handed to timeit as source text, so every
# execution builds a wrapper, submits the query and reads `dataFrame`.

code = '''
query = """
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        PREFIX foaf: <http://xmlns.com/foaf/0.1/>
        PREFIX schema: <http://schema.org/>
        PREFIX rev: <http://purl.org/stuff/rev#>
        PREFIX edm: <http://www.europeana.eu/schemas/edm/>
        PREFIX dc: <http://purl.org/dc/elements/1.1/>
        PREFIX gr: <http://purl.org/goodrelations/v1#>
        PREFIX dcterms: <http://purl.org/dc/terms/>
        PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
        PREFIX bsbm: <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/>

        SELECT ?product ?label ?p1 ?p3
        WHERE {
            ?product rdfs:label ?label .
            ?product bsbm:producer ?producer .
            ?review bsbm:reviewFor ?product .
            ?review rdf:type schema:Review .
            ?product bsbm:productPropertyNumeric1 ?p1 .
            ?product bsbm:productPropertyNumeric3 ?p3 .
            FILTER (?p1 > 1800)
            FILTER (?p3 < 5 )
        }
        ORDER BY ?label
        LIMIT 10
"""

wrapper = PySPARQLWrapper(spark, sparql_endpoint)
result = wrapper.query(query)
resultDF = result.dataFrame
'''

# `repeat` measurements, each running the snippet `loop` time(s); the snippet
# resolves spark, sparql_endpoint and PySPARQLWrapper through globals().
result = timeit.repeat(stmt=code, number=loop, repeat=repeat, globals=globals())

# Per-measurement timings first, then their mean.
print("TIME", result, sep="\n")
print("\nAVG_TIME", Average(result), sep="\n")

In [None]:
# Execute Q3 once, outside the benchmark, to inspect the rows it returns.

wrapper = PySPARQLWrapper(spark, sparql_endpoint)

query = """
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        PREFIX foaf: <http://xmlns.com/foaf/0.1/>
        PREFIX schema: <http://schema.org/>
        PREFIX rev: <http://purl.org/stuff/rev#>
        PREFIX edm: <http://www.europeana.eu/schemas/edm/>
        PREFIX dc: <http://purl.org/dc/elements/1.1/>
        PREFIX gr: <http://purl.org/goodrelations/v1#>
        PREFIX dcterms: <http://purl.org/dc/terms/>
        PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
        PREFIX bsbm: <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/>

        SELECT ?product ?label ?p1 ?p3
        WHERE {
            ?product rdfs:label ?label .
            ?product bsbm:producer ?producer .
            ?review bsbm:reviewFor ?product .
            ?review rdf:type schema:Review .
            ?product bsbm:productPropertyNumeric1 ?p1 .
            ?product bsbm:productPropertyNumeric3 ?p3 .
            FILTER (?p1 > 1800)
            FILTER (?p3 < 5 )
        }
        ORDER BY ?label
        LIMIT 10
"""

result = wrapper.query(query)
resultDF = result.dataFrame

# Convert the result to a pandas DataFrame and render it as a table.
pandasDF = resultDF.toPandas()
display(pandasDF)

### Q4

In [None]:
# Benchmark Q4. The snippet is handed to timeit as source text, so every
# execution builds a wrapper, submits the query and reads `dataFrame`.

code = '''
query = """
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        PREFIX foaf: <http://xmlns.com/foaf/0.1/>
        PREFIX schema: <http://schema.org/>
        PREFIX rev: <http://purl.org/stuff/rev#>
        PREFIX edm: <http://www.europeana.eu/schemas/edm/>
        PREFIX dc: <http://purl.org/dc/elements/1.1/>
        PREFIX gr: <http://purl.org/goodrelations/v1#>
        PREFIX dcterms: <http://purl.org/dc/terms/>
        PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
        PREFIX bsbm: <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/>

        SELECT DISTINCT ?label ?c ?propertyTextual ?p1
        WHERE {
            ?product rdfs:label ?label .
            ?product bsbm:producer ?pcr .
            ?pcr edm:country ?c .
            ?pcr foaf:homepage ?h .
            ?offer gr:validFrom ?vf .
            ?offer bsbm:product ?product .
            ?review bsbm:reviewFor ?product .
            ?review rev:reviewer ?pers .
            ?pers foaf:name ?fn .
            ?pers edm:country ?cn .
            ?product bsbm:productPropertyTextual1 ?propertyTextual .
            ?product bsbm:productPropertyNumeric1 ?p1 .
            FILTER (?p1 > 630)
        }
        ORDER BY ?label
        LIMIT 10
"""

wrapper = PySPARQLWrapper(spark, sparql_endpoint)
result = wrapper.query(query)
resultDF = result.dataFrame
'''

# `repeat` measurements, each running the snippet `loop` time(s); the snippet
# resolves spark, sparql_endpoint and PySPARQLWrapper through globals().
result = timeit.repeat(stmt=code, number=loop, repeat=repeat, globals=globals())

# Per-measurement timings first, then their mean.
print("TIME", result, sep="\n")
print("\nAVG_TIME", Average(result), sep="\n")

In [None]:
# Execute Q4 once, outside the benchmark, to inspect the rows it returns.

wrapper = PySPARQLWrapper(spark, sparql_endpoint)

query = """
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        PREFIX foaf: <http://xmlns.com/foaf/0.1/>
        PREFIX schema: <http://schema.org/>
        PREFIX rev: <http://purl.org/stuff/rev#>
        PREFIX edm: <http://www.europeana.eu/schemas/edm/>
        PREFIX dc: <http://purl.org/dc/elements/1.1/>
        PREFIX gr: <http://purl.org/goodrelations/v1#>
        PREFIX dcterms: <http://purl.org/dc/terms/>
        PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
        PREFIX bsbm: <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/>

        SELECT DISTINCT ?label ?c ?propertyTextual ?p1
        WHERE {
            ?product rdfs:label ?label .
            ?product bsbm:producer ?pcr .
            ?pcr edm:country ?c .
            ?pcr foaf:homepage ?h .
            ?offer gr:validFrom ?vf .
            ?offer bsbm:product ?product .
            ?review bsbm:reviewFor ?product .
            ?review rev:reviewer ?pers .
            ?pers foaf:name ?fn .
            ?pers edm:country ?cn .
            ?product bsbm:productPropertyTextual1 ?propertyTextual .
            ?product bsbm:productPropertyNumeric1 ?p1 .
            FILTER (?p1 > 630)
        }
        ORDER BY ?label
        LIMIT 10
"""

result = wrapper.query(query)
resultDF = result.dataFrame

# Convert the result to a pandas DataFrame and render it as a table.
pandasDF = resultDF.toPandas()
display(pandasDF)

### Q5

In [None]:
# Benchmark Q5. The snippet is handed to timeit as source text, so every
# execution builds a wrapper, submits the query and reads `dataFrame`.

code = '''
query = """
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        PREFIX foaf: <http://xmlns.com/foaf/0.1/>
        PREFIX schema: <http://schema.org/>
        PREFIX rev: <http://purl.org/stuff/rev#>
        PREFIX edm: <http://www.europeana.eu/schemas/edm/>
        PREFIX dc: <http://purl.org/dc/elements/1.1/>
        PREFIX gr: <http://purl.org/goodrelations/v1#>
        PREFIX dcterms: <http://purl.org/dc/terms/>
        PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
        PREFIX bsbm: <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/>

        SELECT DISTINCT ?productLabel ?simProperty1 ?simProperty2
        WHERE {
            ?product rdfs:label ?productLabel .
            ?product bsbm:productPropertyNumeric1 ?simProperty1 .
            ?product bsbm:productPropertyNumeric2 ?simProperty2 .
            ?product bsbm:producer ?producer .
            ?review bsbm:reviewFor ?product .
            ?review rdf:type schema:Review .
            ?offer bsbm:product ?product .
            ?offer rdf:type schema:Offer .
            FILTER (?simProperty1 < 120)
            FILTER (?productLabel != "wineskins banded crc")
            FILTER (?simProperty2 < 170)
        }
        ORDER BY ?productLabel
        LIMIT 5
"""

wrapper = PySPARQLWrapper(spark, sparql_endpoint)
result = wrapper.query(query)
resultDF = result.dataFrame
'''

# `repeat` measurements, each running the snippet `loop` time(s); the snippet
# resolves spark, sparql_endpoint and PySPARQLWrapper through globals().
result = timeit.repeat(stmt=code, number=loop, repeat=repeat, globals=globals())

# Per-measurement timings first, then their mean.
print("TIME", result, sep="\n")
print("\nAVG_TIME", Average(result), sep="\n")

In [None]:
# Execute Q5 once, outside the benchmark, to inspect the rows it returns.

wrapper = PySPARQLWrapper(spark, sparql_endpoint)

query = """
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        PREFIX foaf: <http://xmlns.com/foaf/0.1/>
        PREFIX schema: <http://schema.org/>
        PREFIX rev: <http://purl.org/stuff/rev#>
        PREFIX edm: <http://www.europeana.eu/schemas/edm/>
        PREFIX dc: <http://purl.org/dc/elements/1.1/>
        PREFIX gr: <http://purl.org/goodrelations/v1#>
        PREFIX dcterms: <http://purl.org/dc/terms/>
        PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
        PREFIX bsbm: <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/>

        SELECT DISTINCT ?productLabel ?simProperty1 ?simProperty2
        WHERE {
            ?product rdfs:label ?productLabel .
            ?product bsbm:productPropertyNumeric1 ?simProperty1 .
            ?product bsbm:productPropertyNumeric2 ?simProperty2 .
            ?product bsbm:producer ?producer .
            ?review bsbm:reviewFor ?product .
            ?review rdf:type schema:Review .
            ?offer bsbm:product ?product .
            ?offer rdf:type schema:Offer .
            FILTER (?simProperty1 < 120)
            FILTER (?productLabel != "wineskins banded crc")
            FILTER (?simProperty2 < 170)
        }
        ORDER BY ?productLabel
        LIMIT 5
"""

result = wrapper.query(query)
resultDF = result.dataFrame

# Convert the result to a pandas DataFrame and render it as a table.
pandasDF = resultDF.toPandas()
display(pandasDF)

### Q7

In [None]:
# Benchmark Q7. The snippet is handed to timeit as source text, so every
# execution builds a wrapper, submits the query and reads `dataFrame`.

code = '''
query = """
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        PREFIX foaf: <http://xmlns.com/foaf/0.1/>
        PREFIX schema: <http://schema.org/>
        PREFIX rev: <http://purl.org/stuff/rev#>
        PREFIX edm: <http://www.europeana.eu/schemas/edm/>
        PREFIX dc: <http://purl.org/dc/elements/1.1/>
        PREFIX gr: <http://purl.org/goodrelations/v1#>
        PREFIX dcterms: <http://purl.org/dc/terms/>
        PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
        PREFIX bsbm: <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/>

        SELECT ?productLabel ?price ?vendor ?revTitle
        ?reviewer ?rating1 ?rating2 ?product ?revName
        WHERE {
            ?product rdfs:label ?productLabel .
            ?product rdf:type bsbm:Product .
            ?offer bsbm:product ?product .
            ?offer bsbm:price ?price .
            ?offer bsbm:vendor ?vendor .
            ?offer bsbm:validTo ?date .
            ?review bsbm:reviewFor ?product .
            ?review rev:reviewer ?reviewer .
            ?review dc:title ?revTitle .
            ?review bsbm:rating1 ?rating1 .
            ?review bsbm:rating2 ?rating2 .
            ?reviewer foaf:name ?revName .
            ?reviewer a foaf:Person .
            FILTER (?price > 5000)
            FILTER (str(?product) = "http://example.com/Product/9")
        }
"""

wrapper = PySPARQLWrapper(spark, sparql_endpoint)
result = wrapper.query(query)
resultDF = result.dataFrame
'''

# `repeat` measurements, each running the snippet `loop` time(s); the snippet
# resolves spark, sparql_endpoint and PySPARQLWrapper through globals().
result = timeit.repeat(stmt=code, number=loop, repeat=repeat, globals=globals())

# Per-measurement timings first, then their mean.
print("TIME", result, sep="\n")
print("\nAVG_TIME", Average(result), sep="\n")

In [None]:
# Execute Q7 once, outside the benchmark, to inspect the rows it returns.

wrapper = PySPARQLWrapper(spark, sparql_endpoint)

query = """
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        PREFIX foaf: <http://xmlns.com/foaf/0.1/>
        PREFIX schema: <http://schema.org/>
        PREFIX rev: <http://purl.org/stuff/rev#>
        PREFIX edm: <http://www.europeana.eu/schemas/edm/>
        PREFIX dc: <http://purl.org/dc/elements/1.1/>
        PREFIX gr: <http://purl.org/goodrelations/v1#>
        PREFIX dcterms: <http://purl.org/dc/terms/>
        PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
        PREFIX bsbm: <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/>

        SELECT ?productLabel ?price ?vendor ?revTitle
        ?reviewer ?rating1 ?rating2 ?product ?revName
        WHERE {
            ?product rdfs:label ?productLabel .
            ?product rdf:type bsbm:Product .
            ?offer bsbm:product ?product .
            ?offer bsbm:price ?price .
            ?offer bsbm:vendor ?vendor .
            ?offer bsbm:validTo ?date .
            ?review bsbm:reviewFor ?product .
            ?review rev:reviewer ?reviewer .
            ?review dc:title ?revTitle .
            ?review bsbm:rating1 ?rating1 .
            ?review bsbm:rating2 ?rating2 .
            ?reviewer foaf:name ?revName .
            ?reviewer a foaf:Person .
            FILTER (?price > 5000)
            FILTER (str(?product) = "http://example.com/Product/9")
        }
"""

result = wrapper.query(query)
resultDF = result.dataFrame

# Convert the result to a pandas DataFrame and render it as a table.
pandasDF = resultDF.toPandas()
display(pandasDF)

### Q8

In [None]:
# Benchmark Q8. The snippet is handed to timeit as source text, so every
# execution builds a wrapper, submits the query and reads `dataFrame`.

code = '''
query = """
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        PREFIX foaf: <http://xmlns.com/foaf/0.1/>
        PREFIX schema: <http://schema.org/>
        PREFIX rev: <http://purl.org/stuff/rev#>
        PREFIX edm: <http://www.europeana.eu/schemas/edm/>
        PREFIX dc: <http://purl.org/dc/elements/1.1/>
        PREFIX gr: <http://purl.org/goodrelations/v1#>
        PREFIX dcterms: <http://purl.org/dc/terms/>
        PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
        PREFIX bsbm: <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/>

        SELECT DISTINCT ?title ?text ?reviewDate ?reviewer
                        ?reviewerName ?rating1 ?rating2
                        ?rating3 ?rating4 ?product
        WHERE {
            ?product rdfs:label  ?label .
            ?product bsbm:productPropertyTextual1 ?pt .
            ?product bsbm:producer ?producer .
            ?producer edm:country ?c .
            ?producer foaf:homepage ?h .
            ?review bsbm:reviewFor ?product .
            ?review dc:title ?title .
            ?review rev:text ?text .
            ?review bsbm:reviewDate ?reviewDate .
            ?review rev:reviewer ?reviewer .
            ?review bsbm:rating1 ?rating1 .
            ?review bsbm:rating2 ?rating2 .
            ?review bsbm:rating3 ?rating3 .
            ?review bsbm:rating4 ?rating4 .
            ?reviewer foaf:name ?reviewerName .
            ?reviewer a foaf:Person .
            FILTER (str(?product) = "http://example.com/Product/9")
        }
        ORDER BY DESC(?reviewDate)
        LIMIT 9
"""

wrapper = PySPARQLWrapper(spark, sparql_endpoint)
result = wrapper.query(query)
resultDF = result.dataFrame
'''

# `repeat` measurements, each running the snippet `loop` time(s); the snippet
# resolves spark, sparql_endpoint and PySPARQLWrapper through globals().
result = timeit.repeat(stmt=code, number=loop, repeat=repeat, globals=globals())

# Per-measurement timings first, then their mean.
print("TIME", result, sep="\n")
print("\nAVG_TIME", Average(result), sep="\n")

In [None]:
# Execute Q8 once, outside the benchmark, to inspect the rows it returns.

wrapper = PySPARQLWrapper(spark, sparql_endpoint)

query = """
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        PREFIX foaf: <http://xmlns.com/foaf/0.1/>
        PREFIX schema: <http://schema.org/>
        PREFIX rev: <http://purl.org/stuff/rev#>
        PREFIX edm: <http://www.europeana.eu/schemas/edm/>
        PREFIX dc: <http://purl.org/dc/elements/1.1/>
        PREFIX gr: <http://purl.org/goodrelations/v1#>
        PREFIX dcterms: <http://purl.org/dc/terms/>
        PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
        PREFIX bsbm: <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/>

        SELECT DISTINCT ?title ?text ?reviewDate ?reviewer
                        ?reviewerName ?rating1 ?rating2
                        ?rating3 ?rating4 ?product
        WHERE {
            ?product rdfs:label  ?label .
            ?product bsbm:productPropertyTextual1 ?pt .
            ?product bsbm:producer ?producer .
            ?producer edm:country ?c .
            ?producer foaf:homepage ?h .
            ?review bsbm:reviewFor ?product .
            ?review dc:title ?title .
            ?review rev:text ?text .
            ?review bsbm:reviewDate ?reviewDate .
            ?review rev:reviewer ?reviewer .
            ?review bsbm:rating1 ?rating1 .
            ?review bsbm:rating2 ?rating2 .
            ?review bsbm:rating3 ?rating3 .
            ?review bsbm:rating4 ?rating4 .
            ?reviewer foaf:name ?reviewerName .
            ?reviewer a foaf:Person .
            FILTER (str(?product) = "http://example.com/Product/9")
        }
        ORDER BY DESC(?reviewDate)
        LIMIT 9
"""

result = wrapper.query(query)
resultDF = result.dataFrame

# Convert the result to a pandas DataFrame and render it as a table.
pandasDF = resultDF.toPandas()
display(pandasDF)

### Free ?subj and ?pred

In [None]:
# Benchmark the query with free ?subj and ?pred. The snippet is handed to
# timeit as source text, so every execution builds a wrapper, submits the
# query and reads `dataFrame`.

code = '''
query = """
        PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

        SELECT ?subj ?pred
        WHERE {
            ?subj ?pred "1"^^xsd:int.
        }
"""

wrapper = PySPARQLWrapper(spark, sparql_endpoint)
result = wrapper.query(query)
resultDF = result.dataFrame
'''

# `repeat` measurements, each running the snippet `loop` time(s); the snippet
# resolves spark, sparql_endpoint and PySPARQLWrapper through globals().
result = timeit.repeat(stmt=code, number=loop, repeat=repeat, globals=globals())

# Per-measurement timings first, then their mean.
print("TIME", result, sep="\n")
print("\nAVG_TIME", Average(result), sep="\n")

In [None]:
# Execute the free ?subj/?pred query once, outside the benchmark, to inspect
# the rows it returns.

wrapper = PySPARQLWrapper(spark, sparql_endpoint)

query = """
        PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

        SELECT ?subj ?pred
        WHERE {
            ?subj ?pred "1"^^xsd:int.
        }
"""

result = wrapper.query(query)
resultDF = result.dataFrame

# Convert the result to a pandas DataFrame and render it as a table.
pandasDF = resultDF.toPandas()
display(pandasDF)

### Entailment regime query

**Attention:** this query usually takes several minutes to run.

In [None]:
# Time the unrestricted ?s ?p ?o query. It is measured exactly once
# (repeat=1, number=1) instead of using the shared repeat/loop settings.

code = '''
query = """
    SELECT ?s ?p ?o 
    WHERE { 
        ?s ?p ?o 
    }
"""

wrapper = PySPARQLWrapper(spark, sparql_endpoint)
result = wrapper.query(query)
resultDF = result.dataFrame
'''

# The snippet resolves spark, sparql_endpoint and PySPARQLWrapper through globals().
result = timeit.repeat(stmt=code, number=1, repeat=1, globals=globals())

# Single-element list holding the one measurement.
print("TIME", result, sep="\n")