In [106]:
from pyspark import SparkContext, SparkConf
from pyspark.sql import SparkSession, HiveContext
from pyspark.sql.functions import *
import timeit

In [107]:
from PySPARQL.Wrapper import PySPARQLWrapper

### Connection to Apache Spark

In [108]:
# Build (or reuse) the SparkSession that is handed to PySPARQLWrapper in every
# query cell below. The Hive metastore location is set here in code rather
# than through a configuration file.
spark = (SparkSession
          .builder
          .appName("interfacing spark sql to hive metastore without configuration file")
          .config("hive.metastore.uris", "thrift://hive-metastore:9083")  # Thrift URI of the Hive metastore
          .enableHiveSupport()  # turn on Hive support for this session
          .getOrCreate())  # reuse an already running session if there is one

### SPARQL query endpoint

In [109]:
# URL of the SPARQL endpoint (host "ontop", port 8080) that every
# PySPARQLWrapper below is constructed with.
sparql_endpoint = "http://ontop:8080/sparql"

### Timeit configuration

In [110]:
# Number of timing repetitions; passed as `repeat=` to timeit.repeat in the test cells.
repeat = 10
# Number of executions per repetition; passed as `number=` to timeit.repeat in the test cells.
loop = 1

def Average(lst):
    """Return the arithmetic mean of the numbers in ``lst``.

    Args:
        lst: Non-empty sized collection of numbers (in this notebook, the
            list of timings produced by ``timeit.repeat``).

    Returns:
        The sum of the elements divided by the number of elements.

    Raises:
        ZeroDivisionError: If ``lst`` is empty.
    """
    # Built-in sum replaces the manual index loop; an empty input still ends
    # in 0 / 0 and therefore raises ZeroDivisionError exactly as before.
    return sum(lst) / len(lst)

### Q1

In [111]:
# Run the test: time Q1 end to end (creating the wrapper, running the query
# and reading its dataFrame) for `repeat` repetitions of `loop` executions.
# The statement below is passed to timeit as a string and is kept verbatim.

code = '''
query = """
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        PREFIX foaf: <http://xmlns.com/foaf/0.1/>
        PREFIX schema: <http://schema.org/>
        PREFIX rev: <http://purl.org/stuff/rev#>
        PREFIX edm: <http://www.europeana.eu/schemas/edm/>
        PREFIX dc: <http://purl.org/dc/elements/1.1/>
        PREFIX gr: <http://purl.org/goodrelations/v1#>
        PREFIX dcterms: <http://purl.org/dc/terms/>
        PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
        PREFIX bsbm: <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/>

        SELECT DISTINCT ?label ?value
        WHERE {
            ?product rdfs:label ?label .
            ?product bsbm:productPropertyNumeric1 ?value .
            ?product rdf:type bsbm:Product .
            ?product bsbm:producer ?producer .
            ?producer rdf:type bsbm:Producer .
            ?producer foaf:homepage ?hp .
            ?review bsbm:reviewFor ?product .
            ?review rdf:type schema:Review .
            ?review rev:reviewer ?pers .
            ?pers foaf:name ?fn .
            ?pers edm:country ?cn .
            ?offer bsbm:product ?product .
            ?offer rdf:type schema:Offer .
            FILTER (?value > 102)
        }
        ORDER BY ?label
        LIMIT 10
"""

wrapper = PySPARQLWrapper(spark, sparql_endpoint)
result = wrapper.query(query)
resultDF = result.dataFrame
'''

# timeit.repeat returns, for each repetition, the TOTAL time of `loop`
# executions. Dividing by `loop` keeps the reported numbers in seconds per
# single execution even when `loop` is raised above 1.
result = [elapsed / loop
          for elapsed in timeit.repeat(code, repeat=repeat, globals=globals(), number=loop)]

print("TIME")
print(result)

print("\nAVG_TIME")
print(Average(result))

TIME
[1.4218770640000002, 1.426847763999831, 1.6082297469993136, 1.3164694469996903, 1.2631199130000823, 1.2582888169999933, 1.2783897139997862, 1.2663469479994092, 1.2902684180007782, 1.3214915389999078]

AVG_TIME
1.3451329370998792


In [112]:
# Run the query: execute Q1 once, outside the benchmark, so that its result
# can be inspected.

query = """
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        PREFIX foaf: <http://xmlns.com/foaf/0.1/>
        PREFIX schema: <http://schema.org/>
        PREFIX rev: <http://purl.org/stuff/rev#>
        PREFIX edm: <http://www.europeana.eu/schemas/edm/>
        PREFIX dc: <http://purl.org/dc/elements/1.1/>
        PREFIX gr: <http://purl.org/goodrelations/v1#>
        PREFIX dcterms: <http://purl.org/dc/terms/>
        PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
        PREFIX bsbm: <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/>

        SELECT DISTINCT ?label ?value
        WHERE {
            ?product rdfs:label ?label .
            ?product bsbm:productPropertyNumeric1 ?value .
            ?product rdf:type bsbm:Product .
            ?product bsbm:producer ?producer .
            ?producer rdf:type bsbm:Producer .
            ?producer foaf:homepage ?hp .
            ?review bsbm:reviewFor ?product .
            ?review rdf:type schema:Review .
            ?review rev:reviewer ?pers .
            ?pers foaf:name ?fn .
            ?pers edm:country ?cn .
            ?offer bsbm:product ?product .
            ?offer rdf:type schema:Offer .
            FILTER (?value > 102)
        }
        ORDER BY ?label
        LIMIT 10
"""

# Send the query to the endpoint through PySPARQL and keep its dataFrame.
# NOTE: this rebinds `result`, which the test cell used for the timing list.
wrapper = PySPARQLWrapper(spark, sparql_endpoint)
result = wrapper.query(query)
resultDF = result.dataFrame

resultDF.show()  # Print the result rows as a text table (nothing is plotted)

+--------------------+-----+
|               label|value|
+--------------------+-----+
|            'ahchoo'|  594|
|           'coterie'| 1891|
|'desolates waging...|  374|
|'lignites rallyin...|  133|
|    'manner gatemen'|  831|
|'procreators taiw...| 1504|
|'reexhibit wrang ...|  940|
|'resettling uncoa...| 1743|
|  'tither pettiness'| 1900|
|'vacillator morti...|  834|
+--------------------+-----+



### Q2

In [113]:
# Run the test: time Q2 end to end (creating the wrapper, running the query
# and reading its dataFrame) for `repeat` repetitions of `loop` executions.
# The statement below is passed to timeit as a string and is kept verbatim.

code = '''
query = """
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        PREFIX foaf: <http://xmlns.com/foaf/0.1/>
        PREFIX schema: <http://schema.org/>
        PREFIX rev: <http://purl.org/stuff/rev#>
        PREFIX edm: <http://www.europeana.eu/schemas/edm/>
        PREFIX dc: <http://purl.org/dc/elements/1.1/>
        PREFIX gr: <http://purl.org/goodrelations/v1#>
        PREFIX dcterms: <http://purl.org/dc/terms/>
        PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
        PREFIX bsbm: <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/>

        SELECT ?label ?comment ?producer ?price
            ?propertyTextual1 ?propertyTextual2 ?propertyTextual3
            ?propertyNumeric1 ?propertyNumeric2 ?propertyTextual4
            ?propertyTextual5 ?propertyNumeric4
        WHERE {
            ?p rdfs:label ?label .
            ?p rdfs:comment ?comment .
            ?p bsbm:producer ?producer .
            ?p dc:publisher ?ps .
            ?offer bsbm:product ?p .
            ?offer bsbm:price ?price .
            ?p bsbm:productPropertyTextual1 ?propertyTextual1 .
            ?p bsbm:productPropertyTextual2 ?propertyTextual2 .
            ?p bsbm:productPropertyTextual3 ?propertyTextual3 .
            ?p bsbm:productPropertyNumeric1 ?propertyNumeric1 .
            ?p bsbm:productPropertyNumeric2 ?propertyNumeric2 .
            ?p bsbm:productPropertyTextual4 ?propertyTextual4 .
            ?p bsbm:productPropertyTextual5 ?propertyTextual5 .
            ?p bsbm:productPropertyNumeric4 ?propertyNumeric4 .
        }
"""

wrapper = PySPARQLWrapper(spark, sparql_endpoint)
result = wrapper.query(query)
resultDF = result.dataFrame
'''

# timeit.repeat returns, for each repetition, the TOTAL time of `loop`
# executions. Dividing by `loop` keeps the reported numbers in seconds per
# single execution even when `loop` is raised above 1.
result = [elapsed / loop
          for elapsed in timeit.repeat(code, repeat=repeat, globals=globals(), number=loop)]

print("TIME")
print(result)

print("\nAVG_TIME")
print(Average(result))

TIME
[1.8792619249998097, 1.939286832000107, 2.178938877999826, 1.7234063119994971, 2.1593382009996276, 1.7946767660005207, 1.8100408649997917, 2.291362152000147, 1.7993365120000817, 1.8837381270004698]

AVG_TIME
1.945938656999988


In [114]:
# Run the query: execute Q2 once, outside the benchmark, so that its result
# can be inspected.

query = """
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        PREFIX foaf: <http://xmlns.com/foaf/0.1/>
        PREFIX schema: <http://schema.org/>
        PREFIX rev: <http://purl.org/stuff/rev#>
        PREFIX edm: <http://www.europeana.eu/schemas/edm/>
        PREFIX dc: <http://purl.org/dc/elements/1.1/>
        PREFIX gr: <http://purl.org/goodrelations/v1#>
        PREFIX dcterms: <http://purl.org/dc/terms/>
        PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
        PREFIX bsbm: <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/>

        SELECT ?label ?comment ?producer ?price
            ?propertyTextual1 ?propertyTextual2 ?propertyTextual3
            ?propertyNumeric1 ?propertyNumeric2 ?propertyTextual4
            ?propertyTextual5 ?propertyNumeric4
        WHERE {
            ?p rdfs:label ?label .
            ?p rdfs:comment ?comment .
            ?p bsbm:producer ?producer .
            ?p dc:publisher ?ps .
            ?offer bsbm:product ?p .
            ?offer bsbm:price ?price .
            ?p bsbm:productPropertyTextual1 ?propertyTextual1 .
            ?p bsbm:productPropertyTextual2 ?propertyTextual2 .
            ?p bsbm:productPropertyTextual3 ?propertyTextual3 .
            ?p bsbm:productPropertyNumeric1 ?propertyNumeric1 .
            ?p bsbm:productPropertyNumeric2 ?propertyNumeric2 .
            ?p bsbm:productPropertyTextual4 ?propertyTextual4 .
            ?p bsbm:productPropertyTextual5 ?propertyTextual5 .
            ?p bsbm:productPropertyNumeric4 ?propertyNumeric4 .
        }
"""

# Send the query to the endpoint through PySPARQL and keep its dataFrame.
# NOTE: this rebinds `result`, which the test cell used for the timing list.
wrapper = PySPARQLWrapper(spark, sparql_endpoint)
result = wrapper.query(query)
resultDF = result.dataFrame

resultDF.show()  # Print the result rows as a text table (nothing is plotted)

+--------------------+--------------------+--------------------+---------+--------------------+--------------------+--------------------+----------------+----------------+--------------------+--------------------+----------------+
|               label|             comment|            producer|    price|    propertyTextual1|    propertyTextual2|    propertyTextual3|propertyNumeric1|propertyNumeric2|    propertyTextual4|    propertyTextual5|propertyNumeric4|
+--------------------+--------------------+--------------------+---------+--------------------+--------------------+--------------------+----------------+----------------+--------------------+--------------------+----------------+
|            'ahchoo'|'chanceman ventra...|http://example.co...|8.56176E3|'whirs radiation ...|'vichies resituat...|'unreels voicer a...|             594|             434|                null|'hyperbolas knout...|              -1|
|            'ahchoo'|'chanceman ventra...|http://example.co...|3.03146E3|'w

### Q3

In [115]:
# Run the test: time Q3 end to end (creating the wrapper, running the query
# and reading its dataFrame) for `repeat` repetitions of `loop` executions.
# The statement below is passed to timeit as a string and is kept verbatim.

code = '''
query = """
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        PREFIX foaf: <http://xmlns.com/foaf/0.1/>
        PREFIX schema: <http://schema.org/>
        PREFIX rev: <http://purl.org/stuff/rev#>
        PREFIX edm: <http://www.europeana.eu/schemas/edm/>
        PREFIX dc: <http://purl.org/dc/elements/1.1/>
        PREFIX gr: <http://purl.org/goodrelations/v1#>
        PREFIX dcterms: <http://purl.org/dc/terms/>
        PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
        PREFIX bsbm: <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/>

        SELECT ?product ?label ?p1 ?p3
        WHERE {
            ?product rdfs:label ?label .
            ?product bsbm:producer ?producer .
            ?review bsbm:reviewFor ?product .
            ?review rdf:type schema:Review .
            ?product bsbm:productPropertyNumeric1 ?p1 .
            ?product bsbm:productPropertyNumeric3 ?p3 .
            FILTER (?p1 > 1800)
            FILTER (?p3 < 5 )
        }
        ORDER BY ?label
        LIMIT 10
"""

wrapper = PySPARQLWrapper(spark, sparql_endpoint)
result = wrapper.query(query)
resultDF = result.dataFrame
'''

# timeit.repeat returns, for each repetition, the TOTAL time of `loop`
# executions. Dividing by `loop` keeps the reported numbers in seconds per
# single execution even when `loop` is raised above 1.
result = [elapsed / loop
          for elapsed in timeit.repeat(code, repeat=repeat, globals=globals(), number=loop)]

print("TIME")
print(result)

print("\nAVG_TIME")
print(Average(result))

TIME
[1.4152048689993535, 1.3532676089998859, 1.281841459000134, 1.3187303929998961, 1.2268315700002859, 1.1259463480000704, 1.0925609179994353, 1.090250668000408, 1.092925513999944, 1.0814043720001791]

AVG_TIME
1.2078963719999591


In [116]:
# Run the query: execute Q3 once, outside the benchmark, so that its result
# can be inspected.

query = """
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        PREFIX foaf: <http://xmlns.com/foaf/0.1/>
        PREFIX schema: <http://schema.org/>
        PREFIX rev: <http://purl.org/stuff/rev#>
        PREFIX edm: <http://www.europeana.eu/schemas/edm/>
        PREFIX dc: <http://purl.org/dc/elements/1.1/>
        PREFIX gr: <http://purl.org/goodrelations/v1#>
        PREFIX dcterms: <http://purl.org/dc/terms/>
        PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
        PREFIX bsbm: <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/>

        SELECT ?product ?label ?p1 ?p3
        WHERE {
            ?product rdfs:label ?label .
            ?product bsbm:producer ?producer .
            ?review bsbm:reviewFor ?product .
            ?review rdf:type schema:Review .
            ?product bsbm:productPropertyNumeric1 ?p1 .
            ?product bsbm:productPropertyNumeric3 ?p3 .
            FILTER (?p1 > 1800)
            FILTER (?p3 < 5 )
        }
        ORDER BY ?label
        LIMIT 10
"""

# Send the query to the endpoint through PySPARQL and keep its dataFrame.
# NOTE: this rebinds `result`, which the test cell used for the timing list.
wrapper = PySPARQLWrapper(spark, sparql_endpoint)
result = wrapper.query(query)
resultDF = result.dataFrame

resultDF.show()  # Print the result rows as a text table (nothing is plotted)

+-------+-----+---+---+
|product|label| p1| p3|
+-------+-----+---+---+
+-------+-----+---+---+



### Q4

In [117]:
# Run the test: time Q4 end to end (creating the wrapper, running the query
# and reading its dataFrame) for `repeat` repetitions of `loop` executions.
# The statement below is passed to timeit as a string and is kept verbatim.

code = '''
query = """
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        PREFIX foaf: <http://xmlns.com/foaf/0.1/>
        PREFIX schema: <http://schema.org/>
        PREFIX rev: <http://purl.org/stuff/rev#>
        PREFIX edm: <http://www.europeana.eu/schemas/edm/>
        PREFIX dc: <http://purl.org/dc/elements/1.1/>
        PREFIX gr: <http://purl.org/goodrelations/v1#>
        PREFIX dcterms: <http://purl.org/dc/terms/>
        PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
        PREFIX bsbm: <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/>

        SELECT DISTINCT ?label ?c ?propertyTextual ?p1
        WHERE {
            ?product rdfs:label ?label .
            ?product bsbm:producer ?pcr .
            ?pcr edm:country ?c .
            ?pcr foaf:homepage ?h .
            ?offer gr:validFrom ?vf .
            ?offer bsbm:product ?product .
            ?review bsbm:reviewFor ?product .
            ?review rev:reviewer ?pers .
            ?pers foaf:name ?fn .
            ?pers edm:country ?cn .
            ?product bsbm:productPropertyTextual1 ?propertyTextual .
            ?product bsbm:productPropertyNumeric1 ?p1 .
            FILTER (?p1 > 630)
        }
        ORDER BY ?label
        LIMIT 10
"""

wrapper = PySPARQLWrapper(spark, sparql_endpoint)
result = wrapper.query(query)
resultDF = result.dataFrame
'''

# timeit.repeat returns, for each repetition, the TOTAL time of `loop`
# executions. Dividing by `loop` keeps the reported numbers in seconds per
# single execution even when `loop` is raised above 1.
result = [elapsed / loop
          for elapsed in timeit.repeat(code, repeat=repeat, globals=globals(), number=loop)]

print("TIME")
print(result)

print("\nAVG_TIME")
print(Average(result))

TIME
[1.6409293859996978, 1.5143511790001867, 1.4720671129998664, 1.4853957100003754, 1.5623859620000076, 1.466603200999998, 1.8230932810001832, 1.6267371320000166, 1.9787561120001556, 1.4888913079994381]

AVG_TIME
1.6059210383999925


In [118]:
# Run the query: execute Q4 once, outside the benchmark, so that its result
# can be inspected.

query = """
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        PREFIX foaf: <http://xmlns.com/foaf/0.1/>
        PREFIX schema: <http://schema.org/>
        PREFIX rev: <http://purl.org/stuff/rev#>
        PREFIX edm: <http://www.europeana.eu/schemas/edm/>
        PREFIX dc: <http://purl.org/dc/elements/1.1/>
        PREFIX gr: <http://purl.org/goodrelations/v1#>
        PREFIX dcterms: <http://purl.org/dc/terms/>
        PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
        PREFIX bsbm: <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/>

        SELECT DISTINCT ?label ?c ?propertyTextual ?p1
        WHERE {
            ?product rdfs:label ?label .
            ?product bsbm:producer ?pcr .
            ?pcr edm:country ?c .
            ?pcr foaf:homepage ?h .
            ?offer gr:validFrom ?vf .
            ?offer bsbm:product ?product .
            ?review bsbm:reviewFor ?product .
            ?review rev:reviewer ?pers .
            ?pers foaf:name ?fn .
            ?pers edm:country ?cn .
            ?product bsbm:productPropertyTextual1 ?propertyTextual .
            ?product bsbm:productPropertyNumeric1 ?p1 .
            FILTER (?p1 > 630)
        }
        ORDER BY ?label
        LIMIT 10
"""

# Send the query to the endpoint through PySPARQL and keep its dataFrame.
# NOTE: this rebinds `result`, which the test cell used for the timing list.
wrapper = PySPARQLWrapper(spark, sparql_endpoint)
result = wrapper.query(query)
resultDF = result.dataFrame

resultDF.show()  # Print the result rows as a text table (nothing is plotted)

+--------------------+---+--------------------+----+
|               label|  c|     propertyTextual|  p1|
+--------------------+---+--------------------+----+
|           'coterie'| DE|'scalded decoct p...|1891|
|    'manner gatemen'| DE|'guzzling jillion...| 831|
|'procreators taiw...| DE|'exceptionally re...|1504|
|'reexhibit wrang ...| DE|'skinfuls uncerta...| 940|
|'resettling uncoa...| DE|'unrestored kneel...|1743|
|  'tither pettiness'| DE|'solidest incarna...|1900|
|'vacillator morti...| DE|'egoisms welterwe...| 834|
+--------------------+---+--------------------+----+



### Q5

In [119]:
# Run the test: time Q5 end to end (creating the wrapper, running the query
# and reading its dataFrame) for `repeat` repetitions of `loop` executions.
# The statement below is passed to timeit as a string and is kept verbatim.

code = '''
query = """
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        PREFIX foaf: <http://xmlns.com/foaf/0.1/>
        PREFIX schema: <http://schema.org/>
        PREFIX rev: <http://purl.org/stuff/rev#>
        PREFIX edm: <http://www.europeana.eu/schemas/edm/>
        PREFIX dc: <http://purl.org/dc/elements/1.1/>
        PREFIX gr: <http://purl.org/goodrelations/v1#>
        PREFIX dcterms: <http://purl.org/dc/terms/>
        PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
        PREFIX bsbm: <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/>

        SELECT DISTINCT ?productLabel ?simProperty1 ?simProperty2
        WHERE {
            ?product rdfs:label ?productLabel .
            ?product bsbm:productPropertyNumeric1 ?simProperty1 .
            ?product bsbm:productPropertyNumeric2 ?simProperty2 .
            ?product bsbm:producer ?producer .
            ?review bsbm:reviewFor ?product .
            ?review rdf:type schema:Review .
            ?offer bsbm:product ?product .
            ?offer rdf:type schema:Offer .
            FILTER (?simProperty1 < 120)
            FILTER (?productLabel != "wineskins banded crc")
            FILTER (?simProperty2 < 170)
        }
        ORDER BY ?productLabel
        LIMIT 5
"""

wrapper = PySPARQLWrapper(spark, sparql_endpoint)
result = wrapper.query(query)
resultDF = result.dataFrame
'''

# timeit.repeat returns, for each repetition, the TOTAL time of `loop`
# executions. Dividing by `loop` keeps the reported numbers in seconds per
# single execution even when `loop` is raised above 1.
result = [elapsed / loop
          for elapsed in timeit.repeat(code, repeat=repeat, globals=globals(), number=loop)]

print("TIME")
print(result)

print("\nAVG_TIME")
print(Average(result))

TIME
[1.2483313510001608, 1.1526907839997875, 1.2019185660001312, 1.353941381000368, 1.284430734000125, 1.3256043119999958, 1.3318785020001087, 1.2719816759999958, 1.2185669350001263, 1.1202651530002186]

AVG_TIME
1.2509609394001018


In [120]:
# Run the query: execute Q5 once, outside the benchmark, so that its result
# can be inspected.

query = """
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        PREFIX foaf: <http://xmlns.com/foaf/0.1/>
        PREFIX schema: <http://schema.org/>
        PREFIX rev: <http://purl.org/stuff/rev#>
        PREFIX edm: <http://www.europeana.eu/schemas/edm/>
        PREFIX dc: <http://purl.org/dc/elements/1.1/>
        PREFIX gr: <http://purl.org/goodrelations/v1#>
        PREFIX dcterms: <http://purl.org/dc/terms/>
        PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
        PREFIX bsbm: <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/>

        SELECT DISTINCT ?productLabel ?simProperty1 ?simProperty2
        WHERE {
            ?product rdfs:label ?productLabel .
            ?product bsbm:productPropertyNumeric1 ?simProperty1 .
            ?product bsbm:productPropertyNumeric2 ?simProperty2 .
            ?product bsbm:producer ?producer .
            ?review bsbm:reviewFor ?product .
            ?review rdf:type schema:Review .
            ?offer bsbm:product ?product .
            ?offer rdf:type schema:Offer .
            FILTER (?simProperty1 < 120)
            FILTER (?productLabel != "wineskins banded crc")
            FILTER (?simProperty2 < 170)
        }
        ORDER BY ?productLabel
        LIMIT 5
"""

# Send the query to the endpoint through PySPARQL and keep its dataFrame.
# NOTE: this rebinds `result`, which the test cell used for the timing list.
wrapper = PySPARQLWrapper(spark, sparql_endpoint)
result = wrapper.query(query)
resultDF = result.dataFrame

resultDF.show()  # Print the result rows as a text table (nothing is plotted)

+------------+------------+------------+
|productLabel|simProperty1|simProperty2|
+------------+------------+------------+
+------------+------------+------------+



### Q7

In [121]:
# Run the test: time Q7 end to end (creating the wrapper, running the query
# and reading its dataFrame) for `repeat` repetitions of `loop` executions.
# The statement below is passed to timeit as a string and is kept verbatim.

code = '''
query = """
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        PREFIX foaf: <http://xmlns.com/foaf/0.1/>
        PREFIX schema: <http://schema.org/>
        PREFIX rev: <http://purl.org/stuff/rev#>
        PREFIX edm: <http://www.europeana.eu/schemas/edm/>
        PREFIX dc: <http://purl.org/dc/elements/1.1/>
        PREFIX gr: <http://purl.org/goodrelations/v1#>
        PREFIX dcterms: <http://purl.org/dc/terms/>
        PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
        PREFIX bsbm: <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/>

        SELECT ?productLabel ?price ?vendor ?revTitle
        ?reviewer ?rating1 ?rating2 ?product ?revName
        WHERE {
            ?product rdfs:label ?productLabel .
            ?product rdf:type bsbm:Product .
            ?offer bsbm:product ?product .
            ?offer bsbm:price ?price .
            ?offer bsbm:vendor ?vendor .
            ?offer bsbm:validTo ?date .
            ?review bsbm:reviewFor ?product .
            ?review rev:reviewer ?reviewer .
            ?review dc:title ?revTitle .
            ?review bsbm:rating1 ?rating1 .
            ?review bsbm:rating2 ?rating2 .
            ?reviewer foaf:name ?revName .
            ?reviewer a foaf:Person .
            FILTER (?price > 5000)
            FILTER (str(?product) = "http://example.com/Product/9")
        }
"""

wrapper = PySPARQLWrapper(spark, sparql_endpoint)
result = wrapper.query(query)
resultDF = result.dataFrame
'''

# timeit.repeat returns, for each repetition, the TOTAL time of `loop`
# executions. Dividing by `loop` keeps the reported numbers in seconds per
# single execution even when `loop` is raised above 1.
result = [elapsed / loop
          for elapsed in timeit.repeat(code, repeat=repeat, globals=globals(), number=loop)]

print("TIME")
print(result)

print("\nAVG_TIME")
print(Average(result))

TIME
[1.5661725010004375, 1.4059532190003665, 1.6518069370004014, 1.687647785000081, 1.45523542599949, 1.4921913809994294, 1.4307240300004196, 1.4425667409996095, 1.4473819140002888, 1.3965456289997746]

AVG_TIME
1.4976225563000298


In [122]:
# Run the query: execute Q7 once, outside the benchmark, so that its result
# can be inspected.

query = """
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        PREFIX foaf: <http://xmlns.com/foaf/0.1/>
        PREFIX schema: <http://schema.org/>
        PREFIX rev: <http://purl.org/stuff/rev#>
        PREFIX edm: <http://www.europeana.eu/schemas/edm/>
        PREFIX dc: <http://purl.org/dc/elements/1.1/>
        PREFIX gr: <http://purl.org/goodrelations/v1#>
        PREFIX dcterms: <http://purl.org/dc/terms/>
        PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
        PREFIX bsbm: <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/>

        SELECT ?productLabel ?price ?vendor ?revTitle
        ?reviewer ?rating1 ?rating2 ?product ?revName
        WHERE {
            ?product rdfs:label ?productLabel .
            ?product rdf:type bsbm:Product .
            ?offer bsbm:product ?product .
            ?offer bsbm:price ?price .
            ?offer bsbm:vendor ?vendor .
            ?offer bsbm:validTo ?date .
            ?review bsbm:reviewFor ?product .
            ?review rev:reviewer ?reviewer .
            ?review dc:title ?revTitle .
            ?review bsbm:rating1 ?rating1 .
            ?review bsbm:rating2 ?rating2 .
            ?reviewer foaf:name ?revName .
            ?reviewer a foaf:Person .
            FILTER (?price > 5000)
            FILTER (str(?product) = "http://example.com/Product/9")
        }
"""

# Send the query to the endpoint through PySPARQL and keep its dataFrame.
# NOTE: this rebinds `result`, which the test cell used for the timing list.
wrapper = PySPARQLWrapper(spark, sparql_endpoint)
result = wrapper.query(query)
resultDF = result.dataFrame

resultDF.show()  # Print the result rows as a text table (nothing is plotted)

+--------------------+---------+------+--------------------+--------------------+-------+-------+--------------------+--------------------+
|        productLabel|    price|vendor|            revTitle|            reviewer|rating1|rating2|             product|             revName|
+--------------------+---------+------+--------------------+--------------------+-------+-------+--------------------+--------------------+
|'procreators taiw...|8.09271E3|     1|'improves pervert...|http://example.co...|      7|     10|http://example.co...|'Danijela-Adalbrand'|
|'procreators taiw...|9.23612E3|     1|'improves pervert...|http://example.co...|      7|     10|http://example.co...|'Danijela-Adalbrand'|
|'procreators taiw...|7.26904E3|     1|'slavishness muon...|http://example.co...|      5|     10|http://example.co...|  'Eyana-Aurelianus'|
|'procreators taiw...|9.23612E3|     1|'slavishness muon...|http://example.co...|      5|     10|http://example.co...|  'Eyana-Aurelianus'|
|'procreators taiw..

### Q8

In [123]:
# Run the test: time Q8 end to end (creating the wrapper, running the query
# and reading its dataFrame) for `repeat` repetitions of `loop` executions.
# The statement below is passed to timeit as a string and is kept verbatim.

code = '''
query = """
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        PREFIX foaf: <http://xmlns.com/foaf/0.1/>
        PREFIX schema: <http://schema.org/>
        PREFIX rev: <http://purl.org/stuff/rev#>
        PREFIX edm: <http://www.europeana.eu/schemas/edm/>
        PREFIX dc: <http://purl.org/dc/elements/1.1/>
        PREFIX gr: <http://purl.org/goodrelations/v1#>
        PREFIX dcterms: <http://purl.org/dc/terms/>
        PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
        PREFIX bsbm: <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/>

        SELECT DISTINCT ?title ?text ?reviewDate ?reviewer
                        ?reviewerName ?rating1 ?rating2
                        ?rating3 ?rating4 ?product
        WHERE {
            ?product rdfs:label  ?label .
            ?product bsbm:productPropertyTextual1 ?pt .
            ?product bsbm:producer ?producer .
            ?producer edm:country ?c .
            ?producer foaf:homepage ?h .
            ?review bsbm:reviewFor ?product .
            ?review dc:title ?title .
            ?review rev:text ?text .
            ?review bsbm:reviewDate ?reviewDate .
            ?review rev:reviewer ?reviewer .
            ?review bsbm:rating1 ?rating1 .
            ?review bsbm:rating2 ?rating2 .
            ?review bsbm:rating3 ?rating3 .
            ?review bsbm:rating4 ?rating4 .
            ?reviewer foaf:name ?reviewerName .
            ?reviewer a foaf:Person .
            FILTER (str(?product) = "http://example.com/Product/9")
        }
        ORDER BY DESC(?reviewDate)
        LIMIT 9
"""

wrapper = PySPARQLWrapper(spark, sparql_endpoint)
result = wrapper.query(query)
resultDF = result.dataFrame
'''

# timeit.repeat returns, for each repetition, the TOTAL time of `loop`
# executions. Dividing by `loop` keeps the reported numbers in seconds per
# single execution even when `loop` is raised above 1.
result = [elapsed / loop
          for elapsed in timeit.repeat(code, repeat=repeat, globals=globals(), number=loop)]

print("TIME")
print(result)

print("\nAVG_TIME")
print(Average(result))

TIME
[2.0529662589997315, 1.926983907000249, 2.010208208000222, 1.6466033329998027, 2.0347125990001587, 1.650691179000205, 1.7891909820000365, 1.6625204680003662, 1.6934222979998594, 1.6864905579996048]

AVG_TIME
1.8153789791000237


In [125]:
# Run the query: execute Q8 once, outside the benchmark, so that its result
# can be inspected.

query = """
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        PREFIX foaf: <http://xmlns.com/foaf/0.1/>
        PREFIX schema: <http://schema.org/>
        PREFIX rev: <http://purl.org/stuff/rev#>
        PREFIX edm: <http://www.europeana.eu/schemas/edm/>
        PREFIX dc: <http://purl.org/dc/elements/1.1/>
        PREFIX gr: <http://purl.org/goodrelations/v1#>
        PREFIX dcterms: <http://purl.org/dc/terms/>
        PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
        PREFIX bsbm: <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/>

        SELECT DISTINCT ?title ?text ?reviewDate ?reviewer
                        ?reviewerName ?rating1 ?rating2
                        ?rating3 ?rating4 ?product
        WHERE {
            ?product rdfs:label  ?label .
            ?product bsbm:productPropertyTextual1 ?pt .
            ?product bsbm:producer ?producer .
            ?producer edm:country ?c .
            ?producer foaf:homepage ?h .
            ?review bsbm:reviewFor ?product .
            ?review dc:title ?title .
            ?review rev:text ?text .
            ?review bsbm:reviewDate ?reviewDate .
            ?review rev:reviewer ?reviewer .
            ?review bsbm:rating1 ?rating1 .
            ?review bsbm:rating2 ?rating2 .
            ?review bsbm:rating3 ?rating3 .
            ?review bsbm:rating4 ?rating4 .
            ?reviewer foaf:name ?reviewerName .
            ?reviewer a foaf:Person .
            FILTER (str(?product) = "http://example.com/Product/9")
        }
        ORDER BY DESC(?reviewDate)
        LIMIT 9
"""

# Send the query to the endpoint through PySPARQL and keep its dataFrame.
# NOTE: this rebinds `result`, which the test cell used for the timing list.
wrapper = PySPARQLWrapper(spark, sparql_endpoint)
result = wrapper.query(query)
resultDF = result.dataFrame

resultDF.show()  # Print the result rows as a text table (nothing is plotted)

+--------------------+--------------------+----------+--------------------+--------------------+-------+-------+-------+-------+--------------------+
|               title|                text|reviewDate|            reviewer|        reviewerName|rating1|rating2|rating3|rating4|             product|
+--------------------+--------------------+----------+--------------------+--------------------+-------+-------+-------+-------+--------------------+
|'heehawed underwa...|'brisker astrophy...|2008-03-23|http://example.co...|'Danijela-Adalbrand'|     -1|      4|     -1|      9|http://example.co...|
|'affirmance subag...|'southwesterners ...|2008-01-16|http://example.co...|  'Allegra-Walburga'|      2|      3|     10|      6|http://example.co...|
|'slavishness muon...|'philatelist lugu...|2008-01-14|http://example.co...|  'Eyana-Aurelianus'|      5|     10|     -1|     -1|http://example.co...|
|'footworn forms n...|'breedings silenc...|2007-12-04|http://example.co...|     'Przemek-Berte'|    