In [1]:
from pyspark import SparkContext, SparkConf
from pyspark.sql import SparkSession, HiveContext
from pyspark.sql.functions import *

In [3]:
from PySPARQL.Wrapper import PySPARQLWrapper

### Connection to Apache Spark

In [4]:
# Build (or reuse) a Hive-enabled Spark session. The metastore URI is supplied
# in code here, so no separate configuration file is relied upon.
_session_builder = SparkSession.builder.appName(
    "interfacing spark sql to hive metastore without configuration file"
)
_session_builder = _session_builder.config(
    "hive.metastore.uris", "thrift://hive-metastore:9083"
)
spark = _session_builder.enableHiveSupport().getOrCreate()

### SPARQL query execution

In [5]:
# SPARQL endpoint that the wrapper sends the query to.
sparql_endpoint = "http://jena-fuseki:3030/pizzads"

# The query returns (?pizzaID, ?outcome) rows:
#   - the first triple pattern reads, per pizza type, the suggested lower and
#     upper bounds for temperature and cooking duration;
#   - the SERVICE block fetches each pizza's type, temperature and start/end
#     cooking values from the federated endpoint http://ontop:8080/sparql;
#   - ?cookDuration is computed as ?end - ?start;
#   - ?outcome is "WELL_COOKED" when both the temperature and the duration lie
#     within their inclusive bounds, and "ANOMALY" when that condition is false.
query = """
    PREFIX : <http://www.co-ode.org/ontologies/pizza/pizza.owl#>

    SELECT ?pizzaID ?outcome
    WHERE {

      ?pizzaType :suggestedTempLow ?tempLow; :suggestedDurationLow ?durLow;
                 :suggestedTempUp ?tempUp; :suggestedDurationUp ?durUp .

      SERVICE <http://ontop:8080/sparql> {
        ?pizzaID a ?pizzaType.
        ?pizzaID :temperature ?avgTemp; :start_cooking ?start; :end_cooking ?end.
      }

      BIND ((?end-?start) AS ?cookDuration)

      BIND( IF ((?avgTemp >= ?tempLow && ?avgTemp <= ?tempUp) &&
          (?cookDuration >= ?durLow && ?cookDuration <= ?durUp)
          ,"WELL_COOKED","ANOMALY") AS ?outcome)
    }
"""

# Run the query through PySPARQLWrapper and keep the result's `dataFrame`
# attribute (presumably a Spark DataFrame, given the session passed in and the
# later .show()/.withColumn() calls -- confirm against the PySPARQL docs).
wrapper = PySPARQLWrapper(spark, sparql_endpoint)
result = wrapper.query(query)
resultDF = result.dataFrame

In [6]:
resultDF.show()  # Print the results as a text table (show() prints rows; it does not plot)

+--------------------+-----------+
|             pizzaID|    outcome|
+--------------------+-----------+
|http://www.co-ode...|    ANOMALY|
|http://www.co-ode...|WELL_COOKED|
|http://www.co-ode...|WELL_COOKED|
|http://www.co-ode...|WELL_COOKED|
|http://www.co-ode...|WELL_COOKED|
|http://www.co-ode...|    ANOMALY|
|http://www.co-ode...|    ANOMALY|
|http://www.co-ode...|    ANOMALY|
|http://www.co-ode...|WELL_COOKED|
|http://www.co-ode...|WELL_COOKED|
|http://www.co-ode...|WELL_COOKED|
|http://www.co-ode...|WELL_COOKED|
|http://www.co-ode...|    ANOMALY|
|http://www.co-ode...|WELL_COOKED|
|http://www.co-ode...|WELL_COOKED|
|http://www.co-ode...|WELL_COOKED|
|http://www.co-ode...|WELL_COOKED|
|http://www.co-ode...|    ANOMALY|
|http://www.co-ode...|WELL_COOKED|
|http://www.co-ode...|    ANOMALY|
+--------------------+-----------+



In [7]:
# Clean the result by stripping the ontology namespace prefix from pizzaID.
# regexp_replace interprets its pattern as a regular expression, so the literal
# dots are escaped (an unescaped "." would match any character) and the pattern
# is anchored with "^" so only a leading prefix is removed.

df2 = resultDF.withColumn(
    "pizzaID",
    regexp_replace(
        "pizzaID",
        r"^http://www\.co-ode\.org/ontologies/pizza/pizza\.owl#",
        "",
    ),
)

In [8]:
df2.show()   # Print the cleaned results as a text table (show() prints rows; it does not plot)

+-------+-----------+
|pizzaID|    outcome|
+-------+-----------+
|  PZ789|    ANOMALY|
|  PZ333|WELL_COOKED|
|  PZ222|WELL_COOKED|
|  PZ005|WELL_COOKED|
|  PZ456|WELL_COOKED|
|  PZ001|    ANOMALY|
|  PZ789|    ANOMALY|
|  PZ444|    ANOMALY|
|  PZ333|WELL_COOKED|
|  PZ999|WELL_COOKED|
|  PZ111|WELL_COOKED|
|  PZ003|WELL_COOKED|
|  PZ004|    ANOMALY|
|  PZ002|WELL_COOKED|
|  PZ888|WELL_COOKED|
|  PZ666|WELL_COOKED|
|  PZ777|WELL_COOKED|
|  PZ123|    ANOMALY|
|  PZ555|WELL_COOKED|
|  PZ222|    ANOMALY|
+-------+-----------+



### Persist the Spark DataFrame into a Spark table

In [9]:
# Write the cleaned DataFrame to the table pizzadb.analysis; "overwrite" mode
# replaces whatever the table already holds.
(
    df2.write
    .mode("overwrite")
    .saveAsTable('pizzadb.analysis')
)