In [55]:
from pyspark import SparkContext, SparkConf
from pyspark.sql import SparkSession, HiveContext


In [56]:
!pip install SPARQL2Spark==0.0.5



In [57]:
from SPARQL2Spark.Wrapper import SPARQL2SparkWrapper

In [58]:
# Build (or reuse) the notebook-wide SparkSession with Hive support enabled.
# The metastore URI is supplied inline through the builder instead of a
# configuration file.
session_builder = SparkSession.builder.appName(
    "interfacing spark sql to hive metastore without configuration file"
)
session_builder = session_builder.config(
    "hive.metastore.uris", "thrift://hive-metastore:9083"
)
spark = session_builder.enableHiveSupport().getOrCreate()

In [59]:
# SPARQL endpoint the wrapper sends the query to
# (presumably a Jena Fuseki dataset named "pizzads" -- confirm against the deployment).
sparql_endpoint = "http://jena-fuseki:3030/pizzads"

# Federated query returning one (?pizzaID, ?outcome) row per match:
#   * the outer pattern reads, per pizza type, the suggested lower/upper bounds
#     for temperature and cooking duration;
#   * the SERVICE clause fetches, from http://ontop:8080/sparql, each pizza's
#     type, temperature, and start/end cooking values;
#   * ?cookDuration is computed as ?end - ?start;
#   * ?outcome is "WELL_COOKED" when both the temperature and the duration lie
#     within their inclusive bounds, otherwise "ANOMALY".
query = """
    PREFIX : <http://www.co-ode.org/ontologies/pizza/pizza.owl#>

    SELECT ?pizzaID ?outcome
    WHERE {

      ?pizzaType :suggestedTempLow ?tempLow; :suggestedDurationLow ?durLow;
                 :suggestedTempUp ?tempUp; :suggestedDurationUp ?durUp .

      SERVICE <http://ontop:8080/sparql> {
        ?pizzaID a ?pizzaType.
        ?pizzaID :temperature ?avgTemp; :start_cooking ?start; :end_cooking ?end.
      }

      BIND ((?end-?start) AS ?cookDuration)

      BIND( IF ((?avgTemp >= ?tempLow && ?avgTemp <= ?tempUp) &&
          (?cookDuration >= ?durLow && ?cookDuration <= ?durUp)
          ,"WELL_COOKED","ANOMALY") AS ?outcome)
    }
"""

# Run the query through the SPARQL2Spark wrapper bound to the active Spark
# session, then take the result's `dataFrame` attribute for use in later cells.
wrapper = SPARQL2SparkWrapper(spark, sparql_endpoint)
result = wrapper.query(query)
resultDF = result.dataFrame

pizzaID,outcome
http://www.co-ode.org/ontologies/pizza/pizza.owl#PZ789,ANOMALY
http://www.co-ode.org/ontologies/pizza/pizza.owl#PZ333,WELL_COOKED
http://www.co-ode.org/ontologies/pizza/pizza.owl#PZ222,WELL_COOKED
http://www.co-ode.org/ontologies/pizza/pizza.owl#PZ005,WELL_COOKED
http://www.co-ode.org/ontologies/pizza/pizza.owl#PZ456,WELL_COOKED
http://www.co-ode.org/ontologies/pizza/pizza.owl#PZ001,ANOMALY
http://www.co-ode.org/ontologies/pizza/pizza.owl#PZ789,ANOMALY
http://www.co-ode.org/ontologies/pizza/pizza.owl#PZ444,ANOMALY
http://www.co-ode.org/ontologies/pizza/pizza.owl#PZ333,WELL_COOKED
http://www.co-ode.org/ontologies/pizza/pizza.owl#PZ999,WELL_COOKED
http://www.co-ode.org/ontologies/pizza/pizza.owl#PZ111,WELL_COOKED
http://www.co-ode.org/ontologies/pizza/pizza.owl#PZ003,WELL_COOKED
http://www.co-ode.org/ontologies/pizza/pizza.owl#PZ004,ANOMALY
http://www.co-ode.org/ontologies/pizza/pizza.owl#PZ002,WELL_COOKED
http://www.co-ode.org/ontologies/pizza/pizza.owl#PZ

In [60]:
# Preview the query result: first 20 rows, long cell values truncated
# (these are show()'s defaults, written out explicitly).
resultDF.show(n=20, truncate=True)

+--------------------+-----------+
|             pizzaID|    outcome|
+--------------------+-----------+
|http://www.co-ode...|    ANOMALY|
|http://www.co-ode...|WELL_COOKED|
|http://www.co-ode...|WELL_COOKED|
|http://www.co-ode...|WELL_COOKED|
|http://www.co-ode...|WELL_COOKED|
|http://www.co-ode...|    ANOMALY|
|http://www.co-ode...|    ANOMALY|
|http://www.co-ode...|    ANOMALY|
|http://www.co-ode...|WELL_COOKED|
|http://www.co-ode...|WELL_COOKED|
|http://www.co-ode...|WELL_COOKED|
|http://www.co-ode...|WELL_COOKED|
|http://www.co-ode...|    ANOMALY|
|http://www.co-ode...|WELL_COOKED|
|http://www.co-ode...|WELL_COOKED|
|http://www.co-ode...|WELL_COOKED|
|http://www.co-ode...|WELL_COOKED|
|http://www.co-ode...|    ANOMALY|
|http://www.co-ode...|WELL_COOKED|
|http://www.co-ode...|    ANOMALY|
+--------------------+-----------+



In [61]:
# Import only the function that is used: a wildcard import of
# pyspark.sql.functions shadows Python builtins such as sum, min, max and round.
from pyspark.sql.functions import regexp_replace

# regexp_replace interprets its pattern as a regular expression, so the dots of
# the namespace URI are escaped (an unescaped "." matches any character) and the
# pattern is anchored to the start of the value so only a leading prefix is removed.
PIZZA_NS_PATTERN = r"^http://www\.co-ode\.org/ontologies/pizza/pizza\.owl#"

# Strip the pizza ontology namespace so pizzaID keeps just the local name
# (e.g. "PZ789"); the outcome column is left untouched.
df2 = resultDF.withColumn("pizzaID", regexp_replace("pizzaID", PIZZA_NS_PATTERN, ""))

In [62]:
# Preview the frame with shortened pizza IDs: first 20 rows, long cell values
# truncated (these are show()'s defaults, written out explicitly).
df2.show(n=20, truncate=True)

+-------+-----------+
|pizzaID|    outcome|
+-------+-----------+
|  PZ789|    ANOMALY|
|  PZ333|WELL_COOKED|
|  PZ222|WELL_COOKED|
|  PZ005|WELL_COOKED|
|  PZ456|WELL_COOKED|
|  PZ001|    ANOMALY|
|  PZ789|    ANOMALY|
|  PZ444|    ANOMALY|
|  PZ333|WELL_COOKED|
|  PZ999|WELL_COOKED|
|  PZ111|WELL_COOKED|
|  PZ003|WELL_COOKED|
|  PZ004|    ANOMALY|
|  PZ002|WELL_COOKED|
|  PZ888|WELL_COOKED|
|  PZ666|WELL_COOKED|
|  PZ777|WELL_COOKED|
|  PZ123|    ANOMALY|
|  PZ555|WELL_COOKED|
|  PZ222|    ANOMALY|
+-------+-----------+



In [63]:
# Persist the results as the table pizzadb.analysis; "overwrite" mode replaces
# whatever the table previously contained.
analysis_writer = df2.write.mode("overwrite")
analysis_writer.saveAsTable("pizzadb.analysis")