# Using Stardog with Databricks from local Jupyter

Uninstall PySpark. This is required because the `databricks-connect` package conflicts with PySpark. For details, see "Conflicting PySpark installations" in the Databricks Connect documentation.

```bash
pip uninstall pyspark
```

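Install the Databricks Connect client, choosing the version that matches your cluster's Databricks Runtime:

```bash
pip install -U "databricks-connect==7.3.*"  # or X.Y.* to match your cluster version
```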

The required libraries (installed with pip or conda) include:
1. `pyspark` (or `databricks-connect`)
1. `findspark` to locate the Spark installation and set initial config
1. `pystardog` (imported as `stardog`) and `pandas`, which the cells below import



In [None]:
# findspark.init() is called before `import pyspark`; findspark is used here to
# locate the Spark installation, so keep this ordering.
import findspark
findspark.init()

import pyspark
import random

import stardog
import io

import os
# Disabled: would pass an extra JDBC driver jar to the PySpark shell via
# PYSPARK_SUBMIT_ARGS. Left commented out in the original notebook.
# os.environ['PYSPARK_SUBMIT_ARGS'] = '--jars /opt/spark/user-jars/tg-jdbc-driver-1.2.jar pyspark-shell'


In [None]:
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField
from pyspark.sql.types import StringType, IntegerType
import pandas as pd
from configparser import ConfigParser


In [None]:
# Obtain the Spark session for this notebook under the application name
# "StardogAnalysis".
spark = (
    SparkSession.builder
    .appName("StardogAnalysis")
    .getOrCreate()
)

In [None]:
# Define connection parameters.
# The credentials file contains a DEFAULT section plus override sections;
# `config_section` selects which section the values are read from.
config_section = 'doghouse'
config_path = '../CREDENTIALS.config'
parser = ConfigParser()

# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed. Fail loudly here instead of surfacing a
# confusing NoSectionError from the first parser.get() call below.
if not parser.read(config_path):
    raise FileNotFoundError(
        f"Could not read credentials file: {config_path!r}"
    )

url = parser.get(config_section, 'url')
user = parser.get(config_section, 'user')
password = parser.get(config_section, 'password')
db = 'insurance'
api_endpoint = 'query'

In [None]:
# Keyword arguments that are unpacked into stardog.Connection when the
# connection is opened.
connection_details = dict(
    endpoint=url,
    username=user,
    password=password,
)


In [None]:
# Connect to the database named by `db` (set alongside the other connection
# parameters) instead of repeating the literal, so changing `db` actually
# changes which database is queried.
conn = stardog.Connection(db, **connection_details)

In [None]:
# SPARQL query selecting every :Crime_Stats resource together with its offense
# type, count, zip code value and the :Zip_Codes resource it occurred in.
query = """
PREFIX sqs: <tag:stardog:api:sqs:>
PREFIX : <http://api.stardog.com/>

select * { ?statIRI a :Crime_Stats;
                   :Crime_Type ?offense;
                   :Crime_Count ?crimeCount;
                   :Crime_Zip ?zipCode;
                   :Occurred_In ?zipCodeIri.
          ?zipCodeIri a :Zip_Codes.
          } 
"""

# Request the result set as CSV, then parse it into a pandas DataFrame via an
# in-memory binary buffer.
csv_results = conn.select(query, content_type='text/csv')
csv_buffer = io.BytesIO(csv_results)
df = pd.read_csv(csv_buffer)

In [None]:
# Display the first rows of the DataFrame as the cell output.
df.head()

In [None]:
# Run `show databases` through Spark SQL and print the result.
databases = spark.sql('show databases')
databases.show()

In [None]:
# Read "vertex Orders" through the TigerGraph JDBC driver into a Spark
# DataFrame, with the "limit" option set to "10" and "debug" set to "0".
# NOTE(review): gsurl, gsuser, gspass and graph are not assigned in any cell
# visible here, so this cell raises NameError unless they are defined
# elsewhere -- confirm where they are supposed to come from.
# NOTE(review): the only reference to a TigerGraph driver jar visible in this
# notebook is a commented-out PYSPARK_SUBMIT_ARGS line -- confirm the driver
# is available to Spark.
jdbcDF = (
    spark.read
    .format("jdbc")
    .option("driver", "com.tigergraph.jdbc.Driver")
    .option("url", "jdbc:tg:" + gsurl)
    .option("user", gsuser)
    .option("password", gspass)
    .option("graph", graph)
    .option("dbtable", "vertex Orders")
    .option("limit", "10")
    .option("debug", "0")
    .load()
)

jdbcDF.printSchema()
# jdbcDF.show()


In [None]:
# Convert the Spark DataFrame to pandas and display it.
# Note: this rebinds `df`, replacing whatever it referred to before this cell.
df = jdbcDF.toPandas()
df

In [None]:
# Print the rows of the JDBC-backed Spark DataFrame.
jdbcDF.show()

In [None]:
!pip list