In [2]:
def get_spark(scale=0, queue=None):
    """Create (or reuse) a Hive-enabled SparkSession with this notebook's settings.

    Side effects performed before the session is built:
      * ``ARROW_PRE_0_15_IPC_FORMAT`` is set to ``"1"`` in this process's
        environment (and forwarded to executors / the YARN app master via
        the Spark config below).
      * The ``"config"`` entry of the ``gcp/emart-datafabric/dataflow`` secret
        is written to a fresh temporary file, and
        ``GOOGLE_APPLICATION_CREDENTIALS`` is pointed at that file.

    Args:
        scale: Accepted for backward compatibility; this function does not
            read it.
        queue: YARN queue name. When falsy, ``"dmig_eda"`` is used if
            ``JUPYTERHUB_USER`` is present in the environment, otherwise
            ``"airflow_job"``.

    Returns:
        The session returned by ``SparkSession.builder...getOrCreate()``, with
        ``spark.sql.execution.arrow.enabled`` set to ``"true"``.
    """
    import os
    import uuid
    import tempfile
    from pyspark.sql import SparkSession
    from pydatafabric.vault_utils import get_secrets

    # A random suffix keeps application names unique per call.
    tmp_uuid = str(uuid.uuid4())
    app_name = f"emart-{os.environ.get('USER', 'default')}-{tmp_uuid}"
    if not queue:
        if "JUPYTERHUB_USER" in os.environ:
            queue = "dmig_eda"
        else:
            queue = "airflow_job"
    os.environ["ARROW_PRE_0_15_IPC_FORMAT"] = "1"

    key = get_secrets("gcp/emart-datafabric/dataflow")["config"]
    # mkstemp returns an *open* OS-level descriptor together with the path.
    # Wrap that descriptor instead of discarding it and re-opening the path,
    # otherwise one descriptor is leaked on every call.
    key_fd, key_file_name = tempfile.mkstemp()
    with os.fdopen(key_fd, "wb") as key_file:
        key_file.write(key.encode())
    # NOTE(review): the key file is left on disk after this function returns;
    # presumably it must outlive the Spark session -- confirm before adding cleanup.
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = key_file_name

    spark = (
        SparkSession.builder.config("spark.app.name", app_name)
        .config("spark.driver.memory", "6g")
        .config("spark.executor.memory", "8g")
        .config("spark.shuffle.service.enabled", "true")
        .config("spark.dynamicAllocation.enabled", "true")
        .config("spark.dynamicAllocation.maxExecutors", "200")
        .config("spark.driver.maxResultSize", "6g")
        .config("spark.rpc.message.maxSize", "1024")
        .config("spark.yarn.queue", queue)
        .config("spark.ui.enabled", "false")
        .config("spark.port.maxRetries", "128")
        .config("spark.executorEnv.ARROW_PRE_0_15_IPC_FORMAT", "1")
        .config("spark.yarn.appMasterEnv.ARROW_PRE_0_15_IPC_FORMAT", "1")
        .config(
            "spark.jars",
            "hdfs:///jars/ojdbc8.jar,gs://external_libs/spark/jars/spark-bigquery-with-dependencies_2.11-0.16.1.jar",
        )
        .enableHiveSupport()
        .getOrCreate()
    )
    spark.conf.set("spark.sql.execution.arrow.enabled", "true")
    return spark

In [3]:
# Build the session with get_spark's defaults: scale=0 and a YARN queue picked from the environment.
spark = get_spark()

In [None]:
import os

# Full thin-driver address (jdbc:oracle:thin:@host:port:SID); the bare
# "jdbc:oracle:thin" prefix on its own is not a connectable URL.
url = "jdbc:oracle:thin:@10.40.84.202:1521:DataFabricD"

# Credentials are read from the environment so they never appear in the
# notebook source. A missing variable raises KeyError naming it, instead of
# sending an empty login to the database.
connection_properties = {
    "user": os.environ["ORACLE_USER"],
    "password": os.environ["ORACLE_PASSWORD"],
    "driver": "oracle.jdbc.driver.OracleDriver",
}

In [5]:
import os

# Read the tablespace names visible to the connecting Oracle user over JDBC.
# The login is taken from ORACLE_USER / ORACLE_PASSWORD rather than being
# embedded in the notebook; a missing variable fails fast with a KeyError.
# NOTE(review): a password was previously written inline on this line --
# treat it as exposed and rotate it.
df = (
    spark.read.format("jdbc")
    .option("url", "jdbc:oracle:thin:@10.40.84.202:1521:DataFabricD")
    .option("user", os.environ["ORACLE_USER"])
    .option("password", os.environ["ORACLE_PASSWORD"])
    .option("driver", "oracle.jdbc.driver.OracleDriver")
    # A parenthesised subquery with an alias is passed as the "table" to read.
    .option("dbtable", "(SELECT TABLESPACE_NAME FROM USER_TABLESPACES) a")
    .load()
)

In [6]:
# Render the rows of df as a text table in the cell output.
df.show()

+---------------+
|TABLESPACE_NAME|
+---------------+
|         SYSTEM|
|         SYSAUX|
|       UNDOTBS1|
|           TEMP|
|          USERS|
|       DataFabricD_TS|
+---------------+

