In [18]:
from pyspark.sql import SparkSession
from pyspark.sql import Row

# Default HDFS location for managed databases and tables.
warehouse_location = 'hdfs://hdfs-nn:9000/warehouse'

# Build (or reuse) a Hive-enabled session configured with the warehouse
# directory above and the Hive metastore URI.
spark = (
    SparkSession.builder
    .appName("Python Spark SQL Hive integration example")
    .config("spark.sql.warehouse.dir", warehouse_location)
    .config("hive.metastore.uris", "thrift://hive-metastore:9083")
    .enableHiveSupport()
    .getOrCreate()
)

In [19]:
# List the databases visible to this Spark session.
spark.sql("""
    SHOW DATABASES
    """).show()


+---------+
|namespace|
+---------+
|  default|
|  tabd_db|
+---------+



In [20]:
# Start from a clean slate: remove tabd_db if it exists. CASCADE lets the
# drop succeed even when the database still contains tables.
spark.sql("""
    DROP DATABASE IF EXISTS tabd_db CASCADE
    """)

DataFrame[]

In [21]:
# Any HDFS location works, but keep the layout organized: a data lake only
# grows over time, and a disorganized one turns into a swamp.
# IF NOT EXISTS keeps this cell safe to re-run: without it, executing the
# cell while tabd_db already exists raises an error instead of being a no-op.
spark.sql(
    """
    CREATE DATABASE IF NOT EXISTS tabd_db LOCATION 'hdfs://hdfs-nn:9000/warehouse/tabd.db/'
    """
)

DataFrame[]

In [22]:
# Check the database list again; tabd_db should appear in it.
spark.sql("""
    SHOW DATABASES
    """).show()

+---------+
|namespace|
+---------+
|  default|
|  tabd_db|
+---------+



In [23]:
# List the tables currently stored in tabd_db.
spark.sql("""
    SHOW TABLES FROM tabd_db
    """).show()

+--------+---------+-----------+
|database|tableName|isTemporary|
+--------+---------+-----------+
+--------+---------+-----------+



In [24]:
# Drop tabd_db.text_file when present; IF EXISTS avoids an error otherwise.
spark.sql("""
    DROP TABLE IF EXISTS tabd_db.text_file
    """)

DataFrame[]

In [25]:
# Stop the Spark session (`spark`) once the statements above have run.
spark.stop()