In [None]:
pip install delta-spark==2.4.0

In [None]:
from pyspark.sql import SparkSession
from pyspark.sql import Row
from delta import *

# Root HDFS directory used as the Spark SQL warehouse, i.e. the default
# location for managed databases and tables.
warehouse_location = 'hdfs://hdfs-nn:9000/TrabalhoPratico'

# Build the session configuration: Hive metastore access plus the Delta Lake
# SQL extension and catalog. A parenthesised chain is used instead of backslash
# continuations so no dangling "\" is left after the final call.
builder = (
    SparkSession.builder
    .appName("Python Spark SQL Hive integration example")
    # Fixed key: "spark.sql.TrabalhoPratico.dir" is not a Spark setting, so the
    # warehouse location was silently ignored. "spark.sql.warehouse.dir" is the
    # option that actually controls the default managed-table location.
    .config("spark.sql.warehouse.dir", warehouse_location)
    .config("hive.metastore.uris", "thrift://hive-metastore:9083")
    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
    .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog")
    .config("spark.jars.packages", "io.delta:delta-core_2.12:2.4.0")
    .enableHiveSupport()
)

# Let the delta helper finish configuring the builder, then create the session
# (or reuse one that is already running). The duplicated `spark = spark = ...`
# assignment target was removed.
spark = configure_spark_with_delta_pip(builder).getOrCreate()

In [None]:
# Create the silver-layer database at its HDFS location.
# IF NOT EXISTS keeps the cell re-runnable: the database persists in the
# metastore, so a plain CREATE DATABASE fails on every run after the first.
spark.sql(
    """
    CREATE DATABASE IF NOT EXISTS silver LOCATION 'hdfs://hdfs-nn:9000/TrabalhoPratico/silver/'
    """
)

In [None]:
# Print the databases visible to this Spark session.
show_databases_sql = """
    SHOW DATABASES
    """
databases_df = spark.sql(show_databases_sql)
databases_df.show()

In [None]:
# Print the tables currently registered in the silver database.
tables_before_sql = """
    SHOW TABLES FROM silver
    """
tables_before_df = spark.sql(tables_before_sql)
tables_before_df.show()

In [None]:
# Create the table in the silver database.
# Any existing definition is dropped first, then the table is declared again
# as a Delta table stored at an explicit HDFS location.

drop_table_sql = """
    DROP TABLE IF EXISTS silver.who_suicide_statistics_DeltaTable
    """

create_table_sql = """
    CREATE EXTERNAL TABLE  silver.who_suicide_statistics_DeltaTable (
        country VARCHAR(50),
        year INT,
        sex VARCHAR(20),
        age VARCHAR(20),
        suicides_number INT,
        population INT
    )
    USING DELTA
    LOCATION 'hdfs://hdfs-nn:9000/TrabalhoPratico/silver/who_suicide_statistics_DeltaTable/'
    """

spark.sql(drop_table_sql)

# Kept as the final bare expression so the cell output matches the original.
spark.sql(create_table_sql)

In [None]:
# Print the silver database's tables again, after the table-creation cell.
tables_after_sql = """
    SHOW TABLES FROM silver
    """
tables_after_df = spark.sql(tables_after_sql)
tables_after_df.show()

In [None]:
# Print the rows of the silver Delta table (show() prints only the first rows).
preview_sql = """
    SELECT *
    FROM silver.who_suicide_statistics_DeltaTable
    """
preview_df = spark.sql(preview_sql)
preview_df.show()

In [None]:
# Fetch the table's formatted description and render it as a pandas DataFrame.
describe_sql = """
    DESCRIBE FORMATTED silver.who_suicide_statistics_DeltaTable
    """
# Kept as the final expression so the notebook displays the resulting frame.
spark.sql(describe_sql).toPandas()

In [None]:
# Stop the Spark session now that the notebook's work is done.
spark.stop()