In [1]:
from pyspark.sql import SparkSession
from pyspark.sql import Row
from delta import *

In [2]:
# HDFS directory used as the Spark SQL warehouse root for this project.
warehouse_location = 'hdfs://hdfs-nn:9000/TrabalhoPratico'

# Build a Hive-enabled session with the Delta Lake SQL extension and catalog.
# The chain is wrapped in parentheses rather than backslash continuations, so
# a trailing backslash before a blank line can no longer swallow a statement.
builder = (
    SparkSession.builder
    .appName("Python Spark SQL Hive integration example")
    # `spark.sql.warehouse.dir` is the setting Spark reads for the warehouse
    # location. The previous key, `spark.sql.TrabalhoPratico.dir`, is not a
    # Spark option, so `warehouse_location` was never taken into account.
    .config("spark.sql.warehouse.dir", warehouse_location)
    .config("hive.metastore.uris", "thrift://hive-metastore:9083")
    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
    .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog")
    # NOTE(review): configure_spark_with_delta_pip presumably sets
    # spark.jars.packages itself from the installed delta package version;
    # confirm whether this explicit pin is still needed.
    .config("spark.jars.packages", "io.delta:delta-core_2.12:2.4.0")
    .enableHiveSupport()
)

# Single assignment (the original read `spark = spark = ...`).
spark = configure_spark_with_delta_pip(builder).getOrCreate()

In [None]:
# Create the `silver` database at its HDFS location.
# IF NOT EXISTS keeps the cell re-runnable: a plain CREATE DATABASE raises
# as soon as `silver` is already registered (e.g. on Restart & Run All).
spark.sql(
    """
    CREATE DATABASE IF NOT EXISTS silver LOCATION 'hdfs://hdfs-nn:9000/TrabalhoPratico/silver/'
    """
)

In [None]:
# Maintenance cell: removes the `silver` database.
# It is opt-in because the cells further down query and create objects inside
# `silver`; dropping it unconditionally makes a top-to-bottom run fail.
# Set the flag to True only when a reset is really wanted.
RESET_SILVER_DATABASE = False

if RESET_SILVER_DATABASE:
    # IF EXISTS avoids an error when the database is already gone.
    # No CASCADE on purpose: Spark refuses to drop a non-empty database by
    # default, which protects existing tables from an accidental reset.
    spark.sql(
        """
        DROP DATABASE IF EXISTS silver
        """
    )

In [None]:
# List the databases visible to this Spark session.
databases_df = spark.sql(
    """
    SHOW DATABASES
    """
)
databases_df.show()

In [None]:
# List the tables currently registered in `silver` (before the table is created).
silver_tables_before = spark.sql(
    """
    SHOW TABLES FROM silver
    """
)
silver_tables_before.show()

In [None]:
# Remove any previous definition of the table so the CREATE statement that
# follows starts from a clean state; IF EXISTS makes this a no-op on first run.
drop_table_sql = """
    DROP TABLE IF EXISTS silver.genderStatsCountry_DeltaTable
    """
spark.sql(drop_table_sql)


In [None]:
# DDL for the country-level table stored as Delta files at an explicit HDFS
# location. Every column is declared as VARCHAR(500).
# NOTE(review): `2AplhaCode`, `GovernmentAccountingConcenpt` and
# `VitalResgistrationComplete` look misspelled; they are kept verbatim because
# renaming them would change the table schema that other code may rely on.
create_table_sql = """
    CREATE EXTERNAL TABLE  silver.genderStatsCountry_DeltaTable (
        CountryCode VARCHAR(500),
        ShortName VARCHAR(500),
        TableName VARCHAR(500),
        LongName VARCHAR(500),
        2AplhaCode VARCHAR(500),
        CurrencyUnit VARCHAR(500),
        SpecialNotes VARCHAR(500),
        Region VARCHAR(500),
        IncomeGroup VARCHAR(500),
        WB2Code VARCHAR(500),
        NationalAccountsBaseYear VARCHAR(500),
        NationalAccountsReferenceYear VARCHAR(500),
        SNAPriceValuation VARCHAR(500),
        LendingCategory VARCHAR(500),
        OtherGroups VARCHAR(500),
        SystemOfNationalAccounts VARCHAR(500),
        AlternativeConversionFactor VARCHAR(500),
        PPPSurveyYear VARCHAR(500),
        BalanceOfPaymentsManualInUse VARCHAR(500),
        ExternalDebtReportingStatus VARCHAR(500),
        SystemOfTrade VARCHAR(500),
        GovernmentAccountingConcenpt VARCHAR(500),
        IMFDataDisseminationStandard VARCHAR(500),
        LatestPopulationCensus VARCHAR(500),
        LatestHouseholdSurvey VARCHAR(500),
        SourceOfMostIncome VARCHAR(500),
        VitalResgistrationComplete VARCHAR(500),
        LatestAgriculturalCensus VARCHAR(500),
        LatestIndustrialData VARCHAR(500),
        LatestTradeData VARCHAR(500)
    )
    USING DELTA
    LOCATION 'hdfs://hdfs-nn:9000/TrabalhoPratico/silver/genderStatsCountry_DeltaTable/'
    """
spark.sql(create_table_sql)

In [44]:
# Confirm the new table is now registered in `silver`.
silver_tables_after = spark.sql(
    """
    SHOW TABLES FROM silver
    """
)
silver_tables_after.show()

+---------+--------------------+-----------+
|namespace|           tableName|isTemporary|
+---------+--------------------+-----------+
|   silver|genderstatscountr...|      false|
+---------+--------------------+-----------+



In [45]:
# Preview the table contents; `.show()` prints only the first rows.
country_rows = spark.sql(
    """
    SELECT *
    FROM silver.genderStatsCountry_DeltaTable
    """
)
country_rows.show()

+-----------+---------+---------+--------+----------+------------+------------+------+-----------+-------+------------------------+-----------------------------+-----------------+---------------+-----------+------------------------+---------------------------+-------------+----------------------------+---------------------------+-------------+----------------------------+----------------------------+----------------------+---------------------+------------------+--------------------------+------------------------+--------------------+---------------+
|CountryCode|ShortName|TableName|LongName|2AplhaCode|CurrencyUnit|SpecialNotes|Region|IncomeGroup|WB2Code|NationalAccountsBaseYear|NationalAccountsReferenceYear|SNAPriceValuation|LendingCategory|OtherGroups|SystemOfNationalAccounts|AlternativeConversionFactor|PPPSurveyYear|BalanceOfPaymentsManualInUse|ExternalDebtReportingStatus|SystemOfTrade|GovernmentAccountingConcenpt|IMFDataDisseminationStandard|LatestPopulationCensus|LatestHouseholdSu

In [46]:
# Inspect the table's column metadata; converted to pandas so the notebook
# renders it as a rich table.
table_description = spark.sql(
    """
    DESCRIBE FORMATTED silver.genderStatsCountry_DeltaTable
    """
)
table_description.toPandas()

Unnamed: 0,col_name,data_type,comment
0,CountryCode,string,
1,ShortName,string,
2,TableName,string,
3,LongName,string,
4,2AplhaCode,string,
5,CurrencyUnit,string,
6,SpecialNotes,string,
7,Region,string,
8,IncomeGroup,string,
9,WB2Code,string,


In [47]:
# Shut down the Spark session now that the notebook's work is finished.
spark.stop()