In [1]:
import pyspark
from pyspark.sql import SparkSession
from pyspark import SparkConf
from pyspark.sql.functions import to_date, to_timestamp
from pyspark.sql.types import TimestampType
import pyspark.sql.functions as F

In [2]:
# Start (or reuse) the Spark session for this notebook.
# The three config entries pull in the Delta Lake package and register its
# SQL extension and catalog implementation.
spark = (
    SparkSession.builder
    .appName("consultas")
    .config("spark.jars.packages", "io.delta:delta-core_2.12:0.8.0")
    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
    .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog")
    .getOrCreate()
)

In [96]:
# Read the users data from the bronze layer, stored as plain Parquet files.
# No reader options are set on purpose: "inferSchema" and "header" are CSV
# reader options and "versionAsOf" is a Delta Lake time-travel option, so none
# of them has any effect on a Parquet read (Parquet files carry their own
# schema and plain Parquet has no table versions).
df_bronze = spark.read.format("parquet").load("./data-lake/bronze/users/*.parquet")

# Number of rows in the bronze layer
df_bronze.count()

310

In [97]:
# Read the users data from the silver layer, stored as a Delta table
df_silver = spark.read.format("delta").load("./data-lake/silver/users")

# Number of rows in the silver layer
df_silver.count()

310

In [98]:
# Read the users data from the gold layer, stored as a Delta table
df_gold = spark.read.format("delta").load("./data-lake/gold/users")

# Register the gold DataFrame as a temp view so it can be queried with Spark SQL
df_gold.createOrReplaceTempView("users_gold")

# Row count of the gold table plus the sum of its total_users column
df_result = spark.sql(
        """
        SELECT COUNT(1)
             , SUM(u.total_users)
          FROM users_gold u      
        """
    )
df_result.show()

+--------+----------------+
|count(1)|sum(total_users)|
+--------+----------------+
|      16|             310|
+--------+----------------+



In [99]:
# Stop the Spark session
spark.stop()