##### Criação da silver layer para os gastos do btg pactual

01. Utilizar spark.table() para leitura da tabela bronze
02. Filtrar o final do cartão de crédito usando regex (rlike)
03. Definir schemas corretos

In [0]:
# Load the BTG expenses table from the bronze layer as the starting DataFrame.
bronze_table_name = 'personalfinance.bronze.gastos_btg'
df = spark.table(bronze_table_name)

In [0]:
from pyspark.sql.functions import col, lit, trim, upper, regexp_extract, count, sum, when
from pyspark.sql.types import StructType, StructField, StringType, IntegerType, DateType, TimestampType, DoubleType

In [0]:
# Inspect the column names of the bronze DataFrame; as a bare expression this
# is only useful as interactive cell output and has no effect on `df`.
df.columns

In [0]:
# Repair rows whose card_final holds the literal string "nan": for those rows
# card_final takes the value of id_code, id_code takes the value of tipo_compra,
# and tipo_compra is set to "Crédito". All other rows are left unchanged.
# NOTE(review): presumably these are rows where the source columns arrived
# shifted by one position — confirm against the bronze ingestion.
#
# card_final is overwritten first, so its original value is snapshotted into a
# temporary column that the condition reads for every subsequent replacement.
is_nan_card = col("card_final_orig") == "nan"

# (target column, value to use when the row is flagged), applied in this order.
replacements = [
    ("card_final", col("id_code")),
    ("id_code", col("tipo_compra")),
    ("tipo_compra", lit("Crédito")),
]

df = df.withColumn("card_final_orig", col("card_final"))
for target, replacement in replacements:
    df = df.withColumn(
        target,
        when(is_nan_card, replacement).otherwise(col(target)),
    )
df = df.drop("card_final_orig")


In [0]:
# Keep only rows whose card_final contains four consecutive digits.
# NOTE(review): rlike() performs an unanchored search, so values that merely
# contain four digits (e.g. "12345" or "x1234") also pass. If card_final must
# be exactly four digits the pattern needs ^...$ anchors — confirm against the
# bronze data before tightening, since anchoring could drop rows.
has_four_digits = col('card_final').rlike(r'\d{4}')
df = df.where(has_four_digits)

In [0]:
# Target Spark type for each column, keyed by column name.
schema = {
    "data_da_compra": DateType(),
    "descricao": StringType(),
    "valor_brl": DoubleType(),
    "tipo_compra": StringType(),
    "id_code": StringType(),
    "card_final": StringType(),
}

In [0]:
# Cast every column listed in `schema` to its declared Spark type, in place.
for column_name, spark_type in schema.items():
    df = df.withColumn(column_name, col(column_name).cast(spark_type))

In [0]:
# Upper-case the description and derive data_vencimento_fatura from the first
# yyyy-mm-dd-shaped substring of file_path (group 0 = the whole match), cast
# to a date.
due_date_text = regexp_extract(col('file_path'), r'\d{4}-\d{2}-\d{2}', 0)
df = (
    df.withColumn('descricao', upper(col('descricao')))
      .withColumn('data_vencimento_fatura', due_date_text.cast(DateType()))
)

In [0]:
# Load the cards dimension from the silver layer.
cards_table_name = 'personalfinance.silver.dim_cards'
df_cards = spark.table(cards_table_name)

In [0]:
# Enrich each expense row with card attributes via a left join on card_final,
# so rows without a matching card are kept with null attributes.
# NOTE(review): the is_current column suggests dim_cards may hold more than one
# row per card_final; if so this join duplicates expense rows — confirm the
# dimension's grain.
card_columns = [
    "card_final",
    "full_descricao",
    "bank",
    "bandeira",
    "is_current",
    "dia_vencimento",
]
df = df.join(df_cards.select(*card_columns), on='card_final', how='left')

In [0]:
# Preview up to ten rows of the enriched DataFrame in the notebook output.
preview = df.limit(10)
display(preview)

In [0]:
# Persist the result as the silver Delta table, replacing both the existing
# data (mode overwrite) and the existing schema (overwriteSchema).
(
    df.write
      .format('delta')
      .mode('overwrite')
      .option('overwriteSchema', 'true')
      .saveAsTable('personalfinance.silver.gastos_btg')
)