In [0]:
from pyspark.sql.types import DecimalType
from pyspark.sql import SparkSession
from pyspark.sql.functions import expr
from pyspark.sql import functions as F

In [0]:
# Location of the bronze products table read by this notebook.
tmp_delta_path = "/tmp/bronze/tb_products"

# Load the table stored at that path, reading it as Delta format.
df = spark.read.load(tmp_delta_path, format="delta")

%md
**Convenção de prefixos usada nos nomes das colunas:**

|Prefixo|Utilização|
|-------|-------|
|cd|Código|
|nm|Nome|
|ds|Descrição|
|dt|Data|
|hr|Hora|
|nr|Número|
|vl|Valor|
|bool|Booleano|
|txt|Texto Extenso|
|id|Chave Composta|
|qt|Quantidade|


In [0]:
# Preview the DataFrame as loaded, before any column is renamed or cast.
display(df)

CHANGEDAT,CHANGEDBY,CREATEDAT,CREATEDBY,CURRENCY,DEPTH,DIMENSIONUNIT,HEIGHT,PRICE,PRODCATEGORYID,PRODUCTID,PRODUCTPICURL,QUANTITYUNIT,SUPPLIER_PARTNERID,TAXTARIFFCODE,TYPECODE,WEIGHTMEASURE,WEIGHTUNIT,WIDTH,dt_ingestion
20181003,14,20181003,14,USD,,,,288,CB,CB-1163,,EA,100000035,1,PR,16.0,KG,,01-10-2024
20181003,1,20181003,1,USD,,,,699,HB,HB-1171,,EA,100000036,1,PR,11.0,KG,,01-10-2024
20181003,1,20181003,1,USD,,,,799,HB,HB-1172,,EA,100000037,1,PR,12.1,KG,,01-10-2024
20181003,3,20181003,3,USD,,,,649,HB,HB-1173,,EA,100000038,1,PR,13.5,KG,,01-10-2024
20181003,3,20181003,3,USD,,,,379,HB,HB-1174,,EA,100000039,1,PR,11.8,KG,,01-10-2024
20181003,7,20181003,7,USD,,,,899,HB,HB-1175,,EA,100000040,1,PR,12.5,KG,,01-10-2024
20181003,5,20181003,5,USD,,,,1199,HB,HB-1176,,EA,100000041,1,PR,12.9,KG,,01-10-2024
20181003,12,20181003,12,USD,,,,3000,EB,EB-1135,,EA,100000030,1,PR,19.3,KG,,01-10-2024
20181003,7,20181003,7,USD,,,,5000,EB,EB-1136,,EA,100000031,1,PR,20.5,KG,,01-10-2024
20181003,6,20181003,6,USD,,,,7900,EB,EB-1137,,EA,100000032,1,PR,22.0,KG,,01-10-2024


In [0]:
# Source column name -> new name carrying one of the notebook's prefixes
# (cd = code, ds = description, dt = date, vl = value).
column_renames = {
    'CHANGEDAT': 'dt_change',
    'CHANGEDBY': 'cd_changed_by',
    'CREATEDAT': 'dt_create',
    'CREATEDBY': 'cd_created_by',
    'CURRENCY': 'ds_currency',
    'DEPTH': 'ds_depth',
    'DIMENSIONUNIT': 'ds_dimension_unit',
    'HEIGHT': 'ds_height',
    'PRICE': 'vl_price',
    'PRODCATEGORYID': 'cd_prod_category',
    'PRODUCTID': 'cd_product',
    'PRODUCTPICURL': 'ds_product_picurl',
    'QUANTITYUNIT': 'ds_quantity_unit',
    'SUPPLIER_PARTNERID': 'cd_supplier_partner',
    'TAXTARIFFCODE': 'cd_tax_tariff',
    'TYPECODE': 'ds_type_code',
    'WEIGHTMEASURE': 'ds_weight_measure',
    'WEIGHTUNIT': 'ds_weight_unit',
    # NOTE(review): 'ds_widht' looks like a typo for 'ds_width'. It is kept
    # because the cast cell further down refers to the column by this name.
    'WIDTH': 'ds_widht',
    # Identity rename: the column keeps the name it already has.
    'dt_ingestion': 'dt_ingestion',
}

# Apply the renames one by one, in the order listed above.
for source_name, target_name in column_renames.items():
    df = df.withColumnRenamed(source_name, target_name)

display(df)

dt_change,cd_changed_by,dt_create,cd_created_by,ds_currency,ds_depth,ds_dimension_unit,ds_height,vl_price,cd_prod_category,cd_product,ds_product_picurl,ds_quantity_unit,cd_supplier_partner,cd_tax_tariff,ds_type_code,ds_weight_measure,ds_weight_unit,ds_widht,dt_ingestion
20181003,14,20181003,14,USD,,,,288,CB,CB-1163,,EA,100000035,1,PR,16.0,KG,,01-10-2024
20181003,1,20181003,1,USD,,,,699,HB,HB-1171,,EA,100000036,1,PR,11.0,KG,,01-10-2024
20181003,1,20181003,1,USD,,,,799,HB,HB-1172,,EA,100000037,1,PR,12.1,KG,,01-10-2024
20181003,3,20181003,3,USD,,,,649,HB,HB-1173,,EA,100000038,1,PR,13.5,KG,,01-10-2024
20181003,3,20181003,3,USD,,,,379,HB,HB-1174,,EA,100000039,1,PR,11.8,KG,,01-10-2024
20181003,7,20181003,7,USD,,,,899,HB,HB-1175,,EA,100000040,1,PR,12.5,KG,,01-10-2024
20181003,5,20181003,5,USD,,,,1199,HB,HB-1176,,EA,100000041,1,PR,12.9,KG,,01-10-2024
20181003,12,20181003,12,USD,,,,3000,EB,EB-1135,,EA,100000030,1,PR,19.3,KG,,01-10-2024
20181003,7,20181003,7,USD,,,,5000,EB,EB-1136,,EA,100000031,1,PR,20.5,KG,,01-10-2024
20181003,6,20181003,6,USD,,,,7900,EB,EB-1137,,EA,100000032,1,PR,22.0,KG,,01-10-2024


In [0]:
# Date-column treatment: rebuild each value as a 'dd-MM-yyyy' string.
# NOTE(review): the slicing assumes the value is still in its original
# 8-character yyyyMMdd form; re-running this cell on converted values would
# slice the wrong characters.
for date_col in ("dt_change", "dt_create"):
    # Characters 7-8 are taken as the day, 5-6 as the month, 1-4 as the year.
    day_month_year = (
        f"substring({date_col}, 7, 2) || '-' || "
        f"substring({date_col}, 5, 2) || '-' || "
        f"substring({date_col}, 1, 4)"
    )
    df = df.withColumn(date_col, expr(day_month_year))

display(df)

dt_change,cd_changed_by,dt_create,cd_created_by,ds_currency,ds_depth,ds_dimension_unit,ds_height,vl_price,cd_prod_category,cd_product,ds_product_picurl,ds_quantity_unit,cd_supplier_partner,cd_tax_tariff,ds_type_code,ds_weight_measure,ds_weight_unit,ds_widht,dt_ingestion
03-10-2018,14,03-10-2018,14,USD,,,,288,CB,CB-1163,,EA,100000035,1,PR,16.0,KG,,01-10-2024
03-10-2018,1,03-10-2018,1,USD,,,,699,HB,HB-1171,,EA,100000036,1,PR,11.0,KG,,01-10-2024
03-10-2018,1,03-10-2018,1,USD,,,,799,HB,HB-1172,,EA,100000037,1,PR,12.1,KG,,01-10-2024
03-10-2018,3,03-10-2018,3,USD,,,,649,HB,HB-1173,,EA,100000038,1,PR,13.5,KG,,01-10-2024
03-10-2018,3,03-10-2018,3,USD,,,,379,HB,HB-1174,,EA,100000039,1,PR,11.8,KG,,01-10-2024
03-10-2018,7,03-10-2018,7,USD,,,,899,HB,HB-1175,,EA,100000040,1,PR,12.5,KG,,01-10-2024
03-10-2018,5,03-10-2018,5,USD,,,,1199,HB,HB-1176,,EA,100000041,1,PR,12.9,KG,,01-10-2024
03-10-2018,12,03-10-2018,12,USD,,,,3000,EB,EB-1135,,EA,100000030,1,PR,19.3,KG,,01-10-2024
03-10-2018,7,03-10-2018,7,USD,,,,5000,EB,EB-1136,,EA,100000031,1,PR,20.5,KG,,01-10-2024
03-10-2018,6,03-10-2018,6,USD,,,,7900,EB,EB-1137,,EA,100000032,1,PR,22.0,KG,,01-10-2024


In [0]:
# Format the columns: give every listed column its target type.
# dt_ingestion is not listed here, so it keeps the type it already has.
string_columns = [
    'cd_changed_by',
    'cd_created_by',
    'ds_currency',
    'ds_depth',
    'ds_dimension_unit',
    'ds_height',
    'cd_prod_category',
    'cd_product',
    'ds_product_picurl',
    'ds_quantity_unit',
    'cd_supplier_partner',
    'cd_tax_tariff',
    'ds_type_code',
    'ds_weight_unit',
    'ds_widht',
]

typed_columns = {
    # The date columns are parsed from text with the 'dd-MM-yyyy' pattern.
    'dt_change': F.to_date(F.col("dt_change"), "dd-MM-yyyy"),
    'dt_create': F.to_date(F.col("dt_create"), "dd-MM-yyyy"),
    # Numeric columns become fixed-precision decimals.
    'vl_price': F.col('vl_price').cast(DecimalType(10, 2)),
    'ds_weight_measure': F.col('ds_weight_measure').cast(DecimalType(10, 1)),
}
# Everything else in the list is cast to string.
for column_name in string_columns:
    typed_columns[column_name] = F.col(column_name).cast('string')

# Each withColumn call replaces a column of the same name with its typed version.
for column_name, typed_column in typed_columns.items():
    df = df.withColumn(column_name, typed_column)

display(df)

dt_change,cd_changed_by,dt_create,cd_created_by,ds_currency,ds_depth,ds_dimension_unit,ds_height,vl_price,cd_prod_category,cd_product,ds_product_picurl,ds_quantity_unit,cd_supplier_partner,cd_tax_tariff,ds_type_code,ds_weight_measure,ds_weight_unit,ds_widht,dt_ingestion
2018-10-03,14,2018-10-03,14,USD,,,,288.0,CB,CB-1163,,EA,100000035,1,PR,16.0,KG,,01-10-2024
2018-10-03,1,2018-10-03,1,USD,,,,699.0,HB,HB-1171,,EA,100000036,1,PR,11.0,KG,,01-10-2024
2018-10-03,1,2018-10-03,1,USD,,,,799.0,HB,HB-1172,,EA,100000037,1,PR,12.1,KG,,01-10-2024
2018-10-03,3,2018-10-03,3,USD,,,,649.0,HB,HB-1173,,EA,100000038,1,PR,13.5,KG,,01-10-2024
2018-10-03,3,2018-10-03,3,USD,,,,379.0,HB,HB-1174,,EA,100000039,1,PR,11.8,KG,,01-10-2024
2018-10-03,7,2018-10-03,7,USD,,,,899.0,HB,HB-1175,,EA,100000040,1,PR,12.5,KG,,01-10-2024
2018-10-03,5,2018-10-03,5,USD,,,,1199.0,HB,HB-1176,,EA,100000041,1,PR,12.9,KG,,01-10-2024
2018-10-03,12,2018-10-03,12,USD,,,,3000.0,EB,EB-1135,,EA,100000030,1,PR,19.3,KG,,01-10-2024
2018-10-03,7,2018-10-03,7,USD,,,,5000.0,EB,EB-1136,,EA,100000031,1,PR,20.5,KG,,01-10-2024
2018-10-03,6,2018-10-03,6,USD,,,,7900.0,EB,EB-1137,,EA,100000032,1,PR,22.0,KG,,01-10-2024


In [0]:
# Destination path of the silver products table.
delta = "/tmp/silver/tb_products"

# Save the DataFrame as a Delta table partitioned by dt_ingestion.
# NOTE(review): mode "overwrite" presumably replaces the whole table rather
# than only the partitions present in df — confirm this is intended.
df.write.save(
    delta,
    format="delta",
    mode="overwrite",
    partitionBy="dt_ingestion",
)

In [0]:
%fs ls /tmp/silver/tb_products

path,name,size,modificationTime
dbfs:/tmp/silver/tb_products/_delta_log/,_delta_log/,0,0
dbfs:/tmp/silver/tb_products/dt_ingestion=01-10-2024/,dt_ingestion=01-10-2024/,0,0
dbfs:/tmp/silver/tb_products/dt_ingestion=30-09-2024/,dt_ingestion=30-09-2024/,0,0
