In [0]:
%run ../_utils


# Camada bronze

Na camada bronze, nenhuma limpeza ou regra de negócio deve ser aplicada aos dados.

Só vamos ler os dados em Parquet e salvá-los em Delta.

Vamos também utilizar a tabela de controle para termos o milestone da última execução (aqui não será utilizado de fato, mas é interessante justamente para o UPSERT).

## 1 - Processamento da camada bronze

Aqui vamos adicionar uma estrutura que permite um laço de repetição.

O laço será responsável por armazenar os dados e criar tabela delta para cada "entidade" definida no diagrama ER

In [0]:
# Single source of truth for the bronze layer: one (table_name, dataset_location)
# pair per entity. Keeping both names on the same line means they cannot drift
# out of sync, which two index-coupled parallel lists allow (zip() silently
# truncates to the shorter list when an entry is added to only one of them).
bronze_tables = [
    ("customers", "olist_customers_dataset"),
    ("orders", "olist_orders_dataset"),
    ("geolocation", "olist_geolocation_dataset"),
    ("products", "olist_products_dataset"),
    ("order_items", "olist_order_items_dataset"),
    ("sellers", "olist_sellers_dataset"),
    ("order_payments", "olist_order_payments_dataset"),
    ("product_category_name_translation", "product_category_name_translation"),
    ("order_reviews", "olist_order_reviews_dataset"),
]

# Column-oriented view of the same pairs, kept so that any code reading
# data["table_name"] / data["dataset_location"] continues to work.
data = {
    "table_name": [table_name for table_name, _ in bronze_tables],
    "dataset_location": [dataset_location for _, dataset_location in bronze_tables],
}


## 2 - Persistência

Estamos pegando os arquivos em parquet (passo apenas didático), salvando os dados em delta e criando as tabelas delta

In [0]:
# Ingest every bronze entity: read its Parquet export, hand it to
# `save_dataframe` to be stored in Delta format, then call `create_table` for
# the same location. Both helpers are presumably provided by the
# `%run ../_utils` cell at the top of the notebook.
for table_name, dataset_location in bronze_tables:
    qualified_name = f"olist_bronze.{table_name}"
    source_path = f"/FileStore/parquet/brazilian_ecommerce/{dataset_location}"
    delta_path = f"dbfs:/delta/brazilian_ecommerce/{dataset_location}/bronze"

    # The data is passed through exactly as read; no cleaning or business
    # rules are applied here.
    save_dataframe(
        spark.read.parquet(source_path),
        format_mode="delta",
        table_name=qualified_name,
        target_location=delta_path,
    )
    create_table(table_name=qualified_name, target_location=delta_path)

    # Blank line so each table's log lines form their own group in the output.
    print()

[LOG] Saving olist_bronze.customers delta on dbfs:/delta/brazilian_ecommerce/olist_customers_dataset/bronze... OK!
[LOG] Creating delta table olist_bronze.customers on dbfs:/delta/brazilian_ecommerce/olist_customers_dataset/bronze... OK!

[LOG] Saving olist_bronze.orders delta on dbfs:/delta/brazilian_ecommerce/olist_orders_dataset/bronze... OK!
[LOG] Creating delta table olist_bronze.orders on dbfs:/delta/brazilian_ecommerce/olist_orders_dataset/bronze... OK!

[LOG] Saving olist_bronze.geolocation delta on dbfs:/delta/brazilian_ecommerce/olist_geolocation_dataset/bronze... OK!
[LOG] Creating delta table olist_bronze.geolocation on dbfs:/delta/brazilian_ecommerce/olist_geolocation_dataset/bronze... OK!

[LOG] Saving olist_bronze.products delta on dbfs:/delta/brazilian_ecommerce/olist_products_dataset/bronze... OK!
[LOG] Creating delta table olist_bronze.products on dbfs:/delta/brazilian_ecommerce/olist_products_dataset/bronze... OK!

[LOG] Saving olist_bronze.order_items delta on dbfs:

In [0]:
# End the notebook run at this point, handing "OK" back as its exit value.
# NOTE(review): the %sql cells that follow look like manual spot checks that
# are not reached on a full run — confirm.
dbutils.notebook.exit("OK")

In [0]:
%sql

-- Row count of the bronze order_items table.
SELECT COUNT(*)
FROM olist_bronze.order_items

count(1)
112650


In [0]:
%sql

-- Preview of the bronze order_items table.
SELECT *
FROM olist_bronze.order_items

order_id,order_item_id,product_id,seller_id,shipping_limit_date,price,freight_value
00010242fe8c5a6d1ba2dd792cb16214,1,4244733e06e7ecb4970a6e2683c13e61,48436dade18ac8b2bce089ec2a041202,2017-09-19T09:45:35Z,58.9,13.29
00018f77f2f0320c557190d7a144bdd3,1,e5f2d52b802189ee658865ca93d83a8f,dd7ddc04e1b6c2c614352b383efe2d36,2017-05-03T11:05:13Z,239.9,19.93
000229ec398224ef6ca0657da4fc703e,1,c777355d18b72b67abbeef9df44fd0fd,5b51032eddd242adc84c38acab88f23d,2018-01-18T14:48:30Z,199.0,17.87
00024acbcdf0a6daa1e931b038114c75,1,7634da152a4610f1595efa32f14722fc,9d7a1d34a5052409006425275ba1c2b4,2018-08-15T10:10:18Z,12.99,12.79
00042b26cf59d7ce69dfabb4e55b4fd9,1,ac6c3623068f30de03045865e4e10089,df560393f3a51e74553ab94004ba5c87,2017-02-13T13:57:51Z,199.9,18.14
00048cc3ae777c65dbb7d2a0634bc1ea,1,ef92defde845ab8450f9d70c526ef70f,6426d21aca402a131fc0a5d0960a3c90,2017-05-23T03:55:27Z,21.9,12.69
00054e8431b9d7675808bcb819fb4a32,1,8d4f2bb7e93e6710a28f34fa83ee7d28,7040e82f899a04d1b434b795a43b4617,2017-12-14T12:10:31Z,19.9,11.85
000576fe39319847cbb9d288c5617fa6,1,557d850972a7d6f792fd18ae1400d9b6,5996cddab893a4652a15592fb58ab8db,2018-07-10T12:30:45Z,810.0,70.75
0005a1a1728c9d785b8e2b08b904576c,1,310ae3c140ff94b03219ad0adc3c778f,a416b6a846a11724393025641d4edd5e,2018-03-26T18:31:29Z,145.95,11.65
0005f50442cb953dcd1d21e1fb923495,1,4535b0e1091c278dfd193e5a1d63b39f,ba143b05f0110f0dc71ad71b4466ce92,2018-07-06T14:10:56Z,53.99,11.4


In [0]:
%sql

-- Preview of the bronze order_reviews table.
SELECT *
FROM olist_bronze.order_reviews

review_id,order_id,review_score,review_comment_title,review_comment_message,review_creation_date,review_answer_timestamp
7bc2406110b926393aa56f80a40eba40,73fc7af87114b39712e6da79b0a377eb,4,,,2018-01-18T00:00:00Z,2018-01-18T21:46:59Z
80e641a11e56f04c1ad469d5645fdfde,a548910a1c6147796b98fdf73dbeba33,5,,,2018-03-10T00:00:00Z,2018-03-11T03:05:13Z
228ce5500dc1d8e020d8d1322874b6f0,f9e4b658b201a9f2ecdecbb34bed034b,5,,,2018-02-17T00:00:00Z,2018-02-18T14:36:24Z
e64fb393e7b32834bb789ff8bb30750e,658677c97b385a9be170737859d3511b,5,,Recebi bem antes do prazo estipulado.,2017-04-21T00:00:00Z,2017-04-21T22:02:06Z
f7c4243c7fe1938f181bec41a392bdeb,8e6bfb81e283fa7e4f11123a3fb894f1,5,,Parabéns lojas lannister adorei comprar pela Internet seguro e prático Parabéns a todos feliz Páscoa,2018-03-01T00:00:00Z,2018-03-02T10:26:53Z
15197aa66ff4d0650b5434f1b46cda19,b18dcdf73be66366873cd26c5724d1dc,1,,,2018-04-13T00:00:00Z,2018-04-16T00:39:37Z
07f9bee5d1b850860defd761afa7ff16,e48aa0d2dcec3a2e87348811bcfdf22b,5,,,2017-07-16T00:00:00Z,2017-07-18T19:30:34Z
7c6400515c67679fbee952a7525281ef,c31a859e34e3adac22f376954e19b39d,5,,,2018-08-14T00:00:00Z,2018-08-14T21:36:06Z
a3f6f7f6f433de0aefbb97da197c554c,9c214ac970e84273583ab523dfafd09b,5,,,2017-05-17T00:00:00Z,2017-05-18T12:05:37Z
8670d52e15e00043ae7de4c01cc2fe06,b9bf720beb4ab3728760088589c62129,4,recomendo,aparelho eficiente. no site a marca do aparelho esta impresso como 3desinfector e ao chegar esta com outro nome...atualizar com a marca correta uma vez que é o mesmo aparelho,2018-05-22T00:00:00Z,2018-05-23T16:45:47Z


In [0]:
%sql

-- Preview of the bronze sellers table.
SELECT *
FROM olist_bronze.sellers

seller_id,seller_zip_code_prefix,seller_city,seller_state
3442f8959a84dea7ee197c632cb2df15,13023,campinas,SP
d1b65fc7debc3361ea86b5f14c68d2e2,13844,mogi guacu,SP
ce3ad9de960102d0677a81f5d0bb7b2d,20031,rio de janeiro,RJ
c0f3eea2e14555b6faeea3dd58c1b1c3,4195,sao paulo,SP
51a04a8a6bdcb23deccc82b0b80742cf,12914,braganca paulista,SP
c240c4061717ac1806ae6ee72be3533b,20920,rio de janeiro,RJ
e49c26c3edfa46d227d5121a6b6e4d37,55325,brejao,PE
1b938a7ec6ac5061a66a3766e0e75f90,16304,penapolis,SP
768a86e36ad6aae3d03ee3c6433d61df,1529,sao paulo,SP
ccc4bbb5f32a6ab2b7066a4130f114e3,80310,curitiba,PR
