In [0]:
%run ../_utils

[43mNote: you may need to restart the kernel using dbutils.library.restartPython() to use updated packages.[0m
[43mNote: you may need to restart the kernel using dbutils.library.restartPython() to use updated packages.[0m


In [0]:
from pyspark.sql.functions import col, sum, count


# Camada GOLD

Na camada gold, as limpezas e ajustes já foram feitos, então essa camada é responsável por aplicar regras de negócio, agregações e junções de dados que convirjam para analises.

Essa é uma tabela sumarizada analítica.

O objetivo dela é informar o total de venda bruta por cada vendedor (aqui temos cidade)

In [0]:
tb_name = "olist_gold.total_orders_profit_by_seller_city"
dataset_location = "olist_total_orders_profitprofi_dataset"
target_location = f"dbfs:/FileStore/delta/brazilian_ecommerce/{dataset_location}/gold"

## 1 - Data ingestion


In [0]:
df_order_items = spark.read.table("olist_silver.order_items") # temos o seller_id
df_sellers = spark.read.table("olist_silver.sellers")


## 2 - preparation

In [0]:
df = df_order_items.join(df_sellers, on=['seller_id'], how="inner")

seller_id,order_id,order_item_id,product_id,shipping_limit_date,price,freight_value,seller_zip_code_prefix,seller_city,seller_state
48436dade18ac8b2bce089ec2a041202,00010242fe8c5a6d1ba2dd792cb16214,1,4244733e06e7ecb4970a6e2683c13e61,2017-09-19T09:45:35Z,58.9,13.29,27277,volta redonda,SP
dd7ddc04e1b6c2c614352b383efe2d36,00018f77f2f0320c557190d7a144bdd3,1,e5f2d52b802189ee658865ca93d83a8f,2017-05-03T11:05:13Z,239.9,19.93,3471,sao paulo,SP
5b51032eddd242adc84c38acab88f23d,000229ec398224ef6ca0657da4fc703e,1,c777355d18b72b67abbeef9df44fd0fd,2018-01-18T14:48:30Z,199.0,17.87,37564,borda da mata,MG
9d7a1d34a5052409006425275ba1c2b4,00024acbcdf0a6daa1e931b038114c75,1,7634da152a4610f1595efa32f14722fc,2018-08-15T10:10:18Z,12.99,12.79,14403,franca,SP
df560393f3a51e74553ab94004ba5c87,00042b26cf59d7ce69dfabb4e55b4fd9,1,ac6c3623068f30de03045865e4e10089,2017-02-13T13:57:51Z,199.9,18.14,87900,loanda,PR
6426d21aca402a131fc0a5d0960a3c90,00048cc3ae777c65dbb7d2a0634bc1ea,1,ef92defde845ab8450f9d70c526ef70f,2017-05-23T03:55:27Z,21.9,12.69,14091,ribeirao preto,SP
7040e82f899a04d1b434b795a43b4617,00054e8431b9d7675808bcb819fb4a32,1,8d4f2bb7e93e6710a28f34fa83ee7d28,2017-12-14T12:10:31Z,19.9,11.85,1026,sao paulo,SP
5996cddab893a4652a15592fb58ab8db,000576fe39319847cbb9d288c5617fa6,1,557d850972a7d6f792fd18ae1400d9b6,2018-07-10T12:30:45Z,810.0,70.75,19010,presidente prudente,SP
a416b6a846a11724393025641d4edd5e,0005a1a1728c9d785b8e2b08b904576c,1,310ae3c140ff94b03219ad0adc3c778f,2018-03-26T18:31:29Z,145.95,11.65,3702,sao paulo,SP
ba143b05f0110f0dc71ad71b4466ce92,0005f50442cb953dcd1d21e1fb923495,1,4535b0e1091c278dfd193e5a1d63b39f,2018-07-06T14:10:56Z,53.99,11.4,2274,sao paulo,SP


In [0]:
df = df.groupBy("seller_city").agg(
    count("order_id").alias("total_orders_by_seller_city"),
    sum("price").alias("total_items_sell_profit"), # total de arrecadação bruta por produto vendido
    sum("freight_value").alias("total_freight_profit") # total de arrecadação bruta por fretes
).withColumn("total_profit", col("total_items_sell_profit") + col("total_freight_profit")) # total arrecadação bruta

In [0]:
display(df)

seller_city,total_orders_by_seller_city,total_items_sell_profit,total_freight_profit,total_profit
igrejinha,3,314.96000000000004,79.25999999999999,394.22
brusque,393,47626.85000000012,10807.520000000008,58434.370000000126
buritama,15,2575.9000000000005,341.47,2917.370000000001
ipaussu,74,7268.0,1431.3200000000004,8699.32
carapicuiba,214,21859.62000000002,3615.1,25474.720000000023
garca,115,22979.630000000023,2612.8700000000003,25592.500000000025
sao joao de meriti,50,4970.830000000001,832.7500000000002,5803.580000000001
fernando prestes,3,86.6,38.67,125.26999999999998
araras,46,10790.85,912.2500000000002,11703.1
nova friburgo,90,62579.26000000003,2673.8599999999988,65253.12000000003



## Saving data

In [0]:
save_dataframe(df, format_mode="delta", table_name=tb_name, target_location=target_location)

[LOG] Saving olist_gold.total_orders_profit_by_seller_city delta on dbfs:/FileStore/delta/brazilian_ecommerce/olist_total_orders_profitprofi_dataset/gold... OK!



## create delta table

In [0]:
create_table(table_name=tb_name, target_location=target_location)

[LOG] Creating delta table olist_gold.total_orders_profit_by_seller_city on dbfs:/FileStore/delta/brazilian_ecommerce/olist_total_orders_profitprofi_dataset/gold... OK!


In [0]:
# exit para fechar a execução
dbutils.notebook.exit("OK")

In [0]:
%sql

select * from olist_gold.total_orders_profit_by_seller_city

seller_city,total_orders_by_seller_city,total_items_sell_profit,total_freight_profit,total_profit
igrejinha,3,314.96000000000004,79.25999999999999,394.22
brusque,393,47626.85000000012,10807.520000000008,58434.370000000126
buritama,15,2575.9000000000005,341.47,2917.370000000001
ipaussu,74,7268.0,1431.3200000000004,8699.32
carapicuiba,214,21859.62000000002,3615.1,25474.720000000023
garca,115,22979.630000000023,2612.8700000000003,25592.500000000025
sao joao de meriti,50,4970.830000000001,832.7500000000002,5803.580000000001
fernando prestes,3,86.6,38.67,125.26999999999998
araras,46,10790.85,912.2500000000002,11703.1
nova friburgo,90,62579.26000000003,2673.8599999999988,65253.12000000003
