# Base Lucro Bruto + View

Esta base faz merge dos dados do Lucro Bruto com os dados da View v18


Esta base extrai 3 diferentes níveis de granularidade, e as colunas presentes dependem do nível escolhido

Nível: Data Fatura + Ordem + Item [Chave: Ordem + Item]
Nível: Data Fatura + SKU [Chave: SKU]
Nível: Data Fatura + GPD [Chave: GPD]

O código foi desenvolvido em PySpark

Utilizando boto3 para fazer query direto via Athena para dados da View
E carregando .parquet do LucroBruto


## Imports & Configs

In [None]:
#imports Libs

import os
import pandas as pd
import boto3
import time
from botocore.client import ClientError

import pyarrow.parquet as pq
import s3fs

import findspark                                              #Import library to Search for Spark Installation  

findspark.init()                                              #Search Spark Installation

import pyspark                                                #Only run after findspark.init()

from pyspark.sql import SparkSession                          #Import of Spark Session
from pyspark import SparkContext as spark                     #Import the Regular Spark Contex 
from pyspark.sql import SQLContext                            #Import the SQL Spark Contex 
from pyspark.sql.window import Window
from pyspark.sql.functions import *
from pyspark.sql import functions as F
from pyspark.sql.types import *
from pyspark.sql.functions import year, month, dayofmonth
# Create (or reuse) the Spark session. NOTE: this rebinds the name `spark`,
# which the import block above had bound to the SparkContext class.
spark = SparkSession.builder.getOrCreate()



sc = spark.sparkContext                                       # SparkContext of the session created above

---------------

# Creating Variables

## Database

In [None]:
# Athena database names, read from the environment (KeyError if unset).
# ATHENA_SPG is the database passed to the Athena query that loads the view.
ATHENA_SPG = os.environ['SPG_DATABASE']

In [None]:
# Not referenced anywhere else in the visible part of this notebook.
ATHENA_HANA = os.environ['GERDAU_HANA_DATABASE']

In [None]:
# Not referenced anywhere else in the visible part of this notebook.
ATHENA_SALES = os.environ['GERDAU_SALES_DATABASE']

## Buckets

In [None]:
# S3 bucket names, read from the environment (KeyError if a variable is unset).
# Not referenced anywhere else in the visible part of this notebook.
SPG_MANUAL_INPUT_BUCKET = os.environ['MANUAL_INPUT_BUCKET']

In [None]:
# Not referenced anywhere else in the visible part of this notebook.
SPG_INTEGRATION_INPUT_BUCKET = os.environ['INTEGRATION_INPUT_BUCKET']

In [None]:
# Bucket used for the Athena query output and for reading the result CSV back.
SPG_QUERY_BUCKET = os.environ['QUERY_BUCKET']

In [None]:
# Not referenced anywhere else in the visible part of this notebook.
GERDAU_BUCKET = os.environ['GERDAU_BUCKET']

In [None]:
# Bucket that receives every export written at the end of the notebook.
SPG_OUTPUT_BUCKET = os.environ['OUTPUT_BUCKET']

In [None]:
# Bucket the Lucro Bruto parquet dataset is read from.
SPG_INPUT_BUCKET = os.environ['INPUT_BUCKET_FROM_OUTPUT']

## Input Paths

In [None]:
# Athena SQL that extracts the "Praticado" rows of the strategy view, billed
# or ordered from 2019-01-01 onwards. The literal is split across lines only
# for readability; the resulting string is unchanged.
QUERY_VIEW = (
    "SELECT billing_date, sales_order_date, sales_org_cod,gpm, gpd_cod, gpd_desc, "
    "material_cod, material_desc, sales_number, sales_item, issuing_city, "
    "issuing_state, quantity_ton, practiced_price, preco_prat_norm, preco_politica, "
    "preco_lista, rbv, rlv, receiving_customer_cod, branch  "
    "FROM db_smart_pricing.vw_strategy_sample_v18  "
    "WHERE (billing_date >= '2019-01-01' or sales_order_date >= '2019-01-01')  "
    "AND Fabricante LIKE '%Praticado%';"
)

In [None]:
# Key prefix, inside SPG_INPUT_BUCKET, of the Lucro Bruto parquet dataset.
SPG_INPUT_BUCKET_GP = "SPG_LB/LB_FULL"

## Output Paths

In [None]:
# Key prefixes, inside SPG_OUTPUT_BUCKET, of each export written at the end of
# the notebook (one per aggregation level).
SPG_OUTPUT_BUCKET_GP_STRATEGIC = "SPG_LB/LB_STRATEGIC"

In [None]:
SPG_OUTPUT_BUCKET_GP_ORDER_ITEM = "SPG_LB/LB_OrdemItem"

In [None]:
SPG_OUTPUT_BUCKET_GP_SALES_DATE = "SPG_LB/LB_SalesOrderDate"

In [None]:
SPG_OUTPUT_BUCKET_GP_SKU = "SPG_LB/LB_SKU"

In [None]:
SPG_OUTPUT_BUCKET_GP_GPD = "SPG_LB/LB_GPD"

In [None]:
SPG_OUTPUT_BUCKET_GP_OV = "SPG_LB/LB_OV"

In [None]:
SPG_OUTPUT_BUCKET_GP_SOFC = "SPG_LB/LB_SOFC"

In [None]:
# NOTE: this path is handed to Spark's CSV writer, which writes part files
# under it rather than a single file with this name.
SPG_OUTPUT_BUCKET_GP_SF = "SPG_LB/LB_SF.csv"

## Boto3 Variables

In [None]:
# S3 locations used by Athena.
# NOTE(review): SPG_QUERY_BUCKET_ATHENA is not assigned anywhere in the visible
# part of this notebook; unless it is defined elsewhere, this cell raises
# NameError. S3_ATHENA_INPUT is also not referenced again in the visible code.
S3_ATHENA_INPUT =  's3://'+SPG_QUERY_BUCKET+'/'+SPG_QUERY_BUCKET_ATHENA

In [None]:
# Location where Athena writes the query result.
S3_ATHENA_OUTPUT = 's3://'+SPG_QUERY_BUCKET+'/'+SPG_QUERY_BUCKET_ATHENA

In [None]:
# AWS region and credentials for the boto3 clients, read from the environment.
region_name = os.environ['AWS_REGION']

In [None]:
aws_access_key_id = os.environ['AWS_ACCESS_KEY']

In [None]:
aws_secret_access_key = os.environ['AWS_SECRET_KEY']

----------------

# Creating Defined Functions

In [None]:
# Submit a query to Athena

def run_query(query, database, s3_output):
    """Start an Athena query execution and return the raw boto3 response.

    Uses the module-level Athena ``client``.

    Args:
        query: SQL text to execute.
        database: Athena database used as the query execution context.
        s3_output: S3 location where Athena writes the query result.

    Returns:
        The response of ``client.start_query_execution``.
    """
    execution_context = {'Database': database}
    result_configuration = {'OutputLocation': s3_output}
    return client.start_query_execution(
        QueryString=query,
        QueryExecutionContext=execution_context,
        ResultConfiguration=result_configuration,
    )

In [None]:
def get_aws_path(query,database,s3_output):
    """Run ``query`` on Athena and return its query execution id.

    Args:
        query: SQL text to execute.
        database: Athena database used as the query execution context.
        s3_output: S3 location where Athena writes the query result.

    Returns:
        The ``QueryExecutionId`` string of the started query.
    """
    response = run_query(query, database, s3_output)
    return response['QueryExecutionId']

In [None]:
# Wait (300 seconds by default) until the Athena result object exists in S3

def wait_athena_load(Bucket, Key, timeout=300, poll_interval=1):
    """Block until the S3 object ``Bucket``/``Key`` exists.

    The object is probed with ``head_object`` on the module-level ``s3``
    resource; any ``ClientError`` is treated as "not there yet".

    Args:
        Bucket: Name of the S3 bucket.
        Key: Key of the object to wait for.
        timeout: Maximum number of seconds to wait.
        poll_interval: Seconds to sleep between two probes.

    Raises:
        TimeoutError: If the object is still unavailable after ``timeout``
            seconds. Previously the function returned silently in that case.
    """
    deadline = time.monotonic() + timeout

    while True:
        try:
            s3.meta.client.head_object(Bucket=Bucket, Key=Key)
        except ClientError as err:
            if time.monotonic() >= deadline:
                raise TimeoutError(
                    "S3 object s3://{}/{} not available after {} seconds".format(
                        Bucket, Key, timeout
                    )
                ) from err
            time.sleep(poll_interval)
        else:
            return

----------

# Configuring Boto3

In [None]:
# Athena client, authenticated with the credentials read from the environment

client = boto3.client(
    service_name='athena',
    region_name=region_name,
    aws_access_key_id=aws_access_key_id,
    aws_secret_access_key=aws_secret_access_key,
)

In [None]:
# S3 resource, authenticated with the credentials read from the environment

s3 = boto3.resource(
    service_name='s3',
    region_name=region_name,
    aws_access_key_id=aws_access_key_id,
    aws_secret_access_key=aws_secret_access_key,
)

---------------

# Importing Tables

In [None]:
# Load the Lucro Bruto dataset from parquet

df_lb_full = spark.read.parquet(
    "s3a://{}/{}".format(SPG_INPUT_BUCKET, SPG_INPUT_BUCKET_GP)
)

In [None]:
# Run the view query on Athena, then wait for its CSV result to appear in S3

athena_response = get_aws_path(
    query=QUERY_VIEW, database=ATHENA_SPG, s3_output=S3_ATHENA_OUTPUT
)

wait_athena_load(
    Bucket=SPG_QUERY_BUCKET,
    Key="{}/{}.csv".format(SPG_QUERY_BUCKET_ATHENA, athena_response),
)

In [None]:
# Read the Athena result CSV (first line is the header). No schema is passed,
# so column types are whatever the CSV reader assigns by default.
df_view = spark.read.csv(
    "s3a://{}/{}/{}.csv".format(
        SPG_QUERY_BUCKET, SPG_QUERY_BUCKET_ATHENA, athena_response
    ),
    header='true',
)

-----------

# Preparing Table

## TESTE
* Import do output LB-BW de teste com cálculo de rateio do custo de expedição

In [None]:
# Working copies: `ordem_view` is the view data as loaded; `df_lb` is the
# Lucro Bruto data without the raw GTC columns.

ordem_view = df_view

df_lb = df_lb_full.drop("GTC101018", "GTC100511", "GTC100020")

# Float copy of the Lucro Bruto quantity. The dangling line continuation that
# used to end this statement was removed.
df_lb = df_lb.withColumn("Quantity_ton_bw", col("Quantity_ton").cast("float"))

In [None]:
# Rename the raw Lucro Bruto columns to the names used by the view, then add
# year/month of the order date, the order+item key, the "BR/<state>" code and
# FREIGHT_Full (sum of the four freight-related columns).
df_lb_full = (
    df_lb_full
    .withColumnRenamed("GTC101018", "issuing_state")
    .withColumnRenamed("Data", "sales_order_date")
    .withColumnRenamed("Sales_Number_Item", "sales_item")
    .withColumnRenamed("Sales_Number", "sales_number")
    .withColumnRenamed("GPD_cod", "gpd_cod")
    .withColumnRenamed("GTC100511", "sales_org_cod")
    .withColumnRenamed("GTC100020", "material_cod")
    .withColumn('year', F.year(F.col('sales_order_date')))
    .withColumn('month', F.month(F.col('sales_order_date')))
    .withColumn(
        'KEY_LB',
        F.concat(F.col('sales_number'), F.lit('_'), F.col('sales_item')),
    )
    .withColumn('UF', F.concat(F.lit('BR/'), F.col('issuing_state')))
    .withColumn(
        'FREIGHT_Full',
        F.col('SHIP_DEL_LOAD_C_C')
        + F.col('PORT_EXPENSES')
        + F.col('FREIGHT')
        + F.col('COGS_ADJUSTMENTS'),
    )
)

In [None]:
# Cast the key columns to int and add a float copy of the quantity.
# Values that cannot be cast become null. The dangling line continuation
# that used to end this statement was removed.
df_lb_full = (
    df_lb_full
    .withColumn("sales_item", F.col("sales_item").cast("int"))
    .withColumn("Quantity_ton_bw", F.col("Quantity_ton").cast("float"))
    .withColumn("sales_number", F.col("sales_number").cast("int"))
    .withColumn("gpd_cod", F.col("gpd_cod").cast("int"))
    .withColumn("material_cod", F.col("material_cod").cast("int"))
)

In [None]:
# Sales organizations kept in the Lucro Bruto data; every other row is dropped
filtred_OV = ['BRIN', 'BRIO', 'BRDI', 'BRDO', 'BRCC', 'BRCO', 'BRCG', 'BRGO']

df_lb_full = df_lb_full.filter(F.col("sales_org_cod").isin(filtred_OV))

In [None]:
# Remap sales organization codes: BRCO -> BRCC, BRGO -> BRCG, BRIO -> BRIN,
# BRDO -> BRDI; every other code is kept as is. The original patterns had no
# wildcard, so plain equality selects the same rows. The dangling line
# continuation that used to end this statement was removed.
df_lb_full = df_lb_full.withColumn(
    "sales_org_cod",
    F.when(F.col("sales_org_cod") == "BRCO", "BRCC")
    .when(F.col("sales_org_cod") == "BRGO", "BRCG")
    .when(F.col("sales_org_cod") == "BRIO", "BRIN")
    .when(F.col("sales_org_cod") == "BRDO", "BRDI")
    .otherwise(F.col("sales_org_cod")),
)

In [None]:
# Distinct (material, sales org) -> GPD code/description lookup from the view.
# NOTE(review): if one (material, sales org) pair carries more than one GPD in
# the view, this left join duplicates the matching Lucro Bruto rows — confirm.
gpd_lookup = df_view.select(
    df_view.material_cod.cast("int"),
    df_view.sales_org_cod,
    df_view.gpd_cod.cast("int").alias("gpd_cod_drop"),
    df_view.gpd_desc,
).distinct()

df_lb_full = df_lb_full.join(
    gpd_lookup, on=['material_cod', 'sales_org_cod'], how='left'
)

In [None]:
# Fill missing GPD codes from the view lookup, then discard the helper column
df_lb_full = df_lb_full.withColumn(
    "gpd_cod", F.coalesce(F.col("gpd_cod"), F.col("gpd_cod_drop"))
).drop("gpd_cod_drop")

In [None]:
#df_lb_full=df_lb_full.filter(~df_lb_full.gpd_desc.isNull())

## Ordem + Item

In [None]:
# Add year and month columns derived from the billing date

ordem_view = (
    ordem_view
    .withColumn('year', F.year(F.col('billing_date')))
    .withColumn('month', F.month(F.col('billing_date')))
)

In [None]:
# Aggregate the view per order item (one row per combination of the keys below)

ordem_item = (
    ordem_view
    # price * volume per row: numerators of the volume-weighted averages
    .withColumn("pp_x_volume", F.col("practiced_price") * F.col("quantity_ton"))
    .withColumn("ppn_x_volume", F.col("preco_prat_norm") * F.col("quantity_ton"))
    .groupBy('year', 'month', 'sales_order_date', 'gpm', 'gpd_cod', 'gpd_desc',
             'material_desc', 'material_cod', 'sales_number', 'sales_item',
             'sales_org_cod', 'receiving_customer_cod', 'branch', 'issuing_state')
    .agg(
        F.sum("quantity_ton").alias("quantity_ton"),
        F.sum("rbv").alias("rbv"),
        F.sum("rlv").alias("rlv"),
        F.sum("pp_x_volume").alias("sum_pp_x_volume"),
        F.sum("ppn_x_volume").alias("sum_ppn_x_volume"),
    )
    # Volume-weighted average = sum(price * volume) / sum(volume).
    # The previous formula, sum(price) / sum(price * volume), was not an average.
    .withColumn("practiced_price_weighted_average",
                F.col("sum_pp_x_volume") / F.col("quantity_ton"))
    .withColumn("preco_prat_norm_weighted_average",
                F.col("sum_ppn_x_volume") / F.col("quantity_ton"))
    .drop("sum_pp_x_volume", "sum_ppn_x_volume")
)

## LB_View Ordem + Item

In [None]:
# Join key on the view side: "<year>-<month>_<sales_number>_<sales_item>"

ordem_item = ordem_item.withColumn(
    'KEY_LB_DATA',
    F.concat(
        F.col('year'), F.lit('-'), F.col('month'), F.lit('_'),
        F.col('sales_number'), F.lit('_'), F.col('sales_item'),
    ),
)

# Add FREIGHT_Full (sum of the four freight-related columns) and keep only the
# Lucro Bruto columns used downstream.
# NOTE(review): 'KEY_LB_DATA' is never created on df_lb in this notebook, so it
# presumably already exists in the Lucro Bruto parquet — confirm.

df_lb = (
    df_lb
    .withColumn(
        'FREIGHT_Full',
        F.col('SHIP_DEL_LOAD_C_C')
        + F.col('PORT_EXPENSES')
        + F.col('FREIGHT')
        + F.col('COGS_ADJUSTMENTS'),
    )
    .select('KEY_LB_DATA', 'COGS_TOTAL', 'GROSS_PROFIT_BW', 'NET_SALES',
            'FREIGHT_Full', 'Quantity_ton_bw', 'COMISSION_TO_AGENTS')
)

# Left-join the view with Lucro Bruto, drop rows that have no gross profit,
# and add the order+item key "<sales_number>_<sales_item>".

LB_View = (
    ordem_item
    .join(df_lb, on=['KEY_LB_DATA'], how='left')
    .filter(F.col('GROSS_PROFIT_BW').isNotNull())
    .withColumn(
        'KEY_LB',
        F.concat(F.col('sales_number'), F.lit('_'), F.col('sales_item')),
    )
)



## SALES ORDER DATE

In [None]:
# Aggregate LB_View per order item and sales order date

sales_order_date = (
    LB_View
    # price * volume per row: numerators of the volume-weighted averages
    .withColumn("pp_x_volume", F.col("practiced_price_weighted_average") * F.col("quantity_ton"))
    .withColumn("ppn_x_volume", F.col("preco_prat_norm_weighted_average") * F.col("quantity_ton"))
    .groupBy('KEY_LB', 'sales_order_date', 'gpm', 'gpd_cod', 'gpd_desc',
             'material_desc', 'material_cod', 'sales_number', 'sales_item')
    .agg(
        F.sum("quantity_ton").alias("quantity_ton"),
        F.sum("rbv").alias("rbv"),
        F.sum("rlv").alias("rlv"),
        F.sum("pp_x_volume").alias("sum_pp_x_volume"),
        F.sum("ppn_x_volume").alias("sum_ppn_x_volume"),
        F.sum("COGS_TOTAL").alias("COGS_TOTAL"),
        F.sum("Quantity_ton_bw").alias("Quantity_ton_bw"),
        F.sum("FREIGHT_Full").alias("FREIGHT_Full"),
        F.sum("GROSS_PROFIT_BW").alias("GROSS_PROFIT_BW"),
        F.sum("NET_SALES").alias("NET_SALES"),
    )
    # Volume-weighted average = sum(price * volume) / sum(volume).
    # The previous formula, sum(price) / sum(price * volume), was not an average.
    .withColumn("practiced_price_weighted_average",
                F.col("sum_pp_x_volume") / F.col("quantity_ton"))
    .withColumn("preco_prat_norm_weighted_average",
                F.col("sum_ppn_x_volume") / F.col("quantity_ton"))
    .drop("sum_pp_x_volume", "sum_ppn_x_volume")
)

## SKU OV FILIAL CLIENTE

In [None]:
# Group by year/month, SKU, customer, branch and sales organization

sku_ov_filial_cliente = (
    LB_View
    # price * volume per row: numerators of the volume-weighted averages
    .withColumn("pp_x_volume", F.col("practiced_price_weighted_average") * F.col("quantity_ton"))
    .withColumn("ppn_x_volume", F.col("preco_prat_norm_weighted_average") * F.col("quantity_ton"))
    .groupBy('year', 'month', 'gpm', 'gpd_cod', 'gpd_desc', 'material_desc',
             'material_cod', 'receiving_customer_cod', 'branch', 'sales_org_cod')
    .agg(
        F.sum("quantity_ton").alias("quantity_ton"),
        F.sum("Quantity_ton_bw").alias("Quantity_ton_bw"),
        F.sum("rbv").alias("rbv"),
        F.sum("rlv").alias("rlv"),
        F.sum("pp_x_volume").alias("sum_pp_x_volume"),
        F.sum("ppn_x_volume").alias("sum_ppn_x_volume"),
        F.sum("COGS_TOTAL").alias("COGS_TOTAL"),
        F.sum("FREIGHT_Full").alias("FREIGHT_Full"),
        F.sum("GROSS_PROFIT_BW").alias("GROSS_PROFIT_BW"),
        F.sum("NET_SALES").alias("NET_SALES"),
    )
    # Volume-weighted average = sum(price * volume) / sum(volume).
    # The previous formula, sum(price) / sum(price * volume), was not an average.
    .withColumn("practiced_price_weighted_average",
                F.col("sum_pp_x_volume") / F.col("quantity_ton"))
    .withColumn("preco_prat_norm_weighted_average",
                F.col("sum_ppn_x_volume") / F.col("quantity_ton"))
    .drop("sum_pp_x_volume", "sum_ppn_x_volume")
)

## SKU

In [None]:
# Group by year/month, SKU and sales organization

sku = (
    LB_View
    # price * volume per row: numerators of the volume-weighted averages
    .withColumn("pp_x_volume", F.col("practiced_price_weighted_average") * F.col("quantity_ton"))
    .withColumn("ppn_x_volume", F.col("preco_prat_norm_weighted_average") * F.col("quantity_ton"))
    .groupBy('year', 'month', 'gpm', 'gpd_cod', 'gpd_desc', 'material_desc',
             'material_cod', 'sales_org_cod')
    .agg(
        F.sum("quantity_ton").alias("quantity_ton"),
        F.sum("Quantity_ton_bw").alias("Quantity_ton_bw"),
        F.sum("rbv").alias("rbv"),
        F.sum("rlv").alias("rlv"),
        F.sum("pp_x_volume").alias("sum_pp_x_volume"),
        F.sum("ppn_x_volume").alias("sum_ppn_x_volume"),
        F.sum("COGS_TOTAL").alias("COGS_TOTAL"),
        F.sum("FREIGHT_Full").alias("FREIGHT_Full"),
        F.sum("GROSS_PROFIT_BW").alias("GROSS_PROFIT_BW"),
        F.sum("NET_SALES").alias("NET_SALES"),
    )
    # Volume-weighted average = sum(price * volume) / sum(volume).
    # The previous formula, sum(price) / sum(price * volume), was not an average.
    .withColumn("practiced_price_weighted_average",
                F.col("sum_pp_x_volume") / F.col("quantity_ton"))
    .withColumn("preco_prat_norm_weighted_average",
                F.col("sum_ppn_x_volume") / F.col("quantity_ton"))
    .drop("sum_pp_x_volume", "sum_ppn_x_volume")
)

## OV

In [None]:
# Group by year/month, GPD and sales organization (OV)

sales_order = (
    LB_View
    # price * volume per row: numerators of the volume-weighted averages
    .withColumn("pp_x_volume", F.col("practiced_price_weighted_average") * F.col("quantity_ton"))
    .withColumn("ppn_x_volume", F.col("preco_prat_norm_weighted_average") * F.col("quantity_ton"))
    .groupBy('year', 'month', 'gpm', 'gpd_cod', 'gpd_desc', 'sales_org_cod')
    .agg(
        F.sum("quantity_ton").alias("quantity_ton"),
        F.sum("Quantity_ton_bw").alias("Quantity_ton_bw"),
        F.sum("rbv").alias("rbv"),
        F.sum("rlv").alias("rlv"),
        F.sum("pp_x_volume").alias("sum_pp_x_volume"),
        F.sum("ppn_x_volume").alias("sum_ppn_x_volume"),
        F.sum("COGS_TOTAL").alias("COGS_TOTAL"),
        F.sum("FREIGHT_Full").alias("FREIGHT_Full"),
        F.sum("GROSS_PROFIT_BW").alias("GROSS_PROFIT_BW"),
        F.sum("NET_SALES").alias("NET_SALES"),
    )
    # Volume-weighted average = sum(price * volume) / sum(volume).
    # The previous formula, sum(price) / sum(price * volume), was not an average.
    .withColumn("practiced_price_weighted_average",
                F.col("sum_pp_x_volume") / F.col("quantity_ton"))
    .withColumn("preco_prat_norm_weighted_average",
                F.col("sum_ppn_x_volume") / F.col("quantity_ton"))
    .drop("sum_pp_x_volume", "sum_ppn_x_volume")
)

## GPD

In [None]:
# Group by year/month and GPD

gpd = (
    LB_View
    # price * volume per row: numerators of the volume-weighted averages
    .withColumn("pp_x_volume", F.col("practiced_price_weighted_average") * F.col("quantity_ton"))
    .withColumn("ppn_x_volume", F.col("preco_prat_norm_weighted_average") * F.col("quantity_ton"))
    .groupBy('year', 'month', 'gpm', 'gpd_cod', 'gpd_desc')
    .agg(
        F.sum("quantity_ton").alias("quantity_ton"),
        F.sum("Quantity_ton_bw").alias("Quantity_ton_bw"),
        F.sum("rbv").alias("rbv"),
        F.sum("rlv").alias("rlv"),
        F.sum("pp_x_volume").alias("sum_pp_x_volume"),
        F.sum("ppn_x_volume").alias("sum_ppn_x_volume"),
        F.sum("COGS_TOTAL").alias("COGS_TOTAL"),
        F.sum("FREIGHT_Full").alias("FREIGHT_Full"),
        F.sum("GROSS_PROFIT_BW").alias("GROSS_PROFIT_BW"),
        F.sum("NET_SALES").alias("NET_SALES"),
    )
    # Volume-weighted average = sum(price * volume) / sum(volume).
    # The previous formula, sum(price) / sum(price * volume), was not an average.
    .withColumn("practiced_price_weighted_average",
                F.col("sum_pp_x_volume") / F.col("quantity_ton"))
    .withColumn("preco_prat_norm_weighted_average",
                F.col("sum_ppn_x_volume") / F.col("quantity_ton"))
    .drop("sum_pp_x_volume", "sum_ppn_x_volume")
)

## SALES FORCE - SKU ESTADO OV

In [None]:

# Aggregate per month, SKU, sales organization and issuing state.
# NOTE(review): the grouping uses 'month' without 'year'; if the data spans
# more than one year, equal calendar months are merged — confirm intent.

view_sf = (
    LB_View
    .groupBy('month', 'gpm', 'gpd_cod', 'gpd_desc', 'material_desc',
             'material_cod', 'sales_org_cod', 'issuing_state')
    .agg(
        F.sum("rlv").alias("rlv"),
        F.sum("COGS_TOTAL").alias("COGS_TOTAL"),
        F.sum("GROSS_PROFIT_BW").alias("GROSS_PROFIT_BW"),
    )
)



In [None]:
# Build the Sales Force key.
# NOTE(review): 'material_cod' is concatenated twice with no separator, which
# looks like a copy/paste slip; kept as is because the consumer of KEY_SF is
# not visible here — confirm the intended key format.

view_sf = view_sf.withColumn(
    'KEY_SF',
    F.concat(
        F.col('material_cod'), F.col('material_cod'), F.lit('_'),
        F.col('sales_org_cod'), F.lit('_'), F.col('issuing_state'),
    ),
)

In [None]:
# Keep only the months of the analysis window.
# June (6) was missing from this list although the yearly averages are
# divided by 9 months; the list now covers January..September.

thiyear = [1, 2, 3, 4, 5, 6, 7, 8, 9]

view_sf = view_sf.filter(F.col("month").isin(thiyear))

# Months used for the "last 3 months" averages

lastmonth = [7, 8, 9]

df_sf = view_sf.filter(F.col("month").isin(lastmonth))

In [None]:
# Monthly averages over the last-3-months window, per KEY_SF.
# avg_rlv_last_3 used to be divided by 9; all three totals cover the same
# 3 months, so all three are now divided by 3.

grouped = (
    df_sf
    .groupBy('KEY_SF')
    .agg(
        F.sum("GROSS_PROFIT_BW").alias("GROSS_PROFIT_BWm"),
        F.sum("COGS_TOTAL").alias("COGS_TOTALm"),
        F.sum("RLV").alias("RLVm"),
    )
    .withColumn("avg_gross_profit_last_3", F.col("GROSS_PROFIT_BWm") / 3)
    .withColumn("avg_cogs_last_3", F.col("COGS_TOTALm") / 3)
    .withColumn("avg_rlv_last_3", F.col("RLVm") / 3)
    .drop('GROSS_PROFIT_BWm', 'COGS_TOTALm', 'RLVm')
)

In [None]:
# Monthly averages over the whole analysis window, per KEY_SF.
# The divisor is the number of months kept in `thiyear`, so it always matches
# the month filter instead of relying on a hard-coded 9.
months_in_window = len(thiyear)

grouped12 = (
    view_sf
    .groupBy('KEY_SF')
    .agg(
        F.sum("GROSS_PROFIT_BW").alias("GROSS_PROFIT_BWy"),
        F.sum("COGS_TOTAL").alias("COGS_TOTALy"),
        F.sum("RLV").alias("RLVy"),
    )
    .withColumn("avg_gross_profit_year", F.col("GROSS_PROFIT_BWy") / months_in_window)
    .withColumn("avg_cogs_last_year", F.col("COGS_TOTALy") / months_in_window)
    .withColumn("avg_rlv_last_year", F.col("RLVy") / months_in_window)
    .drop('GROSS_PROFIT_BWy', 'COGS_TOTALy', 'RLVy')
)

In [None]:
# Attach the last-3-months averages, then the whole-window averages, by KEY_SF

view_sf = (
    view_sf
    .join(grouped, on=['KEY_SF'], how='left')
    .join(grouped12, on=['KEY_SF'], how='left')
)

In [None]:
# Collapse the month dimension: totals are summed across months, while the
# already-averaged columns are carried over with avg().

sf_sf = (
    view_sf
    .groupBy('KEY_SF', 'gpm', 'gpd_cod', 'gpd_desc', 'material_desc',
             'material_cod', 'sales_org_cod', 'issuing_state')
    .agg(
        F.sum("rlv").alias("rlv"),
        F.sum("COGS_TOTAL").alias("COGS_TOTAL"),
        F.sum("GROSS_PROFIT_BW").alias("GROSS_PROFIT_BW"),
        F.avg("avg_gross_profit_last_3").alias("avg_gross_profit_last_3"),
        F.avg("avg_cogs_last_3").alias("avg_cogs_last_3"),
        F.avg("avg_rlv_last_3").alias("avg_rlv_last_3"),
        F.avg("avg_gross_profit_year").alias("avg_gross_profit_year"),
        F.avg("avg_cogs_last_year").alias("avg_cogs_last_year"),
        F.avg("avg_rlv_last_year").alias("avg_rlv_last_year"),
    )
)

In [None]:
def _with_upper_columns(df):
    """Return ``df`` with every column name converted to upper case."""
    for col_name in df.columns:
        df = df.withColumnRenamed(col_name, col_name.upper())
    return df


# The previous loop only rebound its loop variable, so none of the DataFrames
# below was actually renamed. Each one is now reassigned explicitly.
df_lb_full = _with_upper_columns(df_lb_full)
LB_View = _with_upper_columns(LB_View)
sales_order_date = _with_upper_columns(sales_order_date)
sku = _with_upper_columns(sku)
gpd = _with_upper_columns(gpd)
sales_order = _with_upper_columns(sales_order)
sku_ov_filial_cliente = _with_upper_columns(sku_ov_filial_cliente)
sf_sf = _with_upper_columns(sf_sf)

## Exports

# TESTE
* Exports Inativados para teste de Output  full rateio LB com cálculo de rateio do custo de expedição

In [None]:
# Export the full Lucro Bruto table (overwrites the previous export)
df_lb_full.write.parquet(
    "s3a://{}/{}".format(SPG_OUTPUT_BUCKET, SPG_OUTPUT_BUCKET_GP_STRATEGIC),
    mode="overwrite",
)

In [None]:
# Export the order + item level (overwrites the previous export)

LB_View.write.parquet(
    "s3a://{}/{}".format(SPG_OUTPUT_BUCKET, SPG_OUTPUT_BUCKET_GP_ORDER_ITEM),
    mode="overwrite",
)

In [None]:
# Export the sales order date level (overwrites the previous export)

sales_order_date.write.parquet(
    "s3a://{}/{}".format(SPG_OUTPUT_BUCKET, SPG_OUTPUT_BUCKET_GP_SALES_DATE),
    mode="overwrite",
)

In [None]:
# Export the SKU level (overwrites the previous export)

sku.write.parquet(
    "s3a://{}/{}".format(SPG_OUTPUT_BUCKET, SPG_OUTPUT_BUCKET_GP_SKU),
    mode="overwrite",
)

In [None]:
# Export the GPD level (overwrites the previous export)

gpd.write.parquet(
    "s3a://{}/{}".format(SPG_OUTPUT_BUCKET, SPG_OUTPUT_BUCKET_GP_GPD),
    mode="overwrite",
)

In [None]:
# Export the sales organization (OV) level (overwrites the previous export)

sales_order.write.parquet(
    "s3a://{}/{}".format(SPG_OUTPUT_BUCKET, SPG_OUTPUT_BUCKET_GP_OV),
    mode="overwrite",
)

In [None]:
# Export the SKU / sales organization / branch / customer level
# (overwrites the previous export)

sku_ov_filial_cliente.write.parquet(
    "s3a://{}/{}".format(SPG_OUTPUT_BUCKET, SPG_OUTPUT_BUCKET_GP_SOFC),
    mode="overwrite",
)

In [None]:
# Export the Sales Force table as ";"-separated CSV with a header row, from a
# single partition, using the "Unicode" encoding option (overwrites the
# previous export).

(
    sf_sf.repartition(1)
    .write
    .option("header", "true")
    .option("sep", ";")
    .option("encoding", "Unicode")
    .csv(
        "s3a://{}/{}".format(SPG_OUTPUT_BUCKET, SPG_OUTPUT_BUCKET_GP_SF),
        mode="overwrite",
    )
)