# Classe FlowOrderStatuses

Classe que implementa o fluxo de Status por Order herdando da classe FlowBase.

In [2]:
from pyspark.sql.functions import col, concat, regexp_replace, max, expr
from pyspark.sql.types import IntegerType
from datetime import datetime

class FlowOrderStatuses(FlowBase):
    """Flow that enriches each order with the timestamp of each of its statuses.

    Reads the 'Order' and 'OrderStatuses' datasets, turns the status rows of
    every order into one timestamp column per status (CONCLUDED, REGISTERED,
    CANCELLED, PLACED), joins the result onto the orders and encrypts the
    ``cpf`` column.
    """

    def run(self):
        """Build the order/status DataFrame.

        Returns:
            A DataFrame with every column of the 'Order' dataset plus the
            columns CONCLUDED, REGISTERED, CANCELLED and PLACED, each holding
            the latest ``created_at`` of the status row with that ``value``
            (NULL when the order never had that status). The ``cpf`` column
            is replaced by the output of ``FlowBase.encrypt_value``.

        Side effects:
            Calls ``dbutils.notebook.exit`` with an error message when
            ``checkDuplicates`` or ``checkColumnsNull`` reports a problem.
        """
        # Imported locally because the notebook cell does not import them;
        # this keeps the method working regardless of what earlier cells did.
        from pyspark.sql.functions import udf
        from pyspark.sql.types import StringType

        orderDF = FlowBase.loadUsedDatasets('Order')
        orderStatusesDF = FlowBase.loadUsedDatasets('OrderStatuses')

        # One output column per status, in this order.
        status_names = ('CONCLUDED', 'REGISTERED', 'CANCELLED', 'PLACED')

        # For each status keep created_at only on the rows with that value,
        # then take the MAX per order. Aggregating directly avoids registering
        # a session-wide temp view under a fixed name.
        status_aggregates = [
            expr(
                "MAX(CASE WHEN value == '{0}' THEN created_at ELSE NULL END)"
                .format(status)
            ).alias(status)
            for status in status_names
        ]
        orderStatusesDF = orderStatusesDF.groupBy('order_id').agg(*status_aggregates)

        # NOTE: join() defaults to an inner join, so orders with no row in
        # OrderStatuses are not present in the result.
        df_final = orderDF.join(
            orderStatusesDF,
            orderDF.order_id == orderStatusesDF.order_id,
        ).drop(orderStatusesDF.order_id)

        if self.checkDuplicates(df_final):
            dbutils.notebook.exit('ERROR: Existem linhas duplicadas')

        # Key columns are validated before cpf is overwritten by its
        # encrypted form.
        if self.checkColumnsNull(df_final, ['cpf', 'order_id']):
            dbutils.notebook.exit('ERROR: Existem chaves com valores nulos')

        encrypt_udf = udf(FlowBase.encrypt_value, StringType())
        df_final = df_final.withColumn('cpf', encrypt_udf(col('cpf')))

        return df_final