# Classe FlowOrder

Classe que implementa o fluxo com todas as informações de Orders, herdando da classe FlowBase.

In [2]:
from pyspark.sql.functions import col, concat, regexp_replace, max, struct, lit
from pyspark.sql.types import IntegerType, StringType
from datetime import datetime
import hashlib

class FlowOrder(FlowBase):
    """Flow that assembles the consolidated Orders dataset.

    Each order row is enriched with its consumer, its restaurant and one
    status record, validated, and then has ``cpf`` and ``customer_name``
    passed through ``FlowBase.encrypt_value``.
    """

    def run(self):
        """Build and return the enriched orders DataFrame.

        Reads the 'Order', 'OrderStatuses', 'Consumer' and 'Restaurant'
        datasets through ``loadUsedDatasets`` and:

        1. left-joins orders with consumers (on ``customer_id``) and with
           restaurants (``merchant_id`` == restaurant ``id``);
        2. selects, per ``order_id``, the status row whose ``created_at``
           has the largest digits-only numeric value, and left-joins it;
        3. exits the notebook via ``dbutils.notebook.exit`` when
           ``checkDuplicates`` or ``checkColumnsNull`` report a problem;
        4. replaces ``cpf`` and ``customer_name`` with the output of
           ``FlowBase.encrypt_value``.

        Returns:
            The resulting Spark DataFrame.
        """
        # Function-scope import: ``udf`` is not part of this cell's import
        # list, so bring it in here to keep the method self-contained.
        from pyspark.sql.functions import udf

        orders_df = FlowOrder.loadUsedDatasets('Order')
        statuses_df = FlowOrder.loadUsedDatasets('OrderStatuses')
        consumers_df = FlowOrder.loadUsedDatasets('Consumer')
        restaurants_df = FlowOrder.loadUsedDatasets('Restaurant')

        # Avoid a name clash with the status timestamp joined in later.
        consumers_df = consumers_df.withColumnRenamed('created_at', 'consumer_created_at')

        # Orders + consumer: keep the order-side customer_id / customer_name.
        df_final = (
            orders_df
            .join(consumers_df, orders_df.customer_id == consumers_df.customer_id, how='left')
            .drop(consumers_df.customer_id)
            .drop(consumers_df.customer_name)
        )

        # Orders + restaurant: the restaurant key is called ``id``.
        df_final = (
            df_final
            .join(restaurants_df, df_final.merchant_id == restaurants_df.id, how='left')
            .drop(restaurants_df.id)
        )

        # Strip every non-digit from created_at and cast to long so status
        # rows can be ranked numerically. Raw string: '\D' is a regex class,
        # not a Python escape.
        # NOTE(review): this presumably picks the most recent status and
        # assumes created_at is a fixed-width timestamp string - confirm.
        statuses_df = statuses_df.withColumn(
            'converted_date',
            regexp_replace(col('created_at'), r'\D', '').cast('long'),
        )

        # ``max`` here is pyspark.sql.functions.max (imported at file level).
        latest_df = statuses_df.groupBy('order_id').agg(
            max('converted_date').alias('converted_date')
        )

        # Join back by column NAME: latest_df is derived from statuses_df, so
        # Column-object references (df.col == other.col) would be ambiguous
        # in this self-join. A name-based join also yields a single copy of
        # each key column.
        latest_df = (
            latest_df
            .join(statuses_df, on=['order_id', 'converted_date'], how='left')
            .drop('converted_date')
            .withColumnRenamed('created_at', 'status_created_at')
            # Several status rows may share the same maximum value; keep one.
            .dropDuplicates(['order_id'])
        )

        df_final = (
            df_final
            .join(latest_df, df_final.order_id == latest_df.order_id, how='left')
            .drop(latest_df.order_id)
        )

        if self.checkDuplicates(df_final):
            dbutils.notebook.exit('ERROR: Existem linhas duplicadas')

        if self.checkColumnsNull(df_final, ['cpf', 'order_id']):
            dbutils.notebook.exit('ERROR: Existem chaves com valores nulos')

        # NOTE(review): if the two checks above trigger Spark actions, caching
        # before them would avoid recomputing the joins - confirm and move.
        df_final.cache()

        encrypt_udf = udf(FlowBase.encrypt_value, StringType())
        for sensitive_column in ('cpf', 'customer_name'):
            df_final = df_final.withColumn(
                sensitive_column, encrypt_udf(col(sensitive_column))
            )

        return df_final