In [0]:
%load_ext autoreload
%autoreload 2
# Loads the IPython autoreload extension and switches it on in mode 2.
# Learn more at https://docs.databricks.com/en/files/workspace-modules.html#autoreload-for-python-modules
# To disable autoreload, run: %autoreload 0

In [0]:
import os
import pandas as pd
from abc import ABC, abstractmethod

from lib.template.land_to_raw import *
from lib.template.raw_to_audit import *
from lib.template.audit_to_historic import *

In [0]:
# --- Run configuration ---------------------------------------------------
# rundate selects the landing sub-folder to ingest; catalog_name is built as
# "<domain>_<environment>" and is passed to every stage below.
rundate = '20130208_000000'
domain = 'domain'
environment = 'dev'
catalog_name = domain + '_' + environment
# NOTE(review): absolute, user-specific workspace path; consider making it configurable.
base_directory = f'/Workspace/Users/armando.n90@gmail.com/users_case/lakehouse/landing/visitas/{rundate}/'

# --- Pipeline --------------------------------------------------------------
# Every regular file in the landing folder is pushed through three stages:
# LAND -> RAW, RAW -> AUDIT and AUDIT -> HISTORIC.
# os.listdir() returns entries in arbitrary order, so the listing is sorted to
# make the processing order deterministic from one run to the next.
for filename in sorted(os.listdir(base_directory)):
    path = os.path.join(base_directory, filename)

    # Only regular files are ingested; anything else (e.g. sub-folders) is ignored.
    if not os.path.isfile(path):
        continue

    # The batch id is the second '_'-separated token of the file name, taken
    # from the part that precedes the first '.'.
    # Files that do not follow that pattern (e.g. '.DS_Store', 'README.md')
    # are skipped instead of aborting the whole run with an IndexError.
    name_tokens = filename.split('.')[0].split('_')
    if len(name_tokens) < 2:
        print(f'Skipping {filename}: cannot extract a batch id from the file name')
        continue
    batch_id = name_tokens[1]

    ## LAND TO RAW
    schema_name = 'bronze_analytics'
    table_name = 'raw_visitas'

    print('Processing LAND to RAW: ' + table_name + ' with batch ' + batch_id)
    factory_land_to_raw = FactoryLandToRawFromCsv(path)
    factory_land_to_raw.create()

    # Check whether this batch has already been processed for the raw table.
    # If so, the file is skipped entirely (none of the three stages run).
    # NOTE(review): assumes has_batch_been_processed returns a bool - confirm.
    check = factory_land_to_raw.asset_interactor.has_batch_been_processed(
        catalog_name, schema_name, table_name, batch_id)

    if check:
        print(f'The batch {batch_id} for {catalog_name}.{schema_name}.{table_name} has already been processed')
        continue

    table_processor = LandToRawTemplate(catalog_name=catalog_name, schema_name=schema_name, table_name=table_name,
                                        rundate=rundate, batch_id=batch_id)
    table_processor.set_component_factory(factory_land_to_raw)
    table_processor.process()

    ## RAW TO AUDIT
    table_name = 'audit_visitas'
    schema_name = 'silver_analytics'

    print('Processing RAW to AUDIT: ' + table_name + ' with batch ' + batch_id)
    factory_raw_to_audit = FactoryRawToAuditForVisit()
    factory_raw_to_audit.create()

    table_processor = RawToAuditTemplate(catalog_name=catalog_name, schema_name=schema_name, table_name=table_name,
                                         rundate=rundate, batch_id=batch_id)
    table_processor.set_component_factory(factory_raw_to_audit)
    table_processor.process()

    ## AUDIT TO HISTORIC
    # Unlike the two previous stages, this template is built without rundate/batch_id.
    schema_name = 'silver_analytics'
    table_name = 'historic_visitas'

    print('Processing AUDIT to HISTORIC ' + table_name)
    factory_audit_to_historic = FactoryAuditToHistoric()
    factory_audit_to_historic.create()

    table_processor = AuditToHistoricTemplate(catalog_name=catalog_name, schema_name=schema_name, table_name=table_name)
    table_processor.set_component_factory(factory_audit_to_historic)
    table_processor.process()

In [0]:
# Inspect the ingestion metrics table: print its row count, then display up
# to 20 rows without truncating the column values.
test = spark.read.table('governance_prod.metrics.ingestions')
total_rows = test.count()
print(total_rows)
test.show(n=20, truncate=False)

In [0]:
# Inspect the event-errors metrics table: print its row count, then display
# up to 20 rows using show()'s default truncation.
test = spark.read.table('governance_prod.metrics.event_errors')
total_rows = test.count()
print(total_rows)
test.show(n=20)

In [0]:
# Inspect the historic table written by the pipeline: print its row count,
# then display up to 20 rows without truncating the column values.
test = spark.read.table('domain_dev.silver_analytics.historic_visitas')
total_rows = test.count()
print(total_rows)
test.show(n=20, truncate=False)

In [0]:
# Reset the environment so the pipeline can be re-run from scratch: drop the
# three visitas tables and clear the ingestion metrics.
# NOTE(review): the last statement deletes EVERY row of
# governance_prod.metrics.ingestions (no WHERE clause) - confirm this is intended.
cleanup_statements = [
    'DROP TABLE IF EXISTS domain_dev.silver_analytics.historic_visitas',
    'DROP TABLE IF EXISTS domain_dev.silver_analytics.audit_visitas',
    'DROP TABLE IF EXISTS domain_dev.bronze_analytics.raw_visitas',
    'DELETE FROM governance_prod.metrics.ingestions',
]

# Statements are executed one by one, in the order listed above.
for statement in cleanup_statements:
    spark.sql(statement)