In [0]:
%load_ext autoreload
%autoreload 2
# Enables autoreload; learn more at https://docs.databricks.com/en/files/workspace-modules.html#autoreload-for-python-modules
# To disable autoreload; run %autoreload 0

In [0]:
import pandas as pd
from abc import ABC, abstractmethod

import json

import pyspark
from pyspark.sql import DataFrame, SparkSession
from pyspark.sql.types import *
from pyspark.sql.functions import *
from delta.tables import DeltaTable

from lib.gateway.file import *
from lib.gateway.database import *
from lib.interactor.governance import *
from lib.interactor.asset import *

In [0]:
## FACTORIES FOR COMPONENTS REQUIRED BY THE TEMPLATE

#ABSTRACT FACTORY WITH METHODS TO BE IMPLEMENTED
class AbstractFactoryRawToAudit(ABC):
    """Contract for factories that assemble the components used by the raw-to-audit template."""

    @abstractmethod
    def create(self):
        """Build the components; every concrete factory has to override this."""

#CONCRETE FACTORY THAT BUILDS THE INTERACTORS ON TOP OF A SPARK SQL DATABASE GATEWAY
class FactoryRawToAudit(AbstractFactoryRawToAudit):
    """Concrete factory that wires both interactors to a single ``SparkSQLDatabaseGateway``."""

    def __init__(self):
        """Nothing to configure; the interactors only exist once ``create()`` has run."""

    def create(self):
        """Instantiate the gateway and expose the two interactors as attributes."""
        gateway = SparkSQLDatabaseGateway()

        # Both interactors share the same gateway instance.
        self.governance_interactor = GovernanceInteractor(database_gateway=gateway)
        self.asset_interactor = AssetInteractor(database_gateway=gateway)
     

class Demo(ABC):
    """Sketch of the failure-lookup API; every method is still a placeholder returning None."""

    def get_is_not_null_failures(self, catalog_name: str, schema_name: str, table_name: str, column_name: str):
        """Placeholder (not implemented): currently returns None."""

    def get_is_unique_failures(self, catalog_name: str, schema_name: str, table_name: str, column_names: list):
        """Placeholder (not implemented): currently returns None."""

    def get_is_in_bounds_failures(self, catalog_name: str, schema_name: str, table_name: str, column_name: str,
                                  min_value: object, max_value: object, data_type: str):
        """Placeholder (not implemented): currently returns None."""

    def get_is_in_list_failures(self, catalog_name: str, schema_name: str, table_name: str, column_name: str, list_values: list):
        """Placeholder (not implemented): currently returns None."""

    def get_is_date_format_failures(self, catalog_name: str, schema_name: str, table_name: str, column_name: str, date_format: str):
        """Placeholder (not implemented): currently returns None."""

    def get_is_not_lower_than_failures(self, catalog_name: str, schema_name: str, table_name: str, column_name: str, reference_column_name: str):
        """Placeholder (not implemented): currently returns None."""


In [0]:
## TEMPLATE FOR EXTRACTING A TABLE AND METADATA, PROCESSING THE TABLE AND EXPORTING THE TABLE WITH METADATA AND METRICS

#INTERFACE FOR THE TEMPLATE
class InterfaceRawToAuditTemplate(ABC):
    """Interface of the raw-to-audit template: implementations expose a single ``process`` step."""

    @abstractmethod
    def process(self):
        """Run the whole pipeline; concrete templates must override this."""

#IMPLEMENTATION FOR THE TEMPLATE
class RawToAuditTemplate(InterfaceRawToAuditTemplate):
    """Template that loads one batch of a raw table and prepares it for audit screening.

    The governance and asset interactors are injected through
    ``set_component_factory``; ``process`` then runs the enabled steps in
    order. Several processing and exporting steps exist as methods but are
    currently disabled inside ``process``.
    """

    def __init__(self, catalog_name: str, schema_name: str, table_name: str, batch_id: str):
        """Store the target table coordinates and initialise the run metrics.

        Args:
            catalog_name: Catalog that holds both the raw and the audit table.
            schema_name: Schema of the audit table.
            table_name: Name of the audit table.
            batch_id: Identifier of the batch to read and to stamp on the metrics.
        """
        self.catalog_name = catalog_name
        self.table_name = table_name
        self.schema_name = schema_name
        self.batch_id = batch_id

        # Collaborators; filled in by set_component_factory().
        self.governance_interactor = None
        self.asset_interactor = None

        # Working state; filled in by the extraction steps.
        self.dataframe = None
        self.metadata = None

        # Row counters stay None until the step that computes them has run.
        self.metrics = {}
        self.metrics['batch_id'] = batch_id
        self.metrics['loaded_at'] = pd.Timestamp.utcnow().strftime('%Y-%m-%d %H:%M:%S')
        self.metrics['initial_rows'] = None
        self.metrics['final_rows'] = None
        self.metrics['accumulated_rows'] = None

        self.dataframe_ingestion_metrics = None

    def set_component_factory(self, component_factory: AbstractFactoryRawToAudit):
        """Take the interactors from a factory whose ``create()`` has already been called."""
        self.governance_interactor = component_factory.governance_interactor
        self.asset_interactor = component_factory.asset_interactor

    def process(self):
        """Run the enabled extraction and processing steps in order."""
        print('Starting raw to audit process ...')

        #EXTRACTION STEPS
        self.extract_metadata()
        self.extract_table()

        #PROCESSING STEPS
        self.rename_columns()
        # Steps below are implemented but currently disabled.
        #self.validate_required_columns()
        #self.cast_data_types()
        #self.validate_non_nullable_columns()
        #self.add_metadata()

        #EXPORTING STEPS (currently disabled)
        #self.write_table()
        #self.get_accumulated_rows()
        #self.generate_ingestion_metrics()
        #self.write_metrics()

        print('Table, metadata and ingestion metrics exported successfully')

    #EXTRACT METADATA
    def extract_metadata(self):
        """Fetch the table metadata from the governance interactor into ``self.metadata``."""
        print('Extracting metadata ...')
        self.metadata = self.governance_interactor.get_table_metadata(catalog_name=self.catalog_name,
                                                                      schema_name=self.schema_name,
                                                                      table_name=self.table_name)

    #EXTRACT TABLE
    def extract_table(self):
        """Read the batch from the raw table and add the audit bookkeeping columns.

        The raw table location is derived from the audit one by replacing
        'silver' with 'bronze' in the schema name and 'audit' with 'raw' in the
        table name. The row count after loading is stored as ``initial_rows``.
        """
        print('Extracting raw table ...')

        raw_schema_name = self.schema_name.replace('silver', 'bronze')
        raw_table_name = self.table_name.replace('audit', 'raw')
        params = {'read_mode': 'read_partition', 'batch_id': self.batch_id}

        self.dataframe = self.asset_interactor.read_table(catalog_name=self.catalog_name,
                                                          schema_name=raw_schema_name,
                                                          table_name=raw_table_name,
                                                          params=params)

        # The raw-layer metadata columns are not carried over.
        self.dataframe = self.dataframe.drop('metadata_batch_id', 'metadata_loaded_at', 'metadata_etl_module')

        # Audit columns start empty; every row is considered valid until a screen fails it.
        for placeholder in ('created_at', 'updated_at', 'deleted_at', 'audit_id'):
            self.dataframe = self.dataframe.withColumn(placeholder, lit(None))
        self.dataframe = self.dataframe.withColumn('audit_passed', lit(True))

        # Temporary row identifier used to trace screen failures back to rows.
        self.dataframe = self.dataframe.withColumn('row_temp_id', monotonically_increasing_id())

        self.metrics['initial_rows'] = self.dataframe.count()

    #RENAME COLUMNS
    def rename_columns(self):
        """Rename every source column to the name declared in the metadata.

        Each entry of ``metadata['field_data']`` maps the target column name to
        a dict whose 'rename_from' value is the column name in the raw table.
        """
        print('Renaming columns ...')
        for column, field in self.metadata['field_data'].items():
            # Read from this instance's metadata, not from a notebook-level variable.
            rename_from = field['rename_from']
            self.dataframe = self.dataframe.withColumnRenamed(rename_from, column)

    #VALIDATE REQUIRED COLUMNS
    def validate_required_columns(self):
        """Check that the dataframe columns and the metadata columns match exactly.

        Raises:
            ValueError: If a metadata column is absent from the dataframe, or a
                dataframe column is absent from the metadata.
        """
        print('Validating required columns ...')
        required_columns = self.metadata['field_data'].keys()
        dataframe_columns = self.dataframe.columns

        for column in required_columns:
            if column not in dataframe_columns:
                raise ValueError(f"Missing column in dataframe: {column}")

        for column in dataframe_columns:
            if column not in required_columns:
                raise ValueError(f"Missing column in metadata: {column}")

    #CAST DATA TYPES
    def cast_data_types(self):
        """Cast every metadata column to the Spark type matching its declared data type.

        Raises:
            ValueError: If a column declares a data type that is not supported.
        """
        print('Casting data types ...')

        # 'json' values are kept as plain strings.
        spark_types = {
            'string': StringType,
            'json': StringType,
            'timestamp': TimestampType,
            'integer': IntegerType,
            'boolean': BooleanType,
        }

        for column, field in self.metadata['field_data'].items():
            plain_data_type = field['data_type']

            if plain_data_type not in spark_types:
                raise ValueError(f"Invalid data type: {plain_data_type}")

            data_type = spark_types[plain_data_type]()
            self.dataframe = self.dataframe.withColumn(column, self.dataframe[column].cast(data_type))

    #VALIDATE NON NULLABLE COLUMNS
    def validate_non_nullable_columns(self):
        """Fail as soon as a column declared non-nullable contains a null.

        Raises:
            ValueError: Naming the first non-nullable column that holds a null.
        """
        print('Validating non nullable columns ...')
        for column, field in self.metadata['field_data'].items():

            # Explicit comparison: only a false flag makes the column mandatory,
            # a missing (None) flag does not.
            if field['is_nullable'] == False:
                count_null = self.dataframe.filter(col(column).isNull()).count()

                if count_null > 0:
                    raise ValueError(f"Null value found in column: {column}")

    #ADDING METADATA
    def add_metadata(self):
        """Attach column comments and the batch metadata columns, then record ``final_rows``."""
        print('Adding metadata ...')

        #ADD COMMENTS
        for column, field in self.metadata['field_data'].items():
            self.dataframe = self.dataframe.withMetadata(column, {'comment': field['comment']})

        #ADD METADATA COLUMNS
        self.dataframe = self.dataframe.withColumn('metadata_batch_id', lit(self.metrics['batch_id']))
        self.dataframe = self.dataframe.withColumn('metadata_loaded_at', lit(self.metrics['loaded_at']))
        self.dataframe = self.dataframe.withColumn('metadata_etl_module', lit(self.metadata['etl_module']))

        # loaded_at is stored as a formatted string in the metrics; convert it to a timestamp column.
        self.dataframe = self.dataframe.withColumn('metadata_loaded_at', to_timestamp(col('metadata_loaded_at'), "yyyy-MM-dd HH:mm:ss"))

        self.dataframe = self.dataframe.withMetadata('metadata_batch_id', {'comment': 'Identifier of the batch process'})
        self.dataframe = self.dataframe.withMetadata('metadata_loaded_at', {'comment': 'Timestamp when the record was loaded into the table'})
        self.dataframe = self.dataframe.withMetadata('metadata_etl_module', {'comment': 'ETL module used to process this record'})

        self.metrics['final_rows'] = self.dataframe.count()

    #WRITE TABLE
    def write_table(self):
        """Write the dataframe to the table named in the metadata, using its write mode."""
        print('Writing table ...')
        params = {'write_mode': self.metadata['write_mode'], 'batch_id': self.metrics['batch_id']}

        self.asset_interactor.write_table(dataframe=self.dataframe,
                                          catalog_name=self.metadata['catalog_name'],
                                          schema_name=self.metadata['schema_name'],
                                          table_name=self.metadata['table_name'],
                                          params=params)

    #GET ACCUMULATED ROWS
    def get_accumulated_rows(self):
        """Store the total row count of the target table as ``accumulated_rows``."""
        print('Computing accumulated rows for metrics ...')
        self.metrics['accumulated_rows'] = self.asset_interactor.get_total_rows(catalog_name=self.metadata['catalog_name'],
                                                                                schema_name=self.metadata['schema_name'],
                                                                                table_name=self.metadata['table_name'])

    #GENERATE INGESTION METRICS
    def generate_ingestion_metrics(self):
        """Build the one-row ingestion metrics dataframe from the metadata and the metrics."""
        print('Generating ingestion metrics ...')
        columns = ['table_id', 'catalog_name', 'schema_name', 'table_name', 'batch_id', 'loaded_at',
                   'etl_module', 'write_mode', 'initial_rows', 'final_rows', 'accumulated_rows', 'quality']

        ingestion_metrics = [(self.metadata['table_id'],
                              self.metadata['catalog_name'],
                              self.metadata['schema_name'],
                              self.metadata['table_name'],
                              self.metrics['batch_id'],
                              self.metrics['loaded_at'],
                              self.metadata['etl_module'],
                              self.metadata['write_mode'],
                              self.metrics['initial_rows'],
                              self.metrics['final_rows'],
                              self.metrics['accumulated_rows'],
                              self.metadata['quality'])]

        # NOTE(review): relies on a module-level `spark` session being available
        # (presumably the one the notebook runtime provides) - confirm.
        self.dataframe_ingestion_metrics = spark.createDataFrame(ingestion_metrics, columns)

    #WRITE INGESTION METRICS
    def write_metrics(self):
        """Merge the ingestion metrics row into governance_prod.metrics.ingestions."""
        print('Writing ingestion metrics ...')
        self.asset_interactor.merge_dataframe(self.dataframe_ingestion_metrics, catalog_name='governance_prod',
                                              schema_name='metrics', table_name='ingestions',
                                              match_columns=['catalog_name', 'schema_name', 'table_name', 'batch_id'])


In [0]:
# Run configuration: which table and batch to process.
domain = 'domain'
environment = 'dev'

schema_name = 'silver_analytics'
catalog_name = f'{domain}_{environment}'

table_name = 'audit_visitas'
batch_id = '7'

print(f'Processing {table_name} with batch {batch_id}')

# The factory must be created before it is handed to the template.
factory_raw_to_audit = FactoryRawToAudit()
factory_raw_to_audit.create()

table_processor = RawToAuditTemplate(
    catalog_name=catalog_name,
    schema_name=schema_name,
    table_name=table_name,
    batch_id=batch_id,
)
table_processor.set_component_factory(factory_raw_to_audit)
table_processor.process()

# Quick look at what the run produced.
print()
print(table_processor.metadata)
print(table_processor.metrics)
print()
table_processor.dataframe.show(3)
print()

In [0]:
# Validations grouped by screen type. List-valued screens hold column names,
# dict-valued screens map a column (or unique group) to its screen parameters.
parsed_validations = {
    'is_not_null': [],
    'unique': {},
    'is_in_list': {},
    'is_in_foreign_column': {},
    'is_date_format': {},
    'is_email_format': [],
    'is_in_bounds': {},
    'is_not_lower_than': {},
}

for column, field in table_processor.metadata['field_data'].items():

    validations = field['validations']
    data_type = field['data_type']

    print(column, data_type)
    if pd.isnull(validations):
        continue

    # Validations are stored as a JSON-encoded list of rule objects.
    validations = json.loads(validations)

    for rule in validations:
        print(rule)
        kind = rule['validation']

        if kind == 'is_not_null':
            parsed_validations['is_not_null'].append(column)

        elif kind == 'unique':
            # Columns sharing a unique_group are collected together.
            parsed_validations['unique'].setdefault(rule['unique_group'], []).append(column)

        elif kind == 'is_in_list':

            if 'allowed' in rule:
                parsed_validations['is_in_list'][column] = rule['allowed'].split(',')

            elif 'reference' in rule:
                parsed_validations['is_in_foreign_column'][column] = {'schema': rule['schema'],
                                                                      'table': rule['table'],
                                                                      'reference': rule['reference']}

        elif kind == 'is_date_format':
            parsed_validations['is_date_format'][column] = rule['format']

        elif kind == 'is_email_format':
            parsed_validations['is_email_format'].append(column)

        elif kind == 'is_in_bounds':
            parsed_validations['is_in_bounds'][column] = {'min_allowed': rule['min_allowed'],
                                                          'max_allowed': rule['max_allowed'],
                                                          'data_type': data_type}

        elif kind == 'is_not_lower_than':
            parsed_validations['is_not_lower_than'][column] = {'reference': rule['reference'],
                                                               'data_type': data_type}

In [0]:
# Dump every screen type followed by the columns/parameters collected for it.
for key, parsed in parsed_validations.items():
    print(key)
    print(parsed)
    print()

In [0]:
import json

def conver_to_json_string(text):
    """Turn a comma-separated string into a JSON array string, dropping empty items.

    Args:
        text: Comma-separated values, or a null value (None / NaN).

    Returns:
        None when ``text`` is null; otherwise the JSON encoding of the list of
        non-empty items, in their original order ('[]' when none remain).
    """
    if pd.isnull(text):
        return None

    # Build a new list rather than removing while iterating, which skipped
    # every second item of a run of consecutive empty tokens.
    elements = [element for element in text.split(',') if element != '']
    return json.dumps(elements)

# Spark UDF wrapper around conver_to_json_string; the resulting column is a string.
conver_to_json_string_udf = udf(conver_to_json_string, StringType())

In [0]:
df = table_processor.dataframe

# Normalise placeholder values: strip 'unknown', then turn '-' and '' into nulls.
columns = ['FechaOpen', 'FechaClick', 'Links', 'IPs', 'Navegadores', 'Plataformas']
for column in columns:
    df = df.withColumn(column, regexp_replace(col(column), 'unknown', ''))
    df = df.withColumn(column, when(col(column) == '-', lit(None)).otherwise(col(column)))
    df = df.withColumn(column, when(col(column) == '', lit(None)).otherwise(col(column)))

# Multi-valued columns are stored as JSON array strings.
columns = ['Links', 'IPs', 'Navegadores', 'Plataformas']
for column in columns:
    df = df.withColumn(column, conver_to_json_string_udf(col(column)))

#FAKE ERRORS
# Deliberately corrupt a few known rows so the screens below have something to catch.
df = df.withColumn('Baja', when((col('email') == 'jhosebh_19@yahoo.com') &
                                (col('baja') == 'SI') &
                                (col('FechaEnvio') == '08/02/2013 18:30'), lit('YES')).otherwise(col('Baja')))

df = df.withColumn('Clicks', when((col('email') == 'jhosebh_19@yahoo.com') &
                                (col('FechaEnvio') == '08/02/2013 18:30'), lit(1000)).otherwise(col('Clicks')))

df = df.withColumn('FechaEnvio', when((col('email') == 'irmis_20@yahoo.com') &
                                (col('FechaEnvio') == '08/02/2013 18:30'), lit('2013-02-08 18:30')).otherwise(col('FechaEnvio')))

# Empty frame that accumulates one row per screen failure.
# row_temp_id is produced by monotonically_increasing_id(), which yields
# 64-bit values, so the column is declared as LongType rather than IntegerType.
schema = StructType([
    StructField("row_temp_id", LongType(), False),
    StructField("value", StringType(), True),
    StructField("screen_code", StringType(), False)
])
dataframe_errors = spark.createDataFrame([], schema)

class AbstractScreenValidator(ABC):
    """Base class for a data-quality screen applied to one column of a dataframe.

    Subclasses provide ``filter`` (the failing rows) and
    ``get_validation_code`` (the screen identifier); ``validate`` records the
    failures and blanks the offending values.
    """

    def __init__(self, dataframe, column, dataframe_errors):
        """Keep the inputs and cache the screen code.

        Args:
            dataframe: Dataframe under validation; must contain 'row_temp_id',
                'audit_passed' and ``column``.
            column: Name of the column this screen checks.
            dataframe_errors: Accumulated failures with columns
                (row_temp_id, value, screen_code).
        """
        self.dataframe = dataframe
        self.column = column
        self.dataframe_errors = dataframe_errors
        self.validation_code = self.get_validation_code()

    @abstractmethod
    def filter(self):
        """Return the rows of ``self.dataframe`` that fail this screen."""
        pass

    @abstractmethod
    def get_validation_code(self) -> str:
        """Return the identifier stored in the 'screen_code' column."""
        pass

    def validate(self):
        """Apply the screen and return the updated dataframe and error frame.

        Failing rows are appended to the error frame with their offending value
        cast to string; in the main dataframe they get ``audit_passed`` set to
        False and the validated column set to null.

        Returns:
            Tuple ``(dataframe, dataframe_errors)`` after applying the screen.
        """
        print('Validating column: ' + self.column + ' with screen ' + self.validation_code)

        # Shape the failures like the error frame: (row_temp_id, value, screen_code).
        fails = (
            self.filter()
            .select('row_temp_id', self.column)
            .withColumnRenamed(self.column, 'value')
            .withColumn('screen_code', lit(self.validation_code))
            .withColumn('value', col('value').cast(StringType()))
        )

        self.dataframe_errors = self.dataframe_errors.union(fails)

        # The failing ids are collected to the driver and matched with isin().
        fails_list = [row.row_temp_id for row in fails.select('row_temp_id').collect()]
        failed = col('row_temp_id').isin(fails_list)

        self.dataframe = self.dataframe.withColumn('audit_passed', when(failed, False).otherwise(col('audit_passed')))
        # Blank the column this validator was built for (self.column), not
        # whatever a module-level `column` variable currently holds.
        self.dataframe = self.dataframe.withColumn(self.column, when(failed, lit(None)).otherwise(col(self.column)))

        return self.dataframe, self.dataframe_errors

class IsNotNullScreenValidator(AbstractScreenValidator):
    """Screen that flags rows whose value in the column is null."""

    def filter(self):
        """Return row id and value for every row where the column is null."""
        missing = col(self.column).isNull()
        return self.dataframe.filter(missing).select('row_temp_id', self.column)

    def get_validation_code(self):
        """Identifier of this screen."""
        return 'is_not_null'
    
class IsInListScreenValidator(AbstractScreenValidator):
    """Screen that flags values not present in a list of allowed values."""

    def set_allowed_values(self, allowed_values):
        """Store the allowed values; must be called before ``filter``/``validate``."""
        self.allowed_values = allowed_values

    def filter(self):
        """Return the rows whose value is not one of the allowed values."""
        is_allowed = col(self.column).isin(self.allowed_values)
        return self.dataframe.where(~is_allowed)

    def get_validation_code(self):
        """Identifier of this screen."""
        return 'is_in_list'
    
class IsEmailFormatScreenValidator(AbstractScreenValidator):
    """Screen that flags non-null values that do not look like an e-mail address."""

    def filter(self):
        """Return the rows whose value is present but does not match the e-mail pattern.

        ``regexp_extract`` yields an empty string (never null) when a non-null
        value does not match, so a null check on its result cannot detect
        malformed addresses; the pattern is therefore tested with ``rlike``.
        Null values are not reported here, matching the date-format screen,
        which also lets nulls pass (nullability has its own screen).
        """
        value = col(self.column)
        return self.dataframe.where(value.isNotNull() & ~value.rlike(r'^.+@.+\..+$'))

    def get_validation_code(self):
        """Identifier of this screen."""
        return 'is_email_format'
    
class IsInBoundsScreenValidator(AbstractScreenValidator):
    """Screen that flags values lying outside the closed interval [min_allowed, max_allowed]."""

    def set_bounds(self, min_allowed, max_allowed):
        """Store both limits; must be called before ``filter``/``validate``."""
        self.min_allowed, self.max_allowed = min_allowed, max_allowed

    def filter(self):
        """Return the rows whose value is below the minimum or above the maximum."""
        value = col(self.column)
        too_low = value < self.min_allowed
        too_high = value > self.max_allowed
        return self.dataframe.where(too_low | too_high)

    def get_validation_code(self):
        """Identifier of this screen."""
        return 'is_in_bounds'

class IsDateFormatScreenValidator(AbstractScreenValidator):
    """Screen that flags non-null values that cannot be parsed with a given date format."""

    def set_format(self, format):
        """Store the date pattern; must be called before ``filter``/``validate``."""
        self.format = format

    def filter(self):
        """Return the rows whose value is present but does not parse with ``self.format``.

        Works on the dataframe and column this validator was constructed with,
        rather than on module-level ``df``/``column`` variables. Null values
        are treated as valid.
        """
        value = col(self.column)
        parses = try_to_date(value, self.format).isNotNull()
        return self.dataframe.where(value.isNotNull() & ~parses)

    def get_validation_code(self):
        """Identifier of this screen."""
        return 'is_date_format'

#IS NOT NULL SCREEN
# Every screen receives the frames produced by the previous one, so failures accumulate.
for column in parsed_validations['is_not_null']:
    screen_validator = IsNotNullScreenValidator(df, column, dataframe_errors)
    df, dataframe_errors = screen_validator.validate()

#IS IN LIST SCREEN
for column, allowed_values in parsed_validations['is_in_list'].items():
    screen_validator = IsInListScreenValidator(df, column, dataframe_errors)
    screen_validator.set_allowed_values(allowed_values)
    df, dataframe_errors = screen_validator.validate()

#IS EMAIL FORMAT
for column in parsed_validations['is_email_format']:
    screen_validator = IsEmailFormatScreenValidator(df, column, dataframe_errors)
    df, dataframe_errors = screen_validator.validate()

#IS IN BOUNDS SCREEN
for column, bounds in parsed_validations['is_in_bounds'].items():
    screen_validator = IsInBoundsScreenValidator(df, column, dataframe_errors)
    screen_validator.set_bounds(bounds['min_allowed'], bounds['max_allowed'])
    df, dataframe_errors = screen_validator.validate()

#IS DATE FORMAT SCREEN
for column, date_format in parsed_validations['is_date_format'].items():
    screen_validator = IsDateFormatScreenValidator(df, column, dataframe_errors)
    screen_validator.set_format(date_format)
    df, dataframe_errors = screen_validator.validate()

df.show(5)

In [0]:
# Rows that failed at least one screen.
subset = df.where(col('audit_passed') == False)
print(subset.count())
subset.show(3)

# show() prints the table itself and returns None, so it is not wrapped in print().
dataframe_errors.show(5)

In [0]:
def screen_data(self):
    """Run the data-quality screens over the claims table held in ``self.data``.

    NOTE(review): this is written as a method (it takes ``self``) but is
    defined at module level; it presumably belongs to a pandas-based claims
    pipeline class - confirm where it should live.

    Reads ``self.context['list_values']``, ``self.data`` (a pandas DataFrame)
    and ``self.facade_screens``. On return, the four date columns of
    ``self.data`` are datetimes and have matching integer ``*_id`` columns
    (yyyymmdd, -2 when the date is missing), the '__screen__' column is
    dropped, ``self.errors`` holds the facade's error detail and
    'audit_passed' is 'Sí' or 'No' per row.
    """
    print('Screening data...')

    certificate_numbers = self.context['list_values']['certificate_numbers']
    cie_identifiers = self.context['list_values']['cie_identifiers']

    date_columns = ['incident_date', 'payment_date', 'first_expense_date', 'month_cont_date']
    date_format = '%d/%m/%Y'
    min_date, max_date = pd.Timestamp('2020-01-01'), pd.Timestamp('2030-12-31')

    self.facade_screens.setup(data=self.data, table_name='claims', identifier='claim_id')

    #CHECK NULL VALUES
    required_columns = ['state', 'cie_id', 'diagnosis', 'incident_date', 'payments', 'coinsurance',
                        'ivarec', 'deductible', 'incident_reason', 'cve_mes', 'month_cont_date',
                        'payment_type', 'provider', 'certificate_number']
    for column in required_columns:
        self.facade_screens.apply_screen_is_missing_value(column)

    #CHECK UNIQUE VALUES
    self.facade_screens.apply_screen_is_not_unique('claim_id')

    #CHECK NOT DIGIT STRING VALUES
    self.facade_screens.apply_screen_is_not_digit_string('certificate_number', 6)

    #CHECK NOT DATE FORMAT VALUES
    for column in date_columns:
        self.facade_screens.apply_screen_is_not_date_format(column, date_format)

    # Parse the dates only after the format screen has looked at the raw strings.
    for column in date_columns:
        self.data[column] = pd.to_datetime(self.data[column], format=date_format)

    #CHECK OUT OF BOUNDS VALUES
    for column in date_columns:
        self.facade_screens.apply_screen_is_out_of_bounds_value(column, min_date, max_date)

    amount_bounds = [('ocurrido', -1000000, 10000000),
                     ('payments', 0, 10000000),
                     ('coinsurance', 0, 1000000),
                     ('ivarec', 0, 1000000),
                     ('deductible', 0, 1000000)]
    for column, lower, upper in amount_bounds:
        self.facade_screens.apply_screen_is_out_of_bounds_value(column, lower, upper)

    #CHECK CHRONOLOGICAL ORDER
    self.facade_screens.apply_screen_is_lower_than('month_cont_date', 'payment_date')
    self.facade_screens.apply_screen_is_lower_than('payment_date', 'first_expense_date')
    self.facade_screens.apply_screen_is_lower_than('first_expense_date', 'incident_date')

    # Integer date keys (yyyymmdd); -2 stands for a missing date.
    for column in date_columns:
        self.data[column + '_id'] = self.data[column].dt.strftime('%Y%m%d')
    for column in date_columns:
        self.data[column + '_id'] = self.data[column + '_id'].fillna('-2').astype(int)

    #CHECK OUT OF LIST VALUES
    self.facade_screens.apply_screen_is_out_of_list_value('certificate_number', certificate_numbers)
    self.facade_screens.apply_screen_is_out_of_list_value('incident_reason', ['Enfermedad', 'Accidente'])
    self.facade_screens.apply_screen_is_out_of_list_value('payment_type', ['Pago Directo'])
    self.facade_screens.apply_screen_is_out_of_list_value('cie_id', cie_identifiers)

    self.errors = self.facade_screens.get_error_events_detail()
    self.data = self.data.drop(columns=['__screen__'])

    #ADD AUDIT FACT COLUMN
    # 'Sí' was previously stored as a mis-decoded (mojibake) byte sequence.
    self.data['audit_passed'] = 'Sí'
    audit_dim_assembler = AuditDimensionAssembler(self.errors, 'claims')
    unsolved_rows = audit_dim_assembler.get_unsolved_rows()
    self.data.loc[self.data['claim_id'].isin(unsolved_rows), 'audit_passed'] = 'No'
     
