In [0]:
from delta.tables import DeltaTable
from typing import Dict, Tuple

# Name of the Delta table whose history metrics are read by the call to
# get_metrics further down in this cell.
table_name = "sesion_11.gold.ventas"

def get_metrics(
    table_name: str
) -> Tuple[Dict[str, int], str]:
    """Return the operation metrics of the first MERGE/WRITE entry in a Delta table's history.

    The history rows are scanned in the order ``DeltaTable.history()`` yields
    them and the scan stops at the first row whose operation is ``MERGE`` or
    ``WRITE``. (Presumably that is the most recent such operation, since Delta
    lists history newest-first -- confirm against the Delta docs.)

    Expects a ``spark`` session to be available in the global scope.

    Args:
        table_name: Name of the Delta table, passed to ``DeltaTable.forName``.

    Returns:
        A ``(metrics, table_name)`` tuple. ``metrics`` maps every key of the
        row's ``operationMetrics`` to its value converted with ``int``. It is
        an empty dict when the history holds no MERGE/WRITE entry or when the
        matching entry carries no metrics, so callers can always unpack the
        result.

    Raises:
        ValueError: If a metric value cannot be parsed by ``int``.
    """
    delta_table = DeltaTable.forName(spark, table_name)

    for row in delta_table.history().collect():
        if row.operation not in ("MERGE", "WRITE"):
            continue

        # `.get(key, {})` would still hand back None when the key exists with a
        # null value, so normalise with `or {}` before iterating.
        raw_metrics = row.asDict().get("operationMetrics") or {}
        numeric_metrics = {name: int(value) for name, value in raw_metrics.items()}
        return numeric_metrics, table_name

    # No MERGE/WRITE entry found: honour the declared return type instead of
    # implicitly returning None.
    return {}, table_name
    
# Last expression of the cell, so the notebook displays its result: the metrics
# of the first MERGE/WRITE entry found in the history of `table_name`.
get_metrics(table_name)


In [0]:
  job_id BIGINT NOT NULL COMMENT 'Job id del workflow',
  job_run_id BIGINT NOT NULL COMMENT 'Job run id del workflow',
  task_run_id BIGINT NOT NULL COMMENT 'Task run id del workflow',
  job_start_time TIMESTAMP NOT NULL COMMENT 'Fecha inicio del workflow',
  job_end_time TIMESTAMP NOT NULL COMMENT 'Fecha fin del workflow',
  job_duration_seconds BIGINT NOT NULL COMMENT 'Duración del workflow',
  job_status STRING NOT NULL COMMENT 'Estado del workflow',

In [0]:
def insert_metrics(
    metrics_tuple: Tuple[Dict[str, int], str]
) -> None:
    """Build a one-row DataFrame of workflow/merge audit columns from Delta metrics.

    Args:
        metrics_tuple: ``(metrics, table_name)`` pair, the shape returned by
            ``get_metrics``. ``table_name`` must consist of exactly three
            dot-separated parts (catalog, schema, table). ``metrics`` must
            contain the keys referenced as columns below:
            ``numTargetBytesAdded``, ``numTargetFilesAdded``,
            ``numTargetRowsInserted``, ``numTargetRowsUpdated``,
            ``numTargetRowsDeleted`` and ``executionTimeMs``.

    Raises:
        ValueError: If ``table_name`` does not split into exactly three parts.

    NOTE(review):
        * ``job_id``, ``job_run``, ``task_run`` and ``job_start_time`` are
          placeholders; an empty string is not a valid timestamp, so
          ``job_duration_seconds`` is not meaningful until real values are
          wired in.
        * ``lit``, ``col``, ``when``, ``current_timestamp`` and ``spark`` are
          expected to be in scope; no import for them is visible in this
          notebook -- confirm.
        * The DDL fragment in the previous cell names the columns
          ``job_run_id`` / ``task_run_id`` while this function produces
          ``job_run`` / ``task_run`` -- confirm which is intended.
        * ``rows_in`` and ``rows_inserted`` both read ``numTargetRowsInserted``
          -- confirm ``rows_in`` is not meant to come from a source-row metric.
    """
    metrics, table_name = metrics_tuple

    # Placeholder workflow identifiers (see NOTE(review) above).
    job_id = 0
    job_run = 0
    task_run = 0
    job_start_time = ''

    catalog, schema, table = table_name.split('.')

    df_metrics = (
        spark.createDataFrame([metrics])
        .withColumn('job_id', lit(job_id))
        .withColumn('job_run', lit(job_run))
        .withColumn('task_run', lit(task_run))
        # The closing parenthesis of this withColumn call was missing, which
        # made the whole cell a SyntaxError.
        .withColumn('job_start_time', lit(job_start_time).cast('timestamp'))
        .withColumn('job_end_time', current_timestamp())
        # Casting a timestamp to long yields epoch seconds, so the difference
        # is a duration in seconds.
        .withColumn('job_duration_seconds', col('job_end_time').cast('long') - col('job_start_time').cast('long'))
        .withColumn('file_byte', col("numTargetBytesAdded"))
        # An operation that added at least one file is reported as successful.
        .withColumn('job_status',
                    when(col("numTargetFilesAdded") > 0, lit("success"))
                    .otherwise(lit("failed")))
        .withColumn('table', lit(table))
        .withColumn('layer', lit(schema))
        .withColumn('rows_in', col("numTargetRowsInserted"))
        .withColumn('rows_inserted', col("numTargetRowsInserted"))
        .withColumn('rows_updated', col("numTargetRowsUpdated"))
        .withColumn('rows_deleted', col("numTargetRowsDeleted"))
        .withColumn('merge_duration_seconds', col("executionTimeMs")/lit(1000))
    )
    