# Imports and Variables

In [0]:
import json
from pyspark.sql import *
from pyspark.sql.functions import *
from pyspark.sql.functions import explode

# Function Definition

In [0]:
def updateSilverTable(table_name, columns_to_extract, columns_alias, MonthYear, recent_dataframe):
    """Replace one month of a silver table with the latest bronze records.

    Projects ``recent_dataframe`` onto the configured columns (renamed to their
    aliases), registers the result as the temp view ``vw_LatestBronzeRecords``,
    deletes the rows of ``silver.<table_name>`` whose ``LastExtractionMonth``
    equals ``MonthYear``, inserts the rows of the view, and finally sets
    ``LastExtractionDate`` in ``control.integrationcontrolcosts`` to the newest
    ``LastExtractionDate`` found in the view.

    Args:
        table_name: Table name; used as ``silver.<table_name>`` and matched
            against ``TableName`` in the control table.
        columns_to_extract: Comma-separated column paths to read from
            ``recent_dataframe``.
        columns_alias: Comma-separated output names, paired by position with
            ``columns_to_extract``.
        MonthYear: ``LastExtractionMonth`` value whose silver rows are replaced.
        recent_dataframe: DataFrame holding the bronze rows to load.

    Returns:
        ``0`` once the silver and control tables have been updated, or ``None``
        when ``recent_dataframe`` has no rows (no statement is executed).

    Raises:
        ValueError: If ``columns_to_extract`` and ``columns_alias`` do not
            contain the same number of entries.
    """
    # head(1) asks for at most one row, which is enough to detect an empty
    # frame without counting every bronze record.
    if len(recent_dataframe.head(1)) == 0:
        return None

    clm_paths = [path.strip() for path in columns_to_extract.split(",")]
    clm_alias = [name.strip() for name in columns_alias.split(",")]
    if len(clm_paths) != len(clm_alias):
        raise ValueError(
            f"Column configuration mismatch for '{table_name}': "
            f"{len(clm_paths)} columns to extract but {len(clm_alias)} aliases"
        )

    # Select and rename in a single projection on the source frame, so a nested
    # path such as "a.b" is still resolvable when its alias is applied.
    temp_df = recent_dataframe.select(
        *[col(clm_paths[idx]).alias(clm_alias[idx]) for idx in range(len(clm_paths))]
    )

    # Temporary view of the latest bronze records for "table_name".
    temp_df.createOrReplaceTempView("vw_LatestBronzeRecords")

    # Replace the month: drop what silver currently holds for it, then load the
    # view. The month is quoted, like in the bronze queries of this notebook.
    # NOTE(review): DELETE and INSERT are two separate statements; a failure
    # between them leaves the month empty until the next run.
    spark.sql(f"DELETE FROM silver.{table_name} WHERE LastExtractionMonth = '{MonthYear}'")
    spark.sql(f"INSERT INTO silver.{table_name} SELECT * FROM vw_LatestBronzeRecords")

    # Update the integration control LastExtractionDate. The table name is
    # compared with "=" because "_" is a single-character wildcard in LIKE and
    # would let e.g. "azure_costs_2" match other control rows as well.
    spark.sql(
        "UPDATE control.integrationcontrolcosts "
        "SET LastExtractionDate = (SELECT MAX(DISTINCT LastExtractionDate) FROM vw_LatestBronzeRecords) "
        f"WHERE control.integrationcontrolcosts.TableName = '{table_name}' AND Silver = 1"
    )

    return 0

# Getting the elements to be extracted from the control table

In [0]:
# Active control-table rows flagged for the silver layer: table name, column
# mapping and the last recorded extraction timestamp. The result is cached.
control_query = (
    "SELECT TableName, ColumnsToExtract, ColumnsAlias, LastExtractionDate "
    "FROM control.integrationcontrolcosts "
    "WHERE Active = 1 AND Silver = 1 "
    "ORDER BY TableName ASC"
)
tables_to_extract = spark.sql(control_query).cache()
display(tables_to_extract)

TableName,ColumnsToExtract,ColumnsAlias,LastExtractionDate
azure_costs_2,"DepartmentName,AccountName,AccountOwnerId,SubscriptionGuid,SubscriptionName,ResourceGroup,ResourceLocation,AvailabilityZone,UsageDateTime,ProductName,MeterCategory,MeterSubcategory,MeterId,MeterName,MeterRegion,UnitOfMeasure,UsageQuantity,ResourceRate,PreTaxCost,CostCenter,ConsumedService,ResourceType,InstanceId,Tags,OfferId,AdditionalInfo,ServiceInfo1,ServiceInfo2,Currency,FilePath,LastExtractionDate,LastExtractionMonth","DepartmentName,AccountName,AccountOwnerId,SubscriptionGuid,SubscriptionName,ResourceGroup,ResourceLocation,AvailabilityZone,UsageDateTime,ProductName,MeterCategory,MeterSubcategory,MeterId,MeterName,MeterRegion,UnitOfMeasure,UsageQuantity,ResourceRate,PreTaxCost,CostCenter,ConsumedService,ResourceType,InstanceId,Tags,OfferId,AdditionalInfo,ServiceInfo1,ServiceInfo2,Currency,FilePath,LastExtractionDate,LastExtractionMonth",2025-02-02T01:56:28Z


# Explode Bronze Tables "Values" into a Silver Table

In [0]:

for table in tables_to_extract.collect():
    # Months that received bronze data after the extraction recorded in the
    # control table. updateSilverTable overwrites the control table's
    # LastExtractionDate on every call, so months are processed oldest
    # extraction first: the value left behind is then the newest one overall.
    # NOTE(review): a NULL LastExtractionDate in the control table is rendered
    # as the string 'None' in this filter - confirm the first-run behaviour.
    month_to_extract = spark.sql(
        f"SELECT LastExtractionMonth, MAX(LastExtractionDate) AS NewestExtractionDate "
        f"FROM bronze.{table['TableName']} "
        f"WHERE LastExtractionDate > '{table['LastExtractionDate']}' "
        "GROUP BY LastExtractionMonth "
        "ORDER BY NewestExtractionDate ASC"
    )

    for month in month_to_extract.collect():
        # Extracts the most recent entries in the Bronze table for this month
        recent_entries = spark.sql(
            f"SELECT * FROM bronze.{table['TableName']} WHERE LastExtractionMonth = '{month['LastExtractionMonth']}' AND LastExtractionDate = (SELECT MAX(LastExtractionDate) FROM bronze.{table['TableName']} WHERE LastExtractionMonth = '{month['LastExtractionMonth']}')"
        )

        # Updates Silver Layer Tables with the Latest Bronze Records
        tn = updateSilverTable(
            table['TableName'],
            table['ColumnsToExtract'],
            table['ColumnsAlias'],
            month['LastExtractionMonth'],
            recent_entries
        )

In [0]:
%sql
-- Distinct (file path, extraction date, extraction month) combinations present
-- in silver.azure_costs_2, newest extraction first.
SELECT DISTINCT
    filepath,
    lastExtractiondate,
    lastextractionmonth
FROM silver.azure_costs_2
ORDER BY lastExtractiondate DESC

filepath,lastExtractiondate,lastextractionmonth
abfss://costs@enxpowerbidatalakeqa.dfs.core.windows.net/powerbi-costs/enxcf-CF_Subscription_Costs/20250101-20250131/enxcf-CF_Subscription_Costs_308e1884-6ed9-4823-bd81-cd59140c5dd9.csv,2025-02-03T01:58:09Z,202501
abfss://costs@enxpowerbidatalakeqa.dfs.core.windows.net/powerbi-costs/enxcf-CF_Subscription_Costs/20250201-20250228/enxcf-CF_Subscription_Costs_752b65e9-3e08-49e2-a7b4-54d38fbc5512.csv,2025-02-03T01:57:18Z,202502
abfss://costs@enxpowerbidatalakeqa.dfs.core.windows.net/powerbi-costs/powerbi-costs/20241101-20241130/nov2024,2025-01-12T14:57:38Z,202411
abfss://costs@enxpowerbidatalakeqa.dfs.core.windows.net/powerbi-costs/powerbi-costs/20241201-20241231/dez2024,2025-01-10T17:00:53Z,202412
abfss://costs@enxpowerbidatalakeqa.dfs.core.windows.net/powerbi-costs/powerbi-costs/20240501-20240531/powerbi-costs_may2024.csv,2024-12-02T14:39:26Z,202405
abfss://costs@enxpowerbidatalakeqa.dfs.core.windows.net/powerbi-costs/powerbi-costs/20240301-20240331/powerbi-costs_mar2024.csv,2024-12-02T14:39:08Z,202403
abfss://costs@enxpowerbidatalakeqa.dfs.core.windows.net/powerbi-costs/powerbi-costs/20240601-20240630/powerbi-costs_jun2024.csv,2024-12-02T14:38:52Z,202406
abfss://costs@enxpowerbidatalakeqa.dfs.core.windows.net/powerbi-costs/powerbi-costs/20240701-20240731/powerbi-costs_jul2024.csv,2024-12-02T14:38:22Z,202407
abfss://costs@enxpowerbidatalakeqa.dfs.core.windows.net/powerbi-costs/powerbi-costs/20240201-20240229/powerbi-costs_feb2024.csv,2024-12-02T14:37:39Z,202402
abfss://costs@enxpowerbidatalakeqa.dfs.core.windows.net/powerbi-costs/powerbi-costs/20240801-20240831/powerbi-costs_ago2024.csv,2024-12-02T14:37:13Z,202408
