# Imports and Variables

## Necessary imports

In [0]:
import json
from pyspark.sql import *
from pyspark.sql.functions import *
from pyspark.sql.functions import explode
import pyspark.sql.types as t
from pyspark.sql.window import Window
from pyspark.sql.types import StructField, StructType, ArrayType, StringType, MapType
import datetime

## Variables

In [0]:
# Notebook parameters: each widget is declared with its default value and
# label, then read straight away into the variable used by later cells.
dbutils.widgets.text(
    "StorageAccountName",
    "enxpowerbidatalakeqa",
    "The name of the storage account.",
)
storageAccountName = dbutils.widgets.get("StorageAccountName")

dbutils.widgets.text(
    "FileSystemName",
    "costs",
    "The name of the file system within the storage account.",
)
fileSystemName = dbutils.widgets.get("FileSystemName")

# Data Lake Connection

In [0]:
# Data lake access configuration.
# The storage account key is read from the "dataconnections" secret scope and
# registered in the Spark session configuration for the account's DFS endpoint.
accountKey = dbutils.secrets.get(scope="dataconnections", key="connectionKey")
spark.conf.set(
    f"fs.azure.account.key.{storageAccountName}.dfs.core.windows.net",
    accountKey,
)
# Turn off case-sensitive identifier resolution for Spark SQL in this session.
spark.sql("set spark.sql.caseSensitive=false")

DataFrame[key: string, value: string]

# Getting the elements to be extracted from the control table

In [0]:
# Control-table rows flagged for the Bronze layer and currently active,
# ordered by table name. The result is cached and displayed for inspection.
control_query = (
    "SELECT TableName, LastExtractionDate, Active FROM control.integrationcontrolcosts WHERE Bronze = 1 and Active=1 ORDER BY TableName ASC"
)
tables_to_extract = spark.sql(control_query).cache()
display(tables_to_extract)

TableName,LastExtractionDate,Active
azure_costs_2,2025-02-02T01:56:28Z,True


# Copy the Target Files from the Data Lake to the Delta Lake

In [0]:
# Source folder in the data lake. It does not depend on the table being
# processed, so it is built once outside the loop.
# NOTE(review): every control-table row reads from this same folder — confirm
# this is intended if more than one table is ever active.
source_path = f"abfss://{fileSystemName}@{storageAccountName}.dfs.core.windows.net/powerbi-costs"

for control_row in tables_to_extract.collect():
    table_name = control_row['TableName']
    delta_path = f"dbfs:/Bronze/{table_name}"

    # Load the CSV files (looked up recursively) into the Delta location.
    # Three columns are added to the file contents:
    #   FilePath            - source file path taken from _metadata
    #   LastExtractionDate  - source file modification time taken from _metadata
    #   LastExtractionMonth - first 6 characters of element [5] of the
    #                         '/'-split file path (presumably a YYYYMM-style
    #                         folder name — TODO confirm)
    spark.sql(f"""
    COPY INTO delta.`{delta_path}` 
    FROM (
        SELECT *, 
               _metadata.file_path AS FilePath, 
               _metadata.file_modification_time AS LastExtractionDate,  
               SUBSTRING(SPLIT(FilePath, '/', 7)[5], 1, 6) AS LastExtractionMonth
        FROM '{source_path}'
    ) 
    FILEFORMAT = CSV 
    FORMAT_OPTIONS (
        'recursiveFileLookup' = 'true', 
        'mergeSchema' = 'true', 
        'delimiter' = ',', 
        'escape' = '"', 
        'header' = 'true'
    ) 
    COPY_OPTIONS ('mergeSchema' = 'true')
    """)
    
    # Register the Delta location as a table in the bronze schema (no-op when
    # the table already exists).
    spark.sql(f"CREATE TABLE IF NOT EXISTS bronze.{table_name} USING DELTA LOCATION '{delta_path}'")

In [0]:
%sql
-- NOTE(review): manual reset helper, intentionally left commented out.
-- If enabled, it drops the bronze.azure_costs_2 table; presumably used before a full reload — confirm before running.
--drop table bronze.azure_costs_2

In [0]:
# NOTE(review): manual cleanup helper, intentionally left commented out.
# If enabled, it recursively removes dbfs:/Bronze/azure_costs_2 — confirm before running.
#dbutils.fs.rm('dbfs:/Bronze/azure_costs_2',recurse=True)