In [0]:
import unittest

In [0]:
# Arrange - Date Range
import calendar
from datetime import date, timedelta

# NOTE(review): the name says "yesterday" but the offset is two days back.
# Possibly intentional to allow for export lag -- confirm with the data owner.
yesterday = date.today() - timedelta(days=2)
year, month = yesterday.year, yesterday.month

# First and last calendar day of the month that contains `yesterday`.
last_day_of_month = calendar.monthrange(year, month)[1]
start_date = date(year, month, 1)
end_date = start_date.replace(day=last_day_of_month)

# Folder-style range label, e.g. "20240101-20240131".
date_range = f"{start_date:%Y%m%d}-{end_date:%Y%m%d}"
print(start_date)

In [0]:
# Arrange - Source files
# Table that the destination query selects from.
raw_azure = "raw_azure"

# Location of the amortized-cost parquet files that are read as the source.
blob_account_name = "ftksitecoresq4tpdxwsgq3a"
blob_container_name = "ingestion"
billing_account_id = "66b5002e-800e-4fe8-805a-1048ad2ca95b:219d7890-c082-4a34-96c4-6d6929eb8c15_2019-05-31"
blob_relative_path = (
    "providers/Microsoft.Billing/billingAccounts/"
    f"{billing_account_id}/{date_range}/amortizedcost/"
)

# The credential is read from a secret scope rather than stored in the notebook.
blob_sas_token = dbutils.secrets.get("mca_storage", "accesskey")

In [0]:
# Host of the storage account's dfs endpoint; shared by the URI and the config key.
storage_host = f"{blob_account_name}.dfs.core.windows.net"

# NOTE: the variable is named `wasbs_path` but the URI uses the abfss:// scheme.
wasbs_path = f"abfss://{blob_container_name}@{storage_host}/{blob_relative_path}"

# Despite its name, `blob_sas_token` is set under the account-key config entry.
spark.conf.set(f"fs.azure.account.key.{storage_host}", blob_sas_token)

In [0]:
# Act
# Sample a single source row for the checked day straight from the parquet files.
target_day = yesterday.strftime("%Y-%m-%d")
source_query = f"""
    SELECT SubscriptionId, SubscriptionName, ResourceGroupName AS ResourceGroup, ResourceLocation, Date::DATE AS Date, CostInBillingCurrency, Tags, PricingModel, InvoiceSectionName, MeterCategory, PublisherName
    FROM parquet.`{wasbs_path}`
    WHERE SubscriptionId IS NOT NULL AND Date = '{target_day}'
    ORDER BY Date DESC
    LIMIT 1
"""

# first() yields None when the query returns no rows; the test cell checks for that.
source_data = spark.sql(source_query).first()
print(source_data)

In [0]:
def _sql_literal(value):
    """Render a Python value as a Spark SQL literal.

    ``None`` becomes ``NULL``. Strings are single-quoted with backslashes and
    single quotes backslash-escaped (assumes the default parser setting where
    backslash escapes are honoured in string literals). Any other value is
    rendered with ``str()``.
    """
    if value is None:
        return "NULL"
    if isinstance(value, str):
        escaped = value.replace("\\", "\\\\").replace("'", "\\'")
        return f"'{escaped}'"
    return str(value)


if source_data is None:
    # Nothing was sampled from the source, so there is nothing to look up.
    # Leave the result empty so the test cell reports the missing source row
    # instead of this cell failing with an AttributeError.
    destination_data = None
else:
    # Destination column -> value of the sampled source row it must match.
    match_values = {
        "SubscriptionId": source_data.SubscriptionId,
        "SubscriptionName": source_data.SubscriptionName,
        "ResourceGroupName": source_data.ResourceGroup,
        "ResourceLocation": source_data.ResourceLocation,
        # NOTE(review): exact equality on the cost value; if the column is a
        # floating-point type this may miss rows due to rounding -- confirm.
        "CostInBillingCurrency": source_data.CostInBillingCurrency,
        "PricingModel": source_data.PricingModel,
        "InvoiceSectionName": source_data.InvoiceSectionName,
        "MeterCategory": source_data.MeterCategory,
        "PublisherName": source_data.PublisherName,
    }
    # `<=>` is null-safe equality, so a NULL source value matches a NULL
    # destination value instead of being compared against the text 'None'.
    match_predicates = " AND ".join(
        f"{column} <=> {_sql_literal(value)}" for column, value in match_values.items()
    )

    destination_data = spark.sql(f"""
        SELECT SubscriptionId, SubscriptionName, ResourceGroupName AS ResourceGroup, ResourceLocation, Date::DATE AS Date, CostInBillingCurrency, Tags, PricingModel, InvoiceSectionName
        FROM {raw_azure}
        WHERE YEAR(Date) = {source_data.Date.year} AND MONTH(Date) = {source_data.Date.month} AND DAY(Date) = {source_data.Date.day}
        AND {match_predicates}
    """).first()
print(destination_data)
print(destination_data)

In [0]:
# Assert
class TestTransform(unittest.TestCase):

    def test_source_data_exist(self):
        assert source_data is not None, f"Source data for {yesterday} is empty"
    
    def test_destination_data_exist(self):
        assert destination_data is not None, f"Destination data for {yesterday} is empty"

In [0]:
# Run Test
# Collect every test method of TestTransform into a suite.
loader = unittest.TestLoader()
test_transform = loader.loadTestsFromTestCase(TestTransform)
suite = unittest.TestSuite()
suite.addTest(test_transform)

# Run verbosely so each test's outcome is printed in the cell output.
test_result = unittest.TextTestRunner(verbosity=2).run(suite)

# Fail the cell when any test failed or errored.
assert test_result.wasSuccessful(), "Test failed. Refer logs"