In [0]:
import unittest

In [0]:
# Arrange - Date Range
import calendar
from datetime import date, timedelta

# Reference day used by every check in this notebook: today minus two days.
# NOTE(review): the variable is called `yesterday` but the offset is 2 days —
# confirm the extra day is deliberate (e.g. an allowance for export lag).
yesterday = date.today() - timedelta(days=2)
year, month = yesterday.year, yesterday.month

# First and last calendar day of the month that contains the reference day.
start_date = yesterday.replace(day=1)
end_date = start_date.replace(day=calendar.monthrange(year, month)[1])

# Month span rendered as YYYYMMDD-YYYYMMDD.
date_range = "-".join(day.strftime("%Y%m%d") for day in (start_date, end_date))
print(start_date)

In [0]:
# Arrange - Source files
# Names of the two tables the checks compare: the raw source and the transformed destination.
raw_azure = "raw_azure"
azure_data = "azure_data"

# Storage account and container holding the cost export.
blob_account_name = "ftksitecoresq4tpdxwsgq3a"
blob_container_name = "ingestion"

# Folder of the amortized-cost export for the month span held in `date_range`.
blob_relative_path = (
    "providers/Microsoft.Billing/billingAccounts/"
    "66b5002e-800e-4fe8-805a-1048ad2ca95b:219d7890-c082-4a34-96c4-6d6929eb8c15_2019-05-31/"
    f"{date_range}/amortizedcost/"
)

# Credential read from the "mca_storage" secret scope.
# NOTE(review): the variable says "sas_token" but the secret key is "accesskey"
# and the value is later set as `fs.azure.account.key.*` — confirm which it is.
blob_sas_token = dbutils.secrets.get("mca_storage", "accesskey")

In [0]:
# Full URI of the export folder inside the storage container.
# NOTE(review): the variable is named "wasbs" but the URI scheme is abfss.
wasbs_path = (
    f"abfss://{blob_container_name}@{blob_account_name}.dfs.core.windows.net/{blob_relative_path}"
)

# Hand the storage credential to Spark as the account key for this account's dfs endpoint.
spark.conf.set(
    f"fs.azure.account.key.{blob_account_name}.dfs.core.windows.net",
    blob_sas_token,
)

In [0]:
# Act
# Sample a single row from the raw table for the reference day. Only rows with
# a SubscriptionId and with every sc_* tag different from '' qualify.
source_query = f"""
    SELECT SubscriptionId, SubscriptionName, ResourceGroupName AS ResourceGroup, ResourceLocation, Date::DATE AS Date, CostInBillingCurrency::DOUBLE AS CostInBillingCurrency, PricingModel, InvoiceSectionName, Tags:sc_system, Tags:sc_component, Tags:sc_region, Tags:sc_env, Tags:sc_type, Tags:sc_costowner, Tags:sc_createdby, Tags:sc_provider, MeterCategory, PublisherName
    FROM {raw_azure}
    WHERE SubscriptionId IS NOT NULL AND Date = '{yesterday.strftime("%Y-%m-%d")}'
    AND Tags:sc_system != '' AND Tags:sc_component != '' AND Tags:sc_region != '' AND Tags:sc_env != '' AND Tags:sc_type != '' AND Tags:sc_costowner != '' AND Tags:sc_createdby != '' AND Tags:sc_provider != ''
    ORDER BY Date DESC
    LIMIT 1
"""

# The test cell treats a `None` result here as "no source data".
source_data = spark.sql(source_query).first()
print(source_data)

In [0]:
def _sql_str(value):
    """Render ``value`` as a single-quoted Spark SQL string literal.

    Backslashes and single quotes are backslash-escaped so a value such as
    ``Bob's RG`` cannot end the literal early or change the query.
    """
    escaped = str(value).replace("\\", "\\\\").replace("'", "\\'")
    return f"'{escaped}'"


# Look up the transformed row that corresponds to the sampled source row.
if source_data is None:
    # Nothing to look up. Leave `destination_data` empty so the test cell can
    # report the missing source row, instead of this cell dying with an
    # AttributeError on `source_data.Date`.
    destination_data = None
else:
    destination_data = spark.sql(f"""
    SELECT * FROM {azure_data}
    WHERE Year = {source_data.Date.year} AND Month = {source_data.Date.month} AND DAY(Date) = {source_data.Date.day}
    AND SubscriptionId = {_sql_str(source_data.SubscriptionId)} AND SubscriptionName = {_sql_str(source_data.SubscriptionName)} AND ResourceGroup = {_sql_str(source_data.ResourceGroup)} AND ResourceLocation = {_sql_str(source_data.ResourceLocation)} AND Cost = {source_data.CostInBillingCurrency}
    AND PricingModel = {_sql_str(source_data.PricingModel)} AND InvoiceSectionName = {_sql_str(source_data.InvoiceSectionName)} AND VendorService = {_sql_str(source_data.MeterCategory)} AND Publisher = {_sql_str(source_data.PublisherName)}
""").first()
print(destination_data)

In [0]:
# Assert
class TestTransform(unittest.TestCase):
    def test_source_data_exist(self):
        assert source_data is not None, f"Source data for {yesterday} is empty"
    
    def test_destination_data_exist(self):
        assert destination_data is not None, f"Destination data for {yesterday} is empty"

    def test_source_to_destination_tag_matches(self):
        assert source_data.sc_system == destination_data.sc_system, f"sc_system Source: {source_data.sc_system} not equal to Destination: {destination_data.sc_system}"
        assert source_data.sc_component == destination_data.sc_component, f"sc_component Source: {source_data.sc_component} not equal to Destination: {destination_data.sc_component}"
        assert source_data.sc_region == destination_data.sc_region, f"sc_region Source: {source_data.sc_region} not equal to Destination: {destination_data.sc_region}"
        assert source_data.sc_env == destination_data.sc_env, f"sc_env Source: {source_data.sc_env} not equal to Destination: {destination_data.sc_env}"
        assert source_data.sc_type == destination_data.sc_type, f"sc_type Source: {source_data.sc_type} not equal to Destination: {destination_data.sc_type}"
        assert source_data.sc_costowner == destination_data.sc_costowner, f"sc_costowner Source: {source_data.sc_costowner} not equal to Destination: {destination_data.sc_costowner}"
        assert source_data.sc_createdby == destination_data.sc_createdby, f"sc_createdby Source: {source_data.sc_createdby} not equal to Destination: {destination_data.sc_createdby}"
        assert source_data.sc_provider == destination_data.sc_provider, f"sc_provider Source: {source_data.sc_provider} not equal to Destination: {destination_data.sc_provider}"

    def test_source_SubscriptionName_to_destination_SubAccount_matches(self):
        assert source_data.SubscriptionName == destination_data.SubAccount, f"SubAccount: {destination_data.SubAccount} not equal to SubcriptionName: {source_data.SubscriptionName}"

In [0]:
# Run Test
# Gather every test method of TestTransform into a single suite.
test_transform = unittest.TestLoader().loadTestsFromTestCase(TestTransform)
suite = unittest.TestSuite()
suite.addTest(test_transform)

# Run with per-test output; the assert makes this cell raise when any test
# did not pass.
assert (
    unittest.TextTestRunner(verbosity=2).run(suite).wasSuccessful()
), "Test failed. Refer logs"