In [0]:
import unittest

In [0]:
# Arrange
from datetime import date, timedelta

# Expected schema of the table under test: column name -> type string,
# in the form the DataFrame reports through `dtypes`.
data_types = {
    "AdditionalInfo": "string",
    "BenefitId": "string",
    "BenefitName": "string",
    "BillingAccountId": "string",
    "BillingAccountName": "string",
    "BillingCurrency": "string",
    "BillingPeriodEndDate": "timestamp",
    "BillingPeriodStartDate": "timestamp",
    "BillingProfileId": "string",
    "BillingProfileName": "string",
    "ChargeType": "string",
    "ConsumedService": "string",
    "CostAllocationRuleName": "string",
    "CostCenter": "string",
    "CostInBillingCurrency": "decimal(38,18)",
    "CostInPricingCurrency": "decimal(38,18)",
    "CostInUsd": "decimal(38,18)",
    "Date": "timestamp",
    "EffectivePrice": "decimal(38,18)",
    "ExchangeRateDate": "timestamp",
    "ExchangeRatePricingToBilling": "decimal(38,18)",
    "Frequency": "string",
    "InvoiceId": "string",
    "InvoiceSectionId": "string",
    "InvoiceSectionName": "string",
    "IsAzureCreditEligible": "string",
    "Location": "string",
    "MeterCategory": "string",
    "MeterId": "string",
    "MeterName": "string",
    "MeterRegion": "string",
    "MeterSubCategory": "string",
    "PaygCostInBillingCurrency": "decimal(38,18)",
    "PaygCostInUsd": "decimal(38,18)",
    "PayGPrice": "decimal(38,18)",
    "PreviousInvoiceId": "string",
    "PricingCurrency": "string",
    "PricingModel": "string",
    "ProductId": "string",
    "ProductName": "string",
    "ProductOrderId": "string",
    "ProductOrderName": "string",
    "Provider": "string",
    "PublisherId": "string",
    "PublisherName": "string",
    "PublisherType": "string",
    "Quantity": "decimal(38,18)",
    "ResellerMpnId": "string",
    "ResellerName": "string",
    "ReservationId": "string",
    "ReservationName": "string",
    "ResourceGroupName": "string",
    "ResourceId": "string",
    "ResourceLocation": "string",
    "ServiceFamily": "string",
    "ServiceInfo1": "string",
    "ServiceInfo2": "string",
    "ServicePeriodEndDate": "timestamp",
    "ServicePeriodStartDate": "timestamp",
    "SubscriptionId": "string",
    "SubscriptionName": "string",
    "Tags": "string",
    "Term": "string",
    "UnitOfMeasure": "string",
    "UnitPrice": "decimal(38,18)",
}

# Expected column names, in the same order as the schema mapping above.
all_columns = list(data_types)

# Reference dates: today, and the last calendar day of the previous month
# (first day of the current month minus one day).
today = date.today()
last_month = date(today.year, today.month, 1) - timedelta(days=1)

# Name of the table under test.
raw_azure = "raw_azure"

In [0]:
# Precondition: stop here if the catalog does not know the table under test.
assert spark.catalog.tableExists(raw_azure), f"{raw_azure} not found"

In [0]:
# Act
# One query per month, each capped at a single row by LIMIT 1: the tests only
# ask whether a row exists and what the schema looks like.
last_month_query = f"""
    SELECT * FROM {raw_azure}
    WHERE YEAR(Date) = {last_month.year} AND MONTH(Date) = {last_month.month} 
    LIMIT 1
"""
this_month_query = f"""
    SELECT * FROM {raw_azure}
    WHERE YEAR(Date) = {today.year} AND MONTH(Date) = {today.month}
    LIMIT 1
"""

df_last_month = spark.sql(last_month_query)
df_this_month = spark.sql(this_month_query)

# Column names of the table as seen through the current-month sample.
raw_azure_columns = df_this_month.columns

In [0]:
# Assert
class TestTable(unittest.TestCase):

    def test_column_names_data_types_exist(self):
        for column_name, data_type in data_types.items():
            assert column_name in df_this_month.columns, f"{column_name} not found in {raw_azure}"
            assert data_type == df_this_month.select(column_name).dtypes[0][1], f"{column_name} : {data_type} not found in {raw_azure}"

    def test_columns_not_exist(self):
        missing_columns = set(all_columns) - set(raw_azure_columns)
        assert len(missing_columns) == 0, f"{raw_azure} missing: {missing_columns}"

    def test_extra_columns_exist(self):
        extra_columns = set(raw_azure_columns) - set(all_columns)
        assert len(extra_columns) == 0, f"{raw_azure} extra: {extra_columns}"			

In [0]:
# Assert
class TestData(unittest.TestCase):

    def test_last_month_data_exist(self):
        assert df_last_month.count() != 0, f"{raw_azure} {last_month.year}-{last_month.month} is empty"
    
    def test_this_month_data_exist(self):
        assert df_this_month.count() != 0, f"{raw_azure} {today.year}-{today.month} is empty"

In [0]:
# Run Test 
# Collect both test cases into a single suite so one run reports on all checks.
loader = unittest.TestLoader()
test_table = loader.loadTestsFromTestCase(TestTable)
test_data = loader.loadTestsFromTestCase(TestData)
suite = unittest.TestSuite([test_table, test_data])

# Run the suite as its own statement: an `assert` expression is removed when
# Python runs with optimizations enabled, which would skip the run entirely.
result = unittest.TextTestRunner(verbosity=2).run(suite)

# Fail the cell with the same exception type and message as before.
if not result.wasSuccessful():
    raise AssertionError("Test failed. Refer logs")