In [0]:
import unittest

In [0]:
# Arrange
from datetime import date, timedelta

# Expected schema: column name -> type name. The schema tests compare these
# values against the type strings reported by the DataFrame's dtypes.
data_types = {
    "SubscriptionId": "string",
    "SubscriptionName": "string",
    "SubAccount": "string",
    "ResourceGroup": "string",
    "ResourceLocation": "string",
    "Date": "date",
    "Cost": "double",
    "PricingModel": "string",
    "InvoiceSectionName": "string",
    "CostType": "string",
    "System": "string",
    "Product": "string",
    "Vendor": "string",
    "VendorService": "string",
    "Publisher": "string",
    "sc_system": "string",
    "sc_component": "string",
    "sc_env": "string",
    "sc_region": "string",
    "sc_type": "string",
    "sc_costowner": "string",
    "sc_createdby": "string",
    "sc_provider": "string",
    "Year": "int",
    "Month": "int",
}
# Iterating a dict yields its keys in insertion order.
all_columns = list(data_types)

today = date.today()
# Stepping back one day from the first of the current month lands on the
# last day of the previous month (handles the January -> December rollover).
first_of_month = today.replace(day=1)
last_month = first_of_month - timedelta(days=1)

# Fully qualified name of the table under test.
azure_data = "default.azure_data"

In [0]:
# Stop the notebook early when the table under test is not in the catalog;
# every later cell queries it.
table_found = spark.catalog.tableExists(azure_data)
assert table_found, f"{azure_data} not found"

In [0]:
# Act
# Fetch at most one row per month: a single row is enough to tell whether the
# month has any data, and the resulting DataFrame still carries the schema.
last_month_query = f"""
    SELECT * FROM {azure_data}
    WHERE YEAR(Date) = {last_month.year} AND MONTH(Date) = {last_month.month} 
    LIMIT 1
"""
this_month_query = f"""
    SELECT * FROM {azure_data}
    WHERE YEAR(Date) = {today.year} AND MONTH(Date) = {today.month}
    LIMIT 1
"""

df_last_month = spark.sql(last_month_query)
df_this_month = spark.sql(this_month_query)

# Column names of the queried table, used by the schema tests.
azure_data_columns = df_this_month.columns

In [0]:
# Assert
class TestTable(unittest.TestCase):
    """Schema checks comparing the queried table against ``data_types``."""

    def test_column_names_data_types_exist(self):
        """Each expected column is present and has the expected type name.

        Every column is checked in its own ``subTest`` so that one failing
        column does not hide failures in the columns after it.
        """
        # DataFrame.dtypes is a list of (column name, type name) pairs; build
        # the lookup once instead of issuing a select() per column.
        actual_types = dict(df_this_month.dtypes)
        for column_name, data_type in data_types.items():
            with self.subTest(column=column_name):
                self.assertIn(
                    column_name,
                    actual_types,
                    f"{column_name} not found in {azure_data}",
                )
                self.assertEqual(
                    data_type,
                    actual_types[column_name],
                    f"{column_name} : {data_type} not found in {azure_data}",
                )

    def test_columns_not_exist(self):
        """No expected column is missing from the table."""
        missing_columns = set(all_columns) - set(azure_data_columns)
        # unittest assertion instead of a bare ``assert`` so the check is not
        # stripped when Python runs with -O.
        self.assertEqual(len(missing_columns), 0, f"{azure_data} missing: {missing_columns}")

    def test_extra_columns_exist(self):
        """The table has no columns beyond the expected ones."""
        extra_columns = set(azure_data_columns) - set(all_columns)
        self.assertEqual(len(extra_columns), 0, f"{azure_data} extra: {extra_columns}")

In [0]:
# Assert
class TestData(unittest.TestCase):
    """Presence checks: the table has rows for last month and this month."""

    def test_last_month_data_exist(self):
        """At least one row exists for the previous calendar month."""
        # Compare by value: assertIsNot tests object identity, which only
        # behaves like "!= 0" because CPython happens to cache small ints.
        self.assertNotEqual(
            df_last_month.count(),
            0,
            f"{azure_data} {last_month.year}-{last_month.month} is empty",
        )

    def test_this_month_data_exist(self):
        """At least one row exists for the current calendar month."""
        # unittest assertion instead of a bare ``assert`` so the check is not
        # stripped when Python runs with -O.
        self.assertNotEqual(
            df_this_month.count(),
            0,
            f"{azure_data} {today.year}-{today.month} is empty",
        )

In [0]:
# Run Test
# Collect both test cases into one suite and run them with verbose output.
loader = unittest.TestLoader()
test_table = loader.loadTestsFromTestCase(TestTable)
test_data = loader.loadTestsFromTestCase(TestData)
suite = unittest.TestSuite([test_table, test_data])

result = unittest.TextTestRunner(verbosity=2).run(suite)
# Raise so that the notebook run itself fails when any test fails.
assert result.wasSuccessful(), "Test failed. Refer logs"