In [0]:
%run ./03-invoice-stream

In [0]:
# Obtain the Spark session used by the rest of this notebook. getOrCreate()
# hands back the already-active session when there is one, otherwise it
# builds a new session with the application name given here.
#
# Optional Delta Lake settings, currently disabled (insert before getOrCreate):
#   .config("spark.jars.packages", "io.delta:delta-core_2.12:2.1.0")
#   .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
#   .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog")
spark = (
    SparkSession.builder
    .appName("InvoiceStreamApp")
    .getOrCreate()
)


In [0]:
# Recursively delete the streaming checkpoint directory before the test run.
# NOTE(review): the directory is spelled "chekpoint" here, while cleanTests()
# removes ".../checkpoint/invoices" -- confirm which spelling the stream in
# ./03-invoice-stream actually writes to before changing either path.
dbutils.fs.rm("/FileStore/data_spark_streaming/chekpoint", recurse=True)

True

In [0]:
class invoiceStreamTestSuite():
    """End-to-end test driver for the invoice streaming job.

    The suite resets the target table and working directories, starts the
    stream, then feeds input files one at a time and checks the row count of
    the ``invoice_line_items`` table after each file.

    Relies on names supplied by the notebook environment: ``spark``,
    ``dbutils`` and ``invoiceStream`` (presumably defined by the
    ``%run ./03-invoice-stream`` cell -- confirm).
    """

    # One entry per test round:
    # (ordinal used in the log line, input file index, expected total row
    #  count of invoice_line_items once that file has been processed).
    _ITERATIONS = (
        ("first", 1, 1253),
        ("second", 2, 2510),
        ("third", 3, 3994),
    )

    def __init__(self):
        """Set the base directory that holds input data, results and checkpoints."""
        self.base_data_dir = "/FileStore/data_spark_streaming"

    def cleanTests(self):
        """Drop the result table and wipe/recreate the working directories."""
        print("Starting Cleanup...", end='')
        spark.sql("drop table if exists invoice_line_items")
        # Also remove the table's files from the warehouse location.
        dbutils.fs.rm("/user/hive/warehouse/invoice_line_items", True)

        dbutils.fs.rm(f"{self.base_data_dir}/checkpoint/invoices", True)
        dbutils.fs.rm(f"{self.base_data_dir}/data/results/invoices", True)

        # Recreate an empty landing directory for ingestData() to copy into.
        dbutils.fs.mkdirs(f"{self.base_data_dir}/data/results/invoices")
        print("Done")

    def ingestData(self, itr):
        """Copy ``invoices_{itr}.json`` into the landing directory.

        Args:
            itr: Index of the input file to copy (used in the file name).
        """
        print("\tStarting Ingestion...", end='')
        source_path = f"{self.base_data_dir}/data/invoices_{itr}.json"
        landing_dir = f"{self.base_data_dir}/data/results/invoices/"
        dbutils.fs.cp(source_path, landing_dir)
        print("Done")

    def assertResult(self, expected_count):
        """Check that ``invoice_line_items`` holds exactly ``expected_count`` rows.

        Args:
            expected_count: Row count the table is expected to have.

        Raises:
            AssertionError: If the actual row count differs.
        """
        print("\tStarting validation...", end='')
        actual_count = spark.sql("select count(*) from invoice_line_items").collect()[0][0]
        assert expected_count == actual_count, f"Test failed! actual count is {actual_count}"
        print("Done")

    def waitForMicroBatch(self, sleep=30):
        """Block for ``sleep`` seconds to give the stream time to process input.

        Args:
            sleep: Number of seconds to wait (default 30).
        """
        import time
        print(f"\tWaiting for {sleep} seconds...", end='')
        time.sleep(sleep)
        print("Done.")

    def _runIteration(self, label, itr, expected_count):
        """Run one ingest -> wait -> validate round and log its outcome."""
        print(f"Testing {label} iteration of invoice stream...")
        self.ingestData(itr)
        self.waitForMicroBatch()
        self.assertResult(expected_count)
        print("Validation passed.\n")

    def runTests(self):
        """Run the full scenario: clean up, start the stream, run every round.

        Raises:
            AssertionError: If any round's row count does not match.
        """
        self.cleanTests()
        iStream = invoiceStream()
        streamQuery = iStream.process()

        try:
            for label, itr, expected_count in self._ITERATIONS:
                self._runIteration(label, itr, expected_count)
        finally:
            # Always stop the streaming query, even when a round fails;
            # otherwise a failed assertion would leave the stream running.
            streamQuery.stop()


In [0]:
# Build the test suite and execute every ingestion/validation round.
isTS = invoiceStreamTestSuite()
isTS.runTests()

Starting Cleanup...Done
Starting Invoice Processing Stream...Done.

Testing first iteration of invoice stream...
	Starting Ingestion...Done
	Waiting for 30 seconds...Done.
	Starting validation...Done
Validation passed.

Testing second iteration of invoice stream...
	Starting Ingestion...Done
	Waiting for 30 seconds...Done.
	Starting validation...Done
Validation passed.

Testing third iteration of invoice stream...
	Starting Ingestion...Done
	Waiting for 30 seconds...Done.
	Starting validation...Done
Validation passed.

