# The setup

In [None]:
# Notebook-wide settings read by fn_write_data.

# Root folder for the generated output (note the trailing slash).
data_directory_clean = "Files/sampledata/Data/Wide World Importers/Clean/"

# Connection details for the source SQL database.
azure_sql_url = 'jdbc:sqlserver://servername.database.windows.net'
database_name = 'WideWorldImporters'

# Credentials are empty in this notebook and must be supplied before running.
# NOTE(review): consider loading these from a secret store instead of
# typing them into the notebook.
db_username = ""
db_password = ""

# Process the data

In [None]:
def fn_write_data(table_name, query, number_of_partitions):
    """Run a query against the SQL database and save the result in three formats.

    The query result is loaded into a Spark DataFrame through the
    ``com.microsoft.sqlserver.jdbc.spark`` connector and then written, in
    overwrite mode, as a Delta table, as JSON files and as Parquet files under
    ``<data_directory_clean>/<Format>/<table_name>``.

    Relies on the notebook-level names ``spark``, ``data_directory_clean``,
    ``azure_sql_url``, ``database_name``, ``db_username`` and ``db_password``.

    Args:
        table_name: Name of the output sub-folder under each format folder;
            also shown in the progress messages.
        query: SQL text handed to the connector's ``query`` option.
        number_of_partitions: ``0`` writes the JSON and Parquet output with
            the DataFrame's existing partitioning; any other value is passed
            to ``DataFrame.repartition`` before those two writes. The Delta
            write is never repartitioned.
    """
    # data_directory_clean is defined with a trailing '/', so drop it before
    # joining; otherwise every destination contains a doubled separator
    # ('.../Clean//Delta/...').
    base_directory = data_directory_clean.rstrip('/')
    destination_directory_delta = f'{base_directory}/Delta/{table_name}'
    destination_directory_json = f'{base_directory}/JSON/{table_name}'
    destination_directory_parquet = f'{base_directory}/Parquet/{table_name}'

    # Read the query result from the SQL database over JDBC.
    df = spark.read \
        .format("com.microsoft.sqlserver.jdbc.spark") \
        .option("url", azure_sql_url) \
        .option("query", query) \
        .option("databaseName", database_name) \
        .option("username", db_username) \
        .option("password", db_password) \
        .option("encrypt", "true") \
        .option("hostNameInCertificate", "*.database.windows.net") \
        .load()

    print('Source File: ' + table_name)

    # NOTE(review): df is not cached, so each of the three writes below
    # presumably re-runs the JDBC query - confirm whether caching is wanted.

    # Create the delta table.
    df.write.mode('overwrite').option('overwriteSchema', 'true').format("Delta").save(destination_directory_delta)
    print('Creating Delta Table Complete: ' + table_name)

    # JSON and Parquet share the same partitioning rule, so decide it once.
    if number_of_partitions == 0:
        file_df = df
    else:
        file_df = df.repartition(number_of_partitions)

    # Create the JSON files.
    file_df.write.mode('overwrite').json(destination_directory_json)
    print('Creating JSON File Complete: ' + table_name)

    # Create the Parquet files.
    file_df.write.mode('overwrite').parquet(destination_directory_parquet)
    print('Creating Parquet File Complete: ' + table_name)
    print('')

In [None]:
# Each entry is (output name, source query). Every table follows the same
# pattern - "<Schema>_<Table>" and "SELECT * FROM [<Schema>].[<Table>]" -
# so the list is generated from (schema, table) pairs, in processing order.
table_list = [
    (f"{schema}_{table}", f"SELECT * FROM [{schema}].[{table}]")
    for schema, table in (
        ("Application", "TransactionTypes"),
        ("Application", "SystemParameters"),
        ("Purchasing", "PurchaseOrderLines"),
        ("Purchasing", "PurchaseOrders"),
        ("Purchasing", "Suppliers"),
        ("Purchasing", "SupplierTransactions"),
        ("Sales", "OrderLines"),
        ("Sales", "Orders"),
        ("Sales", "BuyingGroups"),
        ("Sales", "CustomerTransactions"),
        ("Sales", "SpecialDeals"),
        ("Sales", "CustomerCategories"),
        ("Sales", "InvoiceLines"),
        ("Sales", "Invoices"),
        ("Warehouse", "StockItemStockGroups"),
        ("Warehouse", "VehicleTemperatures"),
        ("Warehouse", "StockItemTransactions"),
        ("Warehouse", "PackageTypes"),
        ("Warehouse", "StockItemHoldings"),
        ("Warehouse", "ColdRoomTemperatures"),
        ("Warehouse", "StockItems"),
        ("Application", "People"),
        ("Application", "PaymentMethods"),
        ("Application", "DeliveryMethods"),
        ("Application", "StateProvinces"),
        ("Application", "Cities"),
        ("Application", "Countries"),
        ("Purchasing", "SupplierCategories"),
        ("Sales", "Customers"),
        ("Warehouse", "Colors"),
        ("Warehouse", "StockGroups"),
    )
]

In [None]:
# Export every listed table; 0 keeps the DataFrame's existing partitioning
# for the JSON and Parquet output.
for table in table_list:
    source_name, source_query = table
    fn_write_data(source_name, source_query, 0)