## Read Files From DBFS Location

In [0]:
from pyspark.sql import SparkSession

# Create (or reuse) the SparkSession used by the cells below.
# The spark-excel connector is requested by its Maven coordinates
# (groupId:artifactId:version). The groupId was previously empty ("com.:"),
# which is not a resolvable coordinate; it must be "com.crealytics", the same
# namespace as the "com.crealytics.spark.excel" format used for the Excel read.
# NOTE(review): spark.jars.packages is presumably only applied when a new
# session is started; if getOrCreate() returns an already-running session the
# library may have to be installed on the cluster instead -- confirm.
spark = (
    SparkSession.builder
    .appName("ReadExcelWithHeader")
    .config(
        "spark.jars.packages",
        "com.crealytics:spark-excel_2.12:3.1.3_0.18.4",
    )
    .getOrCreate()
)

In [0]:
# Locations of the three source files, one per dataset.
order_path = "/FileStore/tables/Orders.json"        # orders, JSON
product_path = "/FileStore/tables/Products.csv"     # products, CSV
customer_path = "/FileStore/tables/Customer.xlsx"   # customers, Excel workbook

In [0]:
# Order DataFrame: read the orders JSON file in multi-line mode, so a single
# JSON record may span several lines of the file.
# The former 'inferSchema' and 'header' options were removed: they are CSV
# reader options and are not among the JSON data source's options, so they had
# no effect here. The JSON reader infers the schema from the data by default.
order_df = spark.read.option('multiline', True).json(order_path)
display(order_df)

In [0]:
# Product DataFrame: load the products CSV with the first row treated as the
# header and with schema inference switched on.
product_reader = spark.read.format('csv')
product_reader = product_reader.option('header', 'true')
product_reader = product_reader.option('inferSchema', 'true')
product_df = product_reader.load(product_path)
display(product_df)

In [0]:
# Bare expression: evaluates the session object so the notebook renders it as
# this cell's output.
spark

In [0]:
# Customer DataFrame: read the customer workbook through the spark-excel data
# source, with the header and schema-inference options both enabled.
excel_reader = spark.read.format("com.crealytics.spark.excel")
excel_reader = excel_reader.option("header", "true")
excel_reader = excel_reader.option("inferSchema", "true")
customer_df = excel_reader.load(customer_path)
display(customer_df)

## Create raw tables for each source dataset

In [0]:
# Show the order column names as the cell output.
order_df.columns
# NOTE(review): disabled direct write of the frame to ORDER_RAW -- presumably
# superseded by the Delta write to pei_adb_proj.bronze.Order_RAW; confirm and
# delete if no longer needed.
# order_df.write.mode('overwrite').saveAsTable('ORDER_RAW')

In [0]:
# Rename the order columns whose names contain spaces to underscore-separated
# names ("Ship ..." becomes "Shipping_..."). Renames are applied one at a time
# in the order listed.
order_column_renames = {
    'Customer ID': 'Customer_ID',
    'Order ID': 'Order_ID',
    'Order Date': 'Order_Date',
    'Product ID': 'Product_ID',
    'Ship Date': 'Shipping_Date',
    'Ship Mode': 'Shipping_Mode',
}
for current_name, new_name in order_column_renames.items():
    order_df = order_df.withColumnRenamed(current_name, new_name)

In [0]:
# Show the product column names as the cell output.
product_df.columns

In [0]:
# Rename the product columns whose names contain spaces or a hyphen to
# underscore-separated names, applied one at a time in the order listed.
product_column_renames = [
    ('Product ID', 'Product_ID'),
    ('Sub-Category', 'Sub_Category'),
    ('Product Name', 'Product_Name'),
    ('Price per product', 'Price_Per_Product'),
]
for current_name, new_name in product_column_renames:
    product_df = product_df.withColumnRenamed(current_name, new_name)

In [0]:
# Show the customer column names as the cell output.
customer_df.columns

In [0]:
# Rename the customer columns whose names contain spaces to
# underscore-separated names, one rename per statement.
customer_df = customer_df.withColumnRenamed('Customer ID', 'Customer_ID')
customer_df = customer_df.withColumnRenamed('Customer Name', 'Customer_Name')
customer_df = customer_df.withColumnRenamed('Postal Code', 'Postal_Code')

In [0]:
# Print the available catalogs, then switch to the project catalog.
catalogs_df = spark.sql('show catalogs')
catalogs_df.show()
spark.sql('use catalog pei_adb_proj')

# Print the databases visible after the switch, then select `bronze`.
databases_df = spark.sql('show databases')
databases_df.show()
spark.sql('use bronze')

**The schema check has been tested manually.**

In [0]:
# Save the order DataFrame as the Delta table pei_adb_proj.bronze.Order_RAW.
# Overwrite mode plus overwriteSchema replaces both existing data and schema;
# the column mapping mode is set to 'name'.
order_writer = (
    order_df.write.format('delta')
    .options(**{'delta.columnMapping.mode': 'name', 'overwriteSchema': 'true'})
    .mode('overwrite')
)
order_writer.saveAsTable("pei_adb_proj.bronze.Order_RAW")

In [0]:
# Save the customer DataFrame as the Delta table
# pei_adb_proj.bronze.Customer_RAW. Overwrite mode plus overwriteSchema
# replaces both existing data and schema; the column mapping mode is 'name'.
customer_writer = (
    customer_df.write.format('delta')
    .options(**{'delta.columnMapping.mode': 'name', 'overwriteSchema': 'true'})
    .mode('overwrite')
)
customer_writer.saveAsTable("pei_adb_proj.bronze.Customer_RAW")

In [0]:
# Save the product DataFrame as the Delta table
# pei_adb_proj.bronze.Product_RAW. Overwrite mode plus overwriteSchema
# replaces both existing data and schema; the column mapping mode is 'name'.
product_writer = (
    product_df.write.format('delta')
    .options(**{'delta.columnMapping.mode': 'name', 'overwriteSchema': 'true'})
    .mode('overwrite')
)
product_writer.saveAsTable("pei_adb_proj.bronze.Product_RAW")