# Bronze Layer Exploration

## Suppliers

In [0]:
from pyspark.sql.functions import current_timestamp, to_timestamp_ntz 
from pyspark.sql.types import StructType, StructField, StringType, IntegerType

In [0]:
# Explicit schema for the suppliers data: an integer `SupplierID` and a string
# `Supplier`, both nullable.
# NOTE(review): no cell visible in this notebook passes this schema to a
# reader - confirm it is still needed.
suppliers_schema = (
    StructType()
    .add("SupplierID", IntegerType(), True)
    .add("Supplier", StringType(), True)
)

In [0]:
# ============================================================================
# CONFIGURATION
# ============================================================================
# Dataset name: used as the folder name in storage and as the table name
TABLE = "suppliers"

# Source location (container and storage account of the abfss:// path)
CONTAINER = "bronze"
STORAGE_ACCOUNT = "sd0212"

# Target location (catalog and schema of the table written later on)
SCHEMA = "bronze"
CATALOG = "ap"

In [0]:
# Location of the Delta data inside the storage container
path = f"abfss://{CONTAINER}@{STORAGE_ACCOUNT}.dfs.core.windows.net/{TABLE}"

# First look at the data exactly as stored, before any transformation
df = spark.read.load(path, format='delta')
display(df)

In [0]:
# Path to storage
path = f"abfss://{CONTAINER}@{STORAGE_ACCOUNT}.dfs.core.windows.net/{TABLE}"

# Read data from storage.
# No reader options are set: "header" is a CSV-source option and has no
# meaning for a Delta read, so it is not passed here.
df = (
    spark
    .read
    .format("delta")
    .load(path)
    # Stamp every row with the load time, stored without a time zone
    .withColumn("ingest_time", to_timestamp_ntz(current_timestamp()))
    # Remove the rescued-data column; drop() does nothing if it is absent
    .drop("_rescued_data")
)

display(df)

In [0]:
# Normalise the column names: lowercase all of them first, then give the
# supplier key its snake_case name.
lowercase_names = [name.lower() for name in df.columns]
df = df.toDF(*lowercase_names).withColumnRenamed("supplierid", "supplier_id")
display(df)

In [0]:
# Three-part name of the target table
table_name = f"{CATALOG}.{SCHEMA}.{TABLE}"

# Save as a Delta table, replacing whatever the table held before
df.write.format("delta").mode("overwrite").saveAsTable(table_name)

# Read the table back to check what was written
spark.read.table(table_name).display()

## Python Files

### Suppliers

In [0]:
"""
Bronze layer - Suppliers
===================================

"""

# ============================================================================
# DEPENDENCIES
# ============================================================================
from pyspark.sql.functions import current_timestamp, to_timestamp_ntz

# ============================================================================
# CONFIGURATION
# ============================================================================
# Storage configuration
STORAGE_ACCOUNT = "sd0212"
CONTAINER = "bronze"
CATALOG = "ap"
SCHEMA = "bronze"
TABLE = "suppliers"


# ============================================================================
# TABLE CREATION
# ============================================================================
# Path to storage
path = f"abfss://{CONTAINER}@{STORAGE_ACCOUNT}.dfs.core.windows.net/{TABLE}"

# Three-part name of the target table (built once, used for write and read)
table_name = f"{CATALOG}.{SCHEMA}.{TABLE}"

# Read data from storage.
# No reader options are set: "header" is a CSV-source option and has no
# meaning for a Delta read, so it is not passed here.
df = (
    spark
    .read
    .format("delta")
    .load(path)
    # Stamp every row with the load time, stored without a time zone
    .withColumn("ingest_time", to_timestamp_ntz(current_timestamp()))
    # Remove the rescued-data column; drop() does nothing if it is absent
    .drop("_rescued_data")
)

# Renaming columns to lowercase, then giving the supplier key a snake_case name
df = (
    df
    .toDF(*[c.lower() for c in df.columns])
    .withColumnRenamed("supplierid", "supplier_id")
)

# Write data to table, replacing any previous contents
(
    df
    .write
    .format("delta")
    .mode("overwrite")
    .saveAsTable(table_name)
)


# Display table
spark.read.table(table_name).display()

In [0]:
"""
Bronze layer - GL Control Totals
===================================

"""

# ============================================================================
# DEPENDENCIES
# ============================================================================
from pyspark.sql.functions import current_timestamp, to_timestamp_ntz

# ============================================================================
# CONFIGURATION
# ============================================================================
# Storage configuration
STORAGE_ACCOUNT = "sd0212"
CONTAINER = "bronze"
CATALOG = "ap"
SCHEMA = "bronze"
TABLE = "gl_control_totals"


# ============================================================================
# TABLE CREATION
# ============================================================================
# Path to storage
path = f"abfss://{CONTAINER}@{STORAGE_ACCOUNT}.dfs.core.windows.net/{TABLE}"

# Three-part name of the target table (built once, used for write and read)
table_name = f"{CATALOG}.{SCHEMA}.{TABLE}"

# Read data from storage.
# No reader options are set: "header" is a CSV-source option and has no
# meaning for a Delta read, so it is not passed here.
df = (
    spark
    .read
    .format("delta")
    .load(path)
    # Stamp every row with the load time, stored without a time zone
    .withColumn("ingest_time", to_timestamp_ntz(current_timestamp()))
    # Remove the rescued-data column; drop() does nothing if it is absent
    .drop("_rescued_data")
)

# Renaming columns to lowercase (no further renames are applied to this table)
df = (
    df
    .toDF(*[c.lower() for c in df.columns])
)

# Write data to table, replacing any previous contents
(
    df
    .write
    .format("delta")
    .mode("overwrite")
    .saveAsTable(table_name)
)


# Display table
spark.read.table(table_name).display()

### AP Invoices

In [0]:
"""
Bronze layer - AP Invoices
===================================

"""

# ============================================================================
# DEPENDENCIES
# ============================================================================
from pyspark.sql.functions import current_timestamp, to_timestamp_ntz

# ============================================================================
# CONFIGURATION
# ============================================================================
# Storage configuration
STORAGE_ACCOUNT = "sd0212"
CONTAINER = "bronze"
CATALOG = "ap"
SCHEMA = "bronze"
TABLE = "ap_invoices"

# Lower-cased source column name -> snake_case name used in the table.
# Names that are not present in the data are simply left out of the rename.
COLUMN_RENAMES = {
    "invoiceid": "invoice_id",
    "invoicedate": "invoice_date",
    "duedate": "due_date",
    "paiddate": "paid_date",
    "supplierid": "supplier_id",
    "costcenter": "cost_center",
    "invoiceamount": "invoice_amount",
    "unitprice_po": "unit_price_po",
    # NOTE(review): "invoice" is shortened to "inv" only in this target name -
    # confirm that consumers of the table expect "unit_price_inv".
    "unitprice_invoice": "unit_price_inv",
}


# ============================================================================
# TABLE CREATION
# ============================================================================
# Path to storage
path = f"abfss://{CONTAINER}@{STORAGE_ACCOUNT}.dfs.core.windows.net/{TABLE}"

# Three-part name of the target table (built once, used for write and read)
table_name = f"{CATALOG}.{SCHEMA}.{TABLE}"

# Read data from storage.
# No reader options are set: "header" is a CSV-source option and has no
# meaning for a Delta read, so it is not passed here.
df = (
    spark
    .read
    .format("delta")
    .load(path)
    # Stamp every row with the load time, stored without a time zone
    .withColumn("ingest_time", to_timestamp_ntz(current_timestamp()))
    # Remove the rescued-data column; drop() does nothing if it is absent
    .drop("_rescued_data")
    # The currency column is not carried into the table.
    # NOTE(review): confirm that discarding it at this layer is intentional.
    .drop("currency")
)

# Renaming columns to lowercase, then applying all snake_case renames at once
df = (
    df
    .toDF(*[c.lower() for c in df.columns])
    .withColumnsRenamed(COLUMN_RENAMES)
)

# Write data to table, replacing any previous contents
(
    df
    .write
    .format("delta")
    .mode("overwrite")
    .saveAsTable(table_name)
)


# Display table
spark.read.table(table_name).display()