# Gold Explorations

## Accounts Payable & Supplier Join

In [0]:
# Accounts Payable Supplier Join
# Enriches AP invoices with the matching supplier row. The supplier_id join
# key is removed from the result and the 'supplier' column is repositioned
# to index 4 (the fifth column).

# Source tables
df_invoices = spark.table("ap.silver.ap_invoices")
df_suppliers = spark.table("ap.silver.suppliers")

# Inner join on supplier_id; both copies of the key are dropped afterwards
same_supplier = df_invoices.supplier_id == df_suppliers.supplier_id
df_joined = (
    df_invoices
    .join(df_suppliers, on=same_supplier, how='inner')
    .drop(df_suppliers.supplier_id)
    .drop(df_invoices.supplier_id)
)

# Move 'supplier' to index 4, leaving the relative order of the other columns intact
cols = df_joined.columns
cols.insert(4, cols.pop(cols.index('supplier')))
df_ap = df_joined.select(*cols)

display(df_ap)

## AP vs GL Control Totals

In [0]:
# Load the GL control totals table and show it for inspection
gl_control_totals_table = "ap.silver.gl_control_totals"
df_gl = spark.table(gl_control_totals_table)
display(df_gl)

In [0]:
# Import the functions module under an alias rather than importing `sum` and
# `round` by name: the by-name import rebinds Python's built-in sum() and
# round() in the notebook namespace for every cell that runs afterwards.
from pyspark.sql import functions as F

df_invoices = spark.table("ap.silver.ap_invoices")

# Calculate actual spend: total invoice_amount per month, rounded to 2 decimals
df_ap_agg = (
    df_invoices
    .groupBy('month')
    .agg(
        F.round(F.sum('invoice_amount'), 2)
        .alias('actual_spend')
    )
    .orderBy('month')
)

display(df_ap_agg)

In [0]:
# Check actual spend vs gl_control_totals

from pyspark.sql.functions import col

# A full outer join keeps months that exist in only one of the two inputs.
# An inner join would silently drop them, hiding exactly the gaps this check
# is meant to surface. For such months the missing side is null, so
# "difference" is null as well.
# The sort is applied after the join because a join does not guarantee that
# the ordering of its inputs is preserved.
df_variance = (
    df_ap_agg
    .join(df_gl, on='month', how='full_outer')
    .withColumn("difference", col("actual_spend") - col("gl_approved_spend"))
    .orderBy('month')
)

display(df_variance)
