### Imports

In [0]:
from pyspark.sql.functions import *
from pyspark.sql.types import *
from datetime import datetime
import json

### Read Raw Tables

In [0]:
# Schema used to parse supplier records: every column is read as a nullable
# string, in the order listed here.
supplier_schema = StructType([
    StructField(column, StringType(), True)
    for column in (
        "supplier_id",
        "supplier_name",
        "contact_name",
        "contact_email",
        "phone_number",
        "country",
        "registration_date",
        "credit_score",
        "supplier_type",
        "industry",
        "annual_revenue",
        "tax_id",
        "rating",
        "is_active",
        "supplier_gender",
    )
])

# Schema used to parse purchase-order records: every column is read as a
# nullable string, in the order listed here.
purchase_orders_schema = StructType([
    StructField(column, StringType(), True)
    for column in (
        "po_id",
        "supplier_id",
        "po_number",
        "po_date",
        "currency",
        "total_amount",
        "payment_terms",
        "approval_status",
        "delivery_country",
        "buyer_name",
        "freight_charges",
        "discount_percent",
        "tax_percent",
        "priority_level",
        "po_category",
    )
])

# Schema used to parse order line-item records. All columns are nullable.
# NOTE(review): expected_delivery_date is the only column in this schema that
# is not declared as a string - confirm that typing it as a date at this stage
# is intentional.
order_line_items_schema = StructType([
    StructField(column, dtype, True)
    for column, dtype in (
        ("line_item_id", StringType()),
        ("po_id", StringType()),
        ("item_id", StringType()),
        ("item_description", StringType()),
        ("item_category", StringType()),
        ("quantity_ordered", StringType()),
        ("unit_price", StringType()),
        ("total_price", StringType()),
        ("expected_delivery_date", DateType()),
        ("unit_of_measure", StringType()),
        ("line_status", StringType()),
        ("item_weight_kg", StringType()),
        ("item_volume_cm3", StringType()),
        ("discount_applied", StringType()),
        ("batch_code", StringType()),
    )
])

# Schema used to parse inventory-stock records: every column is read as a
# nullable string, in the order listed here.
inventory_stock_schema = StructType([
    StructField(column, StringType(), True)
    for column in (
        "inventory_id",
        "item_id",
        "warehouse_id",
        "stock_quantity",
        "reorder_level",
        "safety_stock",
        "last_updated",
        "batch_code",
        "location_code",
        "is_damaged",
        "shelf_life_days",
        "temperature_required",
        "item_condition",
        "in_transit",
        "is_available",
    )
])

# Schema of the audit-log row written at the end of the notebook.
# All columns are nullable; the types are given alongside each name.
audit_schema = StructType([
    StructField(column, dtype, True)
    for column, dtype in (
        ("env", StringType()),
        ("table_name", StringType()),
        ("source_path", StringType()),
        ("target_path", StringType()),
        ("failed_path", StringType()),
        ("quarantine_path", StringType()),
        ("load_type", StringType()),
        ("is_incremental", BooleanType()),
        ("load_timestamp", TimestampType()),
        ("total_records", LongType()),
        ("passed_records", LongType()),
        ("failed_records", LongType()),
        ("status", StringType()),
        ("message", StringType()),
    )
])



In [0]:
# Notebook parameters, exposed as text widgets so a job or a user can set them.
dbutils.widgets.text("file_name", "suppliers_raw")
# The password widget deliberately has no default value: a credential must not
# be committed to the notebook source. Supply it at run time (for example from
# a Databricks secret scope passed in by the calling job).
dbutils.widgets.text("app_password", "")
dbutils.widgets.text("env", "dev")

p_file_name = dbutils.widgets.get("file_name")
app_password = dbutils.widgets.get("app_password")
# Normalise the environment name; it is used later as a key into the loaded
# config dictionary.
env = dbutils.widgets.get("env").strip().lower()

In [0]:
# Normalise the requested file name so the schema lookup ignores surrounding
# whitespace and letter case.
p_file_name = p_file_name.strip().lower()

# File name -> schema used to parse that file's raw JSON payload.
schema_map = dict(
    suppliers_raw=supplier_schema,
    purchase_orders=purchase_orders_schema,
    order_line_items=order_line_items_schema,
    inventory_stock=inventory_stock_schema,
)

# Stop immediately when no schema is registered for the requested file.
schema = schema_map.get(p_file_name)
if schema is None:
    raise ValueError(
        f"No schema found for cleaned p_file_name: '{p_file_name}'. Available keys: {list(schema_map.keys())}"
    )

In [0]:
# Fully qualified name of the raw source table for the requested file.
table_name = "scm_foreign_cata.raw." + p_file_name
# Load the raw table as a DataFrame.
df_raw = spark.read.table(table_name)

In [0]:
# Parse the JSON text held in RAW_DATA with the schema selected for this file,
# then promote the parsed struct's fields to top-level columns.
df_parsed = (
    df_raw
    .withColumn("json", from_json(col("RAW_DATA"), schema))
    .select("json.*")
)
# Render the parsed rows in the notebook output.
df_parsed.display()

### Load DQ rules

In [0]:
# Read the data-quality rule definitions from the workspace config file.
with open("/Workspace/Users/avadhootd.business@gmail.com/SCM/config/dq_rules.json", "r") as f:
    dq_config = json.loads(f.read())

# Pick out the three rule sets configured for the current file. A file with no
# entry, or an entry missing a section, falls back to an empty dict/list.
rules, failed_rules, quarantine_rules = (
    dq_config.get(p_file_name, {}).get(section, fallback)
    for section, fallback in (
        ("bronze_rules", {}),
        ("failed_rules", []),
        ("quarantine_rules", []),
    )
)

In [0]:
%run /Workspace/Users/avadhootd.business@gmail.com/SCM/utils/bronze_rule_validation

In [0]:
# Split the parsed rows into those that pass the bronze rules and those that
# fail them, using the helper brought in by the %run cell above.
df_clean, df_failed = validate_bronze_rules(df_parsed, rules, failed_rules)

# Report the row counts: passed first, failed second, one per line.
print(df_clean.count(), df_failed.count(), sep="\n")


In [0]:
# Read the pipeline configuration; it is indexed below by environment and then
# by file name to find the target tables.
with open("/Workspace/Users/avadhootd.business@gmail.com/SCM/config/config.json", "r") as f:
    config = json.loads(f.read())

In [0]:
# Persist the rows that passed validation, replacing the previous contents of
# the configured "pass" table.
(
    df_clean.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable(config[env][p_file_name]["pass_target"])
)

# Persist the rows that failed validation, replacing the previous contents of
# the configured "failed" table.
(
    df_failed.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable(config[env][p_file_name]["failed_target"])
)


In [0]:
# %run /Workspace/Users/avadhootd.business@gmail.com/SCM/utils/send_alert_email


In [0]:
# failed_table = config[env][p_file_name]["failed_target"]
# quarantine_table = config[env][p_file_name]["quarantine_target"]

In [0]:
# app_password = app_password.strip()

# if df_quarantine.count() > 0 or df_failed.count() > 0:
#     send_gmail_email(
#     app_password=app_password,
#     from_email="avadhootdarbhe@gmail.com",
#     to_email="avadhootd.in@mouritech.com",
#     tables=[failed_table, quarantine_table]
#     )


### Audit Logs

In [0]:
%run /Workspace/Users/avadhootd.business@gmail.com/SCM/utils/audit_status



In [0]:
# Row counts for the audit record: everything parsed, what passed validation,
# and what failed it (evaluated in that order).
total, passed, failed = (
    frame.count() for frame in (df_parsed, df_clean, df_failed)
)

# Derive the audit status and message from the three counts, using the helper
# brought in by the %run cell above.
status, message = get_audit_status_and_message(total, passed, failed)


In [0]:
# Configuration entry for the current environment and file; looked up once
# instead of once per field.
table_config = config[env][p_file_name]

# Single audit row describing this run.
# The record counts reuse total / passed / failed computed in the previous
# cell: re-running count() here would trigger three more Spark jobs and could
# log numbers that differ from the ones `status` and `message` were derived
# from.
audit_data = [{
    "env": env,
    "table_name": p_file_name,
    "source_path": f"scm_foreign_cata.raw.{p_file_name}",
    "target_path": table_config["pass_target"],
    "failed_path": table_config["failed_target"],
    "quarantine_path": table_config["quarantine_target"],
    # NOTE(review): audit_schema declares a load_type column but no value was
    # ever supplied for it. It is set to None explicitly here so the gap is
    # visible - TODO confirm where the real value should come from.
    "load_type": None,
    "is_incremental": table_config["is_incremental"],
    # Naive driver-local timestamp taken when the audit row is built.
    "load_timestamp": datetime.now(),
    "total_records": total,
    "passed_records": passed,
    "failed_records": failed,
    "status": status,
    "message": message,
}]

audit_df = spark.createDataFrame(audit_data, audit_schema)

# Append (never overwrite) so the audit table keeps one row per run.
(
    audit_df.write
    .format("delta")
    .mode("append")
    .saveAsTable(table_config["audit_log_target"])
)