### Imports

In [0]:
from pyspark.sql.functions import *
from pyspark.sql.types import *
from datetime import datetime
import json

In [0]:
# Key of the source entity, used below for config and DQ-rule lookups.
tb_name = "inventory_stock"
# Name written to the audit record for the silver target; same entity with a "silver_" prefix.
silver_tb_name = f"silver_{tb_name}"

### Read Raw Tables

In [0]:
# Column layout declared for the inventory stock data.
# Every field is nullable; the schema is built from (name, type) pairs.
inventory_stock_schema = StructType([
    StructField(field_name, field_type, True)
    for field_name, field_type in [
        ("inventory_id", IntegerType()),
        ("item_id", StringType()),
        ("warehouse_id", StringType()),
        ("stock_quantity", IntegerType()),
        ("reorder_level", IntegerType()),
        ("safety_stock", IntegerType()),
        ("last_updated", DateType()),
        ("batch_code", StringType()),
        ("location_code", IntegerType()),
        ("is_damaged", StringType()),
        ("shelf_life_days", IntegerType()),
        ("temperature_required", StringType()),
        ("item_condition", StringType()),
        ("in_transit", StringType()),
        ("is_available", StringType()),
    ]
])

# Layout of one audit-log row; used when the audit DataFrame is created
# at the end of the notebook. Every field is nullable.
audit_schema = StructType([
    StructField(field_name, field_type, True)
    for field_name, field_type in [
        ("env", StringType()),
        ("table_name", StringType()),
        ("source_path", StringType()),
        ("target_path", StringType()),
        ("quarantine_path", StringType()),
        ("load_timestamp", TimestampType()),
        ("total_records", LongType()),
        ("passed_records", LongType()),
        ("quarantine_records", LongType()),
        ("status", StringType()),
        ("message", StringType()),
    ]
])



In [0]:
# Expose an "env" text widget (default "dev") and read its current value.
dbutils.widgets.text("env", "dev")
env = dbutils.widgets.get("env")
# Normalise so the value can be used directly as a key into the config dict.
env = env.strip().lower()

In [0]:
# Load the pipeline configuration; it is indexed by environment name further below.
config_path = "/Workspace/Users/avadhootd.business@gmail.com/sum/SCM/config/config.json"
with open(config_path, "r") as f:
    config = json.loads(f.read())

In [0]:
# Resolve the source table from the "pass_target" entry configured for this
# entity in the selected environment, then read it as a DataFrame.
entity_config = config[env][tb_name]
table_name = entity_config["pass_target"]
df_raw = spark.read.table(table_name)

In [0]:
# Columns cast to integer only when the raw value consists solely of digits;
# any other value becomes NULL.
integer_columns = [
    "inventory_id",
    "stock_quantity",
    "reorder_level",
    "safety_stock",
    "location_code",
    "shelf_life_days",
]
# Text columns that are lower-cased.
lowercase_columns = ["temperature_required", "is_damaged", "is_available", "in_transit"]

df_parsed = df_raw
for column_name in integer_columns:
    df_parsed = df_parsed.withColumn(
        column_name,
        when(col(column_name).rlike("^[0-9]+$"), col(column_name).cast(IntegerType())).otherwise(None),
    )

# Dates are accepted only in the strict NNNN-NN-NN digit layout; anything else becomes NULL.
df_parsed = df_parsed.withColumn(
    "last_updated",
    when(col("last_updated").rlike("^[0-9]{4}-[0-9]{2}-[0-9]{2}$"), col("last_updated").cast(DateType())).otherwise(None),
)

for column_name in lowercase_columns:
    df_parsed = df_parsed.withColumn(column_name, lower(col(column_name)))

# Remove the three bookkeeping columns if they are present on the source table.
df_parsed = df_parsed.drop("failed_raw", "failed_reasons", "all_reasons")


### Load DQ rules

In [0]:
# Load the data-quality rule definitions and pick out this entity's section.
dq_rules_path = "/Workspace/Users/avadhootd.business@gmail.com/sum/SCM/config/dq_rules.json"
with open(dq_rules_path, "r") as f:
    dq_config = json.loads(f.read())

# A missing entity or a missing rule group falls back to an empty container.
rules = dq_config.get(tb_name, {}).get("silver_rules", {})
quarantine_rules = dq_config.get(tb_name, {}).get("quarantine_rules", [])

In [0]:
%run /Workspace/Users/avadhootd.business@gmail.com/sum/SCM/utils/bronze_rule_validation

In [0]:
# validate_silver_rules comes from the notebook pulled in via %run; its result
# is unpacked as two DataFrames, bound here as the clean and quarantine sets.
validation_result = validate_silver_rules(df_parsed, rules, quarantine_rules)
df_clean, df_quarantine = validation_result

# Strip the quarantine bookkeeping columns from the clean set.
quarantine_helper_columns = ("quarantine_raw", "quarantine_reasons", "reasons_str")
df_clean = df_clean.drop(*quarantine_helper_columns)


In [0]:
# Enrich the validated rows with derived categories and normalised flag values.
#
# Fix: the "year" column was previously computed with month(), so it duplicated
# the "month" column; it is now computed with year().
#
# In every when-chain below a NULL input matches no branch and therefore falls
# through to the .otherwise(...) label.
df_clean = (
    df_clean
    # Bookkeeping columns plus batch_code are not carried into the silver table.
    .drop("quarantine_raw", "quarantine_reasons", "reasons_str", "batch_code")
    # Stock bands: >= 4000 high, 2000-3999 moderate, 1-1999 low, everything else invalid/zero.
    .withColumn(
        "stock_category",
        when(col("stock_quantity") >= 4000, "High Stock")
        .when((col("stock_quantity") >= 2000) & (col("stock_quantity") < 4000), "Moderate Stock")
        .when((col("stock_quantity") >= 1) & (col("stock_quantity") < 2000), "Low Stock")
        .otherwise("Invalid or Zero"),
    )
    # Reorder bands: < 80 critical, 80-149 moderate, >= 150 safe.
    .withColumn(
        "reorder_urgency",
        when(col("reorder_level") < 80, "Critical")
        .when((col("reorder_level") >= 80) & (col("reorder_level") < 150), "Moderate")
        .when(col("reorder_level") >= 150, "Safe")
        .otherwise("Unknown"),
    )
    # Safety-stock bands: < 10 critical, 10-49 low, >= 50 adequate.
    .withColumn(
        "safety_stock_level",
        when(col("safety_stock") < 10, "Critical")
        .when((col("safety_stock") >= 10) & (col("safety_stock") < 50), "Low")
        .when(col("safety_stock") >= 50, "Adequate")
        .otherwise("Unknown"),
    )
    # Calendar parts of last_updated.
    .withColumn("year", year(col("last_updated")))
    .withColumn("month", month(col("last_updated")))
    .withColumn("day", dayofmonth(col("last_updated")))
    # Shelf-life bands: <= 180 short, 181-365 medium, > 365 long.
    .withColumn(
        "shelf_life_category",
        when(col("shelf_life_days") <= 180, "Short-Term")
        .when(col("shelf_life_days").between(181, 365), "Medium-Term")
        .when(col("shelf_life_days") > 365, "Long-Term")
        .otherwise("Unknown"),
    )
    # Map the recognised temperature labels to display values; anything else is "Unknown".
    .withColumn(
        "temperature_required",
        when(lower(col("temperature_required")) == "room temp", "Room Temperature")
        .when(lower(col("temperature_required")) == "ambient", "Ambient")
        .when(lower(col("temperature_required")) == "cold", "Cold")
        .when(lower(col("temperature_required")) == "frozen", "Frozen")
        .otherwise("Unknown"),
    )
    .withColumn(
        "item_condition",
        when(lower(col("item_condition")) == "new", "New")
        .when(lower(col("item_condition")) == "used", "Used")
        .otherwise("Unknown"),
    )
    # Yes/no style flags: "yes"/"y" -> "Yes", "no"/"n" -> "No", anything else -> "Unknown".
    .withColumn(
        "is_damaged",
        when(lower(col("is_damaged")).isin("yes", "y"), "Yes")
        .when(lower(col("is_damaged")).isin("no", "n"), "No")
        .otherwise("Unknown"),
    )
    .withColumn(
        "in_transit",
        when(lower(col("in_transit")).isin("yes", "y"), "Yes")
        .when(lower(col("in_transit")).isin("no", "n"), "No")
        .otherwise("Unknown"),
    )
    .withColumn(
        "is_available",
        when(lower(col("is_available")).isin("yes", "y"), "Yes")
        .when(lower(col("is_available")).isin("no", "n"), "No")
        .otherwise("Unknown"),
    )
)

In [0]:
# Row fingerprint: SHA-256 over inventory_id, item_id and last_updated,
# joined with "||". It is used further below to detect rows already present
# in the silver table.
watermark_parts = [
    col("inventory_id").cast("string"),
    col("item_id"),
    col("last_updated").cast("string"),
]
watermark_hash = sha2(concat_ws("||", *watermark_parts), 256)
df_clean = df_clean.withColumn("watermark_column", watermark_hash)

In [0]:
# Fully qualified names of the silver, quarantine and audit tables in the
# catalog configured for this environment.
catalog_name = config[env]["catalog"]
silver_table = f"{catalog_name}.silver.silver_inventory_stock"
silver_quarantine_table = f"{catalog_name}.silver.silver_inventory_stock_quarantine"
silver_supplier_audit_table = f"{catalog_name}.silver.silver_inventory_stock_audit"

# Start from the full clean set; when the target table already exists, keep
# only rows whose watermark is not present in it (left anti join).
df_new = df_clean
if spark.catalog.tableExists(silver_table):
    existing_df = spark.table(silver_table).select("watermark_column")
    df_new = df_clean.join(existing_df, on="watermark_column", how="left_anti")



In [0]:
# Append the new rows to the silver Delta table with the mergeSchema option enabled.
silver_writer = df_new.write.format("delta").mode("append").option("mergeSchema", "true")
silver_writer.saveAsTable(silver_table)

# The quarantine Delta table is written in overwrite mode on every run.
quarantine_writer = df_quarantine.write.format("delta").mode("overwrite")
quarantine_writer.saveAsTable(silver_quarantine_table)


In [0]:
# %run /Workspace/Users/avadhootd.business@gmail.com/SCM/utils/send_alert_email


In [0]:
# failed_table = config[env][p_file_name]["failed_target"]
# quarantine_table = config[env][p_file_name]["quarantine_target"]

In [0]:
# app_password = app_password.strip()

# if df_quarantine.count() > 0 or df_failed.count() > 0:
#     send_gmail_email(
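#     app_password=app_password,  # SECURITY: never hard-code the app password here; load it from a secret scope (e.g. dbutils.secrets.get) and rotate any credential that was previously committed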
#     from_email="avadhootdarbhe@gmail.com",
#     to_email="avadhootd.in@mouritech.com",
#     tables=[failed_table, quarantine_table]
#     )


### Audit Logs

In [0]:
%run /Workspace/Users/avadhootd.business@gmail.com/sum/SCM/utils/audit_status



In [0]:
# Row counts for the audit record: parsed input, clean output, quarantined rows.
# Each count() is a Spark action.
total, passed, quarantine = df_parsed.count(), df_clean.count(), df_quarantine.count()

# get_audit_status_and_message comes from the notebook pulled in via %run.
# The third positional argument is fixed at 0 here — presumably a failed-record
# count; TODO confirm against the helper's signature.
audit_outcome = get_audit_status_and_message(total, passed, 0, quarantined=quarantine)
status, message = audit_outcome

print(status, message)
print(f"Total Records: {total}")
print(f"Passed Records: {passed}")
print(f"Quarantine Records: {quarantine}")

In [0]:
# Build the single audit row for this run and append it to the audit table.
#
# The record counts reuse total / passed / quarantine computed in the previous
# cell instead of calling count() again. That avoids three extra Spark actions
# and keeps the stored counts identical to the ones status / message were
# derived from.
audit_data = [{
    "env": env,
    "table_name": silver_tb_name,
    "source_path": config[env][tb_name]["pass_target"],
    "target_path": silver_table,
    "quarantine_path": silver_quarantine_table,
    # Driver-local wall-clock time (naive datetime, no explicit timezone).
    "load_timestamp": datetime.now(),
    "total_records": total,
    "passed_records": passed,
    "quarantine_records": quarantine,
    "status": status,
    "message": message
}]

# audit_schema fixes the column order and types of the audit row.
audit_df = spark.createDataFrame(audit_data, audit_schema)

audit_df.write.format("delta").mode("append").saveAsTable(silver_supplier_audit_table)