In [None]:
-- Select the database and schema that the unqualified object names in the
-- following cells resolve against.
USE DATABASE dev;
USE SCHEMA datascience;

In [None]:
-- Named Parquet file format; referenced by the stage definition and by the
-- COPY INTO statements in the load cell.
CREATE OR REPLACE FILE FORMAT my_parquet_format TYPE = 'PARQUET';

In [None]:
-- External stage over the bronze prefix of the output bucket, read through the
-- aws_snowflake_connect storage integration using the Parquet file format.
-- NOTE(review): the COPY INTO statements in the load cell read from
-- @bronze_csv, not @bronze_csv_delta -- confirm which stage name is intended.
CREATE OR REPLACE STAGE bronze_csv_delta
  URL                 = 's3://datascience-output-bucket/bronze/'
  STORAGE_INTEGRATION = aws_snowflake_connect
  FILE_FORMAT         = my_parquet_format;

In [None]:
-- Target of the customers COPY INTO; dropped and recreated each time this
-- cell runs.
CREATE OR REPLACE TABLE bronze_customers_delta
(
    customer_id   INT
  , customer_name STRING
  , start_date    TIMESTAMP_NTZ
  , delete_flag   STRING
);

-- Target of the products COPY INTO; dropped and recreated each time this
-- cell runs. product_id is kept as text.
CREATE OR REPLACE TABLE bronze_products_delta
(
    product_id   STRING
  , product_name STRING
  , start_date   TIMESTAMP_NTZ
  , delete_flag  STRING
);

-- Target of the orders COPY INTO; dropped and recreated each time this cell
-- runs. Carries the customer and product identifiers alongside the order id.
CREATE OR REPLACE TABLE bronze_orders_delta
(
    order_id    STRING
  , customer_id INT
  , product_id  STRING
  , start_date  TIMESTAMP_NTZ
  , delete_flag STRING
);

In [None]:
import getpass
import os
from datetime import datetime, timedelta, timezone

from snowflake.snowpark import Session

# ---- Configuration ----
# Which day's folder to load, relative to today's UTC date (0 = today, -1 = yesterday).
LOAD_DAY_OFFSET = 0

# Stage the COPY statements read from.
# NOTE(review): the stage cell in this notebook creates `bronze_csv_delta`, while the
# loads read from `bronze_csv` -- confirm which stage is intended.
SOURCE_STAGE = "bronze_csv"

# Entities to load; each maps to bronze_<entity>_delta, the <entity>/ stage folder
# and files whose directory name starts with the capitalised entity name.
ENTITIES = ("customers", "products", "orders")

# Step 1: Use UTC to avoid local timezone mismatch
load_day = datetime.now(timezone.utc) + timedelta(days=LOAD_DAY_OFFSET)
folder_path = load_day.strftime("%Y/%b/%d/").title()  # e.g. 2025/Aug/20/
print("📌 Using folder path:", folder_path)

# Step 2: Snowflake connection config
# The password is never stored in the notebook: it comes from SNOWFLAKE_PASSWORD or an
# interactive prompt. The password that used to be hard-coded here should be rotated.
# NOTE(review): ACCOUNTADMIN is broader than a load job normally needs -- consider a
# dedicated role.
connection_parameters = {
    "account": os.environ.get("SNOWFLAKE_ACCOUNT", "MJ13681.ap-southeast-1"),
    "user": os.environ.get("SNOWFLAKE_USER", "mzs1988"),
    "password": os.environ.get("SNOWFLAKE_PASSWORD") or getpass.getpass("Snowflake password: "),
    "role": "ACCOUNTADMIN",
    "warehouse": "COMPUTE_WH",
    "database": "DEV",
    "schema": "DATASCIENCE"
}


def build_copy_sql(entity: str, folder: str) -> str:
    """Return the COPY INTO statement that loads one entity's Parquet parts.

    Args:
        entity: Lower-case entity name, e.g. "customers".
        folder: Date folder suffix under the entity's csv_files/ directory,
            e.g. "2025/Aug/20/".

    Returns:
        The SQL text targeting bronze_<entity>_delta.
    """
    return f"""
    COPY INTO bronze_{entity}_delta
    FROM @{SOURCE_STAGE}/{entity}/Delta_load/csv_files/{folder}
    FILE_FORMAT = (FORMAT_NAME = my_parquet_format)
    MATCH_BY_COLUMN_NAME = CASE_INSENSITIVE
    PATTERN = '.*{entity.capitalize()}_.*?/part-.*\\.parquet';
    """


# Bound before the try so the cleanup below can tell whether a session was ever opened;
# otherwise a failed connection would surface as a NameError instead of the real error.
session = None
try:
    # Step 3: Create session
    session = Session.builder.configs(connection_parameters).create()

    # Step 4: Load each entity from its dated folder
    for entity in ENTITIES:
        session.sql(build_copy_sql(entity, folder_path)).collect()
        print(f"✅ Bronze {entity.capitalize()} loaded")

finally:
    # Step 5: Always close the session, even if the COMMIT itself fails
    if session is not None:
        try:
            session.sql("COMMIT").collect()
        finally:
            session.close()
            print("🔒 Session closed")

In [None]:
# The load cell closes its session in `finally`, so the old `session` handle can no
# longer run queries. Open a short-lived session just for this preview and close it
# again afterwards.
preview_session = Session.builder.configs(connection_parameters).create()
try:
    df = preview_session.table("bronze_orders_delta").limit(10)
    df.show()
finally:
    preview_session.close()

In [None]:
import getpass
import os
from datetime import datetime

from snowflake.snowpark import Session
from snowflake.snowpark.functions import col, sql_expr, lit

# ---- Snowflake connection ----
# The password is never stored in the notebook: it comes from SNOWFLAKE_PASSWORD or an
# interactive prompt. The password that used to be hard-coded here should be rotated.
connection_parameters = {
    "account": os.environ.get("SNOWFLAKE_ACCOUNT", "MJ13681.ap-southeast-1"),
    "user": os.environ.get("SNOWFLAKE_USER", "mzs1988"),
    "password": os.environ.get("SNOWFLAKE_PASSWORD") or getpass.getpass("Snowflake password: "),
    "role": "ACCOUNTADMIN",
    "warehouse": "COMPUTE_WH",
    "database": "DEV",
    "schema": "DATASCIENCE"
}


def _valid_rows(delta_df, required_columns, as_of):
    """Keep rows that are not flagged deleted, have every required column
    populated, and whose start_date is not later than `as_of`.

    Args:
        delta_df: Snowpark DataFrame over a bronze delta table.
        required_columns: Column names that must be non-null.
        as_of: Column expression used as the upper bound for start_date.

    Returns:
        The filtered Snowpark DataFrame (all original columns retained).
    """
    condition = col("delete_flag") == lit("N")
    for name in required_columns:
        condition = condition & col(name).is_not_null()
    return delta_df.filter(condition & (col("start_date") <= as_of))


# ---- Session ----
session = Session.builder.configs(connection_parameters).create()

try:
    # ---- Current UTC timestamp (seconds precision) ----
    now = sql_expr("TO_TIMESTAMP_NTZ(TO_CHAR(CONVERT_TIMEZONE('UTC', CURRENT_TIMESTAMP()), 'YYYY-MM-DD HH24:MI:SS'))")
    end_date = lit("2099-12-31 23:59:59").cast("timestamp")

    # ---- Bronze Delta -> Silver Delta (cleaning, dedupe, no delete_flag in output) ----
    customers_delta = session.table("bronze_customers_delta")
    products_delta  = session.table("bronze_products_delta")
    orders_delta    = session.table("bronze_orders_delta")

    # Columns appended to every silver table.
    audit_columns = [
        col("start_date").cast("timestamp").alias("start_date"),
        now.alias("load_date"),
        end_date.alias("end_date"),
    ]

    # distinct() is applied AFTER the projection so that rows which only become
    # identical once names are trimmed and delete_flag is dropped are collapsed too.
    silver_customers_delta = (
        _valid_rows(customers_delta, ["customer_id", "customer_name", "start_date"], now)
        .filter(col("customer_id") > 0)
        .select(
            col("customer_id").cast("int").alias("customer_id"),
            sql_expr("TRIM(customer_name)").alias("customer_name"),
            *audit_columns
        )
        .distinct()
    )

    silver_products_delta = (
        _valid_rows(products_delta, ["product_id", "product_name", "start_date"], now)
        .select(
            col("product_id").cast("string").alias("product_id"),
            sql_expr("TRIM(product_name)").alias("product_name"),
            *audit_columns
        )
        .distinct()
    )

    silver_orders_delta = (
        _valid_rows(orders_delta, ["order_id", "customer_id", "product_id", "start_date"], now)
        .select(
            col("order_id").cast("string").alias("order_id"),
            col("customer_id").cast("int").alias("customer_id"),
            col("product_id").cast("string").alias("product_id"),
            *audit_columns
        )
        .distinct()
    )

    # ---- Write Silver Delta Tables ----
    silver_customers_delta.write.mode("overwrite").save_as_table("silver_customers_delta")
    silver_products_delta.write.mode("overwrite").save_as_table("silver_products_delta")
    silver_orders_delta.write.mode("overwrite").save_as_table("silver_orders_delta")

    print("✅ Silver Delta tables created (delete_flag removed, UTC load_date, deduped).")

finally:
    # ---- Close session (also when a transform or write fails) ----
    session.close()
    print("🔒 Session closed")


In [None]:
# The silver build cell closes its session when it finishes, so the old `session`
# handle can no longer run queries. Open a short-lived session just for this preview
# and close it again afterwards.
preview_session = Session.builder.configs(connection_parameters).create()
try:
    df = preview_session.table("silver_customers_delta").order_by("customer_id").limit(10)
    df.show()
finally:
    preview_session.close()