In [None]:
import snowflake.snowpark as snowpark
from snowflake.snowpark import Session
from snowflake.snowpark.functions import *
from snowflake.snowpark.types import *
import pandas as pd
from datetime import datetime, timedelta

# Fetch the already-active Snowpark session instead of building one from
# connection parameters (presumably it is supplied by the hosting notebook
# runtime — confirm for your environment).
from snowflake.snowpark.context import get_active_session
session = get_active_session()

# Optional: uncomment and fill in the placeholders below to pin the warehouse,
# database and schema used by this session.
# session.sql("USE WAREHOUSE YOUR_WAREHOUSE_NAME").collect()
# session.sql("USE DATABASE YOUR_DATABASE_NAME").collect()
# session.sql("USE SCHEMA YOUR_SCHEMA_NAME").collect()


In [None]:
def get_last_partition(session, table_name, partition_column="partition_column"):
    """
    Return the largest value of a partition-like column in a Snowflake table.

    Snowflake does not use Hive-style partitions, so the "last partition" is
    computed as ``MAX(<partition_column>)`` over the table.

    Args:
        session: Object exposing ``sql(query).collect()`` (for example a
            Snowpark ``Session``).
        table_name: Table identifier, inserted verbatim into the query text.
        partition_column: Column whose maximum is returned. The default keeps
            the original placeholder name ``partition_column`` so existing
            two-argument calls build exactly the same query as before.

    Returns:
        The ``LAST_PARTITION`` field of the first returned row, or ``None``
        when no rows come back or when anything in the lookup raises.
    """
    try:
        # Both names are interpolated verbatim into the SQL text, so only pass
        # trusted identifiers here.
        result = session.sql(f"""
            SELECT MAX({partition_column}) as last_partition 
            FROM {table_name}
        """).collect()

        # Unquoted aliases come back upper-cased, hence the LAST_PARTITION key.
        return result[0]['LAST_PARTITION'] if result else None
    except Exception as e:
        # Deliberate best-effort: report the problem and signal "unknown".
        print(f"Error getting last partition: {e}")
        return None

def process_data(session, env):
    """
    Build the revenue aggregation query and hand it to ``session.sql``.

    Snowflake counterpart of the original PySpark job, based on the
    merge_revenue_ifrs_dd_nbp configuration.

    The query:
      * reads ``table_1`` rows for the event date with ``pre_post_flag = '1'``,
        ``l1_name = 'Digital Services'``, an ``l3_name`` that is not
        ``phonebook%backup`` and ``SOURCE`` in CHG_GOOD / CHG_REJECT;
      * inner-joins them to the distinct (event_date, zero-padded business_id)
        pairs of ``table_2`` that have ``status_flag = 'Y'`` and a true
        ``ifrs_flag``, matching on the first 8 characters of ``pack_id``;
      * drops rows whose 8-character ``pack_id`` prefix already appears as a
        ``sigma_business_id`` in ``table_3`` for the listed sources;
      * sums rev/1.11, dur, trx and vol over every other output column.

    Args:
        session: Object exposing ``sql(query)`` (for example a Snowpark
            ``Session``).
        env: Mapping with ``table_1``, ``table_2`` and ``table_3`` entries,
            each holding ``database``, ``schema`` and ``table``. ``table_1``
            additionally supplies ``filter_d2`` (event date, applied to all
            three tables) and ``filter_d0`` (load date stamped on the output).

    Returns:
        Whatever ``session.sql`` returns for the generated query.
    """

    def qualified_name(key):
        # Snowflake addresses tables as database.schema.table.
        cfg = env[key]
        return f'{cfg["database"]}.{cfg["schema"]}.{cfg["table"]}'

    usage_table = qualified_name("table_1")
    catalog_table = qualified_name("table_2")
    ifrs_table = qualified_name("table_3")

    # Both dates are taken from the table_1 entry only.
    event_day = env["table_1"]["filter_d2"]  # day=2 (2 days ago)
    load_day = env["table_1"]["filter_d0"]   # day=0 (today)

    print(f"Running for event_date={event_day} and load_date={load_day}")

    # NOTE(review): sigma_business_id is cut from content_id while the joins and
    # sigma_rules_id use pack_id — confirm this matches the source job.
    # NOTE(review): rev is divided by 1.11, presumably to strip an 11% tax —
    # confirm.
    # GROUP BY lists every select position except 40-44, which are the SUMs.
    sql_query = """
    SELECT 
        trx_date,
        trx_date AS purchase_date,
        '' AS transaction_id,
        subs_id,
        a.msisdn,
        offer_id::INTEGER AS price_plan_id,
        brand,
        pre_post_flag::INTEGER AS pre_post_flag,
        cust_type_desc,
        cust_subtype_desc,
        '' AS customer_sub_segment,
        lac,
        ci,
        lacci_id,
        node_type AS node,
        CASE 
            WHEN area_sales IS NULL OR area_sales = '' THEN 'UNKNOWN' 
            ELSE area_sales 
        END AS area_sales,
        CASE 
            WHEN region_sales IS NULL OR region_sales = '' THEN 'UNKNOWN' 
            ELSE region_sales 
        END AS region_sales,
        CASE 
            WHEN branch IS NULL OR branch = '' THEN 'UNKNOWN' 
            ELSE branch 
        END AS branch,
        CASE 
            WHEN subbranch IS NULL OR subbranch = '' THEN 'UNKNOWN' 
            ELSE subbranch 
        END AS subbranch,
        CASE 
            WHEN cluster_sales IS NULL OR cluster_sales = '' THEN 'UNKNOWN' 
            ELSE cluster_sales 
        END AS cluster_sales,
        CASE 
            WHEN provinsi IS NULL OR provinsi = '' THEN 'UNKNOWN' 
            ELSE provinsi 
        END AS provinsi,
        CASE 
            WHEN kabupaten IS NULL OR kabupaten = '' THEN 'UNKNOWN' 
            ELSE kabupaten 
        END AS kabupaten,
        CASE 
            WHEN kecamatan IS NULL OR kecamatan = '' THEN 'UNKNOWN' 
            ELSE kecamatan 
        END AS kecamatan,
        CASE 
            WHEN kelurahan IS NULL OR kelurahan = '' THEN 'UNKNOWN' 
            ELSE kelurahan 
        END AS kelurahan,
        lacci_closing_flag::INTEGER AS lacci_closing_flag,
        SUBSTR(content_id, 1, 8) AS sigma_business_id,
        SUBSTR(pack_id, 9, 5) AS sigma_rules_id,
        '' AS sku,
        '' AS l1_payu,
        '' AS l2_service_type,
        '' AS l3_allowance_type,
        '' AS l4_product_category,
        '' AS l5_product,
        l1_name AS l1_ias,
        l2_name AS l2_ias,
        l3_name AS l3_ias,
        '' AS commercial_name,
        '' AS channel,
        '' AS pack_validity,
        SUM(rev/1.11)::DECIMAL(38,15) AS rev_per_usage,
        SUM(0)::DECIMAL(38,15) AS rev_seized,
        SUM(dur)::INTEGER AS dur,
        SUM(trx)::INTEGER AS trx,
        SUM(vol)::BIGINT AS vol,
        NULL AS cust_id,
        '' AS profile_name,
        '' AS quota_name,
        '' AS service_filter,
        offer AS price_plan_name,
        activation_channel_id AS channel_id,
        '' AS site_id,
        '' AS site_name,
        region_hlr,
        city_hlr,
        '{load_date}' AS load_date,
        a.event_date,
        'WISDOM-NBP' AS SOURCE

    FROM (
        SELECT a.* 
        FROM (
            SELECT a.* 
            FROM (
                SELECT *
                FROM {table_1}
                WHERE event_date = '{event_date}'
                  AND pre_post_flag = '1'
                  AND l1_name = 'Digital Services'
                  AND LOWER(l3_name) NOT LIKE 'phonebook%backup'
                  AND SOURCE IN ('CHG_GOOD','CHG_REJECT')
            ) a

            INNER JOIN (
                SELECT 
                    event_date,
                    LPAD(business_id, 8, '0') AS bid
                FROM {table_2}
                WHERE event_date = '{event_date}'
                  AND status_flag = 'Y'
                  AND LOWER(ifrs_flag) = 'true'
                GROUP BY 1,2
            ) c9
            ON a.event_date = c9.event_date
              AND SUBSTR(pack_id, 1, 8) = c9.bid
        ) a

        LEFT JOIN (
            SELECT sigma_business_id
            FROM {table_3}
            WHERE event_date = '{event_date}'
              AND SOURCE IN ('CHG','SEIZED','UPCC-SEIZED','UPCC_USAGE','PAYU','WISDOM')
            GROUP BY 1
        ) b
        ON SUBSTR(pack_id, 1, 8) = b.sigma_business_id
        WHERE b.sigma_business_id IS NULL
    ) a

    GROUP BY 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,45,46,47,48,49,50,51,52,53,54,55,56,57
    """.format(
        table_1=usage_table,
        table_2=catalog_table,
        table_3=ifrs_table,
        event_date=event_day,
        load_date=load_day,
    )

    # Hand the finished statement to the session and pass its result through.
    return session.sql(sql_query)

In [None]:
# Table locations and date filters for the POC run.
# process_data reads both dates from the "table_1" entry only; the "filter_d2"
# values under "table_2" and "table_3" are kept for reference and are not read
# by it.
env = {
    "table_1": {
        "database": "TELKOMSEL_POC",                        # From catalog
        "schema": "RAW",                           # Update with your schema
        "table": "MERGE_REVENUE_DD_POC_TOKENIZED",    # From catalog
        "filter_d2": "2025-04-01",                   # Hardcoded event_date to process
        "filter_d0": "2025-04-01"                    # Hardcoded load_date stamped on the output
    },
    "table_2": {
        "database": "TELKOMSEL_POC",                        # From catalog
        "schema": "RAW",                           # Update with your schema
        "table": "PRODUCT_CATALOG_IFRS_C2C_DD_POC_TOKENIZED",  # From catalog
        "filter_d2": "2025-04-01",                   # Hardcoded: event_date
    },
    "table_3": {
        "database": "TELKOMSEL_POC",                        # From catalog
        "schema": "RAW",                           # Update with your schema
        "table": "MERGE_REVENUE_IFRS_DD_POC_TOKENIZED",  # From catalog
        "filter_d2": "2025-04-01",                   # Hardcoded: event_date
    }
}

try:
    # session.sql only builds a lazy DataFrame; nothing runs in Snowflake yet.
    result_df = process_data(session, env)

    # show() is the action that actually executes the query, so the success
    # message is printed only after it returns.
    result_df.show(10)  # Show first 10 rows
    print("Processing completed successfully!")

except Exception as e:
    # Surface a short message in the cell output, then re-raise so the failure
    # is not hidden from the notebook run.
    print(f"Error processing data: {e}")
    raise