In [None]:
import snowflake.snowpark as snowpark
from snowflake.snowpark import Session
# NOTE(review): the two wildcard imports below pull every public name of these
# modules into the notebook namespace. Snowpark's `functions` module presumably
# exports names such as `sum`/`max`/`min` that would shadow the Python builtins
# of the same name — confirm, and prefer explicit imports if so.
from snowflake.snowpark.functions import *
from snowflake.snowpark.types import *
import pandas as pd
from datetime import datetime, timedelta

# Obtain the Snowpark session that is already active in this environment
# (no connection parameters are supplied anywhere in this notebook).
from snowflake.snowpark.context import get_active_session
session = get_active_session()

# Optional: uncomment and fill in the placeholders to pin the warehouse,
# database and schema used by this session.
# session.sql("USE WAREHOUSE YOUR_WAREHOUSE_NAME").collect()
# session.sql("USE DATABASE YOUR_DATABASE_NAME").collect()
# session.sql("USE SCHEMA YOUR_SCHEMA_NAME").collect()


In [None]:
def get_last_partition(session, table_name, partition_column="partition_column"):
    """
    Return the largest value of a partition-like column of a Snowflake table.

    Snowflake doesn't use Hive-style partitions, so "last partition" here simply
    means MAX() over whichever column plays that role for the given table.

    Args:
        session: Object exposing ``sql(query).collect()`` (presumably a Snowpark
            ``Session`` — the function only relies on those two calls).
        table_name: Table identifier; interpolated verbatim into the query.
        partition_column: Name of the column to take MAX() of. Defaults to the
            placeholder name ``partition_column`` so existing two-argument
            calls produce the same query as before. Interpolated verbatim, so
            pass trusted identifiers only.

    Returns:
        The ``LAST_PARTITION`` field of the first result row, or ``None`` when
        the query returns no rows or anything in the lookup raises.
    """
    try:
        query = f"""
            SELECT MAX({partition_column}) as last_partition 
            FROM {table_name}
        """
        rows = session.sql(query).collect()

        # Snowflake reports unquoted aliases in upper case, hence 'LAST_PARTITION'.
        return rows[0]['LAST_PARTITION'] if rows else None
    except Exception as e:
        # Deliberately best-effort: report the failure and signal it with None
        # instead of propagating, so callers can decide how to proceed.
        print(f"Error getting last partition: {e}")
        return None

# Columns whose NULL / empty-string values are reported as 'UNKNOWN'.
# Order matters: these occupy select positions 16-24, which the positional
# GROUP BY in process_data refers to.
_UNKNOWN_DEFAULT_COLUMNS = (
    "area_sales",
    "region_sales",
    "branch",
    "subbranch",
    "cluster_sales",
    "provinsi",
    "kabupaten",
    "kecamatan",
    "kelurahan",
)


def _unknown_if_blank(column):
    """Build the select item that maps NULL or '' in ``column`` to 'UNKNOWN', aliased back to ``column``."""
    return (
        f"CASE WHEN {column} IS NULL OR {column} = '' "
        f"THEN 'UNKNOWN' ELSE {column} END AS {column}"
    )


def process_data(session, env):
    """
    Process data in Snowflake equivalent to the original PySpark job
    Based on merge_revenue_ifrs_dd_accrual configuration

    Builds one aggregation query over the table described by ``env["table_1"]``
    (rows with ``amdd_charge_code = 'SFEE'`` whose ``event_date`` is one day
    before the load date) and hands it to ``session.sql``.

    Args:
        session: Object exposing ``sql(query)`` (presumably a Snowpark ``Session``).
        env: Mapping with a ``"table_1"`` entry providing the keys ``database``,
            ``schema``, ``table``, ``filter_d2`` (used as event date) and
            ``filter_d0`` (used as load date). All values are interpolated
            verbatim into the SQL text, so they must come from trusted config.

    Returns:
        Whatever ``session.sql`` returns for the query. With Snowpark that is a
        lazily evaluated DataFrame — presumably nothing is executed until an
        action such as ``show()`` or ``collect()`` is called on it.

    Raises:
        KeyError: If ``env`` lacks ``"table_1"`` or one of the keys listed above.
    """
    source = env["table_1"]

    # Define table - Snowflake uses database.schema.table format
    table_1 = f'{source["database"]}.{source["schema"]}.{source["table"]}'

    # Define periode (matching the catalog filter patterns)
    event_date = source["filter_d2"]  # day=2 (2 days ago)
    load_date = source["filter_d0"]   # day=0 (today)

    print(f"Running for event_date={event_date} and load_date={load_date}")

    # One "blank -> 'UNKNOWN'" select item per column, in the fixed order above.
    unknown_default_sql = ",\n        ".join(
        _unknown_if_blank(column) for column in _UNKNOWN_DEFAULT_COLUMNS
    )

    # Snowflake SQL query - converted from the original PySpark SQL.
    # NOTE(review): both branches of the purchase_date CASE are the same
    # placeholder expression; the hex conversion of the original job still has
    # to be ported.
    # The GROUP BY is positional: it lists every select item except 40-44
    # (the SUM/COUNT aggregates). Adding, removing or reordering select items
    # requires updating that list.
    sql_query = f"""
    SELECT
        DATEADD(day, -1, trx_date) AS trx_date,
        -- Simplified purchase_date calculation (hex conversion may need adjustment)
        CASE
            WHEN SUBSTR(transaction_id, 3, 3) = '000'
            THEN SUBSTR(transaction_id, 1, 10)  -- Temporary simplified version
            ELSE SUBSTR(transaction_id, 1, 10)  -- Temporary simplified version
        END AS purchase_date,
        transaction_id,
        subscriber_id AS subs_id,
        msisdn,
        CAST(price_plan_id AS INTEGER) AS price_plan_id,
        brand,
        2 AS pre_post_flag,
        cust_type_desc,
        cust_subtype_desc,
        customer_group AS customer_sub_segment,
        lac,
        ci,
        lacci_id,
        node,
        {unknown_default_sql},
        CAST(lacci_closing_flag AS INTEGER) AS lacci_closing_flag,
        sigma_business_id,
        sigma_rules_id,
        SUBSTR(transaction_id, 19, 13) AS sku,
        l1_payu,
        l2_service_type,
        l3_allowance_type,
        l4_product_category,
        l5_product,
        '' AS l1_ias,
        '' AS l2_ias,
        '' AS l3_ias,
        commercial_name,
        channel,
        validity AS pack_validity,
        CAST(SUM(rev) AS DECIMAL(38,15)) AS rev_per_usage,
        CAST(SUM(0) AS DECIMAL(38,15)) AS rev_seized,
        CAST(SUM(0) AS INTEGER) AS dur,
        CAST(COUNT(DISTINCT transaction_id) AS INTEGER) AS trx,
        CAST(SUM(0) AS BIGINT) AS vol,
        CAST(customer_id AS INTEGER) AS cust_id,
        charge_code AS profile_name,
        amdd_charge_code AS quota_name,
        proration_ind AS service_filter,
        offer_name AS price_plan_name,
        channel_id,
        '' AS site_id,
        '' AS site_name,
        region_hlr,
        city_hlr,
        '{load_date}' AS load_date,
        DATEADD(day, -1, '{event_date}') AS event_date,
        'ACCRUAL' AS SOURCE
    FROM {table_1}
    WHERE event_date = DATEADD(day, -1, '{load_date}')
      AND amdd_charge_code = 'SFEE'
    GROUP BY 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,45,46,47,48,49,50,51,52,53,54,55,56,57
    """

    # Hand the query text to the session; the result is returned untouched.
    df = session.sql(sql_query)

    return df

In [None]:
# Run configuration consumed by process_data: where the source table lives and
# which dates to filter on.
env = {
    "table_1": {
        "database": "TELKOMSEL_POC",        # Your database name
        "schema": "RAW",     # Your schema name  
        "table": "IFRS_ACCRUAL_PRODUCT_DAILY_POC_TOKENIZED",       # Your table name
        "filter_d2": "2025-04-01",         # Your event date
        "filter_d0": "2025-04-02"          # Your load date
    }
}

try:
    # Build the result DataFrame. session.sql is lazy, so no query has been
    # executed in Snowflake yet at this point.
    result_df = process_data(session, env)

    # show() is the action that actually runs the query; report success only
    # once it has returned without raising.
    result_df.show(10)  # Show first 10 rows
    print("Processing completed successfully!")

except Exception as e:
    # Surface a short message, then re-raise so the cell still fails visibly.
    print(f"Error processing data: {e}")
    raise