In [None]:
import snowflake.snowpark as snowpark
from snowflake.snowpark import Session
from snowflake.snowpark.functions import *
from snowflake.snowpark.types import *
import pandas as pd
from datetime import datetime, timedelta

# Obtain the currently active Snowpark session; `session` is what the cells below pass around.
from snowflake.snowpark.context import get_active_session
session = get_active_session()

# Optional: uncomment to select the warehouse / database / schema explicitly for this session.
# session.sql("USE WAREHOUSE YOUR_WAREHOUSE_NAME").collect()
# session.sql("USE DATABASE YOUR_DATABASE_NAME").collect()
# session.sql("USE SCHEMA YOUR_SCHEMA_NAME").collect()


In [None]:
def get_last_partition(session, table_name, partition_column="partition_column"):
    """
    Return the largest value of a partition-like column in a Snowflake table.

    Snowflake doesn't use Hive-style partitions, so the "last partition" is taken
    to be ``MAX(<partition_column>)`` over the whole table.

    Args:
        session: Object exposing ``sql(query).collect()`` (e.g. a Snowpark session).
        table_name: Table to query. Interpolated into the SQL text as-is, so it
            must be a trusted identifier.
        partition_column: Column whose maximum identifies the latest partition.
            Defaults to the placeholder name ``partition_column`` used before this
            parameter existed. Also interpolated as-is.

    Returns:
        The value of the ``LAST_PARTITION`` field of the first returned row, or
        ``None`` when no row comes back or when anything fails (the error is
        printed, not raised).
    """
    try:
        rows = session.sql(f"""
            SELECT MAX({partition_column}) AS last_partition
            FROM {table_name}
        """).collect()

        if not rows:
            return None

        # Snowflake resolves the unquoted alias to upper case, hence the key.
        return rows[0]['LAST_PARTITION']
    except Exception as e:
        # Deliberate best-effort: report the problem and let the caller decide.
        print(f"Error getting last partition: {e}")
        return None

def _sql_string_literal(value):
    """Render ``value`` as a single-quoted SQL string constant with ``\\`` and ``'`` escaped."""
    # Escape backslashes first so the quote doubling below is not re-escaped.
    escaped = str(value).replace("\\", "\\\\").replace("'", "''")
    return f"'{escaped}'"


def process_data(session, env):
    """
    Build the CHG revenue aggregation query and hand it to ``session.sql``.

    Snowflake equivalent of the original PySpark CHG job
    (based on the merge_revenue_ifrs_dd_chg configuration).

    Args:
        session: Object exposing ``sql(query)`` (e.g. a Snowpark session).
        env: Configuration dict. ``env["table_1"]`` must provide:
            ``database``, ``schema``, ``table`` - joined with dots into the source
            table name (interpolated as-is, so they must be trusted identifiers);
            ``filter_d2`` - value compared against ``event_date`` in the WHERE clause;
            ``filter_d0`` - value emitted as the constant ``load_date`` column.

    Returns:
        Whatever ``session.sql(query)`` returns. The query is only built here;
        this function performs no ``collect``/``show`` itself.

    Raises:
        KeyError: if any of the keys listed above is missing from ``env``.
    """

    # Define table - Snowflake uses database.schema.table format
    table_1 = f'{env["table_1"]["database"]}.{env["table_1"]["schema"]}.{env["table_1"]["table"]}'

    # Define periode (matching the catalog filter patterns)
    event_date = env["table_1"]["filter_d2"]  # catalog "day=2" filter (presumably 2 days ago - confirm)
    load_date = env["table_1"]["filter_d0"]   # catalog "day=0" filter (presumably today - confirm)

    print(f"Running for event_date={event_date} and load_date={load_date}")

    # Both values end up inside the SQL text, so render them as escaped string
    # constants; a stray quote or backslash must not be able to alter the statement.
    event_date_sql = _sql_string_literal(event_date)
    load_date_sql = _sql_string_literal(load_date)

    # Snowflake SQL query - converted from the original PySpark SQL.
    # GROUP BY uses select-list ordinals: 1-39 and 45-57 are the grouping columns,
    # 40-44 are the five SUM() aggregates. Keep the ordinals in sync when adding
    # or removing select items.
    sql_query = f"""
    SELECT
        trx_date,
        CONCAT(SUBSTR(timestamp_ifrs, 1, 4), '-', SUBSTR(timestamp_ifrs, 5, 2), '-', SUBSTR(timestamp_ifrs, 7, 2)) AS purchase_date,
        transaction_id,
        subs_id,
        msisdn,
        price_plan_id,
        brand,
        pre_post_flag,
        cust_type_desc,
        cust_subtype_desc,
        customer_sub_segment,
        lac,
        ci,
        lacci_id_ifrs AS lacci_id,
        node,
        CASE
            WHEN area_sales IS NULL OR area_sales = '' THEN 'UNKNOWN'
            ELSE area_sales
        END AS area_sales,
        CASE
            WHEN region_sales IS NULL OR region_sales = '' THEN 'UNKNOWN'
            ELSE region_sales
        END AS region_sales,
        CASE
            WHEN branch IS NULL OR branch = '' THEN 'UNKNOWN'
            ELSE branch
        END AS branch,
        CASE
            WHEN subbranch IS NULL OR subbranch = '' THEN 'UNKNOWN'
            ELSE subbranch
        END AS subbranch,
        CASE
            WHEN cluster_sales IS NULL OR cluster_sales = '' THEN 'UNKNOWN'
            ELSE cluster_sales
        END AS cluster_sales,
        CASE
            WHEN provinsi IS NULL OR provinsi = '' THEN 'UNKNOWN'
            ELSE provinsi
        END AS provinsi,
        CASE
            WHEN kabupaten IS NULL OR kabupaten = '' THEN 'UNKNOWN'
            ELSE kabupaten
        END AS kabupaten,
        CASE
            WHEN kecamatan IS NULL OR kecamatan = '' THEN 'UNKNOWN'
            ELSE kecamatan
        END AS kecamatan,
        CASE
            WHEN kelurahan IS NULL OR kelurahan = '' THEN 'UNKNOWN'
            ELSE kelurahan
        END AS kelurahan,
        lacci_closing_flag,
        sigma_business_id,
        sigma_rules_id,
        sku,
        l1_payu,
        l2_service_type,
        l3_allowance_type,
        l4_product_category,
        l5_product,
        '' AS l1_ias,
        '' AS l2_ias,
        '' AS l3_ias,
        commercial_name,
        channel,
        validity AS pack_validity,
        SUM(rev_per_usage)::DECIMAL(38,15) AS rev_per_usage,
        SUM(0)::DECIMAL(38,15) AS rev_seized,
        SUM(call_duration)::INTEGER AS dur,
        SUM(event_allowance_consumed)::INTEGER AS trx,
        SUM(total_volume)::BIGINT AS vol,
        cust_id,
        '' AS profile_name,
        item_id AS quota_name,
        service_filter,
        price_plan_name,
        channel_id,
        site_id,
        site_name,
        region_hlr,
        city_hlr,
        {load_date_sql} AS load_date,
        event_date,
        'CHG' AS SOURCE
    FROM {table_1}
    WHERE event_date = {event_date_sql}
      AND pre_post_flag = '1'
    GROUP BY 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,45,46,47,48,49,50,51,52,53,54,55,56,57
    """

    # Hand the statement to the session; running it is left to the caller.
    df = session.sql(sql_query)

    return df

In [None]:
try:
    # Job configuration consumed by process_data().
    env = {
        "table_1": {
            "database": "TELKOMSEL_POC",               # From catalog
            "schema": "RAW",                           # Update with your schema
            "table": "IFRS_TC_CHG_POC_TOKENIZED",      # From catalog
            # Hardcoded event_date filter.
            # NOTE(review): the value is 2 April 2025 - confirm this is the intended event date.
            "filter_d2": "2025-04-02",
            # Emitted verbatim as the load_date column.
            # NOTE(review): this is not a date string - confirm the intended load_date value.
            "filter_d0": "tc_chg_poc",
        }
    }

    # Build the query; nothing is executed until an action is called on the result.
    result_df = process_data(session, env)

    # show() is the action that actually runs the query, so report success only
    # after it has returned without raising.
    result_df.show(10)  # Show first 10 rows
    print("Processing completed successfully!")

except Exception as e:
    # Surface a short message in the cell output, then re-raise for the full traceback.
    print(f"Error processing data: {e}")
    raise