In [None]:
import snowflake.snowpark as snowpark
from snowflake.snowpark import Session
from snowflake.snowpark.functions import *
from snowflake.snowpark.types import *
import pandas as pd
from datetime import datetime, timedelta

# Obtain the session via get_active_session() rather than opening a new
# connection; it is passed to process_data further down.
from snowflake.snowpark.context import get_active_session
session = get_active_session()

# Optional: uncomment and fill in the placeholders to pin the warehouse,
# database, and schema used by this session.
# session.sql("USE WAREHOUSE YOUR_WAREHOUSE_NAME").collect()
# session.sql("USE DATABASE YOUR_DATABASE_NAME").collect()
# session.sql("USE SCHEMA YOUR_SCHEMA_NAME").collect()


In [None]:
def get_last_partition(session, table_name, partition_column="partition_column"):
    """
    Return the largest value of a partition-like column in a Snowflake table.

    Snowflake doesn't use Hive-style partitions, so the "last partition" is
    taken to be MAX(<partition_column>) over the whole table.

    Args:
        session: Object whose ``sql(query).collect()`` returns the result rows
            (the Snowpark session in this notebook).
        table_name: Name of the table to query. Interpolated into the SQL text
            as-is, so it must be a trusted identifier.
        partition_column: Column that holds the partition value. Defaults to
            the placeholder name ``partition_column`` so existing two-argument
            calls build the same query as before; pass the real column name
            for your table. Also interpolated as-is.

    Returns:
        The LAST_PARTITION value of the first result row, or None when no rows
        come back or when running the query / reading the row raises.
    """
    try:
        rows = session.sql(f"""
            SELECT MAX({partition_column}) as last_partition
            FROM {table_name}
        """).collect()

        if not rows:
            return None
        # The unquoted alias last_partition is read back under its upper-case name.
        return rows[0]['LAST_PARTITION']
    except Exception as e:
        # Best-effort lookup: report the problem and let the caller deal with None.
        print(f"Error getting last partition: {e}")
        return None

def process_data(session, env):
    """
    Build the overusage revenue aggregation as a DataFrame.

    Snowflake port of the original PySpark job (merge_revenue_ifrs_dd_overusage
    configuration). Everything is read from env["table_1"]:
      - database / schema / table: combined into the fully qualified source table
      - filter_d2: event_date; filters the source rows and is emitted as the
        trx_date and event_date output columns
      - filter_d0: load_date; only emitted as the load_date output column

    Returns whatever session.sql() returns for the generated statement.
    """
    source_cfg = env["table_1"]

    # Fully qualified name in Snowflake's database.schema.table form
    source_table = "{}.{}.{}".format(
        source_cfg["database"], source_cfg["schema"], source_cfg["table"]
    )

    # Period values (named after the catalog filter patterns)
    event_date = source_cfg["filter_d2"]  # day=2 (2 days ago)
    load_date = source_cfg["filter_d0"]   # day=0 (today)

    print(f"Running for event_date={event_date} and load_date={load_date}")

    # Statement converted from the original PySpark SQL. GROUP BY lists every
    # select position except 40-44, which are the SUM() aggregates.
    overusage_sql = f"""
    SELECT 
        '{event_date}' AS trx_date,
        purchase_date_2 AS purchase_date,
        transaction_id,
        '' AS subs_id,
        msisdn,
        reserve1::INTEGER AS price_plan_id,
        brand,
        1 AS pre_post_flag,
        cust_type AS cust_type_desc,
        cust_subtype AS cust_subtype_desc,
        '' AS customer_sub_segment,
        '' AS lac,
        '' AS ci,
        '' AS lacci_id,
        node_type AS node,
        'UNKNOWN' AS area_sales,
        CASE 
            WHEN region IS NULL OR region = '' THEN 'UNKNOWN' 
            ELSE region 
        END AS region_sales,
        CASE 
            WHEN branch IS NULL OR branch = '' THEN 'UNKNOWN' 
            ELSE branch 
        END AS branch,
        'UNKNOWN' AS subbranch,
        CASE 
            WHEN cluster IS NULL OR cluster = '' THEN 'UNKNOWN' 
            ELSE cluster 
        END AS cluster_sales,
        'UNKNOWN' AS provinsi,
        'UNKNOWN' AS kabupaten,
        'UNKNOWN' AS kecamatan,
        'UNKNOWN' AS kelurahan,
        NULL AS lacci_closing_flag,
        bid AS sigma_business_id,
        '' AS sigma_rules_id,
        '' AS sku,
        '' AS l1_payu,
        '' AS l2_service_type,
        '' AS l3_allowance_type,
        '' AS l4_product_category,
        '' AS l5_product,
        l1_ias,
        l2_ias,
        l3_ias,
        '' AS commercial_name,
        '' AS channel,
        '' AS pack_validity,
        SUM(pi_value_final)::DECIMAL(38,15) AS rev_per_usage,
        SUM(0)::DECIMAL(38,15) AS rev_seized,
        SUM(0)::INTEGER AS dur,
        SUM(0)::INTEGER AS trx,
        SUM(0)::BIGINT AS vol,
        NULL AS cust_id,
        '' AS profile_name,
        '' AS quota_name,
        '' AS service_filter,
        '' AS price_plan_name,
        SUBSTR(transaction_id, 1, 2) AS channel_id,
        '' AS site_id,
        '' AS site_name,
        '' AS region_hlr,
        '' AS city_hlr,
        '{load_date}' AS load_date,
        '{event_date}' AS event_date,
        'OVERUSAGE' AS SOURCE
    FROM {source_table}
    WHERE load_date = '{event_date}'
      AND expiry_month > '{event_date}'
      AND flag = 'alloc_<_rev'
    GROUP BY 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,45,46,47,48,49,50,51,52,53,54,55,56,57
    """

    # Hand the statement to the session and pass its result straight back
    return session.sql(overusage_sql)

In [None]:
try:
    # Run configuration: source table location plus the two date values
    # that process_data reads from env["table_1"].
    env = {
        "table_1": {
            "database": "TELKOMSEL_POC",                                        # From catalog
            "schema": "RAW",                                           # Update with your schema
            "table": "IFRS_CONS_RATE_AND_OUTSTANDING_DD_POC_TOKENIZED",   # From catalog
            "filter_d2": "2025-04-01",     # Hardcoded: 1st April 2025 (event_date)
            # Hardcoded load_date. NOTE(review): currently the same day as
            # filter_d2; change it if load_date is meant to be a later day.
            "filter_d0": "2025-04-01"
        }
    }

    # Build the result DataFrame; no query has been executed at this point.
    result_df = process_data(session, env)

    # show() is the first action on the DataFrame, so this is where the query
    # actually runs. Report success only after it has returned.
    result_df.show(10)  # Show first 10 rows
    print("Processing completed successfully!")

except Exception as e:
    # Surface a short message in the cell output, then re-raise so the
    # notebook run still fails visibly.
    print(f"Error processing data: {e}")
    raise