#### 25HK010BN - Distribution - Agency - Data analytics review on Agency Pre-sales Controls (Python / Databricks)

##### import packages / files from Sharepoint

In [None]:
import pandas as pd
import pyspark.pandas as ps
!pip install openpyxl

In [None]:
# Load the review parameter workbook from the GIA blob volume, indexed by parameter name.
df_param = pd.read_excel(
    '/Volumes/aiahk_dna_p_catalog/dna_gia_blob/gia_blob_volume/Agency_PD/parameters.xlsx',
    index_col='param_name',
    engine='openpyxl',
)
# Review period boundaries; later cells interpolate these values into SQL date filters.
review_period_start, review_period_end = (
    df_param.loc[param_key]["param_value"]
    for param_key in ("review_period_start", "review_period_end")
)

In [None]:
# Read the two AMCM register extracts (active intermediaries, cancelled licences) from the GIA blob volume.
import glob


def _sanitise_column_names(columns):
    """Return the column names with newline, '(', ')' and space each replaced by '_'."""
    replace_table = str.maketrans({'\n': '_', '(': '_', ')': '_', ' ': '_'})
    return [col.translate(replace_table) for col in columns]


# --- AMCM active intermediary file ---
# glob returns matches in arbitrary order: sort so the pick is deterministic, and fail with a
# clear message (instead of an IndexError) when no file has been uploaded.
file_list = sorted(glob.glob('/Volumes/aiahk_dna_p_catalog/dna_gia_blob/gia_blob_volume/Agency_PD/Intermediary*.xlsx'))
if not file_list:
    raise FileNotFoundError("No AMCM active intermediary file matching 'Intermediary*.xlsx' found in Agency_PD")
if len(file_list) > 1:
    print(f"WARNING: {len(file_list)} files match 'Intermediary*.xlsx'; using {file_list[0]}")
df_AMCM_intermediary_raw = ps.read_excel(file_list[0], engine='openpyxl')
# replace special characters in column names
df_AMCM_intermediary_raw.columns = _sanitise_column_names(df_AMCM_intermediary_raw.columns)

# keep Category 'APS' rows that have a non-null life validity date
# NOTE(review): the earlier comment said "(ANG, APS)" but only 'APS' has ever been filtered here —
# confirm whether 'ANG' should be included.
df_AMCM_active = df_AMCM_intermediary_raw[
    (df_AMCM_intermediary_raw['Category'].isin(['APS']))
    & (df_AMCM_intermediary_raw['Life__valid_until_'].notnull())
]
display(df_AMCM_active.head(10))

# --- AMCM cancelled license file ---
file_list = sorted(glob.glob('/Volumes/aiahk_dna_p_catalog/dna_gia_blob/gia_blob_volume/Agency_PD/Licensecancelled*.xlsx'))
if not file_list:
    raise FileNotFoundError("No AMCM cancelled license file matching 'Licensecancelled*.xlsx' found in Agency_PD")
if len(file_list) > 1:
    print(f"WARNING: {len(file_list)} files match 'Licensecancelled*.xlsx'; using {file_list[0]}")
df_AMCM_cancelled_raw = ps.read_excel(file_list[0], engine='openpyxl')
# replace special characters in column names
df_AMCM_cancelled_raw.columns = _sanitise_column_names(df_AMCM_cancelled_raw.columns)

# keep Category 'APS' rows that have a non-null life cancel date
# NOTE(review): same "(ANG, APS)" vs ['APS'] question as for the active list above in this cell.
df_AMCM_cancelled = df_AMCM_cancelled_raw[
    (df_AMCM_cancelled_raw['Category'].isin(['APS']))
    & (df_AMCM_cancelled_raw['Life_cancel_date'].notnull())
]
display(df_AMCM_cancelled.head(10))

In [None]:
# Internal product license mapping: read the 'Internal product license' sheet, drop exact duplicate
# rows and rows without a PLAN_CODE, then persist the result as a reference table.
# (The '5x license' sheet of the same workbook is not loaded in this version of the notebook.)
product_license_sheet = ps.read_excel(
    '/Volumes/aiahk_dna_p_catalog/dna_gia_blob/gia_blob_volume/Agency_PD/PGS product license.xlsx',
    sheet_name='Internal product license',
    engine='openpyxl',
)
product_license_dedup = product_license_sheet.drop_duplicates()
df_product_lic_1 = ps.DataFrame(product_license_dedup.dropna(subset=['PLAN_CODE']))
display(df_product_lic_1)

df_product_lic_1.to_table(
    'aiahk_dna_p_catalog.dna_gia_blob.ca_agy_product_license',
    overwrite_schema=True,
    mode='overwrite',
)

##### Testing scripts

###### [HK & Macau] AGT_002 Identify new agents without an effective regulatory license (life) record on DAS within the calendar month of contract date.

In [None]:
%sql
-- Exploratory check: list the distinct CHANNEL_TYPE values present in the DAS agent master.
SELECT DISTINCT CHANNEL_TYPE
        FROM aiahk_dna_p_catalog.dna_gia_blob._das_agent_master

In [None]:
# AGT_002: agents whose contract date falls inside the review period, left-joined to their
# life licence rows (LICENSE_TYPE '03'). A row is kept when no licence row exists, or when the
# licence EFF_DT is in a different calendar month/year from AGT_CONTRACT_DT.
# NOTE(review): the month/year comparison is evaluated per licence row, so an agent with several
# '03' rows appears to be reported for each row outside the contract month even if another row
# is inside it — confirm this is intended.
df_T02 = spark.sql(
    f"""
    SELECT agt_mas.AGT_CD, agt_mas.AGT_FULL_NAME, agt_mas.CHANNEL_TYPE, agt_mas.REGION, agt_mas.AGT_CONTRACT_DT, agt_mas.AGT_STATUS, agt_mas.TERMINATION_DT, agt_lic.EFF_DT, agt_lic.LICENSE_STATUS, agt_lic.LICENSE_DESC, agt_lic.LICENSE_TYPE, agt_lic.LICENSE_NO
    FROM (
        SELECT * 
        FROM aiahk_dna_p_catalog.dna_gia_blob._das_agent_master 
        WHERE AGT_CONTRACT_DT >= '{df_param.loc["review_period_start"]["param_value"]}' -- filter: contract date after review period start
          AND AGT_CONTRACT_DT <= '{df_param.loc["review_period_end"]["param_value"]}' -- filter: contract date before review period end
    ) agt_mas
    LEFT JOIN (
        SELECT * 
        FROM aiahk_dna_p_catalog.dna_gia_blob._das_agent_license 
        WHERE LICENSE_TYPE = '03' -- filter: license type 03 (life)
    ) agt_lic
      ON agt_mas.AGT_CD = agt_lic.AGT_CD
    WHERE agt_lic.LICENSE_NO IS NULL  -- filter: no license
      OR (
        MONTH(agt_mas.AGT_CONTRACT_DT) != MONTH(agt_lic.EFF_DT)
        OR YEAR(agt_mas.AGT_CONTRACT_DT) != YEAR(agt_lic.EFF_DT)
      )
    """
)

# expose the Spark result as a pandas-on-Spark DataFrame to preview it and show the row count
df_T02_ps = df_T02.pandas_api()
display(df_T02_ps.head())
display(df_T02_ps.shape[0])

# persist the exception list, replacing any previous run's table and schema
df_T02.write.mode('overwrite').option("overwriteSchema", "True").saveAsTable('aiahk_dna_p_catalog.dna_gia_blob.ca_agy_T02')

###### [Macau only] AGT_003 Identify inforce agents with regulatory license expired or cancelled in AMCM agent register.

In [None]:
# AGT_003 base population: Macau agents (REGION = 'MAC') joined to their life licence rows
# (LICENSE_TYPE '03'); only rows that carry a LICENSE_NO are kept, so the LEFT JOIN behaves
# like an inner join here. The comparison with the AMCM register happens in the next cells.
df_T03 = spark.sql(
    f"""
    SELECT agt_mas.AGT_CD, agt_mas.AGT_FULL_NAME, agt_mas.REGION, agt_mas.AGT_CONTRACT_DT, agt_mas.AGT_STATUS, agt_mas.TERMINATION_DT, agt_lic.EFF_DT, agt_lic.EXP_DT, agt_lic.LICENSE_STATUS, agt_lic.LICENSE_DESC, agt_lic.LICENSE_TYPE, agt_lic.LICENSE_NO
    FROM (
        SELECT * 
        FROM aiahk_dna_p_catalog.dna_gia_blob._das_agent_master 
        WHERE REGION = 'MAC' -- filter: region is Macau
    ) agt_mas
    LEFT JOIN (
        SELECT * 
        FROM aiahk_dna_p_catalog.dna_gia_blob._das_agent_license 
        WHERE LICENSE_TYPE = '03' -- filter: license type 03 (life)
    ) agt_lic
      ON agt_mas.AGT_CD = agt_lic.AGT_CD
    WHERE agt_lic.LICENSE_NO IS NOT NULL  -- filter: with license no

    """
)

# expose the Spark result as a pandas-on-Spark DataFrame (merged with the AMCM lists later)
df_T03_ps = df_T03.pandas_api()
display(df_T03_ps.head())
display(df_T03_ps.shape[0])

In [None]:
# 1. map to AMCM active agent list by matching LICENSE_NO with No
df_T03_agt_amcm_active = df_T03_ps.merge(df_AMCM_active, left_on='LICENSE_NO', right_on='No', how='left')

# keep agents whose AGT_STATUS matches the regex '00|10' (the statuses this test treats as inforce)
df_T03_inforce_agt_amcm_active = df_T03_agt_amcm_active[df_T03_agt_amcm_active['AGT_STATUS'].str.contains('00|10')]

# identify inforce agents not inforce on AMCM agent register:
# - agents whose LICENSE_NO is not found in AMCM active list (No is null)
# - or agents whose Name contains the character '註' (indicating annotation or exception)
# The null test uses the pandas-on-Spark Series.isnull() method: pandas' top-level pd.isnull()
# is not documented to accept pandas-on-Spark objects, so it could silently drop this condition.
# The null check stays as the left operand, exactly as in the previous expression.
df_T03_agt_amcm_active_exception = df_T03_inforce_agt_amcm_active[
    (df_T03_inforce_agt_amcm_active['No'].isnull())
    | (df_T03_inforce_agt_amcm_active['Name'].str.contains(u'註'))
]
display(df_T03_agt_amcm_active_exception.head())
display(df_T03_agt_amcm_active_exception.shape[0])



In [None]:
# 2. Inner-join the Macau agent/licence rows to the AMCM cancelled-licence list (LICENSE_NO = No),
#    so only agents that appear on the cancelled list remain.
df_T03_agt_amcm_cancelled = df_T03_ps.merge(
    df_AMCM_cancelled,
    how='inner',
    left_on='LICENSE_NO',
    right_on='No',
)
# Exception when the agent status matches '00|10' (still inforce), or when the AMCM life cancel
# date is earlier than the agent's termination date.
t03_status_inforce = df_T03_agt_amcm_cancelled['AGT_STATUS'].str.contains('00|10')
t03_cancelled_before_termination = (
    df_T03_agt_amcm_cancelled['Life_cancel_date'] < df_T03_agt_amcm_cancelled['TERMINATION_DT']
)
df_T03_agt_amcm_cancelled_exception = df_T03_agt_amcm_cancelled[
    t03_status_inforce | t03_cancelled_before_termination
]
display(df_T03_agt_amcm_cancelled_exception)
display(df_T03_agt_amcm_cancelled_exception.shape[0])

In [None]:
# Persist both AGT_003 exception sets: first the AMCM active-register exceptions, then the
# AMCM cancelled-register exceptions. When a set has no rows, an empty frame carrying the same
# column labels is written instead, so the target table is still replaced.
for exception_frame, target_table in (
    (df_T03_agt_amcm_active_exception, 'aiahk_dna_p_catalog.dna_gia_blob.ca_agy_T03a_AMCM_inforce'),
    (df_T03_agt_amcm_cancelled_exception, 'aiahk_dna_p_catalog.dna_gia_blob.ca_agy_T03b_AMCM_cancelled'),
):
    df_to_write = (
        exception_frame
        if len(exception_frame) > 0
        else ps.DataFrame([], columns=exception_frame.columns)
    )
    df_to_write.to_table(target_table, mode='overwrite', overwrite_schema=True, mergeSchema="true")

To do: add test result to cumulative result log

###### [Macau only] AGT_011 Identify new policies which the writing agents were expired or cancelled on AMCM agent register as of date of policy application.


In [None]:
# AGT_011 base population: review-period policy applications whose POLICY_NO has '4' as its
# second character (treated as Macau policies), joined to the policy-agent mapping, the DAS
# agent master and the agents' life licence rows (LICENSE_TYPE '03'). All joins are LEFT JOINs,
# so policies without a mapped agent or licence are retained with NULLs.
df_T11 = spark.sql(
    f"""
    SELECT UI.POLICY_NO, UI.APP_DATE, agt_map.agent_no, agt_map.AGT_CODE, agt_mas.AGT_CD, agt_mas.AGT_FULL_NAME, agt_mas.REGION, agt_mas.AGT_CONTRACT_DT, agt_mas.AGT_STATUS, agt_mas.TERMINATION_DT, agt_lic.EFF_DT, agt_lic.LICENSE_STATUS, agt_lic.LICENSE_DESC, agt_lic.LICENSE_TYPE, agt_lic.LICENSE_NO
    FROM 
    (
       SELECT * FROM aiahk_dna_p_catalog.dna_gia_blob._ui_policy_app_review_period 
       WHERE SUBSTRING(POLICY_NO, 2, 1) = '4' -- Macau poiicies
    ) UI
    LEFT JOIN 
      aiahk_dna_p_catalog.dna_gia_blob._ui_policy_app_review_period_agt_mapping agt_map
      ON UI.POLICY_NO = agt_map.POLICY_NO
    LEFT JOIN
    (
        SELECT * 
        FROM aiahk_dna_p_catalog.dna_gia_blob._das_agent_master 
    ) agt_mas
    on agt_map.AGT_CODE = agt_mas.AGT_CD
    LEFT JOIN (
        SELECT * 
        FROM aiahk_dna_p_catalog.dna_gia_blob._das_agent_license 
        WHERE LICENSE_TYPE = '03' -- filter: license type 03 (life)
    ) agt_lic
      ON agt_mas.AGT_CD = agt_lic.AGT_CD
    """
)

# expose the Spark result as a pandas-on-Spark DataFrame (merged with the AMCM lists later)
df_T11_ps = df_T11.pandas_api()
display(df_T11_ps.head())
display(df_T11_ps.shape[0])

In [None]:
# 1. map to AMCM active agent list by matching LICENSE_NO with No
df_T11_agt_amcm_active = df_T11_ps.merge(df_AMCM_active, left_on='LICENSE_NO', right_on='No', how='left')

# identify policies with writing agents not inforce on AMCM agent register:
# - agents whose LICENSE_NO is not found in AMCM active list ('No' is null)
# - or agents whose Name contains the character '註' (indicating annotation or exception)
# The null test uses the pandas-on-Spark Series.isnull() method: pandas' top-level pd.isnull()
# is not documented to accept pandas-on-Spark objects, so it could silently drop this condition.
# The null check stays as the left operand, exactly as in the previous expression.
df_T11_agt_amcm_active_exception = df_T11_agt_amcm_active[
    (df_T11_agt_amcm_active['No'].isnull())
    | (df_T11_agt_amcm_active['Name'].str.contains(u'註'))
]
display(df_T11_agt_amcm_active_exception.head())
display(df_T11_agt_amcm_active_exception.shape[0])



In [None]:
# 2. Inner-join the Macau policy/agent rows to the AMCM cancelled-licence list (LICENSE_NO = No),
#    so only policies written by agents on the cancelled list remain.
df_T11_agt_amcm_cancelled = df_T11_ps.merge(
    df_AMCM_cancelled,
    how='inner',
    left_on='LICENSE_NO',
    right_on='No',
)
# Exception when the agent status matches '00|10' (still inforce), or when the AMCM life cancel
# date is earlier than the policy application date (APP_DATE).
# NOTE(review): the status clause mirrors the AGT_003 cell; confirm it is wanted for AGT_011,
# which is described in terms of the application date only.
t11_status_inforce = df_T11_agt_amcm_cancelled['AGT_STATUS'].str.contains('00|10')
t11_cancelled_before_application = (
    df_T11_agt_amcm_cancelled['Life_cancel_date'] < df_T11_agt_amcm_cancelled['APP_DATE']
)
df_T11_agt_amcm_cancelled_exception = df_T11_agt_amcm_cancelled[
    t11_status_inforce | t11_cancelled_before_application
]
display(df_T11_agt_amcm_cancelled_exception.head())
display(df_T11_agt_amcm_cancelled_exception.shape[0])

In [None]:
# Persist both AGT_011 exception sets: first the AMCM active-register exceptions, then the
# AMCM cancelled-register exceptions. When a set has no rows, an empty frame carrying the same
# column labels is written instead, so the target table is still replaced.
for exception_frame, target_table in (
    (df_T11_agt_amcm_active_exception, 'aiahk_dna_p_catalog.dna_gia_blob.ca_agy_T11a_AMCM_inforce'),
    (df_T11_agt_amcm_cancelled_exception, 'aiahk_dna_p_catalog.dna_gia_blob.ca_agy_T11b_AMCM_cancelled'),
):
    df_to_write = (
        exception_frame
        if len(exception_frame) > 0
        else ps.DataFrame([], columns=exception_frame.columns)
    )
    df_to_write.to_table(target_table, mode='overwrite', overwrite_schema=True)


###### [HK & Macau] AGT_004 Identify new agents who did not complete business ethics training within 4 months since agent contract date, but the internal business license (licence #85 - business ethics ) was not terminated.



In [None]:
# AGT_004: agents contracted within the review period, left-joined to their licence rows of
# LICENSE_TYPE '85' and to iAcademy completed-training rows for module/course licence id '85'.
# A row is kept only when NO matching training row exists (EARLIEST_STATUSDATE IS NULL) and the
# licence EXP_DT is later than the cut-off: the last day of the month four months after the
# contract date (contract month Apr-Dec), or 30 April of the following year (contract month Jan-Mar).
# NOTE(review): agents who completed the training, but late, have a non-null EARLIEST_STATUSDATE
# and are therefore not reported — confirm this matches the intent of the test.
# NOTE(review): the window MIN(...) OVER (PARTITION BY traineeID) is not de-duplicated, and
# date_add(..., 0) adds zero days; both are harmless given the IS NULL filter but look unintended.
df_T04 = spark.sql(
    f"""
    SELECT agt_mas.AGT_CD, agt_mas.AGT_FULL_NAME, agt_mas.REGION, agt_mas.AGT_CONTRACT_DT, agt_mas.AGT_STATUS, agt_mas.TERMINATION_DT, agt_lic.EFF_DT, agt_lic.LICENSE_STATUS, agt_lic.LICENSE_DESC, agt_lic.LICENSE_TYPE, agt_lic.LICENSE_NO, agt_lic.EXP_DT, iac.*
    FROM 
    (
        SELECT * 
        FROM aiahk_dna_p_catalog.dna_gia_blob._das_agent_master 
        WHERE AGT_CONTRACT_DT >= '{df_param.loc["review_period_start"]["param_value"]}' -- filter: contract date after review period start
          AND AGT_CONTRACT_DT <= '{df_param.loc["review_period_end"]["param_value"]}' -- filter: contract date before review period end      
    ) agt_mas
    LEFT JOIN (
        SELECT * 
        FROM aiahk_dna_p_catalog.dna_gia_blob._das_agent_license 
        WHERE LICENSE_TYPE = '85' -- filter: license type 85 (business ethics)
    ) agt_lic
      ON agt_mas.AGT_CD = agt_lic.AGT_CD
    LEFT JOIN (
        -- first completion of training
        SELECT traineeID
        , MIN(STATUSDATE) OVER (PARTITION BY traineeID) AS EARLIEST_STATUSDATE
        FROM aiahk_dna_p_catalog.dna_gia_blob._iacademy_completed_training 
        WHERE module_license_id = '85' or course_license_id = '85'
    ) iac
    on agt_mas.AGT_CD = iac.traineeID
    where iac.EARLIEST_STATUSDATE IS NULL
      AND (
      ( -- case 1: agt contract date between Apr and Dec, expiry date is 4 months after contract date
          (month(agt_mas.AGT_CONTRACT_DT) >= 4 AND month(agt_mas.AGT_CONTRACT_DT) <= 12) AND
          agt_lic.EXP_DT > last_day(add_months(agt_mas.AGT_CONTRACT_DT, 4))
      )
      OR
      (-- case 2: agt contract date between Jan and Mar, expiry date is 30 Apr of next year
          (month(agt_mas.AGT_CONTRACT_DT) >= 1 AND month(agt_mas.AGT_CONTRACT_DT) <= 3) AND
          agt_lic.EXP_DT > date_add(make_date(year(add_months(agt_mas.AGT_CONTRACT_DT, 12)), 4, 30), 0)
      )
      )
    """
)

# expose the Spark result as a pandas-on-Spark DataFrame to preview it and show the row count
df_T04_ps = df_T04.pandas_api()
display(df_T04_ps.head())
display(df_T04_ps.shape[0])

# persist the exception list, replacing any previous run's table and schema
df_T04.write.mode('overwrite').option("overwriteSchema", "True").saveAsTable('aiahk_dna_p_catalog.dna_gia_blob.ca_agy_T04')

###### [HK & Macau] AGT_005 
[HK] - Identify agent submitted life policy application in which the policy application date was not within the effective date of internal business licence (licence #85 - business ethics and licence #88 - CRS). <br><br> [Macau] Identify agent submitted life policy application in which the policy application date was not within the effective date of internal business licence (licence #85 - business ethics; _**licence #84 - FATCA;**_ and licence #88 - CRS).


In [None]:

# AGT_005: review-period policy applications with PROPOSAL_TYPE other than 'G'/'P', joined to the
# policy-agent mapping and (inner join) the DAS agent master. For each of licence types 85, 88
# and 84 the query looks for a licence row whose EFF_DT..EXP_DT range contains APP_DATE; the 84
# lookup only applies to policies whose POLICY_NO has '4' as second character (Macau).
# The *_all sub-queries supply each agent's latest EFF_DT/EXP_DT per licence type, shown only
# when no covering licence row was found. A row is reported when any required licence is missing;
# the Exception_85/88/84 flags tell which one.
# NOTE(review): the INNER JOIN on agt_mas turns the preceding LEFT JOIN on agt_map into an
# inner join, so policies without a mapped DAS agent are not reported here.
df_T05 = spark.sql(
    f"""
    SELECT 
    CASE WHEN SUBSTRING(UI.POLICY_NO, 2, 1) <> '4' THEN 'HK' ELSE 'Macau' END HK_MACAU,
    UI.POLICY_NO, UI.APP_DATE, UI.PROPOSAL_TYPE, agt_map.AGT_CODE as UI_AGT_CODE
    , agt_mas.AGT_CD, agt_mas.AGT_FULL_NAME, agt_mas.REGION, agt_mas.AGT_CONTRACT_DT, agt_mas.AGT_STATUS, agt_mas.TERMINATION_DT

    , agt_lic_85.LICENSE_DESC AS LICENSE_DESC_85, agt_lic_85.EFF_DT AS EFF_DATE_85, agt_lic_85.EXP_DT AS EXP_DATE_85
    , (agt_lic_85.LICENSE_TYPE IS NULL) as Exception_85
    , CASE WHEN agt_lic_85.LICENSE_TYPE IS NULL THEN agt_lic_85_all.MAX_EFF_DT ELSE NULL END AS MAX_EFF_85
    , CASE WHEN agt_lic_85.LICENSE_TYPE IS NULL THEN agt_lic_85_all.MAX_EXP_DT ELSE NULL END AS MAX_EXP_85

    , agt_lic_88.LICENSE_DESC AS LICENSE_DESC_88, agt_lic_88.EFF_DT AS EFF_DATE_88, agt_lic_88.EXP_DT AS EXP_DATE_88
    , (agt_lic_88.LICENSE_TYPE IS NULL) as Exception_88
    , CASE WHEN agt_lic_88.LICENSE_TYPE IS NULL THEN agt_lic_88_all.MAX_EFF_DT ELSE NULL END AS MAX_EFF_88
    , CASE WHEN agt_lic_88.LICENSE_TYPE IS NULL THEN agt_lic_88_all.MAX_EXP_DT ELSE NULL END AS MAX_EXP_88

    , agt_lic_84.LICENSE_DESC AS LICENSE_DESC_84, agt_lic_84.EFF_DT AS EFF_DATE_84, agt_lic_84.EXP_DT AS EXP_DATE_84
    , (SUBSTRING(UI.POLICY_NO, 2, 1) = '4' AND agt_lic_84.AGT_CD IS NULL) as Exception_84
    , CASE WHEN agt_lic_84.LICENSE_TYPE IS NULL THEN agt_lic_84_all.MAX_EFF_DT ELSE NULL END AS MAX_EFF_84
    , CASE WHEN agt_lic_84.LICENSE_TYPE IS NULL THEN agt_lic_84_all.MAX_EXP_DT ELSE NULL END AS MAX_EXP_84

     FROM 
    (
       SELECT * FROM aiahk_dna_p_catalog.dna_gia_blob._ui_policy_app_review_period 
       WHERE PROPOSAL_TYPE not in ('G','P')
       /*
       L = Life 
        G = GI
        I = ILAS
        P = Personal Accident
       */
    ) UI
    LEFT JOIN 
      aiahk_dna_p_catalog.dna_gia_blob._ui_policy_app_review_period_agt_mapping agt_map
      ON UI.POLICY_NO = agt_map.POLICY_NO
    INNER JOIN  -- agents exist on AD base ( to fine tune later identification of sales channel )
    (
        SELECT * 
        FROM aiahk_dna_p_catalog.dna_gia_blob._das_agent_master 
    ) agt_mas
    on agt_map.AGT_CODE = agt_mas.AGT_CD
    LEFT JOIN (
        SELECT * 
        FROM aiahk_dna_p_catalog.dna_gia_blob._das_agent_license 
        WHERE LICENSE_TYPE = '85' -- filter: license type 85 (business ethics)
    ) agt_lic_85
      ON agt_mas.AGT_CD = agt_lic_85.AGT_CD
      AND (agt_lic_85.LICENSE_TYPE IS NOT NULL AND UI.APP_DATE BETWEEN agt_lic_85.EFF_DT AND agt_lic_85.EXP_DT) -- BE 85
    LEFT JOIN (
        SELECT AGT_CD, MAX(EFF_DT) as MAX_EFF_DT, MAX(EXP_DT) AS MAX_EXP_DT
        FROM aiahk_dna_p_catalog.dna_gia_blob._das_agent_license 
        WHERE LICENSE_TYPE = '85' -- filter: license type 85 (business ethics)
        GROUP BY AGT_CD
    ) agt_lic_85_all
      ON agt_mas.AGT_CD = agt_lic_85_all.AGT_CD

    LEFT JOIN (
        SELECT * 
        FROM aiahk_dna_p_catalog.dna_gia_blob._das_agent_license 
        WHERE LICENSE_TYPE = '88' -- filter: license type 88 (CRS)
    ) agt_lic_88
      ON agt_mas.AGT_CD = agt_lic_88.AGT_CD
      AND (agt_lic_88.LICENSE_TYPE IS NOT NULL AND UI.APP_DATE BETWEEN agt_lic_88.EFF_DT AND agt_lic_88.EXP_DT) -- CRS 88
    LEFT JOIN (
        SELECT AGT_CD, MAX(EFF_DT) as MAX_EFF_DT, MAX(EXP_DT) AS MAX_EXP_DT
        FROM aiahk_dna_p_catalog.dna_gia_blob._das_agent_license 
        WHERE LICENSE_TYPE = '88' -- filter: license type 88 (CRS)
        GROUP BY AGT_CD
    ) agt_lic_88_all
      ON agt_mas.AGT_CD = agt_lic_88_all.AGT_CD

    LEFT JOIN (
        SELECT * 
        FROM aiahk_dna_p_catalog.dna_gia_blob._das_agent_license 
        WHERE LICENSE_TYPE = '84' -- filter: license type 84 (FATCA)
    ) agt_lic_84
      ON agt_mas.AGT_CD = agt_lic_84.AGT_CD
      AND (SUBSTRING(UI.POLICY_NO, 2, 1) = '4' -- Macau poiicies 
        AND (agt_lic_84.LICENSE_TYPE IS NOT NULL AND UI.APP_DATE BETWEEN agt_lic_84.EFF_DT AND agt_lic_84.EXP_DT) -- FATCA 84
        )
    LEFT JOIN (
        SELECT AGT_CD, MAX(EFF_DT) as MAX_EFF_DT, MAX(EXP_DT) AS MAX_EXP_DT
        FROM aiahk_dna_p_catalog.dna_gia_blob._das_agent_license 
        WHERE LICENSE_TYPE = '84' -- filter: license type 84 (FATCA)
        GROUP BY AGT_CD
    ) agt_lic_84_all
      ON agt_mas.AGT_CD = agt_lic_84_all.AGT_CD
 

    WHERE agt_lic_85.AGT_CD IS NULL 
      OR  agt_lic_88.AGT_CD IS NULL 
      OR  (SUBSTRING(UI.POLICY_NO, 2, 1) = '4' AND agt_lic_84.AGT_CD IS NULL)
    """
)

# expose the Spark result as a pandas-on-Spark DataFrame to display it and show the row count
df_T05_ps = df_T05.pandas_api()
display(df_T05_ps)
display(df_T05_ps.shape[0])

# persist the exception list, replacing any previous run's table and schema
df_T05.write.mode('overwrite').option("overwriteSchema", "True").saveAsTable('aiahk_dna_p_catalog.dna_gia_blob.ca_agy_T05')

###### [HK & Macau] AGT_015 Identify agent submitted policy application after agent's termination date on DAS

In [None]:

# AGT_015: review-period policy applications joined to the policy-agent mapping and (inner join)
# the DAS agent master; a row is reported when the agent's TERMINATION_DT is earlier than the
# policy APP_DATE. Life licence rows (LICENSE_TYPE '03') are attached for information only.
# NOTE(review): the licence LEFT JOIN has no date condition, so a policy is presumably repeated
# once per '03' licence row of the agent — confirm this is acceptable for the result count.
df_T15 = spark.sql(
    f"""
    SELECT 
    CASE WHEN SUBSTRING(UI.POLICY_NO, 2, 1) <> '4' THEN 'HK' ELSE 'Macau' END HK_MACAU,
    UI.POLICY_NO, UI.APP_DATE, UI.PROPOSAL_TYPE, agt_map.AGT_CODE as UI_AGT_CODE
    , agt_mas.AGT_CD, agt_mas.AGT_FULL_NAME, agt_mas.REGION, agt_mas.AGT_CONTRACT_DT, agt_mas.AGT_STATUS, agt_mas.TERMINATION_DT
    , agt_lic.LICENSE_DESC, agt_lic.EFF_DT as EFF_DATE, agt_lic.EXP_DT as EXP_DATE, agt_lic.LICENSE_NO
    
    FROM aiahk_dna_p_catalog.dna_gia_blob._ui_policy_app_review_period UI
    LEFT JOIN 
      aiahk_dna_p_catalog.dna_gia_blob._ui_policy_app_review_period_agt_mapping agt_map
      ON UI.POLICY_NO = agt_map.POLICY_NO
    INNER JOIN  -- agents exist on AD base ( to fine tune later identification of sales channel )
    (
        SELECT * 
        FROM aiahk_dna_p_catalog.dna_gia_blob._das_agent_master 
    ) agt_mas
    on agt_map.AGT_CODE = agt_mas.AGT_CD

    LEFT JOIN (
        SELECT * 
        FROM aiahk_dna_p_catalog.dna_gia_blob._das_agent_license 
        WHERE LICENSE_TYPE = '03' -- filter: license type 03 (life)
    ) agt_lic
      ON agt_mas.AGT_CD = agt_lic.AGT_CD

    WHERE agt_mas.TERMINATION_DT < UI.APP_DATE
    """
)

# expose the Spark result as a pandas-on-Spark DataFrame to display it and show the row count
df_T15_ps = df_T15.pandas_api()
display(df_T15_ps)
display(df_T15_ps.shape[0])

# persist the exception list, replacing any previous run's table and schema
df_T15.write.mode('overwrite').option("overwriteSchema", "True").saveAsTable('aiahk_dna_p_catalog.dna_gia_blob.ca_agy_T15')

###### [HK & Macau] AGT_017 Identify agents with more than one inforce agent code in DAS.

In [None]:
# AGT_017 step 1: build the temp view `agent_joined_view`, which links each inforce DAS agent code
# (AGT_STATUS '00' or '10') to its AA agent-master record and, through INTPERSONSEQ, to the
# person record's encrypted ID (TXTNRID_ORIGIN_CLN_ENCRYPT). Both AA tables are restricted to
# TXTRCDSTATUS = '03'.
# NOTE(review): review_period_start / review_period_end are re-read here but the query below
# contains no placeholders, so they are not used by this cell.
review_period_start = df_param.loc["review_period_start"]["param_value"]
review_period_end = df_param.loc["review_period_end"]["param_value"]

query = f"""
CREATE OR REPLACE TEMP VIEW agent_joined_view AS
SELECT
    ad_agtmas.AGT_CD,
    ad_agtmas.AGT_FULL_NAME, 
    ad_agtmas.CHANNEL_TYPE,
    ad_agtmas.REGION,
    ad_agtmas.AGT_STATUS,
    ad_agtmas.AGT_CONTRACT_DT,
    ad_agtmas.TERMINATION_DT,
    
    aa_agtmas.INTPERSONSEQ,
    aa_person.TXTNRID_ORIGIN_CLN_ENCRYPT
FROM (
    SELECT *
    FROM aiahk_dna_p_catalog.dna_gia_blob._das_agent_master
    WHERE AGT_STATUS IN ('00', '10') -- inforce agents only
) ad_agtmas
INNER JOIN (
    SELECT *
    FROM aiahk_dna_p_catalog.dna_con_aa_delta.aa_tblagtmaster
    WHERE TXTRCDSTATUS = '03'
) aa_agtmas
ON ad_agtmas.AGT_CD = aa_agtmas.TXTAGTCD
LEFT JOIN (
    SELECT INTPERSONSEQ, TXTNRID_ORIGIN_CLN_ENCRYPT
    FROM aiahk_dna_p_catalog.dna_con_aa_delta.aa_tblperson
    WHERE TXTRCDSTATUS = '03'
) aa_person
ON aa_agtmas.INTPERSONSEQ = aa_person.INTPERSONSEQ
"""

# Execute the query (creates or replaces the temp view in the current Spark session)
spark.sql(query)


In [None]:
# AGT_017 step 2: from `agent_joined_view`, find encrypted person IDs linked to more than one
# distinct inforce agent code and return every agent row for those IDs together with the count.
# Rows whose TXTNRID_ORIGIN_CLN_ENCRYPT is NULL cannot satisfy the equality join and are
# therefore never reported.
df_T17 = spark.sql(f"""
select duplicates.count_agent_codes, agt_view.*
from agent_joined_view agt_view
inner join (
    select TXTNRID_ORIGIN_CLN_ENCRYPT, count(distinct AGT_CD) as count_agent_codes
    from agent_joined_view
    group by TXTNRID_ORIGIN_CLN_ENCRYPT
    having count(distinct AGT_CD) > 1
) duplicates
on agt_view.TXTNRID_ORIGIN_CLN_ENCRYPT = duplicates.TXTNRID_ORIGIN_CLN_ENCRYPT
""")

# persist the exception list, replacing any previous run's table and schema
df_T17.write.mode('overwrite').option("overwriteSchema", "True").saveAsTable('aiahk_dna_p_catalog.dna_gia_blob.ca_agy_T17')

###### [HK & Macau] AGT_005a
[HK & Macau] Identify agents who submitted a life policy application in which the policy application date was not within the effective date of the regulatory licence for life.


In [None]:

# AGT_005a: review-period policy applications with PROPOSAL_TYPE other than 'G'/'P', joined to the
# policy-agent mapping and (inner join) the DAS agent master. A row is reported when the agent
# has no LICENSE_TYPE '03' row whose EFF_DT..EXP_DT range contains APP_DATE. The latest '03'
# EFF_DT/EXP_DT per agent is shown for context, and a second lookup over LICENSE_TYPE '01'..'04'
# (string BETWEEN) feeds the informational *_life columns only — it does not affect the filter.
df_T05a = spark.sql(
    f"""
    SELECT 
    CASE WHEN SUBSTRING(UI.POLICY_NO, 2, 1) <> '4' THEN 'HK' ELSE 'Macau' END HK_MACAU,
    UI.POLICY_NO, UI.APP_DATE, UI.POL_DATE, UI.PROPOSAL_TYPE, agt_map.AGT_CODE as UI_AGT_CODE
    , agt_mas.AGT_CD, agt_mas.AGT_FULL_NAME, agt_mas.REGION, agt_mas.AGT_CONTRACT_DT, agt_mas.AGT_STATUS, agt_mas.TERMINATION_DT

    , agt_lic_03.LICENSE_DESC AS LICENSE_DESC_03, agt_lic_03.EFF_DT AS EFF_DATE_03, agt_lic_03.EXP_DT AS EXP_DATE_03
    , (agt_lic_03.LICENSE_TYPE IS NULL) as Exception_03
    , CASE WHEN agt_lic_03.LICENSE_TYPE IS NULL THEN agt_lic_03_all.MAX_EFF_DT ELSE NULL END AS MAX_EFF_03
    , CASE WHEN agt_lic_03.LICENSE_TYPE IS NULL THEN agt_lic_03_all.MAX_EXP_DT ELSE NULL END AS MAX_EXP_03

    , agt_lic_life.LICENSE_DESC AS LICENSE_DESC_life, agt_lic_life.EFF_DT AS EFF_DATE_life, agt_lic_life.EXP_DT AS EXP_DATE_life
    , (agt_lic_life.LICENSE_TYPE IS NULL) as Exception_life

     FROM 
    (
       SELECT * FROM aiahk_dna_p_catalog.dna_gia_blob._ui_policy_app_review_period 
       WHERE PROPOSAL_TYPE not in ('G','P')
       /*
       L = Life 
        G = GI
        I = ILAS
        P = Personal Accident
       */
    ) UI
    LEFT JOIN 
      aiahk_dna_p_catalog.dna_gia_blob._ui_policy_app_review_period_agt_mapping agt_map
      ON UI.POLICY_NO = agt_map.POLICY_NO
    INNER JOIN  -- agents exist on AD base ( to fine tune later identification of sales channel )
    (
        SELECT * 
        FROM aiahk_dna_p_catalog.dna_gia_blob._das_agent_master 
    ) agt_mas
    on agt_map.AGT_CODE = agt_mas.AGT_CD
   
    LEFT JOIN (
        SELECT * 
        FROM aiahk_dna_p_catalog.dna_gia_blob._das_agent_license 
        WHERE LICENSE_TYPE = '03' -- filter: license type 3 (regulatory life)
    ) agt_lic_03
      ON agt_mas.AGT_CD = agt_lic_03.AGT_CD
      AND  (agt_lic_03.LICENSE_TYPE IS NOT NULL AND UI.APP_DATE BETWEEN agt_lic_03.EFF_DT AND agt_lic_03.EXP_DT) 
    LEFT JOIN (
        SELECT AGT_CD, MAX(EFF_DT) as MAX_EFF_DT, MAX(EXP_DT) AS MAX_EXP_DT
        FROM aiahk_dna_p_catalog.dna_gia_blob._das_agent_license 
        WHERE LICENSE_TYPE = '03' -- filter: license type 3 (regulatory life)
        GROUP BY AGT_CD
    ) agt_lic_03_all
      ON agt_mas.AGT_CD = agt_lic_03_all.AGT_CD

    LEFT JOIN (
        SELECT *
        FROM aiahk_dna_p_catalog.dna_gia_blob._das_agent_license 
        WHERE LICENSE_TYPE BETWEEN '01' AND '04' -- filter: license type 01-04 (regulatory life)
    ) agt_lic_life
      ON agt_mas.AGT_CD = agt_lic_life.AGT_CD
      AND  (agt_lic_life.LICENSE_TYPE IS NOT NULL AND UI.APP_DATE BETWEEN agt_lic_life.EFF_DT AND agt_lic_life.EXP_DT) 
      
    WHERE agt_lic_03.AGT_CD IS NULL 
    """
)

# expose the Spark result as a pandas-on-Spark DataFrame to display it and show the row count
df_T05a_ps = df_T05a.pandas_api()
display(df_T05a_ps)
display(df_T05a_ps.shape[0])

# persist the exception list, replacing any previous run's table and schema
df_T05a.write.mode('overwrite').option("overwriteSchema", "True").saveAsTable('aiahk_dna_p_catalog.dna_gia_blob.ca_agy_T05a')

###### [HK & Macau] AGT_007 Identify agent submitted ILAS policy application in which the policy application date was not within the effective date of regulatory licence for ILAS.


In [None]:
# AGT_007: review-period ILAS policy applications (PROPOSAL_TYPE = 'I'), joined to the
# policy-agent mapping and (inner join) the DAS agent master. A row is reported when the agent
# has no LICENSE_TYPE '55' row whose EFF_DT..EXP_DT range contains APP_DATE; the agent's latest
# '55' EFF_DT/EXP_DT is shown for context.
df_T07 = spark.sql(
    f"""
    SELECT 
    CASE WHEN SUBSTRING(UI.POLICY_NO, 2, 1) <> '4' THEN 'HK' ELSE 'Macau' END HK_MACAU,
    UI.POLICY_NO, UI.APP_DATE, UI.POL_DATE, UI.PROPOSAL_TYPE, agt_map.AGT_CODE as UI_AGT_CODE
    , agt_mas.AGT_CD, agt_mas.AGT_FULL_NAME, agt_mas.REGION, agt_mas.AGT_CONTRACT_DT, agt_mas.AGT_STATUS, agt_mas.TERMINATION_DT

    , agt_lic_55.LICENSE_DESC AS LICENSE_DESC_55, agt_lic_55.EFF_DT AS EFF_DATE_55, agt_lic_55.EXP_DT AS EXP_DATE_55
    , (agt_lic_55.LICENSE_TYPE IS NULL) as Exception_55
    , CASE WHEN agt_lic_55.LICENSE_TYPE IS NULL THEN agt_lic_55_all.MAX_EFF_DT ELSE NULL END AS MAX_EFF_55
    , CASE WHEN agt_lic_55.LICENSE_TYPE IS NULL THEN agt_lic_55_all.MAX_EXP_DT ELSE NULL END AS MAX_EXP_55

     FROM 
    (
       SELECT * FROM aiahk_dna_p_catalog.dna_gia_blob._ui_policy_app_review_period 
       WHERE PROPOSAL_TYPE = 'I'
       /*
       L = Life 
        G = GI
        I = ILAS
        P = Personal Accident
       */
    ) UI
    LEFT JOIN 
      aiahk_dna_p_catalog.dna_gia_blob._ui_policy_app_review_period_agt_mapping agt_map
      ON UI.POLICY_NO = agt_map.POLICY_NO
    INNER JOIN  -- agents exist on AD base ( to fine tune later identification of sales channel )
    (
        SELECT * 
        FROM aiahk_dna_p_catalog.dna_gia_blob._das_agent_master 
    ) agt_mas
    on agt_map.AGT_CODE = agt_mas.AGT_CD
    LEFT JOIN (
        SELECT * 
        FROM aiahk_dna_p_catalog.dna_gia_blob._das_agent_license 
        WHERE LICENSE_TYPE = '55' -- filter: license type 55 (ILAS)
    ) agt_lic_55
      ON agt_mas.AGT_CD = agt_lic_55.AGT_CD
      AND (agt_lic_55.LICENSE_TYPE IS NOT NULL AND UI.APP_DATE BETWEEN agt_lic_55.EFF_DT AND agt_lic_55.EXP_DT) 
    LEFT JOIN (
        SELECT AGT_CD, MAX(EFF_DT) as MAX_EFF_DT, MAX(EXP_DT) AS MAX_EXP_DT
        FROM aiahk_dna_p_catalog.dna_gia_blob._das_agent_license 
        WHERE LICENSE_TYPE = '55' -- filter: license type 55 (ILAS)
        GROUP BY AGT_CD
    ) agt_lic_55_all
      ON agt_mas.AGT_CD = agt_lic_55_all.AGT_CD

    WHERE agt_lic_55.AGT_CD IS NULL 
    """
)

# expose the Spark result as a pandas-on-Spark DataFrame to display it and show the row count
df_T07_ps = df_T07.pandas_api()
display(df_T07_ps)
display(df_T07_ps.shape[0])

# persist the exception list, replacing any previous run's table and schema
df_T07.write.mode('overwrite').option("overwriteSchema", "True").saveAsTable('aiahk_dna_p_catalog.dna_gia_blob.ca_agy_T07')

###### [HK & Macau] AGT_008 Identify agent submitted MCV policy application in which the policy application date was not within the effective date of internal business license (licence #65 - Insurance Business for Mainland China Visitors)



In [None]:
# AGT_008: review-period policy applications with PROPOSAL_TYPE other than 'G'/'P' whose trimmed
# policy number starts with 'G', 'M380', 'M680', 'M980' or 'M480' (the prefixes this test uses to
# select MCV business), joined to the policy-agent mapping and (inner join) the DAS agent master.
# A row is reported when the agent has no LICENSE_TYPE '65' row whose EFF_DT..EXP_DT range
# contains APP_DATE; the agent's latest '65' EFF_DT/EXP_DT is shown for context.
df_T08 = spark.sql(
    f"""
    SELECT 
    CASE WHEN SUBSTRING(UI.POLICY_NO, 2, 1) <> '4' THEN 'HK' ELSE 'Macau' END HK_MACAU,
    UI.POLICY_NO, UI.APP_DATE, UI.POL_DATE, UI.PROPOSAL_TYPE, agt_map.AGT_CODE as UI_AGT_CODE
    , agt_mas.AGT_CD, agt_mas.AGT_FULL_NAME, agt_mas.REGION, agt_mas.AGT_CONTRACT_DT, agt_mas.AGT_STATUS, agt_mas.TERMINATION_DT

    , agt_lic_65.LICENSE_DESC AS LICENSE_DESC_65, agt_lic_65.EFF_DT AS EFF_DATE_65, agt_lic_65.EXP_DT AS EXP_DATE_65
    , (agt_lic_65.LICENSE_TYPE IS NULL) as Exception_65
    , CASE WHEN agt_lic_65.LICENSE_TYPE IS NULL THEN agt_lic_65_all.MAX_EFF_DT ELSE NULL END AS MAX_EFF_65
    , CASE WHEN agt_lic_65.LICENSE_TYPE IS NULL THEN agt_lic_65_all.MAX_EXP_DT ELSE NULL END AS MAX_EXP_65

     FROM 
    (
       SELECT * FROM aiahk_dna_p_catalog.dna_gia_blob._ui_policy_app_review_period 
       WHERE PROPOSAL_TYPE not in ('G','P') AND 
       (trim(policy_no) like 'G%' or trim(policy_no) like 'M380%' or trim(policy_no) like 'M680%' or trim(policy_no) like 'M980%' or trim(policy_no) like 'M480%')
       /*
       L = Life 
        G = GI
        I = ILAS
        P = Personal Accident
       */
    ) UI
    LEFT JOIN 
      aiahk_dna_p_catalog.dna_gia_blob._ui_policy_app_review_period_agt_mapping agt_map
      ON UI.POLICY_NO = agt_map.POLICY_NO
    INNER JOIN  -- agents exist on AD base ( to fine tune later identification of sales channel )
    (
        SELECT * 
        FROM aiahk_dna_p_catalog.dna_gia_blob._das_agent_master 
    ) agt_mas
    on agt_map.AGT_CODE = agt_mas.AGT_CD
    LEFT JOIN (
        SELECT * 
        FROM aiahk_dna_p_catalog.dna_gia_blob._das_agent_license 
        WHERE LICENSE_TYPE = '65' -- filter: license type 65 MCV
    ) agt_lic_65
      ON agt_mas.AGT_CD = agt_lic_65.AGT_CD
      AND (agt_lic_65.LICENSE_TYPE IS NOT NULL AND UI.APP_DATE BETWEEN agt_lic_65.EFF_DT AND agt_lic_65.EXP_DT) 
    LEFT JOIN (
        SELECT AGT_CD, MAX(EFF_DT) as MAX_EFF_DT, MAX(EXP_DT) AS MAX_EXP_DT
        FROM aiahk_dna_p_catalog.dna_gia_blob._das_agent_license 
        WHERE LICENSE_TYPE = '65' -- filter: license type 65 MCV
        GROUP BY AGT_CD
    ) agt_lic_65_all
      ON agt_mas.AGT_CD = agt_lic_65_all.AGT_CD

    WHERE agt_lic_65.AGT_CD IS NULL 
    """
)

# expose the Spark result as a pandas-on-Spark DataFrame to display it and show the row count
df_T08_ps = df_T08.pandas_api()
display(df_T08_ps)
display(df_T08_ps.shape[0])

# persist the exception list, replacing any previous run's table and schema
df_T08.write.mode('overwrite').option("overwriteSchema", "True").saveAsTable('aiahk_dna_p_catalog.dna_gia_blob.ca_agy_T08')

###### [HK & Macau] AGT_009 Identify agents who submitted a Vitality policy application that was issued, where the policy issue date was not within the effective date of the internal business licence (licence #86 - AIA Vitality).



In [None]:
# AGT_009: review-period policy applications with PROPOSAL_TYPE other than 'G'/'P',
# VITALITY_IND = 'Y' and POLICY_STATUS = 'DLR', joined to the policy-agent mapping and
# (inner join) the DAS agent master. A row is reported when the agent has no LICENSE_TYPE '86'
# row whose EFF_DT..EXP_DT range contains the policy ISS_DATE; the agent's latest '86'
# EFF_DT/EXP_DT is shown for context.
df_T09 = spark.sql(
    f"""
    SELECT 
    CASE WHEN SUBSTRING(UI.POLICY_NO, 2, 1) <> '4' THEN 'HK' ELSE 'Macau' END HK_MACAU,
    UI.POLICY_NO, UI.APP_DATE, UI.POL_DATE, UI.ISS_DATE, UI.PROPOSAL_TYPE, UI.POLICY_STATUS, UI.VITALITY_IND, UI.plan_code, agt_map.AGT_CODE as UI_AGT_CODE
    , agt_mas.AGT_CD, agt_mas.AGT_FULL_NAME, agt_mas.REGION, agt_mas.AGT_CONTRACT_DT, agt_mas.AGT_STATUS, agt_mas.TERMINATION_DT

    , agt_lic_86.LICENSE_DESC AS LICENSE_DESC_86, agt_lic_86.EFF_DT AS EFF_DATE_86, agt_lic_86.EXP_DT AS EXP_DATE_86
    , (agt_lic_86.LICENSE_TYPE IS NULL) as Exception_86
    , CASE WHEN agt_lic_86.LICENSE_TYPE IS NULL THEN agt_lic_86_all.MAX_EFF_DT ELSE NULL END AS MAX_EFF_86
    , CASE WHEN agt_lic_86.LICENSE_TYPE IS NULL THEN agt_lic_86_all.MAX_EXP_DT ELSE NULL END AS MAX_EXP_86

     FROM 
    (
       SELECT * FROM aiahk_dna_p_catalog.dna_gia_blob._ui_policy_app_review_period 
       WHERE PROPOSAL_TYPE not in ('G','P') AND 
       VITALITY_IND = 'Y' AND
       POLICY_STATUS = 'DLR'
       /*
       L = Life 
        G = GI
        I = ILAS
        P = Personal Accident
       */
    ) UI
    LEFT JOIN 
      aiahk_dna_p_catalog.dna_gia_blob._ui_policy_app_review_period_agt_mapping agt_map
      ON UI.POLICY_NO = agt_map.POLICY_NO
    INNER JOIN  -- agents exist on AD base ( to fine tune later identification of sales channel )
    (
        SELECT * 
        FROM aiahk_dna_p_catalog.dna_gia_blob._das_agent_master 
    ) agt_mas
    on agt_map.AGT_CODE = agt_mas.AGT_CD
    LEFT JOIN (
        SELECT * 
        FROM aiahk_dna_p_catalog.dna_gia_blob._das_agent_license 
        WHERE LICENSE_TYPE = '86' -- filter: license type 86 Vitality
    ) agt_lic_86
      ON agt_mas.AGT_CD = agt_lic_86.AGT_CD
      AND (agt_lic_86.LICENSE_TYPE IS NOT NULL AND UI.ISS_DATE BETWEEN agt_lic_86.EFF_DT AND agt_lic_86.EXP_DT) -- 86 Vitality
    LEFT JOIN (
        SELECT AGT_CD, MAX(EFF_DT) as MAX_EFF_DT, MAX(EXP_DT) AS MAX_EXP_DT
        FROM aiahk_dna_p_catalog.dna_gia_blob._das_agent_license 
        WHERE LICENSE_TYPE = '86' -- filter: license type 86 Vitality
        GROUP BY AGT_CD
    ) agt_lic_86_all
      ON agt_mas.AGT_CD = agt_lic_86_all.AGT_CD

    WHERE agt_lic_86.AGT_CD IS NULL 
    """
)

# expose the Spark result as a pandas-on-Spark DataFrame to display it and show the row count.
# This cell previously displayed `df_T12_ps`, a name that is never defined in this notebook;
# on a fresh run that raised NameError before the result table below was written.
df_T09_ps = df_T09.pandas_api()
display(df_T09_ps)
display(df_T09_ps.shape[0])

# persist the exception list, replacing any previous run's table and schema
df_T09.write.mode('overwrite').option("overwriteSchema", "True").saveAsTable('aiahk_dna_p_catalog.dna_gia_blob.ca_agy_T09')

In [None]:
%sql 
-- Follow-up on the AGT_009 result table: attach the 7-year coverage rows by POLICY_NO and the
-- DAS product row by PLAN_SHORT_NAME, so the product's VT_IND can be compared per exception.
-- The commented-out WHERE clause at the end is an optional filter on VT_IND.
select b.VT_IND, * from aiahk_dna_p_catalog.dna_gia_blob.ca_agy_T09 result 
left join aiahk_dna_p_catalog.dna_gia_blob.`_ui_policy_app_7yrs_coverage` a
on result.POLICY_NO = a.POLICY_NO
left join aiahk_dna_p_catalog.dna_gia_blob.`_das_product` b
on a.PLAN_SHORT_NAME = b.PLAN_SHORT_NAME
--where vt_ind is null or vt_ind <> 0

In [None]:
%sql
-- Exploratory check: list the distinct products in the DAS product table whose VT_IND equals 0.
select distinct VT_IND, PLAN_FULL_NAME from aiahk_dna_p_catalog.dna_gia_blob.`_das_product`
where vt_ind = 0

In [None]:
%sql
-- Follow-up on the AGT_009 result table: attach the UI policy vitality rows by POLICY_NO
-- (unmatched exceptions are kept with NULL vitality columns).
select * from aiahk_dna_p_catalog.dna_gia_blob.ca_agy_T09 result
left join aiahk_dna_p_catalog.dna_con_ui_delta.ui_tui_pol_vitality vit
on result.POLICY_NO = vit.POLICY_NO


##### [HK & Macau] AGT_001 Identify new agents with agent contract date before the completion of pre-code eLearning on iAcademy.

In [None]:
%sql
-- Preview of completed pre-code training: module-type training status rows whose status starts
-- with 'pass' or 'complete' (case-insensitive), joined to their module and to module templates
-- whose CODE contains 'pre-', with candidate details attached by trainee ID. The candidate agent
-- code is prefixed with '00000' and cut to its last 10 characters.
-- NOTE(review): these tables are referenced without a catalog name, so the query relies on the
-- session's current catalog — confirm it is the intended one.

select TRAINEEID, TRAININGID, training_module.MODULEREF, STATUS, STATUSDATE, TITLE, training_module.CODE, right('00000'|| agentcode_cand,10) as agentcode_cand, name, CANDIDATETYPEID from 
    (
        select * from dna_con_iac_delta.iac_training_trainingstatus
        where TRAININGTYPE = 'Module'
        and (lower(status) like 'pass%' or lower(status) like 'complete%')
    ) training_record
    inner join dna_con_iac_delta.iac_training_module training_module
    on training_record.TRAININGID = training_module.ID
    inner join
    (
        select * from dna_con_iac_delta.iac_training_moduletemplate 
        where lower(CODE) like '%pre-%'
    ) pre_training_module
    on training_module.MODULEREF = pre_training_module.ID
    left join 
    (
        select ID, AGENTCODE as AGENTCODE_cand, NAME, CANDIDATETYPEID from dna_con_iac_delta.iac_rec_can_can 
    ) candidate
    on training_record.TRAINEEID = candidate.ID

In [None]:
%sql 
-- Preview of the GIA iAcademy completed-training table restricted to module-type rows whose status
-- starts with 'pass' or 'complete' and whose mod_code contains 'pre-' (all case-insensitive).
select *
from aiahk_dna_p_catalog.dna_gia_blob._iacademy_completed_training
        where TRAININGTYPE = 'Module'
        and (lower(status) like 'pass%' or lower(status) like 'complete%')
        and lower(mod_code) like '%pre-%'

In [None]:
# Review-period bounds come from the parameter workbook (df_param) loaded at the top of the notebook.
review_period_start = df_param.loc["review_period_start"]["param_value"]
review_period_end = df_param.loc["review_period_end"]["param_value"]

# AGT_001: agents whose contract date falls inside the review period and for whom no
# passed/completed 'pre-' module record exists with STATUSDATE on or before the last day
# of the contract month. The LEFT JOIN plus "iAC.status is null" acts as an anti-join.
# An iAcademy record matches an agent on either TRAINEEID or the zero-prefixed candidate agent code.
# iac_training_module is now schema-qualified like every other iAcademy table in this notebook,
# so the query no longer depends on the session's current schema.
df_T01 = spark.sql(f"""
SELECT * FROM
(
    select AGT_CD, REGION, AGT_STATUS, AGT_NAME, AGT_CONTRACT_DT, TERMINATION_DT 
    from dna_gia_blob.`_das_agent_master`
    where AGT_CONTRACT_DT >= '{review_period_start}' AND AGT_CONTRACT_DT <= '{review_period_end}'
) DAS
LEFT JOIN 
(
    select TRAINEEID, TRAININGID, training_module.MODULEREF, STATUS, STATUSDATE, TITLE, training_module.CODE, right('00000'|| agentcode_cand,10) as agentcode_cand, name, CANDIDATETYPEID from 
    (
        select * from dna_con_iac_delta.iac_training_trainingstatus
        where TRAININGTYPE = 'Module'
        and (lower(status) like 'pass%' or lower(status) like 'complete%')
    ) training_record
    inner join dna_con_iac_delta.iac_training_module training_module
    on training_record.TRAININGID = training_module.ID
    inner join
    (
        select * from dna_con_iac_delta.iac_training_moduletemplate 
        where lower(CODE) like '%pre-%'
    ) pre_training_module
    on training_module.MODULEREF = pre_training_module.ID
    left join 
    (
        select ID, AGENTCODE as AGENTCODE_cand, NAME, CANDIDATETYPEID from dna_con_iac_delta.iac_rec_can_can 
    ) candidate
    on training_record.TRAINEEID = candidate.ID
) iAC
on (trim(iAC.TRAINEEID) = trim(DAS.AGT_CD) or trim(iAC.agentcode_cand) = trim(DAS.AGT_CD)) and statusdate <= last_day(DAS.AGT_CONTRACT_DT)
where iAC.status is null
""")

# pandas-on-Spark view of the exceptions, displayed together with the row count
df_T01_ps = df_T01.pandas_api()
display(df_T01_ps)
display(df_T01_ps.shape[0])

# persist the exceptions; the table and its schema are replaced on every run
df_T01.write.mode('overwrite').option("overwriteSchema", "True").saveAsTable('aiahk_dna_p_catalog.dna_gia_blob.ca_agy_T01')

In [None]:

# AGT_001 supporting detail: for every exception agent saved in ca_agy_T01, list all of the
# agent's iAcademy module records (any status, any module template) so each exception can
# be reviewed against the agent's full training history.
# iac_training_module is now schema-qualified like every other iAcademy table in this notebook,
# so the query no longer depends on the session's current schema.
df_T01_details = spark.sql(f"""
SELECT exception.AGT_CD, exception.AGT_CONTRACT_DT, exception.AGT_NAME, exception.AGT_STATUS, iAC.* FROM dna_gia_blob.ca_agy_T01 exception
LEFT JOIN 
(
    select TRAINEEID, TRAININGID, training_module.MODULEREF, STATUS, STATUSDATE, TITLE, training_module.CODE, right('00000'|| agentcode_cand,10) as agentcode_cand, name, CANDIDATETYPEID from 
    (
        select * from dna_con_iac_delta.iac_training_trainingstatus
        where TRAININGTYPE = 'Module'
    ) training_record
    inner join dna_con_iac_delta.iac_training_module training_module
    on training_record.TRAININGID = training_module.ID
    inner join
    (
        select * from dna_con_iac_delta.iac_training_moduletemplate 
    ) pre_training_module
    on training_module.MODULEREF = pre_training_module.ID
    left join 
    (
        select ID, AGENTCODE as AGENTCODE_cand, NAME, CANDIDATETYPEID from dna_con_iac_delta.iac_rec_can_can 
    ) candidate
    on training_record.TRAINEEID = candidate.ID
) iAC
on (trim(iAC.TRAINEEID) = trim(exception.AGT_CD) or trim(iAC.agentcode_cand) = trim(exception.AGT_CD)) 
""")

# pandas-on-Spark view of the detail rows, displayed together with the row count
df_T01_ps_details = df_T01_details.pandas_api()
display(df_T01_ps_details)
display(df_T01_ps_details.shape[0])

# persist the detail rows; the table and its schema are replaced on every run
df_T01_details.write.mode('overwrite').option("overwriteSchema", "True").saveAsTable('aiahk_dna_p_catalog.dna_gia_blob.ca_agy_T01_details')

##### [HK & Macau] AGT_012 Identify agents assigned with internal business license (Business Ethics, MCV, Fatca, CRS, AIA Vitality) prior to completion of relevant refresher training on iAcademy.

-- Business Ethics & MCV licenses are renewed once a year; FATCA, CRS and AIA Vitality licenses are indefinite.

In [None]:
# AGT_012 (license types 84 / 88 / 86): licenses whose EFF_DT lies in the review period and
# for which no passed/completed module record mapped to the same license id exists with a
# status date on or before the license effective date (LEFT JOIN + "iAC.status is null").
df_T12_FATCA_CRS_VIT = spark.sql(f"""
-- FATCA CRS Vitality
select agt_mas.AGT_CD, REGION, AGT_STATUS, AGT_NAME, AGT_CONTRACT_DT, TERMINATION_DT, agt_lic.LICENSE_DESC, agt_lic.EFF_DT, agt_lic.EXP_DT, agt_lic.LICENSE_TYPE, iac.*
from dna_gia_blob.`_das_agent_master` agt_mas
INNER JOIN (
        SELECT * 
        FROM aiahk_dna_p_catalog.dna_gia_blob._das_agent_license 
        WHERE LICENSE_TYPE IN ('84','88','86')  -- FATCA CRS Vitality
        AND (EFF_DT >= '{review_period_start}' AND EFF_DT <= '{review_period_end}')
    ) agt_lic
ON agt_mas.AGT_CD = agt_lic.AGT_CD
LEFT JOIN 
(
    select TRAINEEID, TRAININGID, training_module.MODULEREF, LICENSEID, STATUS, STATUSDATE, training_module.CODE, right('00000'|| agentcode_cand,10) as agentcode_cand, name, CANDIDATETYPEID from  
    (
        select * from dna_con_iac_delta.iac_training_trainingstatus
        where TRAININGTYPE = 'Module'
        and (lower(status) like 'pass%' or lower(status) like 'complete%')
    ) training_record
    inner join dna_con_iac_delta.iac_training_module training_module
    on training_record.TRAININGID = training_module.ID
    inner join
    (
        select * from dna_con_iac_delta.iac_lmslicense_formodule 
        where LICENSEID IN ('84','88','86') -- FATCA CRS Vitality
    ) license_module
    on training_module.MODULETEMPLATEID = license_module.MODULETEMPLATEID
    left join 
    (
        select ID, AGENTCODE as AGENTCODE_cand, NAME, CANDIDATETYPEID from dna_con_iac_delta.iac_rec_can_can 
    ) candidate
    on training_record.TRAINEEID = candidate.ID
) iAC
on (trim(iAC.TRAINEEID) = trim(agt_mas.AGT_CD) or trim(iAC.agentcode_cand) = trim(agt_mas.AGT_CD)) and trim(LICENSE_TYPE) = trim(LICENSEID) and date(statusdate) <= date(agt_lic.EFF_DT)
where iAC.status is null
""")

# pandas-on-Spark view of the exceptions, followed by the exception count
df_T12_ps_FATCA_CRS_VIT = df_T12_FATCA_CRS_VIT.pandas_api()
display(df_T12_ps_FATCA_CRS_VIT)
display(df_T12_ps_FATCA_CRS_VIT.shape[0])

# persist the exceptions; the table and its schema are replaced on every run
(
    df_T12_FATCA_CRS_VIT.write
    .mode("overwrite")
    .option("overwriteSchema", "True")
    .saveAsTable("aiahk_dna_p_catalog.dna_gia_blob.ca_agy_T12_FATCA_CRS_VIT")
)

In [None]:
# AGT_012 supporting detail: for every exception row saved in ca_agy_T12_FATCA_CRS_VIT, list all
# of the agent's iAcademy module records (any status, any module template).
# The module-template subquery used to share the alias `training_module` with
# iac_training_module, which made "training_module.MODULEREF = training_module.ID" and the
# selected training_module.* columns ambiguous. It is now aliased `module_template`,
# mirroring the equivalent join in the AGT_001 details query.
df_T12_FATCA_CRS_VIT_details = spark.sql(f"""
SELECT exception.AGT_CD, exception.AGT_CONTRACT_DT, exception.AGT_NAME, exception.AGT_STATUS, iAC.* FROM dna_gia_blob.ca_agy_T12_FATCA_CRS_VIT exception
LEFT JOIN 
(
    select TRAINEEID, TRAININGID, training_module.MODULEREF, STATUS, STATUSDATE, TITLE, training_module.CODE, right('00000'|| agentcode_cand,10) as agentcode_cand, name, CANDIDATETYPEID from 
    (
        select * from dna_con_iac_delta.iac_training_trainingstatus
        where TRAININGTYPE = 'Module'
    ) training_record
    inner join dna_con_iac_delta.iac_training_module training_module
    on training_record.TRAININGID = training_module.ID
    inner join
    (
        select * from dna_con_iac_delta.iac_training_moduletemplate 
    ) module_template
    on training_module.MODULEREF = module_template.ID
    left join 
    (
        select ID, AGENTCODE as AGENTCODE_cand, NAME, CANDIDATETYPEID from dna_con_iac_delta.iac_rec_can_can 
    ) candidate
    on training_record.TRAINEEID = candidate.ID
) iAC
on (trim(iAC.TRAINEEID) = trim(exception.AGT_CD) or trim(iAC.agentcode_cand) = trim(exception.AGT_CD)) 
""")

# pandas-on-Spark view of the detail rows, displayed together with the row count
df_T12_FATCA_CRS_VIT_details_ps = df_T12_FATCA_CRS_VIT_details.pandas_api()
display(df_T12_FATCA_CRS_VIT_details_ps)
display(df_T12_FATCA_CRS_VIT_details_ps.shape[0])

# persist the detail rows; the table and its schema are replaced on every run
df_T12_FATCA_CRS_VIT_details.write.mode('overwrite').option("overwriteSchema", "True").saveAsTable('aiahk_dna_p_catalog.dna_gia_blob.ca_agy_T12_FATCA_CRS_VIT_details')

In [None]:
# Cut-off dates for the BE / MCV test, previously hard-coded inside the SQL and marked
# "to parameterize". Change them here; the values are the ones the query used before.
BE_MCV_MIN_LICENSE_EXP_DT = '2026-04-30'   # only licenses with DATE(EXP_DT) on or after this date are tested
BE_MCV_TRAINING_FROM_DT = '2023-07-01'     # only training records with DATE(STATUSDATE) on or after this date count

# AGT_012 (license types 85 / 65): agents who are either in status '00'/'10' or whose
# termination date is more than 4 months after the contract date, holding a license that
# expires on or after BE_MCV_MIN_LICENSE_EXP_DT, with no passed/completed module record
# mapped to that license id since BE_MCV_TRAINING_FROM_DT (LEFT JOIN + "iAC.status is null").
# NOTE(review): unlike the FATCA/CRS/Vitality and product-license tests, the training status
# date is not compared with the license EFF_DT here; confirm this is intended.
df_T12_BE_MCV = spark.sql(f"""
-- BE MCV ( one year expiry )
select agt_mas.AGT_CD, REGION, AGT_STATUS, AGT_NAME, AGT_CONTRACT_DT, TERMINATION_DT, agt_lic.LICENSE_DESC, agt_lic.EFF_DT, agt_lic.EXP_DT, agt_lic.LICENSE_TYPE, iac.*
from (
    SELECT * FROM dna_gia_blob.`_das_agent_master`
    WHERE (AGT_STATUS IN ('00','10') OR MONTHS_BETWEEN(TERMINATION_DT, AGT_CONTRACT_DT) > 4)
 ) agt_mas
INNER JOIN (
        SELECT * 
        FROM aiahk_dna_p_catalog.dna_gia_blob._das_agent_license 
        WHERE LICENSE_TYPE IN ('85','65')  -- BE MCV
        AND DATE(EXP_DT) >= '{BE_MCV_MIN_LICENSE_EXP_DT}'
    ) agt_lic
ON agt_mas.AGT_CD = agt_lic.AGT_CD
LEFT JOIN 
(
    select TRAINEEID, TRAININGID, training_module.MODULEREF, LICENSEID, STATUS, STATUSDATE, training_module.CODE, right('00000'|| agentcode_cand,10) as agentcode_cand, name, CANDIDATETYPEID from  
    (
        select * from dna_con_iac_delta.iac_training_trainingstatus
        where TRAININGTYPE = 'Module'
        and (lower(status) like 'pass%' or lower(status) like 'complete%')
        and DATE(STATUSDATE) >= '{BE_MCV_TRAINING_FROM_DT}'
    ) training_record
    inner join dna_con_iac_delta.iac_training_module training_module
    on training_record.TRAININGID = training_module.ID
    inner join
    (
        select * from dna_con_iac_delta.iac_lmslicense_formodule 
        where LICENSEID IN ('85','65')  -- BE MCV
    ) license_module
    on training_module.MODULETEMPLATEID = license_module.MODULETEMPLATEID
    left join 
    (
        select ID, AGENTCODE as AGENTCODE_cand, NAME, CANDIDATETYPEID from dna_con_iac_delta.iac_rec_can_can 
    ) candidate
    on training_record.TRAINEEID = candidate.ID
) iAC
on (trim(iAC.TRAINEEID) = trim(agt_mas.AGT_CD) or trim(iAC.agentcode_cand) = trim(agt_mas.AGT_CD)) and trim(LICENSE_TYPE) = trim(LICENSEID) 
where iAC.status is null
""")

# pandas-on-Spark view of the exceptions, displayed together with the row count
df_T12_BE_MCV_ps = df_T12_BE_MCV.pandas_api()
display(df_T12_BE_MCV_ps)
display(df_T12_BE_MCV_ps.shape[0])

# persist the exceptions; the table and its schema are replaced on every run
df_T12_BE_MCV.write.mode('overwrite').option("overwriteSchema", "True").saveAsTable('aiahk_dna_p_catalog.dna_gia_blob.ca_agy_T12_BE_MCV')

##### [HK & Macau] AGT_013 Identify agents assigned with internal product license prior to completion of relevant training on iAcademy.

In [None]:
# AGT_013: licenses with a 4-character LICENSE_TYPE whose EFF_DT lies in the review period and
# for which no passed/completed module record mapped to the same license id exists with a
# status date on or before the license effective date (LEFT JOIN + "iAC.status is null").
df_T13 = spark.sql(f"""
-- internal product license
select agt_mas.AGT_CD, REGION, AGT_STATUS, AGT_NAME, AGT_CONTRACT_DT, TERMINATION_DT, agt_lic.LICENSE_DESC, agt_lic.EFF_DT, agt_lic.EXP_DT, agt_lic.LICENSE_TYPE, iac.*
from dna_gia_blob.`_das_agent_master` agt_mas
INNER JOIN (
        SELECT * 
        FROM aiahk_dna_p_catalog.dna_gia_blob._das_agent_license 
        WHERE LENGTH(LICENSE_TYPE) = 4
        AND (EFF_DT >= '{review_period_start}' AND EFF_DT <= '{review_period_end}')
    ) agt_lic
ON agt_mas.AGT_CD = agt_lic.AGT_CD
LEFT JOIN 
(
    select TRAINEEID, TRAININGID, training_module.MODULEREF, LICENSEID, STATUS, STATUSDATE, training_module.CODE, right('00000'|| agentcode_cand,10) as agentcode_cand, name, CANDIDATETYPEID from  
    (
        select * from dna_con_iac_delta.iac_training_trainingstatus
        where TRAININGTYPE = 'Module'
        and (lower(status) like 'pass%' or lower(status) like 'complete%')
    ) training_record
    inner join dna_con_iac_delta.iac_training_module training_module
    on training_record.TRAININGID = training_module.ID
    inner join
    (
        select * from dna_con_iac_delta.iac_lmslicense_formodule 
        where LENGTH(LICENSEID) = 4
    ) license_module
    on training_module.MODULETEMPLATEID = license_module.MODULETEMPLATEID
    left join 
    (
        select ID, AGENTCODE as AGENTCODE_cand, NAME, CANDIDATETYPEID from dna_con_iac_delta.iac_rec_can_can 
    ) candidate
    on training_record.TRAINEEID = candidate.ID
) iAC
on (trim(iAC.TRAINEEID) = trim(agt_mas.AGT_CD) or trim(iAC.agentcode_cand) = trim(agt_mas.AGT_CD)) and trim(LICENSE_TYPE) = trim(LICENSEID) and date(statusdate) <= date(agt_lic.EFF_DT)
where iAC.status is null
""")

# pandas-on-Spark view of the exceptions, followed by the exception count
df_T13_ps = df_T13.pandas_api()
display(df_T13_ps)
display(df_T13_ps.shape[0])

# persist the exceptions; the table and its schema are replaced on every run
(
    df_T13.write
    .mode("overwrite")
    .option("overwriteSchema", "True")
    .saveAsTable("aiahk_dna_p_catalog.dna_gia_blob.ca_agy_T13_1")
)

##### [HK & Macau] AGT_010 Identify agents who submitted policy applications where the policy application date was not within the effective period of the respective internal product license.


In [None]:
%sql
-- Rows of ca_agy_product_license with the DAS product record attached on trimmed plan code.
SELECT *
FROM aiahk_dna_p_catalog.dna_gia_blob.ca_agy_product_license AS A
LEFT JOIN aiahk_dna_p_catalog.dna_gia_blob.`_das_product` AS B
    ON TRIM(A.PLAN_CODE) = TRIM(B.PLAN_CD)

In [None]:
 spark.sql(
    f"""
CREATE OR REPLACE TEMPORARY VIEW df_T10_temp AS
    SELECT 
    CASE WHEN SUBSTRING(UI.POLICY_NO, 2, 1) <> '4' THEN 'HK' ELSE 'Macau' END HK_MACAU,
    UI.POLICY_NO, UI.APP_DATE, UI.POL_DATE, UI.PROPOSAL_TYPE, agt_map.AGT_CODE as UI_AGT_CODE, UI_Coverage.plan_no, UI_Coverage.PLAN_SHORT_NAME, DAS_product.PLAN_CD, PGS_product_license.LICENSE_CODE
    , agt_mas.AGT_CD, agt_mas.AGT_FULL_NAME, agt_mas.REGION, agt_mas.AGT_CONTRACT_DT, agt_mas.AGT_STATUS, agt_mas.TERMINATION_DT
    
    , agt_lic.LICENSE_DESC AS LICENSE_DESC, agt_lic.EFF_DT AS EFF_DATE, agt_lic.EXP_DT AS EXP_DATE
    , (agt_lic.LICENSE_TYPE IS NULL) as Exception
    , CASE WHEN agt_lic.LICENSE_TYPE IS NULL THEN agt_lic_all.MAX_EFF_DT ELSE NULL END AS MAX_EFF
    , CASE WHEN agt_lic.LICENSE_TYPE IS NULL THEN agt_lic_all.MAX_EXP_DT ELSE NULL END AS MAX_EXP

     FROM 
    (
       SELECT * FROM aiahk_dna_p_catalog.dna_gia_blob._ui_policy_app_review_period 
    ) UI
    LEFT JOIN 
      aiahk_dna_p_catalog.dna_gia_blob._ui_policy_app_review_period_agt_mapping agt_map
      ON UI.POLICY_NO = agt_map.POLICY_NO
    INNER JOIN  -- agents exist on AD base ( to fine tune later identification of sales channel )
    (
        SELECT * 
        FROM aiahk_dna_p_catalog.dna_gia_blob._das_agent_master 
    ) agt_mas
    on agt_map.AGT_CODE = agt_mas.AGT_CD
    LEFT JOIN 
    (
       SELECT POLICY_NO, PLAN_NO, PLAN_SHORT_NAME FROM aiahk_dna_p_catalog.dna_gia_blob._ui_policy_app_review_period_coverage 
    ) UI_Coverage
    ON UI.POLICY_NO = UI_Coverage.POLICY_NO
    left join aiahk_dna_p_catalog.dna_gia_blob._das_product DAS_product
    ON trim(UI_Coverage.PLAN_SHORT_NAME) = trim(DAS_product.PLAN_SHORT_NAME)    
    inner join aiahk_dna_p_catalog.dna_gia_blob.ca_agy_product_license PGS_product_license
    on trim(DAS_product.PLAN_CD) = trim(PGS_product_license.PLAN_CODE)

    LEFT JOIN (
        SELECT * 
        FROM aiahk_dna_p_catalog.dna_gia_blob._das_agent_license 
        WHERE LEN(TRIM(LICENSE_TYPE)) = 4   -- product licenses 
    ) agt_lic
      ON agt_mas.AGT_CD = agt_lic.AGT_CD
      AND trim(agt_lic.LICENSE_TYPE) = trim(PGS_product_license.LICENSE_CODE)
      AND (agt_lic.LICENSE_TYPE IS NOT NULL AND UI.APP_DATE BETWEEN agt_lic.EFF_DT AND agt_lic.EXP_DT) 

    LEFT JOIN (
        SELECT AGT_CD, LICENSE_TYPE, MAX(EFF_DT) as MAX_EFF_DT, MAX(EXP_DT) AS MAX_EXP_DT
        FROM aiahk_dna_p_catalog.dna_gia_blob._das_agent_license 
        GROUP BY AGT_CD, LICENSE_TYPE
    ) agt_lic_all
      ON agt_mas.AGT_CD = agt_lic_all.AGT_CD
      AND trim(agt_lic_all.LICENSE_TYPE) = trim(PGS_product_license.LICENSE_CODE)
      """)
    
 df_T10 = spark.sql(f"""
    SELECT A.* FROM df_T10_temp A
    INNER JOIN 
    (
      SELECT POLICY_NO, PLAN_CD FROM df_T10_temp
      GROUP BY POLICY_NO, PLAN_CD
      HAVING COUNT(LICENSE_DESC) = 0
    ) B
    on A.POLICY_NO = B.POLICY_NO AND A.PLAN_CD = B.PLAN_CD
    """)

# put _sqldf into df_das as pyspark pandas df
df_T10_ps = df_T10.pandas_api()
display(df_T10_ps)
display(df_T10_ps.shape[0])

df_T10.write.mode('overwrite').option("overwriteSchema", "True").saveAsTable('aiahk_dna_p_catalog.dna_gia_blob.ca_agy_T10')

##### [HK & Macau] AGT_014 Identify agents who generated proposals in iPGS+ without the relevant effective internal product licences.

In [None]:
# AGT_014 - Web PGS quotations.
# Step 1: temporary view of quotations with completestatus 1 or 2 created (Hong Kong time)
# inside the review period, one row per agent x plan x required product license.
# CREATETIME is divided by 1000 before conversion to a timestamp, then shifted from UTC to
# Asia/Hong_Kong. agt_lic only matches a license of the required type that is effective on the
# creation date, so Exception = true marks rows without one; MAX_EFF / MAX_EXP then carry the
# latest EFF_DT / EXP_DT on record for that license type.
spark.sql(f"""
    CREATE OR REPLACE TEMPORARY VIEW df_T14_temp_webPGS AS
    SELECT DISTINCT a.AGENTCODE,RIGHT('00000' || a.AGENTCODE,10) AS AGENTCODE_ALIAS, a.CREATETIME, a.CREATETIME_HKT
    , b.plancode, a.completestatus, PGS_product_license.LICENSE_CODE
    , agt_mas.AGT_CD, agt_mas.AGT_FULL_NAME, agt_mas.REGION, agt_mas.AGT_CONTRACT_DT, agt_mas.AGT_STATUS, agt_mas.TERMINATION_DT

    , agt_lic.LICENSE_DESC AS LICENSE_DESC, agt_lic.EFF_DT AS EFF_DATE, agt_lic.EXP_DT AS EXP_DATE
    , (agt_lic.LICENSE_TYPE IS NULL) as Exception
    , CASE WHEN agt_lic.LICENSE_TYPE IS NULL THEN agt_lic_all.MAX_EFF_DT ELSE NULL END AS MAX_EFF
    , CASE WHEN agt_lic.LICENSE_TYPE IS NULL THEN agt_lic_all.MAX_EXP_DT ELSE NULL END AS MAX_EXP

    FROM 
    (
        select *,  from_utc_timestamp(
                        to_timestamp(CREATETIME / 1000), 
                        'Asia/Hong_Kong') as CREATETIME_HKT
        from aiahk_dna_p_catalog.dna_con_webpgs_delta.webpgs_sqsquotation
        WHERE completestatus in (1,2)    -- generated proposal
        AND DATE(from_utc_timestamp(to_timestamp(CREATETIME / 1000), 'Asia/Hong_Kong')) BETWEEN '{review_period_start}' AND '{review_period_end}'
     ) a
    LEFT JOIN aiahk_dna_p_catalog.dna_con_webpgs_delta.webpgs_planselection b 
    ON a.SQSQUOTATIONID = b.SQSQUOTATIONID
    INNER JOIN aiahk_dna_p_catalog.dna_gia_blob.ca_agy_product_license PGS_product_license
    on trim(b.plancode) = trim(PGS_product_license.PLAN_CODE)

    INNER JOIN  -- agents exist on AD base ( to fine tune later identification of sales channel )
    (
        SELECT * 
        FROM aiahk_dna_p_catalog.dna_gia_blob._das_agent_master 
    ) agt_mas
    on RIGHT('00000' || a.AGENTCODE,10) = agt_mas.AGT_CD

    LEFT JOIN (
        SELECT * 
        FROM aiahk_dna_p_catalog.dna_gia_blob._das_agent_license 
    ) agt_lic
      ON agt_mas.AGT_CD = agt_lic.AGT_CD
      AND trim(agt_lic.LICENSE_TYPE) = trim(PGS_product_license.LICENSE_CODE)
      AND (agt_lic.LICENSE_TYPE IS NOT NULL AND DATE(a.CREATETIME_HKT) BETWEEN agt_lic.EFF_DT AND agt_lic.EXP_DT) 

    LEFT JOIN (
        SELECT AGT_CD, LICENSE_TYPE, MAX(EFF_DT) as MAX_EFF_DT, MAX(EXP_DT) AS MAX_EXP_DT
        FROM aiahk_dna_p_catalog.dna_gia_blob._das_agent_license 
        GROUP BY AGT_CD, LICENSE_TYPE
    ) agt_lic_all
      ON agt_mas.AGT_CD = agt_lic_all.AGT_CD
      AND trim(agt_lic_all.LICENSE_TYPE) = trim(PGS_product_license.LICENSE_CODE)
      """)

# Step 2: keep only the (AGENTCODE, CREATETIME, plancode) groups in which no row has a
# non-null LICENSE_DESC, i.e. none of the group's rows matched an effective license.
df_T14_webPGS = spark.sql(f"""
    SELECT A.* FROM df_T14_temp_webPGS  A
    INNER JOIN 
    (
      SELECT AGENTCODE, CREATETIME, plancode FROM df_T14_temp_webPGS
      GROUP BY AGENTCODE, CREATETIME, plancode
      HAVING COUNT(LICENSE_DESC) = 0
    ) B
    on A.AGENTCODE = B.AGENTCODE AND A.CREATETIME = B.CREATETIME AND A.plancode = B.plancode
    """)

# pandas-on-Spark view of the exceptions, followed by the exception count
df_T14_ps = df_T14_webPGS.pandas_api()
display(df_T14_ps)
display(df_T14_ps.shape[0])

# persist the exceptions; the table and its schema are replaced on every run
(
    df_T14_webPGS.write
    .mode("overwrite")
    .option("overwriteSchema", "True")
    .saveAsTable("aiahk_dna_p_catalog.dna_gia_blob.ca_agy_T14_webPGS")
)

In [None]:
# AGT_014 - iPos PGS quotations.
# Step 1: temporary view of quotations with completestatus 1 or 2 whose DATE(CREATEDDATETIME)
# lies inside the review period, one row per agent x plan x required product license.
# agt_lic only matches a license of the required type that is effective on that date, so
# Exception = true marks rows without one; MAX_EFF / MAX_EXP then carry the latest
# EFF_DT / EXP_DT on record for that license type.
# NOTE(review): CREATETIME_HKT is simply DATE(CREATEDDATETIME) with no time-zone shift,
# unlike the Web PGS cell - confirm CREATEDDATETIME is already stored in Hong Kong time.
spark.sql(f"""
    CREATE OR REPLACE TEMPORARY VIEW df_T14_temp_iposPGS AS
    SELECT DISTINCT a.AGENTCODE,RIGHT('00000' || a.AGENTCODE,10) AS AGENTCODE_ALIAS, a.CREATEDDATETIME, a.CREATETIME_HKT
    , b.plancode, a.completestatus, PGS_product_license.LICENSE_CODE
    , agt_mas.AGT_CD, agt_mas.AGT_FULL_NAME, agt_mas.REGION, agt_mas.AGT_CONTRACT_DT, agt_mas.AGT_STATUS, agt_mas.TERMINATION_DT

    , agt_lic.LICENSE_DESC AS LICENSE_DESC, agt_lic.EFF_DT AS EFF_DATE, agt_lic.EXP_DT AS EXP_DATE
    , (agt_lic.LICENSE_TYPE IS NULL) as Exception
    , CASE WHEN agt_lic.LICENSE_TYPE IS NULL THEN agt_lic_all.MAX_EFF_DT ELSE NULL END AS MAX_EFF
    , CASE WHEN agt_lic.LICENSE_TYPE IS NULL THEN agt_lic_all.MAX_EXP_DT ELSE NULL END AS MAX_EXP

    FROM 
    (
        select *, DATE(CREATEDDATETIME) as CREATETIME_HKT
        from aiahk_dna_p_catalog.dna_con_mag_delta.mag_t_sqsquotation
        WHERE completestatus in (1,2)    -- generated proposal
        AND DATE(CREATEDDATETIME) BETWEEN '{review_period_start}' AND '{review_period_end}'
     ) a
    LEFT JOIN aiahk_dna_p_catalog.dna_con_mag_delta.mag_t_planselection b 
    ON a.SQSQUOTATIONID = b.SQSQUOTATIONID
    INNER JOIN aiahk_dna_p_catalog.dna_gia_blob.ca_agy_product_license PGS_product_license
    on trim(b.plancode) = trim(PGS_product_license.PLAN_CODE)

    INNER JOIN  -- agents exist on AD base ( to fine tune later identification of sales channel )
    (
        SELECT * 
        FROM aiahk_dna_p_catalog.dna_gia_blob._das_agent_master 
    ) agt_mas
    on RIGHT('00000' || a.AGENTCODE,10) = agt_mas.AGT_CD

    LEFT JOIN (
        SELECT * 
        FROM aiahk_dna_p_catalog.dna_gia_blob._das_agent_license 
    ) agt_lic
      ON agt_mas.AGT_CD = agt_lic.AGT_CD
      AND trim(agt_lic.LICENSE_TYPE) = trim(PGS_product_license.LICENSE_CODE)
      AND (agt_lic.LICENSE_TYPE IS NOT NULL AND DATE(a.CREATETIME_HKT) BETWEEN agt_lic.EFF_DT AND agt_lic.EXP_DT) 

    LEFT JOIN (
        SELECT AGT_CD, LICENSE_TYPE, MAX(EFF_DT) as MAX_EFF_DT, MAX(EXP_DT) AS MAX_EXP_DT
        FROM aiahk_dna_p_catalog.dna_gia_blob._das_agent_license 
        GROUP BY AGT_CD, LICENSE_TYPE
    ) agt_lic_all
      ON agt_mas.AGT_CD = agt_lic_all.AGT_CD
      AND trim(agt_lic_all.LICENSE_TYPE) = trim(PGS_product_license.LICENSE_CODE)

      """)

# Step 2: keep only the (AGENTCODE, CREATEDDATETIME, plancode) groups in which no row has a
# non-null LICENSE_DESC. Note the Python variable deliberately keeps the same name as the view.
df_T14_temp_iposPGS = spark.sql(f"""
    SELECT A.* FROM df_T14_temp_iposPGS  A
    INNER JOIN 
    (
      SELECT AGENTCODE, CREATEDDATETIME, plancode FROM df_T14_temp_iposPGS
      GROUP BY AGENTCODE, CREATEDDATETIME, plancode
      HAVING COUNT(LICENSE_DESC) = 0
    ) B
    on A.AGENTCODE = B.AGENTCODE AND A.CREATEDDATETIME = B.CREATEDDATETIME AND A.plancode = B.plancode
    """)

# pandas-on-Spark view of the exceptions, followed by the exception count
df_T14_ps_ipos = df_T14_temp_iposPGS.pandas_api()
display(df_T14_ps_ipos)
display(df_T14_ps_ipos.shape[0])

# persist the exceptions; the table and its schema are replaced on every run
(
    df_T14_temp_iposPGS.write
    .mode("overwrite")
    .option("overwriteSchema", "True")
    .saveAsTable("aiahk_dna_p_catalog.dna_gia_blob.ca_agy_T14_ipos")
)
