# VN Customer Service Data Analytics

### Objective: Curate a data table as input to Power BI to visualize Inbound Call and Inbound Email case categories

The output serves as input for prioritizing the scope of solutions that reduce total inbound call and inbound email volume, improve customer experience, and increase self-service solution adoption.

In [0]:
%run "/Repos/dung_nguyen_hoang@mfcgd.com/Utilities/Functions"

<strong>Load libs, params and paths</strong>

In [0]:
from pyspark.sql.functions import *
from datetime import date, datetime, timedelta

# Spark SQL reads this option under its fully-qualified key. The bare
# 'partitionOverwriteMode' key is not a recognised session setting, so the
# original call stored a value that nothing consulted.
spark.conf.set('spark.sql.sources.partitionOverwriteMode', 'dynamic')

# Root folders of the source databases.
sfdc_path, cics_path, cas_path, dm_path = (
    '/mnt/prod/Published/VN/Master/VN_PUBLISHED_SFDC_EASYCLAIMS_DB/',
    '/mnt/prod/Published/VN/Master/VN_PUBLISHED_CICS_DB/',
    '/mnt/prod/Published/VN/Master/VN_PUBLISHED_CAS_DB/',
    '/mnt/prod/Curated/VN/Master/VN_CURATED_DATAMART_DB/',
)

# Folder names of the individual source tables.
tblSrc1, tblSrc2, tblSrc3, tblSrc4, tblSrc5, tblSrc6, tblSrc7 = (
    'CASE/',
    'USER/',
    'TCISC_SERVICE_DETAILS/',
    'TFIELD_VALUES/',
    'tpolidm_daily/',
    'tcustdm_daily/',
    'tagtdm_daily/',
)

# Lists handed to load_parquet_files further down.
src_paths = [sfdc_path, cics_path, cas_path, dm_path]
src_files = [
    tblSrc1, tblSrc2, tblSrc3, tblSrc4,
    tblSrc5, tblSrc6, tblSrc7,
]

# Number of months to step back (0: last month-end, 1: the month-end before that, ...).
x = 0
today = datetime.now()
first_day_of_current_month = today.replace(day=1)

# Walk back x months, always landing on the first day of a month.
current_month = first_day_of_current_month
for i in range(x):
    first_day_of_previous_month = (current_month - timedelta(days=1)).replace(day=1)
    current_month = first_day_of_previous_month

# The day before the first of the reference month is the month-end reported up to.
last_day_of_x_months_ago = current_month - timedelta(days=1)

# The start of the reporting window is fixed at 1 January 2023.
st_mth = date(2023, 1, 1).isoformat()
end_mth = last_day_of_x_months_ago.strftime('%Y-%m-%d')
st_yr = st_mth[:4]

print(f"Start and end date: {st_mth} {end_mth}")
print("Start year: " + st_yr)

<strong>Load parquet files and convert to temp views</strong>

In [0]:
# Load the source tables named in src_files from the folders in src_paths.
# load_parquet_files is not defined in this notebook; presumably it comes from the
# Utilities/Functions notebook pulled in by %run above — TODO confirm how it pairs
# each path with each file name.
list_df = load_parquet_files(src_paths, src_files)

In [0]:
# Hand the loaded data to generate_temp_view (helper not defined in this notebook).
# NOTE(review): the queries below refer to views named `case`, tpolidm_daily,
# tagtdm_daily and tcustdm_daily — presumably registered here; confirm the naming rule.
generate_temp_view(list_df)

## Data Curation

Extract case data from SFDC to form the base

In [0]:
# Base extract: one row per inbound-call / email case, enriched with policy,
# servicing-agent and policy-owner attributes.
#
# Changes versus the previous version of this query:
#   * 'Reassigned' is now single-quoted like every other literal; a double-quoted
#     token is read as an identifier when double-quoted identifiers are enabled.
#   * NVL replaced by the standard COALESCE (same result in Spark SQL).
#
# NOTE(review): this is a plain string, so {st_yr} is NOT interpolated here —
# presumably sql_to_df substitutes it; confirm, otherwise the query cannot parse.
# NOTE(review): only a lower bound on the creation year is applied; end_mth is not
# used, so cases created after the reporting month-end are included.
# NOTE(review): SELECT DISTINCT may be hiding row multiplication from the joins if
# the *_daily tables hold more than one row per key — verify.
sfdc_case_string = """
SELECT DISTINCT
  'VN'                              AS Market
, A.CaseNumber                      AS Case_ID
, A.CreatedDate                     AS Created_Date
, 'IFP'                             AS LOB
, ''                                AS Sub_Group
, A.Case_Type__c                    AS Queue
, A.CreatedBy                       AS Created_By
, A.Origin                          AS Origin_of_Inbound_Inquiry
, A.Requester_Role__c               AS Inquirer_Group
, ''                                AS Inquirer_Rel_To_Pol
, A.Policy_Number__c                AS Pol_Num
, A.First_Contact_Resolution__c
, date_trunc('DD', A.CreatedDate)   AS case_creation_date
, A.LastModifiedDate
, COALESCE(A.Type, 'NaN')           AS Type
, A.Type__c                         AS Sub_Type
, B.plan_code
, B.pol_iss_dt
, B.pol_eff_dt
, B.xpry_dt
, B.bill_mthd
, B.pmt_mode
, B.po_num
, B.po_iss_age
, B.insrd_num
, B.sa_code
, B.wa_code
, CASE WHEN B.wa_code = B.sa_code THEN 'Never Reassigned' ELSE 'Reassigned' END AS Ever_Reassigned
, datediff(A.CreatedDate, B.pol_eff_dt) AS Pol_Tenure_in_Days
, C.channel
, C.rank_code
, C.br_code
, D.cli_typ
, D.birth_dt
, D.sex_code

FROM `case` A
LEFT JOIN tpolidm_daily B ON A.Policy_Number__c = B.pol_num
LEFT JOIN tagtdm_daily C ON B.sa_code = C.agt_code
LEFT JOIN tcustdm_daily D ON B.po_num = D.cli_num

WHERE 1=1
AND A.Origin IN ('Inbound call', 'Email')
AND Year(A.CreatedDate) >= {st_yr}
"""

# sql_to_df is an external helper; the meaning of its second argument (1) is not
# visible in this notebook.
df_vn_sfdc_case = sql_to_df(sfdc_case_string, 1, spark)

In [0]:
# Expose the case extract to the %sql cells below under the name df_vn_sfdc_case.
#print(df_vn_sfdc_case.count())  # optional row-count check, left disabled
df_vn_sfdc_case.createOrReplaceTempView('df_vn_sfdc_case')

In [0]:
%sql
-- new build
-- For every case, look up the creation date of the previous and the next case
-- raised for the same policy with the same Type and Sub_Type.
--
-- The window is partitioned on the three columns themselves rather than on their
-- concatenation: concat() yields NULL as soon as Sub_Type is NULL, which used to put
-- all such cases, across different policies, into a single partition and produced
-- wrong Last/Next dates. Partitioning on the columns also avoids collisions from the
-- delimiter-less concatenation.
-- Case_ID is added as a tie-breaker so cases created on the same day are ordered
-- deterministically.
-- KEY is kept unchanged for downstream compatibility; it is still NULL when Sub_Type
-- is NULL.
CREATE OR REPLACE TEMPORARY VIEW df_Last_Next_Date2 AS

SELECT DISTINCT
  Case_ID
, Pol_Num
, concat(Pol_Num, concat(Type, Sub_Type)) AS KEY
, case_creation_date
, lag(case_creation_date) OVER (
      PARTITION BY Pol_Num, Type, Sub_Type
      ORDER BY case_creation_date, Case_ID
  ) AS Last_Case_Date
, lead(case_creation_date) OVER (
      PARTITION BY Pol_Num, Type, Sub_Type
      ORDER BY case_creation_date, Case_ID
  ) AS Next_Case_Date

FROM (
    SELECT DISTINCT Case_ID, case_creation_date, Pol_Num, Type, Sub_Type
    FROM df_vn_sfdc_case
) AS base_cases
-- This comparison also drops rows whose Pol_Num is NULL.
WHERE Pol_Num <> '000'

In [0]:
%sql

-- De-duplicated (CaseNumber, Log_Date, user_num) rows taken from df_CWS_Last_Log.
-- NOTE(review): df_CWS_Last_Log is not created in the visible part of this
-- notebook — confirm where it is registered.
CREATE OR REPLACE TEMPORARY VIEW df_CWS_Last_Log2 AS
SELECT DISTINCT
    CaseNumber,
    Log_Date,
    user_num
FROM df_CWS_Last_Log


In [0]:
%sql

-- Case-level output: case and policy attributes from df_sg_sfox2_case (A), the
-- previous/next case dates (B), the CWS user of the policy owner (C) and the CWS
-- log date (D).
--
-- Fix: df_Last_Next_Date2 exposes the case number as Case_ID (it has no CaseNumber
-- column), so the join to B now uses B.Case_ID.
--
-- NOTE(review): df_sg_sfox2_case and df_CWS_User_Num_unique2 are not created in the
-- visible part of this notebook; the extract built above is df_vn_sfdc_case, whose
-- column names differ from the ones selected here — confirm the intended source.
-- NOTE(review): df_CWS_Last_Log2 is distinct on (CaseNumber, Log_Date, user_num), so
-- a case with several log rows yields several output rows — confirm this is intended.
CREATE OR REPLACE TEMPORARY VIEW df_Output AS

SELECT DISTINCT

A.CAS_User_Full_Name__c
, A.CaseNumber
, A.Case_Record_Type__c
, A.OwnerProfileName__c
, A.OwnerRole__c
, A.Plan_Name__c
, A.Plan_Name_Rider__c
, A.PlanNameCap__c
, A.Policy_Number__c
, A.ServicingAgent__c
, A.ServicingAgentName__c
, A.User_Department__c
, A.Complaint_Channel__c
, A.CreatedById
, A.CreatedDate
, A.isDeleted
, A.Origin
, A.Sub_Type__c
, A.Type
, A.case_creation_date
, A.LastModifiedDate
, A.Policy_is_high_networth
, A.policy_is_ul_plan
, A.policy_orphan_indicator
, A.policy_base_category
, A.policy_base_needs
, A.policy_issue_date
, A.policy_effective_date
, A.policy_expiry_date
, A.policy_bill_method
, A.policy_bill_method2
, A.policy_payment_mode
, A.policy_owner_client_number
, A.policy_owner_effective_age
, A.policy_insured_client_number
, A.policy_servicing_agent_code
, A.policy_writting_agent_code
, A.Pol_Tenure_in_Days
, A.channel_group
, A.channel
, A.unit_name
, A.branch_name
, A.zone
, A.dbs_group
, A.client_type
, A.client_birth_date
, A.client_gender

, B.Last_Case_Date
, B.Next_Case_Date

, C.user_num
, C.Create_Date

, D.Log_date


FROM df_sg_sfox2_case A
LEFT JOIN df_Last_Next_Date2 B ON A.CaseNumber = B.Case_ID
LEFT JOIN df_CWS_User_Num_unique2 C ON A.policy_owner_client_number = C.cli_num
LEFT JOIN df_CWS_Last_Log2 D ON A.CaseNumber = D.CaseNumber



In [0]:
%sql
-- Sanity check: number of rows in the df_Output view.
SELECT COUNT(*)
FROM df_output

## Final Output = cservicedm

In [0]:

# Final data mart: the case-level columns renamed to the names consumed downstream.
#
# Fix: df_Last_Next_Date2 exposes the case number as Case_ID (it has no CaseNumber
# column), so the join to B now uses B.Case_ID.
#
# NOTE(review): this query reads A.CC_* columns (e.g. CC_Case_Record_Type__c) while
# the df_Output view reads the same table without the CC_ prefix — only one of the
# two spellings can exist; confirm against df_sg_sfox2_case.
# NOTE(review): the alias Writing_Agente looks like a typo for Writing_Agent; it is
# left unchanged because it is part of the written output schema.
# NOTE(review): df_sg_sfox2_case and df_CWS_User_Num_unique2 are not created in the
# visible part of this notebook.
cservicedm = spark.sql("""
    SELECT
        A.CAS_User_Full_Name__c          AS CAS_User_Name
        , A.CaseNumber                   AS Case_ID
        , A.CC_Case_Record_Type__c       AS CC_Case_Record_Type
        , A.CC_OwnerProfileName__c       AS CC_OwnerProfileName
        , A.CC_OwnerRole__c              AS CC_OwnerRole
        , A.CC_Plan_Name__c              AS CC_Plan_Name
        , A.CC_Plan_Name_Rider__c        AS CC_Rider_Name
        , A.CC_PlanNameCap__c            AS CC_Plan_Name_Cap
        , A.CC_Policy_Number__c          AS Pol_Num
        , A.CC_ServicingAgent__c         AS CC_Servicing_Agent
        , A.CC_ServicingAgentName__c     AS CC_Servicing_AgentName
        , A.CC_User_Department__c        AS CC_User_Department
        , A.Complaint_Channel__c         AS Complaint_Channel
        , A.CreatedById                  AS CreatedBy_ID
        , A.CreatedDate                  AS Created_Date
        , A.isDeleted                    AS IsDeleted
        , A.Origin                       AS Origin_of_Inbound_Inquiry
        , A.Sub_Type__c                  AS Lv_2_Case_Category
        , A.Type                         AS Lv_1_Case_Category
        , A.case_creation_date           AS Case_Creation_Date
        , A.LastModifiedDate
        , A.Policy_is_high_networth      AS Is_HNW
        , A.policy_is_ul_plan            AS Is_UL
        , A.policy_orphan_indicator      AS Policy_Orphan_Indicator_curr
        , A.policy_base_category         AS Policy_Base_Category
        , A.policy_base_needs            AS Policy_Base_Needs
        , A.policy_issue_date            AS Policy_Issue_Date
        , A.policy_effective_date        AS Policy_Effective_Date
        , A.policy_expiry_date           AS Policy_Expiry_Date_curr
        , A.policy_bill_method           AS Bill_Method_curr
        , A.policy_bill_method2          AS Bill_Method2_curr
        , A.policy_payment_mode          AS Payment_Mode_curr
        , A.policy_owner_client_number   AS Policy_Owner_Client_Number
        , A.policy_owner_effective_age   AS PO_Effective_Age
        , A.policy_insured_client_number AS Insured_Client_Number_curr
        , A.policy_servicing_agent_code  AS Servicing_Agent_curr
        , A.policy_writting_agent_code   AS Writing_Agente
        , A.Pol_Tenure_in_Days           AS Pol_Tenure_in_Days
        , A.channel_group                AS SA_Channel_Group_curr
        , A.channel                      AS SA_Channel_curr
        , A.unit_name                    AS SA_Unit_Name_curr
        , A.branch_name                  AS SA_Branch_Name_curr
        , A.zone                         AS SA_Zone_curr
        , A.dbs_group                    AS PO_DBS_Group_curr
        , A.client_type                  AS PO_Client_Type_curr
        , A.client_birth_date            AS PO_DOB
        , A.client_gender                AS PO_Gender

        , B.Last_Case_Date               AS Last_Case_Date
        , B.Next_Case_Date               AS Next_Case_Date

        , C.user_num                     AS CWS_User_Num
        , C.Create_Date                  AS CWS_Last_Create_Date_curr

        , D.Log_Date                     AS CWS_Last_Login_Date

    FROM df_sg_sfox2_case A
    LEFT JOIN df_Last_Next_Date2 B ON A.CaseNumber = B.Case_ID
    LEFT JOIN df_CWS_User_Num_unique2 C ON A.policy_owner_client_number = C.cli_num
    LEFT JOIN df_CWS_Last_Log2 D ON A.CaseNumber = D.CaseNumber
""")

#Switch back Spark conf to avoid date adjustment
spark.conf.set("spark.sql.session.timeZone","UTC+0")




## Write to ADLS GEN2

In [0]:
# Pin the session time zone to a zero UTC offset before writing, so dates are not
# shifted on output.
spark.conf.set("spark.sql.session.timeZone", "UTC+0")

# Persist the data mart as parquet on ADLS Gen2, replacing any previous output.
_cservicedm_target = "/mnt/lab/vn/project/cpm/datamarts/customer_service_data/cservicedm"
cservicedm.write.parquet(_cservicedm_target, mode="overwrite")