# EZCAP -> WIPRO Reconciliation
Compare membership between WIPRO and EZCAP: identify member IDs that appear in EZCAP but not in WIPRO, and record a reason for each discrepancy.

In [11]:
import os
import pandas as pd
from datetime import date

# Never truncate columns when a DataFrame is displayed.
pd.options.display.max_columns = None

# Pull date of the WIPRO report, kept as a YYYYMMDD string.
# NOTE(review): the workbook loaded below is dated 10-18-2021 while this value
# is 2021-10-20 -- confirm which date is the actual pull date.
wipro_date = "20211020"

In [3]:
# --- Input locations and read options ---------------------------------------
# WIPRO eligibility extract (Excel workbook) and the worksheet to read from it.
path_wipro = '../../data/wipro/10.18.21_443918 HP Elig V1.xlsx'
sheet_name = '443918 HP Elig V1 - 10-18-2021'
# Read these two WIPRO columns as text rather than letting pandas infer a dtype.
dtype_wipro = {'MBD Part D Eligibilty Start Da': str, 'RACE_CD': str}

# EZCAP eligibility extract (CSV); REP_POP_ID is read as text.
path_ezcap = '../../data/csv/HP_Elig_EZCAP_DW.csv'
dtype = {'REP_POP_ID': str}

# --- WIPRO --------------------------------------------------------------------
# Rename the key column to `memnbr` (the name used on the EZCAP side) and trim
# surrounding whitespace from the member number and the enrollment status.
df_wipro = (
    pd.read_excel(path_wipro, sheet_name=sheet_name, dtype=dtype_wipro)
    .rename(columns={'Memnbr': 'memnbr'})
    .assign(**{
        'memnbr': lambda frame: frame['memnbr'].str.strip(),
        'Enroll Status': lambda frame: frame['Enroll Status'].str.strip(),
    })
)

# --- EZCAP --------------------------------------------------------------------
df_ezcap = pd.read_csv(path_ezcap, dtype=dtype)

In [4]:
# Preview the first three WIPRO rows as a quick check of the load.
df_wipro.head(3)

Unnamed: 0,memnbr,REP_POP_ID,PO_NBR,mcd_cat,SNPTYPE,Eff Start Date,Eff End Date,Enroll Status,Enroll Reason,MBD Part D Eligibilty Start Da,DSINFO PART D Eligibilty Start,delig_ind,hospice_ben,medical ben,rx_ben,dental_ben,mh_inp_ben,mh_dn_ben,mh_outer_ben,cd_inp_ben,cd_dn_ben,cd_outer_ben,RACE_CD,RACE_NAME
0,1AA1FX4NA00,14129 HMO,PO_NBR,mcd_cat,7,20200101,20210228,EAPRV,CMSAPRV,20140701.0,20140701,P,N,Y,Y,Y,Y,Y,Y,Y,Y,Y,1,WHITE
1,1AA1FX4NA00,14129 HMO,PO_NBR,mcd_cat,12,20210301,99999999,EAPRV,CMSAPRV,20140701.0,20140701,P,N,Y,Y,Y,Y,Y,Y,Y,Y,Y,1,WHITE
2,1AA3XY6MY70,14129 HMO,PO_NBR,mcd_cat,7,20200101,20201231,EAPRV,CMSAPRV,,20161001,P,N,Y,Y,Y,Y,Y,Y,Y,Y,Y,4,ASIAN


In [5]:
# Preview the first three EZCAP rows as a quick check of the load.
df_ezcap.head(3)

Unnamed: 0,memnbr,REP_POP_ID,PO_NBR,mcd_cat,SNPTYPE,elig_start,elig_end,delig_ind,hospice_ben,medical_ben,rx_ben,dental_ben,mh_inp_ben,mh_dn_ben,mh_outer_ben,cd_inp_ben,cd_dn_ben,cd_outer_ben
0,1A05CC7XY50,14129,,,7,20200101,20201231,P,N,Y,Y,Y,Y,Y,Y,Y,Y,Y
1,1A14RA9QG85,14129,,,9,20210901,20991231,P,N,Y,Y,Y,Y,Y,Y,Y,Y,Y
2,1A15GQ4GY59,99999,,,11,20210101,20991231,P,N,Y,Y,Y,Y,Y,Y,Y,Y,Y


In [7]:
# Members present in EZCAP but absent from WIPRO.
# Left-join the distinct EZCAP member numbers to the distinct WIPRO member
# numbers; rows the merge indicator marks "left_only" have no WIPRO match.
df_eo = (
    df_ezcap[['memnbr']]
    .drop_duplicates()
    .merge(
        df_wipro[['memnbr']].drop_duplicates(),
        on='memnbr',
        how='left',
        indicator=True,
    )
    .loc[lambda merged: merged['_merge'] == 'left_only']
)

# Number of distinct EZCAP-only member IDs (reused by the tallies further down).
n_total = df_eo['memnbr'].nunique(dropna=False)
print(f'{n_total} member ID\'s found in EZCAP only.')

37 member ID's found in EZCAP only.


In [8]:
# Bring back the full EZCAP records for the EZCAP-only members (inner join on
# memnbr) and give every row a placeholder reason, to be refined in later cells.
df_eo = (
    df_eo[['memnbr']]
    .drop_duplicates()
    .merge(df_ezcap, on='memnbr', how='inner')
    .assign(reason='unknown')
)

df_eo.head(3)

Unnamed: 0,memnbr,REP_POP_ID,PO_NBR,mcd_cat,SNPTYPE,elig_start,elig_end,delig_ind,hospice_ben,medical_ben,rx_ben,dental_ben,mh_inp_ben,mh_dn_ben,mh_outer_ben,cd_inp_ben,cd_dn_ben,cd_outer_ben,reason
0,1AU5AW3VG89,14378,,,5,20211101,20991231,P,N,Y,Y,Y,Y,Y,Y,Y,Y,Y,unknown
1,1E98KY6DP39,14378,,,5,20211101,20991231,P,N,Y,Y,Y,Y,Y,Y,Y,Y,Y,unknown
2,2JQ9Q26YC32,14378,,,5,20211101,20991231,P,N,Y,Y,Y,Y,Y,Y,Y,Y,Y,unknown


In [15]:
# Members whose EZCAP eligibility starts after the WIPRO report pull date.
#
# TODO: find a better way to detect an application date that falls after the
# WIPRO report pull date. The current method is to look up each individual in
# WIPRO to validate. Options:
#   - include record_create_date in the data pull stage and filter when
#     converting to pipe-delimited, or
#   - pull part of the same data separately, including record_create_date, so
#     the validation can be done here.

# wipro_date is a YYYYMMDD string, so convert it once and compare numerically
# against elig_start instead of splicing it into a query string.
elig_after_wipro_pull_date = df_eo.loc[
    df_eo['elig_start'] > int(wipro_date), 'memnbr'
].unique()

# Tag every row of a flagged member, not only the row that starts late.
# A boolean mask replaces the row-wise apply, which is slow and does not return
# a Series when the frame is empty.
df_eo.loc[
    df_eo['memnbr'].isin(elig_after_wipro_pull_date), 'reason'
] = 'app date after wipro report pull date'

# Tally
print(f'{n_total} member ID\'s found in EZCAP only.\n')
print(df_eo.groupby(['reason'])['memnbr'].nunique())

37 member ID's found in EZCAP only.

reason
app date after wipro report pull date    34
unknown                                   3
Name: memnbr, dtype: int64


In [17]:
# Disenrolled members.
# These IDs were identified by looking the members up in WIPRO and finding
# their Enroll Status to be 'DAPRV'.
list_disenrolled = ['2RU5G70RQ86', '3JY0YX0EX46', '6FA7GY9AA39']

# Overwrite the reason for every row belonging to a listed member.
# A boolean mask replaces the row-wise apply, which is slow and does not return
# a Series when the frame is empty.
df_eo.loc[df_eo['memnbr'].isin(list_disenrolled), 'reason'] = 'Enroll Status DAPRV'

# Tally
print(f'{n_total} member ID\'s found in EZCAP only.\n')
print(df_eo.groupby(['reason'])['memnbr'].nunique())

37 member ID's found in EZCAP only.

reason
Enroll Status DAPRV                       3
app date after wipro report pull date    34
Name: memnbr, dtype: int64
