# Data Warehouse Medicare National QA - Claim ICD Proc

## Initialization

Just loading packages that will be used and initializing connection to GP DB.

In [1]:
import pandas as pd
import sys
import psycopg2
from tqdm import tqdm
sys.path.append('H:/uth_helpers')
from db_utils import get_dsn

In [2]:
connection = psycopg2.connect(get_dsn()+' keepalives=1 keepalives_idle=30 keepalives_interval=10')
connection.autocommit = True

## Row Count and Claim Count

In [3]:
query = ''' drop table if exists qa_reporting.dw_mcrn_claim_icd_proc_counts;
create table qa_reporting.dw_mcrn_claim_icd_proc_counts
(
    calendar_year int,
    dw_row_count bigint,
    src_row_count bigint,
    row_count_diff bigint,
    row_count_percentage float,
    dw_uth_clm_id_count bigint,
    dw_src_clm_id_count bigint,
    src_clm_count bigint,
    clm_count_diff bigint,
    clm_count_percentage float,
    dw_uth_mbr_id_count bigint,
    dw_src_mbr_id_count bigint,
    src_mbr_count bigint,
    mbr_count_diff bigint,
    mbr_count_percentage float,
    date_generated date
);
'''

with connection.cursor() as cursor:
    cursor.execute(query)

In [4]:
with connection.cursor() as cursor:
    query = f'''
    insert into qa_reporting.dw_mcrn_claim_icd_proc_counts
    (calendar_year, dw_row_count, dw_uth_clm_id_count, date_generated)
    select year, count(a.*), count(distinct uth_claim_id), current_date
    from (
        select year, uth_claim_id, uth_member_id, proc_cd, icd_version
        from dw_staging.mcrn_claim_icd_proc
        where proc_position = 1
    ) a
    group by 1
    '''
    
    cursor.execute(query)

    query = f'''
    update qa_reporting.dw_mcrn_claim_icd_proc_counts b
    set dw_src_clm_id_count = count
    from (
        select year,  count(distinct claim_id_src) as count 
        from dw_staging.mcrn_claim_icd_proc
    group by 1) a
    where a.year = b.calendar_year
    '''

    cursor.execute(query)

    query = f'''
    update qa_reporting.dw_mcrn_claim_icd_proc_counts b
    set dw_uth_mbr_id_count = count
    from (
        select year, count(distinct uth_member_id) as count 
        from dw_staging.mcrn_claim_icd_proc
    group by 1) a
    where a.year = b.calendar_year
    '''

    cursor.execute(query)
    
    query = f'''
    update qa_reporting.dw_mcrn_claim_icd_proc_counts b
    set dw_src_mbr_id_count = count
    from (
        select year, count(distinct member_id_src) as count 
        from dw_staging.mcrn_claim_icd_proc
    group by 1) a
    where a.year = b.calendar_year
    '''

    cursor.execute(query)

In [3]:
with connection.cursor() as cursor:
    query = '''
    with mcrn_claims as (
        select extract(year from prcdr_dt1::date) as year, bene_id, clm_id
        from medicare_national.outpatient_base_claims_k
        where icd_prcdr_cd1 is not null
        union
        select extract(year from prcdr_dt1::date) as year, bene_id, clm_id
        from medicare_national.inpatient_base_claims_k
        where icd_prcdr_cd1 is not null
        union
        select extract(year from prcdr_dt1::date) as year, bene_id, clm_id
        from medicare_national.snf_base_claims_k
        where icd_prcdr_cd1 is not null
    ),
        mcrn_claims_counts as (
            select year as calendar_year, count(*) as src_row_count, count(distinct bene_id) src_mbr_count, count(distinct clm_id) src_clm_count
            from mcrn_claims
            group by 1
        )
    update qa_reporting.dw_mcrn_claim_icd_proc_counts a
    set src_row_count = b.src_row_count,
        row_count_diff = a.dw_row_count - b.src_row_count,
        row_count_percentage = 100. * abs(a.dw_row_count - b.src_row_count) / b.src_row_count,
        src_clm_count = b.src_clm_count,
        clm_count_diff = a.dw_uth_clm_id_count - b.src_clm_count,
        clm_count_percentage = 100. * abs(a.dw_uth_clm_id_count - b.src_clm_count) / b.src_clm_count,
        src_mbr_count = b.src_mbr_count,
        mbr_count_diff = a.dw_uth_mbr_id_count - b.src_mbr_count,   
        mbr_count_percentage = 100. * abs(a.dw_uth_mbr_id_count - b.src_mbr_count) / b.src_mbr_count
    from mcrn_claims_counts b
    where a.calendar_year = b.calendar_year
    ;
    '''

    cursor.execute(query)

In [4]:
query = '''select * from qa_reporting.dw_mcrn_claim_icd_proc_counts;'''

df = pd.read_sql(query, con=connection)
df.sort_values(['clm_count_percentage'], ascending=False)



Unnamed: 0,calendar_year,dw_row_count,src_row_count,row_count_diff,row_count_percentage,dw_uth_clm_id_count,dw_src_clm_id_count,src_clm_count,clm_count_diff,clm_count_percentage,dw_uth_mbr_id_count,dw_src_mbr_id_count,src_mbr_count,mbr_count_diff,mbr_count_percentage,date_generated
2,2021,246420,246420,0,0.0,246419,247473,246420,-1,0.000406,175688,175689,175063,625,0.357014,2023-12-18
0,2018,307491,307491,0,0.0,307491,308756,307491,0,0.0,215291,215291,214704,587,0.2734,2023-12-18
1,2020,263158,263158,0,0.0,263158,264554,263158,0,0.0,185789,185789,185054,735,0.397181,2023-12-18
3,2001,1,1,0,0.0,1,1,1,0,0.0,1,1,1,0,0.0,2023-12-18
4,2014,325958,325958,0,0.0,325958,327489,325958,0,0.0,224910,224910,224228,682,0.304155,2023-12-18
5,2019,301549,301549,0,0.0,301549,302778,301549,0,0.0,211075,211075,210503,572,0.27173,2023-12-18
6,2006,2,2,0,0.0,2,2,2,0,0.0,1,1,1,0,0.0,2023-12-18
7,2017,313424,313424,0,0.0,313424,314703,313424,0,0.0,219985,219985,219355,630,0.287206,2023-12-18
8,2012,7,7,0,0.0,7,8,7,0,0.0,3,3,3,0,0.0,2023-12-18
9,2011,2,2,0,0.0,2,2,2,0,0.0,2,2,2,0,0.0,2023-12-18


## ICD Procedure Codes

In [7]:
query = '''drop table if exists qa_reporting.dw_mcrn_icd_proc_counts;
select year, proc_cd, count(*) as proc_count
into qa_reporting.dw_mcrn_icd_proc_counts
from dw_staging.mcrn_claim_icd_proc
group by 1,2;
'''

with connection.cursor() as cursor:
    cursor.execute(query)

In [8]:
icd_proc_cd_df = pd.read_sql('select * from qa_reporting.dw_mcrn_icd_proc_counts;', con=connection)
icd_proc_cd_df



Unnamed: 0,year,proc_cd,proc_count
0,2018,0JU837Z,1
1,2018,027F4ZZ,1
2,2021,037K0DZ,6
3,2015,02WYX3Z,1
4,2021,01N83ZZ,2
...,...,...,...
110357,2019,00164ZB,5
110358,2019,0MSN0ZZ,2
110359,2019,0WPC33Z,1
110360,2021,0Y6H0Z3,92


In [9]:
query = '''
select a.*
from qa_reporting.dw_mcrn_icd_proc_counts a
left join reference_tables.ref_cms_icd_pcs_codes b
on a.proc_cd = cd_value
where b.cd_value is null;
'''

invalid_proc_df = pd.read_sql(query, con=connection)
invalid_proc_df



Unnamed: 0,year,proc_cd,proc_count
0,2017,97110GO,4
1,2016,97110GO,1
2,2015,97110GO,3
3,2016,45385,1
4,2015,V0481,1
5,2014,91112GP,1
6,2014,97001GP,4
7,2016,97001GP,3
8,2015,97001GP,1
9,2021,5AID70Z,1


In [10]:
invalid_proc_df.groupby('year')['proc_count'].sum()

year
2014    55
2015    29
2016    31
2017    35
2021     1
Name: proc_count, dtype: int64

In [11]:
proc_comp_df = pd.DataFrame({'overall_proc_count': icd_proc_cd_df.groupby('year')['proc_count'].sum(),
                            'invalid_proc_count': invalid_proc_df.groupby('year')['proc_count'].sum(),
                            'valid_proc_count': icd_proc_cd_df.groupby('year')['proc_count'].sum() - invalid_proc_df.groupby('year')['proc_count'].sum()})
proc_comp_df.loc[proc_comp_df['valid_proc_count'].isna(),'valid_proc_count'] = proc_comp_df.loc[proc_comp_df['valid_proc_count'].isna(),'overall_proc_count']
proc_comp_df['valid_proc_count'] =  proc_comp_df['valid_proc_count'].astype(int)
proc_comp_df['invalid_to_valid_percent'] = 100. * proc_comp_df['invalid_proc_count'] / proc_comp_df['valid_proc_count']
proc_comp_df  

Unnamed: 0_level_0,overall_proc_count,invalid_proc_count,valid_proc_count,invalid_to_valid_percent
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2001,1,,1,
2006,2,,2,
2010,5,,5,
2011,3,,3,
2012,11,,11,
2013,14943,,14943,
2014,911536,55.0,911481,0.006034
2015,894896,29.0,894867,0.003241
2016,880228,31.0,880197,0.003522
2017,888662,35.0,888627,0.003939


In [12]:
invalid_proc_df.groupby('year').max()

Unnamed: 0_level_0,proc_cd,proc_count
year,Unnamed: 1_level_1,Unnamed: 2_level_1
2014,99672,9
2015,V0481,5
2016,Z1211,5
2017,97530GP,5
2021,5AID70Z,1


## Procedure Code Position

In [13]:
query = '''drop table if exists qa_reporting.dw_mcrn_proc_position;
select year, proc_position, count(distinct uth_claim_id) as claim_count
into qa_reporting.dw_mcrn_proc_position
from dw_staging.mcrn_claim_icd_proc
group by 1,2;
'''
with connection.cursor() as cursor:
    cursor.execute(query)


In [14]:
proc_position_df = pd.read_sql('select * from qa_reporting.dw_mcrn_proc_position;', con=connection)
proc_position_df.sort_values(['year', 'proc_position'])



Unnamed: 0,year,proc_position,claim_count
135,2001,1,1
31,2006,1,2
36,2010,1,4
122,2010,4,1
73,2011,1,2
...,...,...,...
162,2021,21,831
178,2021,22,707
153,2021,23,605
182,2021,24,509


In [15]:
proc_position_df['proc_position'].unique()

array([ 6, 12, 23, 20,  4,  3,  5,  1, 25, 22,  2, 17, 18,  9, 21,  8, 19,
       16, 14, 24, 11, 10,  7, 15, 13], dtype=int64)

Checking if the counts for the proc_position are correct. The higher the proc_position is, the less counts there should be. If we sort the counts of the proc_position by year and assign their order, this value should match with the proc_position value.

In [16]:
proc_position_df['row_rank'] = proc_position_df.sort_values(['year', 'claim_count'], ascending=[True, False]).groupby(['year']).cumcount()+1
proc_position_df['position_check'] = proc_position_df['row_rank'] == proc_position_df['proc_position']
proc_position_df[~proc_position_df['position_check']]

Unnamed: 0,year,proc_position,claim_count,row_rank,position_check
2,2013,23,26,21,False
122,2010,4,1,2,False
154,2013,21,24,22,False
174,2013,24,20,23,False
214,2013,22,20,24,False


## ICD Version

In [17]:
query = '''drop table if exists qa_reporting.dw_mcrn_icd_proc_version_count;
select year, icd_version, count(distinct uth_claim_id) as claim_count
into qa_reporting.dw_mcrn_icd_proc_version_count
from dw_staging.mcrn_claim_icd_proc
group by 1,2;
'''
with connection.cursor() as cursor:
    cursor.execute(query)

In [18]:
icd_version_df = pd.read_sql('select * from qa_reporting.dw_mcrn_icd_proc_version_count;', con=connection)
icd_version_df.sort_values(['year', 'icd_version'])



Unnamed: 0,year,icd_version,claim_count
13,2001,,1
2,2006,,2
4,2010,,4
9,2011,,2
0,2012,,8
6,2013,,4690
12,2014,,327489
3,2015,,323864
11,2016,,315095
1,2017,,314703
