# Data Warehouse Medicare National QA - Claim Header

## Initialization

Load the packages used in this notebook and initialize the connection to the GP DB.

In [1]:
import pandas as pd
import sys
import psycopg2
from tqdm import tqdm
sys.path.append('H:/uth_helpers')
from db_utils import get_dsn

In [2]:
# Open the database connection used by every cell below.
# The TCP keepalive options are passed as connection keyword parameters
# rather than concatenated onto the DSN text; psycopg2 merges them with the
# DSN, so this no longer depends on get_dsn() returning a "key=value" string
# that can be safely extended with a leading space.
connection = psycopg2.connect(
    get_dsn(),
    keepalives=1,
    keepalives_idle=30,
    keepalives_interval=10,
)
# Autocommit: every execute() below is committed as soon as it finishes, so
# the QA tables created in one cell are visible to the following cells.
connection.autocommit = True

## Row Count and Claim Count

As with the member_enrollment_monthly table, the row count of the claim_header table should equal the number of unique claims.


In [3]:
# DDL for the QA summary table: one row per calendar year / source table,
# holding data-warehouse counts, source counts, their differences and the
# percentage differences for rows, claims and members.
# The table is dropped first, so re-running this cell starts from scratch.
query = ''' drop table if exists qa_reporting.dw_mcrn_claim_header_counts;
create table qa_reporting.dw_mcrn_claim_header_counts
(
    calendar_year int,
    table_src text,
    dw_row_count int,
    src_row_count int,
    row_count_diff int,
    row_count_diff_percentage float,
    dw_uth_clm_id_count int,
    dw_src_clm_id_count int,
    src_clm_count int,
    clm_count_diff int,
    clm_count_percentage float,
    dw_uth_mbr_id_count int,
    dw_src_mbr_id_count int,
    src_mbr_count int,
    mbr_count_diff int,
    mbr_count_percentage float,
    date_generated date
);
'''

# Both statements are sent in a single execute() call.
with connection.cursor() as ddl_cursor:
    ddl_cursor.execute(query)

In [4]:
# Data-warehouse side of the QA table, computed from
# dw_staging.mcrn_claim_header and grouped by year. The statements run in
# order: the first seeds one row per year, the rest fill in distinct counts
# on those rows by matching on (calendar_year, table_src).
dw_count_statements = (
    # 1. Seed a row per year with the raw row count and today's date.
    '''
    insert into qa_reporting.dw_mcrn_claim_header_counts
    (calendar_year, table_src, dw_row_count, date_generated)
    select year, 'ALL' table_id_src, count(*), current_date
    from dw_staging.mcrn_claim_header
    group by 1
    ''',
    # 2. Distinct warehouse-assigned claim ids (uth_claim_id).
    '''
    update qa_reporting.dw_mcrn_claim_header_counts b
    set dw_uth_clm_id_count = count
    from (
        select year, 'ALL' table_id_src,  count(distinct uth_claim_id) as count 
        from dw_staging.mcrn_claim_header
    group by 1 ) a
    where a.year = b.calendar_year
    and a.table_id_src = b.table_src
    ''',
    # 3. Distinct source claim ids and source member ids.
    '''
    update qa_reporting.dw_mcrn_claim_header_counts b
    set dw_src_clm_id_count = clm_count,
        dw_src_mbr_id_count = mbr_count
    from (
        select year, 'ALL' table_id_src,  count(distinct claim_id_src) as clm_count, count(distinct member_id_src) as mbr_count 
        from dw_staging.mcrn_claim_header
        group by 1
    ) a
    where a.year = b.calendar_year
    and a.table_id_src = b.table_src
    ''',
    # 4. Distinct warehouse-assigned member ids (uth_member_id).
    '''
    update qa_reporting.dw_mcrn_claim_header_counts b
    set dw_uth_mbr_id_count = count
    from (
        select year, 'ALL' table_id_src,  count(distinct uth_member_id) as count 
        from dw_staging.mcrn_claim_header
        group by 1
    ) a
    where a.year = b.calendar_year
    and a.table_id_src = b.table_src
    ''',
)

with connection.cursor() as cursor:
    for query in dw_count_statements:
        cursor.execute(query)

In [5]:
with connection.cursor() as cursor:
    # Source side of the QA table: stack the seven medicare_national claim
    # tables, count rows / distinct beneficiaries / distinct claims per year
    # (year taken from clm_from_dt), then write the source counts and the
    # DW-vs-source differences onto the matching calendar_year rows.
    #
    # UNION ALL is used on purpose: plain UNION would drop duplicate
    # (year, bene_id, clm_id) rows before count(*) runs, so src_row_count
    # could never reveal duplicated source rows. It also matches the
    # UNION ALL used for the claim-type counts further down.
    #
    # count(distinct ...) ignores NULLs and can therefore be 0 for a year;
    # nullif() turns that into a NULL percentage instead of a
    # division-by-zero error that would abort the whole update.
    query = '''
    with clms as (
        select extract(year from clm_from_dt::date) as year, bene_id, clm_id
        from medicare_national.hha_base_claims_k
        union all
        select extract(year from clm_from_dt::date) as year, bene_id, clm_id
        from medicare_national.outpatient_base_claims_k
        union all
        select extract(year from clm_from_dt::date) as year, bene_id, clm_id
        from medicare_national.dme_claims_k
        union all
        select extract(year from clm_from_dt::date) as year, bene_id, clm_id
        from medicare_national.inpatient_base_claims_k
        union all
        select extract(year from clm_from_dt::date) as year, bene_id, clm_id
        from medicare_national.bcarrier_claims_k
        union all
        select extract(year from clm_from_dt::date) as year, bene_id, clm_id
        from medicare_national.hospice_base_claims_k
        union all
        select extract(year from clm_from_dt::date) as year, bene_id, clm_id
        from medicare_national.snf_base_claims_k
    ),
    clm_counts as (
        select year, count(*) row_count, count(distinct bene_id) pat_count, count(distinct clm_id) clm_count
        from clms
        group by 1
    )
    update qa_reporting.dw_mcrn_claim_header_counts a
    set src_row_count = b.row_count,
        row_count_diff = a.dw_row_count - b.row_count,
        row_count_diff_percentage = 100. * abs(a.dw_row_count - b.row_count) / nullif(b.row_count, 0),
        src_clm_count = b.clm_count,
        clm_count_diff = a.dw_uth_clm_id_count - b.clm_count,
        clm_count_percentage = 100. * abs(a.dw_uth_clm_id_count - b.clm_count) / nullif(b.clm_count, 0),
        src_mbr_count = b.pat_count,
        mbr_count_diff = a.dw_uth_mbr_id_count - b.pat_count,
        mbr_count_percentage = 100. * abs(a.dw_uth_mbr_id_count - b.pat_count) / nullif(b.pat_count, 0)
    from clm_counts b
    where a.calendar_year = b.year
    ;
    '''

    cursor.execute(query)

In [6]:
# Pull the finished QA summary table into a DataFrame; the bare `df` on the
# last line makes the notebook display it.
df = pd.read_sql(
    sql='select * from qa_reporting.dw_mcrn_claim_header_counts;',
    con=connection,
)
df



Unnamed: 0,calendar_year,table_src,dw_row_count,src_row_count,row_count_diff,row_count_diff_percentage,dw_uth_clm_id_count,dw_src_clm_id_count,src_clm_count,clm_count_diff,clm_count_percentage,dw_uth_mbr_id_count,dw_src_mbr_id_count,src_mbr_count,mbr_count_diff,mbr_count_percentage,date_generated
0,2018,ALL,60197621,60197621,0,0.0,60197621,60197621,60197621,0,0.0,1930824,1930824,1930824,0,0.0,2023-10-09
1,2000,ALL,2,2,0,0.0,2,2,2,0,0.0,2,2,2,0,0.0,2023-10-09
2,2020,ALL,53420884,53420884,0,0.0,53420884,53420884,53420884,0,0.0,1853702,1853702,1853702,0,0.0,2023-10-09
3,2002,ALL,4,4,0,0.0,4,4,4,0,0.0,4,4,4,0,0.0,2023-10-09
4,2008,ALL,7,7,0,0.0,7,7,7,0,0.0,7,7,7,0,0.0,2023-10-09
5,2003,ALL,3,3,0,0.0,3,3,3,0,0.0,1,1,1,0,0.0,2023-10-09
6,2001,ALL,2,2,0,0.0,2,2,2,0,0.0,2,2,2,0,0.0,2023-10-09
7,2014,ALL,58037854,58037854,0,0.0,58037854,58037854,58037854,0,0.0,1852507,1852507,1852507,0,0.0,2023-10-09
8,2009,ALL,9,9,0,0.0,9,9,9,0,0.0,8,8,8,0,0.0,2023-10-09
9,2019,ALL,60543286,60543286,0,0.0,60543286,60543286,60543286,0,0.0,1910419,1910419,1910419,0,0.0,2023-10-09


In [7]:
# Show the QA summary ordered by calendar year (returns a sorted copy; df
# itself is left in its original order).
df.sort_values(by='calendar_year')

Unnamed: 0,calendar_year,table_src,dw_row_count,src_row_count,row_count_diff,row_count_diff_percentage,dw_uth_clm_id_count,dw_src_clm_id_count,src_clm_count,clm_count_diff,clm_count_percentage,dw_uth_mbr_id_count,dw_src_mbr_id_count,src_mbr_count,mbr_count_diff,mbr_count_percentage,date_generated
21,1997,ALL,3,3,0,0.0,3,3,3,0,0.0,1,1,1,0,0.0,2023-10-09
1,2000,ALL,2,2,0,0.0,2,2,2,0,0.0,2,2,2,0,0.0,2023-10-09
6,2001,ALL,2,2,0,0.0,2,2,2,0,0.0,2,2,2,0,0.0,2023-10-09
3,2002,ALL,4,4,0,0.0,4,4,4,0,0.0,4,4,4,0,0.0,2023-10-09
5,2003,ALL,3,3,0,0.0,3,3,3,0,0.0,1,1,1,0,0.0,2023-10-09
18,2004,ALL,10,10,0,0.0,10,10,10,0,0.0,10,10,10,0,0.0,2023-10-09
11,2005,ALL,1,1,0,0.0,1,1,1,0,0.0,1,1,1,0,0.0,2023-10-09
14,2006,ALL,4,4,0,0.0,4,4,4,0,0.0,4,4,4,0,0.0,2023-10-09
20,2007,ALL,4,4,0,0.0,4,4,4,0,0.0,4,4,4,0,0.0,2023-10-09
4,2008,ALL,7,7,0,0.0,7,7,7,0,0.0,7,7,7,0,0.0,2023-10-09


In [8]:
# Summary statistics of the row-count percentage difference across all years.
df.loc[:, 'row_count_diff_percentage'].describe()

count    22.0
mean      0.0
std       0.0
min       0.0
25%       0.0
50%       0.0
75%       0.0
max       0.0
Name: row_count_diff_percentage, dtype: float64

In [9]:
# Summary statistics of the claim-count percentage difference across all years.
df.loc[:, 'clm_count_percentage'].describe()

count    22.0
mean      0.0
std       0.0
min       0.0
25%       0.0
50%       0.0
75%       0.0
max       0.0
Name: clm_count_percentage, dtype: float64

In [10]:
# Summary statistics of the member-count percentage difference across all years.
df.loc[:, 'mbr_count_percentage'].describe()

count    22.0
mean      0.0
std       0.0
min       0.0
25%       0.0
50%       0.0
75%       0.0
max       0.0
Name: mbr_count_percentage, dtype: float64

## Claim Type

In [11]:
# Source-side claim-type counts. Each of the seven medicare_national claim
# tables is tagged with a claim_type literal ('P' for dme and bcarrier,
# 'F' for the other five), the tables are stacked with UNION ALL, and the
# per-(year, claim_type) row counts are written to
# qa_reporting.mcrn_claim_type_count, which is dropped and rebuilt each run.
query = '''drop table if exists qa_reporting.mcrn_claim_type_count;
    with mcrn_claims as (
        select extract(year from clm_from_dt::date) as year, bene_id, clm_id, 'F' claim_type
        from medicare_national.hha_base_claims_k
        union all
        select extract(year from clm_from_dt::date) as year, bene_id, clm_id, 'F' claim_type
        from medicare_national.outpatient_base_claims_k
        union all
        select extract(year from clm_from_dt::date) as year, bene_id, clm_id, 'P' claim_type
        from medicare_national.dme_claims_k
        union all
        select extract(year from clm_from_dt::date)as year, bene_id, clm_id, 'F' claim_type
        from medicare_national.inpatient_base_claims_k
        union all
        select extract(year from clm_from_dt::date) as year, bene_id, clm_id, 'P' claim_type
        from medicare_national.bcarrier_claims_k
        union all
        select extract(year from clm_from_dt::date) as year, bene_id, clm_id, 'F' claim_type
        from medicare_national.hospice_base_claims_k
        union all
        select extract(year from clm_from_dt::date) as year, bene_id, clm_id, 'F' claim_type
        from medicare_national.snf_base_claims_k
    )
select year, claim_type, count(*)
into qa_reporting.mcrn_claim_type_count
from mcrn_claims
group by 1,2
    '''

with connection.cursor() as src_cursor:
    src_cursor.execute(query)

In [12]:
# Data-warehouse claim-type counts: rows of dw_staging.mcrn_claim_header per
# (year, claim_type), written to qa_reporting.dw_mcrn_claim_type_count,
# which is dropped and rebuilt each run.
query = '''drop table if exists qa_reporting.dw_mcrn_claim_type_count;
select year, claim_type, count(*)
into qa_reporting.dw_mcrn_claim_type_count
from dw_staging.mcrn_claim_header
group by 1,2
    '''

with connection.cursor() as dw_cursor:
    dw_cursor.execute(query)

In [13]:
# Compare claim-type counts between the data warehouse (a) and the source
# tables (b) for every (year, claim_type) pair found on either side.
#
# Because this is a FULL OUTER JOIN, a pair that exists on only one side has
# NULLs in the other side's columns. The key columns are therefore coalesced
# so the row still shows which year / claim_type is affected, and the
# difference treats the absent side as 0 so it is reported as a number
# rather than NULL. The two raw count columns are left as-is: a NULL there
# still marks the side that is missing. The percentage is NULL when there is
# no source count to divide by.
query = '''
select coalesce(a.year, b.year)::int as year,
       coalesce(a.claim_type, b.claim_type) as claim_type,
       a.count as dw_claim_type_count,
       b.count as src_claim_type_count,
       coalesce(a.count, 0) - coalesce(b.count, 0) as claim_type_count_difference,
       100. * abs(coalesce(a.count, 0) - coalesce(b.count, 0)) / nullif(b.count, 0)
           as claim_type_count_difference_percentage
from qa_reporting.dw_mcrn_claim_type_count a
full outer join qa_reporting.mcrn_claim_type_count b
on a.year = b.year
and a.claim_type = b.claim_type
'''

pd.read_sql(query, con=connection).sort_values(['year', 'claim_type'])



Unnamed: 0,year,claim_type,dw_claim_type_count,src_claim_type_count,claim_type_count_difference,claim_type_count_difference_percentage
2,1997,F,3,3,0,0.0
10,2000,F,2,2,0,0.0
22,2001,F,1,1,0,0.0
12,2001,P,1,1,0,0.0
4,2002,F,4,4,0,0.0
3,2003,F,3,3,0,0.0
7,2004,F,6,6,0,0.0
28,2004,P,4,4,0,0.0
17,2005,P,1,1,0,0.0
19,2006,F,4,4,0,0.0


## 