# Data Warehouse Truven QA - Claim Header

## Initialization

Just loading packages that will be used and initializing connection to GP DB.

In [1]:
import pandas as pd
import sys
import psycopg2
from tqdm import tqdm
sys.path.append('H:/uth_helpers')
from db_utils import get_dsn

In [2]:
connection = psycopg2.connect(get_dsn())
connection.autocommit = True

In [3]:
year_df = pd.read_sql('select distinct year from dw_staging.claim_header_1_prt_truv;', con=connection)
tables = ['ccaes', 'mdcrs', 'mdcro', 'ccaeo']



## Row Count and Claim Count

Similar to the member_enrollment_monthly table, the claim_header table has the row count should equal the number of unique claims.

For this table, we extract claim data from the s, o, and f tables from the truven schema. 


In [3]:
query = ''' drop table if exists qa_reporting.dw_truv_claim_header_counts;
create table qa_reporting.dw_truv_claim_header_counts
(
    calendar_year int,
    table_src text,
    dw_row_count int,
    src_row_count int,
    row_count_diff int,
    row_count_diff_percentage float,
    dw_uth_clm_id_count int,
    dw_src_clm_id_count int,
    src_clm_count int,
    clm_count_diff int,
    clm_count_percentage float,
    dw_uth_mbr_id_count int,
    dw_src_mbr_id_count int,
    src_mbr_count int,
    mbr_count_diff int,
    mbr_count_percentage float,
    date_generated date
);
'''

with connection.cursor() as cursor:
    cursor.execute(query)

In [7]:
with connection.cursor() as cursor:
    for year in tqdm(year_df['year']):
        query = f'''
        insert into qa_reporting.dw_truv_claim_header_counts
        (calendar_year, table_src, dw_row_count, date_generated)
        select year, table_id_src, count(*), current_date
        from dw_staging.claim_header_1_prt_truv
        where year = {year}
        group by 1, 2
        '''
        
        cursor.execute(query)

        query = f'''
        update qa_reporting.dw_truv_claim_header_counts b
        set dw_uth_clm_id_count = count
        from (
            select year, table_id_src,  count(distinct uth_claim_id) as count 
            from dw_staging.claim_header_1_prt_truv
        where year = {year}
        group by 1, 2 ) a
        where a.year = b.calendar_year
        and a.table_id_src = b.table_src
        '''

        cursor.execute(query)

        query = f'''
        update qa_reporting.dw_truv_claim_header_counts b
        set dw_src_clm_id_count = count
        from (
            select year, table_id_src,  count(distinct claim_id_src) as count 
            from dw_staging.claim_header_1_prt_truv
        where year = {year}
        group by 1, 2 ) a
        where a.year = b.calendar_year
        and a.table_id_src = b.table_src
        '''

        cursor.execute(query)

        query = f'''
        update qa_reporting.dw_truv_claim_header_counts b
        set dw_uth_mbr_id_count = count
        from (
            select year, table_id_src,  count(distinct uth_member_id) as count 
            from dw_staging.claim_header_1_prt_truv
        where year = {year}
        group by 1, 2 ) a
        where a.year = b.calendar_year
        and a.table_id_src = b.table_src
        '''

        cursor.execute(query)
        
        query = f'''
        update qa_reporting.dw_truv_claim_header_counts b
        set dw_src_mbr_id_count = count
        from (
            select year, table_id_src,   count(distinct member_id_src) as count 
            from dw_staging.claim_header_1_prt_truv
        where year = {year}
        group by 1, 2 ) a
        where a.year = b.calendar_year
        and a.table_id_src = b.table_src
        '''

        cursor.execute(query)

        query = f'''
        insert into qa_reporting.dw_truv_claim_header_counts
        (calendar_year, table_src, dw_row_count, date_generated)
        select year, 'ALL' table_id_src, count(*), current_date
        from dw_staging.claim_header_1_prt_truv
        where year = {year}
        group by 1
        '''
        
        cursor.execute(query)

        query = f'''
        update qa_reporting.dw_truv_claim_header_counts b
        set dw_uth_clm_id_count = count
        from (
            select year, 'ALL' table_id_src,  count(distinct uth_claim_id) as count 
            from dw_staging.claim_header_1_prt_truv
        where year = {year}
        group by 1 ) a
        where a.year = b.calendar_year
        and a.table_id_src = b.table_src
        '''

        cursor.execute(query)

        query = f'''
        update qa_reporting.dw_truv_claim_header_counts b
        set dw_src_clm_id_count = count
        from (
            select year, 'ALL' table_id_src,  count(distinct claim_id_src) as count 
            from dw_staging.claim_header_1_prt_truv
        where year = {year}
        group by 1) a
        where a.year = b.calendar_year
        and a.table_id_src = b.table_src
        '''

        cursor.execute(query)

        query = f'''
        update qa_reporting.dw_truv_claim_header_counts b
        set dw_uth_mbr_id_count = count
        from (
            select year, 'ALL' table_id_src,  count(distinct uth_member_id) as count 
            from dw_staging.claim_header_1_prt_truv
        where year = {year}
        group by 1 ) a
        where a.year = b.calendar_year
        and a.table_id_src = b.table_src
        '''

        cursor.execute(query)
        
        query = f'''
        update qa_reporting.dw_truv_claim_header_counts b
        set dw_src_mbr_id_count = count
        from (
            select year, 'ALL' table_id_src,   count(distinct member_id_src) as count 
            from dw_staging.claim_header_1_prt_truv
        where year = {year}
        group by 1 ) a
        where a.year = b.calendar_year
        and a.table_id_src = b.table_src
        '''

        cursor.execute(query)

100%|██████████| 12/12 [09:58<00:00, 49.88s/it]


In [9]:
with connection.cursor() as cursor:
    query = '''update qa_reporting.dw_truv_claim_header_counts a
    set src_row_count = b.row_count,
    row_count_diff = dw_row_count - b.row_count,
    row_count_diff_percentage = 100. * abs( dw_row_count - b.row_count) / b.row_count,
    src_clm_count = clm_count,
    clm_count_diff = dw_uth_clm_id_count - b.clm_count,
    clm_count_percentage = 100. * abs(dw_uth_clm_id_count - b.clm_count) / b.clm_count,
    src_mbr_count = b.pat_count,
    mbr_count_diff = dw_uth_mbr_id_count - b.pat_count,
    mbr_count_percentage = 100. * abs(dw_uth_mbr_id_count - b.pat_count) / b.pat_count
    from qa_reporting.truven_counts b
    where a.calendar_year = b.year
    and a.table_src = b.table_name
    ;
    '''

    cursor.execute(query)

ccaeo counts need to be run not using Python due to how long queries using this table run overall. Psycopg or Greenplum has a time limit for queries that are executed in this manner that is about 2 hours. The query may continue running on database server even if the connection to the server is closed. This means if this query is executed from here, you can check the status of the query using Greenplum (through a seperate query).

```
with truven_claims as (
            select year, 'ccaes' as table_name, enrolid, claim_id_derv
            from staging_clean.ccaes_etl
            union
            select year, 'ccaeo' as table_name, enrolid, claim_id_derv
            from staging_clean.ccaeo_etl
            union
            select year, 'mdcrs' as table_name, enrolid, claim_id_derv
            from staging_clean.mdcrs_etl
            union
            select year, 'mdcro' as table_name, enrolid, claim_id_derv
            from staging_clean.mdcro_etl
        ),
        truven_claims_counts as (
            select year, 'ALL' table_name, count(*) row_count, count(distinct enrolid) mbr_count, count(distinct claim_id_derv) clm_count
            from truven_claims
            group by 1,2
        )
        update qa_reporting.dw_truv_claim_header_counts a
        set src_row_count = b.row_count,
        row_count_diff = dw_row_count - b.row_count,
        row_count_diff_percentage = 100. * abs( dw_row_count - b.row_count) / b.row_count,
        src_clm_count = clm_count,
        clm_count_diff = dw_uth_clm_id_count - b.clm_count,
        clm_count_percentage = 100. * abs(dw_uth_clm_id_count - b.clm_count) / b.clm_count,
        src_mbr_count = b.mbr_count,
        mbr_count_diff = dw_uth_mbr_id_count - b.mbr_count,
        mbr_count_percentage = 100. * abs(dw_uth_mbr_id_count - b.mbr_count) / b.mbr_count
        from truven_claims_counts b
        where a.calendar_year = b.year
        and a.table_src = b.table_name
        ;
```

Note that the source row count from the o and s tables from truven are at detail level. This means that the row count from the individual tables are going to larger than the row count in the claim header table. If we use the query above, it will take the count of unique rows (based on year, table name, claim id, member id) which may be a more accurate method to evaluate the row count for the claim header table.

In [3]:
df = pd.read_sql('select * from qa_reporting.dw_truv_claim_header_counts;', con=connection)
df.head()



Unnamed: 0,calendar_year,table_src,dw_row_count,src_row_count,row_count_diff,row_count_diff_percentage,dw_uth_clm_id_count,dw_src_clm_id_count,src_clm_count,clm_count_diff,clm_count_percentage,dw_uth_mbr_id_count,dw_src_mbr_id_count,src_mbr_count,mbr_count_diff,mbr_count_percentage,date_generated
0,2018,ALL,290723506,290789941,-66435,0.022846,290425796,290412151,290492110,-66314,0.022828,22547314,22547314,22547774,-460,0.00204,2023-04-21
1,2018,ccaes,10980355,38984126,-28003771,71.833779,10980355,10975904,10976708,3647,0.033225,1029737,1029737,1029737,0,0.0,2023-04-21
2,2018,mdcrs,2343698,6992226,-4648528,66.481375,2343698,2341981,2342184,1514,0.064641,147674,147674,147674,0,0.0,2023-04-21
3,2018,mdcro,26096056,59193949,-33097893,55.914318,26096056,26092957,26105522,-9466,0.036261,1057407,1057407,1057428,-21,0.001986,2023-04-21
4,2018,ccaeo,251303397,589022269,-337718872,57.335502,251303397,251299019,251365523,-62126,0.024715,21528585,21528585,21529034,-449,0.002086,2023-04-21


In [4]:
df[df['table_src'] == 'ALL']

Unnamed: 0,calendar_year,table_src,dw_row_count,src_row_count,row_count_diff,row_count_diff_percentage,dw_uth_clm_id_count,dw_src_clm_id_count,src_clm_count,clm_count_diff,clm_count_percentage,dw_uth_mbr_id_count,dw_src_mbr_id_count,src_mbr_count,mbr_count_diff,mbr_count_percentage,date_generated
0,2018,ALL,290723506,290789941,-66435,0.022846,290425796,290412151,290492110,-66314,0.022828,22547314,22547314,22547774,-460,0.00204,2023-04-21
5,2020,ALL,271605426,271653875,-48449,0.017835,271405951,271395927,271454354,-48403,0.017831,20238560,20238560,20239042,-482,0.002382,2023-04-21
10,2021,ALL,288698899,288791345,-92446,0.032011,288506737,288497809,288599077,-92340,0.031996,20042311,20042311,20044196,-1885,0.009404,2023-04-21
15,2014,ALL,505258663,505315935,-57272,0.011334,504826402,504810286,504883546,-57144,0.011318,39662807,39662807,39663323,-516,0.001301,2023-04-21
20,2019,ALL,299644157,299676211,-32054,0.010696,299394734,299383819,299426746,-32012,0.010691,21777984,21777984,21778393,-409,0.001878,2023-04-21
25,2011,ALL,605491625,605479143,12482,0.002062,604833166,604820682,604820682,12484,0.002064,47089265,47089265,47089265,0,0.0,2023-04-21
26,2012,ALL,607339141,607373049,-33908,0.005583,606721289,606707165,606755083,-33794,0.00557,47494758,47494758,47495022,-264,0.000556,2023-04-21
35,2022,ALL,132274111,132327243,-53132,0.040152,132181344,132181175,132234183,-52839,0.039959,15144527,15144527,15154145,-9618,0.063468,2023-04-21
40,2016,ALL,329529635,329515942,13693,0.004155,329200320,329186607,329186623,13697,0.004161,24725600,24725600,24725600,0,0.0,2023-04-21
45,2017,ALL,293553201,293589258,-36057,0.012281,293290364,293275774,293326352,-35988,0.012269,22248809,22248809,22249093,-284,0.001276,2023-04-21


In [18]:
df['row_count_diff_percentage'].describe()

count    60.000000
mean     50.839985
std      26.312083
min       0.002062
25%      55.208591
50%      58.161286
75%      68.909598
max      73.379011
Name: row_count_diff_percentage, dtype: float64

In [19]:
df['clm_count_percentage'].describe()

count    60.000000
mean      0.028526
std       0.070955
min       0.000987
25%       0.009099
50%       0.018172
75%       0.030717
max       0.559619
Name: clm_count_percentage, dtype: float64

In [20]:
df['mbr_count_percentage'].describe()

count    60.000000
mean      0.025437
std       0.161949
min       0.000000
25%       0.000000
50%       0.000813
75%       0.002257
max       1.254483
Name: mbr_count_percentage, dtype: float64

## Claim Type

In [26]:
with connection.cursor() as cursor:
    query = '''drop table if exists qa_reporting.truv_claim_type_count;
    with truv_claims as (
        select year, claim_id_derv, facprof
        from truven.ccaeo
        union
        select year, claim_id_derv, facprof
        from truven.ccaes
        union
        select year, claim_id_derv, facprof
        from truven.mdcro
        union
        select year, claim_id_derv, facprof
        from truven.mdcrs
    )
select year, facprof, count(*)
into qa_reporting.truv_claim_type_count
from truv_claims
group by 1,2
    '''
    cursor.execute(query)

In [23]:
with connection.cursor() as cursor:
    query = '''drop table if exists qa_reporting.dw_truv_claim_type_count;
select year, claim_type, count(*)
into qa_reporting.dw_truv_claim_type_count
from dw_staging.claim_header
group by 1,2
    '''
    cursor.execute(query)

In [34]:
query = f'''
select a.year, a.claim_type, a.count as dw_claim_type_count, b.count as src_claim_type_count, 
        a.count-b.count as claim_type_count_difference, 
         100. * abs(a.count-b.count) / b.count as claim_type_count_difference_percentage
from qa_reporting.dw_truv_claim_type_count a
full outer join qa_reporting.truv_claim_type_count b
on a.year =b.year
and a.claim_type = b.facprof
'''

pd.read_sql(query, con=connection).sort_values(['year', 'claim_type'])



Unnamed: 0,year,claim_type,dw_claim_type_count,src_claim_type_count,claim_type_count_difference,claim_type_count_difference_percentage
0,2011,F,74242621,74022443,220178,0.297448
8,2011,P,531249004,530798241,450763,0.084922
1,2012,F,74154300,73946444,207856,0.28109
11,2012,P,533184841,532808641,376200,0.070607
10,2013,F,58878891,58740345,138546,0.235862
20,2013,P,429119051,428822352,296699,0.069189
16,2014,F,62955864,62858795,97069,0.154424
5,2014,P,442302799,442024753,278046,0.062903
2,2015,F,40273680,40208649,65031,0.161734
14,2015,P,280980268,280818543,161725,0.057591


## Costs

### Sum

In [None]:
with connection.cursor() as cursor:
    query = '''drop table if exists qa_reporting.truv_costs_total;
    with truv_claims as (
        select year, claim_id_derv, netpay, pay
        from truven.ccaeo
        union
        select year, claim_id_derv, netpay, pay
        from truven.ccaes
        union
        select year, claim_id_derv, netpay, pay
        from truven.mdcro
        union
        select year, claim_id_derv, netpay, pay
        from truven.mdcrs
    )
select year, 'ALL' as table_name, 
        sum(netpay) total_netpay,
        sum(pay) total_pay
into qa_reporting.truv_costs_total
from truv_claims
group by 1
    '''
    cursor.execute(query)

In [10]:
for table in tables:
    with connection.cursor() as cursor:
        query = f'''insert into qa_reporting.truv_costs_total
    select year, '{table}' as table_name, sum(netpay) total_netpay, sum(pay) total_pay
    from truven.{table}
    group by 1
        '''
        cursor.execute(query)

In [3]:
with connection.cursor() as cursor:
    query = '''drop table if exists qa_reporting.dw_truv_costs_total;
select year, table_id_src,
        sum(total_charge_amount) as total_charge_amount,
        sum(total_allowed_amount) as total_allowed_amount, 
        sum(total_paid_amount) as total_paid_amount
into qa_reporting.dw_truv_costs_total
from dw_staging.claim_header
group by 1,2
    '''
    cursor.execute(query)

In [4]:
with connection.cursor() as cursor:
    query = '''insert into qa_reporting.dw_truv_costs_total
select year, 'ALL',
        sum(total_charge_amount) as total_charge_amount,
        sum(total_allowed_amount) as total_allowed_amount, 
        sum(total_paid_amount) as total_paid_amount
from dw_staging.claim_header
group by 1
    '''
    cursor.execute(query)

In [17]:
query = f'''
select a.year, b.table_name, 
        a.total_paid_amount as dw_total_paid, b.total_netpay as src_total_paid, 
        a.total_paid_amount-b.total_netpay as total_paid_difference, 
        100. * abs(a.total_paid_amount-b.total_netpay) / b.total_netpay as total_paid_difference_percentage,
        a.total_allowed_amount as dw_total_allowed, b.total_pay as src_total_allowed,
        a.total_allowed_amount-b.total_pay as total_allowed_difference, 
        100. * abs(a.total_allowed_amount-b.total_pay) / b.total_pay as total_allowed_difference_percentage
from qa_reporting.dw_truv_costs_total a
full outer join qa_reporting.truv_costs_total b
on a.year =b.year
and a.table_id_src = b.table_name
'''

pd.read_sql(query, con=connection).sort_values(['year', 'table_name'])



Unnamed: 0,year,table_name,dw_total_paid,src_total_paid,total_paid_difference,total_paid_difference_percentage,dw_total_allowed,src_total_allowed,total_allowed_difference,total_allowed_difference_percentage
32,2011,ALL,161163600000.0,153598300000.0,7565246000.0,4.925344,226711500000.0,215259700000.0,11451780000.0,5.319985
5,2011,ccaeo,97913530000.0,97930910000.0,-17378350.0,0.017746,120463900000.0,120483500000.0,-19592080.0,0.016261
22,2011,ccaes,48483420000.0,48530140000.0,-46715810.0,0.096261,53045010000.0,53095060000.0,-50041180.0,0.094248
15,2011,mdcro,9648785000.0,9647447000.0,1338554.0,0.013875,32822330000.0,32816200000.0,6131274.0,0.018684
16,2011,mdcrs,5117819000.0,5117239000.0,580440.8,0.011343,20380270000.0,20378950000.0,1317226.0,0.006464
2,2012,ALL,165692400000.0,157724900000.0,7967511000.0,5.051524,233646000000.0,221365500000.0,12280480000.0,5.547606
44,2012,ccaeo,102384900000.0,102415600000.0,-30685120.0,0.029961,127744900000.0,127782400000.0,-37446690.0,0.029305
53,2012,ccaes,49972650000.0,50013770000.0,-41128370.0,0.082234,54681430000.0,54725520000.0,-44082680.0,0.080552
40,2012,mdcro,8678780000.0,8678058000.0,722079.1,0.008321,32144520000.0,32141700000.0,2813778.0,0.008754
9,2012,mdcrs,4656050000.0,4655846000.0,204192.8,0.004386,19075080000.0,19073590000.0,1487018.0,0.007796


### Min

In [None]:
with connection.cursor() as cursor:
    query = '''drop table if exists qa_reporting.truv_costs_min;
    with truv_claims as (
        select year, claim_id_derv, sum(netpay) netpay, sum(pay) pay
        from truven.ccaeo
        group by 1,2
        union
        select year, claim_id_derv, sum(netpay) netpay, sum(pay) pay
        from truven.ccaes
        group by 1,2
        union
        select year, claim_id_derv, sum(netpay) netpay, sum(pay) pay
        from truven.mdcro
        group by 1,2
        union
        select year, claim_id_derv, sum(netpay) netpay, sum(pay) pay
        from truven.mdcrs
        group by 1,2
    )
select year, 'ALL' as table_name, 
        min(netpay) min_netpay,
        min(pay) min_pay
into qa_reporting.truv_costs_min
from truv_claims
group by 1
    '''
    cursor.execute(query)

In [8]:
for table in tables[-1:]:
    with connection.cursor() as cursor:
        query = f'''insert into qa_reporting.truv_costs_min
    with truv_claims as (
        select year, claim_id_derv, sum(netpay) netpay, sum(pay) pay
        from truven.{table}
        group by 1,2
    )
    select year, '{table}' as table_name, min(netpay), min(pay) 
    from truv_claims
    group by 1
        '''
        cursor.execute(query)

In [9]:
with connection.cursor() as cursor:
    query = '''drop table if exists qa_reporting.dw_truv_costs_min;
select year, table_id_src,
        min(total_charge_amount) as min_total_charge_amount,
        min(total_allowed_amount) as min_total_allowed_amount, 
        min(total_paid_amount) as min_total_paid_amount
into qa_reporting.dw_truv_costs_min
from dw_staging.claim_header
group by 1,2
    '''
    cursor.execute(query)

In [10]:
with connection.cursor() as cursor:
    query = '''insert into qa_reporting.dw_truv_costs_min
select year, 'ALL',
        min(total_charge_amount) as min_total_charge_amount,
        min(total_allowed_amount) as min_total_allowed_amount, 
        min(total_paid_amount) as min_total_paid_amount
from dw_staging.claim_header
group by 1
    '''
    cursor.execute(query)

In [14]:
query = f'''
select a.year, b.table_name, 
        a.min_total_paid_amount as dw_min_total_paid, b.min_netpay as src_min_paid, 
        a.min_total_paid_amount-b.min_netpay as min_total_paid_difference, 
        100. * abs(a.min_total_paid_amount-b.min_netpay) / b.min_netpay as min_total_paid_difference_percentage,
        a.min_total_allowed_amount as dw_min_total_allowed, b.min_pay as src_min_allowed,
        a.min_total_allowed_amount-b.min_pay as min_total_allowed_difference, 
        100. * abs(a.min_total_allowed_amount-b.min_pay) / b.min_pay as min_total_allowed_difference_percentage
from qa_reporting.dw_truv_costs_min a
full outer join qa_reporting.truv_costs_min b
on a.year =b.year
and a.table_id_src = b.table_name
'''

pd.read_sql(query, con=connection).sort_values(['year', 'table_name'])

Unnamed: 0,year,table_name,dw_min_total_paid,src_min_paid,min_total_paid_difference,min_total_paid_difference_percentage,dw_min_total_allowed,src_min_allowed,min_total_allowed_difference,min_total_allowed_difference_percentage
14,2011,ALL,-2414961.42,-2414961.0,-7.741221e-06,-3.205526e-10,-8878220.0,-8878220.0,0.0,0.0
24,2011,ccaeo,-365085.75,-365085.8,0.0002670288,-7.314139e-08,-1598982.22,-1598982.0,0.001862793,-1.164987e-07
4,2011,ccaes,-2414961.42,-2414961.0,-7.741221e-06,-3.205526e-10,-8878220.0,-8878220.0,0.0,0.0
38,2011,mdcro,-58497.0,-58497.0,0.0,0.0,-1955955.0,-1955955.0,0.0,0.0
25,2011,mdcrs,-1985218.0,-1985218.0,0.0,0.0,-1985218.0,-1985218.0,0.0,0.0
37,2012,ALL,-3228180.67,-3228181.0,-1.709e-05,-5.294004e-10,-3228180.67,-3228181.0,-1.709e-05,-5.294004e-10
20,2012,ccaeo,-431422.47,-431422.5,-0.002592773,-6.009825e-07,-915090.0,-915090.0,0.0,0.0
39,2012,ccaes,-3228180.67,-3228181.0,-1.709e-05,-5.294004e-10,-3228180.67,-3228181.0,-1.709e-05,-5.294004e-10
12,2012,mdcro,-107614.94,-107614.9,-0.0025,-2.323098e-06,-586311.96,-720835.3,134523.3,-18.66215
36,2012,mdcrs,-859830.64,-859830.6,-6.104e-07,-7.099072e-11,-1340482.0,-1340482.0,0.0,0.0


### Max

In [4]:
with connection.cursor() as cursor:
    query = '''drop table if exists qa_reporting.truv_costs_max;
    with truv_claims as (
        select year, claim_id_derv, sum(netpay) netpay, sum(pay) pay
        from truven.ccaeo
        group by 1,2
        union
        select year, claim_id_derv, sum(netpay) netpay, sum(pay) pay
        from truven.ccaes
        group by 1,2
        union
        select year, claim_id_derv, sum(netpay) netpay, sum(pay) pay
        from truven.mdcro
        group by 1,2
        union
        select year, claim_id_derv, sum(netpay) netpay, sum(pay) pay
        from truven.mdcrs
        group by 1,2
    )
select year, 'ALL' as table_name, 
        max(netpay) max_netpay,
        max(pay) max_pay
into qa_reporting.truv_costs_max
from truv_claims
group by 1
    '''
    cursor.execute(query)

In [5]:
for table in tables:
    with connection.cursor() as cursor:
        query = f'''insert into qa_reporting.truv_costs_max
    with truv_claims as (
        select year, claim_id_derv, sum(netpay) netpay, sum(pay) pay
        from truven.{table}
        group by 1,2
    )
    select year, '{table}' as table_name, max(netpay), max(pay) 
    from truv_claims
    group by 1
        '''
        cursor.execute(query)

In [6]:
with connection.cursor() as cursor:
    query = '''drop table if exists qa_reporting.dw_truv_costs_max;
select year, table_id_src,
        max(total_charge_amount) as max_total_charge_amount,
        max(total_allowed_amount) as max_total_allowed_amount, 
        max(total_paid_amount) as max_total_paid_amount
into qa_reporting.dw_truv_costs_max
from dw_staging.claim_header
group by 1,2
    '''
    cursor.execute(query)

In [7]:
with connection.cursor() as cursor:
    query = '''insert into qa_reporting.dw_truv_costs_max
select year, 'ALL',
        max(total_charge_amount) as max_total_charge_amount,
        max(total_allowed_amount) as max_total_allowed_amount, 
        max(total_paid_amount) as max_total_paid_amount
from dw_staging.claim_header
group by 1
    '''
    cursor.execute(query)

In [10]:
query = '''
select a.year, b.table_name, 
        a.max_total_paid_amount as dw_max_total_paid, b.max_netpay as src_max_paid, 
        a.max_total_paid_amount-b.max_netpay as max_total_paid_difference, 
        100. * abs(a.max_total_paid_amount-b.max_netpay) / b.max_netpay as max_total_paid_difference_percentage,
        a.max_total_allowed_amount as dw_max_total_allowed, b.max_pay as src_max_allowed,
        a.max_total_allowed_amount-b.max_pay as max_total_allowed_difference, 
        100. * abs(a.max_total_allowed_amount-b.max_pay) / b.max_pay as max_total_allowed_difference_percentage
from qa_reporting.dw_truv_costs_max a
full outer join qa_reporting.truv_costs_max b
on a.year =b.year
and a.table_id_src = b.table_name
'''

pd.read_sql(query, con=connection).sort_values(['year', 'table_name'])



Unnamed: 0,year,table_name,dw_max_total_paid,src_max_paid,max_total_paid_difference,max_total_paid_difference_percentage,dw_max_total_allowed,src_max_allowed,max_total_allowed_difference,max_total_allowed_difference_percentage
58,2011,ALL,3412327.0,59177120.0,-55764790.0,94.23371,8878220.0,63381440.0,-54503220.0,85.9924
12,2011,ccaeo,2462133.31,42340120.0,-39877980.0,94.18487,2462133.31,50000390.0,-47538260.0,95.07577
50,2011,ccaes,3412327.0,59177120.0,-55764790.0,94.23371,8878220.0,63381440.0,-54503220.0,85.9924
35,2011,mdcro,500555.0,828083.3,-327528.3,39.55258,6106155.25,6106155.0,0.0,0.0
3,2011,mdcrs,1985812.0,1985812.0,0.0,0.0,5422836.35,5422836.0,2.818058e-05,5.19665e-10
28,2012,ALL,4314090.0,56421370.0,-52107280.0,92.3538,4314090.0,60305930.0,-55991840.0,92.84633
48,2012,ccaeo,1801357.5,38293140.0,-36491790.0,95.29587,1802211.5,47719910.0,-45917700.0,96.22336
13,2012,ccaes,4314090.0,56421370.0,-52107280.0,92.3538,4314090.0,60305930.0,-55991840.0,92.84633
7,2012,mdcro,404472.16,611210.4,-206738.2,33.82439,1068762.0,1068762.0,0.0,0.0
40,2012,mdcrs,1397399.84,1397400.0,5.940128e-06,4.250844e-10,3174327.93,3174328.0,2.708564e-06,8.532717e-11


### Median

These queries take a while to run. May lose connection when executing, but the query may still be running even if connection is closed.

In [None]:
with connection.cursor() as cursor:
    query = '''drop table if exists qa_reporting.truv_costs_median;
    with truv_claims as (
        select year, claim_id_derv, sum(netpay) netpay, sum(pay) pay
        from truven.ccaeo
        group by 1,2
        union
        select year, claim_id_derv, sum(netpay) netpay, sum(pay) pay
        from truven.ccaes
        group by 1,2
        union
        select year, claim_id_derv, sum(netpay) netpay, sum(pay) pay
        from truven.mdcro
        group by 1,2
        union
        select year, claim_id_derv, sum(netpay) netpay, sum(pay) pay
        from truven.mdcrs
        group by 1,2
    )
select year, 'ALL' as table_name, 
        median(netpay) median_netpay,
        median(pay) median_pay
into qa_reporting.truv_costs_median
from truv_claims
group by 1
    '''
    cursor.execute(query)

In [None]:
for table in tables:
    with connection.cursor() as cursor:
        query = f'''insert into qa_reporting.truv_costs_median
    with truv_claims as (
        select year, claim_id_derv, sum(netpay) netpay, sum(pay) pay
        from truven.{table}
        group by 1,2
    )
    select year, '{table}' as table_name, median(netpay), median(pay) 
    from truv_claims
    group by 1
        '''
        cursor.execute(query)

In [None]:
with connection.cursor() as cursor:
    query = '''drop table if exists qa_reporting.dw_truv_costs_median;
select year, table_id_src,
        median(total_charge_amount) as median_total_charge_amount,
        median(total_allowed_amount) as median_total_allowed_amount, 
        median(total_paid_amount) as median_total_paid_amount
into qa_reporting.dw_truv_costs_median
from dw_staging.claim_header
group by 1,2
    '''
    cursor.execute(query)

In [None]:
with connection.cursor() as cursor:
    query = '''insert into qa_reporting.dw_truv_costs_median
select year, 'ALL',
        median(total_charge_amount) as median_total_charge_amount,
        median(total_allowed_amount) as median_total_allowed_amount, 
        median(total_paid_amount) as median_total_paid_amount
from dw_staging.claim_header
group by 1
    '''
    cursor.execute(query)

In [11]:
query = f'''
select a.year, b.table_name, 
        a.median_total_paid_amount as dw_median_total_paid, b.median_netpay as src_median_paid, 
        a.median_total_paid_amount-b.median_netpay as median_total_paid_difference, 
        100. * abs(a.median_total_paid_amount-b.median_netpay) / b.median_netpay as median_total_paid_difference_percentage,
        a.median_total_allowed_amount as dw_median_total_allowed, b.median_pay as src_median_allowed,
        a.median_total_allowed_amount-b.median_pay as median_total_allowed_difference, 
        100. * abs(a.median_total_allowed_amount-b.median_pay) / b.median_pay as median_total_allowed_difference_percentage
from qa_reporting.dw_truv_costs_median a
full outer join qa_reporting.truv_costs_median b
on a.year =b.year
and a.table_id_src = b.table_name
'''

pd.read_sql(query, con=connection).sort_values(['year', 'table_name'])



Unnamed: 0,year,table_name,dw_median_total_paid,src_median_paid,median_total_paid_difference,median_total_paid_difference_percentage,dw_median_total_allowed,src_median_allowed,median_total_allowed_difference,median_total_allowed_difference_percentage
51,2011,ALL,55.72,55.709991,0.01000854,0.01796544,95.0,95.0,0.0,0.0
54,2011,ccaeo,64.99,64.97998,0.01001953,0.01541941,93.13,93.109985,0.02001465,0.02149571
53,2011,ccaes,141.8,141.79,0.01,0.007052684,172.84,172.84,1.49015e-10,8.621559e-11
52,2011,mdcro,18.0,18.0,0.0,0.0,95.78,95.75,0.03,0.03133159
50,2011,mdcrs,24.26,24.25,0.01,0.04123711,106.7,106.7,7.450609e-10,6.982764e-10
1,2012,ALL,55.71,55.709976,2.380371e-05,4.272791e-05,97.0,97.0,0.0,0.0
4,2012,ccaeo,64.95,64.939995,0.01000519,0.01540682,95.96,95.949951,0.01004883,0.01047299
3,2012,ccaes,141.3,141.3,1.117598e-09,7.909402e-10,176.22,176.2,0.02,0.01135074
0,2012,mdcro,16.2,16.199997,3.051758e-06,1.883801e-05,96.01,96.0,0.01,0.01041667
2,2012,mdcrs,21.02,21.02,2.142038e-10,1.019047e-09,103.15,103.15,5.587992e-10,5.417346e-10


## 