# Data Warehouse Truven QA - Claim Header

## Initialization

Just loading packages that will be used and initializing connection to GP DB.

In [1]:
import pandas as pd
import sys
import psycopg2
from tqdm import tqdm
sys.path.append('H:/uth_helpers')
from db_utils import get_dsn

In [2]:
connection = psycopg2.connect(get_dsn()+' keepalives=1 keepalives_idle=30 keepalives_interval=10')
connection.autocommit = True

In [3]:
year_df = pd.read_sql('select distinct year from dw_staging.trum_claim_header;', con=connection)
tables = ['ccaes', 'mdcrs', 'mdcro', 'ccaeo']



## Row Count and Claim Count

Similar to the member_enrollment_monthly table, the claim_header table has the row count should equal the number of unique claims.

For this table, we extract claim data from the s, o, and f tables from the truven schema. 


In [4]:
query = ''' drop table if exists qa_reporting.dw_truv_claim_header_counts;
create table qa_reporting.dw_truv_claim_header_counts
(
    data_source bpchar(4),
    calendar_year int,
    table_src text,
    dw_row_count int,
    src_row_count int,
    row_count_diff int,
    row_count_diff_percentage float,
    dw_uth_clm_id_count int,
    dw_src_clm_id_count int,
    src_clm_count int,
    clm_count_diff int,
    clm_count_percentage float,
    dw_uth_mbr_id_count int,
    dw_src_mbr_id_count int,
    src_mbr_count int,
    mbr_count_diff int,
    mbr_count_percentage float,
    date_generated date
);
'''

with connection.cursor() as cursor:
    cursor.execute(query)

In [5]:
with connection.cursor() as cursor:
    for year in tqdm(year_df['year']):
        query = f'''
        insert into qa_reporting.dw_truv_claim_header_counts
        (data_source, calendar_year, table_src, dw_row_count, date_generated)
        select data_source, year, table_id_src, count(*), current_date
        from dw_staging.truc_claim_header
        where year = {year}
        group by 1, 2, 3
        '''
        
        cursor.execute(query)

        query = f'''
        update qa_reporting.dw_truv_claim_header_counts b
        set dw_uth_clm_id_count = count
        from (
            select data_source, year, table_id_src,  count(distinct uth_claim_id) as count 
            from dw_staging.truc_claim_header
        where year = {year}
        group by 1, 2, 3) a
        where a.year = b.calendar_year
        and a.table_id_src = b.table_src
        '''

        cursor.execute(query)

        query = f'''
        update qa_reporting.dw_truv_claim_header_counts b
        set dw_src_clm_id_count = count
        from (
            select data_source, year, table_id_src,  count(distinct claim_id_src) as count 
            from dw_staging.truc_claim_header
        where year = {year}
        group by 1, 2, 3) a
        where a.year = b.calendar_year
        and a.table_id_src = b.table_src
        '''

        cursor.execute(query)

        query = f'''
        update qa_reporting.dw_truv_claim_header_counts b
        set dw_uth_mbr_id_count = count
        from (
            select data_source, year, table_id_src, count(distinct uth_member_id) as count 
            from dw_staging.truc_claim_header
        where year = {year}
        group by 1, 2, 3) a
        where a.year = b.calendar_year
        and a.table_id_src = b.table_src
        '''

        cursor.execute(query)
        
        query = f'''
        update qa_reporting.dw_truv_claim_header_counts b
        set dw_src_mbr_id_count = count
        from (
            select data_source, year, table_id_src, count(distinct member_id_src) as count 
            from dw_staging.truc_claim_header
        where year = {year}
        group by 1, 2, 3) a
        where a.year = b.calendar_year
        and a.table_id_src = b.table_src
        '''

        cursor.execute(query)

100%|██████████| 12/12 [51:53<00:00, 259.42s/it]


In [6]:
with connection.cursor() as cursor:
    for year in tqdm(year_df['year']):
        query = f'''
        insert into qa_reporting.dw_truv_claim_header_counts
        (data_source, calendar_year, table_src, dw_row_count, date_generated)
        select data_source, year, table_id_src, count(*), current_date
        from dw_staging.trum_claim_header
        where year = {year}
        group by 1, 2, 3
        '''
        
        cursor.execute(query)

        query = f'''
        update qa_reporting.dw_truv_claim_header_counts b
        set dw_uth_clm_id_count = count
        from (
            select data_source, year, table_id_src, count(distinct uth_claim_id) as count 
            from dw_staging.trum_claim_header
        where year = {year}
        group by 1, 2, 3) a
        where a.year = b.calendar_year
        and a.table_id_src = b.table_src
        '''

        cursor.execute(query)

        query = f'''
        update qa_reporting.dw_truv_claim_header_counts b
        set dw_src_clm_id_count = count
        from (
            select data_source, year, table_id_src, count(distinct claim_id_src) as count 
            from dw_staging.trum_claim_header
        where year = {year}
        group by 1, 2, 3) a
        where a.year = b.calendar_year
        and a.table_id_src = b.table_src
        '''

        cursor.execute(query)

        query = f'''
        update qa_reporting.dw_truv_claim_header_counts b
        set dw_uth_mbr_id_count = count
        from (
            select data_source, year, table_id_src, count(distinct uth_member_id) as count 
            from dw_staging.trum_claim_header
        where year = {year}
        group by 1, 2, 3) a
        where a.year = b.calendar_year
        and a.table_id_src = b.table_src
        '''

        cursor.execute(query)
        
        query = f'''
        update qa_reporting.dw_truv_claim_header_counts b
        set dw_src_mbr_id_count = count
        from (
            select data_source, year, table_id_src, count(distinct member_id_src) as count 
            from dw_staging.trum_claim_header
        where year = {year}
        group by 1, 2, 3) a
        where a.year = b.calendar_year
        and a.table_id_src = b.table_src
        '''

        cursor.execute(query)

100%|██████████| 12/12 [08:23<00:00, 41.94s/it]


In [7]:
with connection.cursor() as cursor:
    query = '''update qa_reporting.dw_truv_claim_header_counts a
    set src_row_count = b.row_count,
    row_count_diff = dw_row_count - b.row_count,
    row_count_diff_percentage = 100. * abs( dw_row_count - b.row_count) / b.row_count,
    src_clm_count = clm_count,
    clm_count_diff = dw_uth_clm_id_count - b.clm_count,
    clm_count_percentage = 100. * abs(dw_uth_clm_id_count - b.clm_count) / b.clm_count,
    src_mbr_count = b.pat_count,
    mbr_count_diff = dw_uth_mbr_id_count - b.pat_count,
    mbr_count_percentage = 100. * abs(dw_uth_mbr_id_count - b.pat_count) / b.pat_count
    from qa_reporting.truven_counts b
    where a.calendar_year = b.year
    and a.table_src = b.table_name
    ;
    '''

    cursor.execute(query)

Note that the source row count from the o and s tables from truven are at detail level. This means that the row count from the individual tables are going to larger than the row count in the claim header table. Instead of comparing the row count from the DW table with the raw table, we can compare the row count with the unique claim count from the raw table.

In [8]:
df = pd.read_sql('select * from qa_reporting.dw_truv_claim_header_counts;', con=connection)
df.head()



Unnamed: 0,data_source,calendar_year,table_src,dw_row_count,src_row_count,row_count_diff,row_count_diff_percentage,dw_uth_clm_id_count,dw_src_clm_id_count,src_clm_count,clm_count_diff,clm_count_percentage,dw_uth_mbr_id_count,dw_src_mbr_id_count,src_mbr_count,mbr_count_diff,mbr_count_percentage,date_generated
0,truc,2020,ccaes,9530422,33760060,-24229638,71.770127,9530422,9526317,9529232,1190,0.012488,824990,824990,825450,-460,0.055727,2023-10-12
1,truc,2020,ccaeo,217781219,485549961,-267768742,55.147516,217781219,217776827,217834256,-53037,0.024347,18685492,18685492,18687285,-1793,0.009595,2023-10-12
2,truc,2018,ccaes,10975995,38984126,-28008131,71.844963,10975995,10971517,10976708,-713,0.006496,1028859,1028859,1029737,-878,0.085264,2023-10-12
3,truc,2018,ccaeo,251287085,589022269,-337735184,57.338271,251287085,251283034,251365523,-78438,0.031205,21525862,21525862,21529034,-3172,0.014734,2023-10-12
4,truc,2021,ccaes,9696994,33461744,-23764750,71.020656,9696994,9693609,9698022,-1028,0.0106,811223,811223,811797,-574,0.070707,2023-10-12


In [9]:
df[(df['data_source'] == 'trum')].sort_values(['calendar_year', 'table_src'])

Unnamed: 0,data_source,calendar_year,table_src,dw_row_count,src_row_count,row_count_diff,row_count_diff_percentage,dw_uth_clm_id_count,dw_src_clm_id_count,src_clm_count,clm_count_diff,clm_count_percentage,dw_uth_mbr_id_count,dw_src_mbr_id_count,src_mbr_count,mbr_count_diff,mbr_count_percentage,date_generated
45,trum,2011,mdcro,106545093,234255047,-127709954,54.517482,106545093,106542800,106577308,-32215,0.030227,4743651,4743651,4750926,-7275,0.153128,2023-10-12
43,trum,2011,mdcrs,10833438,32535953,-21702515,66.703179,10833438,10831969,10836750,-3312,0.030563,794174,794174,794571,-397,0.049964,2023-10-12
44,trum,2012,mdcro,99412248,229768218,-130355970,56.733682,99412248,99410354,99450749,-38501,0.038714,4384481,4384481,4391031,-6550,0.149168,2023-10-12
42,trum,2012,mdcrs,10013746,32200251,-22186505,68.901652,10013746,10011723,10014407,-661,0.0066,714024,714024,714226,-202,0.028282,2023-10-12
41,trum,2013,mdcro,88011596,208095580,-120083984,57.706168,88011596,88009897,88058549,-46953,0.05332,3805817,3805817,3812531,-6714,0.176103,2023-10-12
40,trum,2013,mdcrs,8710381,29045752,-20335371,70.011515,8710381,8708514,8711803,-1422,0.016323,599538,599538,599773,-235,0.039181,2023-10-12
33,trum,2014,mdcro,79484204,193877526,-114393322,59.002879,79484204,79481580,79544615,-60411,0.075946,3505530,3505530,3514364,-8834,0.251368,2023-10-12
32,trum,2014,mdcrs,7963924,26044601,-18080677,69.421977,7963924,7961307,7964806,-882,0.011074,531316,531316,531556,-240,0.04515,2023-10-12
39,trum,2015,mdcro,49709576,121473554,-71763978,59.077862,49709576,49708196,49736837,-27261,0.05481,2032351,2032351,2034639,-2288,0.112452,2023-10-12
38,trum,2015,mdcrs,5185230,16705568,-11520338,68.961067,5185230,5183316,5184315,915,0.017649,336223,336223,336283,-60,0.017842,2023-10-12


In [11]:
df['clm_count_percentage'].describe()

count    48.000000
mean      0.036420
std       0.042563
min       0.001371
25%       0.012224
50%       0.028283
75%       0.051330
max       0.281574
Name: clm_count_percentage, dtype: float64

In [12]:
df['mbr_count_percentage'].describe()

count    48.000000
mean      0.078722
std       0.097704
min       0.008867
25%       0.027043
50%       0.050361
75%       0.090472
max       0.630042
Name: mbr_count_percentage, dtype: float64

## Claim Type

In [13]:
with connection.cursor() as cursor:
    query = '''drop table if exists qa_reporting.truv_claim_type_count;
    with truv_claims as (
        select 'mdcro' as table_src, year, claim_id_derv, facprof
        from truven.mdcro
        union
        select 'mdcrs' as table_src, year, claim_id_derv, facprof
        from truven.mdcrs
    )
select table_src, year, facprof, count(*)
into qa_reporting.truv_claim_type_count
from truv_claims
group by 1,2,3
    '''
    cursor.execute(query)


    query = '''
    with truv_claims as (
        select 'ccaeo' as table_src, year, claim_id_derv, facprof
        from truven.ccaeo
        union
        select 'ccaes' as table_src, year, claim_id_derv, facprof
        from truven.ccaes
    )
    insert into qa_reporting.truv_claim_type_count
select table_src, year, facprof, count(*)
from truv_claims
group by 1,2,3
    '''
    cursor.execute(query)


In [14]:
with connection.cursor() as cursor:
    query = '''drop table if exists qa_reporting.dw_truv_claim_type_count;
select table_id_src, year, claim_type, count(*)
into qa_reporting.dw_truv_claim_type_count
from dw_staging.trum_claim_header
group by 1,2,3
    '''
    cursor.execute(query)

    query = '''
insert into qa_reporting.dw_truv_claim_type_count
select table_id_src, year, claim_type, count(*)
from dw_staging.truc_claim_header
group by 1,2,3
    '''
    cursor.execute(query)

In [15]:
query = f'''
select a.table_id_src, a.year, a.claim_type, a.count as dw_claim_type_count, b.count as src_claim_type_count, 
        a.count-b.count as claim_type_count_difference, 
         100. * abs(a.count-b.count) / b.count as claim_type_count_difference_percentage
from qa_reporting.dw_truv_claim_type_count a
full outer join qa_reporting.truv_claim_type_count b
on a.year = b.year
and a.claim_type = b.facprof
and a.table_id_src = b.table_src
'''

pd.read_sql(query, con=connection).sort_values(['year', 'claim_type'])



Unnamed: 0,table_id_src,year,claim_type,dw_claim_type_count,src_claim_type_count,claim_type_count_difference,claim_type_count_difference_percentage
10,ccaeo,2011,F,55727659,55741449,-13790,0.024739
12,ccaes,2011,F,3745968,3746955,-987,0.026341
17,mdcrs,2011,F,1249943,1250249,-306,0.024475
86,mdcro,2011,F,13497693,13501590,-3897,0.028863
38,mdcro,2011,P,93047400,93075720,-28320,0.030427
...,...,...,...,...,...,...,...
82,mdcrs,2022,F,328757,328811,-54,0.016423
24,mdcro,2022,P,32313223,32320811,-7588,0.023477
33,ccaeo,2022,P,195779160,195870754,-91594,0.046762
35,ccaes,2022,P,6821150,6838761,-17611,0.257517


In [16]:
pd.read_sql(query, con=connection).sort_values(['claim_type_count_difference_percentage'])



Unnamed: 0,table_id_src,year,claim_type,dw_claim_type_count,src_claim_type_count,claim_type_count_difference,claim_type_count_difference_percentage
44,mdcrs,2021,P,3330480,3330440,40,0.001201
88,ccaes,2019,F,1664428,1664398,30,0.001802
92,mdcrs,2016,P,4284431,4284519,-88,0.002054
2,mdcrs,2017,P,3094575,3094470,105,0.003393
80,mdcrs,2016,F,528335,528315,20,0.003786
...,...,...,...,...,...,...,...
45,ccaeo,2022,F,22297151,22319856,-22705,0.101726
41,ccaeo,2017,F,28361538,28391983,-30445,0.107231
37,mdcro,2017,F,5081956,5088331,-6375,0.125287
38,ccaes,2022,P,6821150,6838761,-17611,0.257517


## Costs

### Sum

In [17]:
with connection.cursor() as cursor:
    query = '''drop table if exists qa_reporting.truv_costs_total;

    create table qa_reporting.truv_costs_total
    (
        year int,
        table_name text,
        total_netpay bigint,
        total_pay bigint
    );
    '''
    cursor.execute(query)

In [18]:
for table in tables:
    print(table)
    with connection.cursor() as cursor:
        query = f'''insert into qa_reporting.truv_costs_total
    select year, '{table}' as table_name, sum(netpay) total_netpay, sum(pay) total_pay
    from truven.{table}
    group by 1
        '''
        cursor.execute(query)

ccaes
mdcrs
mdcro
ccaeo


In [19]:
with connection.cursor() as cursor:
    query = '''drop table if exists qa_reporting.dw_truv_costs_total;
select year, table_id_src,
        sum(total_charge_amount) as total_charge_amount,
        sum(total_allowed_amount) as total_allowed_amount, 
        sum(total_paid_amount) as total_paid_amount
into qa_reporting.dw_truv_costs_total
from dw_staging.trum_claim_header
group by 1,2
    '''
    cursor.execute(query)

    query = '''
insert into qa_reporting.dw_truv_costs_total
select year, table_id_src,
        sum(total_charge_amount) as total_charge_amount,
        sum(total_allowed_amount) as total_allowed_amount, 
        sum(total_paid_amount) as total_paid_amount
from dw_staging.truc_claim_header
group by 1,2
    '''
    cursor.execute(query)

In [20]:
query = f'''
select a.year, b.table_name, 
        a.total_paid_amount as dw_total_paid, b.total_netpay as src_total_paid, 
        a.total_paid_amount-b.total_netpay as total_paid_difference, 
        100. * abs(a.total_paid_amount-b.total_netpay) / b.total_netpay as total_paid_difference_percentage,
        a.total_allowed_amount as dw_total_allowed, b.total_pay as src_total_allowed,
        a.total_allowed_amount-b.total_pay as total_allowed_difference, 
        100. * abs(a.total_allowed_amount-b.total_pay) / b.total_pay as total_allowed_difference_percentage
from qa_reporting.dw_truv_costs_total a
full outer join qa_reporting.truv_costs_total b
on a.year =b.year
and a.table_id_src = b.table_name
'''

pd.read_sql(query, con=connection).sort_values(['year', 'table_name'])



Unnamed: 0,year,table_name,dw_total_paid,src_total_paid,total_paid_difference,total_paid_difference_percentage,dw_total_allowed,src_total_allowed,total_allowed_difference,total_allowed_difference_percentage
14,2011,ccaeo,97890230000.0,97930912848,-40678170.0,0.041538,120433100000.0,120483500000.0,-50352810.0,0.041792
18,2011,ccaes,48461680000.0,48530139946,-68455730.0,0.141058,53019780000.0,53095060000.0,-75271570.0,0.141768
19,2011,mdcro,9643964000.0,9647446643,-3482661.0,0.036099,32813720000.0,32816200000.0,-2478564.0,0.007553
17,2011,mdcrs,5114301000.0,5117238523,-2937408.0,0.057402,20373080000.0,20378950000.0,-5871361.0,0.028811
12,2012,ccaeo,102370200000.0,102415611581,-45449300.0,0.044377,127724700000.0,127782400000.0,-57650250.0,0.045116
15,2012,ccaes,49963980000.0,50013773704,-49795540.0,0.099564,54671100000.0,54725520000.0,-54417530.0,0.099437
16,2012,mdcro,8674936000.0,8678058239,-3122601.0,0.035983,32137510000.0,32141700000.0,-4194030.0,0.013049
13,2012,mdcrs,4654027000.0,4655845817,-1818886.0,0.039067,19071190000.0,19073590000.0,-2397466.0,0.01257
40,2013,ccaeo,82506500000.0,82548547983,-42047810.0,0.050937,102840000000.0,102889800000.0,-49850580.0,0.04845
43,2013,ccaes,40898560000.0,40959390364,-60830870.0,0.148515,44649480000.0,44714320000.0,-64843520.0,0.145017


### Min

In [21]:
with connection.cursor() as cursor:
    query = '''drop table if exists qa_reporting.truv_costs_min;

    create table qa_reporting.truv_costs_min
    (
        year int,
        table_name text,
        min_netpay bigint,
        min_pay bigint
    );
    '''
    cursor.execute(query)

In [22]:
for table in tables[:]:
    with connection.cursor() as cursor:
        query = f'''insert into qa_reporting.truv_costs_min
    with truv_claims as (
        select year, claim_id_derv, sum(netpay) netpay, sum(pay) pay
        from truven.{table}
        group by 1,2
    )
    select year, '{table}' as table_name, min(netpay), min(pay) 
    from truv_claims
    group by 1
        '''
        cursor.execute(query)

In [23]:
with connection.cursor() as cursor:
    query = '''drop table if exists qa_reporting.dw_truv_costs_min;
select year, table_id_src,
        min(total_charge_amount) as min_total_charge_amount,
        min(total_allowed_amount) as min_total_allowed_amount, 
        min(total_paid_amount) as min_total_paid_amount
into qa_reporting.dw_truv_costs_min
from dw_staging.trum_claim_header
group by 1,2
    '''
    cursor.execute(query)

    query = '''
insert into qa_reporting.dw_truv_costs_min
select year, table_id_src,
        min(total_charge_amount) as min_total_charge_amount,
        min(total_allowed_amount) as min_total_allowed_amount, 
        min(total_paid_amount) as min_total_paid_amount
from dw_staging.truc_claim_header
group by 1,2
    '''
    cursor.execute(query)

In [24]:
query = f'''
select a.year, b.table_name, 
        a.min_total_paid_amount as dw_min_total_paid, b.min_netpay as src_min_paid, 
        a.min_total_paid_amount-b.min_netpay as min_total_paid_difference, 
        100. * abs(a.min_total_paid_amount-b.min_netpay) / b.min_netpay as min_total_paid_difference_percentage,
        a.min_total_allowed_amount as dw_min_total_allowed, b.min_pay as src_min_allowed,
        a.min_total_allowed_amount-b.min_pay as min_total_allowed_difference, 
        100. * abs(a.min_total_allowed_amount-b.min_pay) / b.min_pay as min_total_allowed_difference_percentage
from qa_reporting.dw_truv_costs_min a
full outer join qa_reporting.truv_costs_min b
on a.year =b.year
and a.table_id_src = b.table_name
'''

pd.read_sql(query, con=connection).sort_values(['year', 'table_name'])



Unnamed: 0,year,table_name,dw_min_total_paid,src_min_paid,min_total_paid_difference,min_total_paid_difference_percentage,dw_min_total_allowed,src_min_allowed,min_total_allowed_difference,min_total_allowed_difference_percentage
31,2011,ccaeo,-365085.75,-365086,0.25,-6.8e-05,-1598982.22,-1598982.0,-0.22,-1.4e-05
28,2011,ccaes,-2414961.42,-2414961,-0.42,-1.7e-05,-8878220.0,-8878220.0,0.0,0.0
29,2011,mdcro,-58497.0,-58497,0.0,0.0,-1955955.0,-1955955.0,0.0,0.0
26,2011,mdcrs,-1985218.0,-1985218,0.0,0.0,-1985218.0,-1985218.0,0.0,0.0
30,2012,ccaeo,-431422.47,-431422,-0.47,-0.000109,-915090.0,-915090.0,0.0,0.0
25,2012,ccaes,-3228180.67,-3228181,0.33,-1e-05,-3228180.67,-3228181.0,0.33,-1e-05
27,2012,mdcro,-107614.94,-107615,0.06,-5.6e-05,-586311.96,-720835.0,134523.04,-18.662113
24,2012,mdcrs,-859830.64,-859831,0.36,-4.2e-05,-1340482.0,-1340482.0,0.0,0.0
6,2013,ccaeo,-628859.17,-628859,-0.17,-2.7e-05,-763213.5,-763214.0,0.5,-6.6e-05
5,2013,ccaes,-2500213.33,-2500213,-0.33,-1.3e-05,-2500213.33,-2500213.0,-0.33,-1.3e-05


### Max

In [25]:
with connection.cursor() as cursor:
    query = '''drop table if exists qa_reporting.truv_costs_max;

    create table qa_reporting.truv_costs_max
    (
        year int,
        table_name text,
        max_netpay bigint,
        max_pay bigint
    );
    '''
    cursor.execute(query)

In [26]:
for table in tables:
    with connection.cursor() as cursor:
        query = f'''insert into qa_reporting.truv_costs_max
    with truv_claims as (
        select year, claim_id_derv, sum(netpay) netpay, sum(pay) pay
        from truven.{table}
        group by 1,2
    )
    select year, '{table}' as table_name, max(netpay), max(pay) 
    from truv_claims
    group by 1
        '''
        cursor.execute(query)

In [27]:
with connection.cursor() as cursor:
    query = '''drop table if exists qa_reporting.dw_truv_costs_max;
select year, table_id_src,
        max(total_charge_amount) as max_total_charge_amount,
        max(total_allowed_amount) as max_total_allowed_amount, 
        max(total_paid_amount) as max_total_paid_amount
into qa_reporting.dw_truv_costs_max
from dw_staging.trum_claim_header
group by 1,2
    '''
    cursor.execute(query)

    query = '''
insert into qa_reporting.dw_truv_costs_max
select year, table_id_src,
        max(total_charge_amount) as max_total_charge_amount,
        max(total_allowed_amount) as max_total_allowed_amount, 
        max(total_paid_amount) as max_total_paid_amount
from dw_staging.truc_claim_header
group by 1,2
    '''
    cursor.execute(query)

In [28]:
query = '''
select a.year, b.table_name, 
        a.max_total_paid_amount as dw_max_total_paid, b.max_netpay as src_max_paid, 
        a.max_total_paid_amount-b.max_netpay as max_total_paid_difference, 
        100. * abs(a.max_total_paid_amount-b.max_netpay) / b.max_netpay as max_total_paid_difference_percentage,
        a.max_total_allowed_amount as dw_max_total_allowed, b.max_pay as src_max_allowed,
        a.max_total_allowed_amount-b.max_pay as max_total_allowed_difference, 
        100. * abs(a.max_total_allowed_amount-b.max_pay) / b.max_pay as max_total_allowed_difference_percentage
from qa_reporting.dw_truv_costs_max a
full outer join qa_reporting.truv_costs_max b
on a.year =b.year
and a.table_id_src = b.table_name
'''

pd.read_sql(query, con=connection).sort_values(['year', 'table_name'])



Unnamed: 0,year,table_name,dw_max_total_paid,src_max_paid,max_total_paid_difference,max_total_paid_difference_percentage,dw_max_total_allowed,src_max_allowed,max_total_allowed_difference,max_total_allowed_difference_percentage
20,2011,ccaeo,2462133.31,42340117,-39877983.69,94.184869,2462133.31,50000389.0,-47538255.69,95.07577
21,2011,ccaes,3412327.0,59177118,-55764791.0,94.233705,8878220.0,63381441.0,-54503221.0,85.9924
19,2011,mdcro,500555.0,828083,-327528.0,39.552557,6106155.25,6106155.0,0.25,4.09423e-06
18,2011,mdcrs,1985812.0,1985812,0.0,0.0,5422836.35,5422836.0,0.35,6.454187e-06
16,2012,ccaeo,1801357.5,38293144,-36491786.5,95.295875,1802211.5,47719911.0,-45917699.5,96.22336
17,2012,ccaes,4314090.0,56421370,-52107280.0,92.353801,4314090.0,60305929.0,-55991839.0,92.84633
23,2012,mdcro,404472.16,611210,-206737.84,33.824355,1068762.0,1068762.0,0.0,0.0
22,2012,mdcrs,1397399.84,1397400,-0.16,1.1e-05,3174327.93,3174328.0,-0.07,2.205191e-06
43,2013,ccaeo,1975256.41,33904695,-31929438.59,94.174092,1975256.41,40530165.0,-38554908.59,95.12645
42,2013,ccaes,4922667.0,59838224,-54915557.0,91.773374,4922667.0,63465750.0,-58543083.0,92.24358


### Median

These queries take a while to run. May lose connection when executing, but the query may still be running even if connection is closed.

In [29]:
with connection.cursor() as cursor:
    query = '''drop table if exists qa_reporting.truv_costs_median;

    create table qa_reporting.truv_costs_median
    (
        year int,
        table_name text,
        median_netpay bigint,
        median_pay bigint
    );
    '''
    cursor.execute(query)

In [30]:
for table in tables:
    print(table)
    with connection.cursor() as cursor:
        query = f'''insert into qa_reporting.truv_costs_median
    with truv_claims as (
        select year, claim_id_derv, sum(netpay) netpay, sum(pay) pay
        from truven.{table}
        group by 1,2
    )
    select year, '{table}' as table_name, median(netpay), median(pay) 
    from truv_claims
    group by 1
        '''
        cursor.execute(query)

ccaes
mdcrs
mdcro
ccaeo


In [31]:
with connection.cursor() as cursor:
    query = '''drop table if exists qa_reporting.dw_truv_costs_median;
select year, table_id_src,
        median(total_charge_amount) as median_total_charge_amount,
        median(total_allowed_amount) as median_total_allowed_amount, 
        median(total_paid_amount) as median_total_paid_amount
into qa_reporting.dw_truv_costs_median
from dw_staging.trum_claim_header
group by 1,2
    '''
    cursor.execute(query)

    query = '''
insert into qa_reporting.dw_truv_costs_median
select year, table_id_src,
        median(total_charge_amount) as median_total_charge_amount,
        median(total_allowed_amount) as median_total_allowed_amount, 
        median(total_paid_amount) as median_total_paid_amount
from dw_staging.truc_claim_header
group by 1,2
    '''
    cursor.execute(query)

In [32]:
query = f'''
select a.year, b.table_name, 
        a.median_total_paid_amount as dw_median_total_paid, b.median_netpay as src_median_paid, 
        a.median_total_paid_amount-b.median_netpay as median_total_paid_difference, 
        100. * abs(a.median_total_paid_amount-b.median_netpay) / b.median_netpay as median_total_paid_difference_percentage,
        a.median_total_allowed_amount as dw_median_total_allowed, b.median_pay as src_median_allowed,
        a.median_total_allowed_amount-b.median_pay as median_total_allowed_difference, 
        100. * abs(a.median_total_allowed_amount-b.median_pay) / b.median_pay as median_total_allowed_difference_percentage
from qa_reporting.dw_truv_costs_median a
full outer join qa_reporting.truv_costs_median b
on a.year =b.year
and a.table_id_src = b.table_name
'''

pd.read_sql(query, con=connection).sort_values(['year', 'table_name'])



Unnamed: 0,year,table_name,dw_median_total_paid,src_median_paid,median_total_paid_difference,median_total_paid_difference_percentage,dw_median_total_allowed,src_median_allowed,median_total_allowed_difference,median_total_allowed_difference_percentage
45,2011,ccaeo,65.0,65,0.0,0.0,93.13,93.0,0.13,0.139785
34,2011,ccaes,141.82,142,-0.18,0.126761,172.84,173.0,-0.16,0.092486
25,2011,mdcro,18.0,18,0.0,0.0,95.79,96.0,-0.21,0.21875
43,2011,mdcrs,24.26,24,0.26,1.083333,106.7,107.0,-0.3,0.280374
12,2012,ccaeo,64.95,65,-0.05,0.076923,95.96,96.0,-0.04,0.041667
40,2012,ccaes,141.32,141,0.32,0.22695,176.24,176.0,0.24,0.136364
2,2012,mdcro,16.2,16,0.2,1.25,96.03,96.0,0.03,0.03125
20,2012,mdcrs,21.01,21,0.01,0.047619,103.16,103.0,0.16,0.15534
7,2013,ccaeo,65.0,65,0.0,0.0,96.27,96.0,0.27,0.28125
1,2013,ccaes,147.79,148,-0.21,0.141892,183.09,183.0,0.09,0.04918
