# Data Warehouse Truven QA - Pharmacy Claims

## Initialization

Load the packages used in this notebook and open the connection to the GP database.

In [1]:
import pandas as pd
import sys
import psycopg2
from tqdm import tqdm
sys.path.append('H:/uth_helpers')
from db_utils import get_dsn

In [2]:
# Open the database connection with keepalive settings appended to the DSN,
# then enable autocommit so each executed statement is committed immediately.
keepalive_options = ' keepalives=1 keepalives_idle=30 keepalives_interval=10'
connection = psycopg2.connect(get_dsn() + keepalive_options)
connection.autocommit = True

## Row Counts and Claim Counts

In [3]:
# (Re)create the QA summary table for pharmacy claim counts.
# One row is expected per (data_source, calendar_year, table_src). The dw_*
# columns hold counts taken from the dw_staging pharmacy claims tables, the
# src_* columns hold the source-side counts, and the *_diff / *_percentage
# columns hold the comparison between the two. Only the table is created
# here; the columns are filled in by later statements.
query = ''' drop table if exists qa_reporting.dw_truv_pharmacy_claims_counts;
create table qa_reporting.dw_truv_pharmacy_claims_counts
(
    data_source bpchar(4),
    calendar_year int,
    table_src text,
    dw_row_count bigint,
    src_row_count bigint,
    row_count_diff bigint,
    row_count_diff_percentage float,
    dw_uth_clm_id_count bigint,
    dw_src_clm_id_count bigint,
    src_clm_count bigint,
    clm_count_diff bigint,
    clm_count_percentage float,
    dw_uth_mbr_id_count bigint,
    dw_src_mbr_id_count bigint,
    src_mbr_count bigint,
    mbr_count_diff bigint,
    mbr_count_percentage float,
    date_generated date
);
'''

# The drop and the create are sent together as one multi-statement string.
with connection.cursor() as cursor:
    cursor.execute(query)

In [4]:
# Populate the DW-side counts for dw_staging.trum_pharmacy_claims: one row per
# (data_source, year, table_id_src) with the row count, the distinct claim-id
# counts (uth_rx_claim_id, rx_claim_id_src) and the distinct member-id counts
# (uth_member_id, member_id_src).
#
# Every count is produced by the same grouped select, so the staging table is
# aggregated once and no follow-up UPDATE per column is needed. Groups whose
# key columns are NULL also receive all of their counts this way, which an
# equality-joined UPDATE could not provide.
query = '''
    insert into qa_reporting.dw_truv_pharmacy_claims_counts
    (data_source, calendar_year, table_src, dw_row_count,
     dw_uth_clm_id_count, dw_src_clm_id_count,
     dw_uth_mbr_id_count, dw_src_mbr_id_count,
     date_generated)
    select data_source, year, table_id_src, count(*),
           count(distinct uth_rx_claim_id), count(distinct rx_claim_id_src),
           count(distinct uth_member_id), count(distinct member_id_src),
           current_date
    from dw_staging.trum_pharmacy_claims
    group by 1,2,3
    '''

with connection.cursor() as cursor:
    cursor.execute(query)

In [5]:
# Populate the DW-side counts for dw_staging.truc_pharmacy_claims: one row per
# (data_source, year, table_id_src) with the row count, the distinct claim-id
# counts (uth_rx_claim_id, rx_claim_id_src) and the distinct member-id counts
# (uth_member_id, member_id_src).
#
# Every count is produced by the same grouped select, so the staging table is
# aggregated once and no follow-up UPDATE per column is needed. Groups whose
# key columns are NULL also receive all of their counts this way, which an
# equality-joined UPDATE could not provide.
query = '''
    insert into qa_reporting.dw_truv_pharmacy_claims_counts
    (data_source, calendar_year, table_src, dw_row_count,
     dw_uth_clm_id_count, dw_src_clm_id_count,
     dw_uth_mbr_id_count, dw_src_mbr_id_count,
     date_generated)
    select data_source, year, table_id_src, count(*),
           count(distinct uth_rx_claim_id), count(distinct rx_claim_id_src),
           count(distinct uth_member_id), count(distinct member_id_src),
           current_date
    from dw_staging.truc_pharmacy_claims
    group by 1,2,3
    '''

with connection.cursor() as cursor:
    cursor.execute(query)

In [6]:
# Fill in the source-side columns from qa_reporting.truven_counts (rows for
# table_name 'ccaed' and 'mdcrd'), matched on calendar year and source table
# name, and derive the DW-minus-source differences and absolute percentage
# differences for rows, claims and members.
#
# Each percentage divides by nullif(<source count>, 0): a source count of 0
# yields a NULL percentage for that row instead of a division-by-zero error
# that would abort the whole update.
query = '''update qa_reporting.dw_truv_pharmacy_claims_counts a
    set src_row_count = b.row_count,
    row_count_diff = a.dw_row_count - b.row_count,
    row_count_diff_percentage = 100. * abs(a.dw_row_count - b.row_count) / nullif(b.row_count, 0),
    src_clm_count = b.clm_count,
    clm_count_diff = a.dw_uth_clm_id_count - b.clm_count,
    clm_count_percentage = 100. * abs(a.dw_uth_clm_id_count - b.clm_count) / nullif(b.clm_count, 0),
    src_mbr_count = b.pat_count,
    mbr_count_diff = a.dw_uth_mbr_id_count - b.pat_count,
    mbr_count_percentage = 100. * abs(a.dw_uth_mbr_id_count - b.pat_count) / nullif(b.pat_count, 0)
    from (select * from qa_reporting.truven_counts where table_name in('ccaed', 'mdcrd')) b
    where a.calendar_year = b.year
    and a.table_src = b.table_name
    ;'''

with connection.cursor() as cursor:
    cursor.execute(query)

In [8]:
with connection.cursor() as cursor:
    # Note that this query will run for a long time
    # It stacks staging_clean.ccaed_etl and staging_clean.mdcrd_etl, counts
    # rows, distinct enrolid and distinct rx_id_src per year, and writes those
    # source-side counts (plus the derived differences and percentages) to the
    # rows of the counts table whose table_src is 'ALL'.
    # NOTE(review): the insert statements visible in this notebook fill
    # table_src from table_id_src, and the saved output only shows 'ccaed' and
    # 'mdcrd' -- confirm that rows with table_src = 'ALL' exist, otherwise
    # this update matches nothing.
    query = '''
    with truven_claims as (
            select year, enrolid, rx_id_src
            from staging_clean.ccaed_etl
            union all
            select year, enrolid, rx_id_src
            from staging_clean.mdcrd_etl
        ),
        truven_claims_counts as (
            select year as calendar_year, count(*) as src_row_count, 
                    count(distinct enrolid) src_mbr_count,
                    count(distinct rx_id_src) src_clm_count
            from truven_claims
            group by 1
        )
    update qa_reporting.dw_truv_pharmacy_claims_counts a
    set src_row_count = b.src_row_count,
    row_count_diff = a.dw_row_count - b.src_row_count,
    row_count_diff_percentage = 100. * abs(a.dw_row_count - b.src_row_count) / b.src_row_count,
    src_clm_count = b.src_clm_count,
    clm_count_diff = a.dw_uth_clm_id_count - b.src_clm_count,
    clm_count_percentage = 100. * abs(a.dw_uth_clm_id_count - b.src_clm_count) / b.src_clm_count,
    src_mbr_count = b.src_mbr_count,
    mbr_count_diff = a.dw_uth_mbr_id_count - b.src_mbr_count,   
    mbr_count_percentage = 100. * abs(a.dw_uth_mbr_id_count - b.src_mbr_count) / b.src_mbr_count
    from truven_claims_counts b
    where a.calendar_year = b.calendar_year
    and a.table_src = 'ALL'
    ;
    '''

    cursor.execute(query)

In [9]:
# Pull the finished counts table and show it ordered by the three percentage
# difference columns (rows, then claims, then members), smallest first.
claim_counts_df = pd.read_sql('select * from qa_reporting.dw_truv_pharmacy_claims_counts', con=connection)
percentage_columns = ['row_count_diff_percentage', 'clm_count_percentage', 'mbr_count_percentage']
claim_counts_df.sort_values(percentage_columns)



Unnamed: 0,data_source,calendar_year,table_src,dw_row_count,src_row_count,row_count_diff,row_count_diff_percentage,dw_uth_clm_id_count,dw_src_clm_id_count,src_clm_count,clm_count_diff,clm_count_percentage,dw_uth_mbr_id_count,dw_src_mbr_id_count,src_mbr_count,mbr_count_diff,mbr_count_percentage,date_generated
14,trum,2020,mdcrd,43885459,43901901,-16442,0.037452,43150185,43150185,43155669,-5484,0.012707,1561746,1561746,1563493,-1747,0.111737,2023-10-13
19,trum,2019,mdcrd,43550538,43567923,-17385,0.039903,42623686,42623686,42628776,-5090,0.01194,1503626,1503626,1505037,-1411,0.093752,2023-10-13
20,trum,2012,mdcrd,90932397,90968863,-36466,0.040086,90278973,90278973,90305643,-26670,0.029533,3146998,3146998,3153272,-6274,0.198968,2023-10-13
12,trum,2021,mdcrd,34978211,34994521,-16310,0.046607,34025441,34025441,34030942,-5501,0.016165,1267802,1267802,1269364,-1562,0.123054,2023-10-13
13,trum,2018,mdcrd,30634642,30650133,-15491,0.050541,29531866,29531866,29537128,-5262,0.017815,1017094,1017094,1018570,-1476,0.144909,2023-10-13
17,trum,2015,mdcrd,57061840,57094378,-32538,0.05699,56664517,56664517,56681262,-16745,0.029542,1880679,1880679,1883600,-2921,0.155075,2023-10-13
18,trum,2022,mdcrd,37085100,37106643,-21543,0.058057,36048950,36048950,36060087,-11137,0.030885,1383478,1383478,1386834,-3356,0.24199,2023-10-13
15,trum,2011,mdcrd,98880446,98938042,-57596,0.058214,98335713,98335713,98371093,-35380,0.035966,3392183,3392183,3399703,-7520,0.221196,2023-10-13
22,trum,2013,mdcrd,81845763,81898656,-52893,0.064583,81554952,81554952,81589584,-34632,0.042447,2875298,2875298,2881673,-6375,0.221226,2023-10-13
8,truc,2019,ccaed,195893709,196023413,-129704,0.066168,194914404,194914404,194924194,-9790,0.005022,16274541,16274541,16276029,-1488,0.009142,2023-10-13


## NDC

In [10]:
# Rebuild qa_reporting.dw_truv_ndc_count with the number of rows per
# (data_source, year, ndc). The "select ... into" creates the table from
# dw_staging.trum_pharmacy_claims; the insert then appends the same
# aggregation for dw_staging.truc_pharmacy_claims. All three statements are
# sent together as one multi-statement string.
query = '''drop table if exists qa_reporting.dw_truv_ndc_count;
select data_source, year, ndc, count(*)
into qa_reporting.dw_truv_ndc_count
from dw_staging.trum_pharmacy_claims
group by 1,2,3;

insert into qa_reporting.dw_truv_ndc_count
select data_source, year, ndc, count(*)
from dw_staging.truc_pharmacy_claims
group by 1,2,3'''

with connection.cursor() as cursor:
    cursor.execute(query)

In [11]:
# Load the per-(data_source, year, ndc) row counts into pandas and display them.
ndc_count_sql = 'select * from qa_reporting.dw_truv_ndc_count;'
ndc_df = pd.read_sql(ndc_count_sql, con=connection)
ndc_df



Unnamed: 0,data_source,year,ndc,count
0,trum,2017,50428695912,95
1,trum,2014,24090049684,415
2,trum,2022,42799092201,2599
3,trum,2015,57664011688,337
4,trum,2021,25021046274,6
...,...,...,...,...
957166,truc,2015,68094017162,107
957167,truc,2012,49769042128,8
957168,truc,2013,00093304401,1
957169,truc,2018,00185093997,1957


In [12]:
# Total row count per data source and year, summed over all NDC values.
ndc_by_source_year = ndc_df.groupby(['data_source', 'year'])
ndc_by_source_year['count'].sum()

data_source  year
truc         2011    365260756
             2012    362021168
             2013    283452852
             2014    302606303
             2015    230311386
             2016    235433474
             2017    215140764
             2018    210009558
             2019    195893709
             2020    177916186
             2021    179782645
             2022    166553322
trum         2011     98880446
             2012     90932397
             2013     81845763
             2014     71110384
             2015     57061840
             2016     56357005
             2017     39145600
             2018     30634642
             2019     43550538
             2020     43885459
             2021     34978211
             2022     37085100
Name: count, dtype: int64

In [13]:
# Anti-join against reference_tables.redbook: keep the NDC count rows whose
# ndc has no matching redbook.ndcnum (the left join leaves b.ndcnum NULL for
# those rows).
query = '''
select a.*
from qa_reporting.dw_truv_ndc_count a
left join reference_tables.redbook b
on a.ndc = b.ndcnum
where b.ndcnum is null;
'''

# Load the unmatched NDC rows into pandas and display them.
missing_ndc_df = pd.read_sql(query, con=connection)
missing_ndc_df



Unnamed: 0,data_source,year,ndc,count
0,trum,2022,67877059005,5689
1,trum,2016,11822511150,40
2,trum,2011,46122001734,1
3,truc,2013,41163025694,1
4,truc,2022,63323001203,1
...,...,...,...,...
142940,trum,2019,69336040230,376
142941,trum,2014,43353054660,4
142942,truc,2012,54868568400,1
142943,truc,2012,43353054660,7


In [14]:
# Rows per year whose NDC was not found in the reference table.
missing_by_year = missing_ndc_df.groupby('year')
missing_by_year['count'].sum()

year
2011     1581706
2012     1435813
2013     1108933
2014     1318829
2015      875019
2016      996356
2017      930588
2018      841213
2019     2194205
2020    10787594
2021    33022328
2022    39210957
Name: count, dtype: int64

In [15]:
# Yearly totals: all rows, and rows whose NDC had no reference match.
yearly_total = ndc_df.groupby('year')['count'].sum()
yearly_invalid = missing_ndc_df.groupby('year')['count'].sum()

# Side-by-side comparison per year; valid = overall - invalid.
ndc_comp_df = pd.DataFrame({
    'overall_count': yearly_total,
    'invalid_count': yearly_invalid,
    'valid_count': yearly_total - yearly_invalid,
})

# A year with no unmatched rows has a missing invalid count, which makes the
# subtraction missing as well; in that case every row of the year is valid.
ndc_comp_df['valid_count'] = (
    ndc_comp_df['valid_count'].fillna(ndc_comp_df['overall_count']).astype(int)
)
ndc_comp_df['invalid_to_valid_percent'] = 100. * ndc_comp_df['invalid_count'] / ndc_comp_df['valid_count']
ndc_comp_df

Unnamed: 0_level_0,overall_count,invalid_count,valid_count,invalid_to_valid_percent
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2011,464141202,1581706,462559496,0.341946
2012,452953565,1435813,451517752,0.317997
2013,365298615,1108933,364189682,0.304493
2014,373716687,1318829,372397858,0.354145
2015,287373226,875019,286498207,0.305419
2016,291790479,996356,290794123,0.342633
2017,254286364,930588,253355776,0.367305
2018,240644200,841213,239802987,0.350793
2019,239444247,2194205,237250042,0.924849
2020,221801645,10787594,211014051,5.112263


## Days Supply

In [16]:
# (Re)create the QA table for days-supply statistics, keyed by
# (data_source, year). dw_* columns hold min/median/max/avg from the DW
# staging tables, src_* columns hold the same statistics from the source
# data, and *_diff columns hold the difference between the two. Only the
# table is created here; the columns are filled in by later statements.
query = '''drop table if exists qa_reporting.dw_truv_rx_days_supply;
create table qa_reporting.dw_truv_rx_days_supply
(
    data_source bpchar(4),
    year int,
    dw_min_days_supply numeric,
    dw_median_days_supply numeric,
    dw_max_days_supply numeric,
    dw_avg_days_supply numeric,
    src_min_days_supply numeric,
    src_median_days_supply numeric,
    src_max_days_supply numeric,
    src_avg_days_supply numeric,
    min_days_supply_diff numeric,
    median_days_supply_diff numeric,
    max_days_supply_diff numeric,
    avg_days_supply_diff numeric
);
'''

# The drop and the create are sent together as one multi-statement string.
with connection.cursor() as cursor:
    cursor.execute(query)

In [17]:
# DW-side days_supply statistics for dw_staging.trum_pharmacy_claims.
# The first statement inserts one row per (data_source, year) carrying the
# minimum; the following statements fill in the median, maximum and average
# on those rows. The statements are executed in the order listed.
trum_days_supply_statements = (
    '''
    insert into qa_reporting.dw_truv_rx_days_supply
    (data_source, year, dw_min_days_supply)
    select data_source, year, min(days_supply)
    from dw_staging.trum_pharmacy_claims
    group by 1,2
    ''',
    '''
    update qa_reporting.dw_truv_rx_days_supply a
    set dw_median_days_supply = n
    from (select data_source, year, median(days_supply) n
    from dw_staging.trum_pharmacy_claims
    group by 1,2) b
    where a.year = b.year
    and a.data_source = b.data_source;
    ''',
    '''
    update qa_reporting.dw_truv_rx_days_supply a
    set dw_max_days_supply = n
    from (select data_source, year, max(days_supply) n
    from dw_staging.trum_pharmacy_claims
    group by 1,2) b
    where a.year = b.year
    and a.data_source = b.data_source;
    ''',
    '''
    update qa_reporting.dw_truv_rx_days_supply a
    set dw_avg_days_supply = n
    from (select data_source, year, avg(days_supply) n
    from dw_staging.trum_pharmacy_claims
    group by 1,2) b
    where a.year = b.year
    and a.data_source = b.data_source;
    ''',
)

with connection.cursor() as cursor:
    for query in trum_days_supply_statements:
        cursor.execute(query)

In [18]:
# DW-side days_supply statistics for dw_staging.truc_pharmacy_claims.
# The first statement inserts one row per (data_source, year) carrying the
# minimum; the following statements fill in the median, maximum and average
# on those rows. The statements are executed in the order listed.
truc_days_supply_statements = (
    '''
    insert into qa_reporting.dw_truv_rx_days_supply
    (data_source, year, dw_min_days_supply)
    select data_source, year, min(days_supply)
    from dw_staging.truc_pharmacy_claims
    group by 1,2
    ''',
    '''
    update qa_reporting.dw_truv_rx_days_supply a
    set dw_median_days_supply = n
    from (select data_source, year, median(days_supply) n
    from dw_staging.truc_pharmacy_claims
    group by 1,2) b
    where a.year = b.year
    and a.data_source = b.data_source;
    ''',
    '''
    update qa_reporting.dw_truv_rx_days_supply a
    set dw_max_days_supply = n
    from (select data_source, year, max(days_supply) n
    from dw_staging.truc_pharmacy_claims
    group by 1,2) b
    where a.year = b.year
    and a.data_source = b.data_source;
    ''',
    '''
    update qa_reporting.dw_truv_rx_days_supply a
    set dw_avg_days_supply = n
    from (select data_source, year, avg(days_supply) n
    from dw_staging.truc_pharmacy_claims
    group by 1,2) b
    where a.year = b.year
    and a.data_source = b.data_source;
    ''',
)

with connection.cursor() as cursor:
    for query in truc_days_supply_statements:
        cursor.execute(query)

In [None]:
# Stage the source days-supply values in dev.ip_truven_rx_daysupp:
#   1. recreate the table from truven.ccaed, tagged 'truc';
#   2. append truven.mdcrd, tagged 'trum';
#   3. vacuum analyze the staged table.
# rx_id_derv is the text concatenation of enrolid, ndcnum and svcdate and is
# used as the distribution key. The statements run in the order listed.
src_days_supply_staging_statements = (
    '''drop table if exists dev.ip_truven_rx_daysupp;

create table dev.ip_truven_rx_daysupp
with (
appendonly=true, 
orientation=row, 
compresstype=zlib, 
compresslevel=5 
)
as 
select 'truc' as data_source, year, enrolid, (enrolid::text || ndcnum::text || svcdate::text) as rx_id_derv, daysupp
from truven.ccaed
distributed by (rx_id_derv);
    ''',
    '''insert into dev.ip_truven_rx_daysupp
select 'trum', year, enrolid, (enrolid::text || ndcnum::text || svcdate::text) as rx_id_derv, daysupp
from truven.mdcrd;
    ''',
    '''vacuum analyze dev.ip_truven_rx_daysupp;''',
)

with connection.cursor() as cursor:
    for query in src_days_supply_staging_statements:
        cursor.execute(query)

In [None]:
# Source-side days-supply statistics, computed from the staged
# dev.ip_truven_rx_daysupp table per (data_source, year) and written to the
# matching rows of the QA table: minimum, median, maximum, then average.
src_days_supply_statements = (
    '''
    update qa_reporting.dw_truv_rx_days_supply a
    set src_min_days_supply = n
    from (select data_source, year, min(daysupp) n
    from dev.ip_truven_rx_daysupp
    group by 1,2) b
    where a.year = b.year
    and a.data_source = b.data_source;
    ''',
    '''
    update qa_reporting.dw_truv_rx_days_supply a
    set src_median_days_supply = n
    from (select data_source, year,  median(daysupp) n
    from dev.ip_truven_rx_daysupp
    group by 1,2) b
    where a.year = b.year
    and a.data_source = b.data_source;
    ''',
    '''
    update qa_reporting.dw_truv_rx_days_supply a
    set src_max_days_supply = n
    from (select data_source, year,  max(daysupp) n
    from dev.ip_truven_rx_daysupp
    group by 1,2) b
    where a.year = b.year
    and a.data_source = b.data_source;
    ''',
    '''
    update qa_reporting.dw_truv_rx_days_supply a
    set src_avg_days_supply = n
    from (select data_source, year,  avg(daysupp) n
    from dev.ip_truven_rx_daysupp
    group by 1,2) b
    where a.year = b.year
    and a.data_source = b.data_source;
    ''',
)

with connection.cursor() as cursor:
    for query in src_days_supply_statements:
        cursor.execute(query)

In [None]:
# Derive the DW-minus-source difference of each days-supply statistic
# (min, median, max, avg) on every row of the QA table.
query = '''
    update qa_reporting.dw_truv_rx_days_supply
    set min_days_supply_diff = dw_min_days_supply - src_min_days_supply,
    median_days_supply_diff = dw_median_days_supply - src_median_days_supply,
    max_days_supply_diff = dw_max_days_supply - src_max_days_supply,
    avg_days_supply_diff = dw_avg_days_supply - src_avg_days_supply
    ;
    '''

with connection.cursor() as cursor:
    cursor.execute(query)

In [None]:
# Load the days-supply QA table into pandas and display it.
days_supply_sql = 'select * from qa_reporting.dw_truv_rx_days_supply;'
df = pd.read_sql(days_supply_sql, con=connection)
df



Unnamed: 0,data_source,year,dw_min_days_supply,dw_median_days_supply,dw_max_days_supply,dw_avg_days_supply,src_min_days_supply,src_median_days_supply,src_max_days_supply,src_avg_days_supply,min_days_supply_diff,median_days_supply_diff,max_days_supply_diff,avg_days_supply_diff
0,truc,2015,-365.0,30.0,999.0,34.635105,-365.0,30.0,999.0,34.628984,0.0,0.0,0.0,0.006121
1,truc,2017,-365.0,30.0,999.0,35.771861,-365.0,30.0,999.0,35.761799,0.0,0.0,0.0,0.010062
2,truc,2019,-365.0,30.0,999.0,37.434441,-365.0,30.0,999.0,37.432851,0.0,0.0,0.0,0.00159
3,truc,2022,-365.0,30.0,999.0,38.952116,-365.0,30.0,999.0,38.954489,0.0,0.0,0.0,-0.002373
4,truc,2018,-180.0,30.0,999.0,36.269105,-180.0,30.0,999.0,36.266811,0.0,0.0,0.0,0.002294
5,truc,2012,-999.0,30.0,999.0,32.745973,-999.0,30.0,999.0,32.742878,0.0,0.0,0.0,0.003095
6,truc,2013,-180.0,30.0,999.0,33.548952,-180.0,30.0,999.0,33.545176,0.0,0.0,0.0,0.003775
7,truc,2021,-276.0,30.0,999.0,37.690981,-276.0,30.0,999.0,37.692452,0.0,0.0,0.0,-0.001471
8,truc,2014,-180.0,30.0,999.0,33.551244,-180.0,30.0,999.0,33.548467,0.0,0.0,0.0,0.002778
9,truc,2020,-180.0,30.0,999.0,39.631469,-180.0,30.0,999.0,39.631411,0.0,0.0,0.0,5.9e-05


In [None]:
# Show only the year and the four DW-minus-source difference columns.
diff_columns = [
    'year',
    'min_days_supply_diff',
    'median_days_supply_diff',
    'max_days_supply_diff',
    'avg_days_supply_diff',
]
df[diff_columns]

Unnamed: 0,year,min_days_supply_diff,median_days_supply_diff,max_days_supply_diff,avg_days_supply_diff
0,2015,0.0,0.0,0.0,0.006121
1,2017,0.0,0.0,0.0,0.010062
2,2019,0.0,0.0,0.0,0.00159
3,2022,0.0,0.0,0.0,-0.002373
4,2018,0.0,0.0,0.0,0.002294
5,2012,0.0,0.0,0.0,0.003095
6,2013,0.0,0.0,0.0,0.003775
7,2021,0.0,0.0,0.0,-0.001471
8,2014,0.0,0.0,0.0,0.002778
9,2020,0.0,0.0,0.0,5.9e-05


In [None]:
# Drop the staged source extract now that the comparison has been made.
# The cursor is opened in a `with` block, as in the rest of the notebook, so
# it is closed as soon as the statement has run instead of being left open.
with connection.cursor() as cursor:
    cursor.execute('drop table if exists dev.ip_truven_rx_daysupp;')

## Dispense as Written

In [3]:
# (Re)create the QA table for dispense-as-written (DAW) counts, keyed by
# (data_source, year, table_src, dispensed_as_written). dw_count is the row
# count from the DW staging tables, src_count the matching count from the
# source data, count_diff their difference and count_diff_percent the
# absolute percentage difference.
#
# The count columns are bigint: they are filled from count(*), which returns
# bigint, and this matches the count columns of
# qa_reporting.dw_truv_pharmacy_claims_counts. A plain int column would fail
# with "integer out of range" for a group larger than 2^31 - 1 rows.
query = '''drop table if exists qa_reporting.dw_truv_rx_daw_counts;
create table qa_reporting.dw_truv_rx_daw_counts
(
    data_source bpchar(4),
    year int,
    table_src text,
    dispensed_as_written text,
    dw_count bigint,
    src_count bigint,
    count_diff bigint,
    count_diff_percent numeric
)
'''

with connection.cursor() as cursor:
    cursor.execute(query)

In [4]:
# DW-side DAW counts: number of rows per
# (data_source, year, table_id_src, dispensed_as_written), first from
# dw_staging.trum_pharmacy_claims and then from
# dw_staging.truc_pharmacy_claims. Both inserts are sent together as one
# multi-statement string.
query = '''
insert into qa_reporting.dw_truv_rx_daw_counts
(data_source, year, table_src, dispensed_as_written, dw_count)
select data_source, year, table_id_src, dispensed_as_written, count(*)
from dw_staging.trum_pharmacy_claims
group by 1,2,3,4;

insert into qa_reporting.dw_truv_rx_daw_counts
(data_source, year, table_src, dispensed_as_written, dw_count)
select data_source, year, table_id_src, dispensed_as_written, count(*)
from dw_staging.truc_pharmacy_claims
group by 1,2,3,4;
'''

with connection.cursor() as cursor:
    cursor.execute(query)

In [5]:
# Source-side DAW counts for 'mdcrd': rows per (year, dawind) in
# staging_clean.mdcrd_etl, with a NULL dawind counted under '00'. The counts
# are written to the QA rows that match on year, dispensed_as_written and
# table_src, together with the DW-minus-source difference and the absolute
# percentage difference. QA rows without a match are not touched.
# NOTE(review): presumably the DW load applies the same NULL -> '00' default
# to dispensed_as_written -- confirm against the load logic.
query = '''
with rx_daw_count as (
    select year, 'mdcrd' as table_src, case when dawind is null then '00' else dawind end as dawind, count(*)
    from staging_clean.mdcrd_etl
    group by 1,2,3
)
update qa_reporting.dw_truv_rx_daw_counts a
set src_count = b.count,
count_diff = dw_count - b.count,
count_diff_percent = 100. * abs(dw_count - b.count) / b.count
from rx_daw_count b
where a.year = b.year
and a.dispensed_as_written = b.dawind
and a.table_src = b.table_src;
'''

with connection.cursor() as cursor:
    cursor.execute(query)

In [6]:
# Source-side DAW counts for 'ccaed': rows per (year, dawind) in
# staging_clean.ccaed_etl, with a NULL dawind counted under '00'. The counts
# are written to the QA rows that match on year, dispensed_as_written and
# table_src, together with the DW-minus-source difference and the absolute
# percentage difference. QA rows without a match are not touched.
# NOTE(review): presumably the DW load applies the same NULL -> '00' default
# to dispensed_as_written -- confirm against the load logic.
query = '''
with rx_daw_count as (
    select year, 'ccaed' as table_src, case when dawind is null then '00' else dawind end as dawind, count(*)
    from staging_clean.ccaed_etl
    group by 1,2,3
)
update qa_reporting.dw_truv_rx_daw_counts a
set src_count = b.count,
count_diff = dw_count - b.count,
count_diff_percent = 100. * abs(dw_count - b.count) / b.count
from rx_daw_count b
where a.year = b.year
and a.dispensed_as_written = b.dawind
and a.table_src = b.table_src;
'''

with connection.cursor() as cursor:
    cursor.execute(query)

In [8]:
# Load the DAW comparison table ordered by percentage difference (smallest
# first) and display it. The frame is sorted once, when it is assigned;
# displaying it does not need a second sort on the same column.
df = pd.read_sql('select * from qa_reporting.dw_truv_rx_daw_counts', con=connection).sort_values('count_diff_percent')
df



Unnamed: 0,data_source,year,table_src,dispensed_as_written,dw_count,src_count,count_diff,count_diff_percent
197,trum,2017,mdcrd,04,5030,5030,0,0.000000
231,trum,2022,mdcrd,08,1939,1939,0,0.000000
256,trum,2021,mdcrd,08,2147,2147,0,0.000000
94,truc,2021,ccaed,08,9542,9542,0,0.000000
149,trum,2022,mdcrd,04,1948,1948,0,0.000000
...,...,...,...,...,...,...,...,...
61,truc,2017,ccaed,09,47021,47133,-112,0.237625
80,truc,2017,ccaed,07,17892,17941,-49,0.273117
53,truc,2020,ccaed,00,3718218,3731301,-13083,0.350628
254,trum,2021,mdcrd,09,5463,5488,-25,0.455539
