# Data Warehouse Admission Acute IP QA

In [1]:
import pandas as pd
import psycopg2
import sys
sys.path.append('H:/')
from uth_helpers.db_utils import get_dsn

In [2]:
connection = psycopg2.connect(get_dsn()+' keepalives=1 keepalives_idle=30 keepalives_interval=10')

## Checking Counts of Admit table and Admit Claims Table

In [3]:
query = '''
select data_source, count(*) admit_row_count, count(distinct admit_id) admit_count, count(distinct uth_member_id) pat_count
from dev.gm_dw_ip_admit
group by 1;
'''

admit_df = pd.read_sql(query, con=connection)
admit_df



Unnamed: 0,data_source,admit_row_count,admit_count,pat_count
0,mhtw,16758,16758,13112
1,mcpp,352067,352067,270227
2,trum,1750303,1750303,946315
3,mcrt,3742109,3742109,1487393
4,mcrn,2646701,2646701,1071737
5,mdcd,6242921,6242921,3651344
6,truc,8527365,8527365,5891961
7,optd,10839102,10839102,6176069
8,optz,10844126,10844126,6187088


In [4]:
query = '''
select data_source, count(*) claim_row_count, count(distinct admit_id) admit_count, count(distinct uth_member_id) pat_count
from dev.gm_dw_ip_admit_claim
group by 1;
'''

admit_claim_df = pd.read_sql(query, con=connection)
admit_claim_df



Unnamed: 0,data_source,claim_row_count,admit_count,pat_count
0,mhtw,122579,16758,13112
1,mcpp,1630844,352067,270227
2,trum,26176725,1750303,946315
3,mcrt,59116944,3742109,1487393
4,mcrn,39565398,2646701,1071737
5,mdcd,57031058,6242921,3651344
6,truc,83088932,8527365,5891961
7,optd,147252249,10839100,6176067
8,optz,148455719,10844124,6187086


In [5]:
comp_df = pd.merge(left=admit_df, right=admit_claim_df, on='data_source')
comp_df

Unnamed: 0,data_source,admit_row_count,admit_count_x,pat_count_x,claim_row_count,admit_count_y,pat_count_y
0,mhtw,16758,16758,13112,122579,16758,13112
1,mcpp,352067,352067,270227,1630844,352067,270227
2,trum,1750303,1750303,946315,26176725,1750303,946315
3,mcrt,3742109,3742109,1487393,59116944,3742109,1487393
4,mcrn,2646701,2646701,1071737,39565398,2646701,1071737
5,mdcd,6242921,6242921,3651344,57031058,6242921,3651344
6,truc,8527365,8527365,5891961,83088932,8527365,5891961
7,optd,10839102,10839102,6176069,147252249,10839100,6176067
8,optz,10844126,10844126,6187088,148455719,10844124,6187086


Ideally, the number of unique admits should be the same in both tables. If it doesn't then we need to figure out if there is a significant difference between the two tables. This difference can happen due to the dates that are on the claims in both the detail and header levels.

In [6]:
comp_df['admit_count_same'] = comp_df['admit_count_x'] == comp_df['admit_count_y']
comp_df['pat_count_same'] = comp_df['pat_count_x'] == comp_df['pat_count_y']
comp_df

Unnamed: 0,data_source,admit_row_count,admit_count_x,pat_count_x,claim_row_count,admit_count_y,pat_count_y,admit_count_same,pat_count_same
0,mhtw,16758,16758,13112,122579,16758,13112,True,True
1,mcpp,352067,352067,270227,1630844,352067,270227,True,True
2,trum,1750303,1750303,946315,26176725,1750303,946315,True,True
3,mcrt,3742109,3742109,1487393,59116944,3742109,1487393,True,True
4,mcrn,2646701,2646701,1071737,39565398,2646701,1071737,True,True
5,mdcd,6242921,6242921,3651344,57031058,6242921,3651344,True,True
6,truc,8527365,8527365,5891961,83088932,8527365,5891961,True,True
7,optd,10839102,10839102,6176069,147252249,10839100,6176067,False,False
8,optz,10844126,10844126,6187088,148455719,10844124,6187086,False,False


## Checking if there are any admissions episodes that overlap

Again, there shouldn't be any admissions that overlap with each other, but if there is, we need to investigate.

In [7]:
query ='''
select a.data_source, count(*) --a.admit_date as a_admit_dt, b.admit_date as b_admit_dt, b.discharge_date as b_discharge_date
from dev.gm_dw_ip_admit a 
inner join dev.gm_dw_ip_admit b
on a.data_source = b.data_source
and a.uth_member_id = b.uth_member_id
and a.admit_date between b.admit_date+1 and b.discharge_date
group by 1;
'''

pd.read_sql(query, con=connection)



Unnamed: 0,data_source,count


In [8]:
query ='''
select a.data_source, a.admit_date as a_admit_dt, a.discharge_date as a_discharge_date, 
        b.admit_date as b_admit_dt, b.discharge_date as b_discharge_date,
        a.admit_id, b.admit_id
from dev.gm_dw_ip_admit a 
inner join dev.gm_dw_ip_admit b
on a.data_source = b.data_source
and a.uth_member_id = b.uth_member_id
and a.admit_date between b.admit_date+1 and b.discharge_date;
'''

pd.read_sql(query, con=connection)



Unnamed: 0,data_source,a_admit_dt,a_discharge_date,b_admit_dt,b_discharge_date,admit_id,admit_id.1


## Checking Admission and Discharge Dates

Here we check the dates for admission and discharge to see if the dates are outside the initial range of years we generated the admit tables for.

In [9]:
query = '''
select data_source,
	sum(case when extract(year from admit_date) not between 2011 and 2023 then 1 else 0 end) as admit_oor,
	sum(case when extract(year from discharge_date) not between 2011 and 2023 then 1 else 0 end) as discharge_oor,
	sum(case when (discharge_date - admit_date) < 0 then 1 else 0 end ) as discharge_before_admit,
	count(*),
	sum(case when extract(year from admit_date) not between 2011 and 2023 then 1 else 0 end) * 1.0 / count(*) as admit_oor_pct,
	sum(case when extract(year from discharge_date) not between 2011 and 2023 then 1 else 0 end) * 1.0 / count(*) as discharge_oor_pct,
	sum(case when (discharge_date - admit_date) < 0 then 1 else 0 end ) * 1.0 / count(*) as discharge_before_admit_pct
from dev.gm_dw_ip_admit
group by data_source;
'''

pd.read_sql(query, con=connection)



Unnamed: 0,data_source,admit_oor,discharge_oor,discharge_before_admit,count,admit_oor_pct,discharge_oor_pct,discharge_before_admit_pct
0,truc,0,0,0,8527365,0.0,0.0,0.0
1,mhtw,0,0,0,16758,0.0,0.0,0.0
2,mcrn,12,0,0,2646701,5e-06,0.0,0.0
3,trum,0,0,0,1750303,0.0,0.0,0.0
4,mcpp,0,0,0,352067,0.0,0.0,0.0
5,mcrt,10,0,0,3742109,3e-06,0.0,0.0
6,optd,0,1,0,10839102,0.0,9.225857e-08,0.0
7,mdcd,0,2,0,6242921,0.0,3.203629e-07,0.0
8,optz,0,1,0,10844126,0.0,9.221582e-08,0.0


In [10]:
query = '''
select *
from dev.gm_dw_ip_admit
where extract(year from admit_date) not between 2011 and 2023;'''

pd.read_sql(query, con=connection)



Unnamed: 0,data_source,uth_member_id,enc_id,admit_date,discharge_date,enc_discharge_status,admit_id,total_charge_amount,total_allowed_amount,total_paid_amount,missing_terminal_status,missing_terminal_status_117,paid_status,member_id_src,insert_ts
0,mcrt,361005906,0,2010-10-03,2016-10-11,62,361005906-000-2010,4418784.5,396999.48,342969.87,False,True,,ggggggguawffayy,2023-09-18 14:25:01
1,mcrn,533430040,0,2007-02-01,2016-08-31,30,533430040-000-2007,2812899.83,2050.09,1329.49,False,False,,gggggggAgggyfBA,2023-09-18 14:44:10
2,mcrt,530439252,0,2010-03-27,2015-04-03,2,530439252-000-2010,1654395.29,234694.72,199892.79,False,False,,gggggggaynjywnA,2023-09-18 14:29:45
3,mcrn,532962777,0,2007-06-20,2017-07-09,6,532962777-000-2007,591049.04,85205.92,75782.86,False,False,,gggggggwfAaanya,2023-09-18 14:43:18
4,mcrn,531994797,0,2008-08-08,2015-08-17,51,531994797-000-2008,393800.94,165881.7,140049.13,False,False,,gggggggBuuyBnwa,2023-09-18 14:40:42
5,mcrt,358459887,0,2007-06-20,2017-07-09,6,358459887-000-2007,591049.04,85205.92,75782.86,False,False,,gggggggwfAaanya,2023-09-18 14:13:55
6,mcrt,361182011,0,2007-08-23,2017-09-04,6,361182011-000-2007,1000376.2,130354.21,113916.12,False,True,,gggggggngfnBgyj,2023-09-18 14:25:01
7,mcrt,530472500,0,2010-07-28,2015-10-09,6,530472500-000-2010,381247.84,91372.83,81296.83,False,False,,gggggggBgAgyBju,2023-09-18 14:29:45
8,mcrn,533467104,0,2010-02-09,2020-02-10,20,533467104-000-2010,938035.38,195186.33,168925.51,False,False,,gggggggjwuAauAy,2023-09-18 14:44:10
9,mcrn,533336581,0,2009-07-10,2019-07-11,3,533336581-000-2009,964866.61,239733.61,224240.23,False,True,,gggggggfwfBBgAB,2023-09-18 14:44:10


In [11]:
query = '''
select *
from dev.gm_dw_ip_admit
where extract(year from discharge_date) not between 2011 and 2023;'''

pd.read_sql(query, con=connection)



Unnamed: 0,data_source,uth_member_id,enc_id,admit_date,discharge_date,enc_discharge_status,admit_id,total_charge_amount,total_allowed_amount,total_paid_amount,missing_terminal_status,missing_terminal_status_117,paid_status,member_id_src,insert_ts
0,optz,1056840675,0,2019-10-18,2029-10-22,0,1056840675-000-2019,50591.03,50591.0,,False,False,,560499258253711,2023-09-18 17:00:27
1,optd,1125923772,0,2019-10-18,2029-10-22,0,1125923772-000-2019,51754.62,51754.59,,False,False,,33170178655,2023-09-18 18:03:12
2,mdcd,672678906,0,2021-10-10,2024-10-22,1,672678906-000-2021,467356.0,60.0,58.8,False,False,,512249890,2023-09-18 16:06:54
3,mdcd,1190592142,0,2021-08-01,2027-08-02,1,1190592142-000-2021,33416.58,2026.26,846.04,False,False,,749709742,2023-09-18 16:33:52


## Checking Bill Types at the beginning and end of an admit episode

In step 2, we added a column to determine whether during an admission episode had terminal bill codes (111, 114, 117) which indicate the start/end of an inpatient stay.

In [18]:
query = '''
select data_source, missing_terminal_status, count(*), 100.* count(*) / sum(count(*)) over (partition by data_source)
from dev.gm_dw_ip_admit
group by 1,2
order by 1,2;
'''

pd.read_sql(query, con=connection)



Unnamed: 0,data_source,missing_terminal_status,count,?column?
0,mcpp,False,352022,99.987218
1,mcpp,True,45,0.012782
2,mcrn,False,2643950,99.896059
3,mcrn,True,2751,0.103941
4,mcrt,False,3739563,99.931963
5,mcrt,True,2546,0.068037
6,mdcd,False,6127514,98.151394
7,mdcd,True,115407,1.848606
8,mhtw,False,16718,99.761308
9,mhtw,True,40,0.238692


In [19]:
query = '''
select data_source, missing_terminal_status_117, count(*), 100.* count(*) / sum(count(*)) over (partition by data_source)
from dev.gm_dw_ip_admit
group by 1,2
order by 1,2;
'''

pd.read_sql(query, con=connection)



Unnamed: 0,data_source,missing_terminal_status_117,count,?column?
0,mcpp,False,349545,99.283659
1,mcpp,True,2522,0.716341
2,mcrn,False,2618302,98.927004
3,mcrn,True,28399,1.072996
4,mcrt,False,3678852,98.30959
5,mcrt,True,63257,1.69041
6,mdcd,False,6111476,97.894495
7,mdcd,True,131445,2.105505
8,mhtw,False,16751,99.958229
9,mhtw,True,7,0.041771


## Other

Double checking that member_id_src and claim_id_src are filled in the tables.

In [20]:
query = '''
select data_source, count(*)
from dev.gm_dw_ip_admit
where member_id_src is null or member_id_src = ''
group by 1'''

pd.read_sql(query, con=connection)



Unnamed: 0,data_source,count


In [21]:
query = '''
select data_source, count(*)
from dev.gm_dw_ip_admit_claim
where member_id_src is null or member_id_src = ''
or claim_id_src is null or claim_id_src = ''
group by 1'''

pd.read_sql(query, con=connection)



Unnamed: 0,data_source,count
