# Data Warehouse Admission Acute IP QA

In [1]:
import pandas as pd
import psycopg2
import sys
sys.path.append('H:/')
from uth_helpers.db_utils import get_dsn

In [2]:
connection = psycopg2.connect(get_dsn()+' keepalives=1 keepalives_idle=30 keepalives_interval=10')

## Checking Counts of Admit table and Admit Claims Table

In [3]:
query = '''
select data_source, count(*) admit_row_count, count(distinct admit_id) admit_count, count(distinct uth_member_id) pat_count
from dev.gm_dw_ip_admit
group by 1;
'''

admit_df = pd.read_sql(query, con=connection)
admit_df



Unnamed: 0,data_source,admit_row_count,admit_count,pat_count
0,trum,1996822,1996822,1049259
1,mcrn,2979300,2979300,1166207
2,mcrt,4237268,4237268,1636408
3,truc,8730913,8730913,6016963


In [4]:
query = '''
select data_source, count(*) claim_row_count, count(distinct admit_id) admit_count, count(distinct uth_member_id) pat_count
from dev.gm_dw_ip_admit_claim
group by 1;
'''

admit_claim_df = pd.read_sql(query, con=connection)
admit_claim_df



Unnamed: 0,data_source,claim_row_count,admit_count,pat_count
0,trum,30273607,1996822,1049259
1,mcrn,45077629,2979300,1166207
2,mcrt,67859737,4237268,1636408
3,truc,85179641,8730913,6016963


In [5]:
comp_df = pd.merge(left=admit_df, right=admit_claim_df, on='data_source')
comp_df

Unnamed: 0,data_source,admit_row_count,admit_count_x,pat_count_x,claim_row_count,admit_count_y,pat_count_y
0,trum,1996822,1996822,1049259,30273607,1996822,1049259
1,mcrn,2979300,2979300,1166207,45077629,2979300,1166207
2,mcrt,4237268,4237268,1636408,67859737,4237268,1636408
3,truc,8730913,8730913,6016963,85179641,8730913,6016963


Ideally, the number of unique admits should be the same in both tables. If it doesn't then we need to figure out if there is a significant difference between the two tables. This difference can happen due to the dates that are on the claims in both the detail and header levels.

In [6]:
comp_df['admit_count_same'] = comp_df['admit_count_x'] == comp_df['admit_count_y']
comp_df['pat_count_same'] = comp_df['pat_count_x'] == comp_df['pat_count_y']
comp_df

Unnamed: 0,data_source,admit_row_count,admit_count_x,pat_count_x,claim_row_count,admit_count_y,pat_count_y,admit_count_same,pat_count_same
0,trum,1996822,1996822,1049259,30273607,1996822,1049259,True,True
1,mcrn,2979300,2979300,1166207,45077629,2979300,1166207,True,True
2,mcrt,4237268,4237268,1636408,67859737,4237268,1636408,True,True
3,truc,8730913,8730913,6016963,85179641,8730913,6016963,True,True


## Checking if there are any admissions episodes that overlap

Again, there shouldn't be any admissions that overlap with each other, but if there is, we need to investigate.

In [7]:
query ='''
select a.data_source, count(*) --a.admit_date as a_admit_dt, b.admit_date as b_admit_dt, b.discharge_date as b_discharge_date
from dev.gm_dw_ip_admit a 
inner join dev.gm_dw_ip_admit b
on a.data_source = b.data_source
and a.uth_member_id = b.uth_member_id
and a.admit_date between b.admit_date+1 and b.discharge_date
group by 1;
'''

pd.read_sql(query, con=connection)



Unnamed: 0,data_source,count


In [8]:
query ='''
select a.data_source, a.admit_date as a_admit_dt, a.discharge_date as a_discharge_date, 
        b.admit_date as b_admit_dt, b.discharge_date as b_discharge_date,
        a.admit_id, b.admit_id
from dev.gm_dw_ip_admit a 
inner join dev.gm_dw_ip_admit b
on a.data_source = b.data_source
and a.uth_member_id = b.uth_member_id
and a.admit_date between b.admit_date+1 and b.discharge_date;
'''

pd.read_sql(query, con=connection)



Unnamed: 0,data_source,a_admit_dt,a_discharge_date,b_admit_dt,b_discharge_date,admit_id,admit_id.1


## Checking Admission and Discharge Dates

Here we check the dates for admission and discharge to see if the dates are outside the initial range of years we generated the admit tables for.

In [9]:
query = '''
select data_source,
	sum(case when extract(year from admit_date) not between 2011 and 2023 then 1 else 0 end) as admit_oor,
	sum(case when extract(year from discharge_date) not between 2011 and 2023 then 1 else 0 end) as discharge_oor,
	sum(case when (discharge_date - admit_date) < 0 then 1 else 0 end ) as discharge_before_admit,
	count(*),
	sum(case when extract(year from admit_date) not between 2011 and 2023 then 1 else 0 end) * 1.0 / count(*) as admit_oor_pct,
	sum(case when extract(year from discharge_date) not between 2011 and 2023 then 1 else 0 end) * 1.0 / count(*) as discharge_oor_pct,
	sum(case when (discharge_date - admit_date) < 0 then 1 else 0 end ) * 1.0 / count(*) as discharge_before_admit_pct
from dev.gm_dw_ip_admit
group by data_source;
'''

pd.read_sql(query, con=connection)



Unnamed: 0,data_source,admit_oor,discharge_oor,discharge_before_admit,count,admit_oor_pct,discharge_oor_pct,discharge_before_admit_pct
0,truc,0,0,0,8730913,0.0,0.0,0.0
1,mcrn,0,0,0,2979300,0.0,0.0,0.0
2,trum,0,0,0,1996822,0.0,0.0,0.0
3,mcrt,0,0,0,4237268,0.0,0.0,0.0


In [10]:
query = '''
select *
from dev.gm_dw_ip_admit
where extract(year from admit_date) not between 2011 and 2023;'''

pd.read_sql(query, con=connection)



Unnamed: 0,data_source,uth_member_id,enc_id,admit_date,discharge_date,enc_discharge_status,admit_id,total_charge_amount,total_allowed_amount,total_paid_amount,missing_terminal_status,missing_terminal_status_117,paid_status,member_id_src,insert_ts


In [11]:
query = '''
select *
from dev.gm_dw_ip_admit
where extract(year from discharge_date) not between 2011 and 2023;'''

pd.read_sql(query, con=connection)



Unnamed: 0,data_source,uth_member_id,enc_id,admit_date,discharge_date,enc_discharge_status,admit_id,total_charge_amount,total_allowed_amount,total_paid_amount,missing_terminal_status,missing_terminal_status_117,paid_status,member_id_src,insert_ts


## Checking Bill Types at the beginning and end of an admit episode

In step 2, we added a column to determine whether during an admission episode had terminal bill codes (111, 114, 117) which indicate the start/end of an inpatient stay.

In [12]:
query = '''
select data_source, missing_terminal_status, count(*), 100.* count(*) / sum(count(*)) over (partition by data_source)
from dev.gm_dw_ip_admit
group by 1,2
order by 1,2;
'''

pd.read_sql(query, con=connection)



Unnamed: 0,data_source,missing_terminal_status,count,?column?
0,mcrn,False,2976196,99.895814
1,mcrn,True,3104,0.104186
2,mcrt,False,4234326,99.930568
3,mcrt,True,2942,0.069432
4,truc,False,8428279,96.533765
5,truc,True,302634,3.466235
6,trum,False,1993863,99.851815
7,trum,True,2959,0.148185


In [13]:
query = '''
select data_source, missing_terminal_status_117, count(*), 100.* count(*) / sum(count(*)) over (partition by data_source)
from dev.gm_dw_ip_admit
group by 1,2
order by 1,2;
'''

pd.read_sql(query, con=connection)



Unnamed: 0,data_source,missing_terminal_status_117,count,?column?
0,mcrn,False,2946574,98.901554
1,mcrn,True,32726,1.098446
2,mcrt,False,4164120,98.273699
3,mcrt,True,73148,1.726301
4,truc,False,8477605,97.098723
5,truc,True,253308,2.901277
6,trum,False,1895310,94.916322
7,trum,True,101512,5.083678


## Checking that total costs in admit table matches total cost in admit claims table

In [4]:
query = '''
with a as (
select a.*, total_allowed_amount, case when clm_total_allowed_amount != total_allowed_amount then 1 end as diff_allowed_flag
	, total_charge_amount, case when clm_total_charge_amount != total_charge_amount then 1 end as diff_charge_flag
	, total_paid_amount, case when clm_total_paid_amount != total_paid_amount then 1 end as diff_paid_flag
from 
	(select data_source, calendar_year , derived_uth_admission_id, sum(allowed_amount) clm_total_allowed_amount
													, sum(paid_amount) clm_total_paid_amount
													, sum(charge_amount) clm_total_charge_amount
	from data_warehouse.admission_acute_ip_claims  
	group by data_source, calendar_year, derived_uth_admission_id) a 
left join 
	(select data_source, calendar_year, derived_uth_admission_id, total_allowed_amount, total_paid_amount, total_charge_amount 
	 from data_warehouse.admission_acute_ip) b 
on a.derived_uth_admission_id=b.derived_uth_admission_id and a.calendar_year=b.calendar_year and a.data_source = b.data_source
)
select data_source, count(*)
from a
where diff_allowed_flag = 1 
or diff_charge_flag = 1 
or diff_paid_flag = 1
group by 1
;'''

pd.read_sql(query, con=connection)

  pd.read_sql(query, con=connection)


Unnamed: 0,data_source,count


## Other

Double checking that member_id_src and claim_id_src are filled in the tables.

In [14]:
query = '''
select data_source, count(*)
from dev.gm_dw_ip_admit
where member_id_src is null or member_id_src = ''
group by 1'''

pd.read_sql(query, con=connection)



Unnamed: 0,data_source,count


In [15]:
query = '''
select data_source, count(*)
from dev.gm_dw_ip_admit_claim
where member_id_src is null or member_id_src = ''
or claim_id_src is null or claim_id_src = ''
group by 1'''

pd.read_sql(query, con=connection)



Unnamed: 0,data_source,count
