In [30]:
import pandas as pd 
import os 
import sqlite3 
import shutil
import numpy as np
from datetime import date
import datetime
import time
import warnings

# Suppress UserWarning messages for the rest of the session.
warnings.filterwarnings("ignore", category=UserWarning)

# Lift pandas' display limits so frames render with every column and every row.
for display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(display_option, None)

# Shared SQLite connection used by the cells below. `timeout` is the number of
# seconds sqlite3 waits on a locked database before raising.
production = sqlite3.connect(r'C:\python_development\productivity.db', timeout=120)

In [44]:
# Load the raw RVU extract, normalise it, and replace the src_productivity table.
# NOTE: this in-cell import rebinds the name `datetime` from the module (imported
# in the first cell) to the class. It is kept so the notebook namespace is unchanged.
from datetime import datetime

rvu = pd.read_csv(
    r'\\vault\powerbiflatfiles$\Production\DW_PB_RVU_EXTRACT.txt',
    sep='|',
    low_memory=False,
    usecols=['IDENTITY_ID','Place_Of_Service_Name', 'Place_Of_Service_Id','Location_Name', 'Location_Id','Department_Name','Department_Id',
             'Bill_Area_Name', 'Billing_Provider_Id', 'Billing_Provider_Name', 'Pat_Enc_Csn_Id', 'Service_Date',
             'Mrn','Orig_Benefit_Plan_Id', 'Orig_Benefit_Plan_Name', 'Orig_Payor_Id', 'Orig_Payor_Name', 'Orig_Epic_Financial_Class',
             'Orig_Ucr_Financial_Class', 'Post_Date', 'Cpt_Code', 'Procedure', 'Payments','Charges', 'Wrvu',
             'Pos_Type_Name'],
    parse_dates=['Service_Date', 'Post_Date'],
)

# Keep rows posted on or after 1 July of the year three years before the current one.
# An explicit Timestamp replaces the ambiguous '7-1-YYYY' string; it also makes the
# comparison fail loudly if Post_Date was not parsed as a datetime column.
lookback_year = datetime.now().year - 3
lookback_start = pd.Timestamp(year=lookback_year, month=7, day=1)

# Two billing providers are identified incorrectly in the extract
# (Nandini Gowda vs Ashwin Gowda, Kenneth Ballou vs Leigh Ballou).
# Each mapping is {value found in the extract: value to store instead}.
bill_prov_id_patch = {92388:269915,42066:70397}
bill_prov_name_patch = {'BALLOU, LEIGH A':'BALLOU, KENNETH A','GOWDA, ASHWIN A':'GOWDA, NANDINI NETKAL'}

# Build the cleaned frame in one chain so no column is assigned on a filtered
# slice (the pattern that triggers SettingWithCopyWarning) and nothing is
# mutated in place.
rvu = (
    rvu.loc[rvu.Post_Date >= lookback_start]
    .assign(
        # The stored Payments value is the extract's value multiplied by -1.
        Payments=lambda frame: frame.Payments * -1,
        Billing_Provider_Id=lambda frame: frame.Billing_Provider_Id.replace(bill_prov_id_patch),
        Billing_Provider_Name=lambda frame: frame.Billing_Provider_Name.replace(bill_prov_name_patch),
    )
    .drop_duplicates()
)

# Last expression: to_sql's return value is shown as the cell output.
rvu.to_sql('src_productivity', production, if_exists='replace', index=False)

967031

In [34]:
# Build the staging table: enrich src_productivity with date, financial-class and
# place-of-service attributes, derive IP_OP_status, and replace stg_productivity.
# TODO (from original author): the financial class grouping needs to be updated
# from the server.
#
# SQL string literals use single quotes. SQLite treats double-quoted text as an
# identifier and only falls back to a string literal as a legacy quirk, so the
# double-quoted form breaks when that fallback is disabled.
#
# NOTE(review): the left joins will multiply rows if financial_dimension or
# dim_location is not unique on the join keys - confirm; drop_duplicates below
# only removes rows that are identical in every column.
stg_fact_rvu_fiscal_year_query = '''
select 
IDENTITY_ID, l.location_name as Place_Of_Service_Name, Place_Of_Service_Id,f.Location_Name, f.Location_Id,
Department_Name, Department_Id, Bill_Area_Name, 
case when Billing_Provider_Id is null then 99999 else Billing_Provider_Id end as Billing_Provider_Id, 
Billing_Provider_Name, Pat_Enc_Csn_Id, Service_Date, Mrn, Orig_Benefit_Plan_Id, Orig_Benefit_Plan_Name, Orig_Payor_Id, 
Orig_Payor_Name, Orig_Epic_Financial_Class, Orig_Ucr_Financial_Class, Post_Date, Cpt_Code, Procedure, 
Charges, Payments, Wrvu, Pos_Type_Name,
c.financial_class,
c.financial_class_grouping,
d.month_start as post_monthyear,
d.fiscal_yearmonth as FiscalMonthYear,
d.fiscal_year as FiscalYear,
d.fiscal_period as FiscalPeriod,
d.calendar_month_name as Month,
case 
    when l.location_name like '%Inp%' or l.location_name like '%RANCHO SPRINGS MEDICAL CENTER I%' 
        or l.location_name like '%IP%' or l.location_name like '%HOSPITALIST%' 
        or Pos_Type_Name like '%Hospital%'
        then 'Inpatient'
    else 'Outpatient' end as IP_OP_status
from src_productivity as f
inner join dim_date as d on f.Post_Date = d.Date
left outer join financial_dimension as c on f.Orig_Payor_Id = c.payor_id and f.Orig_Benefit_Plan_Id = c.benefit_plan_id
left outer join dim_location as l on f.Place_Of_Service_Id = l.location_id and l.location_type = 'Place of Service'
 
'''

# Run the query, remove exact duplicate rows (without in-place mutation), and
# replace the staging table. The to_sql return value is the cell output.
stg_fact_rvu_fiscal_year = pd.read_sql_query(stg_fact_rvu_fiscal_year_query, production).drop_duplicates()
stg_fact_rvu_fiscal_year.to_sql('stg_productivity', production, if_exists='replace', index=False)

981633

In [48]:
# Complete the reporting model: join the staged rows to the provider dimension,
# derive the provider specialty and the two composite join keys, then write the
# rpt_productivity table and CSV.
#
# SQL string literals use single quotes. SQLite treats double-quoted text as an
# identifier and only falls back to a string literal as a legacy quirk.
#
# NOTE(review): the query reads from stg_fact_rvu_fiscal_year_2y, but the staging
# cell in this notebook writes stg_productivity. Confirm that
# stg_fact_rvu_fiscal_year_2y is a view/table kept in sync with stg_productivity;
# otherwise this report is built from stale data.
# NOTE(review): `cast(... as str)` is not a TEXT cast. SQLite's type-affinity
# rules give the unrecognised type name `str` NUMERIC affinity. It is left
# unchanged because switching to `text` could alter the generated keys - confirm
# the intent against the consumers of kpi_model_join and
# provider_specialty_fiscalyearmonth.
rvu_data_model = '''
select
rf.*,
pd.provider_name,
pd.activity_code,
pd.activity_name,
pd.subdivision_name,
case 
    when rf.IP_OP_status = 'Inpatient'
    and pd.inpatient_specialty is not null then pd.inpatient_specialty
    else pd.primary_specialty end as provider_specialty,
case 
    when rf.IP_OP_status = 'Inpatient'
    and pd.inpatient_specialty_id is not null then pd.inpatient_specialty_id
    else pd.primary_specialty_id end as provider_specialty_id,
case 
    when rf.IP_OP_status = 'Inpatient'
    and pd.inpatient_specialty_id is not null 
    then 
    strftime('%Y', FiscalMonthYear) || '-' || cast(provider_id as int) || '-' || cast(pd.inpatient_specialty_id as str)
    else 
    strftime('%Y', FiscalMonthYear) || '-' || cast(provider_id as int) || '-' || cast(pd.primary_specialty_id as str)
    end as kpi_model_join, 
case 
    when rf.IP_OP_status = 'Inpatient'
    and pd.inpatient_specialty_id is not null 
    then 
    (cast(pd.provider_id as str) || '-' || strftime('%Y_%m', FiscalMonthYear) || '-' || cast(pd.inpatient_specialty_id as str))
    else 
    (cast(pd.provider_id as str) || '-' || strftime('%Y_%m', FiscalMonthYear) || '-' || cast(pd.primary_specialty_id as str))
    end as provider_specialty_fiscalyearmonth
from stg_fact_rvu_fiscal_year_2y as rf
left outer join vw_provider_dimension as pd on rf.Billing_Provider_Id = pd.provider_id

'''
# Run the model and remove exact duplicate rows without in-place mutation.
rvu_query = pd.read_sql_query(rvu_data_model, production).drop_duplicates()
print(rvu_query.shape[0])

# Split on whether a provider specialty was resolved by the left join.
missing_specialty = rvu_query.provider_specialty.isna()
null_provider = rvu_query[missing_specialty]
rvu_data = rvu_query[~missing_specialty]
print(rvu_data.shape[0])
print(rvu_data.Wrvu.sum())

rvu_data.to_sql('rpt_productivity', production, if_exists='replace', index=False)
# NOTE(review): the CSV is written from rvu_query (including rows with no
# provider specialty) while the rpt_productivity table above gets the filtered
# rvu_data - confirm this difference is intended.
rvu_query.to_csv('C:\\python_development\\report_csv\\rpt_productivity.csv', index=False)

959901
959901
343252.77


In [49]:
# Refresh the null-provider outputs: one CSV file and the src_null_provider table.
null_provider_csv = r'C:\python_development\report_csv\null_provider.csv'
null_provider.to_csv(null_provider_csv, index=False)

null_provider.to_sql(name='src_null_provider', con=production, if_exists='replace', index=False)
# Display the frame for review; all rows are worth a look, but the identifier
# columns are the ones to focus on.
null_provider

Unnamed: 0,IDENTITY_ID,Place_Of_Service_Name,Place_Of_Service_Id,Location_Name,Location_Id,Department_Name,Department_Id,Bill_Area_Name,Billing_Provider_Id,Billing_Provider_Name,Pat_Enc_Csn_Id,Service_Date,Mrn,Orig_Benefit_Plan_Id,Orig_Benefit_Plan_Name,Orig_Payor_Id,Orig_Payor_Name,Orig_Epic_Financial_Class,Orig_Ucr_Financial_Class,Post_Date,Cpt_Code,Procedure,Charges,Payments,Wrvu,Pos_Type_Name,financial_class,financial_class_grouping,post_monthyear,FiscalMonthYear,FiscalYear,FiscalPeriod,Month,IP_OP_status,provider_name,activity_code,activity_name,subdivision_name,provider_specialty,provider_specialty_id,kpi_model_join,provider_specialty_fiscalyearmonth


In [52]:
# Read the monthly productivity view, drop exact duplicate rows, print the wRVU
# total as a sanity check, and write the CSV that is later exported to the vault.
monthly_view_sql = 'select * from vw_monthly_productivity'
vw_complete_monthly_rvu_data_model = pd.read_sql_query(monthly_view_sql, production).drop_duplicates()
monthly_wrvu_total = vw_complete_monthly_rvu_data_model['monthly_wrvu'].sum()
print(monthly_wrvu_total)
vw_complete_monthly_rvu_data_model.to_csv(r'C:\python_development\report_csv\monthly_productivity.csv', index=False)

343252.77


In [54]:
# Copy the two report CSVs from the local report folder to the vault export folder.
rpt_local = r'C:\python_development\report_csv\rpt_productivity.csv'
rpt_vault = r'V:\Production\Data_Model\export_csv\rpt_productivity.csv'
monthly_local = r'C:\python_development\report_csv\monthly_productivity.csv'
monthly_vault = r'V:\Production\Data_Model\export_csv\monthly_productivity.csv'

shutil.copy2(rpt_local, rpt_vault)
# Last expression: copy2 returns the destination path, shown as the cell output.
shutil.copy2(monthly_local, monthly_vault)

'V:\\Production\\Data_Model\\export_csv\\monthly_productivity.csv'