In [1]:
%pip install pyspark

Note: you may need to restart the kernel to use updated packages.


In [2]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import udf
import pandas as pd
import pyodbc
import numpy as np
import re
import os

In [3]:
# Spark session shared by every load and SQL cell below; getOrCreate() reuses a
# session that is already running in this kernel instead of starting a second one.
spark = SparkSession.builder.appName("PBPPlanBuilder").getOrCreate()

In [None]:
# System parameters: input and output folders.
# Both values are joined to file names by plain string concatenation, so they
# must keep their trailing slash.
PBP_SOURCE_FOLDER = 'PBP_Benefits_2024/'
DR_TARGET_FOLDER = 'PBP_Benefits_2024_Results/'

In [None]:
# Plan-year parameters for the CMS data being processed.
# NOTE(review): the dollar amounts below are presumably the CMS figures for
# PLAN_YEAR -- confirm and update them together whenever PLAN_YEAR changes.

PLAN_YEAR = 2024
RX_CATASTROHPIC_LIMIT = 8000  # sic: "CATASTROPHIC"; name kept as-is in case other cells reference it
RX_INITIAL_COVERAGE_LIMIT = 5030
DEDAULT_RX_DEDUCTIBLE = 545  # sic: "DEFAULT"; read by get_rx_deductible_limit, so rename both together
MEDICARE_DEDUCTIBLE_PART_A = 1632
MEDICARE_DEDUCTIBLE_PART_B = 240

In [None]:
# load all required files
def load_csv(csv_file_path):
    """Read a tab-delimited text file into a Spark DataFrame.

    The first line of the file is used as the header and column types are
    inferred by Spark.

    Args:
        csv_file_path: path of the file to load.

    Returns:
        The loaded Spark DataFrame.
    """
    reader = spark.read.format("csv")
    reader = reader.option("delimiter", "\t")
    reader = reader.option("header", "true")
    reader = reader.option("inferSchema", "true")
    return reader.load(csv_file_path)

# Start from a clean catalog: unregister every view left over from a previous
# run of this cell before the views are created again below.
for table in spark.catalog.listTables():
    spark.catalog.dropTempView(table.name)

# Load every PBP extract from PBP_SOURCE_FOLDER and register it as a Spark temp
# view so the SQL cells below can query the files by name.
# The view name is the file name without the '.txt' extension.

# --- Plan-level sections -----------------------------------------------------
df_pbp_section_A = load_csv(PBP_SOURCE_FOLDER + 'pbp_Section_A.txt')

df_pbp_section_A.createTempView('pbp_section_A')

df_pbp_mrx = load_csv(PBP_SOURCE_FOLDER + 'pbp_mrx.txt')
df_pbp_mrx.createTempView('pbp_mrx')

df_pbp_Section_C = load_csv(PBP_SOURCE_FOLDER + 'pbp_Section_C.txt')
df_pbp_Section_C.createTempView('pbp_Section_C')

df_pbp_Section_C_OON = load_csv(PBP_SOURCE_FOLDER + 'pbp_Section_C_OON.txt')
df_pbp_Section_C_OON.createTempView('pbp_Section_C_OON')

df_pbp_Section_C_POS = load_csv(PBP_SOURCE_FOLDER + 'pbp_Section_C_POS.txt')
df_pbp_Section_C_POS.createTempView('pbp_Section_C_POS')

df_pbp_Section_D = load_csv(PBP_SOURCE_FOLDER + 'pbp_Section_D.txt')
df_pbp_Section_D.createTempView('pbp_Section_D')

# --- Section B benefit files (b1a ... b18) and step 2 -------------------------
df_pbp_b1a_inpat_hosp = load_csv(PBP_SOURCE_FOLDER + 'pbp_b1a_inpat_hosp.txt')
df_pbp_b1a_inpat_hosp.createTempView('pbp_b1a_inpat_hosp')


df_pbp_b2_snf = load_csv(PBP_SOURCE_FOLDER + 'pbp_b2_snf.txt')
df_pbp_b2_snf.createTempView('pbp_b2_snf')

df_pbp_step2 = load_csv(PBP_SOURCE_FOLDER + 'pbp_step2.txt')
df_pbp_step2.createTempView('pbp_step2')


df_pbp_b4_emerg_urgent =load_csv(PBP_SOURCE_FOLDER + 'pbp_b4_emerg_urgent.txt')
df_pbp_b4_emerg_urgent.createTempView('pbp_b4_emerg_urgent')


df_pbp_b7_health_prof = load_csv(PBP_SOURCE_FOLDER + 'pbp_b7_health_prof.txt')
df_pbp_b7_health_prof.createTempView('pbp_b7_health_prof')

df_pbp_b8_clin_diag_ther =load_csv(PBP_SOURCE_FOLDER + 'pbp_b8_clin_diag_ther.txt')
df_pbp_b8_clin_diag_ther.createTempView('pbp_b8_clin_diag_ther')

df_pbp_b9_outpat_hosp = load_csv(PBP_SOURCE_FOLDER + 'pbp_b9_outpat_hosp.txt')
df_pbp_b9_outpat_hosp.createTempView('pbp_b9_outpat_hosp')

df_pbp_b10_amb_trans =load_csv(PBP_SOURCE_FOLDER + 'pbp_b10_amb_trans.txt')
df_pbp_b10_amb_trans.createTempView('pbp_b10_amb_trans')

df_pbp_b11_dme_prosth_orth_sup =load_csv(PBP_SOURCE_FOLDER + 'pbp_b11_dme_prosth_orth_sup.txt')
df_pbp_b11_dme_prosth_orth_sup.createTempView('pbp_b11_dme_prosth_orth_sup')

df_pbp_b13_other_services =load_csv(PBP_SOURCE_FOLDER + 'pbp_b13_other_services.txt')
df_pbp_b13_other_services.createTempView('pbp_b13_other_services')

df_pbp_b13_b19b_other_services_vbid_uf =load_csv(PBP_SOURCE_FOLDER + 'pbp_b13_b19b_other_services_vbid_uf.txt')
df_pbp_b13_b19b_other_services_vbid_uf.createTempView('pbp_b13_b19b_other_services_vbid_uf')

df_pbp_b14_preventive =load_csv(PBP_SOURCE_FOLDER + 'pbp_b14_preventive.txt')
df_pbp_b14_preventive.createTempView('pbp_b14_preventive')

df_pbp_b15_partb_rx_drugs= load_csv(PBP_SOURCE_FOLDER + 'pbp_b15_partb_rx_drugs.txt')
df_pbp_b15_partb_rx_drugs.createTempView('pbp_b15_partb_rx_drugs')

df_pbp_b16_dental= load_csv(PBP_SOURCE_FOLDER + 'pbp_b16_dental.txt')
df_pbp_b16_dental.createTempView('pbp_b16_dental')

# Note: the variable carries an '_aids' suffix, but the file and the view are
# both named 'pbp_b17_eye_exams_wear'.
df_pbp_b17_eye_exams_wear_aids =load_csv(PBP_SOURCE_FOLDER + 'pbp_b17_eye_exams_wear.txt')
df_pbp_b17_eye_exams_wear_aids.createTempView('pbp_b17_eye_exams_wear')

df_pbp_b18_hearing_exams_aids =load_csv(PBP_SOURCE_FOLDER + 'pbp_b18_hearing_exams_aids.txt')
df_pbp_b18_hearing_exams_aids.createTempView('pbp_b18_hearing_exams_aids')

df_pbp_b18_b19b_hearing_exams_aids_vbid_uf =load_csv(PBP_SOURCE_FOLDER + 'pbp_b18_b19b_hearing_exams_aids_vbid_uf.txt')
df_pbp_b18_b19b_hearing_exams_aids_vbid_uf.createTempView('pbp_b18_b19b_hearing_exams_aids_vbid_uf')

In [None]:
#List of utility functions
def write_to_csv_file(df, file_name):
    """Collect a Spark DataFrame to pandas and save it under DR_TARGET_FOLDER.

    Args:
        df: Spark DataFrame to export (converted with ``toPandas()``).
        file_name: target file name without the ``.csv`` extension.
    """
    target_path = DR_TARGET_FOLDER + file_name + '.csv'
    df.toPandas().to_csv(target_path, index=False)

def read_pd_from_csv_file(file_name):
    """Load ``<DR_TARGET_FOLDER><file_name>.csv`` into a pandas DataFrame.

    Args:
        file_name: file name without the ``.csv`` extension.

    Returns:
        The pandas DataFrame read from the file.
    """
    source_path = DR_TARGET_FOLDER + file_name + '.csv'
    return pd.read_csv(source_path)

def write_pd_to_csv(df, file_name):
    """Save a pandas DataFrame as ``<DR_TARGET_FOLDER><file_name>.csv`` without the index.

    Args:
        df: pandas DataFrame to export.
        file_name: target file name without the ``.csv`` extension.
    """
    target_path = DR_TARGET_FOLDER + file_name + '.csv'
    df.to_csv(target_path, index=False)

def convert_to_int(field, null_value):
    """Convert ``field`` to ``int``, substituting ``null_value`` when it is ``None``.

    Only ``None`` is treated as missing; any other value is passed to ``int()``
    and may raise if it is not convertible.
    """
    return null_value if field is None else int(field)

def convert_to_currency(float_field):
    """Format a number as dollars with thousands separators and two decimals, e.g. ``$1,234.50``."""
    return f'${float_field:,.2f}'

def convert_to_currency_no_decimal(float_field):
    """Format a number as whole dollars with thousands separators, e.g. ``$8,000``."""
    return f'${float_field:,.0f}'

def drop_pbp_mrx_columns(df):
    """Return ``df`` without the raw source columns.

    A column is removed when its name starts with ``pbp_`` or ``mrx_``
    (case-insensitive). The input frame is not modified.

    Args:
        df: pandas DataFrame whose column names are strings.

    Returns:
        A new DataFrame containing only the remaining columns.
    """
    source_columns = [
        column_name
        for column_name in df.columns
        if column_name.lower().startswith(('pbp_', 'mrx_'))
    ]
    return df.drop(columns=source_columns)


def run_query_in_db(query, database):
    """Run ``query`` against a SQL Server database and return the rows as a pandas DataFrame.

    Connection settings are read from the environment so that no credential is
    stored in the notebook:

    * ``PBP_DB_SERVER``   - host name (defaults to the dev RDS instance)
    * ``PBP_DB_USERNAME`` - login name (required)
    * ``PBP_DB_PASSWORD`` - password (required)

    Args:
        query: SQL text to execute.
        database: name of the database to connect to.

    Returns:
        pandas DataFrame with the query result.

    Raises:
        RuntimeError: if the user name or password is not configured.
    """
    server = os.environ.get(
        'PBP_DB_SERVER',
        'docurobot-dev.cmba97i2bfdq.us-east-1.rds.amazonaws.com',
    )
    username = os.environ.get('PBP_DB_USERNAME')
    password = os.environ.get('PBP_DB_PASSWORD')
    # Fail before touching the driver so a missing setting gives a clear message.
    if not username or not password:
        raise RuntimeError(
            'Database credentials are not configured: set the PBP_DB_USERNAME '
            'and PBP_DB_PASSWORD environment variables.'
        )

    connection_string = (
        'DRIVER={ODBC Driver 17 for SQL Server};'
        f'SERVER={server};DATABASE={database};UID={username};PWD={password}'
    )

    conn = pyodbc.connect(connection_string)
    try:
        return pd.read_sql_query(query, conn)
    finally:
        # The connection was previously never closed; release it even if the query fails.
        conn.close()


In [None]:
# Carrier, Organization, Plans
# Source: HPMS..[usp_loadCarriersPUF] 
def get_plan_type(pbp_a_eghp_yn, pbp_a_org_type, pbp_a_plan_type, mrx_benefit_type):
    '''
    Classify a plan as 'PDP', 'MAPD' or 'MA'.

    Source (per the original note): usp_loadCarriersPUF_2023.

    Rules, in order:
      * org type 10 + plan type 29 + a drug benefit type of 1-4 -> 'PDP'
      * drug benefit type 1                                     -> 'MAPD'
      * anything else                                           -> 'MA'

    A missing drug benefit type (NaN) counts as "no drug benefit". Previously
    the drug-benefit flag was left unassigned in that case, and the function
    raised UnboundLocalError for org type 10 / plan type 29.

    Args:
        pbp_a_eghp_yn: converted with int() but not used in the decision.
        pbp_a_org_type: organization type code (int-convertible).
        pbp_a_plan_type: plan type code (int-convertible).
        mrx_benefit_type: drug benefit type code, or NaN when absent.

    Returns:
        'PDP', 'MAPD' or 'MA'.
    '''
    # Conversion kept so a non-numeric value still fails loudly, as before.
    pbp_a_eghp_yn = int(pbp_a_eghp_yn)
    pbp_a_org_type = int(pbp_a_org_type)
    pbp_a_plan_type = int(pbp_a_plan_type)

    # Always defined, even when mrx_benefit_type is NaN or an unexpected code.
    has_drug_benefit = mrx_benefit_type in (1, 2, 3, 4)

    if pbp_a_org_type == 10 and pbp_a_plan_type == 29 and has_drug_benefit:
        return 'PDP'
    if mrx_benefit_type == 1:
        return 'MAPD'
    return 'MA'

def get_snp_type(pbp_a_special_need_flag, pbp_a_special_need_plan_type):
    """Return the special-needs-plan label for a plan, or '' when it has none.

    The plan type is only read when the special-need flag equals 1:
    1 -> 'I-SNP' (Institutional), 3 -> 'D-SNP' (Dual-Eligible),
    4 -> 'C-SNP' (Chronic or Disabling Condition). Any other code gives ''.
    """
    if int(pbp_a_special_need_flag) != 1:
        return ''
    snp_labels = {
        1: 'I-SNP',  # Institutional
        3: 'D-SNP',  # Dual-Eligible
        4: 'C-SNP',  # Chronic or Disabling Condition
    }
    return snp_labels.get(int(pbp_a_special_need_plan_type), '')

def get_qid(ContractID, PlanID, SegmentID):
    """Build the plan key: contract ID + 3-character plan ID + 3-character segment ID.

    Plan and segment IDs are left-padded with zeros to three characters; longer
    values keep only their last three characters.
    """
    plan_part = str(PlanID).rjust(3, '0')[-3:]
    segment_part = str(SegmentID).rjust(3, '0')[-3:]
    return ContractID + plan_part + segment_part

def get_qid_from_bid_id(bid_id):
    """Turn an underscore-separated bid ID (contract_plan_segment) into a QID.

    The second and third parts are left-padded with zeros (and truncated to
    their last three characters); all parts are then concatenated.
    """
    parts = bid_id.split('_')
    for position in (1, 2):
        parts[position] = ('00' + parts[position])[-3:]
    return ''.join(parts)

def get_medical_plan_type_code(pbp_a_plan_type):
    '''
    Map a PBP plan type to the medical plan type code.

    Source: hpms.dbo.f_getMedicalPlanTypeCode

    Returns 0 ("unknown") when the plan type is None or not in the table.
    Entries marked "??" were flagged as uncertain in the original mapping.
    '''
    if pbp_a_plan_type is None:
        return 0  # unknown

    code_by_plan_type = {
        1: 10,    # HMO
        2: 120,   # HMOPOS
        4: 140,   # Local PPO
        5: 230,   # PSO (State License) ??
        7: 110,   # MSA ??
        8: 240,   # RFB PFFS ??
        9: 220,   # PFFS
        18: 80,   # 1876 Cost ?? cost plan?
        19: 80,   # HCPP - 1833 Cost ?? cost plan?
        20: 90,   # National Pace ??
        29: 190,  # Medicare Prescription Drug Plan (PDP)
        30: 190,  # Employer/Union Only Direct Contract PDP (PDP)
        31: 130,  # Regional PPO (RPPO)
        32: 250,  # Fallback ??
        40: 70,   # Employer/Union Only Direct Contract PFFS (PFFS)
        42: 260,  # RFB HMO
        43: 270,  # RFB HMOPOS
        44: 280,  # RFB Local PPO
        45: 290,  # RFB PSO (State License)
        47: 300,  # Employer Direct PPO
        48: 100,  # MMP HMO
        49: 100,  # MMP HMOPOS
    }
    return code_by_plan_type.get(int(pbp_a_plan_type), 0)

def get_rx_deductible_limit(mrx_alt_ded_amount, mrx_alt_ded_charge):
    """Return the drug deductible for a plan.

    * If ``mrx_alt_ded_amount`` is present (not NaN) it is returned as-is.
    * Otherwise, if ``mrx_alt_ded_charge`` is present and equals 1, the
      module-level default deductible is returned as a float.
    * Otherwise 0.0.
    """
    if not np.isnan(mrx_alt_ded_amount):
        return mrx_alt_ded_amount
    uses_default = not np.isnan(mrx_alt_ded_charge) and int(mrx_alt_ded_charge) == 1
    return float(DEDAULT_RX_DEDUCTIBLE) if uses_default else float(0)

# Build the plan list from Section A joined to the drug (mrx) and Section D files.
# Rows are kept when pbp_a_eghp_yn = 2, the plan type is one of the listed codes
# and the plan identifier is below 800.
# CarrierName / OrganizationName used to be selected twice; after the CSV round
# trip below pandas renamed the second copies to 'CarrierName.1' and
# 'OrganizationName.1', which then leaked into Plans.csv. Each is selected once now.
query = f'''
SELECT
	{PLAN_YEAR} as PlanYear,
	PBP_A_ORG_MARKETING_NAME as CarrierName, pbp_a_org_name as OrganizationName, PBP_A_ORG_WEBSITE as WebSiteAddress,
	a.PBP_A_CONTRACT_NUMBER as ContractID,
	a.pbp_a_plan_identifier1 as PlanID,
	a.PBP_A_SEGMENT_ID as SegmentID,
	PBP_A_PLAN_NAME as PlanName,
	PBP_A_PLAN_GEOG_NAME as GeoName,
	m.PBP_A_PLAN_TYPE as MedicalPlanType,
	pbp_a_eghp_yn, PBP_A_ORG_TYPE, pbp_a_snp_pct, pbp_a_snp_cond, m.PBP_A_PLAN_TYPE,
	pbp_a_special_need_flag, pbp_a_special_need_plan_type,
	mrx_alt_ded_amount, mrx_alt_ded_charge, mrx_benefit_type
FROM pbp_section_A a
	left join pbp_mrx m on a.bid_id = m.bid_id
	left join pbp_Section_D d on a.bid_id = d.bid_id
	where cast(pbp_a_eghp_yn as int) = 2 and cast(m.PBP_A_PLAN_TYPE as int) in (1, 2, 4, 9, 29, 31, 42, 43, 44, 45 )
	and CAST(a.pbp_a_plan_identifier1 AS INT) < 800
'''

df_plans = spark.sql(query)
write_to_csv_file(df_plans, 'Plans')

# Add calculated columns.
# The query result is re-read from the CSV just written, so from here on
# df_plans is a pandas DataFrame (not a Spark one).
df_plans = read_pd_from_csv_file('Plans')
df_plans['PlanType'] = df_plans.apply(lambda x: get_plan_type(x.pbp_a_eghp_yn,x.PBP_A_ORG_TYPE, x.PBP_A_PLAN_TYPE, x.mrx_benefit_type), axis=1)
df_plans['SNPType'] =  df_plans.apply(lambda x: get_snp_type(x.pbp_a_special_need_flag, x.pbp_a_special_need_plan_type), axis=1)
df_plans['QID'] = df_plans.apply(lambda x: get_qid(x.ContractID, x.PlanID, x.SegmentID), axis=1)
df_plans['DrugDeductibleLimit'] = df_plans.apply(lambda x: get_rx_deductible_limit(x.mrx_alt_ded_amount, x.mrx_alt_ded_charge), axis=1)
# The raw pbp_*/mrx_* inputs are only needed for the calculations above.
df_plans = drop_pbp_mrx_columns(df_plans)

# Overwrite Plans.csv with the final, calculated layout.
write_pd_to_csv(df_plans, 'Plans')

In [None]:
'''
Matching DF for DR..MedicalBenefits
'''
# Empty frame that fixes the column layout of the medical-benefits output.
# Presumably this is the accumulator passed as `df_all` to
# add_category_benefit_to_df_medicalbenefits -- no call is visible in this notebook section.
pandas_df_medicalbenefits = pd.DataFrame(columns = ['QID', 'PlanYear', 'CategoryName', 'ServiceName', 'Network', 'CostShare'])


# Merge one benefit frame into the accumulated medical-benefits frame, replacing
# any rows already present for the same category / service / network.
def add_category_benefit_to_df_medicalbenefits(df, df_all):
    """Append the rows of ``df`` to ``df_all``, superseding earlier rows with the same key.

    The key is (CategoryName, ServiceName, Network). Rows of ``df_all`` whose
    key also occurs in ``df`` are dropped before ``df`` is appended, so
    re-adding a benefit replaces it instead of duplicating it.

    Fixes over the previous version:
      * the lookup compared CategoryName twice and never ServiceName;
      * on any match it removed *every* accumulated row, not just the matches;
      * it emptied the caller's ``df_all`` in place.

    Args:
        df: benefit rows to add; must contain every column of ``df_all``.
        df_all: accumulated rows; its columns define the output layout.
            It is not modified.

    Returns:
        A new DataFrame with the columns of ``df_all`` and a fresh 0..n-1 index.
    """
    new_keys = df[['CategoryName', 'ServiceName', 'Network']].drop_duplicates()

    # Flag the accumulated rows that the incoming frame replaces.
    superseded = pd.Series(False, index=df_all.index, dtype=bool)
    for _, key in new_keys.iterrows():
        superseded |= (
            (df_all['CategoryName'] == key['CategoryName'])
            & (df_all['ServiceName'] == key['ServiceName'])
            & (df_all['Network'] == key['Network'])
        )

    df_kept = df_all[~superseded]
    df_selected = df[df_all.columns.tolist()]
    return pd.concat([df_kept, df_selected], ignore_index=True)


# Reference data for validation: a CSV in DR_TARGET_FOLDER, named by
# MEDICARE_CRAWLED_DATA (without the '.csv' extension).
# NOTE(review): judging by the names, this holds cost shares crawled from Medicare.gov -- confirm.
MEDICARE_CRAWLED_DATA = 'MedicalBenefits_PBPJSON_20240521'
# Full path of the same file; not referenced again in the visible cells.
medicare_crawled_data_file_path = f'{DR_TARGET_FOLDER}{MEDICARE_CRAWLED_DATA}.csv'
df_medicare_displaying_benefits = read_pd_from_csv_file(MEDICARE_CRAWLED_DATA)

#utility method to compare
def purify_cost_share(benefit):
    """Normalize a cost-share text so two differently formatted sources can be compared.

    Steps: trim and lower-case; treat 'not covered' as 'not applicable';
    turn ' or ' into ', '; remove line-break markup, the '(limits apply)' /
    '(always covered)' notes, ' per item' and 'maximum N ...' frequency
    phrases; finally collapse a zero coinsurance to '$0 copay'.

    Args:
        benefit: cost-share text, or None.

    Returns:
        The normalized text (it may keep trailing whitespace left behind by a
        removed phrase), or None when ``benefit`` is None.
    """
    if benefit is None:
        return benefit

    text = benefit.strip().lower().replace('not covered', 'not applicable')
    text = text.replace(' or ', ', ').replace('<br/>', '').replace('<br />', '').replace('\r', '')
    # Removing the markup can expose surrounding whitespace, so trim again.
    text = text.strip().replace('(limits apply)', '').replace('(always covered)', '')
    text = text.replace(' per item', '').replace('(always covered)', '')
    # Raw string: '\d' in a normal string literal is an invalid escape sequence.
    text = re.sub(r'maximum \d+ (other|(every (year|\d? years)))', '', text)
    if text.strip() in ('$0 copay, 0% coinsurance', '0% coinsurance'):
        text = '$0 copay'
    return text
#utility method to compare
def matched(benefit1, benefit2):
    """Tell whether two cost-share texts are equal after normalization.

    Both texts go through ``purify_cost_share`` and then lose every '.', '$',
    '%', ',' and space before being compared. Returns False when either
    argument is None.
    """
    if benefit1 is None or benefit2 is None:
        return False

    # Deletes the punctuation and spaces that differ between sources.
    strip_noise = str.maketrans('', '', '.$% ,')
    normalized_first = purify_cost_share(benefit1).translate(strip_noise)
    normalized_second = purify_cost_share(benefit2).translate(strip_noise)
    return normalized_first == normalized_second

def get_medicare_site_url(qid):
    """Build the Medicare.gov plan-compare URL for a plan.

    ``qid`` is split by position: the first 5 characters are the contract ID,
    the next 3 the plan ID and the remainder the segment ID (written to the URL
    as an integer, i.e. without leading zeros). PLAN_YEAR supplies the year.
    """
    contract, plan, segment = qid[:5], qid[5:8], qid[8:]
    plan_key = f'{PLAN_YEAR}-{contract}-{plan}-{int(segment)}'
    return f'https://www.medicare.gov/plan-compare/#/plan-details/{plan_key}?year={PLAN_YEAR}&lang=en#benefits'



In [None]:
def explode_inn_oon_costshare(df_benefits):
    """Split each benefit row into separate in-network and out-of-network rows.

    A row yields an in-network record when its ``INN_CostShare`` is not ''
    and an out-of-network record when its ``OON_CostShare`` is not ''.
    In-network records are labelled 'In-network' only when the plan has
    ``pbp_c_pos_yn == 1`` or ``pbp_c_oon_yn == 1``; otherwise their Network is ''.

    The filtered slices are copied before columns are added, which avoids
    pandas' SettingWithCopyWarning and leaves ``df_benefits`` untouched.

    Args:
        df_benefits: pandas DataFrame with PlanYear, QID, CategoryName,
            ServiceName, INN_CostShare, OON_CostShare, 'Medicare.gov URL',
            pbp_c_pos_yn and pbp_c_oon_yn columns.

    Returns:
        DataFrame with columns PlanYear, QID, CategoryName, ServiceName,
        CostShare, 'Medicare.gov URL', Network: in-network rows first, then
        out-of-network rows, each keeping its original index label.
    """
    shared_columns = ['PlanYear', 'QID', 'CategoryName', 'ServiceName']

    inn = df_benefits[df_benefits['INN_CostShare'] != ''].copy()
    has_other_network = (inn['pbp_c_pos_yn'] == 1) | (inn['pbp_c_oon_yn'] == 1)
    inn['Network'] = has_other_network.map(lambda flag: 'In-network' if flag else '')
    inn = inn[shared_columns + ['INN_CostShare', 'Medicare.gov URL', 'Network']]
    inn = inn.rename(columns={'INN_CostShare': 'CostShare'})

    oon = df_benefits[df_benefits['OON_CostShare'] != ''].copy()
    oon['Network'] = 'Out-of-network'
    oon = oon[shared_columns + ['OON_CostShare', 'Medicare.gov URL', 'Network']]
    oon = oon.rename(columns={'OON_CostShare': 'CostShare'})

    return pd.concat([inn, oon])

In [None]:
# Caregiver
# Benefit Code = 14c22(NMC)
# In-Network & Out-of-Network
# uses Medicare for out-of-network if NMC is available
#
# The OON / POS sections are left-joined only when their benefit-category list
# contains '14c22'. pbp_c_pos_yn used to be projected twice, which left a stray
# 'pbp_c_pos_yn.1' column after the CSV round trip; it is selected once now.

query = '''select
    a.PBP_A_CONTRACT_NUMBER as ContractID,
    a.pbp_a_plan_identifier1 as PlanID,
    a.PBP_A_SEGMENT_ID as SegmentID,
    pbp_a_special_need_plan_type, pbp_a_dsnp_zerodollar, pbp_a_snp_state_cvg_yn,
    pbp_b14c_bendesc_yn,
pbp_b14c_bendesc_ehc,
pbp_b14c_bendesc_amo_mhc,
pbp_b14c_bendesc_typ_mhc,
pbp_b14c_bendesc_amo_rat,
pbp_b14c_rat_bendesc_ehc,
pbp_b14c_bendesc_amo_isa,
pbp_b14c_bendesc_ihss,
pbp_b14c_bendesc_sce,
pbp_b14c_sce_type_chk,
pbp_b14c_sce_note,
pbp_b14c_maxplan_yn,
pbp_b14c_maxplan_ehc,
pbp_b14c_maxplan_amt_mhc,
pbp_b14c_maxplan_per_mhc,
pbp_b14c_maxplan_per_mhc_d,
pbp_b14c_maxplan_amt_rat,
pbp_b14c_maxplan_per_rat,
pbp_b14c_maxplan_per_rat_d,
pbp_b14c_maxplan_amt_isa,
pbp_b14c_maxplan_per_isa,
pbp_b14c_maxplan_per_isa_d,
pbp_b14c_maxplan_amt_ihss,
pbp_b14c_maxplan_per_ihss,
pbp_b14c_maxplan_per_ihss_d,
pbp_b14c_maxplan_amt_sce,
pbp_b14c_maxplan_per_sce,
pbp_b14c_maxplan_per_sce_d,
pbp_b14c_maxenr_yn,
pbp_b14c_maxenr_ehc,
pbp_b14c_maxenr_amt_mhc,
pbp_b14c_maxenr_per_mhc,
pbp_b14c_maxenr_per_mhc_d,
pbp_b14c_maxenr_amt_rat,
pbp_b14c_maxenr_per_rat,
pbp_b14c_maxenr_per_rat_d,
pbp_b14c_maxenr_amt_isa,
pbp_b14c_maxenr_per_isa,
pbp_b14c_maxenr_per_isa_d,
pbp_b14c_maxenr_amt_ihss,
pbp_b14c_maxenr_per_ihss,
pbp_b14c_maxenr_per_ihss_d,
pbp_b14c_maxenr_amt_sce,
pbp_b14c_maxenr_per_sce,
pbp_b14c_maxenr_per_sce_d,
pbp_b14c_coins_yn,
pbp_b14c_coins_ehc,
pbp_b14c_coins_pct_min_mhc,
pbp_b14c_coins_pct_max_mhc,
pbp_b14c_coins_pct_min_rat_wp,
pbp_b14c_coins_pct_max_rat_wp,
pbp_b14c_coins_pct_min_rat_nh,
pbp_b14c_coins_pct_max_rat_nh,
pbp_b14c_coins_pct_min_isa,
pbp_b14c_coins_pct_max_isa,
pbp_b14c_coins_pct_min_ihss,
pbp_b14c_coins_pct_max_ihss,
pbp_b14c_coins_pct_min_sce,
pbp_b14c_coins_pct_max_sce,
pbp_b14c_ded_yn,
pbp_b14c_ded_amt,
pbp_b14c_copay_yn,
pbp_b14c_copay_ehc,
pbp_b14c_copay_mhc_min_amt,
pbp_b14c_copay_mhc_max_amt,
pbp_b14c_copay_rat_wp_min_amt,
pbp_b14c_copay_rat_wp_max_amt,
pbp_b14c_copay_rat_nh_min_amt,
pbp_b14c_copay_rat_nh_max_amt,
pbp_b14c_copay_isa_min_amt,
pbp_b14c_copay_isa_max_amt,
pbp_b14c_copay_min_amt_ihss,
pbp_b14c_copay_max_amt_ihss,
pbp_b14c_copay_min_amt_sce,
pbp_b14c_copay_max_amt_sce,
pbp_b14c_auth_yn,
pbp_b14c_refer_yn,

    c.pbp_c_oon_yn, pbp_c_pos_yn,
    pbp_c_oon_outpt_maxplan_yn,pbp_c_oon_outpt_maxplan_amt,pbp_c_oon_outpt_maxplan_per,pbp_c_oon_outpt_maxplan_per_d,
    pbp_c_oon_outpt_coins_yn,pbp_c_oon_outpt_coins_min_pct,pbp_c_oon_outpt_coins_max_pct,
    pbp_c_oon_outpt_copay_yn,pbp_c_oon_outpt_copay_min_amt,pbp_c_oon_outpt_copay_max_amt,
    pbp_c_oon_outpt_ded_yn,pbp_c_oon_outpt_ded_amt,
    pbp_c_pos_outpt_coins_yn,pbp_c_pos_outpt_coins_min_pct,pbp_c_pos_outpt_coins_max_pct,
    pbp_c_pos_outpt_copay_yn,pbp_c_pos_outpt_copay_min_amt,pbp_c_pos_outpt_copay_max_amt,
    pbp_c_pos_outpt_maxplan_yn,pbp_c_pos_outpt_maxplan_amt,pbp_c_pos_outpt_maxplan_per,pbp_c_pos_outpt_maxplan_per_d,pbp_c_pos_outpt_deduct_yn,pbp_c_pos_outpt_deduct_amt
from
pbp_Section_A a
inner join pbp_Section_C c on a.bid_id = c.bid_id
inner join pbp_b14_preventive b on c.bid_id = b.bid_id
left join pbp_Section_C_OON coon on c.bid_id = coon.bid_id and concat(';', COALESCE(coon.pbp_c_oon_out_nmc_bendesc_cats, '')) like '%;14c22;%'
left join pbp_Section_C_POS cpos on c.bid_id = cpos.bid_id and concat(';', COALESCE(cpos.pbp_c_pos_outpt_nmc_bencats, '')) like '%;14c22;%'
where CAST(c.pbp_a_plan_identifier AS INT) < 800 and cast(pbp_a_eghp_yn as int) = 2
and cast(c.PBP_A_PLAN_TYPE as int) in (1, 2, 4, 9, 29, 31, 42, 43, 44, 45 )
'''
# Run the query and save the raw 14c inputs for the text-generation step.
df_14c = spark.sql(query)
write_to_csv_file(df_14c, 'MedicalBenefits_14c_DataSource')

# Project-local module that renders the benefit texts and the plan key.
from PBP_Benefit_Text import Benefit_14c22, Plan

# Re-read the query result as a pandas DataFrame and derive the output columns.
df_medical_benefits_14c22 = read_pd_from_csv_file('MedicalBenefits_14c_DataSource')
# Use the notebook-wide PLAN_YEAR (was a hard-coded 2024) so this column always
# agrees with the year used in the Medicare.gov URL and in the Plans query.
df_medical_benefits_14c22['PlanYear'] = PLAN_YEAR
df_medical_benefits_14c22['QID'] = df_medical_benefits_14c22.apply(Plan.get_QID, axis=1)
df_medical_benefits_14c22['CategoryName'] = 'Caregiver'
df_medical_benefits_14c22['ServiceName'] = ''

df_medical_benefits_14c22['INN_CostShare'] = df_medical_benefits_14c22.apply(Benefit_14c22.get_INN_text, axis=1)
df_medical_benefits_14c22['OON_CostShare'] = df_medical_benefits_14c22.apply(Benefit_14c22.get_OON_text, axis=1)
df_medical_benefits_14c22['Medicare.gov URL'] = df_medical_benefits_14c22['QID'].map(get_medicare_site_url)

# One output row per network (in-network / out-of-network) with a cost share.
df_medical_benefits_14c22 = explode_inn_oon_costshare(df_medical_benefits_14c22)
df_medical_benefits_14c22 = drop_pbp_mrx_columns(df_medical_benefits_14c22)
write_pd_to_csv(df_medical_benefits_14c22,  'MedicalBenefits_14c22')

In [None]:
# Validate benefit 14c22 (Caregiver): join the generated cost shares to the
# reference data on plan year / QID / category / service / network and record
# whether the two cost-share texts match after normalization.
df_medicare_displaying_benefits.fillna('', inplace=True)
df_medical_benefits_14c22.fillna('', inplace=True)
# Literal (non-regex) removal of the display-only notes. Column-level string
# operations also work when the reference file has no rows, where the previous
# row-wise apply() failed.
df_medicare_displaying_benefits['CostShare'] = (
    df_medicare_displaying_benefits['CostShare']
    .str.replace('(always covered)', '', regex=False)
    .str.replace('(Limits apply)', '', regex=False)
)
joined_df= pd.merge(df_medical_benefits_14c22, df_medicare_displaying_benefits, how='inner', on=['PlanYear','QID','CategoryName', 'ServiceName', 'Network'])
if len(joined_df) > 0:
    # CostShare_x is the generated text, CostShare_y the reference text.
    joined_df['Matched'] = joined_df.apply(lambda x: matched(x.CostShare_x, x.CostShare_y), axis=1)
    joined_df.to_csv(DR_TARGET_FOLDER + 'comparison_result_dev.csv', index=False)
else:
    print('no data file to compare')

In [None]:
# Clean-up: unregister every view registered by this notebook so nothing is
# left behind in the Spark session.
for table in spark.catalog.listTables():
    spark.catalog.dropTempView(table.name)