In [1]:
import pandas as pd
import pyodbc
import os
import re

In [2]:
# Plan year used when building the Medicare.gov plan-details URLs.
PLAN_YEAR = 2024

# Folder prefix (trailing slash included) prepended to every CSV this notebook reads or writes.
DR_TARGET_FOLDER = 'PBP_Benefits_2024_Results/'

# CSV base names (without the '.csv' extension) inside DR_TARGET_FOLDER.
BENEFIT_DATA_FILE = 'MedicareBenefits'          # benefits input that gets compared
COMPARISON_RESULT_FILE = 'comparison_result'    # output written by the final cell

# Cache file for the crawled Medicare data; an alternative name used previously
# was 'MedicareCrawledData'.
MEDICARE_CRAWLED_DATA = 'MedicalBenefits_PBPJSON_20240521'

In [3]:
def read_pd_from_csv_file(file_name):
    """Read ``<DR_TARGET_FOLDER><file_name>.csv`` into a DataFrame.

    Args:
        file_name: CSV base name, without the ``.csv`` extension.

    Returns:
        The DataFrame produced by ``pd.read_csv`` with default options.
    """
    csv_path = ''.join((DR_TARGET_FOLDER, file_name, '.csv'))
    return pd.read_csv(csv_path)

def write_pd_to_csv(df, file_name):
    """Write ``df`` to ``<DR_TARGET_FOLDER><file_name>.csv`` without the index column.

    Args:
        df: DataFrame to persist.
        file_name: CSV base name, without the ``.csv`` extension.
    """
    csv_path = ''.join((DR_TARGET_FOLDER, file_name, '.csv'))
    df.to_csv(csv_path, index=False)
    
def get_medicare_site_url(qid, plan_year=None):
    """Build the Medicare.gov Plan Compare benefits URL for one plan.

    Args:
        qid: Plan identifier string, sliced as 5 characters of contract ID,
            3 characters of plan ID, and the remainder as the segment number
            (e.g. ``'H0074004000'``). Surrounding whitespace is ignored.
        plan_year: Year to embed in the URL. Defaults to the notebook-level
            ``PLAN_YEAR`` when omitted, which is what existing callers get.

    Returns:
        The plan-details URL as a string; the segment is rendered as an
        integer, so ``'000'`` becomes ``0``.

    Raises:
        ValueError: If the segment part is present but not numeric.
    """
    if plan_year is None:
        plan_year = PLAN_YEAR

    qid = qid.strip()
    contract_id, plan_id, segment = qid[:5], qid[5:8], qid[8:]

    # int('') raises, so a QID with no segment digits used to crash here.
    # Assumes a missing segment means segment 0 -- TODO confirm with data owner.
    segment_number = int(segment) if segment else 0

    return f'https://www.medicare.gov/plan-compare/#/plan-details/{plan_year}-{contract_id}-{plan_id}-{segment_number}?year={plan_year}&lang=en#benefits'

def run_query_in_db(query, database):
    """Run ``query`` against the SQL Server instance and return a DataFrame.

    Credentials are no longer stored in the notebook. Set these environment
    variables before calling:

    * ``DR_DB_USERNAME`` -- login name (required)
    * ``DR_DB_PASSWORD`` -- login password (required)
    * ``DR_DB_SERVER``   -- host name (optional; defaults to the dev host)

    NOTE(review): a password was previously committed in this function;
    it should be treated as leaked and rotated.

    Args:
        query: SQL text to execute.
        database: Name of the database to connect to.

    Returns:
        The query result as produced by ``pd.read_sql_query``.

    Raises:
        RuntimeError: If the username or password variable is missing/empty.
    """
    server = os.environ.get(
        'DR_DB_SERVER',
        'docurobot-dev.cmba97i2bfdq.us-east-1.rds.amazonaws.com',
    )
    username = os.environ.get('DR_DB_USERNAME')
    password = os.environ.get('DR_DB_PASSWORD')
    if not username or not password:
        raise RuntimeError(
            'Set the DR_DB_USERNAME and DR_DB_PASSWORD environment variables '
            'before querying the database.'
        )

    connection_string = (
        'DRIVER={ODBC Driver 17 for SQL Server};'
        f'SERVER={server};DATABASE={database};UID={username};PWD={password}'
    )

    conn = pyodbc.connect(connection_string)
    try:
        return pd.read_sql_query(query, conn)
    finally:
        # The original never closed the connection; release it even when the
        # query fails.
        conn.close()

In [4]:
# Crawled Medicare benefits used as the comparison baseline.
# The result is cached as a CSV in DR_TARGET_FOLDER so the database is only
# queried when that file does not exist yet.
medicare_crawled_data_file_path = f'{DR_TARGET_FOLDER}{MEDICARE_CRAWLED_DATA}.csv'

# batchID of the UIPath..Benefits rows to pull (was an inline literal in the SQL).
MEDICARE_BATCH_ID = 252

if os.path.exists(medicare_crawled_data_file_path):
    df_medicare_displaying_benefits = read_pd_from_csv_file(MEDICARE_CRAWLED_DATA)
else:
    # An earlier variant of this query (additionally joined to ApplicationPlan /
    # ExcludedPlans and filtered on applicationID = 183, carrierID > 0) was
    # assigned and then immediately overwritten; that dead assignment is removed.
    # Both interpolated values are integer constants defined in this notebook,
    # not user input.
    medicare_data_query = f'''
        select b.PlanYear, BF.QID, p.PlanName, p.MedicalPlanType as PlanType, CategoryName, ServiceName,
        trim(ServiceType) as Network, costShare as CostShare
        from UIPath..Benefits bf
        inner join UIPath..Batches b on bf.batchID = b.batchID and b.planyear = bf.planYear
        inner join DocuRobot..vv_plans p on p.qid = bf.qid and p.planyear = b.planyear
        where b.planYear = {int(PLAN_YEAR)}
        and bf.batchID = {int(MEDICARE_BATCH_ID)}
        group by b.PlanYear, BF.QID, CategoryName, serviceName, ServiceType, costShare, p.PlanName, p.MedicalPlanType
    '''

    df_medicare_displaying_benefits = run_query_in_db(medicare_data_query, 'Docurobot')
    write_pd_to_csv(df_medicare_displaying_benefits, MEDICARE_CRAWLED_DATA)

#utility method to compare
def purify_cost_share(benefit):
    """Normalize a cost-share string so two wordings can be compared.

    Steps, in order: trim and lowercase; map 'not covered' to
    'not applicable'; turn ' or ' into ', '; drop '<br/>', '<br />' and
    carriage returns; trim again; drop '(limits apply)', '(always covered)'
    and ' per item'; drop 'maximum N other' / 'maximum N every year' /
    'maximum N every M years' phrases; finally collapse
    '$0 copay, 0% coinsurance' and '0% coinsurance' to '$0 copay'.

    Args:
        benefit: Cost-share text, or None. Must be a str when not None.

    Returns:
        The normalized string (it may keep a trailing space left behind by a
        removed phrase), or None when ``benefit`` is None.
    """
    if benefit is None:
        return benefit

    benefit = benefit.strip().lower().replace('not covered', 'not applicable')
    for old, new in ((' or ', ', '), ('<br/>', ''), ('<br />', ''), ('\r', '')):
        benefit = benefit.replace(old, new)

    # Removing markup above can expose whitespace at the edges.
    benefit = benefit.strip()
    for noise in ('(limits apply)', '(always covered)', ' per item'):
        benefit = benefit.replace(noise, '')

    # Raw string: '\d' inside a normal string literal is an invalid escape
    # sequence and triggers a warning on current Python versions.
    benefit = re.sub(r'maximum \d+ (other|(every (year|\d? years)))', '', benefit)

    if benefit.strip() in ('$0 copay, 0% coinsurance', '0% coinsurance'):
        benefit = '$0 copay'
    return benefit
#utility method to compare
def matched(benefit1, benefit2):
    """Tell whether two cost-share strings are equal after normalization.

    Each value is passed through ``purify_cost_share`` and then has every
    '.', '$', '%', space and ',' character removed before comparison.

    NOTE(review): because '.' is removed, values such as '$1.50' and '$150'
    compare equal -- confirm this is intended.

    Args:
        benefit1: First cost-share string, or None.
        benefit2: Second cost-share string, or None.

    Returns:
        True when both normalized values are identical; False otherwise,
        including when either argument is None.
    """
    if benefit1 is None or benefit2 is None:
        return False

    ignored_chars = str.maketrans('', '', '.$% ,')
    left = purify_cost_share(benefit1)
    right = purify_cost_share(benefit2)
    return left.translate(ignored_chars) == right.translate(ignored_chars)

In [5]:
# Load the benefits CSV named by BENEFIT_DATA_FILE from DR_TARGET_FOLDER; this is
# the frame that gets merged with the crawled Medicare data further down.
pandas_df_medicalbenefits = read_pd_from_csv_file(BENEFIT_DATA_FILE)

In [6]:
# Prepare both frames for comparison: replace missing values with empty strings
# so the string-based cost-share helpers never see NaN. Rebinding (instead of
# inplace=True) keeps the cell free of hidden in-place mutation.
df_medicare_displaying_benefits = df_medicare_displaying_benefits.fillna('')
pandas_df_medicalbenefits = pandas_df_medicalbenefits.fillna('')

# Drop the literal '(Limits apply)' marker from our cost shares. Vectorized
# string replace (regex=False => plain text match) instead of a row-wise apply.
pandas_df_medicalbenefits['CostShare'] = (
    pandas_df_medicalbenefits['CostShare'].str.replace('(Limits apply)', '', regex=False)
)

In [7]:
# Validate benefits: pair every row of our benefits file with the crawled
# Medicare row for the same plan/category/service/network and flag whether the
# two cost shares agree.
# To debug a single plan or service, filter pandas_df_medicalbenefits on
# QID / ServiceName before this merge.
joined_df = pd.merge(
    pandas_df_medicalbenefits,
    df_medicare_displaying_benefits,
    how='inner',
    on=['PlanYear', 'QID', 'CategoryName', 'ServiceName', 'Network'],
)

# CostShare exists in both inputs, so the merge suffixes it:
# CostShare_x comes from the left frame, CostShare_y from the crawled data.
# A plain comprehension (rather than DataFrame.apply(axis=1)) also works when
# the join is empty; apply() returns a DataFrame in that case and the column
# assignment fails.
joined_df['Matched'] = [
    matched(ours, crawled)
    for ours, crawled in zip(joined_df['CostShare_x'], joined_df['CostShare_y'])
]
joined_df['Medicare.gov URL'] = joined_df['QID'].map(get_medicare_site_url)

write_pd_to_csv(joined_df, COMPARISON_RESULT_FILE)