In [1]:
import pandas as pd
import pyodbc
import os
import re
import urllib
import numpy as np
from sqlalchemy import create_engine

In [2]:
PLAN_YEAR = 2025
CATASTROPHIC_LIMIT = 2000
DR_TARGET_FOLDER = 'PBP_Benefits_2025_Results/'
PLAN_DATA_FILE = f'Plans'
BENEFIT_DATA_FILE = f'MedicareBenefits'
PLAN_COVERAGE_DATA_FILE = f'PlanCoverage'

SQL_CONNECTION_PARAMETERS = {    
    "SERVER": 'docurobot-dev.cmba97i2bfdq.us-east-1.rds.amazonaws.com',
    "DATABASE":  'PBP_2025',
    "USERNAME": 'alee',
    "PASSWORD": 'P@$$w0rd'
}

In [3]:
def read_pd_from_csv_file(file_path):
    return pd.read_csv(file_path + '.csv')

def get_db_connection():
    connectionString = f"DRIVER={{ODBC Driver 17 for SQL Server}};SERVER={SQL_CONNECTION_PARAMETERS['SERVER']};DATABASE={SQL_CONNECTION_PARAMETERS['DATABASE']};UID={SQL_CONNECTION_PARAMETERS['USERNAME']};PWD={SQL_CONNECTION_PARAMETERS['PASSWORD']}"
    conn = pyodbc.connect(connectionString)
    return conn

def get_db_engine():
    # Define your SQLAlchemy engine (adjust server and database names)
    engine = create_engine(
        f"mssql+pyodbc://{SQL_CONNECTION_PARAMETERS['USERNAME']}:{urllib.parse.quote(SQL_CONNECTION_PARAMETERS['PASSWORD'])}@{SQL_CONNECTION_PARAMETERS['SERVER']}/{SQL_CONNECTION_PARAMETERS['DATABASE']}?driver=SQL Server Native Client 11.0"
    )
    return engine

In [4]:

df_plans =  pd.read_csv(os.path.join(DR_TARGET_FOLDER, PLAN_DATA_FILE) + '.csv', dtype={"PlanID": str, "SegmentID": str})
df_medicalbenefits = read_pd_from_csv_file(os.path.join(DR_TARGET_FOLDER, BENEFIT_DATA_FILE))
df_plancoverages = pd.read_csv(os.path.join(DR_TARGET_FOLDER, PLAN_COVERAGE_DATA_FILE) + '.csv', dtype={"PlanID": str, "SegmentID": str, "SSAFIPS": str, "CountyFIPS": str})

In [5]:
def get_plan_type_code(plantype):
    if plantype == 'PDP':
        return 10
    if plantype == 'MA':
        return 20
    return 30

def get_snp_ind(SNP_type):
    if SNP_type == 'I-SNP':
        return 3    
    if SNP_type == 'D-SNP':
        return 2
    if SNP_type == 'C-SNP':
        return 1
    return np.nan

def get_medical_plan_type_code(medical_plan_type):
	'''
	Source: hpms.dbo.f_getMedicalPlanTypeCode
	'''
	if medical_plan_type is not None:
		if medical_plan_type == 'HMO':return 10
		if medical_plan_type == 'HMOPOS': return 120 #
		if medical_plan_type == 'Local PPO': return 140 #
		if medical_plan_type == 'PSO (State License)': return 230 ##??
		if medical_plan_type == 'MSA': return 110 ##??
		if medical_plan_type == 'RFB PFFS': return 240 ##??
		if medical_plan_type == 'PFFS': return 220 #
		if medical_plan_type == '1876 Cost': return 80 ##?? cost plan?
		if medical_plan_type == 'HCPP - 1833 Cost': return 80 ##?? cost plan?
		if medical_plan_type == 'National Pace': return 90 ##??
		if medical_plan_type == 'Medicare Prescription Drug Plan': return 190 # # PDP
		if medical_plan_type == 'Employer/Union Only Direct Contract PDP': return 190#  # PDP
		if medical_plan_type == 'Regional PPO': return 130 # # RPPO
		if medical_plan_type == 'RPPO': return 130 # # RPPO
		if medical_plan_type == 'Fallback': return 250 ##??
		if medical_plan_type == 'Employer/Union Only Direct Contract PFFS': return 70 ## PFFS
		if medical_plan_type == 'RFB HMO': return 260 #
		if medical_plan_type == 'RFB HMOPOS': return 270 #
		if medical_plan_type == 'RFB Local PPO': return 280 #
		if medical_plan_type == 'RFB PSO (State License)': return 290 #
		if medical_plan_type == 'Employer Direct PPO': return 300 #
		if medical_plan_type == 'MMP HMO': return 100 #
		if medical_plan_type == 'MMP HMOPOS': return 100 #
	return 0 #'unknown'

In [6]:
# convert text to DR DB Type code
df_plans['PlanID'] = df_plans['PlanID'].apply(lambda x: ('00' + x)[-3:])
df_plans['SegmentID'] = df_plans['SegmentID'].apply(lambda x: ('00' + x)[-3:])
df_plans['PlanTypeCode'] = df_plans['PlanType'].apply(lambda x: get_plan_type_code(x))
df_plans['SNPIND'] = df_plans['SNPType'].apply(lambda x: get_snp_ind(x))
df_plans['CatastrophicLimit'] = df_plans['PlanType'].apply(lambda x: np.nan if x == 'MA' else CATASTROPHIC_LIMIT)
df_plans['MedicalPlanTypeCode'] = df_plans['MedicalPlanType'].apply(lambda x: get_medical_plan_type_code(x))

In [7]:
# upload datasets
uploading_qids = [f"'{qid}'" for qid in df_plans['QID'].drop_duplicates().to_list()]
delete_plans_query = f'''
delete from Plans where planyear = {PLAN_YEAR} and QID in ({','.join(uploading_qids)});
delete from MedicalBenefits where planyear = {PLAN_YEAR} and QID in ({','.join(uploading_qids)});
delete from PlanCoverages where planyear = {PLAN_YEAR} and QID in ({','.join(uploading_qids)});
'''
cur = get_db_connection().cursor()
cur.execute(delete_plans_query)
cur.commit()

In [8]:
df_plans.to_sql(name="Plans", index=False, con=get_db_engine(), if_exists='append')
df_medicalbenefits.to_sql(name="MedicalBenefits", index=False, con=get_db_engine(), if_exists='append')
df_plancoverages.to_sql(name="PlanCoverages", index=False, con=get_db_engine(), if_exists='append')

152