 Eligibility Mock Data - US Healthcare 101

Tab Names:
Demographics Mock Data
Enrollment Mock Data

In [0]:
dbutils.fs.rm("dbfs:/user/hive/warehouse/target_table",True)

Out[1]: False

In [0]:
dbutils.fs.rm("dbfs:/user/hive/warehouse/enrollment_mock_data_table",True)
dbutils.fs.rm("dbfs:/user/hive/warehouse/demographics_mock_data_table",True)

Out[2]: False

Create Tables for Demographics Mock Data & Enrollment Mock Data from Excel File

In [0]:
file_location = "dbfs:/FileStore/Eligibility_Mock_Data___US_Healthcare_Bootcamp.xlsx"
sheet_names = ["'Data Dictionary'", "'Crosswalks'", "'Enrollment Mock Data'", "'Demographics Mock Data'"]

In [0]:
dfs = {}

In [0]:
from pyspark.sql.utils import AnalysisException
for sheet_name in sheet_names:
    try:
        df = spark.read.format("com.crealytics.spark.excel").option("inferschema", True).option("header", True).option("dataAddress", f"{sheet_name}!").option("sheetName", sheet_name).load(file_location)
        processed_sheet_name = sheet_name.lower().replace(" ","_").replace("'","")

        df.createOrReplaceTempView(processed_sheet_name)
        print(f"View Created For Sheet: {processed_sheet_name}")

        spark.sql(f"CREATE TABLE {processed_sheet_name}_table AS SELECT * FROM {processed_sheet_name}")
        print(f"Table Created for View: {processed_sheet_name}_table")
    except AnalysisException as e:
        print(f"Error creating table for view {processed_sheet_name}: {str(e)}")

View Created For Sheet: data_dictionary
Error creating table for view data_dictionary: Found invalid character(s) among ' ,;{}()\n\t=' in the column names of your schema. Please use other characters and try again.
View Created For Sheet: crosswalks
Error creating table for view crosswalks: Found invalid character(s) among ' ,;{}()\n\t=' in the column names of your schema. Please use other characters and try again.
View Created For Sheet: enrollment_mock_data
Table Created for View: enrollment_mock_data_table
View Created For Sheet: demographics_mock_data
Table Created for View: demographics_mock_data_table


Create tables for Crosswalks From Excel File

In [0]:
table_ranges = ["A2:C5", "A8:B25", "A28:B33", "A36:B43","A46:E62"]
sheet_name = "crosswalks"
for idx, table_range in enumerate(table_ranges, start=1):
    try:
        df = spark.read.format("com.crealytics.spark.excel") \
                   .option("inferschema", True) \
                   .option("header", True) \
                   .option("dataAddress", f"{sheet_name}!{table_range}") \
                   .option("sheetName", sheet_name) \
                   .load(file_location)
        processed_sheet_name = sheet_name.lower().replace(" ", "_").replace("'", "")
        table_name = f"{processed_sheet_name}_Table{idx}" 
        
        df.createOrReplaceTempView(table_name)
        print(f"Table created for range {table_range} as {table_name}")
    except AnalysisException as e:
        print(f"Error creating table for range {table_range}: {str(e)}")

Table created for range A2:C5 as crosswalks_Table1
Table created for range A8:B25 as crosswalks_Table2
Table created for range A28:B33 as crosswalks_Table3
Table created for range A36:B43 as crosswalks_Table4
Table created for range A46:E62 as crosswalks_Table5


In [0]:
%sql
select * from crosswalks_Table2;

Rollup_Code,Rollup_Description
E,Self
S,Spouse
F,Father
M,Mother
C1,Son
C2,Daughter
G1,Grand Father
G2,Grand Mother
G3,Grand Son
G4,Grand Daughter


DDL for target_table

In [0]:
%sql
CREATE TABLE IF NOT EXISTS target_table (
    Abacus_Record_ID VARCHAR(20),
    Abacus_Member_ID VARCHAR(20),
    Member_ID VARCHAR(20),
    Subscriber_ID VARCHAR(20),
    Member_First_Name VARCHAR(75),
    Member_Last_Name VARCHAR(75),
    Member_Middle_Name VARCHAR(75),
    Member_Prefix_Name VARCHAR(10),
    Member_Suffix_Name VARCHAR(10),
    Member_Gender VARCHAR(10),
    Member_Date_of_Birth DATE,
    Member_Relationship_Code VARCHAR(10),
    -- Member_Relationship_Description VARCHAR(20),
    Member_Person_Code INTEGER,
    Member_Address_Line_1 VARCHAR(100),
    Member_Address_Line_2 VARCHAR(100),
    Member_City VARCHAR(20),
    Member_State VARCHAR(20),
    Member_County VARCHAR(50),
    Member_Postal_Code VARCHAR(10),
    Member_Country VARCHAR(20),
    Member_Home_Phone INTEGER,
    Member_Work_Phone INTEGER,
    Member_Mobile_Phone INTEGER,
    Member_Email VARCHAR(100),
    Member_Is_Deceased CHAR(1),
    Member_Date_of_Death DATE,
    Member_Deceased_Reason VARCHAR(100),
    Enrollment_Group_ID VARCHAR(20),
    Enrollment_Group_Name VARCHAR(50),
    Enrollment_SubGroup_ID VARCHAR(20),
    Enrollment_SubGroup_Name VARCHAR(50),
    Enrollment_Coverage_Code VARCHAR(10),
    Enrollment_Coverage_Description VARCHAR(30),
    Enrollment_Plan_ID VARCHAR(10),
    Enrollment_Plan_Name VARCHAR(30),
    Enrollment_Plan_Coverage VARCHAR(50),
    Enrollment_Medical_Effective_Date DATE,
    Enrollment_Medical_Termination_Date DATE,
    Enrollment_Dental_Effective_Date DATE,
    Enrollment_Dental_Termination_Date DATE,
    Enrollment_Vision_Effective_Date DATE,
    Enrollment_Vision_Termination_Date DATE,
    Enrollment_Vendor_Name VARCHAR(20),
    Souce_File_Name VARCHAR(100),
    File_Ingestion_Date DATE
);


Mapping Data from Multiple Sources

In [0]:
%sql
SELECT row_number() OVER (ORDER BY 1) AS Abacus_Record_id, 
E.member_id,
E.member_status as Subscriber_id,
D.first_name AS Member_First_Name,
D.last_name AS Member_Last_Name,
coalesce(D.middle_name,'NONE') AS Member_Middle_Name,
'prefix' AS Member_Prefix_Name,
'suffix' AS Member_Suffix_Name,
Gen.Rollup_Description AS Member_Gender,
D.relationship AS Member_Relationship_Code,
date_format(D.dob, 'MM/dd/yyyy') AS Member_Date_of_Birth,
D.person_code AS Member_Person_Code,
D.address_1 AS Member_Address_Line_1,
D.address_2 AS Member_Address_Line_2,
D.city as Member_City,
D.state as Member_State,
D.county as Member_County,
"member@gmail.com" as email,
D.zip as Member_Postal_Code,
"U.S.A" as Member_Country,
E.group_id as Enrollment_Group_ID,
Grp.group_name as Enrollment_Group_Name,
E.coverage_type as Enrollment_Coverage_Code,
Cov.coverage_description as Enrollment_Coverage_Description,
E.plan_id as Enrollment_Plan_ID,
Pln.`plan name` as Enrollment_Plan_Name,
Pln.benefit_type as Enrollment_Plan_Coverage,

CASE
WHEN pln.BENEFIT_TYPE LIKE '%Medical%' THEN date_format(pln.EFFECTIVE_DATE,'MM/dd/yyyy')
ELSE "NONE"
END AS Enrollment_Medical_Effective_Date,

CASE
WHEN pln.BENEFIT_TYPE LIKE '%Medical%' THEN date_format(pln.TERMINATION_DATE,'MM/dd/yyyy')
ELSE "NONE"
END AS Enrollment_Medical_Termination_Date,

CASE 
WHEN pln.benefit_type like '%Dental%' then date_format(pln.effective_date, 'MM/dd/yyyy')
ELSE "None" 
END AS Enrollment_Dental_Effective_Date,

CASE 
WHEN pln.benefit_type like '%Dental%' then date_format(pln.termination_date, 'MM/dd/yyyy')
ELSE "None" 
END AS Enrollment_Dental_Termination_Date,

CASE 
WHEN pln.benefit_type like '%Vision%' then date_format(pln.effective_date, 'MM/dd/yyyy')
ELSE "None" 
END AS Enrollment_Vision_Effective_Date,

CASE 
WHEN pln.benefit_type like '%Vision%' then date_format(pln.termination_date, 'MM/dd/yyyy')
ELSE "None" 
END AS Enrollment_Vision_Termination_Date,

E.vendor as Enrollment_Vendor_Name,
current_timestamp() as File_Ingestion_Date,
"Member Enrolment" as Source_File_Name

from demographics_mock_data_table D
left join enrollment_mock_data_table E
on E.MEMBER_ID = D.MEMBER_ID
join crosswalks_table1  Gen on Gen.code = D.gender
join crosswalks_table3 Grp on Grp.group_id = E.GROUP_ID
join crosswalks_table4 Cov on Cov.Coverage_ID = E.COVERAGE_TYPE
join crosswalks_table5 Pln on pln.plan_id = E.PLAN_ID;

Abacus_Record_id,member_id,Subscriber_id,Member_First_Name,Member_Last_Name,Member_Middle_Name,Member_Prefix_Name,Member_Suffix_Name,Member_Gender,Member_Relationship_Code,Member_Date_of_Birth,Member_Person_Code,Member_Address_Line_1,Member_Address_Line_2,Member_City,Member_State,Member_County,email,Member_Postal_Code,Member_Country,Enrollment_Group_ID,Enrollment_Group_Name,Enrollment_Coverage_Code,Enrollment_Coverage_Description,Enrollment_Plan_ID,Enrollment_Plan_Name,Enrollment_Plan_Coverage,Enrollment_Medical_Effective_Date,Enrollment_Medical_Termination_Date,Enrollment_Dental_Effective_Date,Enrollment_Dental_Termination_Date,Enrollment_Vision_Effective_Date,Enrollment_Vision_Termination_Date,Enrollment_Vendor_Name,File_Ingestion_Date,Source_File_Name
1,181900,ACTIVE,Zain,Webb,NONE,prefix,suffix,Male,N,10/18/2004,13,203 Sporer Esplanade Unit 14,,Oxford,Massachusetts,Worcester County,member@gmail.com,,U.S.A,4,Digital Convergence Technologies,EP,Employee and Parents,0.0,Plan A,Medical,01/01/2018,12/31/2018,,,,,Aetna,2024-03-08T12:56:30.994+0000,Member Enrolment
2,21700,ACTIVE,Alessia,Romero,Mariel,prefix,suffix,Female,G4,07/12/1967,10,913 Schiller Well Apt 80,,Burlington,Massachusetts,Middlesex County,member@gmail.com,1803.0,U.S.A,1,Abacus Insights,EC,Employee and Childrens,0.0,Plan A,Medical,01/01/2018,12/31/2018,,,,,Aetna,2024-03-08T12:56:30.994+0000,Member Enrolment
3,17200,ACTIVE,Georgiana,Livingston,NONE,prefix,suffix,Female,G4,09/21/2003,10,814 Blick Avenue Unit 22,,Wayland,Massachusetts,Middlesex County,member@gmail.com,,U.S.A,4,Digital Convergence Technologies,EC,Employee and Childrens,0.0,Plan A,Medical,01/01/2018,12/31/2018,,,,,Cigna,2024-03-08T12:56:30.994+0000,Member Enrolment
4,83300,ACTIVE,Candice,Padilla,NONE,prefix,suffix,Female,F,12/10/1964,3,182 Trantow Mission Unit 20,,Bellingham,Massachusetts,Norfolk County,member@gmail.com,,U.S.A,1,Abacus Insights,ES,Employee and Spouse,0.0,Plan A,Medical,01/01/2018,12/31/2018,,,,,Cigna,2024-03-08T12:56:30.994+0000,Member Enrolment
5,150400,ACTIVE,Jayden,Kane,NONE,prefix,suffix,Female,F,07/08/2019,3,1054 Price Glen,,Haverhill,Massachusetts,Essex County,member@gmail.com,1835.0,U.S.A,1,Abacus Insights,E,Employee Only,0.0,Plan A,Medical,01/01/2018,12/31/2018,,,,,Aetna,2024-03-08T12:56:30.994+0000,Member Enrolment
6,216900,ACTIVE,Aurora,Lawson,NONE,prefix,suffix,Female,E,10/04/2012,1,194 Robel Skyway,,Boston,Massachusetts,Suffolk County,member@gmail.com,2124.0,U.S.A,4,Digital Convergence Technologies,EC,Employee and Childrens,1.0,Plan B,Medical and Dental,01/01/2018,12/31/2018,01/01/2018,12/31/2018,,,Cigna,2024-03-08T12:56:30.994+0000,Member Enrolment
7,145000,ACTIVE,Laura,Montoya,NONE,prefix,suffix,Female,F,03/29/2010,3,1094 Keebler Grove,,Gardner,Massachusetts,Worcester County,member@gmail.com,1440.0,U.S.A,1,Abacus Insights,F,Family,1.0,Plan B,Medical and Dental,01/01/2018,12/31/2018,01/01/2018,12/31/2018,,,Cigna,2024-03-08T12:56:30.994+0000,Member Enrolment
8,210200,ACTIVE,Mark,Armstrong,NONE,prefix,suffix,Male,G4,03/05/2020,10,730 Bogan Row,,Danvers,Massachusetts,Essex County,member@gmail.com,,U.S.A,1,Abacus Insights,F,Family,1.0,Plan B,Medical and Dental,01/01/2018,12/31/2018,01/01/2018,12/31/2018,,,Cigna,2024-03-08T12:56:30.994+0000,Member Enrolment
9,130400,ACTIVE,Neo,Aguirre,NONE,prefix,suffix,Male,C1,03/16/1993,5,489 Nienow Rue,,Fall River,Massachusetts,Bristol County,member@gmail.com,2790.0,U.S.A,8,TechKraft Inc,F,Family,1.0,Plan B,Medical and Dental,01/01/2018,12/31/2018,01/01/2018,12/31/2018,,,Aetna,2024-03-08T12:56:30.994+0000,Member Enrolment
10,32700,ACTIVE,Maya,Townsend,Merilyn,prefix,suffix,Female,G2,12/30/1960,8,763 Smitham Rue,,Worthington,Massachusetts,Hampshire County,member@gmail.com,,U.S.A,1,Abacus Insights,ES,Employee and Spouse,1.0,Plan B,Medical and Dental,01/01/2018,12/31/2018,01/01/2018,12/31/2018,,,Aetna,2024-03-08T12:56:30.994+0000,Member Enrolment
