# Enrollment and Demographic Data Mapping with Crosswaks -- US Healthcare 101 

In [0]:
%sql
---dbfs:/FileStore/shared_uploads/satishsubedi18@gmail.com/Eligibility_Mock_Data___US_Healthcare_Bootcamp.xlsx

In [0]:
%fs 
rm -r dbfs:/user/hive/warehouse/enrollment_mock_data_table
rm -r dbfs:/user/hive/warehouse/demographics_mock_data_table


## Creating table for enrollment and demographic shell from excel file

In [0]:
sheet_names = ["'Data Dictionary'", "'Crosswalks'", "'Enrollment Mock Data'", "'Demographics Mock Data'"]
file_location = "dbfs:/FileStore/shared_uploads/satishsubedi18@gmail.com/Eligibility_Mock_Data___US_Healthcare_Bootcamp.xlsx"
 
dfs = {}

from pyspark.sql.utils import AnalysisException
for sheet_name in sheet_names:
    try:
        df = spark.read.format("com.crealytics.spark.excel") \
                   .option("inferschema", True) \
                   .option("header", True) \
                   .option("dataAddress", f"{sheet_name}!") \
                   .option("sheetName", sheet_name) \
                   .load(file_location)
        
        processed_sheet_name = sheet_name.lower().replace(" ", "_").replace("'", "")
    
        df.createOrReplaceTempView(processed_sheet_name)
        print(f"View created for sheet: {processed_sheet_name}")
        
        spark.sql(f"CREATE TABLE {processed_sheet_name}_table AS SELECT * FROM {processed_sheet_name}")
        print(f"Table created for view: {processed_sheet_name}_table")
    except AnalysisException as e:
        print(f"Error creating table for view {processed_sheet_name}: {str(e)}")



View created for sheet: data_dictionary
Error creating table for view data_dictionary: Found invalid character(s) among ' ,;{}()\n\t=' in the column names of your schema. Please use other characters and try again.
View created for sheet: crosswalks
Error creating table for view crosswalks: Found invalid character(s) among ' ,;{}()\n\t=' in the column names of your schema. Please use other characters and try again.
View created for sheet: enrollment_mock_data
Table created for view: enrollment_mock_data_table
View created for sheet: demographics_mock_data
Table created for view: demographics_mock_data_table


In [0]:
%fs
rm -r dbfs:/user/hive/warehouse/crosswalks_Table5



## Creating table for all the Crosswalks from excel file

In [0]:
table_ranges = ["A2:C5", "A8:B25", "A28:B33", "A36:B43","A46:E62"]
sheet_name = "crosswalks"
for idx, table_range in enumerate(table_ranges, start=1):
    try:
        df = spark.read.format("com.crealytics.spark.excel") \
                   .option("inferschema", True) \
                   .option("header", True) \
                   .option("dataAddress", f"{sheet_name}!{table_range}") \
                   .option("sheetName", sheet_name) \
                   .load(file_location)
        processed_sheet_name = sheet_name.lower().replace(" ", "_").replace("'", "")
        table_name = f"{processed_sheet_name}_Table{idx}" 
        
        df.createOrReplaceTempView(table_name)
        print(f"Table created for range {table_range} as {table_name}")
    except AnalysisException as e:
        print(f"Error creating table for range {table_range}: {str(e)}")

Table created for range A2:C5 as crosswalks_Table1
Table created for range A8:B25 as crosswalks_Table2
Table created for range A28:B33 as crosswalks_Table3
Table created for range A36:B43 as crosswalks_Table4
Table created for range A46:E62 as crosswalks_Table5


In [0]:
%sql
select * from crosswalks_Table5;

PLAN_ID,Plan Name,BENEFIT_TYPE,EFFECTIVE_DATE,TERMINATION_DATE
0.0,Plan A,Medical,2018-01-01T00:00:00.000+0000,2018-12-31T00:00:00.000+0000
1.0,Plan B,Medical and Dental,2018-01-01T00:00:00.000+0000,2018-12-31T00:00:00.000+0000
2.0,Plan C,Medical and Vision,2018-01-01T00:00:00.000+0000,2018-12-31T00:00:00.000+0000
3.0,Plan D,"Medical, Dental and Vision",2018-01-01T00:00:00.000+0000,2018-12-31T00:00:00.000+0000
4.0,Plan E,Medical,2019-01-01T00:00:00.000+0000,2019-12-31T00:00:00.000+0000
5.0,Plan F,Medical and Dental,2019-01-01T00:00:00.000+0000,2019-12-31T00:00:00.000+0000
6.0,Plan G,Medical and Vision,2019-01-01T00:00:00.000+0000,2019-12-31T00:00:00.000+0000
7.0,Plan H,"Medical, Dental and Vision",2019-01-01T00:00:00.000+0000,2019-12-31T00:00:00.000+0000
8.0,Plan I,Medical,2020-01-01T00:00:00.000+0000,2020-12-31T00:00:00.000+0000
9.0,Plan J,Medical and Dental,2020-01-01T00:00:00.000+0000,2020-12-31T00:00:00.000+0000


## DDL For our targegt Table

In [0]:
%sql
CREATE TABLE IF NOT EXISTS TargetTable (
    Abacus_Record_ID VARCHAR(20),
    Abacus_Member_ID VARCHAR(20),
    Member_ID VARCHAR(20),
    Subscriber_ID VARCHAR(20),
    Member_First_Name VARCHAR(75),
    Member_Last_Name VARCHAR(75),
    Member_Middle_Name VARCHAR(75),
    Member_Prefix_Name VARCHAR(10),
    Member_Suffix_Name VARCHAR(10),
    Member_Gender VARCHAR(10),
    Member_Date_of_Birth DATE,
    Member_Relationship_Code VARCHAR(10),
    Member_Person_Code INTEGER,
    Member_Address_Line_1 VARCHAR(100),
    Member_Address_Line_2 VARCHAR(100),
    Member_City VARCHAR(20),
    Member_State VARCHAR(20),
    Member_County VARCHAR(50),
    Member_Postal_Code VARCHAR(10),
    Member_Country VARCHAR(20),
    Member_Home_Phone INTEGER,
    Member_Work_Phone INTEGER,
    Member_Mobile_Phone INTEGER,
    Member_Email VARCHAR(100),
    Member_Is_Deceased VARCHAR(10),
    Member_Date_of_Death DATE,
    Member_Deceased_Reason VARCHAR(100),
    Enrollment_Group_ID VARCHAR(20),
    Enrollment_Group_Name VARCHAR(50),
    Enrollment_SubGroup_ID VARCHAR(20),
    Enrollment_SubGroup_Name VARCHAR(50),
    Enrollment_Coverage_Code VARCHAR(10),
    Enrollment_Coverage_Description VARCHAR(30),
    Enrollment_Plan_ID VARCHAR(10),
    Enrollment_Plan_Name VARCHAR(30),
    Enrollment_Plan_Coverage VARCHAR(50),
    Enrollment_Medical_Effective_Date DATE,
    Enrollment_Medical_Termination_Date DATE,
    Enrollment_Dental_Effective_Date DATE,
    Enrollment_Dental_Termination_Date DATE,
    Enrollment_Vision_Effective_Date DATE,
    Enrollment_Vision_Termination_Date DATE,
    Enrollment_Vendor_Name VARCHAR(20),
    Souce_File_Name VARCHAR(100),
    File_Ingestion_Date DATE
);


## Data Mapping from multiple Source

In [0]:
%sql
INSERT INTO TargetTable
--Data maping from tables
select
ROW_NUMBER() OVER (order by 1) as Abacus_Record_Id,
left(concat(E.member_id,'-',DATE_FORMAT(TO_DATE(D.dob), 'dyyyyM'),'-',SUBSTR(E.member_status, 1, 1),'-',E.member_id),20) as Abacus_Member_Id,
E.member_id as Member_Id,
-- CASE 
-- WHEN current_date() > to_date(E.termination_date, 'yyyy-MM-dd') THEN 'ACTIVE'
-- ELSE "TERMED"
-- END AS Subscriber_id,
E.member_status as Subscriber_id,
D.first_name as Member_First_Name,
D.last_name as Member_Last_Name, 
coalesce(D.middle_name,'None' ) as Member_Middle_Name,
'prefix' as Member_Prefix_Name,
'suffix' as Member_Suffix_Name,
Gen.Rollup_Description as Member_Gender,
date_format(D.dob,'MM/dd/yyyy') as Member_Date_of_Birth,
D.relationship as Member_Relationship_Code,
D.person_code as Member_Person_Code,
D.address_1 as Member_Address_Line_1,
D.address_2 as Member_Address_Line_2,
D.city as Member_City,
D.state as Member_State,
D.county as Member_County,
D.zip as Member_Postal_Code,
"U.S.A" as Member_Country,
000 Member_Home_Phone,
000 Member_Work_Phone,
000 Member_Mobile_Phone,
"member@gmail.com" as Member_Email,
'None' as  Member_Is_Deceased, 
null as Member_Date_of_Death,
'None' as Member_Deceased_Reason,
E.group_id as Enrollment_Group_ID,
Grp.group_name as Enrollment_Group_Name,
'None' as Enrollment_SubGroup_ID,
'None' Enrollment_SubGroup_Name,
E.coverage_type as Enrollment_Coverage_Code,
Cov.coverage_description as Enrollment_Coverage_Description,
E.plan_id as Enrollment_Plan_ID,
Pln.`plan name` as Enrollment_Plan_Name,
Pln.benefit_type as Enrollment_Plan_Coverage,

CASE 
WHEN Pln.benefit_type like '%Medical%' then date_format(Pln.effective_date, 'MM/dd/yyyy')
ELSE NULL 
END AS Enrollment_Medical_Effective_Date,

CASE 
WHEN Pln.benefit_type like '%Medical%' then date_format(Pln.termination_date, 'MM/dd/yyyy')
ELSE NULL
END AS Enrollment_Medical_Termination_Date,

CASE 
WHEN Pln.benefit_type like '%Dental%' then date_format(Pln.effective_date, 'MM/dd/yyyy')
ELSE NULL 
END AS Enrollment_Dental_Effective_Date,

CASE 
WHEN Pln.benefit_type like '%Dental%' then date_format(Pln.termination_date, 'MM/dd/yyyy')
ELSE NULL 
END AS Enrollment_Dental_Termination_Date,

CASE 
WHEN Pln.benefit_type like '%Vision%' then date_format(Pln.effective_date, 'MM/dd/yyyy')
ELSE NULL
END AS Enrollment_Vision_Effective_Date,

CASE 
WHEN Pln.benefit_type like '%Vision%' then date_format(Pln.termination_date, 'MM/dd/yyyy')
ELSE NULL 
END AS Enrollment_Vision_Termination_Date,

E.vendor as Enrollment_Vendor_Name,
"Member Enrolment" as Source_File_Name,
current_timestamp() as File_Ingestion_Date

from demographics_mock_data_table D
left join enrollment_mock_data_table E
on E.MEMBER_ID = D.MEMBER_ID
join crosswalks_table1  Gen on Gen.code = D.gender
join crosswalks_table3 Grp on Grp.group_id = E.GROUP_ID
join crosswalks_table4 Cov on Cov.Coverage_ID = E.COVERAGE_TYPE
join crosswalks_table5 Pln on Pln.plan_id = E.PLAN_ID;

num_affected_rows,num_inserted_rows
107,107


In [0]:
%sql
select * from TargetTable;

Abacus_Record_ID,Abacus_Member_ID,Member_ID,Subscriber_ID,Member_First_Name,Member_Last_Name,Member_Middle_Name,Member_Prefix_Name,Member_Suffix_Name,Member_Gender,Member_Date_of_Birth,Member_Relationship_Code,Member_Person_Code,Member_Address_Line_1,Member_Address_Line_2,Member_City,Member_State,Member_County,Member_Postal_Code,Member_Country,Member_Home_Phone,Member_Work_Phone,Member_Mobile_Phone,Member_Email,Member_Is_Deceased,Member_Date_of_Death,Member_Deceased_Reason,Enrollment_Group_ID,Enrollment_Group_Name,Enrollment_SubGroup_ID,Enrollment_SubGroup_Name,Enrollment_Coverage_Code,Enrollment_Coverage_Description,Enrollment_Plan_ID,Enrollment_Plan_Name,Enrollment_Plan_Coverage,Enrollment_Medical_Effective_Date,Enrollment_Medical_Termination_Date,Enrollment_Dental_Effective_Date,Enrollment_Dental_Termination_Date,Enrollment_Vision_Effective_Date,Enrollment_Vision_Termination_Date,Enrollment_Vendor_Name,Souce_File_Name,File_Ingestion_Date
1,181900-18200410-A-18,181900,ACTIVE,Zain,Webb,,prefix,suffix,Male,10/18/2004,N,13,203 Sporer Esplanade Unit 14,,Oxford,Massachusetts,Worcester County,,U.S.A,0,0,0,member@gmail.com,,,,4,Digital Convergence Technologies,,,EP,Employee and Parents,0.0,Plan A,Medical,01/01/2018,12/31/2018,,,,,Aetna,Member Enrolment,2024-03-08
2,21700-1219677-A-2170,21700,ACTIVE,Alessia,Romero,Mariel,prefix,suffix,Female,07/12/1967,G4,10,913 Schiller Well Apt 80,,Burlington,Massachusetts,Middlesex County,1803.0,U.S.A,0,0,0,member@gmail.com,,,,1,Abacus Insights,,,EC,Employee and Childrens,0.0,Plan A,Medical,01/01/2018,12/31/2018,,,,,Aetna,Member Enrolment,2024-03-08
3,17200-2120039-A-1720,17200,ACTIVE,Georgiana,Livingston,,prefix,suffix,Female,09/21/2003,G4,10,814 Blick Avenue Unit 22,,Wayland,Massachusetts,Middlesex County,,U.S.A,0,0,0,member@gmail.com,,,,4,Digital Convergence Technologies,,,EC,Employee and Childrens,0.0,Plan A,Medical,01/01/2018,12/31/2018,,,,,Cigna,Member Enrolment,2024-03-08
4,83300-10196412-A-833,83300,ACTIVE,Candice,Padilla,,prefix,suffix,Female,12/10/1964,F,3,182 Trantow Mission Unit 20,,Bellingham,Massachusetts,Norfolk County,,U.S.A,0,0,0,member@gmail.com,,,,1,Abacus Insights,,,ES,Employee and Spouse,0.0,Plan A,Medical,01/01/2018,12/31/2018,,,,,Cigna,Member Enrolment,2024-03-08
5,150400-820197-A-1504,150400,ACTIVE,Jayden,Kane,,prefix,suffix,Female,07/08/2019,F,3,1054 Price Glen,,Haverhill,Massachusetts,Essex County,1835.0,U.S.A,0,0,0,member@gmail.com,,,,1,Abacus Insights,,,E,Employee Only,0.0,Plan A,Medical,01/01/2018,12/31/2018,,,,,Aetna,Member Enrolment,2024-03-08
6,216900-4201210-A-216,216900,ACTIVE,Aurora,Lawson,,prefix,suffix,Female,10/04/2012,E,1,194 Robel Skyway,,Boston,Massachusetts,Suffolk County,2124.0,U.S.A,0,0,0,member@gmail.com,,,,4,Digital Convergence Technologies,,,EC,Employee and Childrens,1.0,Plan B,Medical and Dental,01/01/2018,12/31/2018,01/01/2018,12/31/2018,,,Cigna,Member Enrolment,2024-03-08
7,145000-2920103-A-145,145000,ACTIVE,Laura,Montoya,,prefix,suffix,Female,03/29/2010,F,3,1094 Keebler Grove,,Gardner,Massachusetts,Worcester County,1440.0,U.S.A,0,0,0,member@gmail.com,,,,1,Abacus Insights,,,F,Family,1.0,Plan B,Medical and Dental,01/01/2018,12/31/2018,01/01/2018,12/31/2018,,,Cigna,Member Enrolment,2024-03-08
8,210200-520203-A-2102,210200,ACTIVE,Mark,Armstrong,,prefix,suffix,Male,03/05/2020,G4,10,730 Bogan Row,,Danvers,Massachusetts,Essex County,,U.S.A,0,0,0,member@gmail.com,,,,1,Abacus Insights,,,F,Family,1.0,Plan B,Medical and Dental,01/01/2018,12/31/2018,01/01/2018,12/31/2018,,,Cigna,Member Enrolment,2024-03-08
9,130400-1619933-A-130,130400,ACTIVE,Neo,Aguirre,,prefix,suffix,Male,03/16/1993,C1,5,489 Nienow Rue,,Fall River,Massachusetts,Bristol County,2790.0,U.S.A,0,0,0,member@gmail.com,,,,8,TechKraft Inc,,,F,Family,1.0,Plan B,Medical and Dental,01/01/2018,12/31/2018,01/01/2018,12/31/2018,,,Aetna,Member Enrolment,2024-03-08
10,32700-30196012-A-327,32700,ACTIVE,Maya,Townsend,Merilyn,prefix,suffix,Female,12/30/1960,G2,8,763 Smitham Rue,,Worthington,Massachusetts,Hampshire County,,U.S.A,0,0,0,member@gmail.com,,,,1,Abacus Insights,,,ES,Employee and Spouse,1.0,Plan B,Medical and Dental,01/01/2018,12/31/2018,01/01/2018,12/31/2018,,,Aetna,Member Enrolment,2024-03-08
