In [0]:
sheet_names = ["'Data Dictionary'", "'Crosswalks'", "'Enrollment Mock Data'", "'Demographics Mock Data'"]
file_location = "dbfs:/FileStore/shared_uploads/subedisonu03@gmail.com/Eligibility_Mock_Data___US_Healthcare_Bootcamp-2.xlsx" 
dfs = {}

from pyspark.sql.utils import AnalysisException
for sheet_name in sheet_names:
    try:
        df = spark.read.format("com.crealytics.spark.excel") \
                   .option("inferschema", True) \
                   .option("header", True) \
                   .option("dataAddress", f"{sheet_name}!") \
                   .option("sheetName", sheet_name) \
                   .load(file_location)
        
        processed_sheet_name = sheet_name.lower().replace(" ", "_").replace("'", "")
    
        df.createOrReplaceTempView(processed_sheet_name)
        print(f"View created for sheet: {processed_sheet_name}")
        
        spark.sql(f"CREATE TABLE {processed_sheet_name}_table AS SELECT * FROM {processed_sheet_name}")
        print(f"Table created for view: {processed_sheet_name}_table")
    except AnalysisException as e:
        print(f"Error creating table for view {processed_sheet_name}: {str(e)}")

View created for sheet: data_dictionary
Error creating table for view data_dictionary: Found invalid character(s) among ' ,;{}()\n\t=' in the column names of your schema. Please use other characters and try again.
View created for sheet: crosswalks
Error creating table for view crosswalks: Found invalid character(s) among ' ,;{}()\n\t=' in the column names of your schema. Please use other characters and try again.
View created for sheet: enrollment_mock_data
Error creating table for view enrollment_mock_data: [TABLE_OR_VIEW_ALREADY_EXISTS] Cannot create table or view `default`.`enrollment_mock_data_table` because it already exists.
Choose a different name, drop or replace the existing object, add the IF NOT EXISTS clause to tolerate pre-existing objects, or add the OR REFRESH clause to refresh the existing streaming table.
View created for sheet: demographics_mock_data
Error creating table for view demographics_mock_data: [TABLE_OR_VIEW_ALREADY_EXISTS] Cannot create table or view `defa

Creating table for demographics_mock_data

In [0]:
spark.sql("""
    CREATE or REPLACE TABLE demographics_mock_data_table 
    AS
    SELECT *
    FROM demographics_mock_data
    """)

Out[63]: DataFrame[num_affected_rows: bigint, num_inserted_rows: bigint]

Creating table for enrollment_mock_data

In [0]:
spark.sql("""
    CREATE or REPLACE TABLE enrollment_mock_data_table 
    AS
    SELECT *
    FROM enrollment_mock_data
""")


Out[64]: DataFrame[num_affected_rows: bigint, num_inserted_rows: bigint]

Creating table for all the Crosswalks from excel file

In [0]:
table_ranges = ["A2:C5", "A8:B25", "A28:B33", "A36:B43","A46:E62"]
sheet_name = "crosswalks"
for idx, table_range in enumerate(table_ranges, start=1):
    try:
        df = spark.read.format("com.crealytics.spark.excel") \
                   .option("inferschema", True) \
                   .option("header", True) \
                   .option("dataAddress", f"{sheet_name}!{table_range}") \
                   .option("sheetName", sheet_name) \
                   .load(file_location)
        processed_sheet_name = sheet_name.lower().replace(" ", "_").replace("'", "")
        table_name = f"{processed_sheet_name}_Table{idx}" 
        
        df.createOrReplaceTempView(table_name)
        print(f"Table created for range {table_range} as {table_name}")
    except AnalysisException as e:
        print(f"Error creating table for range {table_range}: {str(e)}")

Table created for range A2:C5 as crosswalks_Table1
Table created for range A8:B25 as crosswalks_Table2
Table created for range A28:B33 as crosswalks_Table3
Table created for range A36:B43 as crosswalks_Table4
Table created for range A46:E62 as crosswalks_Table5


In [0]:
%sql
select * from crosswalks_Table4;

Coverage_ID,Coverage_Description
E,Employee Only
ES,Employee and Spouse
F,Family
E1C,Employee and 1 Child
EC,Employee and Childrens
EP,Employee and Parents
U,Unknown


In [0]:
%sql
select * from crosswalks_Table5; 

PLAN_ID,Plan Name,BENEFIT_TYPE,EFFECTIVE_DATE,TERMINATION_DATE
0.0,Plan A,Medical,2018-01-01T00:00:00.000+0000,2018-12-31T00:00:00.000+0000
1.0,Plan B,Medical and Dental,2018-01-01T00:00:00.000+0000,2018-12-31T00:00:00.000+0000
2.0,Plan C,Medical and Vision,2018-01-01T00:00:00.000+0000,2018-12-31T00:00:00.000+0000
3.0,Plan D,"Medical, Dental and Vision",2018-01-01T00:00:00.000+0000,2018-12-31T00:00:00.000+0000
4.0,Plan E,Medical,2019-01-01T00:00:00.000+0000,2019-12-31T00:00:00.000+0000
5.0,Plan F,Medical and Dental,2019-01-01T00:00:00.000+0000,2019-12-31T00:00:00.000+0000
6.0,Plan G,Medical and Vision,2019-01-01T00:00:00.000+0000,2019-12-31T00:00:00.000+0000
7.0,Plan H,"Medical, Dental and Vision",2019-01-01T00:00:00.000+0000,2019-12-31T00:00:00.000+0000
8.0,Plan I,Medical,2020-01-01T00:00:00.000+0000,2020-12-31T00:00:00.000+0000
9.0,Plan J,Medical and Dental,2020-01-01T00:00:00.000+0000,2020-12-31T00:00:00.000+0000


Schema of Target Table

In [0]:

%sql
CREATE TABLE IF NOT EXISTS TargetTable (
    Abacus_Record_ID STRING, 
    Abacus_Member_ID STRING, 
    Member_ID STRING, 
    Subscriber_ID STRING,
    Member_First_Name STRING,
    Member_Last_Name STRING,
    Member_Middle_Name STRING, 
    Member_Prefix_Name STRING,
    Member_Suffix_Name STRING, 
    Member_Gender STRING, 
    Member_Date_of_Birth STRING,
    Member_Relationship_Code STRING,
    Member_Relationship_Description STRING,
    Member_Person_Code INT, 
    Member_Address_Line_1 STRING,
    Member_Address_Line_2 STRING,
    Member_City STRING,
    Member_State STRING,
    Member_County STRING,
    Member_Postal_Code STRING,
    Member_Country_Code STRING,
    Member_Home_Phone INT,
    Member_Work_Phone INT,
    Member_Mobile_Phone INT,
    Member_Email STRING, --
    Member_Is_Deceased BOOLEAN, 
    Member_Date_of_Death String,
    Member_Deceased_Reason STRING,
    Enrollment_Group_ID STRING,
    Enrollment_Group_Name STRING, 
    Enrollment_SubGroup_ID STRING,
    Enrollment_SubGroup_Name STRING,
    Enrollment_Coverage_Code STRING,
    Enrollment_Coverage_Description STRING,
    Enrollment_Plan_ID STRING,
    Enrollment_Plan_Name STRING,
    Enrollment_Plan_Coverage STRING, 
    Enrollment_Medical_Effective_Date STRING, 
    Enrollment_Medical_Termination_Date STRING, 
    Enrollment_Dental_Effective_Date STRING, 
    Enrollment_Dental_Termination_Date STRING,
    Enrollment_Vision_Effective_Date STRING, 
    Enrollment_Vision_Termination_Date STRING,
    Enrollment_Vendor_Name STRING,
    Source_File_Name STRING,
    File_Ingestion_Date DATE 
)

In [0]:
%sql
WITH MedicalTemp AS (
    SELECT
        plan_id,
        CASE 
            WHEN Pln.benefit_type LIKE '%Medical%' THEN date_format(Pln.effective_date, 'MM/dd/yyyy')
            ELSE 'None'
        END AS Enrollment_Medical_Effective_Date,
        CASE 
            WHEN Pln.benefit_type LIKE '%Medical%' THEN date_format(Pln.termination_date, 'MM/dd/yyyy')
            ELSE 'None'
        END AS Enrollment_Medical_Termination_Date,
        CASE 
            WHEN Pln.benefit_type LIKE '%Dental%' THEN date_format(Pln.effective_date, 'MM/dd/yyyy')
            ELSE 'None'
        END AS Enrollment_Dental_Effective_Date,
        CASE 
            WHEN Pln.benefit_type LIKE '%Dental%' THEN date_format(Pln.termination_date, 'MM/dd/yyyy')
            ELSE 'None'
        END AS Enrollment_Dental_Termination_Date,
        CASE 
            WHEN Pln.benefit_type LIKE '%Vision%' THEN date_format(Pln.effective_date, 'MM/dd/yyyy')
            ELSE 'None'
        END AS Enrollment_Vision_Effective_Date,
        CASE 
            WHEN Pln.benefit_type LIKE '%Vision%' THEN date_format(Pln.termination_date, 'MM/dd/yyyy')
            ELSE 'None'
        END AS Enrollment_Vision_Termination_Date
    FROM
        crosswalks_Table5 Pln
)

INSERT INTO TargetTable
SELECT
    ROW_NUMBER() OVER (ORDER BY 1) AS Abacus_Record_Id,
    LEFT(
        CONCAT(
            SUBSTR(E.member_id, 1, 2),
            '',
            DATE_FORMAT(TO_DATE(D.dob), 'dyyyyM'),
            '',
            SUBSTR(E.member_status, 1, 1),
            '',
            E.member_id
        ),
        20
    ) AS Abacus_Member_Id,
    E.member_id AS Member_Id,
    E.member_status AS Subscriber_id,
    D.first_name AS Member_First_Name,
    D.last_name AS Member_Last_Name,
    COALESCE(D.middle_name, '') AS Member_Middle_Name,
    'prefix' AS Member_Prefix_Name,
    'suffix' AS Member_Suffix_Name,
    Gen.Rollup_Description AS gender,
    DATE_FORMAT(D.dob, 'MM/dd/yyyy') AS Member_Date_of_Birth,
    D.relationship AS Member_Relationship_Code,
    0 AS Member_Relationship_Description,
    D.person_code AS Member_Person_Code,
    D.address_1 AS Member_Address_Line_1,
    D.address_2 AS Member_Address_Line_2,
    D.city AS Member_City,
    D.state AS Member_State,
    D.county AS Member_County,
    D.zip AS Member_Postal_Code,
    'U.S' AS Member_Country_Code,
    0 AS Member_Home_Phone,
    0 AS Member_Work_Phone,
    0 AS Member_Mobile_Phone,
    '' AS Member_Email,
    0 AS Member_Is_Deceased,
    NULL AS Member_Date_of_Death,
    0 AS Member_Deceased_Reason,
    E.group_id AS Enrollment_Group_ID,
    Grp.group_name AS Enrollment_Group_Name,
    0 AS Enrollment_SubGroup_ID,
    '' AS Enrollment_SubGroup_Name,
    E.coverage_type AS Enrollment_Coverage_Code,
    Cov.coverage_description AS Enrollment_Coverage_Description,
    E.plan_id AS Enrollment_Plan_ID,
    Pln.`plan name` AS Enrollment_Plan_Name,
    Pln.benefit_type AS Enrollment_Plan_Coverage,
    med.Enrollment_Medical_Effective_Date,
    med.Enrollment_Medical_Termination_Date,
    med.Enrollment_Dental_Effective_Date,
    med.Enrollment_Dental_Termination_Date,
    med.Enrollment_Vision_Effective_Date,
    med.Enrollment_Vision_Termination_Date,
    E.vendor AS Enrollment_Vendor_Name,
    'Member_Enrolment' AS Source_File_Name,
    CURRENT_TIMESTAMP() AS File_Ingestion_Date
FROM
    demographics_mock_data_table D
LEFT JOIN
    enrollment_mock_data_table E ON E.MEMBER_ID = D.MEMBER_ID
JOIN
    crosswalks_Table1 Gen ON Gen.code = D.gender
JOIN
    crosswalks_Table3 Grp ON Grp.group_id = E.GROUP_ID
JOIN
    crosswalks_Table4 Cov ON Cov.Coverage_ID = E.COVERAGE_TYPE
JOIN
    crosswalks_Table5 Pln ON Pln.plan_id = E.PLAN_ID
JOIN
    MedicalTemp med ON med.plan_id = Pln.plan_id;


num_affected_rows,num_inserted_rows
107,107


In [0]:
%sql
select * from TargetTable;


Abacus_Record_ID,Abacus_Member_ID,Member_ID,Subscriber_ID,Member_First_Name,Member_Last_Name,Member_Middle_Name,Member_Prefix_Name,Member_Suffix_Name,Member_Gender,Member_Date_of_Birth,Member_Relationship_Code,Member_Relationship_Description,Member_Person_Code,Member_Address_Line_1,Member_Address_Line_2,Member_City,Member_State,Member_County,Member_Postal_Code,Member_Country_Code,Member_Home_Phone,Member_Work_Phone,Member_Mobile_Phone,Member_Email,Member_Is_Deceased,Member_Date_of_Death,Member_Deceased_Reason,Enrollment_Group_ID,Enrollment_Group_Name,Enrollment_SubGroup_ID,Enrollment_SubGroup_Name,Enrollment_Coverage_Code,Enrollment_Coverage_Description,Enrollment_Plan_ID,Enrollment_Plan_Name,Enrollment_Plan_Coverage,Enrollment_Medical_Effective_Date,Enrollment_Medical_Termination_Date,Enrollment_Dental_Effective_Date,Enrollment_Dental_Termination_Date,Enrollment_Vision_Effective_Date,Enrollment_Vision_Termination_Date,Enrollment_Vendor_Name,Source_File_Name,File_Ingestion_Date
1,15820197A150400,150400,ACTIVE,Jayden,Kane,,prefix,suffix,Female,07/08/2019,F,0,3,1054 Price Glen,,Haverhill,Massachusetts,Essex County,1835.0,U.S,0,0,0,,False,,0,1,Abacus Insights,0,,E,Employee Only,0.0,Plan A,Medical,01/01/2018,12/31/2018,,,,,Aetna,Member_Enrolment,2024-03-10
2,8310196412A83300,83300,ACTIVE,Candice,Padilla,,prefix,suffix,Female,12/10/1964,F,0,3,182 Trantow Mission Unit 20,,Bellingham,Massachusetts,Norfolk County,,U.S,0,0,0,,False,,0,1,Abacus Insights,0,,ES,Employee and Spouse,0.0,Plan A,Medical,01/01/2018,12/31/2018,,,,,Cigna,Member_Enrolment,2024-03-10
3,172120039A17200,17200,ACTIVE,Georgiana,Livingston,,prefix,suffix,Female,09/21/2003,G4,0,10,814 Blick Avenue Unit 22,,Wayland,Massachusetts,Middlesex County,,U.S,0,0,0,,False,,0,4,Digital Convergence Technologies,0,,EC,Employee and Childrens,0.0,Plan A,Medical,01/01/2018,12/31/2018,,,,,Cigna,Member_Enrolment,2024-03-10
4,211219677A21700,21700,ACTIVE,Alessia,Romero,Mariel,prefix,suffix,Female,07/12/1967,G4,0,10,913 Schiller Well Apt 80,,Burlington,Massachusetts,Middlesex County,1803.0,U.S,0,0,0,,False,,0,1,Abacus Insights,0,,EC,Employee and Childrens,0.0,Plan A,Medical,01/01/2018,12/31/2018,,,,,Aetna,Member_Enrolment,2024-03-10
5,1818200410A181900,181900,ACTIVE,Zain,Webb,,prefix,suffix,Male,10/18/2004,N,0,13,203 Sporer Esplanade Unit 14,,Oxford,Massachusetts,Worcester County,,U.S,0,0,0,,False,,0,4,Digital Convergence Technologies,0,,EP,Employee and Parents,0.0,Plan A,Medical,01/01/2018,12/31/2018,,,,,Aetna,Member_Enrolment,2024-03-10
6,22920195A22900,22900,ACTIVE,Pauline,Knight,,prefix,suffix,Female,05/09/2019,N,0,13,376 Skiles Forge Suite 48,,Westminster,Massachusetts,Worcester County,,U.S,0,0,0,,False,,0,1,Abacus Insights,0,,E,Employee Only,1.0,Plan B,Medical and Dental,01/01/2018,12/31/2018,01/01/2018,12/31/2018,,,Aetna,Member_Enrolment,2024-03-10
7,161019604A160400,160400,ACTIVE,Isabel,Ballard,,prefix,suffix,Female,04/10/1960,N,0,13,330 Hermiston Trafficway,,Westborough,Massachusetts,Worcester County,,U.S,0,0,0,,False,,0,8,TechKraft Inc,0,,E,Employee Only,1.0,Plan B,Medical and Dental,01/01/2018,12/31/2018,01/01/2018,12/31/2018,,,Cigna,Member_Enrolment,2024-03-10
8,3230196012A32700,32700,ACTIVE,Maya,Townsend,Merilyn,prefix,suffix,Female,12/30/1960,G2,0,8,763 Smitham Rue,,Worthington,Massachusetts,Hampshire County,,U.S,0,0,0,,False,,0,1,Abacus Insights,0,,ES,Employee and Spouse,1.0,Plan B,Medical and Dental,01/01/2018,12/31/2018,01/01/2018,12/31/2018,,,Aetna,Member_Enrolment,2024-03-10
9,142920103A145000,145000,ACTIVE,Laura,Montoya,,prefix,suffix,Female,03/29/2010,F,0,3,1094 Keebler Grove,,Gardner,Massachusetts,Worcester County,1440.0,U.S,0,0,0,,False,,0,1,Abacus Insights,0,,F,Family,1.0,Plan B,Medical and Dental,01/01/2018,12/31/2018,01/01/2018,12/31/2018,,,Cigna,Member_Enrolment,2024-03-10
10,21520203A210200,210200,ACTIVE,Mark,Armstrong,,prefix,suffix,Male,03/05/2020,G4,0,10,730 Bogan Row,,Danvers,Massachusetts,Essex County,,U.S,0,0,0,,False,,0,1,Abacus Insights,0,,F,Family,1.0,Plan B,Medical and Dental,01/01/2018,12/31/2018,01/01/2018,12/31/2018,,,Cigna,Member_Enrolment,2024-03-10
