In [0]:
%python
from pyspark.sql.utils import AnalysisException

# Worksheets to ingest from the workbook. The embedded single quotes are part
# of the Excel sheet reference, since the sheet names contain spaces.
sheet_names = ["'Data Dictionary'", "'Crosswalks'", "'Enrollment Mock Data'", "'Demographics Mock Data'"]
file_location = "/FileStore/tables/EligibilityMock-1.xlsx"

# Loaded DataFrames keyed by the normalized sheet name (lower-case, underscores).
dfs = {}

for sheet_name in sheet_names:
    # Derive the view/table name BEFORE the try block so the error handler
    # always reports the sheet that actually failed. Previously a failed read
    # printed the previous sheet's name, or raised NameError on the first one.
    processed_sheet_name = sheet_name.lower().replace(" ", "_").replace("'", "")
    try:
        df = (
            spark.read.format("com.crealytics.spark.excel")
            .option("inferschema", True)
            .option("header", True)
            .option("dataAddress", f"{sheet_name}!")
            .option("sheetName", sheet_name)
            .load(file_location)
        )
        dfs[processed_sheet_name] = df

        df.createOrReplaceTempView(processed_sheet_name)
        print(f"View created for sheet: {processed_sheet_name}")

        # CREATE OR REPLACE keeps this cell re-runnable; a plain CREATE TABLE
        # fails on the second run because the table already exists.
        # Sheets whose header cells contain spaces or other characters that are
        # invalid in table column names still fail here and are reported below;
        # their temp views remain usable.
        spark.sql(f"CREATE OR REPLACE TABLE {processed_sheet_name}_table AS SELECT * FROM {processed_sheet_name}")
        print(f"Table created for view: {processed_sheet_name}_table")
    except AnalysisException as e:
        print(f"Error creating table for view {processed_sheet_name}: {str(e)}")

 

View created for sheet: data_dictionary
Error creating table for view data_dictionary: Found invalid character(s) among ' ,;{}()\n\t=' in the column names of your schema. Please use other characters and try again.
View created for sheet: crosswalks
Error creating table for view crosswalks: Found invalid character(s) among ' ,;{}()\n\t=' in the column names of your schema. Please use other characters and try again.
View created for sheet: enrollment_mock_data
Table created for view: enrollment_mock_data_table
View created for sheet: demographics_mock_data
Table created for view: demographics_mock_data_table


In [0]:
%python

# Rebuild the persisted enrollment table from the enrollment_mock_data temp view.
enrollment_ctas = """
    CREATE or REPLACE TABLE enrollment_mock_data_table 
    AS
    SELECT *
    FROM enrollment_mock_data
"""
spark.sql(enrollment_ctas)


Out[8]: DataFrame[num_affected_rows: bigint, num_inserted_rows: bigint]

In [0]:
%python
# Rebuild the persisted demographics table from the demographics_mock_data temp view.
demographics_ctas = """
    CREATE or REPLACE TABLE demographics_mock_data_table 
    AS
    SELECT *
    FROM demographics_mock_data
    """
spark.sql(demographics_ctas)

Out[9]: DataFrame[num_affected_rows: bigint, num_inserted_rows: bigint]

In [0]:
%python
# The crosswalks sheet stacks several small lookup tables; each cell range
# below is read separately and registered as its own temp view, numbered in
# list order starting at 1.
sheet_name = "crosswalks"
table_ranges = ["A2:C5", "A8:B25", "A28:B33", "A36:B43","A46:E62"]

for idx, table_range in enumerate(table_ranges, start=1):
    try:
        # Build the reader step by step, then load only the requested range.
        reader = spark.read.format("com.crealytics.spark.excel")
        reader = reader.option("inferschema", True)
        reader = reader.option("header", True)
        reader = reader.option("dataAddress", f"{sheet_name}!{table_range}")
        reader = reader.option("sheetName", sheet_name)
        df = reader.load(file_location)

        processed_sheet_name = sheet_name.lower().replace(" ", "_").replace("'", "")
        table_name = f"{processed_sheet_name}_Table{idx}"

        # Registers a session-scoped temp view (not a persisted table).
        df.createOrReplaceTempView(table_name)
        print(f"Table created for range {table_range} as {table_name}")
    except AnalysisException as e:
        print(f"Error creating table for range {table_range}: {str(e)}")

Table created for range A2:C5 as crosswalks_Table1
Table created for range A8:B25 as crosswalks_Table2
Table created for range A28:B33 as crosswalks_Table3
Table created for range A36:B43 as crosswalks_Table4
Table created for range A46:E62 as crosswalks_Table5


In [0]:
%sql
-- Preview the view built from range A28:B33 (GROUP_ID / GROUP_NAME).
SELECT *
FROM crosswalks_Table3;

GROUP_ID,GROUP_NAME
8,TechKraft Inc
1,Abacus Insights
4,Digital Convergence Technologies
3,Facebook
2,Google


In [0]:
%sql
-- Preview the view built from range A8:B25 (Rollup_Code / Rollup_Description).
SELECT *
FROM crosswalks_Table2;


Rollup_Code,Rollup_Description
E,Self
S,Spouse
F,Father
M,Mother
C1,Son
C2,Daughter
G1,Grand Father
G2,Grand Mother
G3,Grand Son
G4,Grand Daughter


In [0]:
%sql
-- Preview the view built from range A2:C5 (Code / Rollup_Code / Rollup_Description).
SELECT *
FROM crosswalks_Table1;


Code,Rollup_Code,Rollup_Description
0.0,F,Female
1.0,M,Male
2.0,U,Unknown


In [0]:
%sql
-- Preview the view built from range A36:B43 (Coverage_ID / Coverage_Description).
SELECT *
FROM crosswalks_Table4;


Coverage_ID,Coverage_Description
E,Employee Only
ES,Employee and Spouse
F,Family
E1C,Employee and 1 Child
EC,Employee and Childrens
EP,Employee and Parents
U,Unknown


In [0]:
%sql
-- Preview the view built from range A46:E62 (plan id, name, benefit type and dates).
SELECT *
FROM crosswalks_Table5;


PLAN_ID,Plan Name,BENEFIT_TYPE,EFFECTIVE_DATE,TERMINATION_DATE
0.0,Plan A,Medical,2018-01-01T00:00:00.000+0000,2018-12-31T00:00:00.000+0000
1.0,Plan B,Medical and Dental,2018-01-01T00:00:00.000+0000,2018-12-31T00:00:00.000+0000
2.0,Plan C,Medical and Vision,2018-01-01T00:00:00.000+0000,2018-12-31T00:00:00.000+0000
3.0,Plan D,"Medical, Dental and Vision",2018-01-01T00:00:00.000+0000,2018-12-31T00:00:00.000+0000
4.0,Plan E,Medical,2019-01-01T00:00:00.000+0000,2019-12-31T00:00:00.000+0000
5.0,Plan F,Medical and Dental,2019-01-01T00:00:00.000+0000,2019-12-31T00:00:00.000+0000
6.0,Plan G,Medical and Vision,2019-01-01T00:00:00.000+0000,2019-12-31T00:00:00.000+0000
7.0,Plan H,"Medical, Dental and Vision",2019-01-01T00:00:00.000+0000,2019-12-31T00:00:00.000+0000
8.0,Plan I,Medical,2020-01-01T00:00:00.000+0000,2020-12-31T00:00:00.000+0000
9.0,Plan J,Medical and Dental,2020-01-01T00:00:00.000+0000,2020-12-31T00:00:00.000+0000


In [0]:
%sql
-- Target table for the flattened member eligibility records.
-- The loader cell inserts BY POSITION, so the column order here is the contract.
-- NOTE: IF NOT EXISTS leaves an already-created table untouched; drop or ALTER
-- an existing TargetTable for the definition changes below to take effect.
CREATE TABLE IF NOT EXISTS TargetTable (
    -- Record / member identifiers
    Abacus_Record_ID VARCHAR(20),
    Abacus_Member_ID VARCHAR(20),
    Member_ID VARCHAR(20),
    Subscriber_ID VARCHAR(20),
    -- Member name and demographics
    Member_First_Name VARCHAR(75),
    Member_Last_Name VARCHAR(75),
    Member_Middle_Name VARCHAR(75),
    Member_Prefix_Name VARCHAR(10),
    Member_Suffix_Name VARCHAR(10),
    Member_Gender VARCHAR(10),
    Member_Date_of_Birth DATE,
    Member_Relationship_Code VARCHAR(10),
    Member_Person_Code INTEGER,
    -- Member address
    Member_Address_Line_1 VARCHAR(100),
    Member_Address_Line_2 VARCHAR(100),
    Member_City VARCHAR(20),
    Member_State VARCHAR(20),
    Member_County VARCHAR(50),
    Member_Postal_Code VARCHAR(10),
    Member_Country VARCHAR(20),
    -- Phone numbers are text: a 10-digit number overflows INTEGER
    -- (max 2,147,483,647) and a numeric type drops leading zeros and formatting.
    Member_Home_Phone VARCHAR(20),
    Member_Work_Phone VARCHAR(20),
    Member_Mobile_Phone VARCHAR(20),
    Member_Email VARCHAR(100),
    Member_Is_Deceased VARCHAR(10),
    Member_Date_of_Death DATE,
    Member_Deceased_Reason VARCHAR(100),
    -- Enrollment group / coverage / plan
    Enrollment_Group_ID VARCHAR(20),
    Enrollment_Group_Name VARCHAR(50),
    Enrollment_SubGroup_ID VARCHAR(20),
    Enrollment_SubGroup_Name VARCHAR(50),
    Enrollment_Coverage_Code VARCHAR(10),
    Enrollment_Coverage_Description VARCHAR(30),
    Enrollment_Plan_ID VARCHAR(10),
    Enrollment_Plan_Name VARCHAR(30),
    Enrollment_Plan_Coverage VARCHAR(50),
    -- Per-benefit coverage windows
    Enrollment_Medical_Effective_Date DATE,
    Enrollment_Medical_Termination_Date DATE,
    Enrollment_Dental_Effective_Date DATE,
    Enrollment_Dental_Termination_Date DATE,
    Enrollment_Vision_Effective_Date DATE,
    Enrollment_Vision_Termination_Date DATE,
    Enrollment_Vendor_Name VARCHAR(20),
    -- Load provenance (column name corrected from the misspelled "Souce_File_Name")
    Source_File_Name VARCHAR(100),
    File_Ingestion_Date DATE
);


In [0]:
%sql
-- Load the flattened member eligibility records into TargetTable.
-- INSERT matches the select list to the table columns BY POSITION, so the order
-- below must follow the TargetTable definition; the aliases are documentation only.
-- NOTE(review): several values below ('prefix', 'suffix', 'None', 000, the e-mail
-- address) are hard-coded placeholders, not source data.
-- NOTE(review): this is an append; re-running the cell inserts the rows again.
INSERT INTO TargetTable

SELECT
    -- Number rows by real keys instead of a constant, which left the numbering
    -- arbitrary between runs. Ties are still arbitrary if (member_id, plan_id)
    -- is not unique — TODO confirm.
    ROW_NUMBER() OVER (ORDER BY D.member_id, E.plan_id) AS Abacus_Record_Id,
    LEFT(CONCAT(E.member_id, '-', DATE_FORMAT(TO_DATE(D.dob), 'dyyyyM'), '-', SUBSTR(E.member_status, 1, 1), '-', E.member_id), 20) AS Abacus_Member_Id,
    E.member_id AS Member_Id,

    -- NOTE(review): Subscriber_ID is fed from member_status — confirm against the data dictionary.
    E.member_status AS Subscriber_Id,
    D.first_name AS Member_First_Name,
    D.last_name AS Member_Last_Name,
    COALESCE(D.middle_name, 'None') AS Member_Middle_Name,
    'prefix' AS Member_Prefix_Name,
    'suffix' AS Member_Suffix_Name,
    Gen.Rollup_Description AS Member_Gender,
    -- Target column is DATE: pass a date value, not an 'MM/dd/yyyy' string,
    -- which cannot be stored as a DATE (it becomes NULL or is rejected).
    TO_DATE(D.dob) AS Member_Date_of_Birth,
    D.relationship AS Member_Relationship_Code,
    D.person_code AS Member_Person_Code,
    D.address_1 AS Member_Address_Line_1,
    D.address_2 AS Member_Address_Line_2,
    D.city AS Member_City,
    D.state AS Member_State,
    D.county AS Member_County,
    D.zip AS Member_Postal_Code,
    'U.S.A' AS Member_Country,
    000 AS Member_Home_Phone,
    000 AS Member_Work_Phone,
    000 AS Member_Mobile_Phone,
    'member@gmail.com' AS Member_Email,
    'None' AS Member_Is_Deceased,
    NULL AS Member_Date_of_Death,
    'None' AS Member_Deceased_Reason,
    E.group_id AS Enrollment_Group_ID,
    Grp.group_name AS Enrollment_Group_Name,
    'None' AS Enrollment_SubGroup_ID,
    'None' AS Enrollment_SubGroup_Name,
    E.coverage_type AS Enrollment_Coverage_Code,
    Cov.coverage_description AS Enrollment_Coverage_Description,
    E.plan_id AS Enrollment_Plan_ID,
    Pln.`plan name` AS Enrollment_Plan_Name,
    Pln.benefit_type AS Enrollment_Plan_Coverage,

    -- The plan's date window is copied to each benefit named in benefit_type;
    -- benefits the plan does not include stay NULL. Values are cast to DATE to
    -- match the target columns instead of being formatted as strings.
    CASE
        WHEN Pln.benefit_type LIKE '%Medical%' THEN CAST(Pln.effective_date AS DATE)
        ELSE NULL
    END AS Enrollment_Medical_Effective_Date,

    CASE
        WHEN Pln.benefit_type LIKE '%Medical%' THEN CAST(Pln.termination_date AS DATE)
        ELSE NULL
    END AS Enrollment_Medical_Termination_Date,

    CASE
        WHEN Pln.benefit_type LIKE '%Dental%' THEN CAST(Pln.effective_date AS DATE)
        ELSE NULL
    END AS Enrollment_Dental_Effective_Date,

    CASE
        WHEN Pln.benefit_type LIKE '%Dental%' THEN CAST(Pln.termination_date AS DATE)
        ELSE NULL
    END AS Enrollment_Dental_Termination_Date,

    CASE
        WHEN Pln.benefit_type LIKE '%Vision%' THEN CAST(Pln.effective_date AS DATE)
        ELSE NULL
    END AS Enrollment_Vision_Effective_Date,

    CASE
        WHEN Pln.benefit_type LIKE '%Vision%' THEN CAST(Pln.termination_date AS DATE)
        ELSE NULL
    END AS Enrollment_Vision_Termination_Date,

    E.vendor AS Enrollment_Vendor_Name,
    'Member Enrolment' AS Source_File_Name,
    -- Target column is DATE, so use the current date rather than a timestamp.
    CURRENT_DATE() AS File_Ingestion_Date

FROM demographics_mock_data_table D
-- Written as INNER JOIN because the crosswalk joins below filter on E columns,
-- which already discarded demographics rows without an enrollment match when
-- this was a LEFT JOIN; the result set is unchanged.
-- NOTE(review): if members without enrollment must be kept, this join and the
-- Grp/Cov/Pln joins all need to become LEFT JOINs.
INNER JOIN enrollment_mock_data_table E
    ON E.MEMBER_ID = D.MEMBER_ID
INNER JOIN crosswalks_table1 Gen ON Gen.code = D.gender
INNER JOIN crosswalks_table3 Grp ON Grp.group_id = E.GROUP_ID
INNER JOIN crosswalks_table4 Cov ON Cov.Coverage_ID = E.COVERAGE_TYPE
INNER JOIN crosswalks_table5 Pln ON Pln.plan_id = E.PLAN_ID;

In [0]:
%sql
-- Inspect the rows loaded into TargetTable.
SELECT *
FROM TargetTable