In [0]:
%sql
-- dbfs:/FileStore/tables/coverage_ref.csv
-- dbfs:/FileStore/tables/demographic_data.csv
-- dbfs:/FileStore/tables/gender_ref.csv
-- dbfs:/FileStore/tables/group_ref.csv
-- dbfs:/FileStore/tables/relationship_reference.csv
-- dbfs:/FileStore/tables/plan.csv
-- dbfs:/FileStore/tables/enrollment_data.csv


In [0]:
from pyspark.sql import functions as F

### tables

#### enrollment_mock_data

In [0]:

enrollment_mock_data = spark.read.format("csv") \
                              .option("inferSchema", "true") \
                              .option("delimiter", "\t") \
                              .option("header", "true") \
                              .load("dbfs:/FileStore/tables/enrollment_data.csv")
display(enrollment_mock_data)
enrollment_mock_data.printSchema()

MEMBER_ID,MEMBER_STATUS,GROUP_ID,COVERAGE_TYPE,PLAN_ID,VENDOR,EFFECTIVE_DATE,TERMINATION_DATE
173400,ACTIVE,1,E1C,11,Cigna,2021-01-01,2021-02-28
83300,ACTIVE,1,ES,0,Cigna,2019-12-01,2020-01-31
106800,ACTIVE,1,U,4,Aetna,2020-09-01,2020-10-31
52900,ACTIVE,1,ES,15,Aetna,2021-02-01,2021-03-31
97600,ACTIVE,1,E,10,Aetna,2021-01-01,2021-01-31
22000,ACTIVE,1,EC,15,Cigna,2020-03-01,2020-04-30
22900,ACTIVE,1,E,1,Aetna,2020-07-01,2020-07-31
122500,ACTIVE,1,F,10,Cigna,2020-11-01,2020-12-31
21800,ACTIVE,8,E1C,4,Aetna,2020-12-01,2021-01-31
145000,ACTIVE,1,F,1,Cigna,2020-03-01,2020-04-30


root
 |-- MEMBER_ID: integer (nullable = true)
 |-- MEMBER_STATUS: string (nullable = true)
 |-- GROUP_ID: integer (nullable = true)
 |-- COVERAGE_TYPE: string (nullable = true)
 |-- PLAN_ID: integer (nullable = true)
 |-- VENDOR: string (nullable = true)
 |-- EFFECTIVE_DATE: date (nullable = true)
 |-- TERMINATION_DATE: date (nullable = true)



In [0]:
enrollment_mock_data = enrollment_mock_data.withColumnRenamed("EFFECTIVE_DATE","effective_date_member")
enrollment_mock_data = enrollment_mock_data.withColumnRenamed("TERMINATION_DATE","termination_date_member")

In [0]:
df = enrollment_mock_data.withColumn("test",F.when(enrollment_mock_data.termination_date_member <= enrollment_mock_data.effective_date_member,'yes').otherwise('no'))
display(df)

MEMBER_ID,MEMBER_STATUS,GROUP_ID,COVERAGE_TYPE,PLAN_ID,VENDOR,effective_date_member,termination_date_member,test
173400,ACTIVE,1,E1C,11,Cigna,2021-01-01,2021-02-28,no
83300,ACTIVE,1,ES,0,Cigna,2019-12-01,2020-01-31,no
106800,ACTIVE,1,U,4,Aetna,2020-09-01,2020-10-31,no
52900,ACTIVE,1,ES,15,Aetna,2021-02-01,2021-03-31,no
97600,ACTIVE,1,E,10,Aetna,2021-01-01,2021-01-31,no
22000,ACTIVE,1,EC,15,Cigna,2020-03-01,2020-04-30,no
22900,ACTIVE,1,E,1,Aetna,2020-07-01,2020-07-31,no
122500,ACTIVE,1,F,10,Cigna,2020-11-01,2020-12-31,no
21800,ACTIVE,8,E1C,4,Aetna,2020-12-01,2021-01-31,no
145000,ACTIVE,1,F,1,Cigna,2020-03-01,2020-04-30,no


#### coverage_crosswalk

In [0]:
coverage_crosswalk = spark.read.format("csv") \
                              .option("inferSchema", "true") \
                              .option("delimiter", "\t") \
                              .option("header", "true") \
                              .load("dbfs:/FileStore/tables/coverage_ref.csv")
display(coverage_crosswalk)
coverage_crosswalk.printSchema()

Coverage_ID,Coverage_Description
E,Employee Only
ES,Employee and Spouse
F,Family
E1C,Employee and 1 Child
EC,Employee and Childrens
EP,Employee and Parents
U,Unknown


root
 |-- Coverage_ID: string (nullable = true)
 |-- Coverage_Description: string (nullable = true)



In [0]:
coverage_crosswalk = coverage_crosswalk.withColumnRenamed("Coverage_ID","COVERAGE_TYPE")

#### gender_crosswalk

In [0]:
gender_crosswalk = spark.read.format("csv") \
                              .option("inferSchema", "true") \
                              .option("delimiter", "\t") \
                              .option("header", "true") \
                              .load("dbfs:/FileStore/tables/gender_ref.csv")
display(gender_crosswalk)
gender_crosswalk.printSchema()

Code,Rollup_Code,Rollup_Description
0,F,Female
1,M,Male
2,U,Unknown


root
 |-- Code: integer (nullable = true)
 |-- Rollup_Code: string (nullable = true)
 |-- Rollup_Description: string (nullable = true)



In [0]:
gender_crosswalk = gender_crosswalk.withColumnRenamed("Code","Gender_code")
gender_crosswalk = gender_crosswalk.withColumnRenamed("Rollup_Description","Gender_Description")

#### group_crosswalk

In [0]:
group_crosswalk = spark.read.format("csv") \
                              .option("inferSchema", "true") \
                              .option("delimiter", "\t") \
                              .option("header", "true") \
                              .load("dbfs:/FileStore/tables/group_ref.csv")
display(group_crosswalk)
group_crosswalk.printSchema()

GROUP_ID,GROUP_NAME
8,TechKraft Inc
1,Abacus Insights
4,Digital Convergence Technologies
3,Facebook
2,Google


root
 |-- GROUP_ID: integer (nullable = true)
 |-- GROUP_NAME: string (nullable = true)



#### relationship crosswalk

In [0]:
relationship_crosswalk = spark.read.format("csv") \
                              .option("inferSchema", "true") \
                              .option("delimiter", "\t") \
                              .option("header", "true") \
                              .load("dbfs:/FileStore/tables/relationship_reference.csv")
display(relationship_crosswalk)
relationship_crosswalk.printSchema()

Rollup_Code,Rollup_Description
E,Self
S,Spouse
F,Father
M,Mother
C1,Son
C2,Daughter
G1,Grand Father
G2,Grand Mother
G3,Grand Son
G4,Grand Daughter


root
 |-- Rollup_Code: string (nullable = true)
 |-- Rollup_Description: string (nullable = true)



In [0]:
relationship_crosswalk = relationship_crosswalk.withColumnRenamed("Rollup_code","Relationship_Code")
relationship_crosswalk = relationship_crosswalk.withColumnRenamed("Rollup_Description","Relationship_with_Suscriber")

In [0]:
df = relationship_crosswalk.select("Relationship_with_Suscriber").distinct()
display(df)

Relationship_with_Suscriber
Fiance
Grand Father
Spouse
Father
Self
Son
Unknown
Adapted Daughter
Grand Mother
Adopted Son


#### plan_crosswalk


In [0]:
plan_crosswalk = spark.read.format("csv") \
                              .option("inferSchema", "true") \
                              .option("delimiter", "\t") \
                              .option("header", "true") \
                              .load("dbfs:/FileStore/tables/plan.csv")
display(plan_crosswalk)
plan_crosswalk.printSchema()

PLAN_ID,Plan Name,BENEFIT_TYPE,EFFECTIVE_DATE,TERMINATION_DATE
0,Plan A,Medical,2018-01-01,2018-12-31
1,Plan B,Medical and Dental,2018-01-01,2018-12-31
2,Plan C,Medical and Vision,2018-01-01,2018-12-31
3,Plan D,"Medical, Dental and Vision",2018-01-01,2018-12-31
4,Plan E,Medical,2019-01-01,2019-12-31
5,Plan F,Medical and Dental,2019-01-01,2019-12-31
6,Plan G,Medical and Vision,2019-01-01,2019-12-31
7,Plan H,"Medical, Dental and Vision",2019-01-01,2019-12-31
8,Plan I,Medical,2020-01-01,2020-12-31
9,Plan J,Medical and Dental,2020-01-01,2020-12-31


root
 |-- PLAN_ID: integer (nullable = true)
 |-- Plan Name: string (nullable = true)
 |-- BENEFIT_TYPE: string (nullable = true)
 |-- EFFECTIVE_DATE: date (nullable = true)
 |-- TERMINATION_DATE: date (nullable = true)



In [0]:
plan_crosswalk = plan_crosswalk.withColumnRenamed("Plan Name","Plan_Name")

In [0]:
display(plan_crosswalk)

PLAN_ID,Plan_Name,BENEFIT_TYPE,EFFECTIVE_DATE,TERMINATION_DATE
0,Plan A,Medical,2018-01-01,2018-12-31
1,Plan B,Medical and Dental,2018-01-01,2018-12-31
2,Plan C,Medical and Vision,2018-01-01,2018-12-31
3,Plan D,"Medical, Dental and Vision",2018-01-01,2018-12-31
4,Plan E,Medical,2019-01-01,2019-12-31
5,Plan F,Medical and Dental,2019-01-01,2019-12-31
6,Plan G,Medical and Vision,2019-01-01,2019-12-31
7,Plan H,"Medical, Dental and Vision",2019-01-01,2019-12-31
8,Plan I,Medical,2020-01-01,2020-12-31
9,Plan J,Medical and Dental,2020-01-01,2020-12-31


In [0]:
df = plan_crosswalk.select("BENEFIT_TYPE").distinct()
display(df)

BENEFIT_TYPE
"Medical, Dental and Vision"
Medical and Vision
Medical
Medical and Dental


In [0]:
df = plan_crosswalk.withColumn("test",F.when(plan_crosswalk.TERMINATION_DATE <= plan_crosswalk.EFFECTIVE_DATE,'yes').otherwise('no'))
display(df)

PLAN_ID,Plan_Name,BENEFIT_TYPE,EFFECTIVE_DATE,TERMINATION_DATE,test
0,Plan A,Medical,2018-01-01,2018-12-31,no
1,Plan B,Medical and Dental,2018-01-01,2018-12-31,no
2,Plan C,Medical and Vision,2018-01-01,2018-12-31,no
3,Plan D,"Medical, Dental and Vision",2018-01-01,2018-12-31,no
4,Plan E,Medical,2019-01-01,2019-12-31,no
5,Plan F,Medical and Dental,2019-01-01,2019-12-31,no
6,Plan G,Medical and Vision,2019-01-01,2019-12-31,no
7,Plan H,"Medical, Dental and Vision",2019-01-01,2019-12-31,no
8,Plan I,Medical,2020-01-01,2020-12-31,no
9,Plan J,Medical and Dental,2020-01-01,2020-12-31,no


#### demographic_mock_data

In [0]:
demographic_mock_data = spark.read.format("csv") \
                              .option("inferSchema", "true") \
                              .option("delimiter", ",") \
                              .option("header", "true") \
                              .load("dbfs:/FileStore/tables/demographic_data.csv")
display(demographic_mock_data)
demographic_mock_data.printSchema()

MEMBER_ID,EMPLOYEE_ID,FIRST_NAME,LAST_NAME,MIDDLE_NAME,GENDER,DOB,RELATIONSHIP,PERSON_CODE,RACE,ETHNICITY,ADDRESS_1,ADDRESS_2,CITY,STATE,COUNTY,ZIP
173400,160951,Freya,Lynn,Marlys,0,1988-02-23,S,2,white,nonhispanic,771 Kirlin Haven,,Attleboro,Massachusetts,Bristol County,2703.0
83300,160951,Candice,Padilla,,0,1964-12-10,F,3,white,nonhispanic,182 Trantow Mission Unit 20,,Bellingham,Massachusetts,Norfolk County,
106800,111325,Leanne,Swanson,,0,2015-05-28,D2,16,white,nonhispanic,686 Shields Dam,,Easthampton,Massachusetts,Hampshire County,
52900,114177,Millicent,Byrne,,0,1948-04-22,N,13,white,nonhispanic,1009 DuBuque Ville Unit 54,,Springfield,Massachusetts,Hampden County,1106.0
97600,117067,Charley,Lynn,,0,1976-01-12,A1,11,white,nonhispanic,114 Cummerata Parade,,West Tisbury,Massachusetts,Dukes County,
22000,12745,Flynn,Morrison,,0,1989-03-30,M,4,white,nonhispanic,164 Stokes Lodge Apt 34,,Winchendon,Massachusetts,Worcester County,1475.0
22900,13466,Pauline,Knight,,0,2019-05-09,N,13,white,nonhispanic,376 Skiles Forge Suite 48,,Westminster,Massachusetts,Worcester County,
122500,135688,Elspeth,Gonzalez,Marliss,0,2014-09-09,C2,6,asian,hispanic,570 Nikolaus Wynd,,Cambridge,Massachusetts,Middlesex County,2139.0
21800,13587,Amelie,Harmon,,0,1984-04-08,D2,16,other,nonhispanic,958 Robel Run Unit 83,,Carver,Massachusetts,Plymouth County,
145000,138507,Laura,Montoya,,0,2010-03-29,F,3,white,nonhispanic,1094 Keebler Grove,,Gardner,Massachusetts,Worcester County,1440.0


root
 |-- MEMBER_ID: integer (nullable = true)
 |-- EMPLOYEE_ID: integer (nullable = true)
 |-- FIRST_NAME: string (nullable = true)
 |-- LAST_NAME: string (nullable = true)
 |-- MIDDLE_NAME: string (nullable = true)
 |-- GENDER: integer (nullable = true)
 |-- DOB: date (nullable = true)
 |-- RELATIONSHIP: string (nullable = true)
 |-- PERSON_CODE: integer (nullable = true)
 |-- RACE: string (nullable = true)
 |-- ETHNICITY: string (nullable = true)
 |-- ADDRESS_1: string (nullable = true)
 |-- ADDRESS_2: string (nullable = true)
 |-- CITY: string (nullable = true)
 |-- STATE: string (nullable = true)
 |-- COUNTY: string (nullable = true)
 |-- ZIP: integer (nullable = true)



In [0]:
demographic_mock_data = demographic_mock_data.withColumnRenamed("GENDER","Gender_code")
demographic_mock_data = demographic_mock_data.withColumnRenamed("RELATIONSHIP","Relationship_Code")

In [0]:
df = demographic_mock_data.withColumn("test",F.when(demographic_mock_data.FIRST_NAME == demographic_mock_data.MIDDLE_NAME,'yes').otherwise('no'))
display(df)

MEMBER_ID,EMPLOYEE_ID,FIRST_NAME,LAST_NAME,MIDDLE_NAME,Gender_code,DOB,Relationship_Code,PERSON_CODE,RACE,ETHNICITY,ADDRESS_1,ADDRESS_2,CITY,STATE,COUNTY,ZIP,test
173400,160951,Freya,Lynn,Marlys,0,1988-02-23,S,2,white,nonhispanic,771 Kirlin Haven,,Attleboro,Massachusetts,Bristol County,2703.0,no
83300,160951,Candice,Padilla,,0,1964-12-10,F,3,white,nonhispanic,182 Trantow Mission Unit 20,,Bellingham,Massachusetts,Norfolk County,,no
106800,111325,Leanne,Swanson,,0,2015-05-28,D2,16,white,nonhispanic,686 Shields Dam,,Easthampton,Massachusetts,Hampshire County,,no
52900,114177,Millicent,Byrne,,0,1948-04-22,N,13,white,nonhispanic,1009 DuBuque Ville Unit 54,,Springfield,Massachusetts,Hampden County,1106.0,no
97600,117067,Charley,Lynn,,0,1976-01-12,A1,11,white,nonhispanic,114 Cummerata Parade,,West Tisbury,Massachusetts,Dukes County,,no
22000,12745,Flynn,Morrison,,0,1989-03-30,M,4,white,nonhispanic,164 Stokes Lodge Apt 34,,Winchendon,Massachusetts,Worcester County,1475.0,no
22900,13466,Pauline,Knight,,0,2019-05-09,N,13,white,nonhispanic,376 Skiles Forge Suite 48,,Westminster,Massachusetts,Worcester County,,no
122500,135688,Elspeth,Gonzalez,Marliss,0,2014-09-09,C2,6,asian,hispanic,570 Nikolaus Wynd,,Cambridge,Massachusetts,Middlesex County,2139.0,no
21800,13587,Amelie,Harmon,,0,1984-04-08,D2,16,other,nonhispanic,958 Robel Run Unit 83,,Carver,Massachusetts,Plymouth County,,no
145000,138507,Laura,Montoya,,0,2010-03-29,F,3,white,nonhispanic,1094 Keebler Grove,,Gardner,Massachusetts,Worcester County,1440.0,no


### table1

#### demographic_mock_data join gender_crosswalk

In [0]:
# while joining both the names should either be alias or the original name
table1 = demographic_mock_data.alias("demography").join(gender_crosswalk.alias("gender"),
        on = "Gender_code",
        how = 'left')
display(table1)
table1.printSchema()

Gender_code,MEMBER_ID,EMPLOYEE_ID,FIRST_NAME,LAST_NAME,MIDDLE_NAME,DOB,Relationship_Code,PERSON_CODE,RACE,ETHNICITY,ADDRESS_1,ADDRESS_2,CITY,STATE,COUNTY,ZIP,Rollup_Code,Gender_Description
0,173400,160951,Freya,Lynn,Marlys,1988-02-23,S,2,white,nonhispanic,771 Kirlin Haven,,Attleboro,Massachusetts,Bristol County,2703.0,F,Female
0,83300,160951,Candice,Padilla,,1964-12-10,F,3,white,nonhispanic,182 Trantow Mission Unit 20,,Bellingham,Massachusetts,Norfolk County,,F,Female
0,106800,111325,Leanne,Swanson,,2015-05-28,D2,16,white,nonhispanic,686 Shields Dam,,Easthampton,Massachusetts,Hampshire County,,F,Female
0,52900,114177,Millicent,Byrne,,1948-04-22,N,13,white,nonhispanic,1009 DuBuque Ville Unit 54,,Springfield,Massachusetts,Hampden County,1106.0,F,Female
0,97600,117067,Charley,Lynn,,1976-01-12,A1,11,white,nonhispanic,114 Cummerata Parade,,West Tisbury,Massachusetts,Dukes County,,F,Female
0,22000,12745,Flynn,Morrison,,1989-03-30,M,4,white,nonhispanic,164 Stokes Lodge Apt 34,,Winchendon,Massachusetts,Worcester County,1475.0,F,Female
0,22900,13466,Pauline,Knight,,2019-05-09,N,13,white,nonhispanic,376 Skiles Forge Suite 48,,Westminster,Massachusetts,Worcester County,,F,Female
0,122500,135688,Elspeth,Gonzalez,Marliss,2014-09-09,C2,6,asian,hispanic,570 Nikolaus Wynd,,Cambridge,Massachusetts,Middlesex County,2139.0,F,Female
0,21800,13587,Amelie,Harmon,,1984-04-08,D2,16,other,nonhispanic,958 Robel Run Unit 83,,Carver,Massachusetts,Plymouth County,,F,Female
0,145000,138507,Laura,Montoya,,2010-03-29,F,3,white,nonhispanic,1094 Keebler Grove,,Gardner,Massachusetts,Worcester County,1440.0,F,Female


root
 |-- Gender_code: integer (nullable = true)
 |-- MEMBER_ID: integer (nullable = true)
 |-- EMPLOYEE_ID: integer (nullable = true)
 |-- FIRST_NAME: string (nullable = true)
 |-- LAST_NAME: string (nullable = true)
 |-- MIDDLE_NAME: string (nullable = true)
 |-- DOB: date (nullable = true)
 |-- Relationship_Code: string (nullable = true)
 |-- PERSON_CODE: integer (nullable = true)
 |-- RACE: string (nullable = true)
 |-- ETHNICITY: string (nullable = true)
 |-- ADDRESS_1: string (nullable = true)
 |-- ADDRESS_2: string (nullable = true)
 |-- CITY: string (nullable = true)
 |-- STATE: string (nullable = true)
 |-- COUNTY: string (nullable = true)
 |-- ZIP: integer (nullable = true)
 |-- Rollup_Code: string (nullable = true)
 |-- Gender_Description: string (nullable = true)



#### demographic_mock_data join relationship_crosswalk

In [0]:
table1 = table1.join(relationship_crosswalk.alias("relationship"),
        on = "Relationship_Code",
        how = 'left')
# display(table1)

In [0]:
display(table1)

Relationship_Code,Gender_code,MEMBER_ID,EMPLOYEE_ID,FIRST_NAME,LAST_NAME,MIDDLE_NAME,DOB,PERSON_CODE,RACE,ETHNICITY,ADDRESS_1,ADDRESS_2,CITY,STATE,COUNTY,ZIP,Rollup_Code,Gender_Description,Relationship_with_Suscriber
S,0,173400,160951,Freya,Lynn,Marlys,1988-02-23,2,white,nonhispanic,771 Kirlin Haven,,Attleboro,Massachusetts,Bristol County,2703.0,F,Female,Spouse
F,0,83300,160951,Candice,Padilla,,1964-12-10,3,white,nonhispanic,182 Trantow Mission Unit 20,,Bellingham,Massachusetts,Norfolk County,,F,Female,Fiance
F,0,83300,160951,Candice,Padilla,,1964-12-10,3,white,nonhispanic,182 Trantow Mission Unit 20,,Bellingham,Massachusetts,Norfolk County,,F,Female,Father
D2,0,106800,111325,Leanne,Swanson,,2015-05-28,16,white,nonhispanic,686 Shields Dam,,Easthampton,Massachusetts,Hampshire County,,F,Female,Divorced Wife
N,0,52900,114177,Millicent,Byrne,,1948-04-22,13,white,nonhispanic,1009 DuBuque Ville Unit 54,,Springfield,Massachusetts,Hampden County,1106.0,F,Female,Niece
A1,0,97600,117067,Charley,Lynn,,1976-01-12,11,white,nonhispanic,114 Cummerata Parade,,West Tisbury,Massachusetts,Dukes County,,F,Female,Adopted Son
M,0,22000,12745,Flynn,Morrison,,1989-03-30,4,white,nonhispanic,164 Stokes Lodge Apt 34,,Winchendon,Massachusetts,Worcester County,1475.0,F,Female,Mother
N,0,22900,13466,Pauline,Knight,,2019-05-09,13,white,nonhispanic,376 Skiles Forge Suite 48,,Westminster,Massachusetts,Worcester County,,F,Female,Niece
C2,0,122500,135688,Elspeth,Gonzalez,Marliss,2014-09-09,6,asian,hispanic,570 Nikolaus Wynd,,Cambridge,Massachusetts,Middlesex County,2139.0,F,Female,Daughter
D2,0,21800,13587,Amelie,Harmon,,1984-04-08,16,other,nonhispanic,958 Robel Run Unit 83,,Carver,Massachusetts,Plymouth County,,F,Female,Divorced Wife


### table2

#### enrollment_mock_data join group_crosswalk


In [0]:
table2 = enrollment_mock_data.join(group_crosswalk,on="GROUP_ID",how="left")

#### enrollment_mock_data join coverage_crosswalk

In [0]:
table2 = table2.join(coverage_crosswalk,on="COVERAGE_TYPE",how='left')

#### enrollment_mock_data join plan_crosswalk

In [0]:
table2 = table2.join(plan_crosswalk,on="PLAN_ID",how='left')

In [0]:
display(table2)

PLAN_ID,COVERAGE_TYPE,GROUP_ID,MEMBER_ID,MEMBER_STATUS,VENDOR,effective_date_member,termination_date_member,GROUP_NAME,Coverage_Description,Plan_Name,BENEFIT_TYPE,EFFECTIVE_DATE,TERMINATION_DATE
11,E1C,1,173400,ACTIVE,Cigna,2021-01-01,2021-02-28,Abacus Insights,Employee and 1 Child,Plan L,"Medical, Dental and Vision",2020-01-01,2020-12-31
0,ES,1,83300,ACTIVE,Cigna,2019-12-01,2020-01-31,Abacus Insights,Employee and Spouse,Plan A,Medical,2018-01-01,2018-12-31
4,U,1,106800,ACTIVE,Aetna,2020-09-01,2020-10-31,Abacus Insights,Unknown,Plan E,Medical,2019-01-01,2019-12-31
15,ES,1,52900,ACTIVE,Aetna,2021-02-01,2021-03-31,Abacus Insights,Employee and Spouse,Plan P,"Medical, Dental and Vision",2021-01-01,2021-12-31
10,E,1,97600,ACTIVE,Aetna,2021-01-01,2021-01-31,Abacus Insights,Employee Only,Plan K,Medical and Vision,2020-01-01,2020-12-31
15,EC,1,22000,ACTIVE,Cigna,2020-03-01,2020-04-30,Abacus Insights,Employee and Childrens,Plan P,"Medical, Dental and Vision",2021-01-01,2021-12-31
1,E,1,22900,ACTIVE,Aetna,2020-07-01,2020-07-31,Abacus Insights,Employee Only,Plan B,Medical and Dental,2018-01-01,2018-12-31
10,F,1,122500,ACTIVE,Cigna,2020-11-01,2020-12-31,Abacus Insights,Family,Plan K,Medical and Vision,2020-01-01,2020-12-31
4,E1C,8,21800,ACTIVE,Aetna,2020-12-01,2021-01-31,TechKraft Inc,Employee and 1 Child,Plan E,Medical,2019-01-01,2019-12-31
1,F,1,145000,ACTIVE,Cigna,2020-03-01,2020-04-30,Abacus Insights,Family,Plan B,Medical and Dental,2018-01-01,2018-12-31


### final table

In [0]:
final_table = table1.join(table2,on="MEMBER_ID",how="left")

In [0]:
display(final_table)
final_table.printSchema()
# count(final_table.columns())

MEMBER_ID,Relationship_Code,Gender_code,EMPLOYEE_ID,FIRST_NAME,LAST_NAME,MIDDLE_NAME,DOB,PERSON_CODE,RACE,ETHNICITY,ADDRESS_1,ADDRESS_2,CITY,STATE,COUNTY,ZIP,Rollup_Code,Gender_Description,Relationship_with_Suscriber,PLAN_ID,COVERAGE_TYPE,GROUP_ID,MEMBER_STATUS,VENDOR,effective_date_member,termination_date_member,GROUP_NAME,Coverage_Description,Plan_Name,BENEFIT_TYPE,EFFECTIVE_DATE,TERMINATION_DATE
173400,S,0,160951,Freya,Lynn,Marlys,1988-02-23,2,white,nonhispanic,771 Kirlin Haven,,Attleboro,Massachusetts,Bristol County,2703.0,F,Female,Spouse,11,E1C,1,ACTIVE,Cigna,2021-01-01,2021-02-28,Abacus Insights,Employee and 1 Child,Plan L,"Medical, Dental and Vision",2020-01-01,2020-12-31
83300,F,0,160951,Candice,Padilla,,1964-12-10,3,white,nonhispanic,182 Trantow Mission Unit 20,,Bellingham,Massachusetts,Norfolk County,,F,Female,Fiance,0,ES,1,ACTIVE,Cigna,2019-12-01,2020-01-31,Abacus Insights,Employee and Spouse,Plan A,Medical,2018-01-01,2018-12-31
83300,F,0,160951,Candice,Padilla,,1964-12-10,3,white,nonhispanic,182 Trantow Mission Unit 20,,Bellingham,Massachusetts,Norfolk County,,F,Female,Father,0,ES,1,ACTIVE,Cigna,2019-12-01,2020-01-31,Abacus Insights,Employee and Spouse,Plan A,Medical,2018-01-01,2018-12-31
106800,D2,0,111325,Leanne,Swanson,,2015-05-28,16,white,nonhispanic,686 Shields Dam,,Easthampton,Massachusetts,Hampshire County,,F,Female,Divorced Wife,4,U,1,ACTIVE,Aetna,2020-09-01,2020-10-31,Abacus Insights,Unknown,Plan E,Medical,2019-01-01,2019-12-31
52900,N,0,114177,Millicent,Byrne,,1948-04-22,13,white,nonhispanic,1009 DuBuque Ville Unit 54,,Springfield,Massachusetts,Hampden County,1106.0,F,Female,Niece,15,ES,1,ACTIVE,Aetna,2021-02-01,2021-03-31,Abacus Insights,Employee and Spouse,Plan P,"Medical, Dental and Vision",2021-01-01,2021-12-31
97600,A1,0,117067,Charley,Lynn,,1976-01-12,11,white,nonhispanic,114 Cummerata Parade,,West Tisbury,Massachusetts,Dukes County,,F,Female,Adopted Son,10,E,1,ACTIVE,Aetna,2021-01-01,2021-01-31,Abacus Insights,Employee Only,Plan K,Medical and Vision,2020-01-01,2020-12-31
22000,M,0,12745,Flynn,Morrison,,1989-03-30,4,white,nonhispanic,164 Stokes Lodge Apt 34,,Winchendon,Massachusetts,Worcester County,1475.0,F,Female,Mother,15,EC,1,ACTIVE,Cigna,2020-03-01,2020-04-30,Abacus Insights,Employee and Childrens,Plan P,"Medical, Dental and Vision",2021-01-01,2021-12-31
22900,N,0,13466,Pauline,Knight,,2019-05-09,13,white,nonhispanic,376 Skiles Forge Suite 48,,Westminster,Massachusetts,Worcester County,,F,Female,Niece,1,E,1,ACTIVE,Aetna,2020-07-01,2020-07-31,Abacus Insights,Employee Only,Plan B,Medical and Dental,2018-01-01,2018-12-31
122500,C2,0,135688,Elspeth,Gonzalez,Marliss,2014-09-09,6,asian,hispanic,570 Nikolaus Wynd,,Cambridge,Massachusetts,Middlesex County,2139.0,F,Female,Daughter,10,F,1,ACTIVE,Cigna,2020-11-01,2020-12-31,Abacus Insights,Family,Plan K,Medical and Vision,2020-01-01,2020-12-31
21800,D2,0,13587,Amelie,Harmon,,1984-04-08,16,other,nonhispanic,958 Robel Run Unit 83,,Carver,Massachusetts,Plymouth County,,F,Female,Divorced Wife,4,E1C,8,ACTIVE,Aetna,2020-12-01,2021-01-31,TechKraft Inc,Employee and 1 Child,Plan E,Medical,2019-01-01,2019-12-31


root
 |-- MEMBER_ID: integer (nullable = true)
 |-- Relationship_Code: string (nullable = true)
 |-- Gender_code: integer (nullable = true)
 |-- EMPLOYEE_ID: integer (nullable = true)
 |-- FIRST_NAME: string (nullable = true)
 |-- LAST_NAME: string (nullable = true)
 |-- MIDDLE_NAME: string (nullable = true)
 |-- DOB: date (nullable = true)
 |-- PERSON_CODE: integer (nullable = true)
 |-- RACE: string (nullable = true)
 |-- ETHNICITY: string (nullable = true)
 |-- ADDRESS_1: string (nullable = true)
 |-- ADDRESS_2: string (nullable = true)
 |-- CITY: string (nullable = true)
 |-- STATE: string (nullable = true)
 |-- COUNTY: string (nullable = true)
 |-- ZIP: integer (nullable = true)
 |-- Rollup_Code: string (nullable = true)
 |-- Gender_Description: string (nullable = true)
 |-- Relationship_with_Suscriber: string (nullable = true)
 |-- PLAN_ID: integer (nullable = true)
 |-- COVERAGE_TYPE: string (nullable = true)
 |-- GROUP_ID: integer (nullable = true)
 |-- MEMBER_STATUS: string (n

In [0]:
final_table.createOrReplaceTempView("finalTable")

### targetTable

In [0]:
%sql
CREATE OR REPLACE TABLE targetTable AS(
SELECT
 row_number() OVER (ORDER BY (SELECT NULL)) AS Abacus_Record_ID,
 CONCAT(substring(MEMBER_ID,0,3),ROW_NUMBER() OVER (PARTITION BY MEMBER_ID ORDER BY (SELECT NULL)),Gender_code,substring(DOB,9,2)) AS Abacus_Member_Id,
 MEMBER_ID AS Member_id,
 EMPLOYEE_ID AS Subscriber_id,
 FIRST_NAME AS Member_First_Name,
 LAST_NAME AS Member_Last_Name,
 MIDDLE_NAME AS Member_Middle_Name,
 CASE
    WHEN Gender_Description = 'Male' THEN 'Mr.'
    WHEN Gender_Description = 'Female' AND Relationship_with_Suscriber in ('Fiance','Spouse','Grand Mother','Mother') THEN 'Mrs.'
    WHEN Gender_Description = 'Female' AND Relationship_with_Suscriber in ('Adapted Daughter','Niece','Daughter','Divorced Wife','Grand Daughter','Self') THEN 'Ms.'
    WHEN Gender_Description = 'Female' AND Relationship_with_Suscriber LIKE 'Unknown' THEN 'Ms.'
    WHEN Gender_Description = 'Male' AND Relationship_with_Suscriber LIKE 'Unknown' THEN 'Mr.'
    ELSE null
  END AS Member_Prefix_Name,
  -- self,adapted daughter,niece,Daughter,unknown
 CASE
    WHEN FIRST_NAME = MIDDLE_NAME AND Relationship_with_Suscriber = 'Son' THEN 'I'
    WHEN FIRST_NAME = MIDDLE_NAME AND Relationship_with_Suscriber = 'Grand Son' THEN 'II'
    ELSE NULL
  END AS Member_Suffix_Name,
 Gender_Description AS Member_Gender,
 date_format(DOB,'MM-dd-yyyy') AS Member_Date_of_Birth,
 Relationship_Code AS Member_Relationship_Code,
 Relationship_with_Suscriber AS Member_Relationship_Description,
 PERSON_CODE AS Member_Person_Code,
 ADDRESS_1 AS Member_Address_Line_1,
 ADDRESS_2 AS Member_Address_Line_2,
 CITY AS Member_City,
 STATE AS Member_State,
 COUNTY AS Member_County,
 ZIP AS Member_Postal_Code,
 'USA' AS Member_Country,
 'null' AS Member_Home_Phone,
 'null' AS Member_Work_Phone,
 'null' AS Member_Mobile_Phone,
 'null' AS Member_Email,
 'null' AS Member_Is_Deceased,
 'null' AS Member_Date_of_Death,
 'null' AS Member_Deceased_Reason,
 GROUP_ID AS Enrollment_Group_ID,
 GROUP_NAME AS Enrollment_Group_Name,
 'null' AS Enrollment_SubGroup_ID,
 'null' AS Enrollment_SubGroup_Name,
 COVERAGE_TYPE AS Enrollment_Coverage_Code,
 Coverage_Description AS Enrollment_Coverage_Description,
 VENDOR AS Enrollment_Plan_ID,
 Plan_Name AS Enrollment_Plan_Name,
 BENEFIT_TYPE AS Enrollment_Plan_Coverage,
 CASE
    WHEN BENEFIT_TYPE IN ('Medical','Medical and Vision','Medical, Dental and Vision','Medical and Dental') THEN date_format(effective_date_member,'MM-dd-yyyy')
    ELSE NULL
 END AS Enrollment_Medical_Effective_Date,
 CASE
   WHEN BENEFIT_TYPE IN ('Medical','Medical and Vision','Medical, Dental and Vision','Medical and Dental') THEN date_format(termination_date_member,'MM-dd-yyyy')
   ELSE NULL
 END AS Enrollment_Medical_Termination_Date,
 CASE
   WHEN BENEFIT_TYPE IN ('Medical and Dental','Medical, Dental and Vision') THEN date_format(effective_date_member,'MM-dd-yyyy')
   ELSE NULL
 END AS Enrollment_Dental_Effective_Date,
 CASE
   WHEN BENEFIT_TYPE IN ('Medical and Dental','Medical, Dental and Vision') THEN date_format(termination_date_member,'MM-dd-yyyy')
   ELSE NULL
 END AS Enrollment_Dental_Termination_Date,
 CASE
   WHEN BENEFIT_TYPE IN ('Medical and Vision','Medical, Dental and Vision') THEN date_format(effective_date_member,'MM-dd-yyyy')
   ELSE NULL
 END AS Enrollment_Vision_Effective_Date,
 CASE
   WHEN BENEFIT_TYPE IN ('Medical and Vision','Medical, Dental and Vision') THEN date_format(termination_date_member,'MM-dd-yyyy')
   ELSE NULL
 END AS Enrollment_Vision_Termination_Date,
 'DCT' AS Enrollment_Vendor_Name,
 'Enrollment_data & Demographic_data' AS Source_File_Name,
  current_timestamp() AS File_Ingestion_Date
FROM finalTable
)

num_affected_rows,num_inserted_rows


In [0]:
%sql
SELECT * FROM targetTable

Abacus_Record_ID,Abacus_Member_Id,Member_id,Subscriber_id,Member_First_Name,Member_Last_Name,Member_Middle_Name,Member_Prefix_Name,Member_Suffix_Name,Member_Gender,Member_Date_of_Birth,Member_Relationship_Code,Member_Relationship_Description,Member_Person_Code,Member_Address_Line_1,Member_Address_Line_2,Member_City,Member_State,Member_County,Member_Postal_Code,Member_Country,Member_Home_Phone,Member_Work_Phone,Member_Mobile_Phone,Member_Email,Member_Is_Deceased,Member_Date_of_Death,Member_Deceased_Reason,Enrollment_Group_ID,Enrollment_Group_Name,Enrollment_SubGroup_ID,Enrollment_SubGroup_Name,Enrollment_Coverage_Code,Enrollment_Coverage_Description,Enrollment_Plan_ID,Enrollment_Plan_Name,Enrollment_Plan_Coverage,Enrollment_Medical_Effective_Date,Enrollment_Medical_Termination_Date,Enrollment_Dental_Effective_Date,Enrollment_Dental_Termination_Date,Enrollment_Vision_Effective_Date,Enrollment_Vision_Termination_Date,Enrollment_Vendor_Name,Source_File_Name,File_Ingestion_Date
1,1901123,1900,38256,Ameer,Mcmahon,,Mr.,,Male,04-23-2011,F,Fiance,3,1032 Koch Gardens,,Sharon,Massachusetts,Norfolk County,,USA,,,,,,,,1,Abacus Insights,,,F,Family,Aetna,Plan D,"Medical, Dental and Vision",07-01-2020,08-31-2020,07-01-2020,08-31-2020,07-01-2020,08-31-2020,DCT,Enrollment_data & Demographic_data,2024-03-08T07:51:35.076+0000
2,1902123,1900,38256,Ameer,Mcmahon,,Mr.,,Male,04-23-2011,F,Father,3,1032 Koch Gardens,,Sharon,Massachusetts,Norfolk County,,USA,,,,,,,,1,Abacus Insights,,,F,Family,Aetna,Plan D,"Medical, Dental and Vision",07-01-2020,08-31-2020,07-01-2020,08-31-2020,07-01-2020,08-31-2020,DCT,Enrollment_data & Demographic_data,2024-03-08T07:51:35.076+0000
3,2701129,2700,38256,Keeley,Preston,,Mr.,,Male,03-29-1956,D2,Divorced Wife,16,333 Hilpert Route,,Ware,Massachusetts,Hampshire County,,USA,,,,,,,,8,TechKraft Inc,,,EC,Employee and Childrens,Cigna,Plan N,Medical and Dental,04-01-2021,05-31-2021,04-01-2021,05-31-2021,,,DCT,Enrollment_data & Demographic_data,2024-03-08T07:51:35.076+0000
4,2801125,2800,95182,Ted,Summers,,Mr.,,Male,08-25-1990,D2,Divorced Wife,16,410 Homenick Lane,,Duxbury,Massachusetts,Plymouth County,2332.0,USA,,,,,,,,1,Abacus Insights,,,ES,Employee and Spouse,Aetna,Plan D,"Medical, Dental and Vision",04-01-2020,05-31-2020,04-01-2020,05-31-2020,04-01-2020,05-31-2020,DCT,Enrollment_data & Demographic_data,2024-03-08T07:51:35.076+0000
5,1401014,14000,44147,Demi-Leigh,Sanders,Myrtle,,,Female,01-14-2021,C1,Son,5,746 Weissnat Landing,,Boston,Massachusetts,Suffolk County,2203.0,USA,,,,,,,,1,Abacus Insights,,,F,Family,Aetna,Plan G,Medical and Vision,11-01-2020,12-31-2020,,,11-01-2020,12-31-2020,DCT,Enrollment_data & Demographic_data,2024-03-08T07:51:35.076+0000
6,1721021,17200,39986,Georgiana,Livingston,,Ms.,,Female,09-21-2003,G4,Grand Daughter,10,814 Blick Avenue Unit 22,,Wayland,Massachusetts,Middlesex County,,USA,,,,,,,,4,Digital Convergence Technologies,,,EC,Employee and Childrens,Cigna,Plan A,Medical,05-01-2020,06-30-2020,,,,,DCT,Enrollment_data & Demographic_data,2024-03-08T07:51:35.076+0000
7,1821101,18200,38256,Aarav,Kent,,Mr.,,Male,01-01-1943,M,Mother,4,718 Hamill Club,,Easthampton,Massachusetts,Hampshire County,,USA,,,,,,,,3,Facebook,,,ES,Employee and Spouse,Aetna,Plan E,Medical,10-01-2020,11-30-2020,,,,,DCT,Enrollment_data & Demographic_data,2024-03-08T07:51:35.076+0000
8,1822101,18200,38256,Aarav,Kent,,Mr.,,Male,01-01-1943,M,Mother,4,718 Hamill Club,,Easthampton,Massachusetts,Hampshire County,,USA,,,,,,,,3,Facebook,,,F,Family,Cigna,Plan L,"Medical, Dental and Vision",07-01-2020,08-31-2020,07-01-2020,08-31-2020,07-01-2020,08-31-2020,DCT,Enrollment_data & Demographic_data,2024-03-08T07:51:35.076+0000
9,1823101,18200,38256,Aarav,Kent,,Mr.,,Male,01-01-1943,M,Mother,4,718 Hamill Club,,Easthampton,Massachusetts,Hampshire County,,USA,,,,,,,,3,Facebook,,,ES,Employee and Spouse,Aetna,Plan O,Medical and Vision,11-01-2020,12-31-2020,,,11-01-2020,12-31-2020,DCT,Enrollment_data & Demographic_data,2024-03-08T07:51:35.076+0000
10,1824101,18200,38256,Steffan,Figueroa,Kippar,Mr.,,Male,01-01-1943,E,Self,1,718 Hamill Club,,Easthampton,Massachusetts,Hampshire County,,USA,,,,,,,,3,Facebook,,,ES,Employee and Spouse,Aetna,Plan E,Medical,10-01-2020,11-30-2020,,,,,DCT,Enrollment_data & Demographic_data,2024-03-08T07:51:35.076+0000


In [0]:
%sql
DESCRIBE targetTable

col_name,data_type,comment
Abacus_Record_ID,int,
Abacus_Member_Id,string,
Member_id,int,
Subscriber_id,int,
Member_First_Name,string,
Member_Last_Name,string,
Member_Middle_Name,string,
Member_Prefix_Name,string,
Member_Suffix_Name,string,
Member_Gender,string,


In [0]:
%sql
CREATE TABLE targetTable_final (
    Abacus_Record_ID STRING,
    Abacus_Member_ID STRING,
    Member_ID VARCHAR(20),
    Subscriber_ID VARCHAR(20),
    Member_First_Name VARCHAR(75),
    Member_Last_Name VARCHAR(75),
    Member_Middle_Name VARCHAR(75),
    Member_Prefix_Name VARCHAR(10),
    Member_Suffix_Name VARCHAR(10),
    Member_Gender VARCHAR(10),
    Member_Date_of_Birth DATE,
    Member_Relationship_Code VARCHAR(10),
    Member_Relationship_Description VARCHAR(20),
    Member_Person_Code INTEGER,
    Member_Address_Line_1 VARCHAR(100),
    Member_Address_Line_2 VARCHAR(100),
    Member_City VARCHAR(20),
    Member_State VARCHAR(20),
    Member_County VARCHAR(50),
    Member_Postal_Code VARCHAR(10),
    Member_Country VARCHAR(20),
    Member_Home_Phone INTEGER,
    Member_Work_Phone INTEGER,
    Member_Mobile_Phone INTEGER,
    Member_Email VARCHAR(100),
    Member_Is_Deceased INTEGER,
    Member_Date_of_Death DATE,
    Member_Deceased_Reason VARCHAR(100),
    Enrollment_Group_ID VARCHAR(20),
    Enrollment_Group_Name VARCHAR(50),
    Enrollment_SubGroup_ID VARCHAR(20),
    Enrollment_SubGroup_Name VARCHAR(50),
    Enrollment_Coverage_Code VARCHAR(10),
    Enrollment_Coverage_Description VARCHAR(30),
    Enrollment_Plan_ID VARCHAR(10),
    Enrollment_Plan_Name VARCHAR(30),
    Enrollment_Plan_Coverage VARCHAR(50),
    Enrollment_Medical_Effective_Date DATE,
    Enrollment_Medical_Termination_Date DATE,
    Enrollment_Dental_Effective_Date DATE,
    Enrollment_Dental_Termination_Date DATE,
    Enrollment_Vision_Effective_Date DATE,
    Enrollment_Vision_Termination_Date DATE,
    Enrollment_Vendor_Name VARCHAR(20),
    Source_File_Name VARCHAR(100),
    File_Ingestion_Date DATE
);


In [0]:
%sql
describe targetTable

col_name,data_type,comment
Abacus_Record_ID,int,
Abacus_Member_Id,string,
Member_id,int,
Subscriber_id,int,
Member_First_Name,string,
Member_Last_Name,string,
Member_Middle_Name,string,
Member_Prefix_Name,string,
Member_Suffix_Name,string,
Member_Gender,string,


#### type "STRING" cannot be cast to "DATE" because it is malformed. datatype error


In [0]:
%sql
INSERT INTO targetTable_final (Abacus_Record_ID, Abacus_Member_ID, Member_ID, Subscriber_id, Member_First_Name, Member_Last_Name, Member_Middle_Name, Member_Prefix_Name, Member_Suffix_Name, Member_Gender, Member_Date_of_Birth, Member_Relationship_Code, Member_Relationship_Description, Member_Person_Code, Member_Address_Line_1, Member_Address_Line_2, Member_City, Member_State, Member_County, Member_Postal_Code, Member_Country, Member_Home_Phone, Member_Work_Phone, Member_Mobile_Phone, Member_Email, Member_Is_Deceased, Member_Date_of_Death, Member_Deceased_Reason, Enrollment_Group_ID, Enrollment_Group_Name, Enrollment_SubGroup_ID, Enrollment_SubGroup_Name, Enrollment_Coverage_Code, Enrollment_Coverage_Description, Enrollment_Plan_ID, Enrollment_Plan_Name, Enrollment_Plan_Coverage, Enrollment_Medical_Effective_Date, Enrollment_Medical_Termination_Date, Enrollment_Dental_Effective_Date, Enrollment_Dental_Termination_Date, Enrollment_Vision_Effective_Date, Enrollment_Vision_Termination_Date, Enrollment_Vendor_Name, Source_File_Name, File_Ingestion_Date)
SELECT Abacus_Record_ID, Abacus_Member_ID, Member_ID, Subscriber_id, Member_First_Name, Member_Last_Name, Member_Middle_Name, Member_Prefix_Name, Member_Suffix_Name, Member_Gender, Member_Date_of_Birth, Member_Relationship_Code, Member_Relationship_Description, Member_Person_Code, Member_Address_Line_1, Member_Address_Line_2, Member_City, Member_State, Member_County, Member_Postal_Code, Member_Country, Member_Home_Phone, Member_Work_Phone, Member_Mobile_Phone, Member_Email, Member_Is_Deceased, Member_Date_of_Death, Member_Deceased_Reason, Enrollment_Group_ID, Enrollment_Group_Name, Enrollment_SubGroup_ID, Enrollment_SubGroup_Name, Enrollment_Coverage_Code, Enrollment_Coverage_Description, Enrollment_Plan_ID, Enrollment_Plan_Name, Enrollment_Plan_Coverage, Enrollment_Medical_Effective_Date, Enrollment_Medical_Termination_Date, Enrollment_Dental_Effective_Date, Enrollment_Dental_Termination_Date, Enrollment_Vision_Effective_Date, Enrollment_Vision_Termination_Date, Enrollment_Vendor_Name, Source_File_Name, File_Ingestion_Date
FROM targetTable;


[0;31m---------------------------------------------------------------------------[0m
[0;31mPy4JJavaError[0m                             Traceback (most recent call last)
File [0;32m<command-2859670625050130>:7[0m
[1;32m      5[0m     display(df)
[1;32m      6[0m     [38;5;28;01mreturn[39;00m df
[0;32m----> 7[0m   _sqldf [38;5;241m=[39m [43m____databricks_percent_sql[49m[43m([49m[43m)[49m
[1;32m      8[0m [38;5;28;01mfinally[39;00m:
[1;32m      9[0m   [38;5;28;01mdel[39;00m ____databricks_percent_sql

File [0;32m<command-2859670625050130>:4[0m, in [0;36m____databricks_percent_sql[0;34m()[0m
[1;32m      2[0m [38;5;28;01mdef[39;00m [38;5;21m____databricks_percent_sql[39m():
[1;32m      3[0m   [38;5;28;01mimport[39;00m [38;5;21;01mbase64[39;00m
[0;32m----> 4[0m   df [38;5;241m=[39m [43mspark[49m[38;5;241;43m.[39;49m[43msql[49m[43m([49m[43mbase64[49m[38;5;241;43m.[39;49m[43mstandard_b64decode[49m[43m([49m[38;5;124;43m"[39;