
## Overview

This notebook will show you how to create and query a table or DataFrame that you uploaded to DBFS. [DBFS](https://docs.databricks.com/user-guide/dbfs-databricks-file-system.html) is a Databricks File System that allows you to store data for querying inside of Databricks. This notebook assumes that you have a file already inside of DBFS that you would like to read from.

This notebook is written in **Python** so the default cell type is Python. However, you can use different languages by using the `%LANGUAGE` syntax. Python, Scala, SQL, and R are all supported.

In [0]:
%fs
rm -r dbfs:/user/hive/warehouse/subscriber

In [0]:
from pyspark.sql import SparkSession
# Create a SparkSession
spark = SparkSession.builder.appName("ReadExcel").getOrCreate()
df_address = spark.read.format("com.crealytics.spark.excel") \
    .option("header", "true") \
    .option("inferSchema", "true") \
    .load("dbfs:/FileStore/shared_uploads/vishalthapacs@gmail.com/Address.xlsx")

# df_address.write.format("parquet").saveAsTable("permanent_address_table") # permanent table create garcha
# result = spark.sql("SELECT * FROM address_information")
# display(result)

# 2. Create or replace a temporary view
df_address.createOrReplaceTempView("temp_address_view")
# 3. Create or replace a table using Spark SQL
spark.sql("CREATE OR REPLACE TABLE address_info AS SELECT * FROM temp_address_view")
# 4. Querying the created table
spark.sql("SELECT * FROM address_info").show()

+-------+-------------------+-------------------+--------------------+--------------+--------------+------------+----------+
|     id|               city|        issued_date|      address_line_1|address_line_2|         state|address_type|   zipcode|
+-------+-------------------+-------------------+--------------------+--------------+--------------+------------+----------+
|70001.0|       Griffinhaven|2007-06-10 00:00:00|      046 Cox Lights|     Suite 600|South Carolina|  Residental|     71709|
|70001.0|           New Kyle|2016-05-26 00:00:00|4307 Ashley Villa...|          null|  North Dakota|        Mail|     87337|
|70002.0|      South Natalie|2019-09-15 00:00:00|  183 Dalton Viaduct|     Suite 844|        Oregon|        Mail|     77714|
|70002.0|      North Heather|2022-12-01 00:00:00|       737 Banks Row|      Apt. 505|      Arkansas|  Residental|      2341|
|70003.0|          West Ruth|2003-11-25 00:00:00|27634 Miller Prairie|          null|  North Dakota|  Residental|     73151|


In [0]:
%sql
select * from address_info 
where address_line_2 is null

id,city,issued_date,address_line_1,address_line_2,state,address_type,zipcode
70380.0,Meganberg,2008-09-14T00:00:00.000+0000,1291 Angela Stravenue,,Washington,Residental,67937
70384.0,South Jamieport,2009-01-09T00:00:00.000+0000,886 Morgan Lights,,Mississippi,Residental,577
70385.0,North Ericstad,2000-08-25T00:00:00.000+0000,288 Whitney Circles Suite 170,,Alaska,Residental,63052
70390.0,North Steven,2007-11-04T00:00:00.000+0000,641 Chung Branch Suite 698,,Maryland,Residental,92949
70391.0,South Jessica,2019-03-08T00:00:00.000+0000,391 Maddox Crossing Suite 561,,Rhode Island,Residental,4931
70392.0,Gardnerberg,2023-01-30T00:00:00.000+0000,83524 Flynn Crossing,,Mississippi,Residental,72580
70399.0,Jamesport,2000-02-16T00:00:00.000+0000,4220 Stone Field Suite 490,,Arkansas,Residental,79313
70402.0,Annefurt,2009-10-09T00:00:00.000+0000,473 Gary Alley,,South Carolina,Residental,68158
70404.0,Guzmanside,2014-05-08T00:00:00.000+0000,750 Kimberly Parkway,,Missouri,Residental,85900
70409.0,Thomasmouth,2008-09-06T00:00:00.000+0000,2927 Leblanc Mills,,New York,Residental,87852


In [0]:
%sql
select * from address_info 
-- where address_line_1 is null
-- where city is null
-- where issued_date is null
-- where state is null
where zipcode is null

id,city,issued_date,address_line_1,address_line_2,state,address_type,zipcode


In [0]:
# 1. File location and type
detail_file_location = "/FileStore/tables/Detail.csv"
file_type = "csv"

# CSV options
infer_schema = "true"
first_row_is_header = "true"
delimiter = ","

df_detail = spark.read.format(file_type) \
    .option("inferSchema", infer_schema) \
    .option("header", first_row_is_header) \
    .option("sep", delimiter) \
    .load(detail_file_location)

# 2. Create or replace a temporary view
df_detail.createOrReplaceTempView("temp_detail_view")

# 3. Create or replace a table using Spark SQL
spark.sql("CREATE OR REPLACE TABLE detail_info AS SELECT * FROM temp_detail_view")

# Querying the created table
spark.sql("SELECT * FROM detail_info").show()

+-----+-----------+-----------+-----------+--------------------+-----------+------+------------+--------------+-------------+-------------+--------------------+-----------------+---------+--------------------+------------+
|   id| first_name|middle_name|  last_name|               email|        ssn|gender|    religion|marital_status|date_of_birth|deceased_date|   spoken_language_1|spoken_language_2|  company|            job_role|job_hiredate|
+-----+-----------+-----------+-----------+--------------------+-----------+------+------------+--------------+-------------+-------------+--------------------+-----------------+---------+--------------------+------------+
|70001|     Hettie|       null|Keenlayside|jkeenlayside0@dis...|168-92-1075|     F|    Buddhism|       Widowed|   1939-08-05|         null|        West Frisian|          Swahili|  Gabcube| Clinical Specialist|  1964-01-29|
|70002|      Reade|       null|   Laverenz|dlaverenz1@senate...|782-24-9907|     M|Christianity|       Widow

In [0]:

df_contact = spark.read.text("/FileStore/tables/contactinfo.txt")
df_contact.createOrReplaceTempView("temp_contact_view")
spark.sql("CREATE OR REPLACE TABLE contact_info AS SELECT * FROM temp_contact_view")
spark.sql("SELECT * FROM contact_info").show()

+--------------------+
|               value|
+--------------------+
|id\tphone\tusage_...|
|70001\t(455) 3130...|
|70002\t(994) 4561...|
|70003\t(771) 6498...|
|70004\t(450) 8886...|
|70005\t(423) 1700...|
|70006\t(334) 1254...|
|70007\t(915) 7431...|
|70008\t(816) 4980...|
|70009\t(460) 8203...|
|70010\t(577) 3110...|
|70011\t(530) 6708...|
|70012\t(843) 1974...|
|70013\t(602) 3129...|
|70014\t(844) 3426...|
|70016\t(402) 3025...|
|70017\t(304) 6146...|
|70018\t(432) 9954...|
|70019\t(283) 7665...|
|70020\t(436) 3907...|
+--------------------+
only showing top 20 rows



In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import split

# Create a Spark session
spark = SparkSession.builder.appName("separate_columns").getOrCreate()

# Read the text file without using the header option
df_txt = spark.read.text("/FileStore/tables/contactinfo.txt")

# Skip the first row
header = df_txt.first()[0]
df_txt = df_txt.filter(df_txt.value != header)

# Split the text into columns using tab as the delimiter
df_txt = df_txt.withColumn("id", split("value", "\t")[0].cast("int")).withColumn("phone", split("value", "\t")[1]).withColumn("usage_type", split("value", "\t")[2])

df_contact = df_txt.drop("value")
df_contact.show(truncate=False)

+-----+-------------+----------+
|id   |phone        |usage_type|
+-----+-------------+----------+
|70001|(455) 3130004|Work      |
|70002|(994) 4561640|Work      |
|70003|(771) 6498755|Work      |
|70004|(450) 8886723|Work      |
|70005|(423) 1700133|Work      |
|70006|(334) 1254061|Work      |
|70007|(915) 7431041|Work      |
|70008|(816) 4980330|Work      |
|70009|(460) 8203658|Work      |
|70010|(577) 3110757|Work      |
|70011|(530) 6708617|Work      |
|70012|(843) 1974981|Work      |
|70013|(602) 3129571|Work      |
|70014|(844) 3426438|Work      |
|70016|(402) 3025907|Work      |
|70017|(304) 6146445|Work      |
|70018|(432) 9954065|Work      |
|70019|(283) 7665423|Work      |
|70020|(436) 3907382|Work      |
|70021|(875) 4821793|Work      |
+-----+-------------+----------+
only showing top 20 rows



In [0]:
# Read the JSON file into a DataFrame
df_header = spark.read.option("multiline", "true").json("/FileStore/tables/header.json")
df_header.createOrReplaceTempView("temp_header_view")
spark.sql("CREATE OR REPLACE TABLE header_info AS SELECT * FROM temp_header_view")


Out[30]: DataFrame[num_affected_rows: bigint, num_inserted_rows: bigint]

In [0]:
%sql
-- Create the newtable table
CREATE or REPLACE TABLE newtable(
  source_id STRING,
  subscriber_id STRING,
  first_name STRING,
  middle_name STRING,
  last_name STRING,
  prefix_name STRING,
  suffix_name STRING,
  name STRING,
  record_created_ts TIMESTAMP,
  is_verified BOOLEAN,
  addresses ARRAY<STRUCT<
        address_type: STRING,
        address_line_1: STRING,
        address_line_2: STRING,
        city: STRING,
        state: STRING,
        zip_code: STRING,
        postal_code: STRING,
        country: STRING
    >>,
  phones ARRAY<STRUCT<
        phone: STRING,
        usage_type:  STRING
    >>,
  email STRING,
  national_id STRING,
  gender STRING,
  marital_status STRING,
  date_of_birth DATE,
  year_of_birth DATE,
  deceased_ind BOOLEAN,
  deceased_age INT,
  deceased_date DATE,
  languages STRUCT<spoken_language_1: STRING, spoken_language_2: STRING>,
  employment STRUCT<
        name: STRING, 
        job_role: STRING, 
        marital_status: STRING, 
        job_hiredate: DATE
    >
);


In [0]:
%fs
rm -r dbfs:/user/hive/warehouse/subscriber

In [0]:
%sql
select * from newtable

source_id,subscriber_id,first_name,middle_name,last_name,prefix_name,suffix_name,name,record_created_ts,is_verified,addresses,phones,email,national_id,gender,marital_status,date_of_birth,year_of_birth,deceased_ind,deceased_age,deceased_date,languages,employment


In [0]:
%sql
SELECT 
    header.id AS source_id,
    header.insurer_id AS subscriber_id,
    detail.first_name,
    detail.middle_name,
    detail.last_name,
    COALESCE(CONCAT(detail.first_name, ' ', detail.last_name, ' ', detail.middle_name), COALESCE(detail.first_name, detail.last_name, detail.middle_name)) AS name,
    CASE
        WHEN detail.gender = 'M' THEN 'Mr.'
        WHEN detail.gender = 'F' AND detail.marital_status IS NULL THEN 'Miss.'
        WHEN detail.gender = 'F' AND detail.marital_status IN ('Married', 'Divorced', 'Widowed') THEN 'Mrs.'
        WHEN detail.gender IS NULL AND detail.marital_status IS NULL OR detail.marital_status IN ('Married', 'Divorced', 'Widowed') THEN 'Misc.'
        ELSE 'Not Known'
    END AS prefix_name,
    CASE 
        WHEN detail.job_role = 'Staff Scientist' THEN 'SS'
        WHEN detail.job_role = 'Research Associate' THEN 'RA'
        WHEN detail.job_role = 'Registered Nurse' THEN 'RN'
        ELSE 'Not Known'
    END AS suffix_name,
    CURRENT_TIMESTAMP AS record_created_ts,
    CASE
        WHEN detail.email LIKE '%@%.%'             -- checks for at least one "@" and one "."
             AND detail.email NOT LIKE '%@%@%'     -- Contains only one "@"
             AND detail.email NOT LIKE '%..%'      -- Does not contain consecutive periods
             AND detail.email NOT LIKE '%.@%' AND email NOT LIKE '%@.%'      -- "@" is not the first or last character
             AND LEN(detail.email) - LEN(REPLACE(detail.email, '@', '')) = 1        -- Contains exactly one "@"
             THEN true
        ELSE false
    END AS is_verified,
    COLLECT_LIST(
        STRUCT(
            address.address_type,
            address.address_line_1,
            address.address_line_2,
            address.city,
            address.state,
            'USA' AS country,
            CASE
                WHEN LENGTH(address.zipcode) = 5 THEN address.zipcode
                ELSE NULL
            END AS postal_code,
            CASE
                WHEN LENGTH(address.zipcode) = 9 THEN SPLIT(address.zipcode, '-')[1]
                ELSE NULL
            END AS zip_code
        )
    ) AS addresses,
    COLLECT_LIST(
        STRUCT(
            -- contact.phone AS number,
            contact.value AS phone_number,
            'phone_type' AS phone_type
        )
    ) AS phones,
    detail.email,
    detail.ssn AS national_id,
    detail.gender AS gender, 
    detail.marital_status AS marital_status,
    TO_DATE(detail.date_of_birth, 'M/d/yyyy') AS date_of_birth,
    TO_DATE(detail.date_of_birth, 'yyyy') AS year_of_birth,
    CASE
        WHEN COALESCE(detail.deceased_date, 'false') = 'false' THEN false
        ELSE true
    END AS deceased_ind,
    CASE
        WHEN COALESCE(detail.deceased_date, 'false') = 'false' THEN NULL
        ELSE TRY_CAST(DATEDIFF(TO_DATE(detail.deceased_date, 'M/d/yyyy'), TO_DATE(detail.date_of_birth, 'M/d/yyyy')) / 365 AS INT)
    END AS deceased_age,
    TO_DATE(detail.deceased_date, 'M/d/yyyy') AS deceased_date,
    SPLIT(COALESCE(CONCAT_WS(',', detail.spoken_language_1, detail.spoken_language_2), detail.spoken_language_1, detail.spoken_language_2), ',') AS languages,
    COLLECT_LIST(
        STRUCT(
            detail.company AS employer_name,
            detail.job_role AS employee_role,
            CASE
                WHEN detail.job_hiredate IS NOT NULL THEN 'Active'
                ELSE 'Inactive'
            END AS employee_status,
            TO_DATE(detail.job_hiredate, 'M/d/yyyy') AS employee_hiredate
        )
    ) AS employment
FROM header_info AS header
LEFT JOIN detail_info AS detail ON header.id = detail.id
LEFT JOIN contact_info AS contact ON header.id = contact.value
LEFT JOIN address_info AS address ON header.id = address.id
GROUP BY ALL;


source_id,subscriber_id,first_name,middle_name,last_name,name,prefix_name,suffix_name,record_created_ts,is_verified,addresses,phones,email,national_id,gender,marital_status,date_of_birth,year_of_birth,deceased_ind,deceased_age,deceased_date,languages,employment
70001,40184,Hettie,,Keenlayside,Hettie,Mrs.,Not Known,2024-02-07T16:29:48.089+0000,True,"List(List(Mail, 4307 Ashley Village Suite 758, null, New Kyle, North Dakota, USA, 87337, null), List(Residental, 046 Cox Lights, Suite 600, Griffinhaven, South Carolina, USA, 71709, null))","List(List(null, phone_type), List(null, phone_type))",jkeenlayside0@disqus.com,168-92-1075,F,Widowed,1939-08-05,1939-08-05,False,,,"List(West Frisian, Swahili)","List(List(Gabcube, Clinical Specialist, Active, 1964-01-29), List(Gabcube, Clinical Specialist, Active, 1964-01-29))"
70002,40092,Reade,,Laverenz,Reade,Mr.,SS,2024-02-07T16:29:48.089+0000,True,"List(List(Residental, 737 Banks Row, Apt. 505, North Heather, Arkansas, USA, null, null), List(Mail, 183 Dalton Viaduct, Suite 844, South Natalie, Oregon, USA, 77714, null))","List(List(null, phone_type), List(null, phone_type))",dlaverenz1@senate.gov,782-24-9907,M,Widowed,1941-05-14,1941-05-14,False,,,"List(Swati, Danish)","List(List(Skibox, Staff Scientist, Active, 1958-05-18), List(Skibox, Staff Scientist, Active, 1958-05-18))"
70003,40233,Minnnie,,Baack,Minnnie,Mrs.,Not Known,2024-02-07T16:29:48.089+0000,True,"List(List(Mail, 807 Jesus Mills Suite 598, Suite 735, Churchbury, Texas, USA, 97223, null), List(Residental, 27634 Miller Prairie, null, West Ruth, North Dakota, USA, 73151, null))","List(List(null, phone_type), List(null, phone_type))",dbaack2@sina.com.cn,726-01-1271,F,Married,1982-11-20,1982-11-20,False,,,List(Swati),"List(List(Dabjam, Paralegal, Active, 2011-06-10), List(Dabjam, Paralegal, Active, 2011-06-10))"
70004,40058,Tana,Agata,Aiken,Tana Aiken Agata,Miss.,Not Known,2024-02-07T16:29:48.089+0000,True,"List(List(Residental, 9831 Robert Falls, Apt. 086, Michelleland, Oregon, USA, null, null), List(Mail, 36717 Philip Common, Suite 278, Thomasborough, Idaho, USA, null, null))","List(List(null, phone_type), List(null, phone_type))",aaiken3@nydailynews.com,492-62-0968,F,,1929-02-18,1929-02-18,False,,,"List(New Zealand Sign Language, Punjabi)","List(List(Aimbu, VP Marketing, Active, 2014-10-08), List(Aimbu, VP Marketing, Active, 2014-10-08))"
70005,40088,Cyndia,,Tolomelli,Cyndia,Miss.,Not Known,2024-02-07T16:29:48.089+0000,True,"List(List(Mail, 95855 Davis Lodge, Suite 059, Kimberlymouth, Louisiana, USA, 33733, null), List(Residental, 0861 Caldwell Dam, Suite 783, North Robertborough, New Mexico, USA, 31718, null))","List(List(null, phone_type), List(null, phone_type))",ltolomelli4@istockphoto.com,802-24-1062,F,,1920-05-31,1920-05-31,False,,,List(Albanian),"List(List(Edgepulse, Senior Developer, Active, 1931-01-16), List(Edgepulse, Senior Developer, Active, 1931-01-16))"
70006,40170,Johnny,Renaud,Gibben,Johnny Gibben Renaud,Mr.,Not Known,2024-02-07T16:29:48.089+0000,True,"List(List(Residental, 8737 Flores Extension Suite 549, null, Jasonbury, Idaho, USA, 20277, null), List(Mail, 1049 Riggs Stream Suite 632, Suite 465, New Christopher, Maine, USA, 43342, null))","List(List(null, phone_type), List(null, phone_type))",rgibben5@tumblr.com,563-98-1576,M,Single,1958-07-01,1958-07-01,False,,,List(Georgian),"List(List(Oodoo, Human Resources Assistant I, Active, 2021-12-26), List(Oodoo, Human Resources Assistant I, Active, 2021-12-26))"
70007,40194,Judas,,Mitford,Judas,Mr.,Not Known,2024-02-07T16:29:48.089+0000,True,"List(List(Residental, 7475 Michael Land, Suite 392, New Latoyamouth, Hawaii, USA, 89157, null), List(Mail, 16915 Michelle Fields Apt. 930, Suite 488, South Pamela, New Mexico, USA, 44394, null))","List(List(null, phone_type), List(null, phone_type))",bmitford6@github.io,626-84-9457,M,Divorced,1993-07-30,1993-07-30,False,,,"List(New Zealand Sign Language, Nepali)","List(List(Bluejam, Data Coordinator, Active, 2018-06-11), List(Bluejam, Data Coordinator, Active, 2018-06-11))"
70008,40079,Wilden,Tobin,Huertas,Wilden Huertas Tobin,Mr.,Not Known,2024-02-07T16:29:48.089+0000,True,"List(List(Residental, 891 Frank Squares Suite 096, Apt. 809, Whitefort, Virginia, USA, 17694, null), List(Mail, 64558 Alexis Club, Apt. 683, Lake Lindaside, New Hampshire, USA, null, null))","List(List(null, phone_type), List(null, phone_type))",thuertas7@yahoo.co.jp,667-45-8806,M,Widowed,1906-08-02,1906-08-02,False,,,List(Norwegian),"List(List(Roomm, Database Administrator I, Active, 1923-03-07), List(Roomm, Database Administrator I, Active, 1923-03-07))"
70009,40466,Gaelan,,Smitheman,Gaelan,Mr.,Not Known,2024-02-07T16:29:48.089+0000,True,"List(List(Mail, 839 Garcia Highway, Apt. 915, Jermaineborough, North Dakota, USA, 25837, null), List(Residental, 7163 Thompson Park Suite 842, null, Kevinside, Nebraska, USA, 74553, null))","List(List(null, phone_type), List(null, phone_type))",msmitheman8@ezinearticles.com,854-32-5148,M,Divorced,1926-03-04,1926-03-04,False,,,"List(Japanese, Catalan)","List(List(Trupe, Analyst Programmer, Active, 1998-12-15), List(Trupe, Analyst Programmer, Active, 1998-12-15))"
70010,40061,Letti,,Folkard,Letti,Mrs.,SS,2024-02-07T16:29:48.089+0000,True,"List(List(Residental, 7357 Beck Garden Apt. 240, null, Hunterfort, Wyoming, USA, 69080, null), List(Mail, 388 Perry Mills, Suite 521, Monroechester, Minnesota, USA, 13191, null))","List(List(null, phone_type), List(null, phone_type))",tfolkard9@biblegateway.com,867-58-4596,F,Divorced,1900-03-07,1900-03-07,False,,,"List(Tajik, Tamil)","List(List(Yambee, Staff Scientist, Active, 2005-03-11), List(Yambee, Staff Scientist, Active, 2005-03-11))"


In [0]:
%sql
INSERT INTO newtable (
    source_id,
    subscriber_id,
    first_name,
    middle_name,
    last_name,
    prefix_name,
    suffix_name,
    name,
    record_created_ts,
    is_verified,
    addresses,
    phones,
    email,
    national_id,
    gender,
    marital_status,
    date_of_birth,
    year_of_birth,
    deceased_ind,
    deceased_age,
    deceased_date,
    languages,
    employment
)
SELECT 
    header.id AS source_id,
    header.insurer_id AS subscriber_id,
    detail.first_name,
    detail.middle_name,
    detail.last_name,
    CASE
        WHEN detail.gender = 'M' THEN 'Mr.'
        WHEN detail.gender = 'F' AND detail.marital_status IS NULL THEN 'Miss.'
        WHEN detail.gender = 'F' AND detail.marital_status IN ('Married', 'Divorced', 'Widowed') THEN 'Mrs.'
        WHEN detail.gender IS NULL AND detail.marital_status IS NULL OR detail.marital_status IN ('Married', 'Divorced', 'Widowed') THEN 'Misc.'
        ELSE 'Not Known'
    END AS prefix_name,
    CASE 
        WHEN detail.job_role = 'Staff Scientist' THEN 'SS'
        WHEN detail.job_role = 'Research Associate' THEN 'RA'
        WHEN detail.job_role = 'Registered Nurse' THEN 'RN'
        ELSE 'Not Known'
    END AS suffix_name,
    COALESCE(CONCAT(detail.first_name, ' ', detail.last_name, ' ', detail.middle_name), COALESCE(detail.first_name, detail.last_name, detail.middle_name)) AS name,
    CURRENT_TIMESTAMP AS record_created_ts,
    CASE
        WHEN detail.email LIKE '%@%.%' AND
             detail.email NOT LIKE '%@%@%' AND
             detail.email NOT LIKE '%..%' AND
             detail.email NOT LIKE '%.@%' AND
             detail.email NOT LIKE '%@.%' AND
             LEN(detail.email) - LEN(REPLACE(detail.email, '@', '')) = 1
             THEN true
        ELSE false
    END AS is_verified,
    COLLECT_LIST(
        STRUCT(
            address.address_type,
            address.address_line_1,
            address.address_line_2,
            address.city,
            address.state,
            'USA' AS country,
            CASE
                WHEN LENGTH(address.zipcode) = 5 THEN address.zipcode
                ELSE NULL
            END AS postal_code,
            CASE
                WHEN LENGTH(address.zipcode) = 9 THEN SPLIT(address.zipcode, '-')[1]
                ELSE NULL
            END AS zip_code
        )
    ) AS addresses,
    COLLECT_LIST(
        STRUCT(
            contact.value AS phone_number,
            'phone_type' AS usage_type
        )
    ) AS phones,
    detail.email,
    detail.ssn AS national_id,
    detail.gender AS gender, 
    detail.marital_status AS marital_status,
    TO_DATE(detail.date_of_birth, 'M/d/yyyy') AS date_of_birth,
    TO_DATE(detail.date_of_birth, 'yyyy') AS year_of_birth,
    CASE
        WHEN COALESCE(detail.deceased_date, 'false') = 'false' THEN false
        ELSE true
    END AS deceased_ind,
    CASE
        WHEN COALESCE(detail.deceased_date, 'false') = 'false' THEN NULL
        ELSE TRY_CAST(DATEDIFF(TO_DATE(detail.deceased_date, 'M/d/yyyy'), TO_DATE(detail.date_of_birth, 'M/d/yyyy')) / 365 AS INT)
    END AS deceased_age,
    TO_DATE(detail.deceased_date, 'M/d/yyyy') AS deceased_date,
    STRUCT(detail.spoken_language_1 AS spoken_language_1, detail.spoken_language_2 AS spoken_language_2) AS languages,
    STRUCT(
        detail.company AS name,
        detail.job_role AS job_role,
        CASE
            WHEN detail.job_hiredate IS NOT NULL THEN 'Active'
            ELSE 'Inactive'
        END AS marital_status,
        TO_DATE(detail.job_hiredate, 'M/d/yyyy') AS job_hiredate
    ) AS employment

FROM header_info AS header
LEFT JOIN detail_info AS detail ON header.id = detail.id
LEFT JOIN contact_info AS contact ON header.id = contact.value
LEFT JOIN address_info AS address ON header.id = address.id
GROUP BY
    header.id,
    header.insurer_id,
    detail.first_name,
    detail.middle_name,
    detail.last_name,
    detail.gender,
    detail.marital_status,
    detail.email,
    detail.ssn,
    detail.date_of_birth,
    detail.deceased_date,
    detail.spoken_language_1,
    detail.spoken_language_2,
    detail.company,
    detail.job_role,
    detail.job_hiredate;


num_affected_rows,num_inserted_rows
1500,1500
