# Building structured Claims data tables from x12 EDI with EDI ember

In [0]:
pip install git+https://github.com/databricks-industry-solutions/x12-edi-parser

Collecting git+https://github.com/databricks-industry-solutions/x12-edi-parser
  Cloning https://github.com/databricks-industry-solutions/x12-edi-parser to /tmp/pip-req-build-ieav33b5
  Running command git clone --filter=blob:none --quiet https://github.com/databricks-industry-solutions/x12-edi-parser /tmp/pip-req-build-ieav33b5
  Resolved https://github.com/databricks-industry-solutions/x12-edi-parser to commit 2e10c403236bff33113f06449d44f237b618f7e1
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
[43mNote: you may need to restart the kernel using %restart_python or dbutils.library.restartPython() to use updated packages.[0m


In [0]:
dbutils.library.restartPython()

### Solution: Databricks’ X12 EDI Ember
Databricks has developed an open source code repository, x12-edi-parser, also called EDI Ember, to accelerate value and time to insight by parsing your EDI data using Spark workflows. We have worked with our partner, CitiusTech, who has contributed to the repo functionality and can help enterprises scale EDI and/or claims-based functions such as:

- Transaction-type discovery: Automatically detect and classify functional groups as Institutional Claims (837I), Professional Claims (837P), or other X12 transaction sets
- Rich claim-segment extraction: Pull out financial and clinical data—claim amounts, procedure codes, service lines, revenue codes, diagnoses, and more
- Hierarchical loop recognition: To preserve EDI’s nested loops, identify which loop each claim belongs to, extract billing provider, subscriber, dependents, and capture the sender/receiver interchange partners
JSON conversion and downstream readiness: Flatten and normalize all segments into clean, schema-on-read JSON objects, ready for analytics, data lakes, or downstream systems


This schematic illustrates how the X12 EDI Parser flows from raw transaction segments to structured healthcare claims data using the `HealthcareManager` in Databricks.

![X12 Parser Schematic](../images/parser_schematic.jpg)

In [0]:
from ember import *
from ember.hls.healthcare import HealthcareManager as hm
import json, os
from pyspark.sql.functions import input_file_name

df = spark.read.text("file:///" + os.getcwd() + "/../sampledata/837/*txt", wholetext = True)

rdd = (
  df.withColumn("filename", input_file_name()).rdd #convert to rdd
  .map(lambda row: (row.filename, EDI(row.value))) #parse as an EDI format
  .flatMap(lambda edi: hm.flatten(edi[1], filename = edi[0])) #extract out healthcare specific groupings, one row per claim/remittance/enrollment etc
  )

claims_rdd = (
rdd.repartition(4) #Repartition number should be >= # of cores in cluster and <= number of rows in rdd / DataFrame
.map(lambda x: hm.flatten_to_json(x))
.map(lambda x: json.dumps(x))
)

claims = spark.read.json(claims_rdd)

In [0]:
claims.createOrReplaceTempView("stg_claims")

In [0]:
%sql
select * from stg_claims

EDI.control_number,EDI.date,EDI.recipient_qualifier_id,EDI.sender_qualifier_id,EDI.standard_version,EDI.time,FunctionalGroup.control_number,FunctionalGroup.date,FunctionalGroup.receiver,FunctionalGroup.sender,FunctionalGroup.standard_version,FunctionalGroup.time,FunctionalGroup.transaction_type,Transaction.transaction_type,claim_header,claim_lines,diagnosis,filename,patient,payer,providers,receiver,submitter,subscriber
1507,180807,30421406317,01030240928,501,1202,150700,20180807,421406317,030240928,005010X222A1,12022605,222,222,"List(, , 600, List(List(20180625, 435, D8)), TD-R192ICE00094, List(), , , , 11:B:1, List(List(), List(, , )), List())","List(List(1, 1:2, 300, HO,HQ, List(), 11, H0004, HC, null, List(List(20180625-20180625, 472, RD8)), 12, UN), List(2, 1:2, 300, HO,HQ, List(), 11, H0004, HC, null, List(List(20180629-20180629, 472, RD8)), 12, UN))","List(, , , , List(), F341, F341, , )",file:/Workspace/Users/raven.mukherjee@databricks.com/x12-edi-parser/sampledata/837/CC_837P_EDI.txt,"List(Elgin, 19960215, D8, F, , TEST MEMBER , , IL, 123 MAIN STREET, U0000000001, 18, 601200001)","List(2, 421406317, PI, CENTENE)","List(null, List(WOODSTOCK, 999999999, EI, Organization, Test Provider, 9999999999, , IL, 123 MAIN STREET, , 600980000), null, null, List(, , , Individual, , , , , , , ), List(, , , Individual, , , , , , , ), List(, , , Individual, , , , , , , ))","List(Centene Corporation, Organization)","List(Centene Corporation, Sambit, List(List(TE, 3143202264), List(EX, 8099264)), Organization)","List(Elgin, 19960215, D8, F, , TEST MEMBER , , IL, 123 MAIN STREET, U0000000001, 18, 601200001)"
697773230,180508,ZZ123456789,01987654321,501,833,212950697,20180508,123456789,CLEARINGHOUSE,005010X222A1,833,222,222,"List(, , 20, List(), 1805080AV3648339, List(List(7349065509, D9)), , , , 57:B:1, List(List(), List(, , )), List())","List(List(1, 1, 20, , List(), , H0003, HC, null, List(List(20180428, 472, D8)), 1, UN))","List(, , , , List(), F1120, F1120, , )",file:/Workspace/Users/raven.mukherjee@databricks.com/x12-edi-parser/sampledata/837/CHPW_Claimdata.txt,"List(VANCOUVER, 19881225, D8, M, , SUBSCRIBER JOHN J, , WA, 987 65TH PL, 987321, 18, 986640001)","List(2, CHPWA, PI, COMMUNITY HEALTH PLAN OF WASHINGTON)","List(null, List(VANCOUVER, 720000000, EI, Organization, BH CLINIC OF VANCOUVER, 1122334455, , WA, 12345 MAIN ST, , 98662), null, null, List(, , , Individual, , , , , , , ), List(VANCOUVER, , , Organization, BH CLINIC OF VANCOUVER, 1122334455, , WA, 12345 MAIN ST SUITE A1, , 98662), List(, , , Individual, PROVIDER JAMES , 1112223338, PE, , , 261QR0405X, ))","List(123456789, Organization)","List(CLEARINGHOUSE LLC, CLEARINGHOUSE CLIENT SERVICES, List(List(TE, 8005551212), List(FX, 8005551212)), Organization)","List(VANCOUVER, 19881225, D8, M, , SUBSCRIBER JOHN J, , WA, 987 65TH PL, 987321, 18, 986640001)"
697773230,180508,ZZ123456789,01987654321,501,833,212950697,20180508,123456789,CLEARINGHOUSE,005010X222A1,833,222,222,"List(, , 50.1, List(), 1805080AV3648347, List(List(7349065730, D9)), , , , 57:B:1, List(List(), List(, , )), List())","List(List(1, 1:2, 50.1, HF, List(), , 96153, HC, null, List(List(20180426, 472, D8)), 6, UN))","List(, , , , List(), F1520, F1520, , )",file:/Workspace/Users/raven.mukherjee@databricks.com/x12-edi-parser/sampledata/837/CHPW_Claimdata.txt,"List(VANCOUVER, 19881031, D8, F, , PATIENT SUSAN E, , WA, 765 43RD ST, 765123, 18, 986640002)","List(2, CHPWA, PI, COMMUNITY HEALTH PLAN OF WASHINGTON)","List(null, List(VANCOUVER, 720000000, EI, Organization, BH CLINIC OF VANCOUVER, 1122334455, , WA, 12345 MAIN ST, , 98662), null, null, List(, , , Individual, , , , , , , ), List(VANCOUVER, , , Organization, BH CLINIC OF VANCOUVER, 1122334455, , WA, 12345 MAIN ST SUITE A1, , 98662), List(, , , Individual, PROVIDER SUSAN , 1112223346, PE, , , 261QR0405X, ))","List(123456789, Organization)","List(CLEARINGHOUSE LLC, CLEARINGHOUSE CLIENT SERVICES, List(List(TE, 8005551212), List(FX, 8005551212)), Organization)","List(VANCOUVER, 19881031, D8, F, , PATIENT SUSAN E, , WA, 765 43RD ST, 765123, 18, 986640002)"
697773230,180508,ZZ123456789,01987654321,501,833,212950697,20180508,123456789,CLEARINGHOUSE,005010X222A1,833,222,222,"List(, , 11.64, List(), 1805080AV3648340, List(List(7349065492, D9)), , , , 57:B:1, List(List(), List(, , )), List())","List(List(1, 1:2, 11.64, HF, List(), , T1017, HC, null, List(List(20180427, 472, D8)), 1, UN))","List(, , , , List(), F1020, F1020, , )",file:/Workspace/Users/raven.mukherjee@databricks.com/x12-edi-parser/sampledata/837/CHPW_Claimdata.txt,"List(VANCOUVER, 19881225, D8, M, , SUBSCRIBER JOHN J, , WA, 987 65TH PL, 987321, 18, 986640001)","List(2, CHPWA, PI, COMMUNITY HEALTH PLAN OF WASHINGTON)","List(null, List(VANCOUVER, 720000000, EI, Organization, BH CLINIC OF VANCOUVER, 1122334455, , WA, 12345 MAIN ST, , 98662), null, null, List(, , , Individual, , , , , , , ), List(VANCOUVER, , , Organization, BH CLINIC OF VANCOUVER, 1122334455, , WA, 12345 MAIN ST SUITE A1, , 98662), List(, , , Individual, PROVIDER SUSAN , 1112223346, PE, , , 261QR0405X, ))","List(123456789, Organization)","List(CLEARINGHOUSE LLC, CLEARINGHOUSE CLIENT SERVICES, List(List(TE, 8005551212), List(FX, 8005551212)), Organization)","List(VANCOUVER, 19881225, D8, M, , SUBSCRIBER JOHN J, , WA, 987 65TH PL, 987321, 18, 986640001)"
697773230,180508,ZZ123456789,01987654321,501,833,212950697,20180508,123456789,CLEARINGHOUSE,005010X222A1,833,222,222,"List(, , 234, List(), 1805080AV3648353, List(List(7349064290, D9)), , , , 53:B:1, List(List(), List(, , )), List())","List(List(1, 1, 234, , List(), , 90853, HC, null, List(List(20180427, 472, D8)), 120, UN))","List(, , , , List(), F251, F251, , )",file:/Workspace/Users/raven.mukherjee@databricks.com/x12-edi-parser/sampledata/837/CHPW_Claimdata.txt,"List(VANCOUVER, 19881031, D8, F, , PATIENT SUSAN E, , WA, 765 43RD ST, 765123, 18, 986640002)","List(2, CHPWA, PI, COMMUNITY HEALTH PLAN OF WASHINGTON)","List(null, List(VANCOUVER, 720000000, EI, Organization, BH CLINIC OF VANCOUVER, 1122334455, , WA, 12345 MAIN ST, , 98662), null, null, List(, , , Individual, , , , , , , ), List(VANCOUVER, , , Organization, BH CLINIC OF VANCOUVER, 1122334455, , WA, 12345 MAIN ST SUITE A1, , 98662), List(, , , Individual, PROVIDER SUSAN , 1112223346, PE, , , 251S00000X, ))","List(123456789, Organization)","List(CLEARINGHOUSE LLC, CLEARINGHOUSE CLIENT SERVICES, List(List(TE, 8005551212), List(FX, 8005551212)), Organization)","List(VANCOUVER, 19881031, D8, F, , PATIENT SUSAN E, , WA, 765 43RD ST, 765123, 18, 986640002)"
697773230,180508,ZZ123456789,01987654321,501,833,212950697,20180508,123456789,CLEARINGHOUSE,005010X222A1,833,222,222,"List(, , 20, List(), 1805080AV3648355, List(List(7349064036, D9)), , , , 57:B:1, List(List(), List(, , )), List())","List(List(1, 1:2, 20, , List(), , H0003, HC, null, List(List(20180427, 472, D8)), 1, UN))","List(, , , , List(), F1020, F1020, , )",file:/Workspace/Users/raven.mukherjee@databricks.com/x12-edi-parser/sampledata/837/CHPW_Claimdata.txt,"List(VANCOUVER, 19881225, D8, M, , SUBSCRIBER JOHN J, , WA, 987 65TH PL, 987321, 18, 986640001)","List(2, CHPWA, PI, COMMUNITY HEALTH PLAN OF WASHINGTON)","List(null, List(VANCOUVER, 720000000, EI, Organization, BH CLINIC OF VANCOUVER, 1122334455, , WA, 12345 MAIN ST, , 98662), null, null, List(, , , Individual, , , , , , , ), List(VANCOUVER, , , Organization, BH CLINIC OF VANCOUVER, 1122334455, , WA, 12345 MAIN ST SUITE A1, , 98662), List(, , , Individual, PROVIDER JAMES , 1112223338, PE, , , 261QR0405X, ))","List(123456789, Organization)","List(CLEARINGHOUSE LLC, CLEARINGHOUSE CLIENT SERVICES, List(List(TE, 8005551212), List(FX, 8005551212)), Organization)","List(VANCOUVER, 19881225, D8, M, , SUBSCRIBER JOHN J, , WA, 987 65TH PL, 987321, 18, 986640001)"
1508,180807,ZZ421406317,30133052274,501,1202,1770,20180710,OO000011111,000000005D,005010X222A1,214339,222,222,"List(, , 1800, List(List(20180123, 454, D8), List(20180123, 304, D8)), ABC11111, List(List(6123456749, EA), List(012345678901234, D9)), , , , 22:B:1, List(List(), List(, , )), List())","List(List(1, 1, 1800, QZ, List(), , 01710, HC, null, List(List(20180123-20180123, 472, RD8), List(20180713, 454, D8), List(20180713, 304, D8)), 88, MJ), List(1, 1, 984, AA,P3, List(), , 00731, HC, null, List(List(20180713-20180713, 472, RD8), List(20180713, 454, D8), List(20180713, 304, D8)), 24, MJ), List(1, 1, 1353, AA, List(), , 00840, HC, null, List(List(20180713-20180713, 472, RD8), List(20180713, 454, D8), List(20180713, 304, D8)), 62, MJ), List(1, 1, 1230, AA, List(), , 00902, HC, null, List(List(20180713-20180713, 472, RD8)), 65, MJ), List(2, 2, 738, 59, List(), , 62322, HC, null, List(List(20180713-20180713, 472, RD8)), 1, UN))","List(, , , , List(), G5621, G5621, , )",file:/Workspace/Users/raven.mukherjee@databricks.com/x12-edi-parser/sampledata/837/Molina_Mock_UP_837P_File.txt,"List(FAKE CITY, 19650101, D8, M, , ABCDEFGH IJKLMNOP B, , CA, 123 ADDRESS3, 111111100, 18, 908021112)","List(2, ABCMMPIO, PI, ABCHYI OL POI)","List(null, List(FAKE CITY, 123456789, EI, Organization, YYYY HEALTHCARE ABC, 1222222220, , NY, 123 ADDRESS1, , 908021112), null, null, List(, , , Individual, ABCD STUVW , 1234567890, , , , , ), List(FAKE CITY, , , Organization, ABCDEFG HIJKLMN HOSP, 1122334460, , CA, 123 ADDRESS4, , 908021114), List(, , , Individual, TUVWX MNOPQR , 1234567891, PE, , , 367500000X, ))","List(ABCHYI OL POI, Organization)","List(ABCDEF, ABCDEF CUSTOMER SOLUTIONS, List(List(TE, 9999999999), List(, )), Organization)","List(FAKE CITY, 19650101, D8, M, , ABCDEFGH IJKLMNOP B, , CA, 123 ADDRESS3, 111111100, 18, 908021112)"
1508,180807,ZZ421406317,30133052274,501,1202,1770,20180710,OO000011111,000000005D,005010X222A1,214339,222,222,"List(, , 984, List(List(20180713, 454, D8), List(20180713, 304, D8)), ABC111112, List(List(1254789634, EA), List(012345678901231, D9)), , , , 22:B:1, List(List(), List(, , )), List())","List(List(1, 1, 984, AA,P3, List(), , 00731, HC, null, List(List(20180713-20180713, 472, RD8), List(20180713, 454, D8), List(20180713, 304, D8)), 24, MJ), List(1, 1, 1353, AA, List(), , 00840, HC, null, List(List(20180713-20180713, 472, RD8), List(20180713, 454, D8), List(20180713, 304, D8)), 62, MJ), List(1, 1, 1230, AA, List(), , 00902, HC, null, List(List(20180713-20180713, 472, RD8)), 65, MJ), List(2, 2, 738, 59, List(), , 62322, HC, null, List(List(20180713-20180713, 472, RD8)), 1, UN))","List(, , , , List(), K219, K219, , )",file:/Workspace/Users/raven.mukherjee@databricks.com/x12-edi-parser/sampledata/837/Molina_Mock_UP_837P_File.txt,"List(FAKE CITY, 19760101, D8, F, , ABCDEFGH IJKLMNOP B, , CA, 123 ADDRESS7, 11111117, 18, 908021117)","List(2, ABCMMPIO, PI, ABCHYI OL POI)","List(null, List(FAKE CITY, 543211234, EI, Organization, ABCDE EFGHIJ GROUP PC, 1222222223, , CA, 123 ADDRESS5, , 908021115), null, null, List(, , , Individual, ABCDEFG OPQRST A, 1122334460, , , , , ), List(FAKE CITY, , , Organization, ABCDEFG HIJKLMN HOSP, 1122334450, , CA, 123 ADDRESS8, , 908021118), List(, , , Individual, STUVW KLMNOP H, 1122334461, PE, , , 207L00000X, ))","List(ABCHYI OL POI, Organization)","List(ABCDEF, ABCDEF CUSTOMER SOLUTIONS, List(List(TE, 9999999999), List(, )), Organization)","List(FAKE CITY, 19760101, D8, F, , ABCDEFGH IJKLMNOP B, , CA, 123 ADDRESS7, 11111117, 18, 908021117)"
1508,180807,ZZ421406317,30133052274,501,1202,1770,20180710,OO000011111,000000005D,005010X222A1,214339,222,222,"List(, , 1353, List(List(20180713, 454, D8), List(20180713, 304, D8)), ABC111113, List(List(963852741, EA), List(548721986532651, D9)), , , , 22:B:1, List(List(), List(, , )), List())","List(List(1, 1, 1353, AA, List(), , 00840, HC, null, List(List(20180713-20180713, 472, RD8), List(20180713, 454, D8), List(20180713, 304, D8)), 62, MJ), List(1, 1, 1230, AA, List(), , 00902, HC, null, List(List(20180713-20180713, 472, RD8)), 65, MJ), List(2, 2, 738, 59, List(), , 62322, HC, null, List(List(20180713-20180713, 472, RD8)), 1, UN))","List(, , , , List(), K4090, K4090, , )",file:/Workspace/Users/raven.mukherjee@databricks.com/x12-edi-parser/sampledata/837/Molina_Mock_UP_837P_File.txt,"List(FAKE CITY, 20180409, D8, M, , ABCDEFGH IJKLMNOP B, , CA, 123 ADDRESS11, 111111111, 18, 908021111)","List(2, YTHF281123456, PI, ABCHYI OL POI)","List(null, List(FAKE CITY, 931013923, EI, Organization, ABCDE EFGHIJ GROUP PC, 1477527786, , CA, 123 ADDRESS9, , 908021119), null, null, List(, , , Individual, ABCDEFGH OPQRSTU A, 1122334456, , , , , ), List(FAKE CITY, , , Organization, ABCDEFG HIJKLMN HOSP, 1122334458, , CA, 123 ADDRESS12, , 908021112), List(, , , Individual, STUVWX KLMNOPQ H, 1122334457, PE, , , 207L00000X, ))","List(ABCHYI OL POI, Organization)","List(ABCDEF, ABCDEF CUSTOMER SOLUTIONS, List(List(TE, 9999999999), List(, )), Organization)","List(FAKE CITY, 20180409, D8, M, , ABCDEFGH IJKLMNOP B, , CA, 123 ADDRESS11, 111111111, 18, 908021111)"
1508,180807,ZZ421406317,30133052274,501,1202,1770,20180710,OO000011111,000000005D,005010X222A1,214339,222,222,"List(, , 1968, List(List(20180713, 454, D8), List(20180713, 304, D8)), ABC111114, List(List(123456789, EA), List(215487986532544, D9)), , , , 22:B:1, List(List(), List(, , )), List())","List(List(1, 1, 1230, AA, List(), , 00902, HC, null, List(List(20180713-20180713, 472, RD8)), 65, MJ), List(2, 2, 738, 59, List(), , 62322, HC, null, List(List(20180713-20180713, 472, RD8)), 1, UN))","List(, , , , List(), Q423, Q423, , )",file:/Workspace/Users/raven.mukherjee@databricks.com/x12-edi-parser/sampledata/837/Molina_Mock_UP_837P_File.txt,"List(FAKE CITY, 20180504, D8, F, , ABCDEFGH IJKLMNOP B, , CA, 123 ADDRESS13, 111111112, 18, 908021113)","List(2, YTHF281123456, PI, ABCHYI OL POI)","List(null, List(FAKE CITY, 931013923, EI, Organization, ABCDE EFGHIJ GROUP PC, 1477527786, , CA, 123 ADDRESS9, , 908021119), null, null, List(, , , Individual, ABCDEFGHI OPQRSTUV A, 1122334455, , , , , ), List(FAKE CITY, , , Organization, ABCDEFG HIJKLMN HOSP, 1122334457, , CA, 123 ADDRESS15, , 908021115), List(, , , Individual, STUVWXY KLMNOPQR H, 1122334456, PE, , , 207L00000X, ))","List(ABCHYI OL POI, Organization)","List(ABCDEF, ABCDEF CUSTOMER SOLUTIONS, List(List(TE, 9999999999), List(, )), Organization)","List(FAKE CITY, 20180504, D8, F, , ABCDEFGH IJKLMNOP B, , CA, 123 ADDRESS13, 111111112, 18, 908021113)"


| Concept        | Claim Header                                          | Claim Line                                            |
| -------------- | ----------------------------------------------------- | ----------------------------------------------------- |
| Level          | Summary-level (1 per claim)                           | Line-item level (multiple per claim)                  |
| Content        | Patient, provider, payer, total charge                | CPT/HCPCS codes, modifiers, units, charge per service |
| EDI Source     | CLM segment (837P/I ST loop)                          | SV1 (837P) or SV2 (837I) segments                     |



claim_header = encounter/visit-level grouping (like a bill).

claim_line = each service rendered during that visit.


In [0]:
%sql
drop table if exists raven_catalog.x12_edi_parser.claim_header;
create table raven_catalog.x12_edi_parser.claim_header as 
select * except(claim_lines)
from stg_claims
;

SELECT * FROM raven_catalog.x12_edi_parser.claim_header

EDI.control_number,EDI.date,EDI.recipient_qualifier_id,EDI.sender_qualifier_id,EDI.standard_version,EDI.time,FunctionalGroup.control_number,FunctionalGroup.date,FunctionalGroup.receiver,FunctionalGroup.sender,FunctionalGroup.standard_version,FunctionalGroup.time,FunctionalGroup.transaction_type,Transaction.transaction_type,claim_header,diagnosis,filename,patient,payer,providers,receiver,submitter,subscriber
697773230,180508,ZZ123456789,01987654321,501,833,212950697,20180508,123456789,CLEARINGHOUSE,005010X222A1,833,222,222,"List(, , 20, List(), 1805080AV3648339, , , , 57:B:1)","List(, , , F1120, )",file:/Workspace/Users/raven.mukherjee@databricks.com/x12-edi-parser/sampledata/837/CHPW_Claimdata.txt,"List(VANCOUVER, 19881225, D8, M, , , SUBSCRIBER JOHN J, , WA, 987 65TH PL, 987321, 18, 986640001)","List(2, CHPWA, PI, COMMUNITY HEALTH PLAN OF WASHINGTON)","List(null, List(VANCOUVER, 720000000, EI, Organization, BH CLINIC OF VANCOUVER, 1122334455, , WA, 12345 MAIN ST, , 98662), null, null, null, List(, , , Individual, , , , , , , ), List(, , , Organization, BH CLINIC OF VANCOUVER, 1122334455, , , , , ), List(, , , Individual, PROVIDER JAMES , 1112223338, PE, , , 261QR0405X, ))","List(123456789, Organization)","List(CLEARINGHOUSE LLC, CLEARINGHOUSE CLIENT SERVICES, List(List(TE, 8005551212), List(FX, 8005551212)), Organization)","List(VANCOUVER, 19881225, D8, M, , , SUBSCRIBER JOHN J, , WA, 987 65TH PL, 987321, 18, 986640001)"
697773230,180508,ZZ123456789,01987654321,501,833,212950697,20180508,123456789,CLEARINGHOUSE,005010X222A1,833,222,222,"List(, , 50.1, List(), 1805080AV3648347, , , , 57:B:1)","List(, , , F1520, )",file:/Workspace/Users/raven.mukherjee@databricks.com/x12-edi-parser/sampledata/837/CHPW_Claimdata.txt,"List(VANCOUVER, 19881031, D8, F, , , PATIENT SUSAN E, , WA, 765 43RD ST, 765123, 18, 986640002)","List(2, CHPWA, PI, COMMUNITY HEALTH PLAN OF WASHINGTON)","List(null, List(VANCOUVER, 720000000, EI, Organization, BH CLINIC OF VANCOUVER, 1122334455, , WA, 12345 MAIN ST, , 98662), null, null, null, List(, , , Individual, , , , , , , ), List(, , , Organization, BH CLINIC OF VANCOUVER, 1122334455, , , , , ), List(, , , Individual, PROVIDER SUSAN , 1112223346, PE, , , 261QR0405X, ))","List(123456789, Organization)","List(CLEARINGHOUSE LLC, CLEARINGHOUSE CLIENT SERVICES, List(List(TE, 8005551212), List(FX, 8005551212)), Organization)","List(VANCOUVER, 19881031, D8, F, , , PATIENT SUSAN E, , WA, 765 43RD ST, 765123, 18, 986640002)"
697773230,180508,ZZ123456789,01987654321,501,833,212950697,20180508,123456789,CLEARINGHOUSE,005010X222A1,833,222,222,"List(, , 11.64, List(), 1805080AV3648340, , , , 57:B:1)","List(, , , F1020, )",file:/Workspace/Users/raven.mukherjee@databricks.com/x12-edi-parser/sampledata/837/CHPW_Claimdata.txt,"List(VANCOUVER, 19881225, D8, M, , , SUBSCRIBER JOHN J, , WA, 987 65TH PL, 987321, 18, 986640001)","List(2, CHPWA, PI, COMMUNITY HEALTH PLAN OF WASHINGTON)","List(null, List(VANCOUVER, 720000000, EI, Organization, BH CLINIC OF VANCOUVER, 1122334455, , WA, 12345 MAIN ST, , 98662), null, null, null, List(, , , Individual, , , , , , , ), List(, , , Organization, BH CLINIC OF VANCOUVER, 1122334455, , , , , ), List(, , , Individual, PROVIDER SUSAN , 1112223346, PE, , , 261QR0405X, ))","List(123456789, Organization)","List(CLEARINGHOUSE LLC, CLEARINGHOUSE CLIENT SERVICES, List(List(TE, 8005551212), List(FX, 8005551212)), Organization)","List(VANCOUVER, 19881225, D8, M, , , SUBSCRIBER JOHN J, , WA, 987 65TH PL, 987321, 18, 986640001)"
697773230,180508,ZZ123456789,01987654321,501,833,212950697,20180508,123456789,CLEARINGHOUSE,005010X222A1,833,222,222,"List(, , 234, List(), 1805080AV3648353, , , , 53:B:1)","List(, , , F251, )",file:/Workspace/Users/raven.mukherjee@databricks.com/x12-edi-parser/sampledata/837/CHPW_Claimdata.txt,"List(VANCOUVER, 19881031, D8, F, , , PATIENT SUSAN E, , WA, 765 43RD ST, 765123, 18, 986640002)","List(2, CHPWA, PI, COMMUNITY HEALTH PLAN OF WASHINGTON)","List(null, List(VANCOUVER, 720000000, EI, Organization, BH CLINIC OF VANCOUVER, 1122334455, , WA, 12345 MAIN ST, , 98662), null, null, null, List(, , , Individual, , , , , , , ), List(, , , Organization, BH CLINIC OF VANCOUVER, 1122334455, , , , , ), List(, , , Individual, PROVIDER SUSAN , 1112223346, PE, , , 251S00000X, ))","List(123456789, Organization)","List(CLEARINGHOUSE LLC, CLEARINGHOUSE CLIENT SERVICES, List(List(TE, 8005551212), List(FX, 8005551212)), Organization)","List(VANCOUVER, 19881031, D8, F, , , PATIENT SUSAN E, , WA, 765 43RD ST, 765123, 18, 986640002)"
697773230,180508,ZZ123456789,01987654321,501,833,212950697,20180508,123456789,CLEARINGHOUSE,005010X222A1,833,222,222,"List(, , 20, List(), 1805080AV3648355, , , , 57:B:1)","List(, , , F1020, )",file:/Workspace/Users/raven.mukherjee@databricks.com/x12-edi-parser/sampledata/837/CHPW_Claimdata.txt,"List(VANCOUVER, 19881225, D8, M, , , SUBSCRIBER JOHN J, , WA, 987 65TH PL, 987321, 18, 986640001)","List(2, CHPWA, PI, COMMUNITY HEALTH PLAN OF WASHINGTON)","List(null, List(VANCOUVER, 720000000, EI, Organization, BH CLINIC OF VANCOUVER, 1122334455, , WA, 12345 MAIN ST, , 98662), null, null, null, List(, , , Individual, , , , , , , ), List(, , , Organization, BH CLINIC OF VANCOUVER, 1122334455, , , , , ), List(, , , Individual, PROVIDER JAMES , 1112223338, PE, , , 261QR0405X, ))","List(123456789, Organization)","List(CLEARINGHOUSE LLC, CLEARINGHOUSE CLIENT SERVICES, List(List(TE, 8005551212), List(FX, 8005551212)), Organization)","List(VANCOUVER, 19881225, D8, M, , , SUBSCRIBER JOHN J, , WA, 987 65TH PL, 987321, 18, 986640001)"
1508,180807,ZZ421406317,30133052274,501,1202,1770,20180710,OO000011111,000000005D,005010X222A1,214339,222,222,"List(, , 1800, List(List(20180123, 454, D8), List(20180123, 304, D8)), ABC11111, , , , 22:B:1)","List(, , , G5621, )",file:/Workspace/Users/raven.mukherjee@databricks.com/x12-edi-parser/sampledata/837/Molina_Mock_UP_837P_File.txt,"List(FAKE CITY, 19650101, D8, M, , , ABCDEFGH IJKLMNOP B, , CA, 123 ADDRESS3, 111111100, 18, 908021112)","List(2, ABCMMPIO, PI, ABCHYI OL POI)","List(null, List(FAKE CITY, 123456789, EI, Organization, YYYY HEALTHCARE ABC, 1222222220, , NY, 123 ADDRESS1, , 908021112), null, null, null, List(, , , Individual, ABCD STUVW , 1234567890, , , , , ), List(, , , Organization, ABCDEFG HIJKLMN HOSP, 1122334460, , , , , ), List(, , , Individual, TUVWX MNOPQR , 1234567891, PE, , , 367500000X, ))","List(ABCHYI OL POI, Organization)","List(ABCDEF, ABCDEF CUSTOMER SOLUTIONS, List(List(TE, 9999999999), List(, )), Organization)","List(FAKE CITY, 19650101, D8, M, , , ABCDEFGH IJKLMNOP B, , CA, 123 ADDRESS3, 111111100, 18, 908021112)"
1508,180807,ZZ421406317,30133052274,501,1202,1770,20180710,OO000011111,000000005D,005010X222A1,214339,222,222,"List(, , 984, List(List(20180713, 454, D8), List(20180713, 304, D8)), ABC111112, , , , 22:B:1)","List(, , , K219, )",file:/Workspace/Users/raven.mukherjee@databricks.com/x12-edi-parser/sampledata/837/Molina_Mock_UP_837P_File.txt,"List(FAKE CITY, 19760101, D8, F, 125478963, SY, ABCDEFGH IJKLMNOP B, , CA, 123 ADDRESS7, 11111117, 18, 908021117)","List(2, ABCMMPIO, PI, ABCHYI OL POI)","List(null, List(FAKE CITY, 543211234, EI, Organization, ABCDE EFGHIJ GROUP PC, 1222222223, , CA, 123 ADDRESS5, , 908021115), null, null, null, List(, , , Individual, ABCDEFG OPQRST A, 1122334460, , , , , ), List(, , , Organization, ABCDEFG HIJKLMN HOSP, 1122334450, , , , , ), List(, , , Individual, STUVW KLMNOP H, 1122334461, PE, , , 207L00000X, ))","List(ABCHYI OL POI, Organization)","List(ABCDEF, ABCDEF CUSTOMER SOLUTIONS, List(List(TE, 9999999999), List(, )), Organization)","List(FAKE CITY, 19760101, D8, F, 125478963, SY, ABCDEFGH IJKLMNOP B, , CA, 123 ADDRESS7, 11111117, 18, 908021117)"
1508,180807,ZZ421406317,30133052274,501,1202,1770,20180710,OO000011111,000000005D,005010X222A1,214339,222,222,"List(, , 1353, List(List(20180713, 454, D8), List(20180713, 304, D8)), ABC111113, , , , 22:B:1)","List(, , , K4090, )",file:/Workspace/Users/raven.mukherjee@databricks.com/x12-edi-parser/sampledata/837/Molina_Mock_UP_837P_File.txt,"List(FAKE CITY, 20180409, D8, M, , , ABCDEFGH IJKLMNOP B, , CA, 123 ADDRESS11, 111111111, 18, 908021111)","List(2, YTHF281123456, PI, ABCHYI OL POI)","List(null, List(FAKE CITY, 931013923, EI, Organization, ABCDE EFGHIJ GROUP PC, 1477527786, , CA, 123 ADDRESS9, , 908021119), null, null, null, List(, , , Individual, ABCDEFGH OPQRSTU A, 1122334456, , , , , ), List(, , , Organization, ABCDEFG HIJKLMN HOSP, 1122334458, , , , , ), List(, , , Individual, STUVWX KLMNOPQ H, 1122334457, PE, , , 207L00000X, ))","List(ABCHYI OL POI, Organization)","List(ABCDEF, ABCDEF CUSTOMER SOLUTIONS, List(List(TE, 9999999999), List(, )), Organization)","List(FAKE CITY, 20180409, D8, M, , , ABCDEFGH IJKLMNOP B, , CA, 123 ADDRESS11, 111111111, 18, 908021111)"
1508,180807,ZZ421406317,30133052274,501,1202,1770,20180710,OO000011111,000000005D,005010X222A1,214339,222,222,"List(, , 1968, List(List(20180713, 454, D8), List(20180713, 304, D8)), ABC111114, , , , 22:B:1)","List(, , , Q423, )",file:/Workspace/Users/raven.mukherjee@databricks.com/x12-edi-parser/sampledata/837/Molina_Mock_UP_837P_File.txt,"List(FAKE CITY, 20180504, D8, F, , , ABCDEFGH IJKLMNOP B, , CA, 123 ADDRESS13, 111111112, 18, 908021113)","List(2, YTHF281123456, PI, ABCHYI OL POI)","List(null, List(FAKE CITY, 931013923, EI, Organization, ABCDE EFGHIJ GROUP PC, 1477527786, , CA, 123 ADDRESS9, , 908021119), null, null, null, List(, , , Individual, ABCDEFGHI OPQRSTUV A, 1122334455, , , , , ), List(, , , Organization, ABCDEFG HIJKLMN HOSP, 1122334457, , , , , ), List(, , , Individual, STUVWXY KLMNOPQR H, 1122334456, PE, , , 207L00000X, ))","List(ABCHYI OL POI, Organization)","List(ABCDEF, ABCDEF CUSTOMER SOLUTIONS, List(List(TE, 9999999999), List(, )), Organization)","List(FAKE CITY, 20180504, D8, F, , , ABCDEFGH IJKLMNOP B, , CA, 123 ADDRESS13, 111111112, 18, 908021113)"
1508,180807,ZZ421406317,30133052274,501,1202,150800,20180807,421406317,133052274,005010X223A2,12022761,223,223,"List(1, 1, 17166.7, List(List(0900, 096, TM), List(20180628-20180702, 434, RD8), List(201806280800, 435, DT)), TD-R192ICE00087, 01, DR:885, , 11:A:1)","List(F39, , F1319,F419,F17210,E876, F39, )",file:/Workspace/Users/raven.mukherjee@databricks.com/x12-edi-parser/sampledata/837/CC_837I_EDI.txt,"List(Elgin, 19960215, D8, F, , , TEST MEMBER , , IL, 123 MAIN STREET, U0000000001, 18, 601200001)","List(2, 421406317, PI, CENTENE)","List(List(, TD-R192ICE00087, D9, Individual, , , , , , , ), List(WOODSTOCK, 999999999, EI, Organization, Test Provider, 9999999999, , IL, 123 MAIN STREET, , 600980000), List(, , , Individual, , , , , , , ), List(, , , Individual, , , , , , , ), List(, , , Individual, , , , , , , ), null, null, null)","List(CENTENE CORP, Organization)","List(TALEND, SAMBIT, List(List(TE, 3143202264), List(EX, 8099264)), Organization)","List(Elgin, 19960215, D8, F, , , TEST MEMBER , , IL, 123 MAIN STREET, U0000000001, 18, 601200001)"


In [0]:
%sql
drop table if exists raven_catalog.x12_edi_parser.claim_line;
create table raven_catalog.x12_edi_parser.claim_line as 
select *  except(claim_header)
from (
select *, explode(claim_lines) as claim_line
from stg_claims
)
;

SELECT * FROM raven_catalog.x12_edi_parser.claim_line

EDI.control_number,EDI.date,EDI.recipient_qualifier_id,EDI.sender_qualifier_id,EDI.standard_version,EDI.time,FunctionalGroup.control_number,FunctionalGroup.date,FunctionalGroup.receiver,FunctionalGroup.sender,FunctionalGroup.standard_version,FunctionalGroup.time,FunctionalGroup.transaction_type,Transaction.transaction_type,claim_lines,diagnosis,filename,patient,payer,providers,receiver,submitter,subscriber,claim_line
697773230,180508,ZZ123456789,01987654321,501,833,212950697,20180508,123456789,CLEARINGHOUSE,005010X222A1,833,222,222,"List(List(1, D8, 1, 20, , , H0003, HC, null, 20180428, D8, 472, 1, UN))","List(, , , F1120, )",file:/Workspace/Users/raven.mukherjee@databricks.com/x12-edi-parser/sampledata/837/CHPW_Claimdata.txt,"List(VANCOUVER, 19881225, D8, M, , , SUBSCRIBER JOHN J, , WA, 987 65TH PL, 987321, 18, 986640001)","List(2, CHPWA, PI, COMMUNITY HEALTH PLAN OF WASHINGTON)","List(null, List(VANCOUVER, 720000000, EI, Organization, BH CLINIC OF VANCOUVER, 1122334455, , WA, 12345 MAIN ST, , 98662), null, null, null, List(, , , Individual, , , , , , , ), List(, , , Organization, BH CLINIC OF VANCOUVER, 1122334455, , , , , ), List(, , , Individual, PROVIDER JAMES , 1112223338, PE, , , 261QR0405X, ))","List(123456789, Organization)","List(CLEARINGHOUSE LLC, CLEARINGHOUSE CLIENT SERVICES, List(List(TE, 8005551212), List(FX, 8005551212)), Organization)","List(VANCOUVER, 19881225, D8, M, , , SUBSCRIBER JOHN J, , WA, 987 65TH PL, 987321, 18, 986640001)","List(1, D8, 1, 20, , , H0003, HC, null, 20180428, D8, 472, 1, UN)"
697773230,180508,ZZ123456789,01987654321,501,833,212950697,20180508,123456789,CLEARINGHOUSE,005010X222A1,833,222,222,"List(List(1, D8, 1:2, 50.1, HF, , 96153, HC, null, 20180426, D8, 472, 6, UN))","List(, , , F1520, )",file:/Workspace/Users/raven.mukherjee@databricks.com/x12-edi-parser/sampledata/837/CHPW_Claimdata.txt,"List(VANCOUVER, 19881031, D8, F, , , PATIENT SUSAN E, , WA, 765 43RD ST, 765123, 18, 986640002)","List(2, CHPWA, PI, COMMUNITY HEALTH PLAN OF WASHINGTON)","List(null, List(VANCOUVER, 720000000, EI, Organization, BH CLINIC OF VANCOUVER, 1122334455, , WA, 12345 MAIN ST, , 98662), null, null, null, List(, , , Individual, , , , , , , ), List(, , , Organization, BH CLINIC OF VANCOUVER, 1122334455, , , , , ), List(, , , Individual, PROVIDER SUSAN , 1112223346, PE, , , 261QR0405X, ))","List(123456789, Organization)","List(CLEARINGHOUSE LLC, CLEARINGHOUSE CLIENT SERVICES, List(List(TE, 8005551212), List(FX, 8005551212)), Organization)","List(VANCOUVER, 19881031, D8, F, , , PATIENT SUSAN E, , WA, 765 43RD ST, 765123, 18, 986640002)","List(1, D8, 1:2, 50.1, HF, , 96153, HC, null, 20180426, D8, 472, 6, UN)"
697773230,180508,ZZ123456789,01987654321,501,833,212950697,20180508,123456789,CLEARINGHOUSE,005010X222A1,833,222,222,"List(List(1, D8, 1:2, 11.64, HF, , T1017, HC, null, 20180427, D8, 472, 1, UN))","List(, , , F1020, )",file:/Workspace/Users/raven.mukherjee@databricks.com/x12-edi-parser/sampledata/837/CHPW_Claimdata.txt,"List(VANCOUVER, 19881225, D8, M, , , SUBSCRIBER JOHN J, , WA, 987 65TH PL, 987321, 18, 986640001)","List(2, CHPWA, PI, COMMUNITY HEALTH PLAN OF WASHINGTON)","List(null, List(VANCOUVER, 720000000, EI, Organization, BH CLINIC OF VANCOUVER, 1122334455, , WA, 12345 MAIN ST, , 98662), null, null, null, List(, , , Individual, , , , , , , ), List(, , , Organization, BH CLINIC OF VANCOUVER, 1122334455, , , , , ), List(, , , Individual, PROVIDER SUSAN , 1112223346, PE, , , 261QR0405X, ))","List(123456789, Organization)","List(CLEARINGHOUSE LLC, CLEARINGHOUSE CLIENT SERVICES, List(List(TE, 8005551212), List(FX, 8005551212)), Organization)","List(VANCOUVER, 19881225, D8, M, , , SUBSCRIBER JOHN J, , WA, 987 65TH PL, 987321, 18, 986640001)","List(1, D8, 1:2, 11.64, HF, , T1017, HC, null, 20180427, D8, 472, 1, UN)"
697773230,180508,ZZ123456789,01987654321,501,833,212950697,20180508,123456789,CLEARINGHOUSE,005010X222A1,833,222,222,"List(List(1, D8, 1, 234, , , 90853, HC, null, 20180427, D8, 472, 120, UN))","List(, , , F251, )",file:/Workspace/Users/raven.mukherjee@databricks.com/x12-edi-parser/sampledata/837/CHPW_Claimdata.txt,"List(VANCOUVER, 19881031, D8, F, , , PATIENT SUSAN E, , WA, 765 43RD ST, 765123, 18, 986640002)","List(2, CHPWA, PI, COMMUNITY HEALTH PLAN OF WASHINGTON)","List(null, List(VANCOUVER, 720000000, EI, Organization, BH CLINIC OF VANCOUVER, 1122334455, , WA, 12345 MAIN ST, , 98662), null, null, null, List(, , , Individual, , , , , , , ), List(, , , Organization, BH CLINIC OF VANCOUVER, 1122334455, , , , , ), List(, , , Individual, PROVIDER SUSAN , 1112223346, PE, , , 251S00000X, ))","List(123456789, Organization)","List(CLEARINGHOUSE LLC, CLEARINGHOUSE CLIENT SERVICES, List(List(TE, 8005551212), List(FX, 8005551212)), Organization)","List(VANCOUVER, 19881031, D8, F, , , PATIENT SUSAN E, , WA, 765 43RD ST, 765123, 18, 986640002)","List(1, D8, 1, 234, , , 90853, HC, null, 20180427, D8, 472, 120, UN)"
697773230,180508,ZZ123456789,01987654321,501,833,212950697,20180508,123456789,CLEARINGHOUSE,005010X222A1,833,222,222,"List(List(1, D8, 1:2, 20, , , H0003, HC, null, 20180427, D8, 472, 1, UN))","List(, , , F1020, )",file:/Workspace/Users/raven.mukherjee@databricks.com/x12-edi-parser/sampledata/837/CHPW_Claimdata.txt,"List(VANCOUVER, 19881225, D8, M, , , SUBSCRIBER JOHN J, , WA, 987 65TH PL, 987321, 18, 986640001)","List(2, CHPWA, PI, COMMUNITY HEALTH PLAN OF WASHINGTON)","List(null, List(VANCOUVER, 720000000, EI, Organization, BH CLINIC OF VANCOUVER, 1122334455, , WA, 12345 MAIN ST, , 98662), null, null, null, List(, , , Individual, , , , , , , ), List(, , , Organization, BH CLINIC OF VANCOUVER, 1122334455, , , , , ), List(, , , Individual, PROVIDER JAMES , 1112223338, PE, , , 261QR0405X, ))","List(123456789, Organization)","List(CLEARINGHOUSE LLC, CLEARINGHOUSE CLIENT SERVICES, List(List(TE, 8005551212), List(FX, 8005551212)), Organization)","List(VANCOUVER, 19881225, D8, M, , , SUBSCRIBER JOHN J, , WA, 987 65TH PL, 987321, 18, 986640001)","List(1, D8, 1:2, 20, , , H0003, HC, null, 20180427, D8, 472, 1, UN)"
1508,180807,ZZ421406317,30133052274,501,1202,1770,20180710,OO000011111,000000005D,005010X222A1,214339,222,222,"List(List(1, RD8, 1, 1800, QZ, , 01710, HC, null, 20180123-20180123, RD8, 472, 88, MJ), List(1, RD8, 1, 984, AA,P3, , 00731, HC, null, 20180713-20180713, RD8, 472, 24, MJ), List(1, RD8, 1, 1353, AA, , 00840, HC, null, 20180713-20180713, RD8, 472, 62, MJ), List(1, RD8, 1, 1230, AA, , 00902, HC, null, 20180713-20180713, RD8, 472, 65, MJ), List(2, RD8, 2, 738, 59, , 62322, HC, null, 20180713-20180713, RD8, 472, 1, UN))","List(, , , G5621, )",file:/Workspace/Users/raven.mukherjee@databricks.com/x12-edi-parser/sampledata/837/Molina_Mock_UP_837P_File.txt,"List(FAKE CITY, 19650101, D8, M, , , ABCDEFGH IJKLMNOP B, , CA, 123 ADDRESS3, 111111100, 18, 908021112)","List(2, ABCMMPIO, PI, ABCHYI OL POI)","List(null, List(FAKE CITY, 123456789, EI, Organization, YYYY HEALTHCARE ABC, 1222222220, , NY, 123 ADDRESS1, , 908021112), null, null, null, List(, , , Individual, ABCD STUVW , 1234567890, , , , , ), List(, , , Organization, ABCDEFG HIJKLMN HOSP, 1122334460, , , , , ), List(, , , Individual, TUVWX MNOPQR , 1234567891, PE, , , 367500000X, ))","List(ABCHYI OL POI, Organization)","List(ABCDEF, ABCDEF CUSTOMER SOLUTIONS, List(List(TE, 9999999999), List(, )), Organization)","List(FAKE CITY, 19650101, D8, M, , , ABCDEFGH IJKLMNOP B, , CA, 123 ADDRESS3, 111111100, 18, 908021112)","List(1, RD8, 1, 1800, QZ, , 01710, HC, null, 20180123-20180123, RD8, 472, 88, MJ)"
1508,180807,ZZ421406317,30133052274,501,1202,1770,20180710,OO000011111,000000005D,005010X222A1,214339,222,222,"List(List(1, RD8, 1, 1800, QZ, , 01710, HC, null, 20180123-20180123, RD8, 472, 88, MJ), List(1, RD8, 1, 984, AA,P3, , 00731, HC, null, 20180713-20180713, RD8, 472, 24, MJ), List(1, RD8, 1, 1353, AA, , 00840, HC, null, 20180713-20180713, RD8, 472, 62, MJ), List(1, RD8, 1, 1230, AA, , 00902, HC, null, 20180713-20180713, RD8, 472, 65, MJ), List(2, RD8, 2, 738, 59, , 62322, HC, null, 20180713-20180713, RD8, 472, 1, UN))","List(, , , G5621, )",file:/Workspace/Users/raven.mukherjee@databricks.com/x12-edi-parser/sampledata/837/Molina_Mock_UP_837P_File.txt,"List(FAKE CITY, 19650101, D8, M, , , ABCDEFGH IJKLMNOP B, , CA, 123 ADDRESS3, 111111100, 18, 908021112)","List(2, ABCMMPIO, PI, ABCHYI OL POI)","List(null, List(FAKE CITY, 123456789, EI, Organization, YYYY HEALTHCARE ABC, 1222222220, , NY, 123 ADDRESS1, , 908021112), null, null, null, List(, , , Individual, ABCD STUVW , 1234567890, , , , , ), List(, , , Organization, ABCDEFG HIJKLMN HOSP, 1122334460, , , , , ), List(, , , Individual, TUVWX MNOPQR , 1234567891, PE, , , 367500000X, ))","List(ABCHYI OL POI, Organization)","List(ABCDEF, ABCDEF CUSTOMER SOLUTIONS, List(List(TE, 9999999999), List(, )), Organization)","List(FAKE CITY, 19650101, D8, M, , , ABCDEFGH IJKLMNOP B, , CA, 123 ADDRESS3, 111111100, 18, 908021112)","List(1, RD8, 1, 984, AA,P3, , 00731, HC, null, 20180713-20180713, RD8, 472, 24, MJ)"
1508,180807,ZZ421406317,30133052274,501,1202,1770,20180710,OO000011111,000000005D,005010X222A1,214339,222,222,"List(List(1, RD8, 1, 1800, QZ, , 01710, HC, null, 20180123-20180123, RD8, 472, 88, MJ), List(1, RD8, 1, 984, AA,P3, , 00731, HC, null, 20180713-20180713, RD8, 472, 24, MJ), List(1, RD8, 1, 1353, AA, , 00840, HC, null, 20180713-20180713, RD8, 472, 62, MJ), List(1, RD8, 1, 1230, AA, , 00902, HC, null, 20180713-20180713, RD8, 472, 65, MJ), List(2, RD8, 2, 738, 59, , 62322, HC, null, 20180713-20180713, RD8, 472, 1, UN))","List(, , , G5621, )",file:/Workspace/Users/raven.mukherjee@databricks.com/x12-edi-parser/sampledata/837/Molina_Mock_UP_837P_File.txt,"List(FAKE CITY, 19650101, D8, M, , , ABCDEFGH IJKLMNOP B, , CA, 123 ADDRESS3, 111111100, 18, 908021112)","List(2, ABCMMPIO, PI, ABCHYI OL POI)","List(null, List(FAKE CITY, 123456789, EI, Organization, YYYY HEALTHCARE ABC, 1222222220, , NY, 123 ADDRESS1, , 908021112), null, null, null, List(, , , Individual, ABCD STUVW , 1234567890, , , , , ), List(, , , Organization, ABCDEFG HIJKLMN HOSP, 1122334460, , , , , ), List(, , , Individual, TUVWX MNOPQR , 1234567891, PE, , , 367500000X, ))","List(ABCHYI OL POI, Organization)","List(ABCDEF, ABCDEF CUSTOMER SOLUTIONS, List(List(TE, 9999999999), List(, )), Organization)","List(FAKE CITY, 19650101, D8, M, , , ABCDEFGH IJKLMNOP B, , CA, 123 ADDRESS3, 111111100, 18, 908021112)","List(1, RD8, 1, 1353, AA, , 00840, HC, null, 20180713-20180713, RD8, 472, 62, MJ)"
1508,180807,ZZ421406317,30133052274,501,1202,1770,20180710,OO000011111,000000005D,005010X222A1,214339,222,222,"List(List(1, RD8, 1, 1800, QZ, , 01710, HC, null, 20180123-20180123, RD8, 472, 88, MJ), List(1, RD8, 1, 984, AA,P3, , 00731, HC, null, 20180713-20180713, RD8, 472, 24, MJ), List(1, RD8, 1, 1353, AA, , 00840, HC, null, 20180713-20180713, RD8, 472, 62, MJ), List(1, RD8, 1, 1230, AA, , 00902, HC, null, 20180713-20180713, RD8, 472, 65, MJ), List(2, RD8, 2, 738, 59, , 62322, HC, null, 20180713-20180713, RD8, 472, 1, UN))","List(, , , G5621, )",file:/Workspace/Users/raven.mukherjee@databricks.com/x12-edi-parser/sampledata/837/Molina_Mock_UP_837P_File.txt,"List(FAKE CITY, 19650101, D8, M, , , ABCDEFGH IJKLMNOP B, , CA, 123 ADDRESS3, 111111100, 18, 908021112)","List(2, ABCMMPIO, PI, ABCHYI OL POI)","List(null, List(FAKE CITY, 123456789, EI, Organization, YYYY HEALTHCARE ABC, 1222222220, , NY, 123 ADDRESS1, , 908021112), null, null, null, List(, , , Individual, ABCD STUVW , 1234567890, , , , , ), List(, , , Organization, ABCDEFG HIJKLMN HOSP, 1122334460, , , , , ), List(, , , Individual, TUVWX MNOPQR , 1234567891, PE, , , 367500000X, ))","List(ABCHYI OL POI, Organization)","List(ABCDEF, ABCDEF CUSTOMER SOLUTIONS, List(List(TE, 9999999999), List(, )), Organization)","List(FAKE CITY, 19650101, D8, M, , , ABCDEFGH IJKLMNOP B, , CA, 123 ADDRESS3, 111111100, 18, 908021112)","List(1, RD8, 1, 1230, AA, , 00902, HC, null, 20180713-20180713, RD8, 472, 65, MJ)"
1508,180807,ZZ421406317,30133052274,501,1202,1770,20180710,OO000011111,000000005D,005010X222A1,214339,222,222,"List(List(1, RD8, 1, 1800, QZ, , 01710, HC, null, 20180123-20180123, RD8, 472, 88, MJ), List(1, RD8, 1, 984, AA,P3, , 00731, HC, null, 20180713-20180713, RD8, 472, 24, MJ), List(1, RD8, 1, 1353, AA, , 00840, HC, null, 20180713-20180713, RD8, 472, 62, MJ), List(1, RD8, 1, 1230, AA, , 00902, HC, null, 20180713-20180713, RD8, 472, 65, MJ), List(2, RD8, 2, 738, 59, , 62322, HC, null, 20180713-20180713, RD8, 472, 1, UN))","List(, , , G5621, )",file:/Workspace/Users/raven.mukherjee@databricks.com/x12-edi-parser/sampledata/837/Molina_Mock_UP_837P_File.txt,"List(FAKE CITY, 19650101, D8, M, , , ABCDEFGH IJKLMNOP B, , CA, 123 ADDRESS3, 111111100, 18, 908021112)","List(2, ABCMMPIO, PI, ABCHYI OL POI)","List(null, List(FAKE CITY, 123456789, EI, Organization, YYYY HEALTHCARE ABC, 1222222220, , NY, 123 ADDRESS1, , 908021112), null, null, null, List(, , , Individual, ABCD STUVW , 1234567890, , , , , ), List(, , , Organization, ABCDEFG HIJKLMN HOSP, 1122334460, , , , , ), List(, , , Individual, TUVWX MNOPQR , 1234567891, PE, , , 367500000X, ))","List(ABCHYI OL POI, Organization)","List(ABCDEF, ABCDEF CUSTOMER SOLUTIONS, List(List(TE, 9999999999), List(, )), Organization)","List(FAKE CITY, 19650101, D8, M, , , ABCDEFGH IJKLMNOP B, , CA, 123 ADDRESS3, 111111100, 18, 908021112)","List(2, RD8, 2, 738, 59, , 62322, HC, null, 20180713-20180713, RD8, 472, 1, UN)"


#### Build analytical tables downstream

In [0]:
from pyspark.sql.functions import col, from_json, to_date, split, lit, sha2, coalesce, concat_ws, expr, explode_outer, when, concat, to_timestamp,min as _min, sum as _sum, expr
from pyspark.sql.types import StructType, StringType, DoubleType

# Load raw header
df = spark.table("raven_catalog.x12_edi_parser.claim_header")

#### Patients

In [0]:
# -------- Patient Schema --------
patient_schema = StructType() \
    .add("name", StringType()) \
    .add("dob", StringType()) \
    .add("gender_cd", StringType()) \
    .add("zip", StringType()) \
    .add("state", StringType()) \
    .add("city", StringType()) \
    .add("street", StringType()) \
    .add("subsciber_identifier", StringType())

# -------- Patients Table --------
# -------- PATIENTS --------
df_patients = df.select("patient.*") \
    .withColumn("Id", col("subsciber_identifier")) \
    .withColumn("BIRTHDATE", to_date(col("dob"), "yyyyMMdd")) \
    .withColumn("GENDER", col("gender_cd")) \
    .withColumn("FIRST", split(col("name"), " ").getItem(1)) \
    .withColumn("LAST", split(col("name"), " ").getItem(0)) \
    .withColumnRenamed("city", "CITY") \
    .withColumnRenamed("state", "STATE") \
    .withColumnRenamed("zip", "ZIP") \
    .withColumnRenamed("street", "ADDRESS") \
    .withColumn("DEATHDATE", lit(None).cast("date")) \
    .withColumn("SSN", lit(None).cast("string")) \
    .withColumn("DRIVERS", lit(None).cast("string")) \
    .withColumn("PASSPORT", lit(None).cast("string")) \
    .withColumn("PREFIX", split(col("name"), " ").getItem(2)) \
    .withColumn("MIDDLE", lit(None).cast("string")) \
    .withColumn("SUFFIX", lit(None).cast("string")) \
    .withColumn("MAIDEN", lit(None).cast("string")) \
    .withColumn("MARITAL", lit(None).cast("string")) \
    .withColumn("RACE", lit("white")) \
    .withColumn("ETHNICITY", lit("non-hispanic")) \
    .withColumn("BIRTHPLACE", lit("USA")) \
    .withColumn("COUNTY", lit(None).cast("string")) \
    .withColumn("FIPS", lit(0)) \
    .withColumn("ZIP", col("ZIP").cast("int")) \
    .withColumn("LAT", lit(0.0)) \
    .withColumn("LON", lit(0.0)) \
    .withColumn("HEALTHCARE_EXPENSES", lit(1000.0)) \
    .withColumn("HEALTHCARE_COVERAGE", lit(800.0)) \
    .withColumn("INCOME", lit(50000)) \
    .withColumn("_rescued_data", lit(None).cast("string")) \
    .selectExpr([
        "Id", "BIRTHDATE", "DEATHDATE", "SSN", "DRIVERS", "PASSPORT",
        "PREFIX", "FIRST", "MIDDLE", "LAST", "SUFFIX", "MAIDEN", "MARITAL",
        "RACE", "ETHNICITY", "GENDER", "BIRTHPLACE", "ADDRESS", "CITY",
        "STATE", "COUNTY", "FIPS", "ZIP", "LAT", "LON",
        "HEALTHCARE_EXPENSES", "HEALTHCARE_COVERAGE", "INCOME", "_rescued_data"
    ])

df_patients.write.mode("overwrite").option("mergeSchema", "true").saveAsTable("raven_catalog.x12_edi_parser.patients")

In [0]:
df_patients = spark.read.table("raven_catalog.x12_edi_parser.patients")
display(df_patients)

Id,BIRTHDATE,DEATHDATE,SSN,DRIVERS,PASSPORT,PREFIX,FIRST,MIDDLE,LAST,SUFFIX,MAIDEN,MARITAL,RACE,ETHNICITY,GENDER,BIRTHPLACE,ADDRESS,CITY,STATE,COUNTY,FIPS,ZIP,LAT,LON,HEALTHCARE_EXPENSES,HEALTHCARE_COVERAGE,INCOME,_rescued_data
987321,1988-12-25,,,,,J,JOHN,,SUBSCRIBER,,,,white,non-hispanic,M,USA,987 65TH PL,VANCOUVER,WA,,0,986640001,0.0,0.0,1000.0,800.0,50000,
765123,1988-10-31,,,,,E,SUSAN,,PATIENT,,,,white,non-hispanic,F,USA,765 43RD ST,VANCOUVER,WA,,0,986640002,0.0,0.0,1000.0,800.0,50000,
987321,1988-12-25,,,,,J,JOHN,,SUBSCRIBER,,,,white,non-hispanic,M,USA,987 65TH PL,VANCOUVER,WA,,0,986640001,0.0,0.0,1000.0,800.0,50000,
765123,1988-10-31,,,,,E,SUSAN,,PATIENT,,,,white,non-hispanic,F,USA,765 43RD ST,VANCOUVER,WA,,0,986640002,0.0,0.0,1000.0,800.0,50000,
987321,1988-12-25,,,,,J,JOHN,,SUBSCRIBER,,,,white,non-hispanic,M,USA,987 65TH PL,VANCOUVER,WA,,0,986640001,0.0,0.0,1000.0,800.0,50000,
111111100,1965-01-01,,,,,B,IJKLMNOP,,ABCDEFGH,,,,white,non-hispanic,M,USA,123 ADDRESS3,FAKE CITY,CA,,0,908021112,0.0,0.0,1000.0,800.0,50000,
11111117,1976-01-01,,,,,B,IJKLMNOP,,ABCDEFGH,,,,white,non-hispanic,F,USA,123 ADDRESS7,FAKE CITY,CA,,0,908021117,0.0,0.0,1000.0,800.0,50000,
111111111,2018-04-09,,,,,B,IJKLMNOP,,ABCDEFGH,,,,white,non-hispanic,M,USA,123 ADDRESS11,FAKE CITY,CA,,0,908021111,0.0,0.0,1000.0,800.0,50000,
111111112,2018-05-04,,,,,B,IJKLMNOP,,ABCDEFGH,,,,white,non-hispanic,F,USA,123 ADDRESS13,FAKE CITY,CA,,0,908021113,0.0,0.0,1000.0,800.0,50000,
U0000000001,1996-02-15,,,,,,MEMBER,,TEST,,,,white,non-hispanic,F,USA,123 MAIN STREET,Elgin,IL,,0,601200001,0.0,0.0,1000.0,800.0,50000,


#### Providers

In [0]:

# -------- Provider Schema --------
provider_schema = StructType() \
    .add("billing", StructType()
        .add("name", StringType())
        .add("npi", StringType())
        .add("zip", StringType())
        .add("state", StringType())
        .add("city", StringType())
        .add("street", StringType())
        .add("taxonomy", StringType())) \
        .add("servicing", StructType()
        .add("name", StringType()))


df_providers = df.select(
    col("providers.billing.npi").alias("Id"),
    col("providers.billing.name").alias("ORGANIZATION"),
    col("providers.billing.street").alias("ADDRESS"),
    col("providers.billing.city").alias("CITY"),
    col("providers.billing.state").alias("STATE"),
    col("providers.billing.zip").cast("int").alias("ZIP"),
    col("providers.billing.taxonomy").alias("SPECIALITY"),
    coalesce(  # Use servicing.name or fallback to billing.name
        col("providers.servicing.name"),
        col("providers.billing.name"),
        concat_ws("", lit("Provider_"), sha2(col("providers.billing.npi"), 256).substr(0, 6))
    ).alias("NAME")
) \
.withColumn("GENDER", lit(None).cast("string")) \
.withColumn("LAT", lit(0.0)) \
.withColumn("LON", lit(0.0)) \
.withColumn("ENCOUNTERS", lit(0)) \
.withColumn("PROCEDURES", lit(0)) \
.withColumn("_rescued_data", lit(None).cast("string")) \
.selectExpr([
    "Id", "ORGANIZATION", "NAME", "GENDER", "SPECIALITY", "ADDRESS",
    "CITY", "STATE", "ZIP", "LAT", "LON", "ENCOUNTERS", "PROCEDURES", "_rescued_data"
])

# Save output
df_providers.write.mode("overwrite").option("mergeSchema", "true").saveAsTable("raven_catalog.x12_edi_parser.providers")

In [0]:
df_providers = spark.read.table("raven_catalog.x12_edi_parser.providers")
display(df_providers.limit(20))

Id,ORGANIZATION,NAME,GENDER,SPECIALITY,ADDRESS,CITY,STATE,ZIP,LAT,LON,ENCOUNTERS,PROCEDURES,_rescued_data
1122334455,BH CLINIC OF VANCOUVER,PROVIDER JAMES,,,12345 MAIN ST,VANCOUVER,WA,98662,0.0,0.0,0,0,
1122334455,BH CLINIC OF VANCOUVER,PROVIDER SUSAN,,,12345 MAIN ST,VANCOUVER,WA,98662,0.0,0.0,0,0,
1122334455,BH CLINIC OF VANCOUVER,PROVIDER SUSAN,,,12345 MAIN ST,VANCOUVER,WA,98662,0.0,0.0,0,0,
1122334455,BH CLINIC OF VANCOUVER,PROVIDER SUSAN,,,12345 MAIN ST,VANCOUVER,WA,98662,0.0,0.0,0,0,
1122334455,BH CLINIC OF VANCOUVER,PROVIDER JAMES,,,12345 MAIN ST,VANCOUVER,WA,98662,0.0,0.0,0,0,
1222222220,YYYY HEALTHCARE ABC,TUVWX MNOPQR,,,123 ADDRESS1,FAKE CITY,NY,908021112,0.0,0.0,0,0,
1222222223,ABCDE EFGHIJ GROUP PC,STUVW KLMNOP H,,,123 ADDRESS5,FAKE CITY,CA,908021115,0.0,0.0,0,0,
1477527786,ABCDE EFGHIJ GROUP PC,STUVWX KLMNOPQ H,,,123 ADDRESS9,FAKE CITY,CA,908021119,0.0,0.0,0,0,
1477527786,ABCDE EFGHIJ GROUP PC,STUVWXY KLMNOPQR H,,,123 ADDRESS9,FAKE CITY,CA,908021119,0.0,0.0,0,0,
9999999999,Test Provider,Test Provider,,,123 MAIN STREET,WOODSTOCK,IL,600980000,0.0,0.0,0,0,


#### use both header and lines for Conditions and Encounters tables

In [0]:
from pyspark.sql.functions import sum as _sum, min as _min

# Load both tables
df_header = spark.table("raven_catalog.x12_edi_parser.claim_header")
df_line = spark.table("raven_catalog.x12_edi_parser.claim_line")

# Join line-level to header-level using inferred claim_id
df_joined = df_line.join(
    df_header.select(
        col("claim_header.claim_id").alias("claim_id"),
        col("diagnosis.principal_dx_cd").alias("dx_code"),
        col("payer.payer_identifier").alias("payer_id"),
        col("receiver.name").alias("organization"),
        col("providers.billing.npi").alias("provider_npi"),
        col("patient.subsciber_identifier").alias("patient_id"),
        col("claim_header.facility_type_code").alias("facility_type_code")
    ),
    on=[
        col("patient.subsciber_identifier") == col("patient_id")
    ],
    how="inner"
).withColumn("service_date", to_date(col("claim_line.service_date"), "yyyyMMdd")) \
 .withColumn("line_charge", col("claim_line.line_chrg_amt").cast(DoubleType()))

#### Conditions

In [0]:

# ========== CONDITIONS ==========
df_conditions = df_joined.select(
    col("service_date").alias("START"),
    col("service_date").alias("STOP"),
    col("patient_id").alias("PATIENT"),
    col("claim_id").alias("ENCOUNTER"),
    col("dx_code").cast("bigint").alias("CODE"),
    concat(lit("Diagnosis "), col("dx_code")).alias("DESCRIPTION"),
    lit("ICD-10-CM").alias("SYSTEM"),
    lit(None).cast("string").alias("_rescued_data")
)

df_conditions.write.mode("overwrite").option("mergeSchema", "true").saveAsTable("raven_catalog.x12_edi_parser.conditions")

In [0]:
df_conditions = spark.read.table("raven_catalog.x12_edi_parser.conditions")
display(df_conditions.limit(20))

START,STOP,PATIENT,ENCOUNTER,CODE,DESCRIPTION,SYSTEM,_rescued_data
2018-04-27,2018-04-27,987321,1805080AV3648339,,Diagnosis F1120,ICD-10-CM,
2018-04-27,2018-04-27,987321,1805080AV3648339,,Diagnosis F1120,ICD-10-CM,
2018-04-28,2018-04-28,987321,1805080AV3648339,,Diagnosis F1120,ICD-10-CM,
2018-04-27,2018-04-27,765123,1805080AV3648347,,Diagnosis F1520,ICD-10-CM,
2018-04-26,2018-04-26,765123,1805080AV3648347,,Diagnosis F1520,ICD-10-CM,
2018-04-27,2018-04-27,987321,1805080AV3648340,,Diagnosis F1020,ICD-10-CM,
2018-04-27,2018-04-27,987321,1805080AV3648340,,Diagnosis F1020,ICD-10-CM,
2018-04-28,2018-04-28,987321,1805080AV3648340,,Diagnosis F1020,ICD-10-CM,
2018-04-27,2018-04-27,765123,1805080AV3648353,,Diagnosis F251,ICD-10-CM,
2018-04-26,2018-04-26,765123,1805080AV3648353,,Diagnosis F251,ICD-10-CM,


#### Encounters

In [0]:
# ========== ENCOUNTERS ==========
df_encounters = df_joined.groupBy("claim_id", "patient_id", "provider_npi", "payer_id", "organization", "facility_type_code", "dx_code") \
    .agg(
        _min("service_date").alias("START"),
        _sum("line_charge").alias("TOTAL_CLAIM_COST")
    ) \
    .withColumn("STOP", lit(None).cast("timestamp")) \
    .withColumn("Id", col("claim_id")) \
    .withColumn("ENCOUNTERCLASS", expr("""
        CASE
          WHEN facility_type_code LIKE '11%' THEN 'ambulatory'
          WHEN facility_type_code LIKE '21%' THEN 'inpatient'
          WHEN facility_type_code LIKE '22%' THEN 'outpatient'
          ELSE 'other'
        END
    """)) \
    .withColumn("CODE", col("dx_code").cast("bigint")) \
    .withColumn("DESCRIPTION", concat(lit("Encounter for "), col("dx_code"))) \
    .withColumn("BASE_ENCOUNTER_COST", col("TOTAL_CLAIM_COST") / 2) \
    .withColumn("PAYER_COVERAGE", col("TOTAL_CLAIM_COST") * 0.8) \
    .withColumn("REASONCODE", col("dx_code").cast("bigint")) \
    .withColumn("REASONDESCRIPTION", concat(lit("Reason: "), col("dx_code"))) \
    .withColumn("_rescued_data", lit(None).cast("string")) \
    .selectExpr([
        "Id", "START", "STOP", "patient_id AS PATIENT", "organization AS ORGANIZATION",
        "provider_npi AS PROVIDER", "payer_id AS PAYER", "ENCOUNTERCLASS",
        "CODE", "DESCRIPTION", "BASE_ENCOUNTER_COST", "TOTAL_CLAIM_COST",
        "PAYER_COVERAGE", "REASONCODE", "REASONDESCRIPTION", "_rescued_data"
    ])

df_encounters.write.mode("overwrite").saveAsTable("raven_catalog.x12_edi_parser.encounters")


In [0]:
df_encounters = spark.read.table("raven_catalog.x12_edi_parser.encounters")
display(df_encounters.limit(20))

Id,START,STOP,PATIENT,ORGANIZATION,PROVIDER,PAYER,ENCOUNTERCLASS,CODE,DESCRIPTION,BASE_ENCOUNTER_COST,TOTAL_CLAIM_COST,PAYER_COVERAGE,REASONCODE,REASONDESCRIPTION,_rescued_data
1805080AV3648355,2018-04-27,,987321,123456789,1122334455,CHPWA,other,,Encounter for F1020,25.82,51.64,41.312000000000005,,Reason: F1020,
ABC11111,,,111111100,ABCHYI OL POI,1222222220,ABCMMPIO,outpatient,,Encounter for G5621,3052.5,6105.0,4884.0,,Reason: G5621,
1805080AV3648340,2018-04-27,,987321,123456789,1122334455,CHPWA,other,,Encounter for F1020,25.82,51.64,41.312000000000005,,Reason: F1020,
ABC111114,,,111111112,ABCHYI OL POI,1477527786,YTHF281123456,outpatient,,Encounter for Q423,984.0,1968.0,1574.4,,Reason: Q423,
1805080AV3648339,2018-04-27,,987321,123456789,1122334455,CHPWA,other,,Encounter for F1120,25.82,51.64,41.312000000000005,,Reason: F1120,
1000A,2015-11-24,,1234567890,RECEIVER,1122334455,12345,other,,Encounter for I10,140.0,280.0,224.0,,Reason: I10,
1001A,2015-11-24,,9876543201,RECEIVER,1122334455,12345,other,,Encounter for I10,70.0,140.0,112.0,,Reason: I10,
1805080AV3648353,2018-04-26,,765123,123456789,1122334455,CHPWA,other,,Encounter for F251,142.05,284.1,227.28000000000003,,Reason: F251,
ABC111113,,,111111111,ABCHYI OL POI,1477527786,YTHF281123456,outpatient,,Encounter for K4090,1660.5,3321.0,2656.8,,Reason: K4090,
TD-R192ICE00087,,,U0000000001,CENTENE CORP,9999999999,421406317,ambulatory,,Encounter for F39,8883.35,17766.7,14213.36,,Reason: F39,


#### Claims

In [0]:

# Parse date and charges in claim_line
df_line_clean = df_line.withColumn("service_date", to_date(col("claim_line.service_date"), "yyyyMMdd")) \
                       .withColumn("line_charge", col("claim_line.line_chrg_amt").cast(DoubleType()))

# Flatten header fields
df_header_flat = df_header.select(
    col("claim_header.claim_id").alias("claim_id"),
    col("diagnosis.principal_dx_cd").alias("dx_code"),
    col("payer.payer_identifier").alias("payer_id"),
    col("receiver.name").alias("organization"),
    col("providers.billing.npi").alias("provider_id"),
    col("patient.subsciber_identifier").alias("patient_id"),
    col("claim_header.facility_type_code").alias("facility_type_code")
)

# Join header and line on patient and filename (simulate claim-level grouping)
df_joined_claim = df_line_clean.join(
    df_header_flat,
    on=[df_line_clean["patient.subsciber_identifier"] == df_header_flat["patient_id"]],
    how="inner"
)

# Group by header-level fields and aggregate to form claims
df_claims = df_joined_claim.groupBy(
    "claim_id", "patient_id", "provider_id", "payer_id", "facility_type_code", "dx_code",
).agg(
    _min("service_date").alias("SERVICEDATE"),
    _sum("line_charge").alias("OUTSTANDING1")
).withColumn("Id", col("claim_id")) \
 .withColumn("PATIENTID", col("patient_id")) \
 .withColumn("PROVIDERID", col("provider_id")) \
 .withColumn("PRIMARYPATIENTINSURANCEID", col("payer_id")) \
 .withColumn("SECONDARYPATIENTINSURANCEID", lit(None).cast("string")) \
 .withColumn("DEPARTMENTID", lit(1)) \
 .withColumn("PATIENTDEPARTMENTID", lit(1)) \
 .withColumn("DIAGNOSIS1", col("dx_code").cast("bigint")) \
 .withColumn("DIAGNOSIS2", lit(None).cast("bigint")) \
 .withColumn("DIAGNOSIS3", lit(None).cast("bigint")) \
 .withColumn("DIAGNOSIS4", lit(None).cast("bigint")) \
 .withColumn("DIAGNOSIS5", lit(None).cast("bigint")) \
 .withColumn("DIAGNOSIS6", lit(None).cast("bigint")) \
 .withColumn("DIAGNOSIS7", lit(None).cast("bigint")) \
 .withColumn("DIAGNOSIS8", lit(None).cast("int")) \
 .withColumn("REFERRINGPROVIDERID", lit(None).cast("string")) \
 .withColumn("APPOINTMENTID", lit(None).cast("string")) \
 .withColumn("CURRENTILLNESSDATE", col("SERVICEDATE").cast("timestamp")) \
 .withColumn("SUPERVISINGPROVIDERID", lit(None).cast("string")) \
 .withColumn("STATUS1", lit("Pending")) \
 .withColumn("STATUS2", lit("Pending")) \
 .withColumn("STATUSP", lit("Pending")) \
 .withColumn("OUTSTANDING2", lit(None).cast("double")) \
 .withColumn("OUTSTANDINGP", lit(None).cast("double")) \
 .withColumn("LASTBILLEDDATE1", col("SERVICEDATE").cast("timestamp")) \
 .withColumn("LASTBILLEDDATE2", col("SERVICEDATE").cast("timestamp")) \
 .withColumn("LASTBILLEDDATEP", col("SERVICEDATE").cast("timestamp")) \
 .withColumn("HEALTHCARECLAIMTYPEID1", expr("""
     CASE
         WHEN facility_type_code LIKE '21%' THEN 21
         WHEN facility_type_code LIKE '11%' THEN 11
         ELSE 99
     END
 """)) \
 .withColumn("HEALTHCARECLAIMTYPEID2", lit(None).cast("int")) \
 .withColumn("_rescued_data", lit(None).cast("string"))

# Save final claims table
df_claims.write.mode("overwrite").option("mergeSchema", "true").saveAsTable("raven_catalog.x12_edi_parser.claims")


In [0]:
claims_df = spark.read.table("raven_catalog.x12_edi_parser.claims")
display(claims_df.limit(20))

claim_id,patient_id,provider_id,payer_id,facility_type_code,dx_code,SERVICEDATE,OUTSTANDING1,Id,PATIENTID,PROVIDERID,PRIMARYPATIENTINSURANCEID,SECONDARYPATIENTINSURANCEID,DEPARTMENTID,PATIENTDEPARTMENTID,DIAGNOSIS1,DIAGNOSIS2,DIAGNOSIS3,DIAGNOSIS4,DIAGNOSIS5,DIAGNOSIS6,DIAGNOSIS7,DIAGNOSIS8,REFERRINGPROVIDERID,APPOINTMENTID,CURRENTILLNESSDATE,SUPERVISINGPROVIDERID,STATUS1,STATUS2,STATUSP,OUTSTANDING2,OUTSTANDINGP,LASTBILLEDDATE1,LASTBILLEDDATE2,LASTBILLEDDATEP,HEALTHCARECLAIMTYPEID1,HEALTHCARECLAIMTYPEID2,_rescued_data
1805080AV3648340,987321,1122334455,CHPWA,57:B:1,F1020,2018-04-27,51.64,1805080AV3648340,987321,1122334455,CHPWA,,1,1,,,,,,,,,,,2018-04-27T00:00:00Z,,Pending,Pending,Pending,,,2018-04-27T00:00:00Z,2018-04-27T00:00:00Z,2018-04-27T00:00:00Z,99,,
ABC11111,111111100,1222222220,ABCMMPIO,22:B:1,G5621,,6105.0,ABC11111,111111100,1222222220,ABCMMPIO,,1,1,,,,,,,,,,,,,Pending,Pending,Pending,,,,,,99,,
ABC111114,111111112,1477527786,YTHF281123456,22:B:1,Q423,,1968.0,ABC111114,111111112,1477527786,YTHF281123456,,1,1,,,,,,,,,,,,,Pending,Pending,Pending,,,,,,99,,
ABC111112,11111117,1222222223,ABCMMPIO,22:B:1,K219,,4305.0,ABC111112,11111117,1222222223,ABCMMPIO,,1,1,,,,,,,,,,,,,Pending,Pending,Pending,,,,,,99,,
TD-R192ICE00087,U0000000001,9999999999,421406317,11:A:1,F39,,17766.7,TD-R192ICE00087,U0000000001,9999999999,421406317,,1,1,,,,,,,,,,,,,Pending,Pending,Pending,,,,,,11,,
1805080AV3648347,765123,1122334455,CHPWA,57:B:1,F1520,2018-04-26,284.1,1805080AV3648347,765123,1122334455,CHPWA,,1,1,,,,,,,,,,,2018-04-26T00:00:00Z,,Pending,Pending,Pending,,,2018-04-26T00:00:00Z,2018-04-26T00:00:00Z,2018-04-26T00:00:00Z,99,,
1805080AV3648353,765123,1122334455,CHPWA,53:B:1,F251,2018-04-26,284.1,1805080AV3648353,765123,1122334455,CHPWA,,1,1,,,,,,,,,,,2018-04-26T00:00:00Z,,Pending,Pending,Pending,,,2018-04-26T00:00:00Z,2018-04-26T00:00:00Z,2018-04-26T00:00:00Z,99,,
1000A,1234567890,1122334455,12345,19:B:1,I10,2015-11-24,280.0,1000A,1234567890,1122334455,12345,,1,1,,,,,,,,,,,2015-11-24T00:00:00Z,,Pending,Pending,Pending,,,2015-11-24T00:00:00Z,2015-11-24T00:00:00Z,2015-11-24T00:00:00Z,99,,
1805080AV3648355,987321,1122334455,CHPWA,57:B:1,F1020,2018-04-27,51.64,1805080AV3648355,987321,1122334455,CHPWA,,1,1,,,,,,,,,,,2018-04-27T00:00:00Z,,Pending,Pending,Pending,,,2018-04-27T00:00:00Z,2018-04-27T00:00:00Z,2018-04-27T00:00:00Z,99,,
ABC111113,111111111,1477527786,YTHF281123456,22:B:1,K4090,,3321.0,ABC111113,111111111,1477527786,YTHF281123456,,1,1,,,,,,,,,,,,,Pending,Pending,Pending,,,,,,99,,


#### Procedures table

In [0]:
# Prep line-level fields
df_line_clean = df_line.withColumn("START", to_date(col("claim_line.service_date"), "yyyyMMdd")) \
                       .withColumn("STOP", to_date(col("claim_line.service_date"), "yyyyMMdd")) \
                       .withColumn("CODE", col("claim_line.prcdr_cd").cast("bigint")) \
                       .withColumn("BASE_COST", col("claim_line.line_chrg_amt").cast(DoubleType())) \

# Join to get patient, diagnosis
df_proc_joined = df_line_clean.join(
    df_header.select(
        col("diagnosis.principal_dx_cd").alias("REASONCODE"),
        col("patient.subsciber_identifier").alias("PATIENT_ID"),
        col("claim_header.claim_id").alias("ENCOUNTER")
    ),
    df_line_clean["patient.subsciber_identifier"] == col("PATIENT_ID"),
    how="inner"
)

# Final field mappings
df_procedures = df_proc_joined.select(
    col("START"),
    col("STOP"),
    col("PATIENT_ID").alias("PATIENT"),
    col("ENCOUNTER"),
    col("CODE"),
    concat(lit("Procedure "), col("CODE")).alias("DESCRIPTION"),
    col("BASE_COST"),
    col("REASONCODE").cast("bigint"),
    concat(lit("Diagnosis "), col("REASONCODE")).alias("REASONDESCRIPTION"),
    lit("HCPCS").alias("SYSTEM"),  # or use "ICD-10-PCS" if appropriate
    lit(None).cast("string").alias("_rescued_data")
)

df_procedures.write.mode("overwrite").option("mergeSchema", "true").saveAsTable("raven_catalog.x12_edi_parser.procedures")


In [0]:
df_procedures = spark.read.table("raven_catalog.x12_edi_parser.procedures")
display(df_procedures.limit(20))

START,STOP,PATIENT,ENCOUNTER,CODE,DESCRIPTION,BASE_COST,REASONCODE,REASONDESCRIPTION,SYSTEM,_rescued_data
2018-04-27,2018-04-27,987321,1805080AV3648339,,,20.0,,Diagnosis F1120,HCPCS,
2018-04-27,2018-04-27,987321,1805080AV3648339,,,11.64,,Diagnosis F1120,HCPCS,
2018-04-28,2018-04-28,987321,1805080AV3648339,,,20.0,,Diagnosis F1120,HCPCS,
2018-04-27,2018-04-27,765123,1805080AV3648347,90853.0,Procedure 90853,234.0,,Diagnosis F1520,HCPCS,
2018-04-26,2018-04-26,765123,1805080AV3648347,96153.0,Procedure 96153,50.1,,Diagnosis F1520,HCPCS,
2018-04-27,2018-04-27,987321,1805080AV3648340,,,20.0,,Diagnosis F1020,HCPCS,
2018-04-27,2018-04-27,987321,1805080AV3648340,,,11.64,,Diagnosis F1020,HCPCS,
2018-04-28,2018-04-28,987321,1805080AV3648340,,,20.0,,Diagnosis F1020,HCPCS,
2018-04-27,2018-04-27,765123,1805080AV3648353,90853.0,Procedure 90853,234.0,,Diagnosis F251,HCPCS,
2018-04-26,2018-04-26,765123,1805080AV3648353,96153.0,Procedure 96153,50.1,,Diagnosis F251,HCPCS,


# 835 

In [0]:
from ember import *
from ember.hls.healthcare import HealthcareManager as hm
import json, os
from pyspark.sql.functions import input_file_name

df = spark.read.text("file:///" + os.getcwd() + "/../sampledata/835/*txt", wholetext = True)

rdd = (
  df.withColumn("filename", input_file_name()).rdd #convert to rdd
  .map(lambda row: (row.filename, EDI(row.value, strict_transactions=False))) #parse as an EDI format
  .flatMap(lambda edi: hm.flatten(edi[1], filename = edi[0])) #extract out healthcare specific groupings, one row per claim/remittance/enrollment etc
  )

claims_rdd = (
rdd.repartition(4) #Repartition number should be >= # of cores in cluster and <= number of rows in rdd / DataFrame
.map(lambda x: hm.flatten_to_json(x))
.map(lambda x: json.dumps(x))
)

claims = spark.read.json(claims_rdd)

In [0]:
claims.createOrReplaceTempView("stg_remittance")

In [0]:
%sql
--flatten EDI 
drop table if exists raven_catalog.x12_edi_parser.remittance;
CREATE TABLE raven_catalog.x12_edi_parser.remittance 
as 
select *
from stg_remittance 
;

SELECT * FROM raven_catalog.x12_edi_parser.remittance;

EDI.control_number,EDI.date,EDI.recipient_qualifier_id,EDI.sender_qualifier_id,EDI.standard_version,EDI.time,FunctionalGroup.control_number,FunctionalGroup.date,FunctionalGroup.receiver,FunctionalGroup.sender,FunctionalGroup.standard_version,FunctionalGroup.time,FunctionalGroup.transaction_type,Transaction.transaction_type,claim,filename,payee,payer,payment,provider_adjustments
1508,180807,ZZ421406317,30133052274,501,1202,100000300,20180613,99999999,NVMED,005010X221A1,123021,221,221,"List(72232, MC, , 77777777, List(), 0, 4, List(List(8888888, EA)), List(List(20180314, 232), List(20180317, 233)), , 1, MR, QC, , 77777777777, SMITH, , 6666666666666, List(List(SMITH, QC, TOM, 1, 77777777777, MR), List(PARKER, 74, ALAN, 1, 88888888888, C), List(, PR, PACIFI, 2, 9999, PI), List(CARRY, GB, BARRY, 1, 666666666, MI)), List(List(50016, OA, 147), List(22216, CO, 26)), )",file:/Workspace/Users/raven.mukherjee@databricks.com/x12-edi-parser/sampledata/835/sample.txt,"List(6666666666, SUMMER, XX, 111111111)","List(PR, Carson, List(List(BL, Nevada Medicaid, nvmmis.edisupport@dxc.com, 8776383472)), DIVISON OF HEALTH CARE FINANCING AND POLICY, , , NV, 1100 East William Street Suite 101, 89701)","List(C, 0, 20180615, NON, 1388600002, 100004762, 1, H)",List()
1508,180807,ZZ421406317,30133052274,501,1202,100000300,20180613,99999999,NVMED,005010X221A1,123021,221,221,"List(3002, MC, , 77777778, List(), 0, 2, List(List(8888888, EA)), List(List(20171001, 232), List(20171002, 233)), , 1, MR, QC, , 55555555555, THOMAS, , 6666666666667, List(List(THOMAS, QC, BOB, 1, 55555555555, MR), List(JACKSON, 74, ALAN, 1, 66666666666, C)), List(List(3002, OA, 176)), )",file:/Workspace/Users/raven.mukherjee@databricks.com/x12-edi-parser/sampledata/835/sample.txt,"List(6666666666, VALLEY, XX, 530824679)","List(PR, Carson, List(List(BL, Nevada Medicaid, nvmmis.edisupport@dxc.com, 8776383472)), DIVISON OF HEALTH CARE FINANCING AND POLICY, , , NV, 1100 East William Street Suite 101, 89701)","List(C, 0, 20180615, NON, 5555555555, 100004765, 1, H)",List()
1508,180807,ZZ421406317,30133052274,501,1202,100000300,20180613,99999999,NVMED,005010X221A1,123021,221,221,"List(41231.04, MC, , 77777779, List(), 0, 4, List(List(6047740, EA)), List(List(20180220, 232), List(20180221, 233)), , 1, MR, QC, , 77777777778, ALLEN, , 6666666666668, List(List(ALLEN, QC, HELD, 1, 77777777778, MR), List(LARRY, 74, RYAN, 1, 88888888889, C), List(, PR, SENIOR, 2, 8888, PI), List(JANE, GB, MARY, 1, 777777777, MI)), List(List(9365.04, OA, 147), List(31866, CO, 26)), )",file:/Workspace/Users/raven.mukherjee@databricks.com/x12-edi-parser/sampledata/835/sample.txt,"List(7777777777, SILVER, XX, 666666666)","List(PR, Carson, List(List(BL, Nevada Medicaid, nvmmis.edisupport@dxc.com, 8776383472)), DIVISON OF HEALTH CARE FINANCING AND POLICY, , , NV, 1100 East William Street Suite 101, 89701)","List(C, 1812.27, 20180727, CHK, 5555555555, 000012382, 1, I)","List(List(-1092.46, 20181231, 888888888, 8888888888, CT), List(-719.81, 20181231, 888888888, 8888888888, CT), List(-181.55, 20181231, 8888888888887, 8888888888, CS), List(181.55, 20181231, 8888888888887, 8888888888, CS), List(-130, 20181231, 8888888888888, 8888888888, CS), List(130, 20181231, 8888888888888, 8888888888, CS))"
1508,180807,ZZ421406317,30133052274,501,1202,100000300,20180613,99999999,NVMED,005010X221A1,123021,221,221,"List(72232, MC, , 77777777, List(), 0, 4, List(List(8888888, EA)), List(List(20180314, 232), List(20180317, 233)), , 1, MR, QC, , 77777777777, SMITH, , 6666666666666, List(List(SMITH, QC, TOM, 1, 77777777777, MR), List(PARKER, 74, ALAN, 1, 88888888888, C), List(, PR, PACIFI, 2, 9999, PI), List(CARRY, GB, BARRY, 1, 666666666, MI)), List(), )",file:/Workspace/Users/raven.mukherjee@databricks.com/x12-edi-parser/sampledata/835/sample_no_cas.txt,"List(6666666666, SUMMER, XX, 111111111)","List(PR, Carson, List(List(BL, Nevada Medicaid, nvmmis.edisupport@dxc.com, 8776383472)), DIVISON OF HEALTH CARE FINANCING AND POLICY, , , NV, 1100 East William Street Suite 101, 89701)","List(C, 0, 20180615, NON, 1388600002, 100004762, 1, H)",List()
1508,180807,ZZ421406317,30133052274,501,1202,100000300,20180613,99999999,NVMED,005010X221A1,123021,221,221,"List(3002, MC, , 77777778, List(), 0, 2, List(List(8888888, EA)), List(List(20171001, 232), List(20171002, 233)), , 1, MR, QC, , 55555555555, THOMAS, , 6666666666667, List(List(THOMAS, QC, BOB, 1, 55555555555, MR), List(JACKSON, 74, ALAN, 1, 66666666666, C)), List(List(3002, OA, 176)), )",file:/Workspace/Users/raven.mukherjee@databricks.com/x12-edi-parser/sampledata/835/sample_no_cas.txt,"List(6666666666, VALLEY, XX, 530824679)","List(PR, Carson, List(List(BL, Nevada Medicaid, nvmmis.edisupport@dxc.com, 8776383472)), DIVISON OF HEALTH CARE FINANCING AND POLICY, , , NV, 1100 East William Street Suite 101, 89701)","List(C, 0, 20180615, NON, 5555555555, 100004765, 1, H)",List()
1508,180807,ZZ421406317,30133052274,501,1202,100000300,20180613,99999999,NVMED,005010X221A1,123021,221,221,"List(41231.04, MC, , 77777779, List(), 0, 4, List(List(6047740, EA)), List(List(20180220, 232), List(20180221, 233)), , 1, MR, QC, , 77777777778, ALLEN, , 6666666666668, List(List(ALLEN, QC, HELD, 1, 77777777778, MR), List(LARRY, 74, RYAN, 1, 88888888889, C), List(, PR, SENIOR, 2, 8888, PI), List(JANE, GB, MARY, 1, 777777777, MI)), List(List(9365.04, OA, 147), List(31866, CO, 26)), )",file:/Workspace/Users/raven.mukherjee@databricks.com/x12-edi-parser/sampledata/835/sample_no_cas.txt,"List(7777777777, SILVER, XX, 666666666)","List(PR, Carson, List(List(BL, Nevada Medicaid, nvmmis.edisupport@dxc.com, 8776383472)), DIVISON OF HEALTH CARE FINANCING AND POLICY, , , NV, 1100 East William Street Suite 101, 89701)","List(C, 1812.27, 20180727, CHK, 5555555555, 000012382, 1, I)","List(List(-1092.46, 20181231, 888888888, 8888888888, CT), List(-719.81, 20181231, 888888888, 8888888888, CT), List(-181.55, 20181231, 8888888888887, 8888888888, CS), List(181.55, 20181231, 8888888888887, 8888888888, CS), List(-130, 20181231, 8888888888888, 8888888888, CS), List(130, 20181231, 8888888888888, 8888888888, CS))"
1508,180807,ZZ421406317,30133052274,501,1202,100000300,20180613,99999999,NVMED,005010X221A1,123021,221,221,"List(72232, MC, , 77777777, List(), 0, 4, List(List(8888888, EA)), List(List(20180314, 232), List(20180317, 233)), , 1, MR, QC, , 77777777777, SMITH, , 6666666666666, List(List(SMITH, QC, TOM, 1, 77777777777, MR), List(PARKER, 74, ALAN, 1, 88888888888, C), List(, PR, PACIFI, 2, 9999, PI), List(CARRY, GB, BARRY, 1, 666666666, MI)), List(List(50016, OA, 147), List(22216, CO, 26)), )",file:/Workspace/Users/raven.mukherjee@databricks.com/x12-edi-parser/sampledata/835/no_plb_sample.txt,"List(6666666666, SUMMER, XX, 111111111)","List(PR, Carson, List(List(BL, Nevada Medicaid, nvmmis.edisupport@dxc.com, 8776383472)), DIVISON OF HEALTH CARE FINANCING AND POLICY, , , NV, 1100 East William Street Suite 101, 89701)","List(C, 0, 20180615, NON, 1388600002, 100004762, 1, H)",List()
1508,180807,ZZ421406317,30133052274,501,1202,100000300,20180613,99999999,NVMED,005010X221A1,123021,221,221,"List(3002, MC, , 77777778, List(), 0, 2, List(List(8888888, EA)), List(List(20171001, 232), List(20171002, 233)), , 1, MR, QC, , 55555555555, THOMAS, , 6666666666667, List(List(THOMAS, QC, BOB, 1, 55555555555, MR), List(JACKSON, 74, ALAN, 1, 66666666666, C)), List(List(3002, OA, 176)), )",file:/Workspace/Users/raven.mukherjee@databricks.com/x12-edi-parser/sampledata/835/no_plb_sample.txt,"List(6666666666, SUMMER, XX, 111111111)","List(PR, Carson, List(List(BL, Nevada Medicaid, nvmmis.edisupport@dxc.com, 8776383472)), DIVISON OF HEALTH CARE FINANCING AND POLICY, , , NV, 1100 East William Street Suite 101, 89701)","List(C, 0, 20180615, NON, 1388600002, 100004762, 1, H)",List()
1508,180807,ZZ421406317,30133052274,501,1202,100000300,20180613,99999999,NVMED,005010X221A1,123021,221,221,"List(41231.04, MC, , 77777779, List(), 0, 4, List(List(6047740, EA)), List(List(20180220, 232), List(20180221, 233)), , 1, MR, QC, , 77777777778, ALLEN, , 6666666666668, List(List(ALLEN, QC, HELD, 1, 77777777778, MR), List(LARRY, 74, RYAN, 1, 88888888889, C), List(, PR, SENIOR, 2, 8888, PI), List(JANE, GB, MARY, 1, 777777777, MI)), List(List(9365.04, OA, 147), List(31866, CO, 26)), )",file:/Workspace/Users/raven.mukherjee@databricks.com/x12-edi-parser/sampledata/835/no_plb_sample.txt,"List(6666666666, SUMMER, XX, 111111111)","List(PR, Carson, List(List(BL, Nevada Medicaid, nvmmis.edisupport@dxc.com, 8776383472)), DIVISON OF HEALTH CARE FINANCING AND POLICY, , , NV, 1100 East William Street Suite 101, 89701)","List(C, 0, 20180615, NON, 1388600002, 100004762, 1, H)",List()
1508,180807,ZZ421406317,30133052274,501,1202,100000300,20180613,99999999,NVMED,005010X221A1,123021,221,221,"List(72232, MC, , 77777777, List(), 0, 4, List(List(8888888, EA)), List(List(20180314, 232), List(20180317, 233)), , 1, MR, QC, , 77777777777, SMITH, , 6666666666666, List(List(SMITH, QC, TOM, 1, 77777777777, MR), List(PARKER, 74, ALAN, 1, 88888888888, C), List(, PR, PACIFI, 2, 9999, PI), List(CARRY, GB, BARRY, 1, 666666666, MI)), List(List(50016, OA, 147), List(22216, CO, 26)), )",file:/Workspace/Users/raven.mukherjee@databricks.com/x12-edi-parser/sampledata/835/plb_sample.txt,"List(6666666666, SUMMER, XX, 111111111)","List(PR, Carson, List(List(BL, Nevada Medicaid, nvmmis.edisupport@dxc.com, 8776383472)), DIVISON OF HEALTH CARE FINANCING AND POLICY, , , NV, 1100 East William Street Suite 101, 89701)","List(C, 0, 20180615, NON, 1388600002, 100004762, 1, H)","List(List(-14, 20231231, 0202309NN08956B0X.5520NN142, 15483NN082, 72), List(14, 20231231, 0202309NN08956B0X.5520NN142, 15483NN082, WO), List(-14, 20231231, 0202309NN08956B0X.5520NN142, 15483NN082, B2))"


# 834

In [0]:
from ember import *
from ember.hls.healthcare import HealthcareManager as hm
import json, os
from pyspark.sql.functions import input_file_name

df = spark.read.text("file:///" + os.getcwd() + "/../sampledata/834/*txt", wholetext = True)

rdd = (
  df.withColumn("filename", input_file_name()).rdd #convert to rdd
  .map(lambda row: (row.filename, EDI(row.value, strict_transactions=False))) #parse as an EDI format
  .flatMap(lambda edi: hm.flatten(edi[1], filename = edi[0])) #extract out healthcare specific groupings, one row per claim/remittance/enrollment etc
  )

claims_rdd = (
rdd.repartition(4) #Repartition number should be >= # of cores in cluster and <= number of rows in rdd / DataFrame
.map(lambda x: hm.flatten_to_json(x))
.map(lambda x: json.dumps(x))
)

claims = spark.read.json(claims_rdd)

In [0]:
claims.createOrReplaceTempView("stg_enrollment")

In [0]:
%sql
--flatten EDI 
drop table if exists raven_catalog.x12_edi_parser.enrollment;
CREATE TABLE raven_catalog.x12_edi_parser.enrollment 
as 
select *
from stg_enrollment 
;

SELECT * FROM raven_catalog.x12_edi_parser.enrollment;

EDI.control_number,EDI.date,EDI.recipient_qualifier_id,EDI.sender_qualifier_id,EDI.standard_version,EDI.time,FunctionalGroup.control_number,FunctionalGroup.date,FunctionalGroup.receiver,FunctionalGroup.sender,FunctionalGroup.standard_version,FunctionalGroup.time,FunctionalGroup.transaction_type,Transaction.transaction_type,enrollment_member,filename,health_plan
242060001,240724,ZZ8-DIGIT PLAN ID,ZZEMEDNYMCR,501,708,242060001,20240724,ETIN,EMEDNYMCR,005010X220A1,70835,220,220,"List(List(List(Y, 20240724, , 001), List(, ANYTOWN, 12205, NY, 123 ANY STREET), List(9999999999, ), 20020202, SUBSCRIBER B FIRST NAME, F, 299999992, Social Security Number (SSN), SUBSCRIBER B LAST NAME))",file:/Workspace/Users/raven.mukherjee@databricks.com/x12-edi-parser/sampledata/834/834_test.txt,"List(List(Health, HLT, 20241231, ), List(Dental, DEN, 20241231, ), List(Vision, VIS, 20240401, ))"
12345,50221,30382328142,ZZ386028429,501,602,12345,20050221,382328142,386028429,005010X220A1,602,220,220,"List(List(List(Y, 19960523, 20, 021), List(APT 3G, CAMP HILL, 17011, PA, 100 MARKET ST), List(7172343334, 7172341240), 19400816, JOHN, M, 123456789, Social Security Number (SSN), DOE))",file:/Workspace/Users/raven.mukherjee@databricks.com/x12-edi-parser/sampledata/834/EDI_834.txt,"List(List(Health, HLT, 19960601, ), List(Dental, DEN, 19960601, ), List(Vision, VIS, 19960601, ))"
