In [2]:
import pyspark.sql.functions as F
from credentials import MY_CREDENTIALS
from data_location import DELTA_LOCATION

from spark_bi.constants import ColNames, Extensions
from spark_bi.spark import FutPathlingContext

# Convert the project credentials to a Hadoop configuration before creating the context.
hadoop_settings = MY_CREDENTIALS.to_hadoop_config()
pc = FutPathlingContext.create(app_name="example-spark-app", hadoop_config=hadoop_settings)

# Data source read from the Delta tables at DELTA_LOCATION; the views below are built on it.
delta_lake = pc.read.delta(DELTA_LOCATION)

:: loading settings :: url = jar:file:/Users/mabe/Git/spark-bi/.venv/lib/python3.11/site-packages/pyspark/jars/ivy-2.5.3.jar!/org/apache/ivy/core/settings/ivysettings.xml
Ivy Default Cache set to: /Users/mabe/.ivy2.5.2/cache
The jars for the packages stored in: /Users/mabe/.ivy2.5.2/jars
au.csiro.pathling#library-runtime added as a dependency
io.delta#delta-spark_2.13 added as a dependency
:: resolving dependencies :: org.apache.spark#spark-submit-parent-7e28e9fb-cc05-41d7-989e-2a96aa462ac1;1.0
	confs: [default]
	found au.csiro.pathling#library-runtime;9.1.0 in local-m2-cache
	found io.delta#delta-spark_2.13;4.0.0 in local-m2-cache
	found io.delta#delta-storage;4.0.0 in local-m2-cache
	found org.antlr#antlr4-runtime;4.13.1 in local-m2-cache
:: resolution report :: resolve 99ms :: artifacts dl 5ms
	:: modules in use:
	au.csiro.pathling#library-runtime;9.1.0 from local-m2-cache in [default]
	io.delta#delta-spark_2.13;4.0.0 from local-m2-cache in [default]
	io.delta#delta-storage;4.0.0 fr

# By anvenderløsning

For at finde anvenderløsningen bruger vi coexistence-tags, som er beskrevet her: https://ehealth-dk.atlassian.net/wiki/spaces/EDTW/pages/2355986433/Multitenancy

In [3]:
from pyspark.sql.functions import col

# One row per QuestionnaireResponse: its resource key, the creating solution
# (code of the coexistence tag in meta.tag) and the EpisodeOfCare it references.
qr_with_eoc = delta_lake.view(
    resource="QuestionnaireResponse",
    select=[
        {
            "column": [
                {"name": "qr_id", "path": "getResourceKey()"},
                {
                    "name": ColNames.CREATING_SOLUTION.value,
                    "path": f"meta.tag.where(system='{Extensions.EHEALTH_COLOCATION.value}').code.first()",
                },
                {
                    "name": "qr_eoc_id",
                    # Match the extension on its `url` element, mirroring the `system=` filter
                    # above; a bare string criterion does not compare against any element.
                    # NOTE(review): assumes the constant holds the extension URL - confirm.
                    "path": f"extension.where(url='{Extensions.HL7_EPISODE_OF_CARE.value}').valueReference.getReferenceKey().first()",
                },
            ]
        }
    ],
)
# Preview only responses that carry a tag. The column is referenced through the same
# constant that names it in the view, so the two cannot drift apart.
qr_with_eoc.filter(col(ColNames.CREATING_SOLUTION.value).isNotNull()).head(5)

                                                                                

[Row(qr_id='QuestionnaireResponse/6000033240', creating_solution='xb', qr_eoc_id='EpisodeOfCare/2000470702'),
 Row(qr_id='QuestionnaireResponse/6000033248', creating_solution='xb', qr_eoc_id='EpisodeOfCare/2000471314'),
 Row(qr_id='QuestionnaireResponse/6000033250', creating_solution='xb', qr_eoc_id='EpisodeOfCare/2000471467'),
 Row(qr_id='QuestionnaireResponse/6000034602', creating_solution='xb', qr_eoc_id='EpisodeOfCare/2000471646'),
 Row(qr_id='QuestionnaireResponse/6000034608', creating_solution='xb', qr_eoc_id='EpisodeOfCare/2000472105')]

In [4]:
# Count QuestionnaireResponses per creating solution; untagged responses form the null group.
# The column is addressed via the constant that names it in the qr_with_eoc view.
qr_with_eoc.groupby(ColNames.CREATING_SOLUTION.value).count().toPandas()

Unnamed: 0,creating_solution,count
0,,3105
1,xb,158


Bemærk at antallet er for TRIFORKs testmiljø.

# By besvarelsesstatus

In [26]:
# Project each QuestionnaireResponse to its resource key and its status.
qr_status_columns = [
    {"name": "qr_id", "path": "getResourceKey()"},
    {"name": "status", "path": "status"},
]
qr_with_status = delta_lake.view(
    resource="QuestionnaireResponse",
    select=[{"column": qr_status_columns}],
)

# Number of responses per status, collected into pandas for display.
qr_with_status.groupby("status").count().toPandas()

Unnamed: 0,status,count
0,completed,3263


Bemærk at antallet er for TRIFORKs testmiljø.

# By careteam

For at finde, hvilket CareTeam en QuestionnaireResponse tilhører, skal vi lave koblingen:

`QuestionnaireResponse.episodeOfCare <-> EpisodeOfCare.team <-> CareTeam`

In [5]:
# Resource key of the EpisodeOfCare itself.
eoc_key_select = {"column": [{"name": "eoc_id", "path": "getResourceKey()"}]}
# Iterate over `team` so each team reference of an EpisodeOfCare gets its own row.
eoc_team_select = {"forEach": "team", "column": [{"name": "team_id", "path": "getReferenceKey()"}]}

eoc_with_teams = delta_lake.view(
    resource="EpisodeOfCare",
    select=[eoc_key_select, eoc_team_select],
)
eoc_with_teams.head(5)

[Row(eoc_id='EpisodeOfCare/2000000029', team_id='CareTeam/3000108752'),
 Row(eoc_id='EpisodeOfCare/2000000035', team_id='CareTeam/3000108752'),
 Row(eoc_id='EpisodeOfCare/2000000042', team_id='CareTeam/3000108752'),
 Row(eoc_id='EpisodeOfCare/2000000049', team_id='CareTeam/3000108752'),
 Row(eoc_id='EpisodeOfCare/2000000068', team_id='CareTeam/3000108752')]

In [6]:
# Only the resource key is selected from CareTeam; it is the join target for team references.
careteam_columns = [{"name": "careteam_id", "path": "getResourceKey()"}]
careteams = delta_lake.view(resource="CareTeam", select=[{"column": careteam_columns}])
careteams.head(5)

[Row(careteam_id='CareTeam/3000000001'),
 Row(careteam_id='CareTeam/3000000002'),
 Row(careteam_id='CareTeam/3000000005'),
 Row(careteam_id='CareTeam/3000000006'),
 Row(careteam_id='CareTeam/3000000007')]

In [7]:
# QuestionnaireResponse -> EpisodeOfCare -> CareTeam, done as two left joins so that
# responses without a matching EpisodeOfCare or team are kept and counted in the null group.
qr_eoc_team = qr_with_eoc.join(
    eoc_with_teams, qr_with_eoc.qr_eoc_id == eoc_with_teams.eoc_id, "left"
)
qr_eoc_careteam = qr_eoc_team.join(
    careteams, eoc_with_teams.team_id == careteams.careteam_id, "left"
)

# Number of responses per team reference.
qr_eoc_careteam.groupby("team_id").count().toPandas()

Unnamed: 0,team_id,count
0,,543
1,CareTeam/3000138554,145
2,CareTeam/3000000002,9
3,CareTeam/3000108752,387
4,CareTeam/3000143203,2179


Bemærk at antallet er for TRIFORKs testmiljø.

# By diagnosis

For at finde, hvilken diagnose en QuestionnaireResponse omhandler, skal vi lave koblingen:

`QuestionnaireResponse <-> EpisodeOfCare <-> Condition`

In [8]:
from spark_bi.dfs import compute_patient2condition

# EpisodeOfCare key together with the key of the Condition referenced from its diagnosis.
eoc_condition_columns = [
    {"name": "eoc_id", "path": "getResourceKey()"},
    {"name": "eoc_condition", "path": "diagnosis.condition.getReferenceKey()"},
]
eoc_with_condition = delta_lake.view(
    resource="EpisodeOfCare",
    select=[{"column": eoc_condition_columns}],
)

# Condition key together with the code taken from code.coding.
condition_code_columns = [
    {"name": "condition_id", "path": "getResourceKey()"},
    {"name": "diagnosis_code", "path": "code.coding.code"},
]
conditions_with_code = delta_lake.view(
    resource="Condition",
    select=[{"column": condition_code_columns}],
)

# QuestionnaireResponse -> EpisodeOfCare -> Condition. Left joins keep responses
# that have no EpisodeOfCare or Condition, so they show up in the null group.
qr_eoc_condition = qr_with_eoc.join(
    eoc_with_condition, qr_with_eoc.qr_eoc_id == eoc_with_condition.eoc_id, "left"
)
qr_diagnosis = qr_eoc_condition.join(
    conditions_with_code,
    eoc_with_condition.eoc_condition == conditions_with_code.condition_id,
    "left",
)

# Number of responses per diagnosis code.
qr_diagnosis.groupby("diagnosis_code").count().toPandas()

Unnamed: 0,diagnosis_code,count
0,,543
1,DJ44,2720


Vær opmærksom på, at disse tal er fra TRIFORKs testmiljø, og derfor ikke repræsentative.

# By kommune

In [9]:
# EpisodeOfCare key together with the key of the Patient it refers to.
eoc_patient_columns = [
    {"name": "eoc_id", "path": "getResourceKey()"},
    {"name": "eoc_pt_id", "path": "patient.getReferenceKey()"},
]
eoc_with_patient = delta_lake.view(
    resource="EpisodeOfCare",
    select=[{"column": eoc_patient_columns}],
)
eoc_with_patient.head(5)

[Row(eoc_id='EpisodeOfCare/2000000029', eoc_pt_id='Patient/1000264558'),
 Row(eoc_id='EpisodeOfCare/2000000035', eoc_pt_id='Patient/1000264558'),
 Row(eoc_id='EpisodeOfCare/2000000042', eoc_pt_id='Patient/1000264558'),
 Row(eoc_id='EpisodeOfCare/2000000049', eoc_pt_id='Patient/1000264558'),
 Row(eoc_id='EpisodeOfCare/2000000068', eoc_pt_id='Patient/1000264558')]

In [14]:
from spark_bi.dfs import compute_patient2municipality


# Patient-to-municipality lookup produced by the project helper.
patients_with_municipality = compute_patient2municipality(delta_lake)

# QuestionnaireResponse -> EpisodeOfCare -> Patient -> municipality, all as left joins
# so that responses without a match are kept and counted in the null group.
qr_eoc_patient = qr_with_eoc.join(
    eoc_with_patient, qr_with_eoc.qr_eoc_id == eoc_with_patient.eoc_id, "left"
)
qr_municipality = qr_eoc_patient.join(
    patients_with_municipality,
    eoc_with_patient.eoc_pt_id == patients_with_municipality.patient_id,
    "left",
)

# Number of responses per municipality code.
qr_municipality.groupby(ColNames.MUNICIPALITY_CODE.value).count().toPandas()

Unnamed: 0,municipality_code,count
0,,543
1,410.0,1233
2,185.0,1487


Vær opmærksom på, at disse tal er fra TRIFORKs testmiljø, og derfor ikke repræsentative.

# By organisatorisk enhed

Se README.md under "Organisatorisk enhed".

# By region

In [15]:
from spark_bi.dfs import compute_patient2region

# Patient-to-region lookup produced by the project helper.
patients_with_region = compute_patient2region(delta_lake)

# QuestionnaireResponse -> EpisodeOfCare -> Patient -> region, all as left joins
# so that responses without a match are kept and counted in the null group.
qr_eoc_patient = qr_with_eoc.join(
    eoc_with_patient, qr_with_eoc.qr_eoc_id == eoc_with_patient.eoc_id, "left"
)
qr_region = qr_eoc_patient.join(
    patients_with_region,
    eoc_with_patient.eoc_pt_id == patients_with_region.patient_id,
    "left",
)

# Number of responses per region code.
qr_region.groupby(ColNames.REGION_CODE.value).count().toPandas()


Unnamed: 0,region_code,count
0,,543
1,DK-84,1487
2,DK-83,1233


# By spørgeskemaid

In [22]:
# Project each QuestionnaireResponse to its resource key and the questionnaire it answers.
qr_questionnaire_columns = [
    {"name": "qr_id", "path": "getResourceKey()"},
    {"name": "questionnaire_id", "path": "questionnaire"},
]
qr_with_questionnaire = delta_lake.view(
    resource="QuestionnaireResponse",
    select=[{"column": qr_questionnaire_columns}],
)

# Responses per questionnaire, most frequently answered first.
responses_per_questionnaire = qr_with_questionnaire.groupby("questionnaire_id").count()
responses_per_questionnaire.sort("count", ascending=False).toPandas()

Unnamed: 0,questionnaire_id,count
0,https://questionnaire.fut.trifork.com/fhir/Que...,1
1,https://questionnaire.fut.trifork.com/fhir/Que...,1
2,https://questionnaire.fut.trifork.com/fhir/Que...,1
3,https://questionnaire.fut.trifork.com/fhir/Que...,1
4,https://questionnaire.fut.trifork.com/fhir/Que...,1
...,...,...
3258,https://questionnaire.fut.trifork.com/fhir/Que...,1
3259,https://questionnaire.fut.trifork.com/fhir/Que...,1
3260,https://questionnaire.fut.trifork.com/fhir/Que...,1
3261,https://questionnaire.fut.trifork.com/fhir/Que...,1


Det ser ud til, at hvert questionnaire på TRIFORKs testmiljø oprettes og kun besvares én gang.