In [None]:
import pyspark.sql.functions as F
from credentials import MY_CREDENTIALS
from data_location import DELTA_LOCATION

from spark_bi.constants import ColNames, Extensions
from spark_bi.spark import FutPathlingContext

# Create the FutPathlingContext for this notebook; the credentials object is
# converted to Hadoop configuration and handed to the context.
pc = FutPathlingContext.create(
    app_name="example-spark-app",
    hadoop_config=MY_CREDENTIALS.to_hadoop_config(),
)

# Data source read from DELTA_LOCATION; the views in this notebook are built on it.
delta_lake = pc.read.delta(DELTA_LOCATION)

:: loading settings :: url = jar:file:/Users/mabe/Git/spark-bi/.venv/lib/python3.11/site-packages/pyspark/jars/ivy-2.5.3.jar!/org/apache/ivy/core/settings/ivysettings.xml
Ivy Default Cache set to: /Users/mabe/.ivy2.5.2/cache
The jars for the packages stored in: /Users/mabe/.ivy2.5.2/jars
au.csiro.pathling#library-runtime added as a dependency
io.delta#delta-spark_2.13 added as a dependency
:: resolving dependencies :: org.apache.spark#spark-submit-parent-c173bae2-96c7-481e-8d0b-762921beba32;1.0
	confs: [default]
	found au.csiro.pathling#library-runtime;9.1.0 in local-m2-cache
	found io.delta#delta-spark_2.13;4.0.0 in local-m2-cache
	found io.delta#delta-storage;4.0.0 in local-m2-cache
	found org.antlr#antlr4-runtime;4.13.1 in local-m2-cache
:: resolution report :: resolve 91ms :: artifacts dl 5ms
	:: modules in use:
	au.csiro.pathling#library-runtime;9.1.0 from local-m2-cache in [default]
	io.delta#delta-spark_2.13;4.0.0 from local-m2-cache in [default]
	io.delta#delta-storage;4.0.0 fr

In [2]:
# Appointment resources whose first meta.profile is the group-videoappointment
# profile, with scheduled start/end, the code of the ehealth-system meta tag,
# and the scheduled duration in minutes.
# NOTE(review): despite its name, this frame selects no participant columns.
video_appointments_with_participants = (
    delta_lake.view(
        resource="Appointment",
        select=[
            {
                "column": [
                    dict(name="app_id", path="getResourceKey()"),
                    dict(name="profile", path="meta.profile.first()"),
                    dict(name="scheduled_start", path="start"),
                    dict(name="scheduled_end", path="end"),
                    dict(
                        name=ColNames.CREATING_SOLUTION.value,
                        path="meta.tag.where(system='http://ehealth.sundhed.dk/cs/ehealth-system').code.first()",
                    ),
                ]
            }
        ],
    )
    .where(
        F.col("profile")
        == "http://ehealth.sundhed.dk/fhir/StructureDefinition/ehealth-group-videoappointment"
    )
    .withColumn(
        "duration_minutes",
        # Difference of the two epoch-second values, converted to minutes.
        (F.unix_timestamp(F.col("scheduled_end")) - F.unix_timestamp(F.col("scheduled_start")))
        / 60,
    )
)
video_appointments_with_participants.take(5)

                                                                                

[Row(app_id='Appointment/1000341100', profile='http://ehealth.sundhed.dk/fhir/StructureDefinition/ehealth-group-videoappointment', scheduled_start=datetime.datetime(2022, 9, 14, 9, 8, 35, 99000), scheduled_end=datetime.datetime(2022, 9, 14, 9, 8, 35, 99000), creating_solution='xb', duration_minutes=0.0),
 Row(app_id='Appointment/1000342024', profile='http://ehealth.sundhed.dk/fhir/StructureDefinition/ehealth-group-videoappointment', scheduled_start=datetime.datetime(2022, 9, 17, 9, 7, 53, 855000), scheduled_end=datetime.datetime(2022, 9, 17, 9, 7, 53, 855000), creating_solution='xb', duration_minutes=0.0),
 Row(app_id='Appointment/1000342640', profile='http://ehealth.sundhed.dk/fhir/StructureDefinition/ehealth-group-videoappointment', scheduled_start=datetime.datetime(2022, 9, 19, 9, 8, 21, 70000), scheduled_end=datetime.datetime(2022, 9, 19, 9, 8, 21, 70000), creating_solution='xb', duration_minutes=0.0),
 Row(app_id='Appointment/1000343880', profile='http://ehealth.sundhed.dk/fhir/St

# By anvenderløsning

For at finde anvenderløsning bruger vi coexistence-tags der er beskrevet i: https://ehealth-dk.atlassian.net/wiki/spaces/EDTW/pages/2355986433/Multitenancy

In [3]:
# Number of video appointments per creating-solution code.
(
    video_appointments_with_participants
    .groupby(ColNames.CREATING_SOLUTION.value)
    .agg(F.count(F.col("app_id")).alias("n_appointments"))
    .toPandas()
)

Unnamed: 0,creating_solution,n_appointments
0,,2659
1,xb,163


Bemærk at antallet er for TRIFORKs testmiljø.

# By careteam

En video-appointment kan spores til careteam via `VideoAppointment.responsible`.

In [4]:
# Number of group video appointments per responsible reference, read from the
# first ehealth-responsible extension on each Appointment.
(
    delta_lake.view(
        resource="Appointment",
        select=[
            {
                "column": [
                    dict(name="app_id", path="getResourceKey()"),
                    dict(name="profile", path="meta.profile.first()"),
                    dict(
                        name="responsible",
                        path="extension('http://ehealth.sundhed.dk/fhir/StructureDefinition/ehealth-responsible').first().valueReference.reference",
                    ),
                ]
            }
        ],
    )
    .where(
        F.col("profile")
        == "http://ehealth.sundhed.dk/fhir/StructureDefinition/ehealth-group-videoappointment"
    )
    .groupBy("responsible")
    .count()
    .toPandas()
)

Unnamed: 0,responsible,count
0,CareTeam/3000138554,139
1,CareTeam/3000108752,625
2,CareTeam/3000143203,2058


# By duration

In [5]:
# Number of video appointments per scheduled duration (in minutes).
(
    video_appointments_with_participants.groupby("duration_minutes")
    .agg(F.count("app_id").alias("n_appointments"))
    # The aggregation result has no defined row order; sort explicitly so the
    # five rows shown are the same on every run (shortest durations first).
    .orderBy("duration_minutes")
    .head(5)
)

[Row(duration_minutes=0.0, n_appointments=2822)]

Desværre er der ikke nogen videomøder på TRIFORK-miljøet med en planlagt varighed over 0 minutter.

# By diagnosis

For at finde patientens diagnoser/behandlingsområder skal vi lave koblingen:

`Patient <-> EpisodeOfCare <-> CarePlan.addresses <-> Condition.code`

Vi starter fra venstre:

In [6]:
# Group video appointments unnested over `participant`, keeping only the rows
# whose participant actor reference starts with "Patient".
video_appointments_with_patient = (
    delta_lake.view(
        resource="Appointment",
        select=[
            {
                "column": [
                    dict(name="app_id", path="getResourceKey()"),
                    dict(name="profile", path="meta.profile.first()"),
                ]
            },
            {
                "forEach": "participant",
                "column": [dict(name="participant_reference", path="actor.reference")],
            },
        ],
    )
    .where(
        # Both conditions must hold: correct profile AND a patient participant.
        (
            F.col("profile")
            == "http://ehealth.sundhed.dk/fhir/StructureDefinition/ehealth-group-videoappointment"
        )
        & F.col("participant_reference").startswith("Patient")
    )
)
video_appointments_with_patient.take(5)

[Row(app_id='Appointment/1000341100', profile='http://ehealth.sundhed.dk/fhir/StructureDefinition/ehealth-group-videoappointment', participant_reference='Patient/1000264558'),
 Row(app_id='Appointment/1000342024', profile='http://ehealth.sundhed.dk/fhir/StructureDefinition/ehealth-group-videoappointment', participant_reference='Patient/1000264558'),
 Row(app_id='Appointment/1000342640', profile='http://ehealth.sundhed.dk/fhir/StructureDefinition/ehealth-group-videoappointment', participant_reference='Patient/1000264558'),
 Row(app_id='Appointment/1000343880', profile='http://ehealth.sundhed.dk/fhir/StructureDefinition/ehealth-group-videoappointment', participant_reference='Patient/1000264558'),
 Row(app_id='Appointment/1000344188', profile='http://ehealth.sundhed.dk/fhir/StructureDefinition/ehealth-group-videoappointment', participant_reference='Patient/1000264558')]

In [7]:
from spark_bi.dfs import compute_patient2condition


# Patient -> diagnosis-code mapping built by the project helper; preview a few rows.
patient2condition = compute_patient2condition(delta_lake)
patient2condition.take(5)

[Row(eoc_patient_id='Patient/1000264558', diagnosis_code='DJ44'),
 Row(eoc_patient_id='Patient/1000684720', diagnosis_code='DJ44')]

In [8]:
# Number of video appointments per diagnosis code of the participating patient.
(
    video_appointments_with_patient.join(
        patient2condition,
        patient2condition.eoc_patient_id == video_appointments_with_patient.participant_reference,
        # Left join keeps appointments whose patient has no mapped diagnosis
        # (they end up in the null diagnosis_code group).
        how="left",
    )
    .groupby("diagnosis_code")
    # Count distinct appointments rather than joined rows: if the mapping
    # holds several rows for the same (patient, diagnosis) pair, a plain row
    # count would count the same appointment more than once.
    .agg(F.count_distinct("app_id").alias("count"))
    .toPandas()
)

Unnamed: 0,diagnosis_code,count
0,DJ44,2681


Vær opmærksom på, at disse tal er fra TRIFORKs testmiljø, og derfor ikke repræsentative.

# By kommune

En video-aftales "kommune" fortolkes som bopælskommunen for den patient, der deltager i aftalen.

In [9]:
import spark_bi.dfs

# Patient -> municipality-code mapping built by the project helper; preview a few rows.
patient2municipality = spark_bi.dfs.compute_patient2municipality(delta_lake)
patient2municipality.take(5)

[Row(patient_id='Patient/1000264558', municipality_code='0410'),
 Row(patient_id='Patient/1000264559', municipality_code='0330'),
 Row(patient_id='Patient/1000264560', municipality_code='0740'),
 Row(patient_id='Patient/1000264604', municipality_code='0190'),
 Row(patient_id='Patient/1000264605', municipality_code='0336')]

In [10]:
# Rows per municipality code after attaching each patient participant's municipality.
# NOTE(review): this counts joined rows, not distinct appointments — confirm
# that is the intended measure.
(
    video_appointments_with_patient.join(
        patient2municipality,
        video_appointments_with_patient.participant_reference == patient2municipality.patient_id,
        how="left",
    )
    .groupBy(ColNames.MUNICIPALITY_CODE.value)
    .count()
    .toPandas()
)

Unnamed: 0,municipality_code,count
0,410,1232
1,185,1449


Bemærk at tallene er fra TRIFORKs testmiljø, og afspejler derfor ikke produktion.

# By organisatorisk enhed

Se README.md under "Organisatorisk enhed".

# By region

In [13]:
from spark_bi.dfs import compute_patient2region


# Patient -> region mapping built by the project helper; preview a few rows.
patients_with_region = compute_patient2region(delta_lake)
patients_with_region.take(5)

[Row(patient_id='Patient/1000264558', regional_subdivision_code='DK-83', region_name='Region of Southern Denmark'),
 Row(patient_id='Patient/1000264559', regional_subdivision_code='DK-85', region_name='Region Zealand'),
 Row(patient_id='Patient/1000264560', regional_subdivision_code='DK-82', region_name='Central Denmark Region'),
 Row(patient_id='Patient/1000264604', regional_subdivision_code='DK-84', region_name='Capital Region of Denmark'),
 Row(patient_id='Patient/1000264605', regional_subdivision_code='DK-85', region_name='Region Zealand')]

In [14]:
# Rows per region name after attaching each patient participant's region.
# NOTE(review): this counts joined rows, not distinct appointments — confirm
# that is the intended measure.
(
    video_appointments_with_patient.join(
        patients_with_region,
        video_appointments_with_patient.participant_reference == patients_with_region.patient_id,
        how="left",
    )
    .groupBy("region_name")
    .count()
    .toPandas()
)

Unnamed: 0,region_name,count
0,Capital Region of Denmark,1449
1,Region of Southern Denmark,1232


Bemærk at tallene er fra TRIFORKs testmiljø, og afspejler derfor ikke produktion.