In [1]:
import pyspark.sql.functions as F
from credentials import MY_CREDENTIALS
from data_location import DELTA_LOCATION

from spark_bi.constants import ColNames, Extensions
from spark_bi.spark import FutPathlingContext

# Create the FutPathlingContext used by the whole notebook; the Hadoop
# configuration is derived from the locally supplied credentials object.
hadoop_config = MY_CREDENTIALS.to_hadoop_config()
pc = FutPathlingContext.create(
    app_name="example-spark-app",
    hadoop_config=hadoop_config,
)

# Data source read from DELTA_LOCATION; every view below is built from it.
delta_lake = pc.read.delta(DELTA_LOCATION)

:: loading settings :: url = jar:file:/Users/mabe/Git/spark-bi/.venv/lib/python3.11/site-packages/pyspark/jars/ivy-2.5.3.jar!/org/apache/ivy/core/settings/ivysettings.xml
Ivy Default Cache set to: /Users/mabe/.ivy2.5.2/cache
The jars for the packages stored in: /Users/mabe/.ivy2.5.2/jars
au.csiro.pathling#library-runtime added as a dependency
io.delta#delta-spark_2.13 added as a dependency
:: resolving dependencies :: org.apache.spark#spark-submit-parent-03ce0a90-45ae-403a-b1c1-03d4c5cf0048;1.0
	confs: [default]
	found au.csiro.pathling#library-runtime;9.1.0 in local-m2-cache
	found io.delta#delta-spark_2.13;4.0.0 in local-m2-cache
	found io.delta#delta-storage;4.0.0 in local-m2-cache
	found org.antlr#antlr4-runtime;4.13.1 in local-m2-cache
:: resolution report :: resolve 91ms :: artifacts dl 5ms
	:: modules in use:
	au.csiro.pathling#library-runtime;9.1.0 from local-m2-cache in [default]
	io.delta#delta-spark_2.13;4.0.0 from local-m2-cache in [default]
	io.delta#delta-storage;4.0.0 fr

# By anvenderløsning

For at finde anvenderløsning bruger vi coexistence-tags der er beskrevet i: https://ehealth-dk.atlassian.net/wiki/spaces/EDTW/pages/2355986433/Multitenancy

Patienterne selv stammer fra CPR-registret, og har derfor ikke et coexistence-tag. Alle relevante patienter har mindst ét forløb, og derfor tæller vi antal unikke patienter fra episodes of care, og finder disses creating_solution.

In [3]:
from pyspark.sql.functions import col

# One row per CarePlan: its resource key, the key of the referenced patient
# (subject), and the first code of the meta.tag whose system equals
# Extensions.EHEALTH_COLOCATION. That code is exposed under the column name
# held by ColNames.CREATING_SOLUTION.
cp_with_patient_id = delta_lake.view(
    resource="CarePlan",
    select=[
        {
            "column": [
                {"name": "cp_id", "path": "getResourceKey()"},
                {"name": "cp_patient_id", "path": "subject.getReferenceKey()"},
                {
                    "name": ColNames.CREATING_SOLUTION.value,
                    "path": f"meta.tag.where(system='{Extensions.EHEALTH_COLOCATION.value}').code.first()",
                },
            ]
        }
    ],
)

# Preview a few care plans that actually carry the tag. The filter refers to
# the column through the same enum member used to name it in the view, so the
# two cannot drift apart if the constant is ever renamed.
cp_with_patient_id.filter(F.col(ColNames.CREATING_SOLUTION.value).isNotNull()).head(5)

[Row(cp_id='CarePlan/2000000115', cp_patient_id='Patient/1000264558', creating_solution='xb'),
 Row(cp_id='CarePlan/2000000230', cp_patient_id='Patient/1000264558', creating_solution='xb'),
 Row(cp_id='CarePlan/2000000366', cp_patient_id='Patient/1000264558', creating_solution='xb'),
 Row(cp_id='CarePlan/2000000696', cp_patient_id='Patient/1000264558', creating_solution='xb'),
 Row(cp_id='CarePlan/2000000811', cp_patient_id='Patient/1000264558', creating_solution='xb')]

In [4]:
# Number of care plans per creating solution. The grouping key is taken from
# ColNames (the same constant that names the column in the view) instead of a
# hard-coded string, matching how the municipality and region cells group.
cp_with_patient_id.groupby(ColNames.CREATING_SOLUTION.value).count().toPandas()

Unnamed: 0,creating_solution,count
0,,27496
1,xb,4079


Bemærk at antallet er for TRIFORKs testmiljø.

# By careteam

In [6]:
# Column specs: the care plan's own key, and the key of a referenced care team.
cp_key_column = {"name": "cp_id", "path": "getResourceKey()"}
team_key_column = {"name": "cp_team_id", "path": "getReferenceKey()"}

# The forEach clause iterates over CarePlan.careTeam, pairing the care plan
# key with each referenced care team.
# NOTE(review): care plans without any careTeam presumably yield no row here;
# confirm whether forEachOrNull is wanted so they show up as a null group.
careplans_with_team = delta_lake.view(
    resource="CarePlan",
    select=[
        {"column": [cp_key_column]},
        {"forEach": "careTeam", "column": [team_key_column]},
    ],
)

# Row count per care team.
team_counts = careplans_with_team.groupby("cp_team_id").count()
team_counts.toPandas()

Unnamed: 0,cp_team_id,count
0,CareTeam/3000138554,1740
1,CareTeam/3000108752,5008
2,CareTeam/3000143203,24827


Bemærk, at dette er fordelingen for TRIFORKs testmiljø.

# By diagnosis

For at finde episode-of-care'ens diagnoser/behandlingsområder skal vi lave koblingen:

`CarePlan.addresses <-> Condition.code`

Vi skal bruge denne beregning flere steder, og genbruger derfor en utility-funktion, der udfører det.

In [9]:
# Condition key together with its diagnosis code.
# NOTE(review): code.coding can hold several codings in FHIR; confirm this
# path yields a single value per Condition for the data at hand.
condition_columns = [
    {"name": "condition_id", "path": "getResourceKey()"},
    {"name": "diagnosis_code", "path": "code.coding.code"},
]
conditions_with_code = delta_lake.view(
    resource="Condition",
    select=[{"column": condition_columns}],
)

# Care plan key together with the key of the Condition it addresses.
# NOTE(review): CarePlan.addresses is repeatable in FHIR; confirm that each
# care plan references exactly one Condition, otherwise a forEach may be needed.
careplan_columns = [
    {"name": "cp_id", "path": "getResourceKey()"},
    {"name": "addresses_condition", "path": "addresses.getReferenceKey()"},
]
careplans_with_conditions = delta_lake.view(
    resource="CarePlan",
    select=[{"column": careplan_columns}],
)

# Link CarePlan.addresses to Condition; the inner join keeps only care plans
# whose addressed Condition is present, then rows are counted per code.
addresses_matches_condition = (
    careplans_with_conditions["addresses_condition"] == conditions_with_code["condition_id"]
)
careplans_joined_conditions = careplans_with_conditions.join(
    conditions_with_code,
    addresses_matches_condition,
    how="inner",
)
careplans_joined_conditions.groupby("diagnosis_code").count().toPandas()

Unnamed: 0,diagnosis_code,count
0,DJ44,31575


Vær opmærksom på, at disse tal er fra TRIFORKs testmiljø, og derfor ikke repræsentative.

# By kommune

Fordelt på bopælskommunen for den patient, de omhandler:

In [11]:
from spark_bi.dfs import compute_patient2municipality


# Lookup produced by the project helper; it is joined on its patient_id column
# and grouped by the ColNames.MUNICIPALITY_CODE column below.
patients_with_municipality = compute_patient2municipality(delta_lake)

# Attach the municipality to every care plan via the patient reference. The
# inner join drops care plans whose patient is missing from the lookup.
cp_joined_municipality = cp_with_patient_id.join(
    patients_with_municipality,
    cp_with_patient_id["cp_patient_id"] == patients_with_municipality["patient_id"],
    how="inner",
)

# Care plans per municipality code.
cp_joined_municipality.groupby(ColNames.MUNICIPALITY_CODE.value).count().toPandas()

Unnamed: 0,municipality_code,count
0,410,14132
1,185,17443


Vær opmærksom på, at disse tal er fra TRIFORKs testmiljø, og derfor ikke repræsentative.

# By organisatorisk enhed

See README.md under "Organisatorisk enhed".

# By region

In [12]:
from spark_bi.dfs import compute_patient2region

# Lookup produced by the project helper; it is joined on its patient_id column
# and grouped by the ColNames.REGION_CODE column below.
patients_with_region = compute_patient2region(delta_lake)

# Attach the region to every care plan via the patient reference. The inner
# join drops care plans whose patient is missing from the lookup.
cp_joined_region = cp_with_patient_id.join(
    patients_with_region,
    cp_with_patient_id["cp_patient_id"] == patients_with_region["patient_id"],
    how="inner",
)

# Care plans per region code.
cp_joined_region.groupby(ColNames.REGION_CODE.value).count().toPandas()


Unnamed: 0,region_code,count
0,DK-84,17443
1,DK-83,14132


Vær opmærksom på, at disse tal er fra TRIFORKs testmiljø, og derfor ikke repræsentative.