In [1]:
import pyspark.sql.functions as F
from credentials import MY_CREDENTIALS
from data_location import DELTA_LOCATION

from spark_bi.constants import ColNames, Extensions
from spark_bi.spark import FutPathlingContext

pc = FutPathlingContext.create(
    app_name="example-spark-app", hadoop_config=MY_CREDENTIALS.to_hadoop_config()
)
delta_lake = pc.read.delta(DELTA_LOCATION)

:: loading settings :: url = jar:file:/Users/mabe/Git/spark-bi/.venv/lib/python3.11/site-packages/pyspark/jars/ivy-2.5.3.jar!/org/apache/ivy/core/settings/ivysettings.xml
Ivy Default Cache set to: /Users/mabe/.ivy2.5.2/cache
The jars for the packages stored in: /Users/mabe/.ivy2.5.2/jars
au.csiro.pathling#library-runtime added as a dependency
io.delta#delta-spark_2.13 added as a dependency
:: resolving dependencies :: org.apache.spark#spark-submit-parent-f2dc8afe-ca81-470a-a9ba-3533977d6cd3;1.0
	confs: [default]
	found au.csiro.pathling#library-runtime;9.1.0 in local-m2-cache
	found io.delta#delta-spark_2.13;4.0.0 in local-m2-cache
	found io.delta#delta-storage;4.0.0 in local-m2-cache
	found org.antlr#antlr4-runtime;4.13.1 in local-m2-cache
:: resolution report :: resolve 92ms :: artifacts dl 7ms
	:: modules in use:
	au.csiro.pathling#library-runtime;9.1.0 from local-m2-cache in [default]
	io.delta#delta-spark_2.13;4.0.0 from local-m2-cache in [default]
	io.delta#delta-storage;4.0.0 fr

# By aktiv/inaktiv

Ej meningsfyldt for careteams.

# By anvenderløsning

For at finde anvenderløsning bruger vi coexistence-tags der er beskrevet i: https://ehealth-dk.atlassian.net/wiki/spaces/EDTW/pages/2355986433/Multitenancy

Coexistence-tags sættes ikke på careteams, hvorfor vi ikke kan spore, hvilken anvenderløsning, der har oprettet dem.

# By careteam

Ej relevant.

# By diagnosis

In [2]:
(
    delta_lake.view(
        resource="CareTeam",
        select=[
            {
                "column": [
                    {"name": "ct_id", "path": "getResourceKey()"},
                    {
                        "name": ColNames.CREATING_SOLUTION.value,
                        "path": "meta.tag.where(system='http://ehealth.sundhed.dk/cs/ehealth-system').code.first()",
                    },
                ]
            },
            {"forEach": "reasonCode", "column": [{"name": "condition", "path": "coding.code"}]},
        ],
    )
    .groupby("condition")
    .count()
    .toPandas()
)

                                                                                

Unnamed: 0,condition,count
0,TBD,22
1,DJ44,11


Vær opmærksom på, at disse tal er fra TRIFORKs testmiljø, og derfor ikke repræsentative.

# By kommune

For at finde et careteams kommune laver vi koblingen:

`Careteam.managingOrganization <-> Organization.municipalityCode`

In [3]:
careteams = delta_lake.view(
    resource="CareTeam",
    select=[
        {
            "column": [
                {"name": "careteam_id", "path": "getResourceKey()"},
                {"name": "org_id", "path": "managingOrganization.first().getReferenceKey()"},
            ]
        }
    ],
)
careteams.filter(F.col("org_id").isNotNull()).head(5)

[Row(careteam_id='CareTeam/3000148060', org_id='Organization/3000038806'),
 Row(careteam_id='CareTeam/3000148061', org_id='Organization/3000029719')]

In [4]:
organizations = delta_lake.view(
    resource="Organization",
    select=[
        {
            "column": [
                {"name": "org_id", "path": "getResourceKey()"},
                {
                    "name": ColNames.MUNICIPALITY_CODE.value,
                    "path": f"extension('{Extensions.EHEALTH_MUNICIPALITY_CODES.value}').valueString",
                },
                {
                    "name": ColNames.REGION_CODE.value,
                    "path": f"extension('{Extensions.EHEALTH_REGION_CODES.value}').valueString",
                },
            ]
        }
    ],
)
organizations.filter(F.col(ColNames.MUNICIPALITY_CODE.value).isNotNull()).head(5)

[Row(org_id='Organization/3000000064', municipality_code='0787', region_code='1081'),
 Row(org_id='Organization/3000000069', municipality_code='0265', region_code='1085'),
 Row(org_id='Organization/3000000072', municipality_code='0173', region_code='1084'),
 Row(org_id='Organization/3000000088', municipality_code='0360', region_code='1085'),
 Row(org_id='Organization/3000000090', municipality_code='0787', region_code='1081')]

In [5]:
joined = careteams.join(organizations, careteams.org_id == organizations.org_id, how="left")

In [6]:
(
    joined.groupBy(ColNames.MUNICIPALITY_CODE.value)
    .agg(F.countDistinct("careteam_id").alias("n_careteams"))
    .orderBy("n_careteams", ascending=False)
    .toPandas()
)

                                                                                

Unnamed: 0,municipality_code,n_careteams
0,,31
1,851.0,1
2,849.0,1


På TRIFORKs testmiljø er der kun 2 careteams der er tilknyttet en kommune.

# By organisatorisk enhed

See README.md under "Organistorisk enhed".

# By region

In [7]:
(
    joined.groupBy(ColNames.REGION_CODE.value)
    .agg(F.countDistinct("careteam_id").alias("n_careteams"))
    .orderBy("n_careteams", ascending=False)
    .toPandas()
)

Unnamed: 0,region_code,n_careteams
0,,31
1,1081.0,2


På TRIFORKs testmiljø er der kun 2 careteams der er tilknyttet en region.