# Lab value distributions

In [6]:
from pathlib import Path

import altair as alt
import polars as pl
from IPython.display import display, Markdown

import plotting

In [7]:
# Path of the CSV with the lab value distribution statistics; it is handed to
# `pl.read_csv` when building `dataf`.
# NOTE(review): this is an absolute, machine-specific path, so it must be
# adjusted before running the notebook anywhere else.
file: str = "/Users/vllorens/data/fimm/finregistry-kanta-lab-values/risteys/stats_dist_lab_values__sorted.csv"

In [8]:
# Load the distribution statistics. OMOP_ID is forced to a string column
# instead of being left to type inference.
raw_stats = pl.read_csv(file, dtypes={"OMOP_ID": pl.String})

# Keep, for each OMOP ID, only the rows whose NPeopleTotalInDist equals the
# maximum over that OMOP ID, i.e. the unit with the most people in it.
# NOTE(review): if two units tie on that maximum, rows of both are kept --
# confirm whether that can happen in the data.
n_people_in_dist = pl.col("NPeopleTotalInDist")
is_most_populated_unit = n_people_in_dist == n_people_in_dist.max().over("OMOP_ID")

# Presumably derives the BinLeft / BinRight columns used for plotting from
# the "Bin" column -- see plotting.extract_bin_left_right to confirm.
dataf = plotting.extract_bin_left_right(
    raw_stats.filter(is_most_populated_unit),
    column_bin="Bin",
)

In [9]:
# Distinct OMOP IDs, one notebook section is rendered per ID.
# `unique()` does not guarantee any ordering by default, which makes the
# section order differ between runs; `maintain_order=True` keeps the IDs in
# the order they first appear in `dataf`, so the output is reproducible.
omop_ids = dataf.get_column("OMOP_ID").unique(maintain_order=True)

In [12]:
# Render one section per OMOP ID: a Markdown heading followed by a chart of
# the number of records per lab value bin.
for omop_id in omop_ids:
    display(Markdown(f"## OMOP ID: {omop_id}"))

    plot_data = (
        dataf
        .filter(pl.col("OMOP_ID") == omop_id)
        .select(
            pl.col("LAB_UNIT"),
            pl.col("BinLeft"),
            pl.col("BinRight"),
            pl.col("NRecords"),
        )
    )
    # The unit is read from the first row only.
    # NOTE(review): assumes all rows of one OMOP ID share the same LAB_UNIT
    # after the upstream filtering -- confirm.
    unit = plot_data.head(1).get_column("LAB_UNIT").item()

    if unit == "binary":
        # TODO: plot binary lab values.
        # Skip explicitly: falling through to `display(chart)` would raise a
        # NameError when no chart exists yet, or silently re-display the
        # chart of the previous OMOP ID under this heading.
        display(Markdown("_Binary lab values are not plotted yet._"))
        continue

    if unit == "titre":
        # Titre values: one bar per bin, positioned at the bin's left edge.
        chart = (
            alt.Chart(plot_data)
            .mark_bar()
            .encode(
                alt.X("BinLeft:Q").title(f"Lab value ({unit})"),
                alt.Y("NRecords:Q").title("Number of records"),
                tooltip=["BinLeft:Q", "NRecords:Q"],
            )
        )
    else:
        # Any other unit: one rectangle per bin spanning BinLeft..BinRight.
        # Swapping `.mark_rect()` for `.mark_point()` highlights overlapping
        # bins when debugging odd-looking distributions.
        chart = (
            alt.Chart(plot_data)
            .mark_rect()
            .encode(
                alt.X("BinLeft:Q").title(f"Lab value ({unit})"),
                alt.X2("BinRight:Q"),
                alt.Y("NRecords:Q").title("Number of records"),
                tooltip=["BinLeft:Q", "BinRight:Q", "NRecords:Q"],
            )
        )

    display(chart)

## OMOP ID: 3035995

## OMOP ID: 3020564

## OMOP ID: 3037522