In [None]:
import sys
from pathlib import Path
import pandas as pd

# The notebook is expected to run from a sub-directory of the project, so the
# project root is the parent of the current working directory.
project_root = str(Path.cwd().parent)
# Make the project importable. Guard the append so that re-running this cell
# does not keep adding duplicate entries to sys.path.
if project_root not in sys.path:
    sys.path.append(project_root)

from src.data.unified import UnifiedDataset
from src.data.dataset import DatasetModality

# Open the "acdc" dataset (CMR modality) stored under <project_root>/data.
data_root = Path(project_root).joinpath("data")
data = UnifiedDataset(data_root, modality=DatasetModality.CMR, dataset_key="acdc")

records = data.get_all_record_ids()

# One row per record: the stored metadata plus the record id itself.
df = pd.DataFrame(
    [
        dict(data.metadata_store.get(record_id), record_id=record_id)
        for record_id in records
    ]
)
# Attach each record's target labels, in the same order as the rows above.
df = df.assign(
    labels=[
        data[record_id].preprocessed_record.target_labels for record_id in records
    ]
)

# The dataset handle is not needed once the frame has been built.
del data
df.head()

In [None]:
# BMI = weight / height^2, rounded to two decimals. Height is divided by 100
# first (presumably cm -> m, matching the "[cm]" axis label used when plotting).
height_scaled = df["height"].div(100)
df["bmi"] = df["weight"].div(height_scaled.pow(2)).round(2)
del height_scaled

# Summary statistics of the BMI column, displayed as the cell output.
bmi_stats = df["bmi"].describe().round(2)
bmi_stats

In [None]:
# Summary statistics for weight and height in one table (one column each).
# A single DataFrame renders as a rich table, whereas a tuple of two Series
# is only shown as its plain-text repr.
df[["weight", "height"]].describe().round(2)

In [7]:
# Show the column labels currently present in df.
df.columns

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Render figures inline in the notebook, using the high-resolution "retina"
# figure format.
%matplotlib inline
%config InlineBackend.figure_format='retina'

# Apply seaborn's "whitegrid" style to the plots that follow.
sns.set_theme(style="whitegrid")

# Default figure size as [width, height] for figures created after this cell.
plt.rcParams["figure.figsize"] = [12, 5]

In [None]:
# Vertical violin plots of weight, height and BMI, one panel each, side by side.
fig, axes = plt.subplots(1, 3)
# Increase the horizontal space between the panels.
fig.subplots_adjust(wspace=0.5)

# Each panel holds a single violin, so choose its colour explicitly (the first
# colour of the "muted" palette). Passing `palette=` without `hue=` is
# deprecated in seaborn 0.13 and emits a FutureWarning.
violin_color = sns.color_palette("muted")[0]

# (column in df, y-axis label) for each panel, left to right.
panels = [
    ("weight", "Weight [kg]"),
    ("height", "Height [cm]"),
    ("bmi", "BMI"),
]
for ax, (column, ylabel) in zip(axes, panels):
    # inner="quartile" draws the quartiles of the distribution inside the violin.
    sns.violinplot(y=df[column], ax=ax, color=violin_color, inner="quartile")
    ax.set_ylabel(ylabel)

plt.show()