# Imports

In [None]:
import warnings

warnings.simplefilter(action="ignore", category=FutureWarning)

from collections import defaultdict

import common_functions
import numpy as np
import pandas as pd
import utils
from scipy import stats

# Aim of this notebook  
* Calculate the Estimated Daily Intake values based on the wristband and dust measurements


Based on the measured concentrations, we estimate exposure levels from the dust and wristband measurements. The estimated daily intake (EDI), in milligrams per kilogram of body weight per day (mg/kg bw/day), is determined using a methodology based on the general approach described by McGrath et al., 2022. The EDI is calculated using the following formula:

$$\mathrm{EDI} = \frac{\text{Concentration} \times \text{Ingestion} \times \text{Fraction}}{\text{Body weight}}$$

Here, "Concentration" represents the concentrations of the quantified compounds. "Ingestion" pertains to dust ingestion rates of 20 and 60 mg/day for adults and toddlers in the 50th percentile exposure scenario, and 50 and 100 mg/day for adults and toddlers in the 95th percentile exposure scenario, as specified by USEPA, 2017a. "Fraction" refers to the fraction of time individuals spend at the workplace (0.33, i.e. 8 hours of work divided by 24 hours), based on research by Klepeis et al., 2001, and Poma et al., 2020. "Body weight" is taken from the serum dataframe and is expressed in kg.

It's worth noting that we assumed 100% bioaccessibility for each compound, providing a conservative estimate of internal exposure, as per Christia et al., 2021.

In [None]:
# All three interim datasets live in the same directory; resolve it once.
interim_dir = utils.Configuration.INTERIM_DATA_PATH

DUST_DATA_PATH = interim_dir.joinpath("dust.parquet.gzip")
SERUM_DATA_PATH = interim_dir.joinpath(
    "HBM4EU_E-waste_template_V3_all_data_INTERIM.parquet.gzip"
)
WRISTBAND_DATA_PATH = interim_dir.joinpath("wristband.parquet.gzip")

# Serum and wristband tables are used exactly as stored.
serum = pd.read_parquet(SERUM_DATA_PATH)
wristband = pd.read_parquet(WRISTBAND_DATA_PATH)

# Dust measurements are collapsed to one row per "group" (column-wise mean);
# the group labels become the index of `dust`.
dust = pd.read_parquet(DUST_DATA_PATH).groupby("group").mean()

## EDI
### Based on the dust measurements
#### 50th percentile

To calculate the 50th percentile exposure scenario, we use a dust ingestion rate of 20 mg/day. To obtain the result in **mg/kg/day**, we use an intake of 0.02 g/day and divide the final values by 1000, since the original dust measurements are in µg/g dust.

In [None]:
# Scenario parameters for the 50th percentile exposure scenario.
DUST_INTAKE_P50 = 0.02  # dust ingestion rate in g/day (20 mg/day)
WORK_TIME_FRACTION = 8 / 24  # 8 working hours out of 24
UG_PER_MG = 1000  # dust values are in µg/g (per the section text)

dust_EDI_p50 = (
    # One row per worker: the company they belong to and their body weight.
    serum.query("main_category == 'Worker'")[["companyID", "weight"]]
    # For every dust column, attach the mean value of the worker's company
    # (`dust` is indexed by group; unmatched companies yield NaN).
    .pipe(
        lambda workers: workers.assign(
            **{
                f"{compound}_dust": workers["companyID"].map(
                    dict(zip(dust.index, dust[compound]))
                )
                for compound in dust.columns
            }
        )
    )
    # EDI = concentration * ingestion * fraction / body weight, per compound.
    .pipe(
        lambda workers: workers.assign(
            **{
                f"{dust_col}_EDI_p50": workers[dust_col]
                * DUST_INTAKE_P50
                * WORK_TIME_FRACTION
                / workers["weight"]
                for dust_col in workers.loc[:, "PCB 28_dust":"BDE 209_dust"].columns
            }
        )
    )
    # Keep only the freshly computed EDI columns.
    .loc[:, "PCB 28_dust_EDI_p50":]
)

# mean EDI values per compound in mg/kg/day
dust_EDI_p50.div(UG_PER_MG).mean()

In [None]:
# Count and quartiles of the dust-based EDI (50th percentile scenario),
# one row per compound, in mg/kg/day.
dust_EDI_p50.div(1000).describe().loc[["count", "25%", "50%", "75%"]].transpose()

#### 95th percentile
To calculate the 95th percentile exposure scenario, we use a dust ingestion rate of 60 mg/day, i.e. 0.06 g/day.

In [None]:
# Scenario parameters for the 95th percentile exposure scenario.
DUST_INTAKE_P95 = 0.06  # dust ingestion rate in g/day (60 mg/day, as stated in this section)
WORK_TIME_FRACTION = 8 / 24  # 8 working hours out of 24
UG_PER_MG = 1000  # converts the µg-based result to mg

dust_EDI_p95 = (
    # One row per worker: the company they belong to and their body weight.
    serum.query("main_category == 'Worker'")[["companyID", "weight"]]
    # For every dust column, attach the mean value of the worker's company
    # (`dust` is indexed by group; unmatched companies yield NaN).
    .pipe(
        lambda workers: workers.assign(
            **{
                f"{compound}_dust": workers["companyID"].map(
                    dict(zip(dust.index, dust[compound]))
                )
                for compound in dust.columns
            }
        )
    )
    # EDI = concentration * ingestion * fraction / body weight, per compound.
    .pipe(
        lambda workers: workers.assign(
            **{
                f"{dust_col}_EDI_p95": workers[dust_col]
                * DUST_INTAKE_P95
                * WORK_TIME_FRACTION
                / workers["weight"]
                for dust_col in workers.loc[:, "PCB 28_dust":"BDE 209_dust"].columns
            }
        )
    )
    # Keep only the freshly computed EDI columns.
    .loc[:, "PCB 28_dust_EDI_p95":]
)

# mean EDI values per compound in mg/kg/day
dust_EDI_p95.div(UG_PER_MG).mean()

In [None]:
# Count and quartiles of the dust-based EDI (95th percentile scenario),
# one row per compound, in mg/kg/day.
# `dust_EDI_p95` is already restricted to the columns from
# "PCB 28_dust_EDI_p95" onward where it is built, so no further column slice
# is needed here (this mirrors the 50th percentile summary cell).
(
    dust_EDI_p95.div(1000)
    .describe()
    .transpose()
    .loc[:, ["count", "25%", "50%", "75%"]]
)

### Based on the wristband measurements
#### 50th percentile

To obtain the result in mg/kg/day, we use an intake of 0.02 g/day and divide the final values by 1,000,000, since the original wristband measurements are in ng/g wristband.

In [None]:
# Scenario parameters for the 50th percentile exposure scenario.
WRISTBAND_INTAKE_P50 = 0.02  # intake in g/day
WORK_TIME_FRACTION = 8 / 24  # 8 working hours out of 24
NG_PER_MG = 1_000_000  # wristband values are in ng/g (per the section text)

wristband_EDI_p50 = (
    # Pair each wristband row with the body weight of the matching serum row
    # (serum index <-> wristband "ID").
    serum[["weight"]]
    .merge(wristband, left_index=True, right_on="ID")
    .drop(columns=["ID", "main_category", "company_ID"])
    # EDI = concentration * ingestion * fraction / body weight, per compound.
    .pipe(
        lambda merged: merged.assign(
            **{
                f"{compound}_EDI_p50": merged[compound]
                * WRISTBAND_INTAKE_P50
                * WORK_TIME_FRACTION
                / merged["weight"]
                for compound in merged.loc[:, "PCB 101":"BDE 209"].columns
            }
        )
    )
    # Keep only the EDI columns and convert to mg/kg/day.
    .loc[:, "PCB 101_EDI_p50":]
    .div(NG_PER_MG)
    # Summarise: count and quartiles, one row per compound.
    .describe()
    .loc[["count", "25%", "50%", "75%"]]
    .transpose()
)
wristband_EDI_p50

#### 95th percentile

To obtain the result in mg/kg/day, we use an intake of 0.06 g/day and divide the final values by 1,000,000, since the original wristband measurements are in ng/g wristband.

In [None]:
# Scenario parameters for the 95th percentile exposure scenario.
WRISTBAND_INTAKE_P95 = 0.06  # intake in g/day
WORK_TIME_FRACTION = 8 / 24  # 8 working hours out of 24
NG_PER_MG = 1_000_000  # wristband values are in ng/g (per the section text)

wristband_EDI_p95 = (
    # Pair each wristband row with the body weight of the matching serum row
    # (serum index <-> wristband "ID").
    serum[["weight"]]
    .merge(wristband, left_index=True, right_on="ID")
    .drop(columns=["ID", "main_category", "company_ID"])
    # EDI = concentration * ingestion * fraction / body weight, per compound.
    .pipe(
        lambda merged: merged.assign(
            **{
                f"{compound}_EDI_p95": merged[compound]
                * WRISTBAND_INTAKE_P95
                * WORK_TIME_FRACTION
                / merged["weight"]
                for compound in merged.loc[:, "PCB 101":"BDE 209"].columns
            }
        )
    )
    # Keep only the EDI columns and convert to mg/kg/day.
    .loc[:, "PCB 101_EDI_p95":]
    .div(NG_PER_MG)
    # Summarise: count and quartiles, one row per compound.
    .describe()
    .loc[["count", "25%", "50%", "75%"]]
    .transpose()
)
wristband_EDI_p95