In [None]:
from dataclasses import dataclass
from io import StringIO
from pathlib import Path
from typing import Optional

import matplotlib.pylab as plt
import pandas as pd
from pandas.testing import assert_frame_equal

import data
import qc
import recovery
import utils

In [None]:
# Folder containing the raw input CSV files, as defined by the project configuration.
parent_folder = utils.Configuration.RAW_DATA_PATH

# Bundle the paths of the five input CSV files into one project `data.Data` object.
# NOTE: despite its name, `df` is whatever `data.Data(...)` returns (later cells read
# `df.quant_file`, `df.sample_properties_file` and `df.qc_file` from it), not
# necessarily a pandas DataFrame.
# NOTE(review): whether `Data` reads the files eagerly or lazily is not visible
# here — confirm in the `data` module.
df = data.Data(
    quant_file=parent_folder.joinpath("results.csv"),
    is_correspondence_file=parent_folder.joinpath("is_std_table_correspondence.csv"),
    sample_properties_file=parent_folder.joinpath("sample_properties.csv"),
    qc_file=parent_folder.joinpath("qc.csv"),
    is_concentration_file=parent_folder.joinpath("is_std_table_concentration.csv"),
)

In [None]:
# Run the project's validation on the loaded inputs before any calculation.
# NOTE(review): the exact checks (and whether failures raise or are only reported)
# live in `data.Data.validate_data` and are not visible here — confirm there.
df.validate_data()

In [None]:
# In-memory CSV fixtures mirroring the layout of the input files.
# Each fixture is assembled row by row and joined with "\n" (no trailing newline).
# They are not referenced by the other cells shown here; presumably they are
# meant to be wrapped in `StringIO` for tests — TODO confirm.

# Peak areas: one row per compound, one column per QC / ISRS injection.
mock_peak_areas = "\n".join(
    [
        "name,type,QC_1,QC_2,QC_3,QC_4,QC_5,ISRS_1,ISRS_2,ISRS_3,ISRS_4,ISRS_5",
        "13C_HCB,Area,20,40,60,80,100,20,40,60,80,100",
        "CB_207,Area,100,100,100,100,100,1000,1000,1000,1000,1000",
    ]
)

# Amount of each standard.
mock_is_concentration_file = "\n".join(
    [
        "name,amount",
        "13C_HCB,1000",
        "CB_207,1000",
    ]
)

# Sample table: five "qc" and five "isrs" samples, each with volume 0.5.
mock_sample_properties_file = "\n".join(
    ["sample_name,sample_type,volume"]
    + [f"QC_{number},qc,0.5" for number in range(1, 6)]
    + [f"ISRS_{number},isrs,0.5" for number in range(1, 6)]
)

# Mapping of a native compound to its internal and external standard.
mock_is_correspondence_file = "\n".join(
    [
        "native,internal_standard,external_standard",
        "alpaHCH,13C_HCB,CB_207",
    ]
)

In [None]:
# Compute the correction factors from the quantification table, the sample
# properties and the QC table using the project's `qc.CorrectionFactor`.
# NOTE(review): the type/shape of the returned object is defined in the `qc` module
# and is not visible here; it is later multiplied row-wise (axis=0) into the
# concentration table, so it presumably aligns on compound name — confirm.
correction_factors = qc.CorrectionFactor(
    df.quant_file, df.sample_properties_file, df.qc_file
).calculate_correction_factors()

In [None]:
class ConcentrationCalculator:
    """Blank-correct quantified concentrations and normalise them by sample volume.

    Parameters
    ----------
    concentrations : pandas.DataFrame
        Quantification table with a ``name`` column, a ``type`` column and one
        column per sample. Only rows whose ``type`` equals ``"concentration"``
        are kept; they are indexed by ``name``.
    sample_properties_file : pandas.DataFrame
        Sample table with ``sample_name`` and ``sample_type`` columns plus the
        per-sample volume column.
    correction_factors
        Factors multiplied into the blank-corrected values along ``axis=0``
        (aligned on the row index, i.e. the compound ``name``, when a Series is
        given).
    """

    # Sample type whose columns are averaged to obtain the blank level.
    BLANK_SAMPLE_TYPE = "blank"
    # Sample types whose columns are kept in the calculated result.
    REPORTED_SAMPLE_TYPES = ("sample", "qc")

    def __init__(
        self,
        concentrations,
        sample_properties_file,
        correction_factors,
    ):
        self.concentrations = concentrations.query("type == 'concentration'").set_index(
            "name"
        )
        self.sample_properties_file = sample_properties_file
        self.correction_factors = correction_factors

    def _sample_names_of_type(self, sample_types):
        """Return the ``sample_name`` values whose ``sample_type`` is in ``sample_types``."""
        properties = self.sample_properties_file
        is_requested_type = properties["sample_type"].isin(list(sample_types))
        return properties.loc[is_requested_type, "sample_name"].values

    @property
    def blank_sample_names(self):
        """Array of the names of all samples of type ``"blank"``."""
        return self._sample_names_of_type([self.BLANK_SAMPLE_TYPE])

    @property
    def other_sample_names(self):
        """Array of the names of all samples of type ``"sample"`` or ``"qc"``."""
        return self._sample_names_of_type(self.REPORTED_SAMPLE_TYPES)

    @property
    def sample_volumes(self):
        """Per-sample volume of the reported samples, indexed by ``sample_name``.

        The value column is whatever remains in the sample table after dropping
        ``sample_type`` and moving ``sample_name`` into the index.
        NOTE(review): if the table carries more than one such column the result
        stays a DataFrame, which will not divide the concentrations correctly —
        confirm that the table holds only the volume column.
        """
        properties = self.sample_properties_file
        reported_rows = properties.loc[
            properties["sample_name"].isin(self.other_sample_names)
        ]
        return (
            reported_rows.drop(columns="sample_type")
            .set_index("sample_name")
            # Squeeze only the column axis so that a table with a single
            # reported sample still yields a Series rather than a bare scalar.
            .squeeze(axis="columns")
        )

    def calculate_average_blanks(self):
        """Return the mean concentration per compound across the blank samples.

        When the sample table declares no blank samples, a zero Series is
        returned so that the blank correction becomes a no-op; averaging over
        zero columns would otherwise give NaN for every compound and turn every
        calculated concentration into NaN.
        """
        blank_names = self.blank_sample_names
        if len(blank_names) == 0:
            return pd.Series(0.0, index=self.concentrations.index)
        return self.concentrations.loc[:, blank_names].mean(axis=1)

    def calculate_concentrations(self, non_positive_fill=1):
        """Return blank-corrected, factor-corrected, volume-normalised concentrations.

        For every reported sample column the calculation is
        ``(concentration - average blank) * correction factor / sample volume``.

        Parameters
        ----------
        non_positive_fill : scalar, default 1
            Replacement for every result that is ``<= 0``.
            NOTE(review): the default of 1 is kept from the original
            implementation; its rationale is not visible here — confirm.

        Returns
        -------
        pandas.DataFrame
            One row per compound ``name`` and one column per sample of type
            ``"sample"`` or ``"qc"`` that is present in the concentration table.
        """
        blank_corrected = self.concentrations.filter(
            items=self.other_sample_names, axis=1
        ).sub(self.calculate_average_blanks(), axis=0)
        concentrations = blank_corrected.mul(self.correction_factors, axis=0).div(
            self.sample_volumes
        )
        return concentrations.mask(concentrations <= 0, non_positive_fill)


# Run the calculator on the loaded data. `df.quant_file` is passed as the
# concentration table (the constructor keeps only its rows with
# type == 'concentration'). As the final expression of the cell shown here,
# the resulting table is displayed as the cell output.
ConcentrationCalculator(
    df.quant_file, df.sample_properties_file, correction_factors
).calculate_concentrations()