In [None]:
from dataclasses import dataclass
from pathlib import Path
from typing import Optional

import matplotlib.pylab as plt
import pandas as pd

import get_data
import utils

In [None]:
# Base location of the input tables, taken from the project configuration.
parent_folder = utils.Configuration.RAW_DATA_PATH

# Keyword argument of `get_data.Data` -> file name inside `parent_folder`.
input_file_names = {
    "quant_file": "results.csv",
    "is_correspondence_file": "is_std_table_correspondence.csv",
    "sample_properties_file": "sample_properties.csv",
    "qc_file": "qc.csv",
    "is_concentration_file": "is_std_table_concentration.csv",
}

data = get_data.Data(
    **{
        argument: parent_folder.joinpath(file_name)
        for argument, file_name in input_file_names.items()
    }
)

In [None]:
# Inspect the correspondence attribute of `data` (built from
# "is_std_table_correspondence.csv"); as the last expression of the cell it is
# rendered as the cell output.
data.is_correspondence_file

In [None]:
class Recovery:
    """Response factors of internal standards relative to the external standard.

    The factor (called RRF in this code, presumably "relative response
    factor") is computed for every internal standard and every sample whose
    ``sample_type`` is ``"isrs"`` as::

        RRF = (area_IS * amount_RS) / (area_RS * amount_IS)

    where ``RS`` is the single external standard listed in the correspondence
    table.

    Parameters
    ----------
    peak_areas : pandas.DataFrame
        Table with a ``type`` column, a ``name`` column and columns labelled
        with sample names. Only rows with ``type == 'area'`` are kept, and the
        result is indexed by ``name``.
    is_concentration_file : pandas.DataFrame
        Table with a ``name`` column (used as index) and an ``amount`` column.
    sample_properties_file : pandas.DataFrame
        Table with ``sample_name`` and ``sample_type`` columns.
    is_correspondence_file : pandas.DataFrame
        Table with ``internal_standard`` and ``external_standard`` columns.
    """

    def __init__(
        self,
        peak_areas,
        is_concentration_file,
        sample_properties_file,
        is_correspondence_file,
    ):
        self.peak_areas = peak_areas.query("type == 'area'").set_index("name")
        self.is_concentration_file = is_concentration_file.set_index("name")
        self.sample_properties_file = sample_properties_file
        self.is_correspondence_file = is_correspondence_file

    @property
    def isrs_sample_names(self):
        """Values of ``sample_name`` for the samples whose type is ``"isrs"``."""
        return self.sample_properties_file.loc[
            lambda df: df.sample_type == "isrs", "sample_name"
        ].values

    @property
    def is_identity(self):
        """Distinct names found in the ``internal_standard`` column."""
        return self.is_correspondence_file.internal_standard.unique()

    @property
    def rs_identity(self):
        """Distinct names found in the ``external_standard`` column."""
        return self.is_correspondence_file.external_standard.unique()

    def _isrs_areas(self, identities) -> pd.DataFrame:
        """Peak areas of ``identities`` (rows) in the ISRS samples (columns)."""
        return self.peak_areas.loc[
            self.peak_areas.index.isin(identities),
            self.peak_areas.columns.isin(self.isrs_sample_names),
        ]

    def _amounts(self, identities) -> pd.Series:
        """``amount`` entries of the concentration table for ``identities``."""
        return self.is_concentration_file.loc[
            self.is_concentration_file.index.isin(identities), "amount"
        ]

    @staticmethod
    def _single_entry(selection, what):
        """Return the only entry of ``selection`` or raise ``ValueError``.

        For a DataFrame the entry is its single row (as a Series labelled by
        the columns); for a Series it is its single value.
        """
        if len(selection) != 1:
            raise ValueError(
                f"Expected exactly one {what} for the external standard, "
                f"found {len(selection)}: {list(selection.index)}"
            )
        return selection.iloc[0]

    def calculate_RRF(self):
        """Compute the RRF of each internal standard in each ISRS sample.

        Returns
        -------
        pandas.DataFrame
            One row per internal standard, one column per ISRS sample.

        Raises
        ------
        ValueError
            If the external standard does not match exactly one peak-area row
            or exactly one amount. The arithmetic below aligns on labels, so
            any other count would silently produce NaN instead of a result.
        """
        # Series labelled by sample name, so the division aligns per sample
        # whatever the number of ISRS samples.
        rs_area = self._single_entry(
            self._isrs_areas(self.rs_identity), "peak-area row"
        )
        rs_amount = self._single_entry(self._amounts(self.rs_identity), "amount")

        is_area = self._isrs_areas(self.is_identity)
        is_amount = self._amounts(self.is_identity)

        # Row-wise division: `is_amount` is aligned on the internal-standard names.
        return ((is_area * rs_amount) / rs_area).div(is_amount, axis="index")

    def calculate_mean_RRF(self):
        """Average the RRF of each internal standard over the ISRS samples."""
        return self.calculate_RRF().mean(axis="columns")

In [None]:
# Build the calculator from the loaded tables and show the per-sample RRF
# table as the cell output.
Recovery(
    peak_areas=data.quant_file,
    is_concentration_file=data.is_concentration_file,
    sample_properties_file=data.sample_properties_file,
    is_correspondence_file=data.is_correspondence_file,
).calculate_RRF()