In [None]:
from dataclasses import dataclass
from io import StringIO
from pathlib import Path
from typing import Optional

import matplotlib.pylab as plt
import pandas as pd
from pandas.testing import assert_frame_equal

import common_operations
import concentration_calculator
import data
import pipeline
import qc
import recovery
import utils

In [None]:
# mock_peak_areas = "name,type,QC_1,QC_2,QC_3,QC_4,QC_5,ISRS_1,ISRS_2,ISRS_3,ISRS_4,ISRS_5\n13C_HCB,Area,20,40,60,80,100,20,40,60,80,100\nCB_207,Area,100,100,100,100,100,1000,1000,1000,1000,1000"
# mock_is_concentration_file = "name,amount\n13C_HCB,1000\nCB_207,1000"
# mock_sample_properties_file = "sample_name,sample_type,volume\nQC_1,qc,0.5\nQC_2,qc,0.5\nQC_3,qc,0.5\nQC_4,qc,0.5\nQC_5,qc,0.5\nISRS_1,isrs,0.5\nISRS_2,isrs,0.5\nISRS_3,isrs,0.5\nISRS_4,isrs,0.5\nISRS_5,isrs,0.5"
# mock_is_correspondence_file = (
#     "native,internal_standard,external_standard\nalpaHCH,13C_HCB,CB_207"
# )

In [None]:
# Every input file is resolved relative to the configured raw-data folder.
parent_folder = utils.Configuration.RAW_DATA_PATH

# Keyword arguments for data.Data, collected first so the call itself stays short.
data_inputs = {
    "quant_file": parent_folder.joinpath("results.csv"),
    "is_correspondence_file": parent_folder.joinpath("is_std_table_correspondence.csv"),
    "sample_properties_file": parent_folder.joinpath("sample_properties.csv"),
    "qc_file": parent_folder.joinpath("qc.csv"),
    # "is_concentration_file": parent_folder.joinpath("is_std_table_concentration.csv"),
}

df = data.Data(**data_inputs)

In [None]:
# Run the validator over the loaded inputs before any calculation cell.
validator = data.DataValidator(df)
validator.validate_data()

In [None]:
# Plot the response factor with the by_sample option switched on.
recovery_calculator = recovery.Recovery(df)
recovery_calculator.plot_response_factor(by_sample=True)

In [None]:
# Plot the QC correction factor for the loaded data.
correction_factor_calculator = qc.CorrectionFactor(df)
correction_factor_calculator.plot_correction_factor()

In [None]:
# The concentration calculator takes the QC correction factors as its second
# argument, so compute them first and then plot with by_sample switched off.
correction_factors = qc.CorrectionFactor(df).calculate_correction_factor()
calculator = concentration_calculator.ConcentrationCalculator(df, correction_factors)
calculator.plot_concentration(by_sample=False)

In [None]:
parent_folder = utils.Configuration.RAW_DATA_PATH

# Pipeline keyword argument -> file name inside the raw-data folder.
input_file_names = {
    "quant_file": "results.csv",
    "is_correspondence_file": "is_std_table_correspondence.csv",
    "sample_properties_file": "sample_properties.csv",
    "qc_file": "qc.csv",
    "is_concentration_file": "is_std_table_concentration.csv",
}

file_paths = {
    argument: parent_folder.joinpath(file_name)
    for argument, file_name in input_file_names.items()
}

# Run the packaged pipeline (imported from the pipeline module) on those files.
job = pipeline.Pipeline(**file_paths)

job.execute()

# Troubleshooting

In [None]:
# TODO: self.concentration_calculator.calculate_concentration() currently requires a QC file.
# QC correction should be optional, so we need to make sure that
# self.concentration_calculator.calculate_concentration() can also run without QC correction.

# Input files for the notebook-local Pipeline, all located in the raw-data folder.
parent_folder = utils.Configuration.RAW_DATA_PATH

file_paths = dict(
    quant_file=parent_folder.joinpath("results.csv"),
    is_correspondence_file=parent_folder.joinpath("is_std_table_correspondence.csv"),
    sample_properties_file=parent_folder.joinpath("sample_properties.csv"),
    qc_file=parent_folder.joinpath("qc.csv"),
    is_concentration_file=parent_folder.joinpath("is_std_table_concentration.csv"),
)


class Pipeline:
    """Run validation, recovery, QC correction and concentration steps in order.

    The input files are wrapped in a ``data.Data`` object at construction time.
    Each calculator is created lazily on first access and then reused.
    """

    def __init__(
        self,
        quant_file,
        is_correspondence_file,
        sample_properties_file,
        qc_file=None,
        is_concentration_file=None,
    ):
        """Build the ``data.Data`` object and reset the lazily created helpers.

        Args:
            quant_file: Passed to ``data.Data`` as its first argument.
            is_correspondence_file: Passed to ``data.Data`` as its second argument.
            sample_properties_file: Passed to ``data.Data`` as its third argument.
            qc_file: Optional; passed to ``data.Data`` as its fourth argument.
            is_concentration_file: Optional; passed to ``data.Data`` as its
                fifth argument.
        """
        self.data = data.Data(
            quant_file,
            is_correspondence_file,
            sample_properties_file,
            qc_file,
            is_concentration_file,
        )
        # Helpers are built on demand by the properties below.
        self._data_validator = None
        self._recovery_calculator = None
        self._correction_factor_calculator = None
        self._concentration_calculator = None

    @property
    def data_validator(self):
        """``data.DataValidator`` for ``self.data``, created on first access."""
        if self._data_validator is None:
            self._data_validator = data.DataValidator(self.data)
        return self._data_validator

    @property
    def recovery_calculator(self):
        """``recovery.Recovery`` for ``self.data``, created on first access."""
        if self._recovery_calculator is None:
            self._recovery_calculator = recovery.Recovery(self.data)
        return self._recovery_calculator

    @property
    def correction_factor_calculator(self):
        """``qc.CorrectionFactor`` for ``self.data``, created on first access."""
        if self._correction_factor_calculator is None:
            self._correction_factor_calculator = qc.CorrectionFactor(self.data)
        return self._correction_factor_calculator

    @property
    def concentration_calculator(self):
        """``ConcentrationCalculator`` for ``self.data``, created on first access.

        The calculator is built from ``self.data`` and freshly calculated
        correction factors.
        """
        if self._concentration_calculator is None:
            # NOTE(review): the correction factors are requested unconditionally,
            # even when no QC file was supplied -- confirm whether
            # calculate_correction_factor() can run without one.
            self._concentration_calculator = (
                concentration_calculator.ConcentrationCalculator(
                    self.data,
                    self.correction_factor_calculator.calculate_correction_factor(),
                )
            )
        return self._concentration_calculator

    def execute(self, return_values=("all",)):
        """Run every pipeline step and return the requested results.

        Args:
            return_values: Names of the results to return: any of
                ``"recovery"``, ``"correction_factors"``, ``"concentrations"``
                or ``"all"``. May be a single string or an iterable of strings.
                Defaults to all results; ``None`` also means all results.

        Returns:
            dict mapping each requested result name to its value, in the order
            requested. Names that are not known results are ignored.
            ``"recovery"`` is ``None`` when ``self.data.is_concentration_file``
            is ``None``; ``"correction_factors"`` is ``None`` when
            ``self.data.qc_file`` is ``None``.
        """
        # Normalise the request up front: a bare string must be treated as one
        # name, not iterated character by character.
        if return_values is None:
            requested = ["all"]
        elif isinstance(return_values, str):
            requested = [return_values]
        else:
            requested = list(return_values)

        # Step 1: Validate the data
        self.data_validator.validate_data()

        # Step 2: Calculate recovery (only when an IS concentration file is set).
        # Named recovery_values so the imported `recovery` module is not shadowed.
        recovery_values = None
        if self.data.is_concentration_file is not None:
            recovery_values = self.recovery_calculator.calculate_recovery()

        # Step 3: Calculate correction factors (only when a QC file is set)
        correction_factors = None
        if self.data.qc_file is not None:
            correction_factors = (
                self.correction_factor_calculator.calculate_correction_factor()
            )

        # Step 4: Calculate concentrations
        concentrations = self.concentration_calculator.calculate_concentration()

        results_dict = {
            "recovery": recovery_values,
            "correction_factors": correction_factors,
            "concentrations": concentrations,
        }

        # "all" anywhere in the request returns every result.
        if "all" in requested:
            return results_dict

        # Otherwise, return only the requested results that exist.
        return {key: results_dict[key] for key in requested if key in results_dict}


# Run the notebook-local Pipeline defined above and ask for every result.
job = Pipeline(**file_paths)

requested_results = ["all"]
job.execute(return_values=requested_results)