In [None]:
from io import StringIO

import pandas as pd

import common_operations
import concentration_calculator
import data
import pipeline
import qc
import recovery
import utils

In [None]:
# mock_peak_areas = "name,type,QC_1,QC_2,QC_3,QC_4,QC_5,ISRS_1,ISRS_2,ISRS_3,ISRS_4,ISRS_5\n13C_HCB,Area,20,40,60,80,100,20,40,60,80,100\nCB_207,Area,100,100,100,100,100,1000,1000,1000,1000,1000"
# mock_is_concentration_file = "name,amount\n13C_HCB,1000\nCB_207,1000"
# mock_sample_properties_file = "sample_name,sample_type,volume\nQC_1,qc,0.5\nQC_2,qc,0.5\nQC_3,qc,0.5\nQC_4,qc,0.5\nQC_5,qc,0.5\nISRS_1,isrs,0.5\nISRS_2,isrs,0.5\nISRS_3,isrs,0.5\nISRS_4,isrs,0.5\nISRS_5,isrs,0.5"
# mock_is_correspondence_file = (
#     "native,internal_standard,external_standard\nalpaHCH,13C_HCB,CB_207"
# )

In [None]:
# Load all raw input files from the configured raw-data folder into one Data object.
parent_folder = utils.Configuration.RAW_DATA_PATH

# Each pair maps a Data keyword argument to the file name expected in `parent_folder`.
df = data.Data(
    **{
        argument: parent_folder.joinpath(file_name)
        for argument, file_name in (
            ("quant_file", "results.csv"),
            ("is_correspondence_file", "is_std_table_correspondence.csv"),
            ("sample_properties_file", "sample_properties.csv"),
            ("qc_file", "qc.csv"),
            ("is_concentration_file", "is_std_table_concentration.csv"),
        )
    }
)

# Troubleshooting

In [None]:
# Usage example: build every calculation stage from the raw input files,
# assemble them into a pipeline, run it and display the results.
#
# The last assignment in this cell rebinds the name `pipeline` from the imported
# module to a Pipeline instance (the plotting cell below relies on that binding).
# Importing the module under an alias here keeps the cell re-runnable: on a second
# run `pipeline.MassBasedCalculator` would otherwise be looked up on the instance
# instead of the module. The import is served from the module cache, so it is cheap.
import pipeline as pipeline_module

parent_folder = utils.Configuration.RAW_DATA_PATH

# Keyword arguments for data.Data, one path per input file.
file_paths = {
    "quant_file": parent_folder.joinpath("results.csv"),
    "is_correspondence_file": parent_folder.joinpath("is_std_table_correspondence.csv"),
    "sample_properties_file": parent_folder.joinpath("sample_properties.csv"),
    "qc_file": parent_folder.joinpath("qc.csv"),
    "is_concentration_file": parent_folder.joinpath("is_std_table_concentration.csv"),
}

dfs = data.Data(**file_paths)
data_validator = data.DataValidator(dfs)
recovery_calculator = recovery.Recovery(dfs)
correction_factor_calculator = qc.CorrectionFactor(dfs)
# The concentration calculator is given the correction factor computed up front.
concentration_calc = concentration_calculator.ConcentrationCalculator(
    dfs, correction_factor_calculator.calculate_correction_factor()
)

mass_based_calculator = pipeline_module.MassBasedCalculator(
    dfs,
    data_validator,
    recovery_calculator,
    correction_factor_calculator,
    concentration_calc,
)

# NOTE: from here on `pipeline` is the Pipeline instance, not the module.
pipeline = pipeline_module.Pipeline(mass_based_calculator)

results = pipeline.execute()
results

In [None]:
# `pipeline` here is the Pipeline instance bound at the end of the usage-example
# cell above (it shadows the imported `pipeline` module), so that cell must have
# been run first. Presumably renders the concentration plot — confirm in
# Pipeline.display_plot.
pipeline.display_plot("concentration")

# Pytest examples


In [None]:
# Tiny frame with mixed-case column names and mixed str/int cell values,
# used as input for DataProcessor.preprocess_file.
df = pd.DataFrame(
    data={
        "Column1": ["Maki", 2, "KUPAL"],
        "column2": ["l?p", 5, 6],
    }
)

processor = data.DataProcessor()
processed_data = processor.preprocess_file(df)
# Bare last expression so the notebook renders the processed frame.
processed_data

In [None]:
# Header-only CSV inputs (column names, no data rows), held in in-memory
# text buffers instead of files on disk.
example_data = dict(
    quant_file=StringIO("name,type"),
    is_correspondence_file=StringIO("native,internal_standard,external_standard"),
    sample_properties_file=StringIO("sample_name,sample_type,volume"),
    qc_file=StringIO("native,concentration"),
    is_concentration_file=StringIO("name,amount"),
)

dataframes = data.Data(**example_data)
processor = data.DataProcessor()

# Bare last expression so the notebook shows what preprocess_file returns
# for the quant_file attribute of the loaded data.
processor.preprocess_file(dataframes.quant_file)

# Validation call kept disabled, as in the original cell:
# data.DataValidator(dataframes).validate()

In [None]:
# TODO: consider validating that the `type` column of quant_file contains both
#       `area` (used for calculating recovery, but not strictly necessary) and
#       `concentration` (needed).
# TODO: validate that is_concentration_file contains both the IS and the RS.

# Minimal populated inputs: one compound with an area row and a concentration row,
# its IS/RS correspondence, two samples, one QC concentration and the IS/RS amounts.
example_data = dict(
    quant_file=StringIO("Name,type\ncompound_1,area\ncompound_1,concentration"),
    is_correspondence_file=StringIO(
        "native,internal_standard,external_standard\ncompound_1,IS_1,RS_1"
    ),
    sample_properties_file=StringIO(
        "sample_name,sample_type,volume\nsample_1,blank,10\nsample_2,QC,10"
    ),
    qc_file=StringIO("native,concentration\ncompound_1,10"),
    is_concentration_file=StringIO("name,amount\nIS_1,10\nRS_1,10"),
)

dataframes = data.Data(**example_data)

# The return value is intentionally discarded; the cell then displays quant_file.
common_operations.BaseCalculator(dataframes).get_is_rs_amount()
dataframes.quant_file