In [None]:
import sys
import yaml
import numpy as np

import pandas as pd
pd.options.mode.chained_assignment = None

sys.path.append('../python/')
import plate_util
from dashboard_notebook_util import (
    read_estimated_concentrations, 
    read_plate_data_with_calibration_concentrations,
    read_quality_control_concentrations
)
from analysis_util import get_table_with_all_duplicate_qc_checks

import logging
logging.basicConfig(level=logging.DEBUG, filename="dash_logs.log")

In [None]:
#TODO: standardise naming conventions of IDs

In [None]:
# Load the run configuration (analyte list, column-name prefixes, ...) from YAML.
# The context manager closes the file handle as soon as parsing is done; passing
# a bare open(...) to safe_load leaves the handle open until garbage collection.
with open("../parameters/july_2024_data_parameters.yaml", "r") as parameters_file:
    dict_parameters = yaml.safe_load(parameters_file)

In [None]:
# Load the estimated concentrations via the dashboard helper, driven by dict_parameters.
pd_df_estimated_concentrations = read_estimated_concentrations(dict_parameters)

In [None]:
# Load the calibration concentrations; reading and cleaning both happen inside plate_util.
pd_df_calibration_concentrations = plate_util.read_and_clean_calibration_concentrations(dict_parameters)

In [None]:
# Display the estimated concentrations as loaded, for a visual check.
pd_df_estimated_concentrations


In [None]:
# Build the duplicate-QC table with the configured estimated-concentration
# column-name prefix passed as the fourth argument.
# NOTE(review): pd_df_concentrations_with_qc is not referenced by any later cell
# shown here - confirm it is still needed.
pd_df_concentrations_with_qc = get_table_with_all_duplicate_qc_checks(
    dict_parameters, 
    pd_df_estimated_concentrations,
    pd_df_calibration_concentrations,
    dict_parameters["column name prefix for estimated concentrations"],
)

In [None]:
# Same QC call, but with "Median" as the fourth argument instead of the
# estimated-concentration prefix. The "CV <analyte>" columns of this table are
# the ones used for the QC masking further down.
pd_df_intensities_with_qc = get_table_with_all_duplicate_qc_checks(
    dict_parameters, 
    pd_df_estimated_concentrations,
    pd_df_calibration_concentrations,
    "Median",
)

In [None]:
# Display the full QC table before it is reduced to the CV columns.
pd_df_intensities_with_qc

In [None]:
# Reduce the QC table to the merge keys plus one "CV <analyte>" column per
# configured analyte.
qc_columns_to_keep = ["sample name annotations", "plate number"] + [
    f"CV {str_analyte}" for str_analyte in dict_parameters["list of analytes"]
]
pd_df_intensities_with_qc = pd_df_intensities_with_qc.loc[:, qc_columns_to_keep]

In [None]:
# Display the QC table after it has been reduced to the key and CV columns.
pd_df_intensities_with_qc

In [None]:
# Reduce the estimates table to the identifier columns plus one
# "estimated concentration <analyte>" column per configured analyte.
estimates_columns_to_keep = [
    "sample name annotations",
    "old sample name annotations",
    "plate number",
] + [
    f"estimated concentration {str_analyte}"
    for str_analyte in dict_parameters["list of analytes"]
]
pd_df_estimated_concentrations = pd_df_estimated_concentrations.loc[:, estimates_columns_to_keep]

In [None]:
# Attach the CV columns to every estimate row. A left merge keeps all estimate
# rows; rows without a matching (sample, plate) entry in the QC table get NaN CVs.
# NOTE(review): if the QC table holds more than one row per (sample, plate), this
# merge repeats the estimate rows - confirm the keys are unique on the right side.
pd_df_estimates_with_qc = pd_df_estimated_concentrations.merge(pd_df_intensities_with_qc, on=["sample name annotations", "plate number"], how = 'left')

In [None]:
# Display the merged estimates + CV table before any QC masking.
pd_df_estimates_with_qc

In [None]:
# Snapshot the unmasked table first so it can be exported alongside the
# QC-masked one.
pd_df_estimates_no_cleaning = pd_df_estimates_with_qc.copy()

# For each analyte, blank out the estimate wherever its CV is above 30 or
# missing. NOTE(review): 30 is presumably a percentage - confirm the CV units.
for str_analyte in dict_parameters["list of analytes"]:
    cv_values = pd_df_estimates_with_qc[f"CV {str_analyte}"]
    fails_qc = (cv_values > 30) | cv_values.isna()
    pd_df_estimates_with_qc.loc[fails_qc, f"estimated concentration {str_analyte}"] = np.nan

In [None]:
# Display the estimates after the CV-based masking.
pd_df_estimates_with_qc

In [None]:
def final_cleaning(
    pd_df_estimates,
    list_of_analytes=None,
    control_markers=("QC", "SM", "Std", "Standard", "Blank", "BLANK", "HEK", "LPS", "PAM"),
):
    """Average rows per sample and drop control / calibration rows.

    Rows sharing the same ("sample name annotations",
    "old sample name annotations", "plate number") key are collapsed to the
    mean of each "estimated concentration <analyte>" column. Groups whose key
    contains NaN are kept (``dropna=False``). Afterwards every row whose
    "sample name annotations" contains one of ``control_markers`` as a
    case-sensitive substring is removed. Rows with a missing sample name are
    treated as non-control rows and kept.

    Parameters
    ----------
    pd_df_estimates : pandas.DataFrame
        Must contain the three key columns and one
        "estimated concentration <analyte>" column per analyte. Any other
        column (e.g. the CV columns) is ignored.
    list_of_analytes : iterable of str, optional
        Analyte names. Defaults to the notebook-level
        ``dict_parameters["list of analytes"]``.
    control_markers : iterable of str, optional
        Literal substrings that identify rows to drop.

    Returns
    -------
    pandas.DataFrame
        Key columns followed by the estimate columns, one row per remaining
        key. The index is not renumbered after the control rows are removed.

    Raises
    ------
    KeyError
        If a key column or an expected estimate column is missing.
    """
    if list_of_analytes is None:
        # Fall back to the notebook configuration so existing one-argument calls keep working.
        list_of_analytes = dict_parameters["list of analytes"]

    key_columns = ["sample name annotations", "old sample name annotations", "plate number"]
    estimate_columns = [f"estimated concentration {str_analyte}" for str_analyte in list_of_analytes]
    columns_to_keep = key_columns + estimate_columns

    # Select the needed columns before aggregating: .mean() is applied to every
    # non-key column, so a stray non-numeric column would otherwise make it fail.
    pd_df_averaged = (
        pd_df_estimates[columns_to_keep]
        .groupby(key_columns, dropna=False)
        .mean()
        .reset_index()
    )

    sample_names = pd_df_averaged["sample name annotations"]
    is_control = pd.Series(False, index=pd_df_averaged.index)
    for marker in control_markers:
        # na=False: a missing sample name must yield False rather than NaN,
        # otherwise the mask is no longer boolean and cannot be inverted.
        # regex=False: markers are literal substrings, not patterns.
        is_control |= sample_names.str.contains(marker, regex=False, na=False)

    return pd_df_averaged.loc[~is_control, columns_to_keep]

In [None]:
# Run final_cleaning on both the QC-masked and the unmasked estimates; both
# names are rebound to the cleaned result.
pd_df_estimates_with_qc = final_cleaning(pd_df_estimates_with_qc)
pd_df_estimates_no_cleaning = final_cleaning(pd_df_estimates_no_cleaning)

In [None]:
# Display the final table built from the estimates without CV masking.
pd_df_estimates_no_cleaning

In [None]:
# Display the final table built from the CV-masked estimates.
pd_df_estimates_with_qc

In [None]:
# Write both final tables to ../output as CSV, without the index column.
pd_df_estimates_with_qc.to_csv("../output/cleaned_estimates.csv", index=False)
pd_df_estimates_no_cleaning.to_csv("../output/estimates_no_qc.csv", index=False)

In [None]:
# Read the CV-masked export back from disk to check what was written.
temp = pd.read_csv("../output/cleaned_estimates.csv")

In [None]:
# Display the re-loaded CV-masked export.
temp

In [None]:
# Read the unmasked export back from disk to check what was written.
temp_no_cleaning = pd.read_csv("../output/estimates_no_qc.csv")

In [None]:
# Display the re-loaded unmasked export.
temp_no_cleaning