In [None]:
import sys
import os
import yaml
import numpy as np
from scipy import stats

import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import plotly.express as px
import plotly.graph_objects as go

import pandas as pd
pd.options.mode.chained_assignment = None

sys.path.append('../python/')
import plate_util
from plotly_figure_parameters import dict_y_axis_parameters, dict_font_parameters, dict_x_axis_parameters_categorical
from dashboard_notebook_util import (
    read_estimated_concentrations, 
    read_plate_data_with_calibration_concentrations,
    read_quality_control_concentrations
)
from analysis_util import get_table_with_all_duplicate_qc_checks

import logging
logging.basicConfig(level=logging.DEBUG, filename="dash_logs.log")

In [None]:
# Load the run parameters from the YAML file. The context manager closes the
# file handle as soon as parsing finishes instead of leaving it to the garbage
# collector.
with open("../parameters/july_2024_data_parameters.yaml", "r") as parameters_file:
    dict_parameters = yaml.safe_load(parameters_file)

In [None]:
# Read the two input tables. Both readers come from dashboard_notebook_util and
# take the parameter dictionary as their only argument.
# NOTE(review): the schema of the returned frames is not visible in this
# notebook; later cells rely on "sample name annotations", "plate number" and
# "estimated concentration <analyte>" columns being present.
pd_df_plate_data = read_plate_data_with_calibration_concentrations(dict_parameters)
pd_df_estimated_concentrations = read_estimated_concentrations(dict_parameters)

In [None]:
# Obtain the calibration concentrations through plate_util, driven by the same
# parameter dictionary (per the helper's name the table is also cleaned there).
pd_df_calibration_concentrations = plate_util.read_and_clean_calibration_concentrations(dict_parameters)

In [None]:
# Show the estimated-concentrations table returned by read_estimated_concentrations.
pd_df_estimated_concentrations

In [None]:
# Show the plate data returned by read_plate_data_with_calibration_concentrations.
pd_df_plate_data

In [None]:
# Run the duplicate QC checks using the column-name prefix that the parameter
# file lists for estimated concentrations.
str_concentration_prefix = dict_parameters["column name prefix for estimated concentrations"]
pd_df_concentrations_with_qc = get_table_with_all_duplicate_qc_checks(
    dict_parameters,
    pd_df_estimated_concentrations,
    pd_df_calibration_concentrations,
    str_concentration_prefix,
)

In [None]:
# Run the same duplicate QC checks, this time with the "Median" column prefix.
# NOTE(review): the estimated-concentrations table is passed here as well —
# presumably it also carries the "Median ..." columns; confirm against
# analysis_util.get_table_with_all_duplicate_qc_checks.
str_intensity_prefix = "Median"
pd_df_intensities_with_qc = get_table_with_all_duplicate_qc_checks(
    dict_parameters,
    pd_df_estimated_concentrations,
    pd_df_calibration_concentrations,
    str_intensity_prefix,
)

In [None]:
# Show the QC table computed with the "Median" prefix, before any column selection.
pd_df_intensities_with_qc

In [None]:
# Trim the QC table to the two identifier columns plus one "CV <analyte>"
# column for every analyte listed in the parameters.
qc_columns_to_keep = ["sample name annotations", "plate number"] + [
    f"CV {str_analyte}" for str_analyte in dict_parameters["list of analytes"]
]
pd_df_intensities_with_qc = pd_df_intensities_with_qc[qc_columns_to_keep]

In [None]:
# Show the QC table after trimming it to the identifier and "CV <analyte>" columns.
pd_df_intensities_with_qc

In [None]:
# Trim the estimates table to the two identifier columns plus one
# "estimated concentration <analyte>" column per configured analyte.
estimates_columns_to_keep = ["sample name annotations", "plate number"] + [
    f"estimated concentration {str_analyte}"
    for str_analyte in dict_parameters["list of analytes"]
]
pd_df_estimated_concentrations = pd_df_estimated_concentrations[estimates_columns_to_keep]

In [None]:
# Attach the CV columns to every estimates row. A left join keeps all estimate
# rows, including those without a matching QC row (their CV columns become NaN).
list_merge_keys = ["sample name annotations", "plate number"]
pd_df_estimates_with_qc = pd.merge(
    pd_df_estimated_concentrations,
    pd_df_intensities_with_qc,
    how="left",
    on=list_merge_keys,
)

In [None]:
# Show the left-merged table of estimated concentrations and CV columns.
pd_df_estimates_with_qc

In [None]:
# Blank out (set to NaN) every estimated concentration that fails QC.
# An estimate is rejected when any of the following holds:
#   * its analyte's CV exceeds MAX_ACCEPTED_CV,
#   * its analyte's CV is missing,
#   * the row belongs to EXCLUDED_PLATE_NUMBER.
# NOTE(review): the reason plate 6 is discarded wholesale and the origin of the
# threshold 30 are not recorded in this notebook — TODO document them.
MAX_ACCEPTED_CV = 30
EXCLUDED_PLATE_NUMBER = 6

# The plate criterion does not depend on the analyte, so compute it once.
is_excluded_plate = pd_df_estimates_with_qc["plate number"] == EXCLUDED_PLATE_NUMBER

for str_analyte in dict_parameters["list of analytes"]:
    cv_values = pd_df_estimates_with_qc[f"CV {str_analyte}"]
    is_rejected = (cv_values > MAX_ACCEPTED_CV) | cv_values.isna() | is_excluded_plate
    pd_df_estimates_with_qc.loc[
        is_rejected, f"estimated concentration {str_analyte}"
    ] = np.nan

In [None]:
# Show the merged table after rejected estimates were set to NaN.
pd_df_estimates_with_qc

In [None]:
# Collapse rows sharing the same sample name and plate number into their
# column-wise mean, then turn the group keys back into ordinary columns.
list_group_keys = ["sample name annotations", "plate number"]
pd_df_estimates_with_qc = (
    pd_df_estimates_with_qc
    .groupby(list_group_keys)
    .mean()
    .reset_index()
)

In [None]:
# Show the table after averaging rows that share sample name and plate number.
pd_df_estimates_with_qc

In [None]:
# Drop every row whose sample name contains one of these fragments
# (case-sensitive substring match, hence both "Blank" and "BLANK").
# NOTE(review): presumably these mark QC, standard, blank and control wells —
# confirm against the plate layout.
NON_SAMPLE_NAME_FRAGMENTS = [
    "QC",
    "SM",
    "Std",
    "Standard",
    "Blank",
    "BLANK",
    "HEK",
    "LPS",
    "PAM",
]

# The fragments are plain alphanumeric strings, so joining them with "|" gives
# a regex alternation that matches exactly when any single fragment is
# contained in the name. One mask replaces nine successive filters.
is_non_sample_row = pd_df_estimates_with_qc["sample name annotations"].str.contains(
    "|".join(NON_SAMPLE_NAME_FRAGMENTS)
)
pd_df_estimates_with_qc = pd_df_estimates_with_qc[~is_non_sample_row]

In [None]:
# Show the table after rows with excluded sample-name fragments were dropped.
pd_df_estimates_with_qc

In [None]:
# Build the final table: identifier columns plus the per-analyte estimated
# concentrations; the CV columns are left behind.
estimates_columns_to_keep = ["sample name annotations", "plate number"] + [
    f"estimated concentration {str_analyte}"
    for str_analyte in dict_parameters["list of analytes"]
]
pd_df_estimated_concentrations_final = pd_df_estimates_with_qc[estimates_columns_to_keep]

In [None]:
# Show the final table restricted to identifier and estimated-concentration columns.
pd_df_estimated_concentrations_final

In [None]:
# Write the final table to CSV; index=False omits the DataFrame index from the file.
pd_df_estimated_concentrations_final.to_csv("../output/cleaned_estimates.csv", index=False)

In [None]:
# Read the CSV written to ../output/cleaned_estimates.csv back in so the
# on-disk result can be inspected.
temp = pd.read_csv("../output/cleaned_estimates.csv")

In [None]:
# Show the frame re-read from ../output/cleaned_estimates.csv.
temp