In [1]:
import sys
import os
import yaml
import numpy as np
from scipy import stats

import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import plotly.express as px
import plotly.graph_objects as go

import pandas as pd
pd.options.mode.chained_assignment = None

sys.path.append('../python/')
import plate_util
from plotly_figure_parameters import dict_y_axis_parameters, dict_font_parameters, dict_x_axis_parameters_categorical
from dashboard_notebook_util import (
    read_estimated_concentrations, 
    read_plate_data_with_calibration_concentrations,
    read_quality_control_concentrations
)
from analysis_util import get_table_with_all_duplicate_qc_checks

import logging
logging.basicConfig(level=logging.DEBUG, filename="dash_logs.log")

In [2]:
# Load the run configuration used by every helper call in this notebook.
# The context manager closes the YAML file as soon as it has been parsed,
# instead of leaving an open handle for the garbage collector to clean up.
with open("../parameters/july_2024_data_parameters.yaml", "r") as parameters_file:
    dict_parameters = yaml.safe_load(parameters_file)

In [3]:
# Load the two input tables through the project helpers; both are driven by
# the parameters dictionary loaded above.
# NOTE(review): where the helpers read from is not visible in this notebook —
# presumably file paths stored in dict_parameters; confirm in
# dashboard_notebook_util.
pd_df_plate_data = read_plate_data_with_calibration_concentrations(dict_parameters)
pd_df_estimated_concentrations = read_estimated_concentrations(dict_parameters)

In [4]:
# Calibration concentrations, read and cleaned by plate_util; this table is
# passed to the duplicate-QC helper in the cells below.
pd_df_calibration_concentrations = plate_util.read_and_clean_calibration_concentrations(dict_parameters)

In [5]:
pd_df_estimated_concentrations

Unnamed: 0,sample name annotations,sample name plate,sample repeat number,plate number,plate row,plate column,estimated concentration IFN-gamma,estimated concentration IL-1b,estimated concentration IL-6,estimated concentration IL-8,...,calibration curve gradient IFN-gamma,calibration curve gradient IL-1b,calibration curve gradient IL-6,calibration curve gradient IL-8,calibration curve gradient IL-10,calibration curve gradient IL-12(p70),calibration curve gradient IL-17A,calibration curve gradient IP-10,calibration curve gradient MCP-1,calibration curve gradient TNF-a
0,0001A_0.5hr,Unknown4,1,1,C,4,8.401,0.448,1.487,1.694,...,1.859212,3.698860,5.448019,18.025213,2.499612,0.431541,2.619544,7.633543,3.123279,5.641264
1,0001A_15min,Unknown3,1,1,B,4,3.111,0.000,0.453,1.075,...,1.673217,3.752785,5.817267,18.051093,2.571297,0.440654,2.592121,6.768471,2.797093,5.527677
2,0001A_1hr,Unknown5,1,1,D,4,5.140,0.528,1.250,1.389,...,1.758332,3.678081,5.507039,18.039894,2.509974,0.440654,2.601898,6.962041,2.918554,5.573445
3,0001A_2hr,Unknown6,1,1,E,4,4.130,0.000,1.682,1.188,...,1.719329,3.752785,5.393984,18.047804,2.544577,0.384701,2.595768,7.003098,2.934010,5.527677
4,0001A_4hr,Unknown7,1,1,F,4,3.616,0.000,2.194,1.755,...,1.701886,3.752785,5.276198,18.019681,2.705527,0.416290,2.599000,6.500246,3.031131,5.471120
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1478,Standard 0 (Background),Background0,4,20,B,7,0.287,0.000,0.000,0.103,...,2.868162,3.616193,9.263940,26.507112,3.262703,1.935940,1.731611,1.635174,2.101125,6.731868
1479,Standard 0 (Background),Background0,5,21,H,3,0.000,0.000,0.000,0.000,...,3.635204,5.270009,13.202546,33.177990,3.520822,2.589050,3.322400,2.034932,3.464321,13.467623
1480,Standard 0 (Background),Background0,6,21,A,9,0.000,0.000,0.116,0.000,...,3.635204,5.270009,12.889391,33.177990,3.467597,2.589050,3.322400,1.683513,3.464321,13.467623
1481,Standard 0 (Background),Background0,7,21,B,9,0.000,0.103,0.254,0.000,...,3.635204,5.237574,12.753379,33.177990,3.520822,2.519656,3.322400,1.392338,3.528218,13.467623


In [6]:
pd_df_plate_data

Unnamed: 0.1,Unnamed: 0,plate number,plate row,plate column,sample name annotations,sample name plate,IFN-gamma Median,IL-1b Median,IL-6 Median,IL-8 Median,...,IFN-gamma Expected,IL-10 Expected,IL-12(p70) Expected,IL-17A Expected,IL-1b Expected,IL-6 Expected,IL-8 Expected,IP-10 Expected,MCP-1 Expected,TNF-a Expected
0,0,1,A,1,QC-plasma,Unknown1,32.70508,16.84863,17.74609,57.20605,...,,,,,,,,,,
1,1,1,B,1,QC-plasma,Unknown1,24.26758,15.50781,15.92969,54.32227,...,,,,,,,,,,
2,2,1,C,1,Std 1,Standard1,13.77148,17.23730,14.46094,34.33008,...,1.3,2.6,3.0,1.3,1.6,0.64,0.64,2.6,3.0,6.4
3,3,1,D,1,Std 1,Standard1,12.90430,16.01758,11.22656,26.57617,...,1.3,2.6,3.0,1.3,1.6,0.64,0.64,2.6,3.0,6.4
4,4,1,E,1,Std 2,Standard2,24.99512,38.54395,28.55371,72.10742,...,6.4,12.8,16.0,6.4,8.0,3.20,3.20,12.8,16.0,32.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1800,1800,21,D,12,QC-1,Control1,251.20801,288.63477,255.97461,705.87109,...,,,,,,,,,,
1801,1801,21,E,12,QC-2,Control2,1166.62695,1309.89258,1028.76367,2688.80664,...,,,,,,,,,,
1802,1802,21,F,12,QC-2,Control2,1461.19434,1546.18164,1178.35742,2923.54785,...,,,,,,,,,,
1803,1803,21,G,12,QC-plasma,Unknown2,86.73535,38.47852,38.58594,92.20410,...,,,,,,,,,,


In [7]:
# Run the duplicate QC checks on the estimated concentrations, using the
# estimated-concentration column prefix configured in the parameters file.
# NOTE(review): the warning lines printed under this cell point at divisions
# by `mean` inside the helper — presumably replicate means of zero; confirm.
# NOTE(review): pd_df_concentrations_with_qc is not referenced again in the
# visible part of this notebook — confirm whether this cell is still needed.
pd_df_concentrations_with_qc = get_table_with_all_duplicate_qc_checks(
    dict_parameters, 
    pd_df_estimated_concentrations,
    pd_df_calibration_concentrations,
    dict_parameters["column name prefix for estimated concentrations"],
)

  return (std_dev / mean) * 100
  return (std_dev / mean) * 100
  return (std_dev / mean) * 100
  return (std_dev / mean) * 100
  return (std_dev / mean) * 100
  return (std_dev / mean) * 100
  return (std_dev / mean) * 100
  return (std_dev / mean) * 100
  return (std_dev / mean) * 100
  return (std_dev / mean) * 100
  return np.abs(estimate_1 - estimate_2) / mean
  return np.abs(estimate_1 - estimate_2) / mean
  return np.abs(estimate_1 - estimate_2) / mean
  return np.abs(estimate_1 - estimate_2) / mean
  return np.abs(estimate_1 - estimate_2) / mean
  return np.abs(estimate_1 - estimate_2) / mean
  return np.abs(estimate_1 - estimate_2) / mean
  return np.abs(estimate_1 - estimate_2) / mean
  return np.abs(estimate_1 - estimate_2) / mean
  return np.abs(estimate_1 - estimate_2) / mean
  return np.log(np.max([estimate_1, estimate_2]))
  return np.log(np.max([estimate_1, estimate_2]))
  return np.log(np.max([estimate_1, estimate_2]))
  return np.log(np.max([estimate_1, estimate_2]))


In [8]:
# Second QC pass: same helper and same input tables as the previous cell, but
# with the literal "Median" passed in place of the estimated-concentration
# column prefix.
# NOTE(review): the input frame is still pd_df_estimated_concentrations, not
# pd_df_plate_data (whose displayed columns are the "<analyte> Median" ones).
# Confirm that the helper really computes the CVs from the intensity columns
# here, because these CV columns drive the QC filtering further down.
pd_df_intensities_with_qc = get_table_with_all_duplicate_qc_checks(
    dict_parameters, 
    pd_df_estimated_concentrations,
    pd_df_calibration_concentrations,
    "Median",
)

In [9]:
pd_df_intensities_with_qc

Unnamed: 0,sample name annotations,plate number,CV IFN-gamma,CV IL-1b,CV IL-6,CV IL-8,CV IL-10,CV IL-12(p70),CV IL-17A,CV IP-10,...,log max estimated concentration IFN-gamma,log max estimated concentration IL-1b,log max estimated concentration IL-6,log max estimated concentration IL-8,log max estimated concentration IL-10,log max estimated concentration IL-12(p70),log max estimated concentration IL-17A,log max estimated concentration IP-10,log max estimated concentration MCP-1,log max estimated concentration TNF-a
0,0001A_0.5hr,1,,,,,,,,,...,,,,,,,,,,
1,0001A_15min,1,,,,,,,,,...,,,,,,,,,,
2,0001A_1hr,1,,,,,,,,,...,,,,,,,,,,
3,0001A_2hr,1,,,,,,,,,...,,,,,,,,,,
4,0001A_4hr,1,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
735,SM8,4,4.051376,5.974692,6.565293,6.791565,17.077342,3.478714,5.460489,10.736658,...,2.456094,2.805725,3.349733,5.933664,2.657579,2.321353,3.141674,8.151980,7.921951,2.855707
736,SM9,6,13.119571,16.414518,16.094916,14.616103,19.302105,14.324517,7.020313,3.889405,...,3.814832,3.004677,5.892049,6.834615,3.220359,2.898154,3.183815,8.787486,8.872927,3.985596
737,Standard 0 (Background),19,0.847618,42.287960,12.746538,5.702203,6.039828,7.984091,44.967608,0.548262,...,2.691375,3.165937,2.799090,3.002933,2.785807,2.627763,3.479616,2.799506,2.304829,2.461024
738,Standard 0 (Background),20,6.005066,0.911686,10.893916,2.313595,7.800940,0.772166,8.107079,10.163203,...,2.769042,2.338646,2.841166,2.956597,2.801346,2.539847,2.729690,2.908969,2.410774,2.497361


In [10]:
# Trim the QC table down to its two key columns plus one "CV <analyte>"
# column for every analyte listed in the parameters file.
qc_columns_to_keep = ["sample name annotations", "plate number"] + [
    f"CV {str_analyte}" for str_analyte in dict_parameters["list of analytes"]
]
pd_df_intensities_with_qc = pd_df_intensities_with_qc.loc[:, qc_columns_to_keep]

In [11]:
pd_df_intensities_with_qc

Unnamed: 0,sample name annotations,plate number,CV IFN-gamma,CV IL-1b,CV IL-6,CV IL-8,CV IL-10,CV IL-12(p70),CV IL-17A,CV IP-10,CV MCP-1,CV TNF-a
0,0001A_0.5hr,1,,,,,,,,,,
1,0001A_15min,1,,,,,,,,,,
2,0001A_1hr,1,,,,,,,,,,
3,0001A_2hr,1,,,,,,,,,,
4,0001A_4hr,1,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
735,SM8,4,4.051376,5.974692,6.565293,6.791565,17.077342,3.478714,5.460489,10.736658,9.977075,2.728503
736,SM9,6,13.119571,16.414518,16.094916,14.616103,19.302105,14.324517,7.020313,3.889405,14.059567,18.347187
737,Standard 0 (Background),19,0.847618,42.287960,12.746538,5.702203,6.039828,7.984091,44.967608,0.548262,9.022542,1.693850
738,Standard 0 (Background),20,6.005066,0.911686,10.893916,2.313595,7.800940,0.772166,8.107079,10.163203,6.082462,5.619503


In [12]:
# Keep only the two key columns and the per-analyte estimated concentrations.
# Note that this rebinds pd_df_estimated_concentrations to the trimmed frame.
estimates_columns_to_keep = ["sample name annotations", "plate number"] + [
    f"estimated concentration {str_analyte}"
    for str_analyte in dict_parameters["list of analytes"]
]
pd_df_estimated_concentrations = pd_df_estimated_concentrations.loc[:, estimates_columns_to_keep]

In [13]:
# Attach the CV columns to every estimate row, matching on sample annotation
# and plate number. A left join keeps estimate rows that have no QC entry;
# their CV columns are NaN.
pd_df_estimates_with_qc = pd.merge(
    pd_df_estimated_concentrations,
    pd_df_intensities_with_qc,
    how="left",
    on=["sample name annotations", "plate number"],
)

In [14]:
pd_df_estimates_with_qc

Unnamed: 0,sample name annotations,plate number,estimated concentration IFN-gamma,estimated concentration IL-1b,estimated concentration IL-6,estimated concentration IL-8,estimated concentration IL-10,estimated concentration IL-12(p70),estimated concentration IL-17A,estimated concentration IP-10,...,CV IFN-gamma,CV IL-1b,CV IL-6,CV IL-8,CV IL-10,CV IL-12(p70),CV IL-17A,CV IP-10,CV MCP-1,CV TNF-a
0,0001A_0.5hr,1,8.401,0.448,1.487,1.694,7.575,17.465,2.699,160.058,...,,,,,,,,,,
1,0001A_15min,1,3.111,0.000,0.453,1.075,4.958,19.208,0.318,108.799,...,,,,,,,,,,
2,0001A_1hr,1,5.140,0.528,1.250,1.389,6.959,19.417,0.935,117.326,...,,,,,,,,,,
3,0001A_2hr,1,4.130,0.000,1.682,1.188,5.871,9.480,0.400,119.261,...,,,,,,,,,,
4,0001A_4hr,1,3.616,0.000,2.194,1.755,2.370,14.442,0.701,97.837,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1478,Standard 0 (Background),20,0.287,0.000,0.000,0.103,0.583,1.408,0.505,1.314,...,6.005066,0.911686,10.893916,2.313595,7.80094,0.772166,8.107079,10.163203,6.082462,5.619503
1479,Standard 0 (Background),21,0.000,0.000,0.000,0.000,0.200,0.000,0.000,2.301,...,,,,,,,,,,
1480,Standard 0 (Background),21,0.000,0.000,0.116,0.000,0.447,0.000,0.000,0.885,...,,,,,,,,,,
1481,Standard 0 (Background),21,0.000,0.103,0.254,0.000,0.130,0.222,0.000,0.242,...,,,,,,,,,,


In [15]:
# Blank out (set to NaN) every estimated concentration that fails QC.
# An estimate is discarded when any of the following holds:
#   * the CV of its analyte is above MAX_ACCEPTABLE_CV_PERCENT,
#   * the CV of its analyte is missing,
#   * the row belongs to EXCLUDED_PLATE_NUMBER.
# NOTE(review): the reason plate 6 is excluded wholesale is not recorded in
# this notebook — document it here or move it into the parameters file.
MAX_ACCEPTABLE_CV_PERCENT = 30
EXCLUDED_PLATE_NUMBER = 6

# The plate condition does not depend on the analyte, so compute it once.
is_excluded_plate = pd_df_estimates_with_qc["plate number"] == EXCLUDED_PLATE_NUMBER

for str_analyte in dict_parameters["list of analytes"]:
    cv_values = pd_df_estimates_with_qc[f"CV {str_analyte}"]
    fails_qc = (
        (cv_values > MAX_ACCEPTABLE_CV_PERCENT)
        | cv_values.isna()
        | is_excluded_plate
    )
    pd_df_estimates_with_qc.loc[
        fails_qc, f"estimated concentration {str_analyte}"
    ] = np.nan

In [16]:
pd_df_estimates_with_qc

Unnamed: 0,sample name annotations,plate number,estimated concentration IFN-gamma,estimated concentration IL-1b,estimated concentration IL-6,estimated concentration IL-8,estimated concentration IL-10,estimated concentration IL-12(p70),estimated concentration IL-17A,estimated concentration IP-10,...,CV IFN-gamma,CV IL-1b,CV IL-6,CV IL-8,CV IL-10,CV IL-12(p70),CV IL-17A,CV IP-10,CV MCP-1,CV TNF-a
0,0001A_0.5hr,1,,,,,,,,,...,,,,,,,,,,
1,0001A_15min,1,,,,,,,,,...,,,,,,,,,,
2,0001A_1hr,1,,,,,,,,,...,,,,,,,,,,
3,0001A_2hr,1,,,,,,,,,...,,,,,,,,,,
4,0001A_4hr,1,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1478,Standard 0 (Background),20,0.287,0.0,0.0,0.103,0.583,1.408,0.505,1.314,...,6.005066,0.911686,10.893916,2.313595,7.80094,0.772166,8.107079,10.163203,6.082462,5.619503
1479,Standard 0 (Background),21,,,,,,,,,...,,,,,,,,,,
1480,Standard 0 (Background),21,,,,,,,,,...,,,,,,,,,,
1481,Standard 0 (Background),21,,,,,,,,,...,,,,,,,,,,


In [17]:
# Collapse the rows of each (sample annotation, plate number) pair into a
# single row holding the column means, then turn the group keys back into
# ordinary columns.
pd_df_estimates_with_qc = (
    pd_df_estimates_with_qc
    .groupby(["sample name annotations", "plate number"])
    .mean()
    .reset_index()
)

In [18]:
pd_df_estimates_with_qc

Unnamed: 0,sample name annotations,plate number,estimated concentration IFN-gamma,estimated concentration IL-1b,estimated concentration IL-6,estimated concentration IL-8,estimated concentration IL-10,estimated concentration IL-12(p70),estimated concentration IL-17A,estimated concentration IP-10,...,CV IFN-gamma,CV IL-1b,CV IL-6,CV IL-8,CV IL-10,CV IL-12(p70),CV IL-17A,CV IP-10,CV MCP-1,CV TNF-a
0,0001A_0.5hr,1,,,,,,,,,...,,,,,,,,,,
1,0001A_15min,1,,,,,,,,,...,,,,,,,,,,
2,0001A_1hr,1,,,,,,,,,...,,,,,,,,,,
3,0001A_2hr,1,,,,,,,,,...,,,,,,,,,,
4,0001A_4hr,1,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
735,SM8,4,0.0000,0.434,1.4690,13.669,0.2400,0.000,1.1360,336.7585,...,4.051376,5.974692,6.565293,6.791565,17.077342,3.478714,5.460489,10.736658,9.977075,2.728503
736,SM9,6,,,,,,,,,...,13.119571,16.414518,16.094916,14.616103,19.302105,14.324517,7.020313,3.889405,14.059567,18.347187
737,Standard 0 (Background),19,0.0000,,0.0915,0.000,1.6635,0.655,,1.2410,...,0.847618,42.287960,12.746538,5.702203,6.039828,7.984091,44.967608,0.548262,9.022542,1.693850
738,Standard 0 (Background),20,0.1435,0.000,0.0975,0.115,0.8450,1.373,0.2525,2.0345,...,6.005066,0.911686,10.893916,2.313595,7.800940,0.772166,8.107079,10.163203,6.082462,5.619503


In [19]:
# Drop every row whose sample annotation contains one of the markers below,
# leaving only the remaining (study sample) rows. Matching is a case-sensitive
# substring search, which is why both "Blank" and "BLANK" are listed.
# NOTE(review): substring matching also removes any study sample whose
# annotation happens to contain one of these markers (e.g. "SM") — confirm
# that no real sample identifiers do.
non_sample_markers = ["QC", "SM", "Std", "Standard", "Blank", "BLANK", "HEK", "LPS", "PAM"]

# The markers contain no regex metacharacters, so joining them with "|" gives
# a pattern that matches exactly when at least one marker is a substring.
is_non_sample = pd_df_estimates_with_qc["sample name annotations"].str.contains(
    "|".join(non_sample_markers)
)
pd_df_estimates_with_qc = pd_df_estimates_with_qc[~is_non_sample]

In [20]:
pd_df_estimates_with_qc

Unnamed: 0,sample name annotations,plate number,estimated concentration IFN-gamma,estimated concentration IL-1b,estimated concentration IL-6,estimated concentration IL-8,estimated concentration IL-10,estimated concentration IL-12(p70),estimated concentration IL-17A,estimated concentration IP-10,...,CV IFN-gamma,CV IL-1b,CV IL-6,CV IL-8,CV IL-10,CV IL-12(p70),CV IL-17A,CV IP-10,CV MCP-1,CV TNF-a
0,0001A_0.5hr,1,,,,,,,,,...,,,,,,,,,,
1,0001A_15min,1,,,,,,,,,...,,,,,,,,,,
2,0001A_1hr,1,,,,,,,,,...,,,,,,,,,,
3,0001A_2hr,1,,,,,,,,,...,,,,,,,,,,
4,0001A_4hr,1,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
653,9960 A,12,3.8900,0.7580,1.0990,0.8205,8.5575,2.9815,0.000,199.6850,...,0.015062,8.227571,3.647871,6.558170,5.878185,5.949531,3.419978,9.827549,0.616061,6.314690
654,9960 B,12,2.3475,0.3205,0.6505,0.7195,3.7580,2.8820,0.197,128.7715,...,8.626121,1.618395,0.386600,4.386087,0.976453,0.217260,21.560382,1.006346,1.528851,2.558406
655,9960 C,12,2.9535,0.8455,0.7385,0.9565,5.1430,7.0155,0.534,121.0510,...,0.028490,7.672174,13.052324,3.371984,6.136630,8.312057,1.816245,3.790132,5.317296,6.293286
656,9960 D,12,3.8385,0.0000,1.1760,0.7875,4.1400,4.9060,0.014,128.1910,...,2.493494,1.215954,4.608606,4.293927,10.414743,0.936467,0.766123,9.224480,8.641023,5.578599


In [21]:
# Final export table: the two key columns plus the QC-filtered, replicate-
# averaged estimated concentration of every analyte (the CV columns are left
# behind).
estimates_columns_to_keep = ["sample name annotations", "plate number"] + [
    f"estimated concentration {str_analyte}"
    for str_analyte in dict_parameters["list of analytes"]
]
pd_df_estimated_concentrations_final = pd_df_estimates_with_qc.loc[:, estimates_columns_to_keep]

In [22]:
pd_df_estimated_concentrations_final

Unnamed: 0,sample name annotations,plate number,estimated concentration IFN-gamma,estimated concentration IL-1b,estimated concentration IL-6,estimated concentration IL-8,estimated concentration IL-10,estimated concentration IL-12(p70),estimated concentration IL-17A,estimated concentration IP-10,estimated concentration MCP-1,estimated concentration TNF-a
0,0001A_0.5hr,1,,,,,,,,,,
1,0001A_15min,1,,,,,,,,,,
2,0001A_1hr,1,,,,,,,,,,
3,0001A_2hr,1,,,,,,,,,,
4,0001A_4hr,1,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
653,9960 A,12,3.8900,0.7580,1.0990,0.8205,8.5575,2.9815,0.000,199.6850,171.2355,12.7105
654,9960 B,12,2.3475,0.3205,0.6505,0.7195,3.7580,2.8820,0.197,128.7715,176.5245,9.5700
655,9960 C,12,2.9535,0.8455,0.7385,0.9565,5.1430,7.0155,0.534,121.0510,152.3470,9.9545
656,9960 D,12,3.8385,0.0000,1.1760,0.7875,4.1400,4.9060,0.014,128.1910,122.4380,9.6105


In [23]:
# Write the cleaned estimates to disk; index=False keeps the DataFrame index
# out of the CSV. The path is relative to the notebook's working directory.
pd_df_estimated_concentrations_final.to_csv("../output/cleaned_estimates.csv", index=False)

In [24]:
# Read the file that was just written back in, so the next cell can display
# it as a round-trip check of the export.
temp = pd.read_csv("../output/cleaned_estimates.csv")

In [25]:
temp

Unnamed: 0,sample name annotations,plate number,estimated concentration IFN-gamma,estimated concentration IL-1b,estimated concentration IL-6,estimated concentration IL-8,estimated concentration IL-10,estimated concentration IL-12(p70),estimated concentration IL-17A,estimated concentration IP-10,estimated concentration MCP-1,estimated concentration TNF-a
0,0001A_0.5hr,1,,,,,,,,,,
1,0001A_15min,1,,,,,,,,,,
2,0001A_1hr,1,,,,,,,,,,
3,0001A_2hr,1,,,,,,,,,,
4,0001A_4hr,1,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
653,9960 A,12,3.8900,0.7580,1.0990,0.8205,8.5575,2.9815,0.000,199.6850,171.2355,12.7105
654,9960 B,12,2.3475,0.3205,0.6505,0.7195,3.7580,2.8820,0.197,128.7715,176.5245,9.5700
655,9960 C,12,2.9535,0.8455,0.7385,0.9565,5.1430,7.0155,0.534,121.0510,152.3470,9.9545
656,9960 D,12,3.8385,0.0000,1.1760,0.7875,4.1400,4.9060,0.014,128.1910,122.4380,9.6105
