In [None]:
import os
import sys

import numpy as np
import pandas as pd
import yaml
from scipy import stats

import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns

pd.options.mode.chained_assignment = None

# The project-local modules live in ../python/, so the path must be extended
# before they are imported.
sys.path.append('../python/')
from plotly_figure_parameters import dict_y_axis_parameters, dict_font_parameters, dict_x_axis_parameters_categorical
from dashboard_notebook_util import (
    read_estimated_concentrations,
    read_plate_data_with_calibration_concentrations,
    read_quality_control_concentrations
)

In [None]:
# Load the run parameters from YAML.  The file is opened in a `with` block so
# the handle is closed as soon as parsing finishes instead of being left to
# the garbage collector.
with open("../parameters/july_2024_data_parameters.yaml", "r") as file_parameters:
    dict_parameters = yaml.safe_load(file_parameters)

In [None]:
# Load the two tables used throughout the notebook via the project helpers;
# both readers receive the parameter dictionary loaded from the YAML file.
# NOTE(review): the table schemas are defined in dashboard_notebook_util, not
# here. Later cells rely on columns such as "sample name plate",
# "plate number" and "sample name annotations" being present.
pd_df_plate_data = read_plate_data_with_calibration_concentrations(dict_parameters)
pd_df_estimated_concentrations = read_estimated_concentrations(dict_parameters)

In [None]:
# Inspect the estimated-concentrations table as returned by the reader
# (when cells run top to bottom, this is before any row filtering).
pd_df_estimated_concentrations

In [None]:
# Keep only the rows whose annotation mentions "QC-plasma".
#   * regex=False: the text is matched literally rather than as a regular expression.
#   * na=False:    rows with a missing annotation are excluded; without it the
#                  mask contains NaN and boolean indexing raises a ValueError.
# The cell is safe to re-run: filtering an already filtered table is a no-op.
pd_df_estimated_concentrations = pd_df_estimated_concentrations[
    pd_df_estimated_concentrations["sample name annotations"].str.contains(
        "QC-plasma", regex=False, na=False
    )
]

In [None]:
# Inspect the plate-level table; the paired t-test further down is computed from it.
pd_df_plate_data

In [None]:
# Load the quality-control concentrations through the project helper.
# NOTE(review): no later cell visible in this notebook reads this variable —
# confirm whether it is still needed.
pd_df_quality_control_concentrations = read_quality_control_concentrations(dict_parameters)

In [None]:
# Inspect the table again; when cells run top to bottom it now contains only
# the rows kept by the "QC-plasma" annotation filter.
pd_df_estimated_concentrations

In [None]:
def perform_t_test_on_paired_wells(pd_group):
    """Return the p-value of a t-test between the two rows of a group.

    The test is computed from summary statistics only (mean, standard
    deviation and count per row) with ``equal_var=False``, so the two rows
    are not assumed to share a variance.

    Parameters
    ----------
    pd_group : pandas.DataFrame
        Rows of one group. Must provide the columns
        "IFN-gamma Trimmed Mean", "IFN-gamma Trimmed Standard Deviation"
        and "IFN-gamma Count".
        # presumably one row per well — verify against the plate-data reader

    Returns
    -------
    float
        The two-sided p-value, or ``np.nan`` when the group does not contain
        exactly two rows.
    """
    # Only a genuine pair can be compared.
    if len(pd_group) != 2:
        return np.nan

    arr_means = pd_group["IFN-gamma Trimmed Mean"].values
    arr_std_devs = pd_group["IFN-gamma Trimmed Standard Deviation"].values
    arr_counts = pd_group["IFN-gamma Count"].values

    test_result = stats.ttest_ind_from_stats(
        mean1=arr_means[0],
        std1=arr_std_devs[0],
        nobs1=arr_counts[0],
        mean2=arr_means[1],
        std2=arr_std_devs[1],
        nobs2=arr_counts[1],
        equal_var=False,
    )
    # The result is (statistic, p-value); only the p-value is reported.
    return test_result[1]

In [None]:
# Step 1: one t-test p-value per (sample name plate, plate number) group.
# Groups that do not contain exactly two rows yield NaN (see
# perform_t_test_on_paired_wells).  include_groups=False keeps the grouping
# columns out of the frame handed to the function.
pd_df_tested = (
    pd_df_plate_data
    .groupby(["sample name plate", "plate number"])
    .apply(perform_t_test_on_paired_wells, include_groups=False)
    .reset_index()
)
# reset_index() leaves the p-value column unnamed, so name all three columns explicitly.
pd_df_tested.columns = ["sample name plate", "plate number"] + ['t test p value']
# Step 2: attach the per-row summary statistics again.  Because the join is on
# the group keys, every plate-data row of a group receives the same p-value,
# i.e. the result has one row per plate-data row, not one per group.
pd_df_tested = pd.merge(
    pd_df_tested, 
    pd_df_plate_data[["sample name plate", "plate number", "IFN-gamma Trimmed Mean", "IFN-gamma Trimmed Standard Deviation", "IFN-gamma Count"]], 
    on = ["sample name plate", "plate number"], 
    how = "left"
)

In [None]:
# Inspect the merged table: the group-level p-value repeated on each matching plate-data row.
pd_df_tested

In [None]:
# Collapse to one row per (sample name plate, plate number).  The p-value is
# identical on every row of a group; the remaining columns are taken from the
# first row of each group (drop_duplicates keeps the first occurrence by default).
pd_df_tested_unique = pd_df_tested.drop_duplicates(subset = ["sample name plate", "plate number"])

In [None]:
# Inspect the de-duplicated table (one row per sample/plate group).
pd_df_tested_unique

In [None]:
# Histogram (20 bins) of the per-group t-test p-values, one value per
# (sample name plate, plate number) group.  seaborn is imported in the
# notebook's top import cell together with the other dependencies.
sns.histplot(pd_df_tested_unique, x = "t test p value", bins = 20)

In [None]:
def calculate_paired_intra_plate_cv(str_analyte, pd_group):
    """Coefficient of variation of a pair of estimated concentrations.

    Parameters
    ----------
    str_analyte : str
        Analyte name; the values are read from the column
        ``"estimated concentration <str_analyte>"``.
    pd_group : pandas.DataFrame
        Rows of one group.

    Returns
    -------
    float
        Standard deviation of the two values divided by their mean, or
        ``np.nan`` when the group does not contain exactly two rows.  The
        standard deviation is the square root of the summed squared
        deviations from the mean, which for two values equals the
        n-1 ("sample") standard deviation.
    """
    if len(pd_group) != 2:
        return np.nan

    value_a, value_b = pd_group[f"estimated concentration {str_analyte}"].values
    pair_mean = (value_a + value_b) / 2
    sum_squared_deviations = (value_a - pair_mean)**2 + (value_b - pair_mean)**2
    return np.sqrt(sum_squared_deviations) / pair_mean


In [None]:
def calculate_paired_intra_plate_rel_abs_diff(str_analyte, pd_group):
    """Relative absolute difference of a pair of estimated concentrations.

    Parameters
    ----------
    str_analyte : str
        Analyte name; the values are read from the column
        ``"estimated concentration <str_analyte>"``.
    pd_group : pandas.DataFrame
        Rows of one group.

    Returns
    -------
    float
        ``|a - b| / mean(a, b)`` for the two values of the group, or
        ``np.nan`` when the group does not contain exactly two rows.
    """
    if len(pd_group) != 2:
        return np.nan

    value_a, value_b = pd_group[f"estimated concentration {str_analyte}"].values
    absolute_difference = np.abs(value_a - value_b)
    pair_mean = (value_a + value_b) / 2
    return absolute_difference / pair_mean

In [None]:
def get_table_of_duplicate_qc_checks(function_check, str_check_name):
    """Apply a pairwise check to every analyte and collect the results in one wide table.

    Reads two notebook-level globals: ``pd_df_estimated_concentrations`` (the
    table being checked) and ``dict_parameters["list of analytes"]`` (the
    analytes to iterate over).

    Parameters
    ----------
    function_check : callable
        Called as ``function_check(str_analyte, pd_group)`` once per
        (sample name annotations, plate number) group; its return value
        becomes the cell value for that group and analyte.
    str_check_name : str
        Prefix of the result columns, which are named
        ``"<str_check_name> <analyte>"``.

    Returns
    -------
    pandas.DataFrame
        One row per (sample name annotations, plate number) and one result
        column per analyte, combined with outer joins on the two key columns.
    """
    list_key_columns = ["sample name annotations", "plate number"]

    def build_table_for_one_analyte(str_analyte):
        # One row per group, holding the check result for this analyte only.
        pd_df_one_analyte = (
            pd_df_estimated_concentrations
            .groupby(list_key_columns)
            .apply(lambda pd_group: function_check(str_analyte, pd_group), include_groups=False)
            .reset_index()
        )
        pd_df_one_analyte.columns = list_key_columns + [f"{str_check_name} {str_analyte}"]
        return pd_df_one_analyte

    list_tables = [
        build_table_for_one_analyte(str_analyte)
        for str_analyte in dict_parameters["list of analytes"]
    ]

    # Fold the per-analyte tables together, starting from the first one.
    pd_df_checked = list_tables[0]
    for pd_df_next in list_tables[1:]:
        pd_df_checked = pd.merge(
            pd_df_checked,
            pd_df_next,
            on = list_key_columns,
            how = "outer"
        )
    return pd_df_checked

In [None]:
# Build one wide table per statistic: a row per (sample name annotations,
# plate number) and a column per analyte, named "CV <analyte>" and
# "rel. abs. diff. <analyte>" respectively.
pd_df_CV = get_table_of_duplicate_qc_checks(calculate_paired_intra_plate_cv, "CV")
pd_df_rel_abs_diff = get_table_of_duplicate_qc_checks(calculate_paired_intra_plate_rel_abs_diff, "rel. abs. diff.")

In [None]:
# Inspect the relative-absolute-difference table (one column per analyte).
pd_df_rel_abs_diff

In [None]:
# Inspect the coefficient-of-variation table (one column per analyte).
pd_df_CV

In [None]:
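# NOTE: disabled scratch code, kept for reference only.  It would print, for
# plates 1-16 and 18, the root-mean-square of an "intra plate CV" column.
# It refers to `pd_df_estimated_concentrations_checked` and to an
# "intra plate CV" column, neither of which is created at notebook level by
# the cells shown here, so it will not run if simply uncommented.
# for i in list(range(1, 17)) + [18]:
#     print(i, 
#         np.sqrt(
#             np.nanmean(
#                 pd_df_estimated_concentrations_checked[
#                     pd_df_estimated_concentrations_checked["plate number"] == i
#                 ]["intra plate CV"].values**2
#             )
#         )
#     )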

In [None]:
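# NOTE: disabled scratch code (rows with an "intra plate CV" above 0.2); the
# variable and column it uses are not created at notebook level by the cells shown here.
# pd_df_estimated_concentrations_checked[pd_df_estimated_concentrations_checked["intra plate CV"] > 0.2]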

In [None]:
# Dash application hosting the per-plate QC-plasma concentration plots.
dash_app_object_boxes = dash.Dash(__name__)

# Page layout, top to bottom: title, two selectors shown side by side
# (plot type and analyte), then the graph that the callback fills in.
dash_app_object_boxes.layout = html.Div(
    style={'backgroundColor': 'white', 'padding': '20px'},
    children=[
        html.H1("Plots of estimated concentration for QC-plasma on different plates"),

        # Selector 1: which kind of plot to draw.
        html.Div(
            style={'width': '20%', 'display': 'inline-block'},
            children=[
                html.Label("Plot type:"),
                dcc.Dropdown(
                    id='plot-type-dropdown',
                    options=[
                        {'label': str_type, 'value': str_type}
                        for str_type in ('box', 'strip')
                    ],
                    value='box',
                ),
            ],
        ),

        # Selector 2: which analyte to plot; defaults to the first configured analyte.
        html.Div(
            style={'width': '20%', 'display': 'inline-block'},
            children=[
                html.Label("Analyte:"),
                dcc.Dropdown(
                    id='analyte-dropdown',
                    options=[
                        {'label': str_name, 'value': str_name}
                        for str_name in dict_parameters["list of analytes"]
                    ],
                    value=dict_parameters["list of analytes"][0],
                ),
            ],
        ),

        dcc.Graph(id='scatter-plot'),
    ],
)


@dash_app_object_boxes.callback(
    Output('scatter-plot', 'figure'),
    Input('analyte-dropdown', 'value'),
    Input('plot-type-dropdown', 'value'),
)
def update_graph(str_analyte, str_plot_type):
    """Redraw the per-plate concentration plot for the selected analyte.

    Parameters
    ----------
    str_analyte : str or None
        Current value of the analyte dropdown; selects the column
        ``"estimated concentration <str_analyte>"`` of the notebook-level
        table ``pd_df_estimated_concentrations``.
    str_plot_type : str or None
        Current value of the plot-type dropdown: ``"box"`` or ``"strip"``.

    Returns
    -------
    plotly.graph_objects.Figure
        Box or strip plot of the selected column against "plate number".

    Raises
    ------
    dash.exceptions.PreventUpdate
        When either selection is missing or the plot type is unknown, so the
        graph keeps its current figure instead of the callback failing.
    """
    # A dropdown can be cleared by the user, in which case its value arrives
    # as None.  Without this guard `fig` would be unbound (no plot type) or
    # the column name would be "estimated concentration None" (no analyte).
    if str_analyte is None or str_plot_type not in ("box", "strip"):
        raise dash.exceptions.PreventUpdate

    str_column_name = f"estimated concentration {str_analyte}"

    if str_plot_type == "box":
        fig = px.box(
            pd_df_estimated_concentrations,
            x = "plate number",
            y = str_column_name,
        )
    else:
        fig = px.strip(
            pd_df_estimated_concentrations,
            x = "plate number",
            y = str_column_name,
            hover_name = "sample name annotations",
        )
        # Slight transparency keeps overlapping points distinguishable.
        fig.update_traces(marker = dict(opacity = 0.75))

    # Shared project styling for axes and fonts, on a white background.
    fig.update_layout(
        xaxis=dict_x_axis_parameters_categorical,
        yaxis=dict_y_axis_parameters,
        font=dict_font_parameters,
        xaxis_title="plate number",
        yaxis_title="Concentration (pg/ml)",
        plot_bgcolor='white',
        paper_bgcolor='white',
    )
    return fig

# Run the app
# The server is started only when this code runs with __name__ == '__main__'.
# jupyter_mode="inline" requests rendering inside the cell output and
# debug=True turns on Dash's debug mode.
# NOTE(review): the port is hard-coded to 3312 — confirm it is free where this runs.
if __name__ == '__main__':
    dash_app_object_boxes.run(jupyter_mode="inline", debug = True, port=3312)