The goal of this notebook is to determine which cytokines and chemokines are found at high levels after treatment with pyroptosis-inducing agents.
Doing this will allow us to establish a ground truth for the occurrence of pyroptosis.

### Imports

In [1]:
import pathlib

# umap analysis of treatment groups
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats as stats
import seaborn as sns
import toml
from numba.core.errors import NumbaDeprecationWarning, NumbaPendingDeprecationWarning
from scipy.cluster.hierarchy import linkage
from scipy.stats import f_oneway

# post hoc test for 'VEGF-C [NSU]' column using Tukey's HSD test
from statsmodels.stats.multicomp import pairwise_tukeyhsd

# anova test on each group


warnings.filterwarnings("ignore")
warnings.simplefilter("ignore", category=NumbaDeprecationWarning)
warnings.simplefilter("ignore", category=NumbaPendingDeprecationWarning)
import umap

In [2]:
# path to the cleaned nELISA parquet file, relative to the notebook's working directory
df_path = pathlib.Path(
    "../../Data/clean/Plate2/nELISA_plate_430420_SHSY5Y_clean.parquet"
)

# load the plate data into a DataFrame
df = pd.read_parquet(df_path)

In [3]:
# import the selected treatments
# path to the shared parameter file (relative to the notebook's working directory)
toml_path = pathlib.Path("../../../1.Exploratory_Data_Analysis/utils/params.toml")

# read in the toml file and pull the treatment list out of its
# [list_of_treatments] table; this list is later used to order plots and rows
params = toml.load(toml_path)
list_of_treatments = params["list_of_treatments"]["treatments"]

In [4]:
# Split the plate data into metadata columns and cytokine ("[NSU]") columns.
#
# Drop the treatment annotation columns that are not needed for this analysis.
# NOTE: "position_x" is deliberately NOT dropped here. It is re-attached below
# as "Metadata_position_x" (and used as an id column when melting), so dropping
# it first would make that lookup fail with a KeyError.
# df = df[df["fourb_Metadata_Treatment_Dose_Inhibitor_Dose"].isin(list_of_treatments)]
df = df.drop(
    columns=[
        "Dose",
        "Treatment",
        "twob_Treatment_Dose_Inhibitor_Dose",
        "threeb_Treatment_Dose_Inhibitor_Dose",
        "fourb_Treatment_Dose_Inhibitor_Dose",
    ]
)
# if a column name does not contain [NSU], add Metadata_ to the beginning of the column name
df.columns = ["Metadata_" + col if "[NSU]" not in col else col for col in df.columns]

# every column that now carries the "Metadata" prefix
df_metadata = df[df.columns[df.columns.str.contains("Metadata")]]
# keep only the non-metadata (cytokine) columns ...
df = df.drop(columns=df_metadata.columns)
# ... then re-attach the treatment label and the position identifier
df["oneb_Treatment_Dose_Inhibitor_Dose"] = df_metadata[
    "Metadata_oneb_Treatment_Dose_Inhibitor_Dose"
]
df["Metadata_position_x"] = df_metadata["Metadata_position_x"]

Unnamed: 0,Activin A [NSU],AITRL (GITR Ligand) [NSU],Amphiregulin [NSU],Amyloid beta [NSU],APRIL [NSU],BAFF [NSU],BCMA (TNFRSF17) [NSU],BDNF [NSU],BMP2 [NSU],BMP3 [NSU],...,uPA [NSU],VCAM-1 [NSU],VEGF Receptor 2 (Flk-1) [NSU],VEGF-A (165) [NSU],VEGF-C [NSU],VEGF-D [NSU],VEGFR-1 [NSU],WISP-1 (CCN4) [NSU],XCL1 (Lymphotactin) [NSU],oneb_Treatment_Dose_Inhibitor_Dose
0,0.097710,0.461685,0.270477,0.514695,0.479281,0.270494,0.708849,0.134432,0.350986,0.216932,...,0.469875,0.395392,0.560129,0.504521,0.490444,0.258834,0.238358,0.524276,0.250670,DMSO_0.100_%_DMSO_0.025_%
1,0.064513,0.451181,0.246274,0.471026,0.269795,0.204498,0.247611,0.322087,0.350642,0.349237,...,0.570146,0.032391,0.476656,0.315426,0.589522,0.381170,0.168645,0.455092,0.228752,DMSO_0.100_%_DMSO_0.025_%
2,0.061860,0.196318,0.236491,0.474891,0.174672,0.824721,0.704521,0.254823,0.443939,0.268677,...,0.374554,0.486915,0.389375,0.369421,0.680276,0.182956,0.263281,0.213596,0.064645,DMSO_0.100_%_DMSO_0.025_%
3,0.060998,0.596601,0.129926,0.302610,0.559309,0.087533,0.541110,0.350256,0.528260,0.313411,...,0.630644,0.586271,0.258029,0.561051,0.551671,0.582053,0.087565,0.140992,0.234191,DMSO_0.100_%_DMSO_0.025_%
4,0.061116,0.490832,0.339510,0.453362,0.414653,0.424223,0.702561,0.203464,0.502516,0.363301,...,0.493033,0.171562,0.615867,0.288153,0.506528,0.264141,0.296782,0.541689,0.167078,DMSO_0.100_%_DMSO_0.025_%
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
149,0.043200,0.257949,0.227103,0.565494,0.260330,0.523500,0.788839,0.425712,0.398209,0.317449,...,0.285976,0.000000,0.432374,0.310545,0.568101,0.275941,0.312566,0.286031,0.288358,media_ctr_0.0_0_Media_ctr_0.0_0
150,0.072687,0.379701,0.308980,0.537718,0.640661,0.502533,0.451720,0.385627,0.371089,0.190994,...,0.361696,0.376460,0.628378,0.190684,0.636181,0.450715,0.178011,0.621119,0.229238,media_ctr_0.0_0_Media_ctr_0.0_0
151,0.081503,0.161844,0.316893,0.433801,0.630864,0.577818,0.576989,0.348813,0.362100,0.311019,...,0.185620,0.391430,0.620522,0.423500,0.785011,0.308965,0.268730,0.613026,0.254080,media_ctr_0.0_0_Media_0.0_0
152,0.062455,0.639999,0.334726,0.425654,0.281513,0.488874,0.734609,0.232744,0.425711,0.368349,...,0.371897,0.213694,0.528663,0.487337,0.439436,0.161322,0.296984,0.625991,0.297158,media_ctr_0.0_0_Media_ctr_0.0_0


In [None]:
# output paths for the wide table and its long (melted) counterpart
all_cytokines_path = pathlib.Path(
    "./results/SHSY5Y_all_cytokine_values_per_treatment_per_well.parquet"
)
all_cytokines_path_melted = pathlib.Path(
    "./results/SHSY5Y_all_cytokine_values_per_treatment_per_well_melted.parquet"
)
# create the output directory up front so the writes below do not fail on a
# fresh checkout where ./results does not exist yet
all_cytokines_path.parent.mkdir(parents=True, exist_ok=True)
df.to_parquet(all_cytokines_path)

# long format: one row per (position, treatment, cytokine) with its value
df_melted = df.melt(
    id_vars=["Metadata_position_x", "oneb_Treatment_Dose_Inhibitor_Dose"],
    var_name="cytokine",
    value_name="cytokine_value",
)

df_melted.to_parquet(all_cytokines_path_melted)

## ANOVA and Post-Hoc Analysis
ANOVA across all treatments, followed by a post-hoc analysis, for each cytokine and chemokine.
This will determine the cytokines and chemokines that are found at high levels after treatment with pyroptosis-inducing agents.

In [5]:
# seed frame for the Tukey results: a single placeholder row of empty strings
# with one column per field of the Tukey summary table plus the cytokine name
final_df_tukey = pd.DataFrame(
    {
        column_name: [""]
        for column_name in (
            "group1",
            "group2",
            "meandiff",
            "lower",
            "upper",
            "reject",
            "p-adj",
            "cytokine",
        )
    }
)

In [6]:
# One-vs-rest ANOVA screen per cytokine, followed by Tukey's HSD across all
# treatment groups for every cytokine that passes the screen.
treatment_col = "oneb_Treatment_Dose_Inhibitor_Dose"
# only the "[NSU]" columns are cytokine measurements; the treatment label and
# any metadata columns (e.g. the position identifier) must not be tested
cytokine_columns = [col for col in df.columns if "[NSU]" in col]

num = 0
alpha = 0.05
# Bonferroni-style correction over the number of cytokines actually tested
alpha_adj = alpha / max(len(cytokine_columns), 1)

# collect the per-cytokine tables and concatenate once after the loop
tukey_tables = []
for i in cytokine_columns:
    # the cytokine passes the screen if at least one selected treatment differs
    # from the remaining wells (treatment vs. everything else)
    passes_screen = False
    for treatment in list_of_treatments:
        in_group = df[treatment_col] == treatment
        one_way_anova = stats.f_oneway(df.loc[in_group, i], df.loc[~in_group, i])
        if one_way_anova.pvalue < alpha:
            passes_screen = True
            break
    if not passes_screen:
        continue

    # count each cytokine once, and run the (treatment-independent) Tukey test
    # once per cytokine so that its rows are not duplicated in the results
    num += 1
    tukey = pairwise_tukeyhsd(
        endog=df[i],
        groups=df[treatment_col],
        alpha=alpha_adj,
    )
    # send the results to a dataframe (first row of the table is the header)
    results_table = tukey.summary().data
    tukey_results = pd.DataFrame(data=results_table[1:], columns=results_table[0])
    tukey_results["cytokine"] = f"{i}"
    tukey_tables.append(tukey_results)

# append everything to the seed frame in a single concat
final_df_tukey = pd.concat([final_df_tukey, *tukey_tables], axis=0)
print(
    f"Out of the {len(cytokine_columns)} cytokines tested, {num} were significantly different between groups (p < {alpha})"
)

In [None]:
# preview the first rows; the first row is expected to be the blank placeholder
# row that final_df_tukey was seeded with before the Tukey results were appended
final_df_tukey.head(3)

In [None]:
# remove the blank placeholder row that final_df_tukey was seeded with.
# Filtering on the empty "group1" value (instead of slicing off the first row)
# keeps this cell idempotent: re-running it never discards a real result row.
final_df_tukey = final_df_tukey[final_df_tukey["group1"] != ""]
final_df_tukey.head(3)

Clean up the data and filter out tests that are not significant.

In [None]:
# keep only the post-hoc comparisons whose adjusted p-value is below 0.05
final_df_tukey = final_df_tukey.loc[final_df_tukey["p-adj"] < 0.05]

In [None]:
# order the comparisons from the smallest to the largest adjusted p-value
final_df_tukey = final_df_tukey.sort_values("p-adj", ascending=True)

# Filter the data for significant post-hoc tests
If two high-dose groups of pyroptotic treatments appear as significantly different from each other in this filtered data (p-adj < 0.05), we can discard that cytokine.
Such a result implies that the response to treatment is variable.
We are primarily interested in which cytokines best differentiate between control, apoptosis, and pyroptosis.

In [None]:
# create the output path for the filtered Tukey results
output_path = pathlib.Path("./results/tukey_filtered_nomic_results.csv")
# make sure the output directory exists before writing
output_path.parent.mkdir(parents=True, exist_ok=True)
# save the df
final_df_tukey.to_csv(output_path)

# display the cytokines that still have at least one significant comparison
# (kept as the last expression so the notebook actually renders it)
final_df_tukey["cytokine"].unique()

In [None]:
# draw one bar plot per cytokine that is still present in the Tukey results,
# with the treatments on the x axis in the order given by list_of_treatments
for col in final_df_tukey["cytokine"].unique():
    ax = sns.barplot(
        data=df,
        x="oneb_Treatment_Dose_Inhibitor_Dose",
        y=col,
        order=list_of_treatments,
        capsize=0.2,
    )
    ax.set_title(col)
    plt.xticks(rotation=90)
    plt.show()

# hand-picked cytokines used by the plots, csv export and heatmap further down
cytokines = [
    "Activin A [NSU]",
    "IL-1 alpha [NSU]",
    "IL-1 beta [NSU]",
    "Oncostatin M (OSM) [NSU]",
    "IFN gamma [NSU]",
    "Osteopontin (OPN) [NSU]",
    "TNF alpha [NSU]",
    "EMMPRIN [NSU]",
    "G-CSF [NSU]",
    "MMP-9 [NSU]",
    "IL-6 [NSU]",
    "MIF [NSU]",
    "IL-16 [NSU]",
    "IL-22 [NSU]",
    "IL-18 [NSU]",
    "CCL24 [NSU]",
    "CCL20 [NSU]",
    "CXCL11 [NSU]",
    "CXCL1 [NSU]",
]

In [None]:
# restrict the data to the hand-picked cytokines
selected_cytokines = df[cytokines]

# subplot grid: `a` rows (one per selected cytokine) by `b` columns
a = len(selected_cytokines.columns)
b = 6
plt.figure(figsize=(50, 100))
plt.suptitle("Cytokine Levels Across Treatments", fontsize=18)
plt.subplots_adjust(top=0.975, bottom=0.01, hspace=1, wspace=0.3)

# one panel per cytokine, filled in grid order starting at position 1
for panel_number, cytokine in enumerate(selected_cytokines.columns, start=1):
    ax = plt.subplot(a, b, panel_number)
    # bar plot with error-bar caps, treatments ordered as in list_of_treatments
    sns.barplot(
        data=df,
        x="oneb_Treatment_Dose_Inhibitor_Dose",
        y=cytokine,
        order=list_of_treatments,
        capsize=0.2,
        ax=ax,
    )
    ax.set_title(cytokine)
    # rotate the treatment labels so they stay readable
    plt.xticks(rotation=90)

# make sure the figure directory exists, then save and show the figure
pathlib.Path("./figures/").mkdir(parents=True, exist_ok=True)
plt.savefig("./figures/selected_cytokines.png", bbox_inches="tight")
plt.show()

In [None]:
# make sure the output directory exists before writing into it
results_dir = pathlib.Path("results")
results_dir.mkdir(parents=True, exist_ok=True)

# save the final_df_tukey df to a csv file (without the index)
final_df_tukey.to_csv(results_dir / "tukey_test_results.csv", index=False)

# write the selected cytokines to a csv file: a "cytokine" header followed by
# one name per line. The `with` block closes the file, so no explicit close().
with open(results_dir / "cytokines.csv", "w") as f:
    f.write("cytokine\n")
    f.writelines(f"{item}\n" for item in cytokines)

## Heatmaps of cytokine levels in each treatment

In [None]:
# selected cytokine columns, indexed by the treatment label of each row
df_cytokines = df.set_index("oneb_Treatment_Dose_Inhibitor_Dose")[cytokines]

In [None]:
# display the hand-picked cytokine list
cytokines

In [None]:
# mean value of every selected cytokine per treatment group
# (the treatment label is the index of df_cytokines)
data_agg = df_cytokines.groupby(level="oneb_Treatment_Dose_Inhibitor_Dose").mean()

# cytokine columns to plot (those tagged with "[NSU]")
column_list = data_agg.columns[
    data_agg.columns.str.contains("[NSU]", regex=False)
].tolist()
# treatment groups, used as row labels
row_list = data_agg.index
# subset of the aggregated data restricted to the cytokine columns
data = data_agg.loc[:, column_list]

In [None]:
# put the treatment rows in the order given by list_of_treatments
data_agg = data_agg.reindex(index=list_of_treatments)

In [None]:
# display the per-treatment mean cytokine values
data_agg

In [None]:
# Heatmap of the mean cytokine level per treatment, with the cytokines (rows of
# the transposed table) hierarchically clustered and the treatments (columns)
# left in their existing order.

# make sure the figure directory exists instead of relying on an earlier cell
pathlib.Path("./figures/").mkdir(parents=True, exist_ok=True)

# linkage matrix over the cytokines: euclidean distance, Ward's method,
# with optimal leaf ordering
linkage_df = linkage(
    data_agg.T, metric="euclidean", method="ward", optimal_ordering=True
)
g = sns.clustermap(
    data_agg.T,
    cmap="viridis",
    metric="euclidean",
    method="ward",
    row_cluster=True,
    # columns (treatments) are not clustered, so no col_linkage is passed;
    # linkage_df describes the rows and must not be reused for the columns
    col_cluster=False,
    row_linkage=linkage_df,
    xticklabels=True,
    yticklabels=True,
    vmin=0,
    vmax=1,
    # cbar_kws={"label": "Cytokine Levels"},
    # dendrogram_ratio=0.25,
    # set colorbar position to the right
    # cbar_pos=(1.02, 0.25, 0.03, 0.5),
    # set plot size
    # figsize=(25, 25),
)
# change the font size of the x and y ticks
# g.ax_heatmap.tick_params(labelsize=20)
# g.ax_heatmap.set_xlabel("Cytokines", fontsize=30)

# save the heatmap
plt.savefig("./figures/heatmap_SHSY5Y.png", bbox_inches="tight")
# show the heatmap
plt.show()