# Correlation of Nomic Features
Each nELISA feature is used to compute the correlation between wells.
The correlation of the aggregated features is calculated:
* per well
* per treatment
* per selected treatment

In [1]:
import pathlib

import numpy as np
import pandas as pd

In [2]:
# Parameters
# `cell_type` is interpolated into the input CSV file name and into the
# results directory used when the correlation tables are saved.
cell_type = "PBMC"

In [3]:
# Path to the cleaned nELISA CSV; the file name is parameterised by `cell_type`.
nomic_df_path = pathlib.Path(
    f"../2.Nomic_nELISA_Analysis/Data/clean/Plate2/nELISA_plate_430420_{cell_type}.csv"
)

# Load the table as-is. This raw frame is kept around because `position_x`
# is read back from it after the features have been normalized.
nomic_df_raw = pd.read_csv(nomic_df_path)

In [4]:
# Drop every column whose name contains "pgml" (case-insensitive).
is_pgml_col = nomic_df_raw.columns.str.contains("pgml", case=False, na=False)

# Prefix every remaining column whose name does not contain the substring "NSU"
# with "Metadata_". One rename with a mapping function relabels all columns in a
# single pass; the previous per-column loop rebuilt the whole DataFrame once for
# every metadata column.
nomic_df = nomic_df_raw.loc[:, ~is_pgml_col].rename(
    columns=lambda col: col if "NSU" in col else "Metadata_" + col
)

In [5]:
## Clean up df
# A missing concentration is treated as a concentration of 0.
for concentration_col in (
    "Metadata_inducer1_concentration_value",
    "Metadata_inducer2_concentration_value",
    "Metadata_inhibitor_concentration_value",
):
    nomic_df[concentration_col] = nomic_df[concentration_col].fillna(0)

# Every row is in exactly one of two cases: it has no second inducer, or it has
# one. The same pair of masks drives both the treatment and the dose label.
conditions = [
    nomic_df["Metadata_inducer2"].isnull(),
    nomic_df["Metadata_inducer2"].notnull(),
]

# Treatment label: "<inducer1>" or "<inducer1>_<inducer2>".
inducer1_name = nomic_df["Metadata_inducer1"]
inducer2_name = nomic_df["Metadata_inducer2"]
treatment_choices = [
    inducer1_name.astype(str),
    (inducer1_name + "_" + inducer2_name).astype(str),
]
nomic_df["Metadata_Treatment"] = np.select(
    condlist=conditions, choicelist=treatment_choices
)

# Dose label: "<inducer1 concentration>" or
# "<inducer1 concentration>_<inducer2 concentration>".
inducer1_dose = nomic_df["Metadata_inducer1_concentration_value"].astype(str)
inducer2_dose = nomic_df["Metadata_inducer2_concentration_value"].astype(str)
dose_choices = [
    inducer1_dose,
    (inducer1_dose + "_" + inducer2_dose).astype(str),
]
nomic_df["Metadata_Dose"] = np.select(condlist=conditions, choicelist=dose_choices)

nomic_df["Metadata_inducer1_concentration_value"] = pd.to_numeric(
    nomic_df["Metadata_inducer1_concentration_value"]
)

# Single combined label ("one beta" term):
# "<treatment>_<dose>_<inhibitor>_<inhibitor concentration>".
treatment_and_dose = (
    nomic_df["Metadata_Treatment"] + "_" + nomic_df["Metadata_Dose"].astype(str)
)
inhibitor_and_dose = (
    nomic_df["Metadata_inhibitor"].astype(str)
    + "_"
    + nomic_df["Metadata_inhibitor_concentration_value"].astype(str)
)
nomic_df["oneb_Metadata_Treatment_Dose_Inhibitor_Dose"] = (
    treatment_and_dose + "_" + inhibitor_and_dose
).astype(str)

In [6]:
# Keep a full copy (metadata included); the combined treatment label is read
# back from it after the feature columns have been normalized.
nomic_cleaned = nomic_df.copy()

# Drop the first 25 columns by position, then the three label columns derived
# during clean-up, leaving only the columns that are passed to the scaler.
# This replaces three chained positional drops (3:25, then 0:2, then 0) and a
# bare `nomic_df.columns[3:25]` expression that had no effect.
# NOTE(review): the positional slice depends on the column order of the input
# CSV and on column labels being unique - confirm if the file layout changes.
derived_label_cols = [
    "Metadata_Dose",
    "Metadata_Treatment",
    "oneb_Metadata_Treatment_Dose_Inhibitor_Dose",
]
nomic_df = nomic_df.drop(columns=nomic_df.columns[:25]).drop(
    columns=derived_label_cols
)

# Normalization of Values

In [7]:
# min-max normalization of nomic data with scikit-learn's MinMaxScaler
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
# Rebuild the DataFrame around the scaled values, restoring the column labels
# and the original row index. Keeping the index matters because later cells
# attach columns from `nomic_df_raw` and `nomic_cleaned` by index alignment;
# without `index=` the rows would be silently renumbered 0..n-1.
nomic_df = pd.DataFrame(
    scaler.fit_transform(nomic_df),
    columns=nomic_df.columns,
    index=nomic_df.index,
)

In [8]:
# Summary statistics of the scaled features, used as a sanity check of the
# min-max normalization: each column's `min` is expected to be 0 and its `max` 1.
nomic_df.describe()

Unnamed: 0,Activin A [NSU],AITRL (GITR Ligand) [NSU],Amphiregulin [NSU],Amyloid beta [NSU],APRIL [NSU],BAFF [NSU],BCMA (TNFRSF17) [NSU],BDNF [NSU],BMP2 [NSU],BMP3 [NSU],...,TWEAK [NSU],uPA [NSU],VCAM-1 [NSU],VEGF Receptor 2 (Flk-1) [NSU],VEGF-A (165) [NSU],VEGF-C [NSU],VEGF-D [NSU],VEGFR-1 [NSU],WISP-1 (CCN4) [NSU],XCL1 (Lymphotactin) [NSU]
count,154.0,154.0,154.0,154.0,154.0,154.0,154.0,154.0,154.0,154.0,...,154.0,154.0,154.0,154.0,154.0,154.0,154.0,154.0,154.0,154.0
mean,0.210367,0.475921,0.419877,0.465889,0.455548,0.377734,0.570632,0.355978,0.452083,0.355345,...,0.473808,0.418869,0.402429,0.448119,0.462338,0.54901,0.420083,0.314011,0.447576,0.348847
std,0.272208,0.198562,0.258954,0.178171,0.182614,0.18363,0.17521,0.167947,0.165232,0.174296,...,0.194492,0.178115,0.168891,0.159872,0.220829,0.169934,0.203106,0.224093,0.185292,0.17205
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.051578,0.318037,0.230291,0.343561,0.314063,0.2526,0.434223,0.251276,0.357886,0.239875,...,0.328619,0.302704,0.281719,0.365503,0.312435,0.423527,0.29135,0.166779,0.321669,0.228874
50%,0.072591,0.463071,0.359486,0.447928,0.439187,0.381498,0.586944,0.341592,0.428599,0.315487,...,0.466609,0.404098,0.404436,0.436647,0.446795,0.561156,0.397135,0.236412,0.453741,0.313567
75%,0.274888,0.597291,0.631269,0.570563,0.571771,0.490413,0.704031,0.438899,0.523628,0.442596,...,0.590691,0.541472,0.510351,0.543357,0.576491,0.66942,0.527872,0.409108,0.576348,0.45712
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [9]:
# Re-attach position_x from the raw table (rows are matched on the index).
nomic_df = nomic_df.assign(Metadata_position_x=nomic_df_raw["position_x"])

# Correlation of Wells

In [10]:
# Index the rows by Metadata_position_x: this removes it from the columns that
# are correlated and makes it the row/column label of the well correlation matrix.
nomic_df = nomic_df.set_index("Metadata_position_x")

In [11]:
# corr() correlates columns, so transpose first: each row (indexed by
# Metadata_position_x) becomes a column and the result is a row-by-row matrix.
well_corr_df = nomic_df.transpose().corr()

# Create the per-cell-type output directory if needed, then write the matrix.
save_path = pathlib.Path(f"./results/correlation/{cell_type}/nomic/")
save_path.mkdir(parents=True, exist_ok=True)
wells_corr_file = f"{save_path}/wells_corr.csv"
well_corr_df.to_csv(wells_corr_file)

# All Treatment correlation

In [12]:
# Column holding the combined treatment/dose/inhibitor label of each row.
treatment_label_col = "oneb_Metadata_Treatment_Dose_Inhibitor_Dose"

# Move Metadata_position_x out of the index and discard it, attach the combined
# label from the un-normalized copy (rows matched on the index), then average
# every feature within each label.
nomic_df = (
    nomic_df.reset_index()
    .drop(columns=["Metadata_position_x"])
    .assign(**{treatment_label_col: nomic_cleaned[treatment_label_col]})
    .groupby(treatment_label_col)
    .mean()
)

In [13]:
# The rows are now treatment-level means, so the transposed correlation is
# treatment-by-treatment (the variable keeps its earlier name).
well_corr_df = nomic_df.transpose().corr()

save_path = pathlib.Path(f"./results/correlation/{cell_type}/nomic/")
save_path.mkdir(parents=True, exist_ok=True)
treatments_corr_file = f"{save_path}/treatments_corr.csv"
well_corr_df.to_csv(treatments_corr_file)

# Treatment correlation for selected treatments

In [14]:
# Treatment labels to keep for the selected-treatment correlation. Each entry
# must match a value of oneb_Metadata_Treatment_Dose_Inhibitor_Dose exactly,
# i.e. "<treatment>_<dose>_<inhibitor>_<inhibitor concentration>".
list_of_treatments = [
    "LPS_0.01_DMSO_0.025",
    "LPS_0.1_DMSO_0.025",
    "LPS_1.0_DMSO_0.025",
    "LPS_10.0_DMSO_0.025",
    "LPS_100.0_DMSO_0.025",
    "DMSO_0.1_DMSO_0.025",
    "Thapsigargin_1.0_DMSO_0.025",
    "Thapsigargin_10.0_DMSO_0.025",
]

In [15]:
# Column holding the combined treatment/dose/inhibitor label.
treatment_col = "oneb_Metadata_Treatment_Dose_Inhibitor_Dose"

# Bring the label back as a column, keep only the rows whose label is in
# list_of_treatments, then aggregate by label again so it becomes the index.
nomic_df = (
    nomic_df.reset_index()
    .loc[lambda frame: frame[treatment_col].isin(list_of_treatments)]
    .groupby([treatment_col])
    .mean()
)

In [16]:
# Correlation between the selected treatments only (one row per treatment label,
# so the transposed correlation is treatment-by-treatment).
well_corr_df = nomic_df.transpose().corr()

save_path = pathlib.Path(f"./results/correlation/{cell_type}/nomic/")
save_path.mkdir(parents=True, exist_ok=True)
selected_corr_file = f"{save_path}/selected_treatments_corr.csv"
well_corr_df.to_csv(selected_corr_file)