In [1]:
import pathlib

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import toml
import umap

In [2]:
nELISA_plate_430420_PBMC_path = pathlib.Path(
    "../../Data/clean/Plate2/nELISA_plate_430420_PBMC_clean.parquet"
)
manual_cluster_1_path = pathlib.Path(
    "../../Data/clean/Plate2/Manual_Treatment_Clusters_1.csv"
)

manual_cluster_2_path = pathlib.Path(
    "../../Data/clean/Plate2/Manual_Treatment_Clusters_2.csv"
)

treatment_clusters_path = pathlib.Path(
    "../../../1.Exploratory_Data_Analysis/utils/params.toml"
)


nELISA_plate_430420_PBMC = pd.read_parquet(nELISA_plate_430420_PBMC_path)
manual_clusters_1 = pd.read_csv(manual_cluster_1_path)
manual_clusters_2 = pd.read_csv(manual_cluster_2_path)
treatments = toml.load(treatment_clusters_path)["list_of_treatments"]["treatments"]

nELISA_original_plate = nELISA_plate_430420_PBMC.copy()

In [3]:
# select data only columns and make floats
nELISA_data_values = nELISA_original_plate.filter(like="NSU", axis=1).astype("float")
nELISA_data_values.head()

Unnamed: 0,Activin A [NSU],AITRL (GITR Ligand) [NSU],Amphiregulin [NSU],Amyloid beta [NSU],APRIL [NSU],BAFF [NSU],BCMA (TNFRSF17) [NSU],BDNF [NSU],BMP2 [NSU],BMP3 [NSU],...,TWEAK [NSU],uPA [NSU],VCAM-1 [NSU],VEGF Receptor 2 (Flk-1) [NSU],VEGF-A (165) [NSU],VEGF-C [NSU],VEGF-D [NSU],VEGFR-1 [NSU],WISP-1 (CCN4) [NSU],XCL1 (Lymphotactin) [NSU]
0,0.09771,0.461685,0.270477,0.514695,0.479281,0.270494,0.708849,0.134432,0.350986,0.216932,...,0.386063,0.469875,0.395392,0.560129,0.504521,0.490444,0.258834,0.238358,0.524276,0.25067
1,0.064513,0.451181,0.246274,0.471026,0.269795,0.204498,0.247611,0.322087,0.350642,0.349237,...,0.45446,0.570146,0.032391,0.476656,0.315426,0.589522,0.38117,0.168645,0.455092,0.228752
2,0.06186,0.196318,0.236491,0.474891,0.174672,0.824721,0.704521,0.254823,0.443939,0.268677,...,0.755683,0.374554,0.486915,0.389375,0.369421,0.680276,0.182956,0.263281,0.213596,0.064645
3,0.060998,0.596601,0.129926,0.30261,0.559309,0.087533,0.54111,0.350256,0.52826,0.313411,...,0.254542,0.630644,0.586271,0.258029,0.561051,0.551671,0.582053,0.087565,0.140992,0.234191
4,0.061116,0.490832,0.33951,0.453362,0.414653,0.424223,0.702561,0.203464,0.502516,0.363301,...,0.424098,0.493033,0.171562,0.615867,0.288153,0.506528,0.264141,0.296782,0.541689,0.167078


In [4]:
print(
    f"""
NSU nELISA mean of Activin A: {nELISA_data_values['Activin A [NSU]'].mean()}
NSU nELISA STDEV of Activin A: {nELISA_data_values['Activin A [NSU]'].std()}
NSU nELISA min of Activin A: {nELISA_data_values['Activin A [NSU]'].min()}
NSU nELISA max of Activin A: {nELISA_data_values['Activin A [NSU]'].max()}
"""
)


NSU nELISA mean of Activin A: 0.21036656278540766
NSU nELISA STDEV of Activin A: 0.27220830038007515
NSU nELISA min of Activin A: 0.0
NSU nELISA max of Activin A: 0.9999999999999999



In [5]:
# rename columns to remove special character "/"
# replace with "/" in the column names file nameing is not possible
nELISA_original_plate.columns = nELISA_original_plate.columns.str.replace("/", "_")

# set umap parameters
umap_params = umap.UMAP(
    n_neighbors=6,
    min_dist=0.8,
    n_components=2,
    metric="cosine",
    spread=1.1,
    init="random",
    random_state=0,
)

In [6]:
# fit and transform data for umap
proj_2d = umap_params.fit_transform(nELISA_data_values)

# add umap coordinates to dataframe of metadata and raw data
nELISA_original_plate["umap_1"] = proj_2d[:, 0]
nELISA_original_plate["umap_2"] = proj_2d[:, 1]

  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


In [7]:
# define output paths
nELISA_plate_430420_out_path = pathlib.Path(
    "./results/nELISA_plate_430420_umap_PBMC.csv"
)
# write to csv
nELISA_original_plate.to_csv(nELISA_plate_430420_out_path, index=False)

### Selected Treatments

In [8]:
# select treatments from the list of treatments from the df
nELISA_plate_430420_PBMC_treatments = nELISA_plate_430420_PBMC[
    nELISA_plate_430420_PBMC["oneb_Treatment_Dose_Inhibitor_Dose"].isin(treatments)
]
# select data only columns and make floats
nELISA_plate_430420_PBMC_treatments_values = nELISA_plate_430420_PBMC_treatments.filter(
    like="NSU", axis=1
).astype("float")

# fit and transform data for umap
proj_2d = umap_params.fit_transform(nELISA_plate_430420_PBMC_treatments_values)

# add umap coordinates to dataframe of metadata and raw data
nELISA_plate_430420_PBMC_treatments["umap_1"] = proj_2d[:, 0]
nELISA_plate_430420_PBMC_treatments["umap_2"] = proj_2d[:, 1]

# define output paths
nELISA_plate_430420_selected_treatments_out_path = pathlib.Path(
    "./results/nELISA_plate_430420_umap_PBMC_selected_treatments.csv"
)
# write to csv
nELISA_plate_430420_PBMC_treatments.to_csv(
    nELISA_plate_430420_selected_treatments_out_path, index=False
)

  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nELISA_plate_430420_PBMC_treatments["umap_1"] = proj_2d[:, 0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nELISA_plate_430420_PBMC_treatments["umap_2"] = proj_2d[:, 1]
