In [1]:
import pathlib

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import toml
import umap

In [2]:
nELISA_plate_430420_SHSY5Y_path = pathlib.Path(
    "../../Data/clean/Plate2/nELISA_plate_430420_SHSY5Y_clean.parquet"
)
manual_cluster_1_path = pathlib.Path(
    "../../Data/clean/Plate2/Manual_Treatment_Clusters_1.csv"
)

manual_cluster_2_path = pathlib.Path(
    "../../Data/clean/Plate2/Manual_Treatment_Clusters_2.csv"
)

treatment_clusters_path = pathlib.Path(
    "../../../1.Exploratory_Data_Analysis/utils/params.toml"
)


nELISA_plate_430420_SHSY5Y = pd.read_parquet(nELISA_plate_430420_SHSY5Y_path)
manual_clusters_1 = pd.read_csv(manual_cluster_1_path)
manual_clusters_2 = pd.read_csv(manual_cluster_2_path)
treatments = toml.load(treatment_clusters_path)["list_of_treatments"]["treatments"]

nELISA_original_plate = nELISA_plate_430420_SHSY5Y.copy()

In [3]:
# select data only columns and make floats
nELISA_data_values = nELISA_original_plate.filter(like="NSU", axis=1).astype("float")
nELISA_data_values.head()

Unnamed: 0,Activin A [NSU],AITRL (GITR Ligand) [NSU],Amphiregulin [NSU],Amyloid beta [NSU],APRIL [NSU],BAFF [NSU],BCMA (TNFRSF17) [NSU],BDNF [NSU],BMP2 [NSU],BMP3 [NSU],...,TWEAK [NSU],uPA [NSU],VCAM-1 [NSU],VEGF Receptor 2 (Flk-1) [NSU],VEGF-A (165) [NSU],VEGF-C [NSU],VEGF-D [NSU],VEGFR-1 [NSU],WISP-1 (CCN4) [NSU],XCL1 (Lymphotactin) [NSU]
0,0.595518,0.564662,0.538727,0.751911,0.79671,0.50657,0.557007,0.0,0.13011,0.585972,...,0.318056,0.180171,0.524795,0.299154,0.296559,0.27815,0.57092,0.344809,0.232809,0.725177
1,0.549004,0.856272,0.127459,0.070602,0.810357,0.35985,0.600601,0.682277,0.288149,0.681883,...,0.356741,0.305277,0.271509,0.767602,0.26011,0.250799,0.457524,0.225962,0.300728,0.459638
2,0.891845,0.640559,0.620414,0.142671,0.187645,0.0,0.440517,0.651019,0.548641,0.01408,...,0.488387,0.179485,0.317863,0.645845,0.369798,0.0,0.685494,0.207687,0.754146,0.679904
3,0.739737,0.987796,0.123841,0.579838,0.483562,0.599216,0.454416,0.407962,0.568372,0.58832,...,0.261839,0.201939,0.369552,0.450109,0.219126,0.335244,0.537461,0.483328,0.533404,0.18747
4,0.489414,0.684927,0.359885,0.52906,0.684803,0.484723,0.714058,0.532762,0.488618,0.472403,...,0.328667,0.134393,0.51791,0.455387,0.456338,0.144323,0.338754,0.748028,0.447766,0.73361


In [4]:
print(
    f"""
NSU nELISA mean of Activin A: {nELISA_data_values['Activin A [NSU]'].mean()}
NSU nELISA STDEV of Activin A: {nELISA_data_values['Activin A [NSU]'].std()}
NSU nELISA min of Activin A: {nELISA_data_values['Activin A [NSU]'].min()}
NSU nELISA max of Activin A: {nELISA_data_values['Activin A [NSU]'].max()}
"""
)


NSU nELISA mean of Activin A: 0.5127904493184683
NSU nELISA STDEV of Activin A: 0.19111781098451824
NSU nELISA min of Activin A: 0.0
NSU nELISA max of Activin A: 1.0



In [5]:
# rename columns to remove special character "/"
# replace with "/" in the column names file nameing is not possible
nELISA_original_plate.columns = nELISA_original_plate.columns.str.replace("/", "_")

# set umap parameters
umap_params = umap.UMAP(
    n_neighbors=6,
    min_dist=0.8,
    n_components=2,
    metric="cosine",
    spread=1.1,
    init="random",
    random_state=0,
)

In [6]:
# fit and transform data for umap
proj_2d = umap_params.fit_transform(nELISA_data_values)

# add umap coordinates to dataframe of metadata and raw data
nELISA_original_plate["umap_1"] = proj_2d[:, 0]
nELISA_original_plate["umap_2"] = proj_2d[:, 1]

  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


In [7]:
# define output paths
nELISA_plate_430420_out_path = pathlib.Path(
    "./results/nELISA_plate_430420_umap_SHSY5Y.csv"
)
# write to csv
nELISA_original_plate.to_csv(nELISA_plate_430420_out_path, index=False)

### Selected Treatments

In [8]:
# select treatments from the list of treatments from the df
nELISA_plate_430420_SHSY5Y_treatments = nELISA_plate_430420_SHSY5Y[
    nELISA_plate_430420_SHSY5Y["oneb_Treatment_Dose_Inhibitor_Dose"].isin(treatments)
]
# select data only columns and make floats
nELISA_plate_430420_SHSY5Y_treatments_values = (
    nELISA_plate_430420_SHSY5Y_treatments.filter(like="NSU", axis=1).astype("float")
)

# fit and transform data for umap
proj_2d = umap_params.fit_transform(nELISA_plate_430420_SHSY5Y_treatments_values)

# add umap coordinates to dataframe of metadata and raw data
nELISA_plate_430420_SHSY5Y_treatments["umap_1"] = proj_2d[:, 0]
nELISA_plate_430420_SHSY5Y_treatments["umap_2"] = proj_2d[:, 1]

# define output paths
nELISA_plate_430420_selected_treatments_out_path = pathlib.Path(
    "./results/nELISA_plate_430420_umap_SHSY5Y_selected_treatments.csv"
)
# write to csv
nELISA_plate_430420_SHSY5Y_treatments.to_csv(
    nELISA_plate_430420_selected_treatments_out_path, index=False
)

  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nELISA_plate_430420_SHSY5Y_treatments["umap_1"] = proj_2d[:, 0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nELISA_plate_430420_SHSY5Y_treatments["umap_2"] = proj_2d[:, 1]
