In [1]:
import pathlib

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.graph_objs as go
import plotly_express as px
import seaborn as sns
import umap

In [2]:
# nELISA readout table for plate 430420 (SH-SY5Y): locate and load
nELISA_plate_430420_SH_SY5Y_path = pathlib.Path(
    "../../Data/clean/Plate2/nELISA_plate_430420_SH_SY5Y.csv"
)
nELISA_plate_430420_SH_SY5Y = pd.read_csv(nELISA_plate_430420_SH_SY5Y_path)

# manual treatment cluster table: locate and load
manual_cluster_path = pathlib.Path(
    "../../Data/clean/Plate2/Manual_Treatment_Clusters.csv"
)
manual_clusters = pd.read_csv(manual_cluster_path)

# work on a deep copy so the frame as loaded from disk stays untouched
nELISA_orgingal_plate = nELISA_plate_430420_SH_SY5Y.copy(deep=True)

In [3]:
# keep only the measurement columns (those whose name contains "NSU")
# and cast them to float in a single chained step
nELISA_data_values = nELISA_orgingal_plate.filter(like="NSU", axis="columns").astype(
    "float"
)
nELISA_data_values.head()

Unnamed: 0,Activin A [NSU],AITRL (GITR Ligand) [NSU],Amphiregulin [NSU],Amyloid beta [NSU],APRIL [NSU],BAFF [NSU],BCMA (TNFRSF17) [NSU],BDNF [NSU],BMP2 [NSU],BMP3 [NSU],...,TWEAK [NSU],uPA [NSU],VCAM-1 [NSU],VEGF Receptor 2 (Flk-1) [NSU],VEGF-A (165) [NSU],VEGF-C [NSU],VEGF-D [NSU],VEGFR-1 [NSU],WISP-1 (CCN4) [NSU],XCL1 (Lymphotactin) [NSU]
0,0.050305,-0.046947,1.361487,0.553065,3.16121,0.323546,0.200321,-4.628914,-1.455375,1.411887,...,-1.053944,1.931219,0.43912,-1.538831,30.157265,-0.733615,0.413442,-0.897618,-1.841281,0.048389
1,-0.136531,1.014299,-1.82931,-2.86016,3.326872,-0.29377,0.520861,1.004449,-0.906373,2.202077,...,-0.871663,3.274879,-0.738697,0.816301,29.581649,-0.939235,-0.385612,-1.410167,-1.406394,-0.863555
2,1.240569,0.229261,1.995245,-2.499106,-4.232099,-1.807823,-0.656196,0.746356,-0.001466,-3.299824,...,-0.25136,1.923855,-0.523146,0.204165,31.313849,-2.824671,1.220794,-1.488981,1.496881,-0.107094
3,0.629592,1.492948,-1.85738,-0.308989,-0.640024,0.713347,-0.554004,-1.260493,0.067077,1.431232,...,-1.318833,2.165008,-0.282784,-0.779901,28.934441,-0.304395,0.177668,-0.300231,0.083451,-1.798261
4,-0.375887,0.390727,-0.026048,-0.563376,1.802794,0.231624,1.355083,-0.230052,-0.209977,0.476215,...,-1.003948,1.439554,0.407104,-0.753367,32.680499,-1.739692,-1.222531,0.841332,-0.464893,0.077351


In [4]:
# per-column maximum, used as the scaling factor for that column
max_values = nELISA_data_values.max(axis=0)

# scale every column by its own maximum (the Series aligns on column labels).
# NOTE: the data contain negative values, so the result is capped at 1 but is
# not bounded below (values under -1 are visible in the output).
nELISA_data_values_sensor_max_norm = nELISA_data_values / max_values
nELISA_data_values_sensor_max_norm.head()

Unnamed: 0,Activin A [NSU],AITRL (GITR Ligand) [NSU],Amphiregulin [NSU],Amyloid beta [NSU],APRIL [NSU],BAFF [NSU],BCMA (TNFRSF17) [NSU],BDNF [NSU],BMP2 [NSU],BMP3 [NSU],...,TWEAK [NSU],uPA [NSU],VCAM-1 [NSU],VEGF Receptor 2 (Flk-1) [NSU],VEGF-A (165) [NSU],VEGF-C [NSU],VEGF-D [NSU],VEGFR-1 [NSU],WISP-1 (CCN4) [NSU],XCL1 (Lymphotactin) [NSU]
0,0.030033,-0.030537,0.275591,0.307953,0.561603,0.134832,0.057937,-1.275958,-0.929072,0.292741,...,-0.488091,0.179877,0.165775,-0.775352,0.730801,-0.156319,0.120292,-0.465569,-0.59955,0.048768
1,-0.081511,0.659766,-0.370288,-1.59257,0.591034,-0.122423,0.150645,0.276876,-0.578604,0.456579,...,-0.403676,0.305027,-0.278871,0.4113,0.716852,-0.200133,-0.112195,-0.731414,-0.457944,-0.870331
2,0.740639,0.149126,0.403876,-1.391531,-0.751851,-0.753377,-0.189787,0.205733,-0.000936,-0.684186,...,-0.116407,0.179191,-0.197497,0.10287,0.758829,-0.601883,0.355194,-0.772292,0.487408,-0.107934
3,0.375876,0.97111,-0.37597,-0.172049,-0.113703,0.297274,-0.16023,-0.347454,0.04282,0.296752,...,-0.610764,0.201652,-0.106756,-0.392959,0.701168,-0.064861,0.051693,-0.155721,0.027173,-1.81237
4,-0.22441,0.254154,-0.005273,-0.313694,0.320274,0.096525,0.39192,-0.063414,-0.134044,0.098739,...,-0.464938,0.134082,0.153689,-0.37959,0.791947,-0.370695,-0.3557,0.436375,-0.151376,0.077958


In [5]:
# sanity check: compare mean and standard deviation of the first data column
# before and after the max normalization
activin_raw = nELISA_data_values["Activin A [NSU]"]
activin_norm = nELISA_data_values_sensor_max_norm["Activin A [NSU]"]

print(f"NSU nELISA mean of Activin A: {activin_raw.mean()}")
print(f"NSU nELISA STDEV of Activin A: {activin_raw.std()}")

print(f"NSU sensor normalized nELISA mean of Activin A: {activin_norm.mean()}")
print(f"NSU sensor normalized nELISA STDEV of Activin A: {activin_norm.std()}")

NSU nELISA mean of Activin A: -0.281989801113817
NSU nELISA STDEV of Activin A: 0.7676688394550193
NSU sensor normalized nELISA mean of Activin A: -0.16835212877937208
NSU sensor normalized nELISA STDEV of Activin A: 0.4583097786138692


In [6]:
# Embed the max-normalized nELISA values in 2D with UMAP, attach the embedding
# and the manual cluster annotations to the plate table, build combined
# treatment labels, and write the result to CSV.

# rename columns to remove special character "/" (literal match, not a regex)
nELISA_orgingal_plate.columns = nELISA_orgingal_plate.columns.str.replace(
    "/", "_", regex=False
)

# set umap parameters (random_state is fixed so the embedding is reproducible)
umap_params = umap.UMAP(
    n_neighbors=6,
    min_dist=0.8,
    n_components=2,
    metric="cosine",
    spread=1.1,
    init="random",
    random_state=0,
)

# fit and transform data for umap; rows of proj_2d are in the same order as
# the rows of the normalized frame, which was derived from the plate table
proj_2d = umap_params.fit_transform(nELISA_data_values_sensor_max_norm)

# add umap coordinates to dataframe of metadata and raw data
nELISA_orgingal_plate["umap_1"] = proj_2d[:, 0]
nELISA_orgingal_plate["umap_2"] = proj_2d[:, 1]

# add manual clusters columns to dataframe
# NOTE: this is an inner join, so wells whose (inducer1, inhibitor) pair is
# absent from manual_clusters are dropped from the output.
# The keys are passed as a list (the documented form); a tuple can be
# interpreted as a single tuple-valued column label.
nELISA_plate_430420 = pd.merge(
    nELISA_orgingal_plate, manual_clusters, on=["inducer1", "inhibitor"], how="inner"
)

# combined treatment labels built by string concatenation
# (assumes these columns hold strings — TODO confirm; a missing value in any
# part yields a missing label)
nELISA_plate_430420["inducer1_plus_concentration"] = (
    nELISA_plate_430420["inducer1"]
    + "_"
    + nELISA_plate_430420["inducer1_concentration"]
)

nELISA_plate_430420["inducer1_plus_inhibitor"] = (
    nELISA_plate_430420["inducer1"] + "_" + nELISA_plate_430420["inhibitor"]
)

nELISA_plate_430420["inducer1_plus_concentration_plus_inhibitor"] = (
    nELISA_plate_430420["inducer1"]
    + "_"
    + nELISA_plate_430420["inducer1_concentration"]
    + "_"
    + nELISA_plate_430420["inhibitor"]
)

# define output paths
nELISA_plate_430420_out_path = pathlib.Path(
    "./results/nELISA_plate_430420_umap_SH-SY5Y.csv"
)
# make sure the output directory exists; to_csv does not create it
nELISA_plate_430420_out_path.parent.mkdir(parents=True, exist_ok=True)
# write to csv
nELISA_plate_430420.to_csv(nELISA_plate_430420_out_path, index=False)