In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pathlib
import plotly
import plotly_express as px
import plotly.graph_objs as go
import iplot
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import umap
from sklearn import preprocessing

In [2]:
def umap_graph(df: pd.DataFrame, color: str, save_name: str) -> None:
    """Plot a 2D UMAP scatter, display it, and save it as a PNG image.

    Parameters
    ----------
    df : pd.DataFrame
        pandas dataframe holding the UMAP coordinates in the columns
        ``umap_1`` and ``umap_2`` together with the column named by ``color``
    color : str
        name of the column to color points by
    save_name : str
        output path without extension; ``.png`` is appended and any missing
        parent directory is created before the image is written

    Returns
    -------
    None
        the figure is shown and written to disk as side effects
    """

    # plot UMAP
    # NOTE(review): plotly express matches the keys of `labels` against column
    # names, so the "color" entry only applies when the column passed in is
    # literally named "color" -- confirm this is the intended legend title.
    fig_2d = px.scatter(
        df,
        x="umap_1",
        y="umap_2",
        color=color,
        labels={"color": "Cell Type"},
        title="UMAP projection of the nELISA data",
    ).update_layout(xaxis_title="UMAP_1", yaxis_title="UMAP_2")

    fig_2d.update_traces(marker={"size": 12})
    fig_2d.show()

    # save UMAP
    # Build the path from the full string (not with_suffix) so that dots inside
    # save_name are never mistaken for an existing file extension.
    out_path = pathlib.Path(f"{save_name}.png")
    # write_image does not create directories; make sure the target exists so a
    # fresh checkout without the output folder does not fail here.
    out_path.parent.mkdir(parents=True, exist_ok=True)
    fig_2d.write_image(str(out_path))

In [3]:
# Each input is declared as a path and read immediately, so the location and
# the resulting dataframe sit next to each other.

# nELISA measurements for plates 430418 and 430419
nELISA_plate_430418_430419_path = pathlib.Path(
    "../Data/clean/nELISA_plate_430418_430419.csv"
)
nELISA_plate_430418_430419 = pd.read_csv(nELISA_plate_430418_430419_path)

# nELISA measurements for plate 430420
nELISA_plate_430420_path = pathlib.Path("../Data/clean/nELISA_plate_430420.csv")
nELISA_plate_430420 = pd.read_csv(nELISA_plate_430420_path)

# manually assigned treatment clusters
manual_clusters_path = pathlib.Path("../Data/Manual_Treatment_Clusters.csv")
manual_clusters = pd.read_csv(manual_clusters_path)

In [4]:
# keep only the columns whose name contains "NSU" and cast them to float
nELISA_data_values = nELISA_plate_430420.filter(like="NSU", axis="columns").astype(
    "float"
)
nELISA_data_values.head()

Unnamed: 0,Activin A [NSU],AITRL (GITR Ligand) [NSU],Amphiregulin [NSU],Amyloid beta [NSU],APRIL [NSU],BAFF [NSU],BCMA (TNFRSF17) [NSU],BDNF [NSU],BMP2 [NSU],BMP3 [NSU],...,TWEAK [NSU],uPA [NSU],VCAM-1 [NSU],VEGF Receptor 2 (Flk-1) [NSU],VEGF-A (165) [NSU],VEGF-C [NSU],VEGF-D [NSU],VEGFR-1 [NSU],WISP-1 (CCN4) [NSU],XCL1 (Lymphotactin) [NSU]
0,26.491764,-1.073383,25.817862,-2.435479,-2.556473,-1.25913,-0.043782,-0.661045,-1.50878,-2.046371,...,-0.728423,-0.681505,0.124148,2.617804,-2.210675,-1.433347,1.077095,5.713861,0.055628,-0.889479
1,24.943616,0.718127,26.59841,0.92619,-3.238686,-0.387113,3.650604,-0.227975,0.450686,0.104226,...,-1.296642,-1.387073,-0.140898,-1.198079,-0.128332,-1.550067,-2.169066,6.024132,0.340468,-0.844918
2,1.555501,0.268977,27.596171,-2.300495,-3.045141,-1.129626,2.458642,0.278998,0.062647,-1.1567,...,-0.746483,0.115185,-0.002896,-1.473596,0.235711,-1.449242,1.852248,-0.168032,0.961319,-0.363984
3,-0.92914,0.076314,26.313207,0.253148,-6.751763,-0.539164,-1.718383,1.443255,-0.341732,-0.719875,...,-2.232221,-1.126181,1.062176,1.542644,0.269262,-2.311105,2.088687,-0.306334,0.562805,-0.155359
4,0.398325,-0.130139,8.797693,-0.445542,0.371358,-0.462473,1.422635,-1.915471,-0.748352,-1.583011,...,-1.06539,-0.314291,0.171604,-0.137741,0.373863,-1.042447,-1.016304,-0.289143,0.373108,-0.804121


In [5]:
# Per-sensor max normalization: every column is scaled by its own maximum.
# Note that this does not confine values to [-1, 1]; entries whose magnitude
# exceeds the column maximum stay beyond -1 after scaling.
max_values = nELISA_data_values.max()  # one maximum per column
# dividing a frame by a Series aligns the Series index with the column labels
nELISA_data_values_sensor_max_norm = nELISA_data_values / max_values
nELISA_data_values_sensor_max_norm.head()

Unnamed: 0,Activin A [NSU],AITRL (GITR Ligand) [NSU],Amphiregulin [NSU],Amyloid beta [NSU],APRIL [NSU],BAFF [NSU],BCMA (TNFRSF17) [NSU],BDNF [NSU],BMP2 [NSU],BMP3 [NSU],...,TWEAK [NSU],uPA [NSU],VCAM-1 [NSU],VEGF Receptor 2 (Flk-1) [NSU],VEGF-A (165) [NSU],VEGF-C [NSU],VEGF-D [NSU],VEGFR-1 [NSU],WISP-1 (CCN4) [NSU],XCL1 (Lymphotactin) [NSU]
0,0.780796,-0.497382,0.76875,-0.836129,-0.315212,-0.492554,-0.011993,-0.106441,-0.570024,-0.321927,...,-0.337339,-0.063476,0.042025,1.0,-0.053571,-0.305419,0.23769,0.707199,0.01807,-0.218599
1,0.735167,0.332764,0.791992,0.317972,-0.399328,-0.151433,1.0,-0.036708,0.170271,0.016396,...,-0.600487,-0.129194,-0.047694,-0.457665,-0.00311,-0.330289,-0.478662,0.745601,0.110599,-0.207648
2,0.045846,0.124638,0.821701,-0.789788,-0.375464,-0.441894,0.673489,0.044924,0.023668,-0.181968,...,-0.345703,0.010728,-0.00098,-0.562913,0.005712,-0.308806,0.408748,-0.020797,0.312277,-0.089453
3,-0.027385,0.035362,0.783499,0.086909,-0.832489,-0.210913,-0.470712,0.232392,-0.129108,-0.113248,...,-1.033762,-0.104894,0.35955,0.589289,0.006525,-0.492452,0.460924,-0.037915,0.182823,-0.038181
4,0.01174,-0.060304,0.261959,-0.15296,0.045788,-0.180913,0.389698,-0.308428,-0.282731,-0.249033,...,-0.493392,-0.029274,0.058089,-0.052617,0.00906,-0.222125,-0.224275,-0.035787,0.121201,-0.197622


In [6]:
# Sanity check of the normalization: report mean and standard deviation of the
# first data column before and after scaling.
activin_raw = nELISA_data_values["Activin A [NSU]"]
activin_norm = nELISA_data_values_sensor_max_norm["Activin A [NSU]"]

print(f"NSU nELISA mean of Activin A: {activin_raw.mean()}")
print(f"NSU nELISA STDEV of Activin A: {activin_raw.std()}")

print(f"NSU sensor normalized nELISA mean of Activin A: {activin_norm.mean()}")
print(f"NSU sensor normalized nELISA STDEV of Activin A: {activin_norm.std()}")

NSU nELISA mean of Activin A: 2.1514411832893092
NSU nELISA STDEV of Activin A: 7.565218374231674
NSU sensor normalized nELISA mean of Activin A: 0.06340974112531376
NSU sensor normalized nELISA STDEV of Activin A: 0.22297078925163913


In [7]:
# rename columns to remove special character "/" (column names are used later
# to build figure file names); regex=False makes the literal match explicit
nELISA_plate_430420.columns = nELISA_plate_430420.columns.str.replace(
    "/", "_", regex=False
)

# set umap parameters (random_state is fixed so the embedding is reproducible)
umap_params = umap.UMAP(
    n_neighbors=6,
    min_dist=0.8,
    n_components=2,
    metric="cosine",
    spread=1.1,
    init="random",
    random_state=0,
)

# fit and transform data for umap
proj_2d = umap_params.fit_transform(nELISA_data_values_sensor_max_norm)

# add umap coordinates to dataframe of metadata and raw data
# This must happen before the merge below: proj_2d rows follow the row order of
# the unmerged plate dataframe the normalized values were derived from.
nELISA_plate_430420["umap_1"] = proj_2d[:, 0]
nELISA_plate_430420["umap_2"] = proj_2d[:, 1]

# add manual clusters columns to dataframe
# NOTE(review): an inner merge silently drops wells whose (inducer1, inhibitor)
# pair is absent from manual_clusters -- confirm that is intended.
# NOTE(review): re-running this cell merges manual_clusters a second time and
# produces suffixed duplicate columns; re-run from the data-loading cell instead.
nELISA_plate_430420 = pd.merge(
    nELISA_plate_430420, manual_clusters, on=["inducer1", "inhibitor"], how="inner"
)

# define output paths
nELISA_plate_430420_out_path = pathlib.Path("./results/nELISA_plate_430420_umap.csv")
# to_csv does not create directories, so make sure the results folder exists
nELISA_plate_430420_out_path.parent.mkdir(parents=True, exist_ok=True)
# write to csv
nELISA_plate_430420.to_csv(nELISA_plate_430420_out_path, index=False)

In [8]:
# Discard every column whose name matches "pgML", then draw and save one UMAP
# figure per remaining column, colored by that column.
pgml_columns = list(nELISA_plate_430420.filter(regex="pgML"))
nELISA_plate_430420 = nELISA_plate_430420.drop(columns=pgml_columns)

for column_name in nELISA_plate_430420.columns:
    umap_graph(
        nELISA_plate_430420,
        str(column_name),
        f"./figures/nELISA_Normalized_UMAP_{column_name}",
    )