## Nucleus morphology UMAP

In [1]:
import pathlib
import pandas as pd

from pycytominer import normalize, feature_select
from pycytominer.cyto_utils import infer_cp_features

import umap

In [2]:
# Columns of interest, listed by their exact names in the single-cell table:
# two CorrGasderminD integrated-intensity columns plus one neighbors column.
gasdermin_cols = [
    # CorrGasderminD integrated intensity on the TranslocatedNuclei objects
    "TranslocatedNuclei_Intensity_IntegratedIntensity_CorrGasderminD",
    # CorrGasderminD integrated intensity on the DilatedNuclei objects
    "DilatedNuclei_Intensity_IntegratedIntensity_CorrGasderminD",
    # NumberOfNeighbors_Expanded on the TranslocatedNuclei objects
    "TranslocatedNuclei_Neighbors_NumberOfNeighbors_Expanded",
]

In [3]:
# Directory holding the processed single-cell profiles, relative to this notebook
feature_dir = pathlib.Path("..") / "3.process_features" / "data"

# The input file name is assembled as <prefix><dilation factor><suffix>
file_prefix = "interstellar_wave1_dilate"
file_suffix = "_sc.csv.gz"
dilation_factor = 50

cp_file = feature_dir / f"{file_prefix}{dilation_factor}{file_suffix}"
output_umap_file = pathlib.Path("results") / f"umap_embeddings_dilation{dilation_factor}.csv.gz"

# Read the profiles, discard outlier rows (DilatedNuclei gasdermin D integrated
# intensity of 100 or more), and renumber the surviving rows from zero
cp_df = (
    pd.read_csv(cp_file, low_memory=False)
    .query("DilatedNuclei_Intensity_IntegratedIntensity_CorrGasderminD < 100")
    .reset_index(drop=True)
)

print(cp_df.shape)
cp_df.head()

(17296, 841)


Unnamed: 0,Metadata_wellName,Metadata_row,Metadata_col,Metadata_alias,Metadata_treatment,Metadata_dose,Metadata_ImageNumber,Metadata_Plate,Metadata_Well,Metadata_TranslocatedNuclei_Parent_DilatedNuclei,...,Nuclei_Texture_SumVariance_CorrGasderminD_3_02_256,Nuclei_Texture_SumVariance_CorrGasderminD_3_03_256,Nuclei_Texture_Variance_CorrDNA_3_00_256,Nuclei_Texture_Variance_CorrDNA_3_01_256,Nuclei_Texture_Variance_CorrDNA_3_02_256,Nuclei_Texture_Variance_CorrDNA_3_03_256,Nuclei_Texture_Variance_CorrGasderminD_3_00_256,Nuclei_Texture_Variance_CorrGasderminD_3_01_256,Nuclei_Texture_Variance_CorrGasderminD_3_02_256,Nuclei_Texture_Variance_CorrGasderminD_3_03_256
0,A10,1,10,9,Disulfiram,2.5µM,19,70117_20230118MM1_Ab Test_V2,A10,1,...,0.562764,0.519955,5.453863,5.338515,5.496212,5.341068,0.208618,0.20586,0.207914,0.20388
1,A10,1,10,9,Disulfiram,2.5µM,19,70117_20230118MM1_Ab Test_V2,A10,4,...,0.562764,0.519955,5.453863,5.338515,5.496212,5.341068,0.208618,0.20586,0.207914,0.20388
2,A10,1,10,9,Disulfiram,2.5µM,19,70117_20230118MM1_Ab Test_V2,A10,2,...,0.606445,0.583598,1.243379,1.224368,1.241659,1.238014,0.248763,0.246857,0.247815,0.248309
3,A10,1,10,9,Disulfiram,2.5µM,19,70117_20230118MM1_Ab Test_V2,A10,3,...,0.630966,0.607802,1.578343,1.586902,1.568943,1.577166,0.249089,0.249433,0.24949,0.249476
4,A10,1,10,9,Disulfiram,2.5µM,19,70117_20230118MM1_Ab Test_V2,A10,5,...,0.635611,0.585116,3.693021,3.61539,3.75539,3.691136,0.240287,0.240656,0.24094,0.242418


In [4]:
# Only Nuclei-compartment features go through the pipeline, and any feature
# whose name mentions the gasdermin channel is excluded
nucleus_features = [
    feature
    for feature in infer_cp_features(cp_df, compartments="Nuclei")
    if "Gasdermin" not in feature
]

metadata_features = infer_cp_features(cp_df, metadata=True)

# Feature-selection operations passed to pycytominer's feature_select below
ops = ["variance_threshold", "correlation_threshold", "blocklist", "drop_na_columns"]

# Step 1: standardize the nucleus features
cp_norm_fs_df = normalize(profiles=cp_df, features=nucleus_features, method="standardize")

# Step 2: run feature selection on the normalized profiles
cp_norm_fs_df = feature_select(
    profiles=cp_norm_fs_df, features=nucleus_features, operation=ops, na_cutoff=0
)

print(cp_norm_fs_df.shape)
cp_norm_fs_df.head()

(17296, 89)


Unnamed: 0,Metadata_wellName,Metadata_row,Metadata_col,Metadata_alias,Metadata_treatment,Metadata_dose,Metadata_ImageNumber,Metadata_Plate,Metadata_Well,Metadata_TranslocatedNuclei_Parent_DilatedNuclei,...,Nuclei_Texture_Correlation_CorrDNA_3_00_256,Nuclei_Texture_Correlation_CorrDNA_3_01_256,Nuclei_Texture_Correlation_CorrDNA_3_02_256,Nuclei_Texture_Correlation_CorrDNA_3_03_256,Nuclei_Texture_DifferenceEntropy_CorrDNA_3_02_256,Nuclei_Texture_DifferenceVariance_CorrDNA_3_03_256,Nuclei_Texture_InfoMeas1_CorrDNA_3_00_256,Nuclei_Texture_InfoMeas1_CorrDNA_3_01_256,Nuclei_Texture_InfoMeas1_CorrDNA_3_02_256,Nuclei_Texture_InfoMeas1_CorrDNA_3_03_256
0,A10,1,10,9,Disulfiram,2.5µM,19,70117_20230118MM1_Ab Test_V2,A10,1,...,0.860423,0.845928,0.722466,0.688251,0.179948,-0.419181,-0.558412,-0.422113,-0.319445,-0.303139
1,A10,1,10,9,Disulfiram,2.5µM,19,70117_20230118MM1_Ab Test_V2,A10,4,...,0.860423,0.845928,0.722466,0.688251,0.179948,-0.419181,-0.558412,-0.422113,-0.319445,-0.303139
2,A10,1,10,9,Disulfiram,2.5µM,19,70117_20230118MM1_Ab Test_V2,A10,2,...,-1.25484,-1.631391,-2.007997,-1.616413,-0.369674,0.622749,1.091174,1.518552,1.75261,1.497787
3,A10,1,10,9,Disulfiram,2.5µM,19,70117_20230118MM1_Ab Test_V2,A10,3,...,-0.037423,-0.213935,-0.883704,-0.072768,-0.417221,0.669601,-0.184422,0.16388,0.749631,0.088193
4,A10,1,10,9,Disulfiram,2.5µM,19,70117_20230118MM1_Ab Test_V2,A10,5,...,-1.386999,-1.839468,-1.384641,-1.373801,0.780289,-0.677501,1.669752,1.603315,1.717936,1.47023


In [5]:
# Fit UMAP on the selected features only (metadata columns are dropped first)
cp_feature_df = cp_norm_fs_df.drop(metadata_features, axis="columns")

# Fixed random_state so the 2-component embedding is repeatable across runs
umap_fit = umap.UMAP(random_state=42, n_components=2)

# Reuse the input's row index for the embedding: the column-wise concat below
# aligns on index, so this keeps every coordinate attached to its own cell
# instead of relying on the profiles having a default 0..n-1 index.
embeddings_df = pd.DataFrame(
    umap_fit.fit_transform(cp_feature_df),
    columns=["UMAP0", "UMAP1"],
    index=cp_feature_df.index,
)

# Final table: metadata, UMAP coordinates, and the raw (un-normalized)
# gasdermin columns taken from the filtered single-cell profiles
embeddings_df = pd.concat([
    cp_norm_fs_df.loc[:, metadata_features],
    embeddings_df,
    cp_df.loc[:, gasdermin_cols]
], axis="columns")

# Collapse each dose label into a low/high bin; labels that are not keys of
# this mapping are left unchanged by Series.replace
dose_recode = {
    "0": "low",
    "0.1µM": "low",
    "1µM": "low",
    "1µg/ml + 1µM": "low",
    "2.5µM": "high",
    "10µM": "high",
    "1µg/ml + 10µM": "high"
}
embeddings_df = embeddings_df.assign(
    Metadata_dose_recode=embeddings_df.Metadata_dose.replace(dose_recode)
)

# Output file
# Create the output directory when it is missing so a fresh checkout does not
# fail on the write after the UMAP fit has already been computed
output_umap_file.parent.mkdir(parents=True, exist_ok=True)
embeddings_df.to_csv(output_umap_file, index=False)

print(embeddings_df.shape)
embeddings_df.head()

(17296, 19)


Unnamed: 0,Metadata_wellName,Metadata_row,Metadata_col,Metadata_alias,Metadata_treatment,Metadata_dose,Metadata_ImageNumber,Metadata_Plate,Metadata_Well,Metadata_TranslocatedNuclei_Parent_DilatedNuclei,Metadata_TranslocatedNuclei_Parent_Nuclei,Metadata_DilatedNuclei_Number_Object_Number,Metadata_Nuclei_Number_Object_Number,UMAP0,UMAP1,TranslocatedNuclei_Intensity_IntegratedIntensity_CorrGasderminD,DilatedNuclei_Intensity_IntegratedIntensity_CorrGasderminD,TranslocatedNuclei_Neighbors_NumberOfNeighbors_Expanded,Metadata_dose_recode
0,A10,1,10,9,Disulfiram,2.5µM,19,70117_20230118MM1_Ab Test_V2,A10,1,1,1,1,9.128251,8.58147,17.836798,19.612568,3,high
1,A10,1,10,9,Disulfiram,2.5µM,19,70117_20230118MM1_Ab Test_V2,A10,4,1,4,1,9.154372,8.57803,15.1047,15.497138,5,high
2,A10,1,10,9,Disulfiram,2.5µM,19,70117_20230118MM1_Ab Test_V2,A10,2,2,2,2,5.802378,4.135841,14.489743,19.689052,3,high
3,A10,1,10,9,Disulfiram,2.5µM,19,70117_20230118MM1_Ab Test_V2,A10,3,3,3,3,10.566055,0.882976,42.425449,62.860749,3,high
4,A10,1,10,9,Disulfiram,2.5µM,19,70117_20230118MM1_Ab Test_V2,A10,5,4,5,4,8.293353,1.635236,35.623372,57.850413,4,high
