# Get 2D UMAP embeddings for NF1 CellProfiler data

## Import libraries

In [1]:
import pathlib
import pandas as pd
import umap

import UMAPutils as utils

## Set paths, load the normalized data, and split metadata from features

In [2]:
# Location of the gzip-compressed CSV that is read below.
norm_data_path = pathlib.Path(
    "../../../4_processing_features/data/nf1_sc_norm_fs_cellprofiler.csv.gz"
)

# Read the file, using its first column as the dataframe index.
norm_data = pd.read_csv(
    norm_data_path,
    index_col=0,
    compression="gzip",
)

# Separate the loaded table into two pieces with the UMAPutils helper.
# NOTE(review): presumably metadata columns vs. feature columns, going by the
# variable names -- confirm against UMAPutils.split_data.
metadata_dataframe, feature_data = utils.split_data(norm_data)

## Get embeddings as a pandas dataframe

In [3]:
# Build a two-component UMAP reducer; random_state is pinned to 0 so the run is seeded.
fit = umap.UMAP(n_components=2, random_state=0)

# Fit the reducer on feature_data and obtain the transformed coordinates in one call.
umap_coordinates = fit.fit_transform(feature_data)

# Wrap the coordinates in a dataframe with one named column per component,
# then display it as the cell output.
embeddings = pd.DataFrame(umap_coordinates, columns=["UMAP1", "UMAP2"])
embeddings

Unnamed: 0,UMAP1,UMAP2
0,11.638459,-2.359264
1,11.901725,-2.993272
2,11.517516,-2.876570
3,11.468371,-2.884644
4,12.816878,-1.696046
...,...,...
144,14.656168,1.404544
145,14.670760,-1.818202
146,15.191056,-2.393243
147,14.885182,-1.832831


## Combine metadata with embeddings and save as new `.csv.gz` file

In [4]:
# Destination handed to the helper below (a .csv.gz path).
save_path = pathlib.Path("../../data/norm_fs_embeddings_cp.csv.gz")

# Pass the metadata, the embeddings, and the destination to the UMAPutils helper.
# NOTE(review): per the section heading the helper is expected to write the
# combined table to save_path as well as return it -- confirm in UMAPutils.
norm_fs_embeddings_data = utils.merge_metadata_embeddings(
    metadata_dataframe,
    embeddings,
    save_path,
)
norm_fs_embeddings_data

Unnamed: 0,Metadata_WellRow,Metadata_WellCol,Metadata_number_of_singlecells,Metadata_gene_name,Metadata_genotype,Metadata_ImageNumber,Metadata_Plate,Metadata_Well,Metadata_Cytoplasm_Parent_Cells,Metadata_Cytoplasm_Parent_OrigNuclei,Metadata_Cells_Number_Object_Number,Metadata_Nuclei_Number_Object_Number,UMAP1,UMAP2
0,C,6,12,NF1,WT,1,1,C6,1,4,1,4,11.638459,-2.359264
1,C,6,12,NF1,WT,1,1,C6,2,5,2,5,11.901725,-2.993272
2,C,6,12,NF1,WT,1,1,C6,3,7,3,7,11.517516,-2.876570
3,C,6,12,NF1,WT,1,1,C6,4,8,4,8,11.468371,-2.884644
4,C,6,12,NF1,WT,4,1,C6,1,3,1,3,12.816878,-1.696046
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
144,F,7,46,NF1,Null,32,1,F7,8,12,8,12,14.656168,1.404544
145,F,7,46,NF1,Null,32,1,F7,9,13,9,13,14.670760,-1.818202
146,F,7,46,NF1,Null,32,1,F7,10,14,10,14,15.191056,-2.393243
147,F,7,46,NF1,Null,32,1,F7,11,16,11,16,14.885182,-1.832831
