In [7]:
#Imports

import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
import seaborn as sns
import umap
import os

from ark.analysis import visualize, dimensionality_reduction

## Load Data from a CSV

In [9]:
# Set up file paths.
# NOTE: despite its name, data_dir is the path to the cell-table CSV file, not a directory.
base_dir = "../data/example_dataset/"
data_dir = os.path.join(
    base_dir,
    "spatial_enrichment_input_data/select_cell_data.csv",
)

In [11]:

# Read the cell table from the CSV path held in data_dir, then preview the first five rows.
all_data = pd.read_csv(data_dir)
all_data.head(n=5)

Unnamed: 0.1,Unnamed: 0,SampleID,cellLabelInImage,cellSize,C,Na,Si,HH3,Vimentin,SMA,...,NaKATPase,HLA.Class.1,Ta,Au,Tissue,PatientID,lineage,cell_type,cell_lin,lintype_num
0,33751,64,3,41,0.0,0.902025,0.448598,0.507697,0.631015,0.0,...,0.0,0.580343,0.312942,0.443076,gran_lung,1.0,immune,imm_other,other,4
1,33752,64,4,184,0.0,0.886973,0.454619,0.438676,0.622629,0.0,...,0.340696,0.590069,0.318355,0.459903,gran_lung,1.0,immune,neutrophil,granulocyte,3
2,33753,64,8,155,0.0,0.91702,0.979366,0.602476,0.0,0.0,...,0.034656,0.381503,0.222221,0.34184,gran_lung,1.0,endothelial,endothelial,nonimmune,5
3,33754,64,11,154,0.0,0.889487,0.69826,0.429029,0.0,0.0,...,0.034879,0.413266,0.608899,0.678524,gran_lung,1.0,immune,imm_other,other,4
4,33755,64,12,155,0.0,0.931851,0.824671,0.342842,0.403899,0.0,...,0.266889,0.591914,0.801724,0.802122,gran_lung,1.0,immune,CD14_Mono,myeloid,1


## Visualize Patient-Population Distribution Examples

In [None]:
# Visualize PatientID vs. cell type. No optional arguments are overridden,
# so the function's defaults apply and every graph is shown.
visualize.visualize_patient_population_distribution(
    all_data,
    "PatientID",
    "cell_type",
)

In [None]:
# Visualize SampleID vs. cell_type. The total-count plot is switched off
# (show_total_count=False) because the overall population is the same as in the previous call.
visualize.visualize_patient_population_distribution(
    all_data,
    "SampleID",
    "cell_type",
    show_total_count=False,
)

In [None]:
# Visualize PatientID vs. cell lineage (the "cell_lin" column), using the default options.
visualize.visualize_patient_population_distribution(
    all_data,
    "PatientID",
    "cell_lin",
)

In [None]:
# Visualize cell lineage vs. cell type, with the total-count plot switched off.
visualize.visualize_patient_population_distribution(
    all_data,
    "cell_lin",
    "cell_type",
    show_total_count=False,
)

In [None]:
# Visualize cell lineage vs. cell type again, this time passing save_dir="" so the
# figures are also saved (intended target: the current directory — confirm against the ark helper).
visualize.visualize_patient_population_distribution(
    all_data,
    "cell_lin",
    "cell_type",
    show_total_count=False,
    save_dir="",
)

In [None]:
# PCA projection over all target (marker) columns, labelled by PatientID.
# save_dir="" is passed so the image is saved (intended target: the current directory).
dimensionality_reduction.visualize_dimensionality_reduction(
    all_data,
    [
        'C', 'Na', 'Si', 'HH3', 'Vimentin', 'SMA', 'Background',
        'Collagen.1', 'Lag3', 'CD4', 'CD14', 'Foxp3', 'PD.1', 'CD31',
        'PD.L1', 'E.cadherin', 'Ki67', 'CD209', 'CD206', 'gdTCR', 'iNOS',
        'CD68', 'CD36', 'CD8', 'CD3', 'IDO', 'CD11c', 'CD163', 'CD20',
        'CD16', 'IFNg', 'HLA.DR.DQ.DP', 'CD11b', 'CD45', 'H3K9Ac',
        'Keratin.pan', 'CD103', 'MastChyTry', 'MPO', 'NaKATPase',
        'HLA.Class.1', 'Ta', 'Au',
    ],
    "PatientID",
    algorithm="PCA",
    save_dir="",
)

In [None]:
# PCA projection over all target (marker) columns, labelled by cell type
# (the "cell_type" column). No save_dir is passed for this call.
dimensionality_reduction.visualize_dimensionality_reduction(
    all_data,
    [
        'C', 'Na', 'Si', 'HH3', 'Vimentin', 'SMA', 'Background',
        'Collagen.1', 'Lag3', 'CD4', 'CD14', 'Foxp3', 'PD.1', 'CD31',
        'PD.L1', 'E.cadherin', 'Ki67', 'CD209', 'CD206', 'gdTCR', 'iNOS',
        'CD68', 'CD36', 'CD8', 'CD3', 'IDO', 'CD11c', 'CD163', 'CD20',
        'CD16', 'IFNg', 'HLA.DR.DQ.DP', 'CD11b', 'CD45', 'H3K9Ac',
        'Keratin.pan', 'CD103', 'MastChyTry', 'MPO', 'NaKATPase',
        'HLA.Class.1', 'Ta', 'Au',
    ],
    "cell_type",
    algorithm="PCA",
)

In [None]:
# tSNE projection over all target (marker) columns, labelled by PatientID.
# save_dir="" is passed so the image is saved (intended target: the current directory).
dimensionality_reduction.visualize_dimensionality_reduction(
    all_data,
    [
        'C', 'Na', 'Si', 'HH3', 'Vimentin', 'SMA', 'Background',
        'Collagen.1', 'Lag3', 'CD4', 'CD14', 'Foxp3', 'PD.1', 'CD31',
        'PD.L1', 'E.cadherin', 'Ki67', 'CD209', 'CD206', 'gdTCR', 'iNOS',
        'CD68', 'CD36', 'CD8', 'CD3', 'IDO', 'CD11c', 'CD163', 'CD20',
        'CD16', 'IFNg', 'HLA.DR.DQ.DP', 'CD11b', 'CD45', 'H3K9Ac',
        'Keratin.pan', 'CD103', 'MastChyTry', 'MPO', 'NaKATPase',
        'HLA.Class.1', 'Ta', 'Au',
    ],
    "PatientID",
    algorithm="tSNE",
    save_dir="",
)

In [None]:
# tSNE projection over all target (marker) columns, labelled by cell type
# (the "cell_type" column). No save_dir is passed for this call.
dimensionality_reduction.visualize_dimensionality_reduction(
    all_data,
    [
        'C', 'Na', 'Si', 'HH3', 'Vimentin', 'SMA', 'Background',
        'Collagen.1', 'Lag3', 'CD4', 'CD14', 'Foxp3', 'PD.1', 'CD31',
        'PD.L1', 'E.cadherin', 'Ki67', 'CD209', 'CD206', 'gdTCR', 'iNOS',
        'CD68', 'CD36', 'CD8', 'CD3', 'IDO', 'CD11c', 'CD163', 'CD20',
        'CD16', 'IFNg', 'HLA.DR.DQ.DP', 'CD11b', 'CD45', 'H3K9Ac',
        'Keratin.pan', 'CD103', 'MastChyTry', 'MPO', 'NaKATPase',
        'HLA.Class.1', 'Ta', 'Au',
    ],
    "cell_type",
    algorithm="tSNE",
)

In [None]:
# UMAP projection over all target (marker) columns, labelled by PatientID.
# save_dir="" is passed so the image is saved (intended target: the current directory).
dimensionality_reduction.visualize_dimensionality_reduction(
    all_data,
    [
        'C', 'Na', 'Si', 'HH3', 'Vimentin', 'SMA', 'Background',
        'Collagen.1', 'Lag3', 'CD4', 'CD14', 'Foxp3', 'PD.1', 'CD31',
        'PD.L1', 'E.cadherin', 'Ki67', 'CD209', 'CD206', 'gdTCR', 'iNOS',
        'CD68', 'CD36', 'CD8', 'CD3', 'IDO', 'CD11c', 'CD163', 'CD20',
        'CD16', 'IFNg', 'HLA.DR.DQ.DP', 'CD11b', 'CD45', 'H3K9Ac',
        'Keratin.pan', 'CD103', 'MastChyTry', 'MPO', 'NaKATPase',
        'HLA.Class.1', 'Ta', 'Au',
    ],
    "PatientID",
    algorithm="UMAP",
    save_dir="",
)

In [None]:
# UMAP projection over all target (marker) columns, labelled by cell type
# (the "cell_type" column).
# save_dir="" is passed so the image is saved (intended target: the current directory).
dimensionality_reduction.visualize_dimensionality_reduction(
    all_data,
    [
        'C', 'Na', 'Si', 'HH3', 'Vimentin', 'SMA', 'Background',
        'Collagen.1', 'Lag3', 'CD4', 'CD14', 'Foxp3', 'PD.1', 'CD31',
        'PD.L1', 'E.cadherin', 'Ki67', 'CD209', 'CD206', 'gdTCR', 'iNOS',
        'CD68', 'CD36', 'CD8', 'CD3', 'IDO', 'CD11c', 'CD163', 'CD20',
        'CD16', 'IFNg', 'HLA.DR.DQ.DP', 'CD11b', 'CD45', 'H3K9Ac',
        'Keratin.pan', 'CD103', 'MastChyTry', 'MPO', 'NaKATPase',
        'HLA.Class.1', 'Ta', 'Au',
    ],
    "cell_type",
    algorithm="UMAP",
    save_dir="",
)