### **Curating HCAHeartST10550732(OCT).h5ad**

Article: Spatially resolved multiomics of human cardiac niches

DOI: https://doi.org/10.1038/s41586-023-06311-1 

Data Source : https://www.heartcellatlas.org

##### **Mount farm**

mount-farm

##### **Packages required for curation**

In [1]:
#Import all packages required for curation

In [2]:
import numpy as np
import pandas as pd
import scanpy as sc
import scipy
from tqdm import tqdm
from scipy import sparse
from scipy.sparse import csr_matrix
import anndata as ad
import os
import subprocess
import math

### **Curation Schema**

##### **X (Matrix Layers)**

##### **AnnData object**

In [3]:
# Load the AnnData object

In [4]:
# Location of the processed object for sample HCAHeartST10550732.
adata_path = '/lustre/scratch127/cellgen/cellgeni/cxgportal_sets/heart_cell_atlas/data/HCAHeartST10550732.h5ad'
adata = sc.read_h5ad(adata_path)

In [5]:
# View the AnnData object

In [6]:
adata

AnnData object with n_obs × n_vars = 2199 × 33538
    obs: 'array_row', 'array_col', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'sangerID', 'region', 'donor', 'donor_type', 'age', 'gender', 'facility', 'modality', 'kit_10x', 'flushed', 'annotation_final', 'Adip1', 'Adip2', 'Adip3', 'B', 'B_plasma', 'CD14+Mo', 'CD16+Mo', 'CD4+T_act', 'CD4+T_naive', 'CD8+T_cytox', 'CD8+T_em', 'CD8+T_te', 'CD8+T_trans', 'DC', 'EC10_CMC-like', 'EC1_cap', 'EC2_cap', 'EC3_cap', 'EC4_immune', 'EC5_art', 'EC6_ven', 'EC7_endocardial', 'EC8_ln', 'FB1', 'FB2', 'FB3', 'FB4_activated', 'FB5', 'FB6', 'ILC', 'LYVE1+IGF1+MP', 'LYVE1+MP_cycling', 'LYVE1+TIMD4+MP', 'MAIT-like', 'Mast', 'Meso', 'MoMP', 'NC1_glial', 'NC2_glial_NGF+', 'NK_CD16hi', 'NK_CD56hi', 'Neut', 'PC1_vent', 'PC2_atria', 'PC3_str', 'SAN_P_cell', 'SMC1_basic', 'SMC2_art', 'T/NK_cycling', 'aCM1', 'aCM2', 'aCM3', 'aCM4', 'AVN_bundle_cell', 'PC4_CMC-like', 'vCM1', 'vCM2', 'vCM3_stressed', 'vCM4', 'vCM5', 'AVN_P_c

##### **X- expression matrix**

In [7]:
# View the expression matrix of the anndata object

In [8]:
adata.X

<2199x33538 sparse matrix of type '<class 'numpy.float32'>'
	with 3398459 stored elements in Compressed Sparse Row format>

In [9]:
# Print the matrix to check whether it holds normalized or raw counts: non-integer values indicate normalized counts, whole-number values indicate raw counts (the dtype alone is not enough, because raw counts may be stored as float32).

In [10]:
print(adata.X)

  (0, 39)	1.494994
  (0, 46)	0.6231686
  (0, 51)	1.494994
  (0, 53)	0.6231686
  (0, 55)	0.6231686
  (0, 62)	0.6231686
  (0, 66)	1.2794
  (0, 77)	0.6231686
  (0, 78)	1.2794
  (0, 93)	0.6231686
  (0, 98)	0.6231686
  (0, 121)	0.6231686
  (0, 154)	2.2667792
  (0, 156)	1.0041752
  (0, 161)	1.0041752
  (0, 178)	0.6231686
  (0, 185)	0.6231686
  (0, 190)	1.0041752
  (0, 201)	1.6722507
  (0, 215)	0.6231686
  (0, 219)	0.6231686
  (0, 226)	0.6231686
  (0, 236)	0.6231686
  (0, 244)	0.6231686
  (0, 245)	0.6231686
  :	:
  (2198, 33070)	1.2171524
  (2198, 33131)	2.3523483
  (2198, 33133)	1.2171524
  (2198, 33207)	1.7500885
  (2198, 33245)	1.2171524
  (2198, 33254)	1.7500885
  (2198, 33279)	1.2171524
  (2198, 33311)	1.2171524
  (2198, 33396)	1.2171524
  (2198, 33445)	1.2171524
  (2198, 33465)	1.2171524
  (2198, 33474)	1.2171524
  (2198, 33479)	1.2171524
  (2198, 33492)	1.7500885
  (2198, 33496)	5.403185
  (2198, 33497)	5.1062565
  (2198, 33498)	6.2151814
  (2198, 33499)	5.605954
  (2198, 33501)	5.8992

##### **Raw counts matrix**

In [11]:
# If X has normalized counts, check for the raw counts matrix.

In [12]:
#Here the raw counts are provided in a separate object, load the raw counts matrix

In [13]:
# Location of the combined raw-counts object (it holds more spots than this one sample).
araw_path = '/lustre/scratch127/cellgen/cellgeni/cxgportal_sets/heart_cell_atlas/Raw_counts/visium-OCT_adult-8reg-revision_raw.h5ad'
araw = sc.read_h5ad(araw_path)

In [14]:
# view raw object

In [15]:
araw

AnnData object with n_obs × n_vars = 93788 × 33538
    obs: 'in_tissue', 'array_row', 'array_col', 'sample', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes', 'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes', 'mt_frac', 'n_counts', 'n_genes', 'annotation_JC', 'sangerID', 'Publication', 'combinedID', 'donor', 'donor_type', 'region', 'region_finest', 'age', 'gender', 'facility', 'cell_or_nuclei', 'modality', 'kit_10x', 'flushed', 'region_cell2loc'
    var: 'gene_ids', 'feature_types', 'genome', 'SYMBOL'
    uns: 'spatial'
    obsm: 'MT', 'means_cell_abundance_w_sf', 'q05_cell_abundance_w_sf', 'q95_cell_abundance_w_sf', 'spatial', 'stds_cell_abundance_w_sf'

In [16]:
# view raw matrix

In [17]:
araw.X

<93788x33538 sparse matrix of type '<class 'numpy.float32'>'
	with 147751779 stored elements in Compressed Sparse Row format>

In [18]:
print(araw.X)

  (0, 26)	1.0
  (0, 27)	1.0
  (0, 39)	1.0
  (0, 154)	2.0
  (0, 156)	1.0
  (0, 171)	1.0
  (0, 201)	1.0
  (0, 219)	2.0
  (0, 220)	1.0
  (0, 267)	61.0
  (0, 275)	1.0
  (0, 350)	1.0
  (0, 363)	1.0
  (0, 408)	1.0
  (0, 414)	2.0
  (0, 423)	2.0
  (0, 424)	1.0
  (0, 442)	1.0
  (0, 449)	1.0
  (0, 470)	5.0
  (0, 472)	2.0
  (0, 473)	1.0
  (0, 491)	1.0
  (0, 493)	1.0
  (0, 526)	1.0
  :	:
  (93787, 33249)	1.0
  (93787, 33254)	1.0
  (93787, 33294)	1.0
  (93787, 33375)	1.0
  (93787, 33376)	2.0
  (93787, 33399)	1.0
  (93787, 33413)	1.0
  (93787, 33446)	1.0
  (93787, 33451)	1.0
  (93787, 33474)	2.0
  (93787, 33479)	2.0
  (93787, 33490)	1.0
  (93787, 33492)	1.0
  (93787, 33496)	57.0
  (93787, 33497)	31.0
  (93787, 33498)	101.0
  (93787, 33499)	78.0
  (93787, 33500)	3.0
  (93787, 33501)	70.0
  (93787, 33502)	67.0
  (93787, 33503)	33.0
  (93787, 33504)	3.0
  (93787, 33505)	54.0
  (93787, 33506)	9.0
  (93787, 33508)	40.0


In [19]:
# The raw object combines several samples, so extract the raw counts for this dataset only.

In [20]:
# Boolean mask selecting the spots whose 'sample' label is this dataset's ID.
is_this_sample = araw.obs['sample'] == 'HCAHeartST10550732'
# Row-subsetting with the mask yields a view of the combined object.
araw = araw[is_this_sample]

In [21]:
araw

View of AnnData object with n_obs × n_vars = 2199 × 33538
    obs: 'in_tissue', 'array_row', 'array_col', 'sample', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes', 'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes', 'mt_frac', 'n_counts', 'n_genes', 'annotation_JC', 'sangerID', 'Publication', 'combinedID', 'donor', 'donor_type', 'region', 'region_finest', 'age', 'gender', 'facility', 'cell_or_nuclei', 'modality', 'kit_10x', 'flushed', 'region_cell2loc'
    var: 'gene_ids', 'feature_types', 'genome', 'SYMBOL'
    uns: 'spatial'
    obsm: 'MT', 'means_cell_abundance_w_sf', 'q05_cell_abundance_w_sf', 'q95_cell_abundance_w_sf', 'spatial', 'stds_cell_abundance_w_sf'

##### **Variables(var)**

In [22]:
#View the var of anndata and raw object

In [23]:
adata.var

Unnamed: 0_level_0,gene_ids,feature_types,genome
SYMBOL,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
MIR1302-2HG,ENSG00000243485,Gene Expression,GRCh38
FAM138A,ENSG00000237613,Gene Expression,GRCh38
OR4F5,ENSG00000186092,Gene Expression,GRCh38
AL627309.1,ENSG00000238009,Gene Expression,GRCh38
AL627309.3,ENSG00000239945,Gene Expression,GRCh38
...,...,...,...
AC233755.2,ENSG00000277856,Gene Expression,GRCh38
AC233755.1,ENSG00000275063,Gene Expression,GRCh38
AC240274.1,ENSG00000271254,Gene Expression,GRCh38
AC213203.1,ENSG00000277475,Gene Expression,GRCh38


In [24]:
araw.var

Unnamed: 0,gene_ids,feature_types,genome,SYMBOL
ENSG00000243485,ENSG00000243485,Gene Expression,GRCh38,MIR1302-2HG
ENSG00000237613,ENSG00000237613,Gene Expression,GRCh38,FAM138A
ENSG00000186092,ENSG00000186092,Gene Expression,GRCh38,OR4F5
ENSG00000238009,ENSG00000238009,Gene Expression,GRCh38,AL627309.1
ENSG00000239945,ENSG00000239945,Gene Expression,GRCh38,AL627309.3
...,...,...,...,...
ENSG00000277856,ENSG00000277856,Gene Expression,GRCh38,AC233755.2
ENSG00000275063,ENSG00000275063,Gene Expression,GRCh38,AC233755.1
ENSG00000271254,ENSG00000271254,Gene Expression,GRCh38,AC240274.1
ENSG00000277475,ENSG00000277475,Gene Expression,GRCh38,AC213203.1


In [25]:
# Check the index column of var. Check whether ensembl ids are provided in the index column of var or not.

In [26]:
# If the Ensembl IDs are not in the index column but are present in another column, set that column as the index.

In [27]:
# Preserve the current var index (the gene symbols) as a regular column
# before the index is replaced by Ensembl IDs.
adata.var['gene_symbols'] = adata.var.index

In [28]:
adata.var

Unnamed: 0_level_0,gene_ids,feature_types,genome,gene_symbols
SYMBOL,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
MIR1302-2HG,ENSG00000243485,Gene Expression,GRCh38,MIR1302-2HG
FAM138A,ENSG00000237613,Gene Expression,GRCh38,FAM138A
OR4F5,ENSG00000186092,Gene Expression,GRCh38,OR4F5
AL627309.1,ENSG00000238009,Gene Expression,GRCh38,AL627309.1
AL627309.3,ENSG00000239945,Gene Expression,GRCh38,AL627309.3
...,...,...,...,...
AC233755.2,ENSG00000277856,Gene Expression,GRCh38,AC233755.2
AC233755.1,ENSG00000275063,Gene Expression,GRCh38,AC233755.1
AC240274.1,ENSG00000271254,Gene Expression,GRCh38,AC240274.1
AC213203.1,ENSG00000277475,Gene Expression,GRCh38,AC213203.1


In [29]:
# Make the Ensembl IDs held in the 'gene_ids' column the var index (var_names).
adata.var_names = adata.var['gene_ids']

In [30]:
adata.var

Unnamed: 0_level_0,gene_ids,feature_types,genome,gene_symbols
gene_ids,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ENSG00000243485,ENSG00000243485,Gene Expression,GRCh38,MIR1302-2HG
ENSG00000237613,ENSG00000237613,Gene Expression,GRCh38,FAM138A
ENSG00000186092,ENSG00000186092,Gene Expression,GRCh38,OR4F5
ENSG00000238009,ENSG00000238009,Gene Expression,GRCh38,AL627309.1
ENSG00000239945,ENSG00000239945,Gene Expression,GRCh38,AL627309.3
...,...,...,...,...
ENSG00000277856,ENSG00000277856,Gene Expression,GRCh38,AC233755.2
ENSG00000275063,ENSG00000275063,Gene Expression,GRCh38,AC233755.1
ENSG00000271254,ENSG00000271254,Gene Expression,GRCh38,AC240274.1
ENSG00000277475,ENSG00000277475,Gene Expression,GRCh38,AC213203.1


In [31]:
# load the approved genes file

In [32]:
# CSV listing the approved features; its 'feature_id' column is used below.
approved_genes_path = '/home/jovyan/CXG_DATASETS_PORTAL/gene_info/genes_approved.csv'
approved_genes = pd.read_csv(approved_genes_path)

In [33]:
# Create a lookup dictionary keyed by the feature_id column of the approved genes file (the value 1 is only a placeholder; the dictionary is used for membership tests).

In [34]:
# Every approved feature id becomes a key; the constant value 1 is a placeholder
# because the dictionary is only used for fast membership checks.
genedict = dict.fromkeys(approved_genes.feature_id, 1)

In [35]:
genedict

{'ERCC-00002': 1,
 'ERCC-00003': 1,
 'ERCC-00004': 1,
 'ERCC-00009': 1,
 'ERCC-00012': 1,
 'ERCC-00013': 1,
 'ERCC-00014': 1,
 'ERCC-00016': 1,
 'ERCC-00017': 1,
 'ERCC-00019': 1,
 'ERCC-00022': 1,
 'ERCC-00024': 1,
 'ERCC-00025': 1,
 'ERCC-00028': 1,
 'ERCC-00031': 1,
 'ERCC-00033': 1,
 'ERCC-00034': 1,
 'ERCC-00035': 1,
 'ERCC-00039': 1,
 'ERCC-00040': 1,
 'ERCC-00041': 1,
 'ERCC-00042': 1,
 'ERCC-00043': 1,
 'ERCC-00044': 1,
 'ERCC-00046': 1,
 'ERCC-00048': 1,
 'ERCC-00051': 1,
 'ERCC-00053': 1,
 'ERCC-00054': 1,
 'ERCC-00057': 1,
 'ERCC-00058': 1,
 'ERCC-00059': 1,
 'ERCC-00060': 1,
 'ERCC-00061': 1,
 'ERCC-00062': 1,
 'ERCC-00067': 1,
 'ERCC-00069': 1,
 'ERCC-00071': 1,
 'ERCC-00073': 1,
 'ERCC-00074': 1,
 'ERCC-00075': 1,
 'ERCC-00076': 1,
 'ERCC-00077': 1,
 'ERCC-00078': 1,
 'ERCC-00079': 1,
 'ERCC-00081': 1,
 'ERCC-00083': 1,
 'ERCC-00084': 1,
 'ERCC-00085': 1,
 'ERCC-00086': 1,
 'ERCC-00092': 1,
 'ERCC-00095': 1,
 'ERCC-00096': 1,
 'ERCC-00097': 1,
 'ERCC-00098': 1,
 'ERCC-000

In [36]:
len(genedict)

116184

In [37]:
# Filter out the genes which are not in the approved genes file

In [38]:
def _approved_features(feature_ids):
    """Return the entries of feature_ids that are keys of genedict, in their original order."""
    return [feature_id for feature_id in feature_ids if feature_id in genedict]


var_to_keep_adata = _approved_features(adata.var_names)
var_to_keep_araw = _approved_features(araw.var_names)

In [39]:
len(var_to_keep_adata)

33234

In [40]:
len(var_to_keep_araw)

33234

In [41]:
adata.var

Unnamed: 0_level_0,gene_ids,feature_types,genome,gene_symbols
gene_ids,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ENSG00000243485,ENSG00000243485,Gene Expression,GRCh38,MIR1302-2HG
ENSG00000237613,ENSG00000237613,Gene Expression,GRCh38,FAM138A
ENSG00000186092,ENSG00000186092,Gene Expression,GRCh38,OR4F5
ENSG00000238009,ENSG00000238009,Gene Expression,GRCh38,AL627309.1
ENSG00000239945,ENSG00000239945,Gene Expression,GRCh38,AL627309.3
...,...,...,...,...
ENSG00000277856,ENSG00000277856,Gene Expression,GRCh38,AC233755.2
ENSG00000275063,ENSG00000275063,Gene Expression,GRCh38,AC233755.1
ENSG00000271254,ENSG00000271254,Gene Expression,GRCh38,AC240274.1
ENSG00000277475,ENSG00000277475,Gene Expression,GRCh38,AC213203.1


In [42]:
araw.var

Unnamed: 0,gene_ids,feature_types,genome,SYMBOL
ENSG00000243485,ENSG00000243485,Gene Expression,GRCh38,MIR1302-2HG
ENSG00000237613,ENSG00000237613,Gene Expression,GRCh38,FAM138A
ENSG00000186092,ENSG00000186092,Gene Expression,GRCh38,OR4F5
ENSG00000238009,ENSG00000238009,Gene Expression,GRCh38,AL627309.1
ENSG00000239945,ENSG00000239945,Gene Expression,GRCh38,AL627309.3
...,...,...,...,...
ENSG00000277856,ENSG00000277856,Gene Expression,GRCh38,AC233755.2
ENSG00000275063,ENSG00000275063,Gene Expression,GRCh38,AC233755.1
ENSG00000271254,ENSG00000271254,Gene Expression,GRCh38,AC240274.1
ENSG00000277475,ENSG00000277475,Gene Expression,GRCh38,AC213203.1


In [43]:
# Subset both objects to the approved genes only. (The gene symbols were already copied to the gene_symbols column earlier.)

In [44]:
# Keep only the approved features in both objects; .copy() turns each subset
# (a view of its parent) into an independent AnnData object.
# NOTE(review): the two keep-lists are built separately from each object's own
# var_names; both have 33234 entries here, but confirm they are in the same order
# before pairing the raw counts with adata.
adata = adata[:, var_to_keep_adata].copy()
araw = araw[:, var_to_keep_araw].copy()

In [45]:
#  View the var

In [46]:
adata.var

Unnamed: 0_level_0,gene_ids,feature_types,genome,gene_symbols
gene_ids,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ENSG00000243485,ENSG00000243485,Gene Expression,GRCh38,MIR1302-2HG
ENSG00000237613,ENSG00000237613,Gene Expression,GRCh38,FAM138A
ENSG00000186092,ENSG00000186092,Gene Expression,GRCh38,OR4F5
ENSG00000238009,ENSG00000238009,Gene Expression,GRCh38,AL627309.1
ENSG00000239945,ENSG00000239945,Gene Expression,GRCh38,AL627309.3
...,...,...,...,...
ENSG00000277856,ENSG00000277856,Gene Expression,GRCh38,AC233755.2
ENSG00000275063,ENSG00000275063,Gene Expression,GRCh38,AC233755.1
ENSG00000271254,ENSG00000271254,Gene Expression,GRCh38,AC240274.1
ENSG00000277475,ENSG00000277475,Gene Expression,GRCh38,AC213203.1


In [47]:
araw.var

Unnamed: 0,gene_ids,feature_types,genome,SYMBOL
ENSG00000243485,ENSG00000243485,Gene Expression,GRCh38,MIR1302-2HG
ENSG00000237613,ENSG00000237613,Gene Expression,GRCh38,FAM138A
ENSG00000186092,ENSG00000186092,Gene Expression,GRCh38,OR4F5
ENSG00000238009,ENSG00000238009,Gene Expression,GRCh38,AL627309.1
ENSG00000239945,ENSG00000239945,Gene Expression,GRCh38,AL627309.3
...,...,...,...,...
ENSG00000277856,ENSG00000277856,Gene Expression,GRCh38,AC233755.2
ENSG00000275063,ENSG00000275063,Gene Expression,GRCh38,AC233755.1
ENSG00000271254,ENSG00000271254,Gene Expression,GRCh38,AC240274.1
ENSG00000277475,ENSG00000277475,Gene Expression,GRCh38,AC213203.1


feature is filtered

In [48]:
# Assign False since the feature was not filtered out in the normalized matrix (X).

In [49]:
# One False per feature: a boolean column with an entry for every row of var.
adata.var['feature_is_filtered'] = np.zeros(adata.n_vars, dtype=bool)

In [50]:
#View var

In [51]:
adata.var

Unnamed: 0_level_0,gene_ids,feature_types,genome,gene_symbols,feature_is_filtered
gene_ids,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ENSG00000243485,ENSG00000243485,Gene Expression,GRCh38,MIR1302-2HG,False
ENSG00000237613,ENSG00000237613,Gene Expression,GRCh38,FAM138A,False
ENSG00000186092,ENSG00000186092,Gene Expression,GRCh38,OR4F5,False
ENSG00000238009,ENSG00000238009,Gene Expression,GRCh38,AL627309.1,False
ENSG00000239945,ENSG00000239945,Gene Expression,GRCh38,AL627309.3,False
...,...,...,...,...,...
ENSG00000277856,ENSG00000277856,Gene Expression,GRCh38,AC233755.2,False
ENSG00000275063,ENSG00000275063,Gene Expression,GRCh38,AC233755.1,False
ENSG00000271254,ENSG00000271254,Gene Expression,GRCh38,AC240274.1,False
ENSG00000277475,ENSG00000277475,Gene Expression,GRCh38,AC213203.1,False


In [52]:
araw.var

Unnamed: 0,gene_ids,feature_types,genome,SYMBOL
ENSG00000243485,ENSG00000243485,Gene Expression,GRCh38,MIR1302-2HG
ENSG00000237613,ENSG00000237613,Gene Expression,GRCh38,FAM138A
ENSG00000186092,ENSG00000186092,Gene Expression,GRCh38,OR4F5
ENSG00000238009,ENSG00000238009,Gene Expression,GRCh38,AL627309.1
ENSG00000239945,ENSG00000239945,Gene Expression,GRCh38,AL627309.3
...,...,...,...,...
ENSG00000277856,ENSG00000277856,Gene Expression,GRCh38,AC233755.2
ENSG00000275063,ENSG00000275063,Gene Expression,GRCh38,AC233755.1
ENSG00000271254,ENSG00000271254,Gene Expression,GRCh38,AC240274.1
ENSG00000277475,ENSG00000277475,Gene Expression,GRCh38,AC213203.1


In [53]:
#  Delete the unwanted columns in adata and araw.

In [54]:
# Remove every annotation column from the raw object's var, leaving only the index.
for unwanted_column in ('gene_ids', 'SYMBOL', 'feature_types', 'genome'):
    del araw.var[unwanted_column]

In [55]:
# Remove the original annotation columns from adata.var; 'feature_is_filtered' is kept.
for unwanted_column in ('gene_ids', 'gene_symbols', 'feature_types', 'genome'):
    del adata.var[unwanted_column]

In [56]:
# view var

In [57]:
adata.var

Unnamed: 0_level_0,feature_is_filtered
gene_ids,Unnamed: 1_level_1
ENSG00000243485,False
ENSG00000237613,False
ENSG00000186092,False
ENSG00000238009,False
ENSG00000239945,False
...,...
ENSG00000277856,False
ENSG00000275063,False
ENSG00000271254,False
ENSG00000277475,False


In [58]:
araw.var

ENSG00000243485
ENSG00000237613
ENSG00000186092
ENSG00000238009
ENSG00000239945
...
ENSG00000277856
ENSG00000275063
ENSG00000271254
ENSG00000277475
ENSG00000268674


#### **Observations(obs) (Cell metadata)**

In [59]:
#view obs

In [60]:
adata.obs

Unnamed: 0_level_0,array_row,array_col,n_genes_by_counts,log1p_n_genes_by_counts,total_counts,log1p_total_counts,sangerID,region,donor,donor_type,...,CD4+T_Th1_abundance,CD4+T_Th2_abundance,CD4+T_reg_abundance,NC5_glial_abundance,aCM5_abundance,Adip4_abundance,NC3_glial_abundance,NC6_schwann_abundance,EC9_FB-like_abundance,gdT_abundance
spot_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
HCAHeartST10550732_AAACAAGTATCTCCCA-1,50,102,3434,8.141772,11563.0,9.355652,HCAHeartST10550732,SP,D3,DBD,...,0.005035,0.013968,0.005701,,,,,,0.002917,0.002645
HCAHeartST10550732_AAACAGAGCGACTCCT-1,14,94,1753,7.469654,5269.0,8.569786,HCAHeartST10550732,SP,D3,DBD,...,0.002019,0.003444,0.002517,,,,,,0.000937,0.001847
HCAHeartST10550732_AAACAGCTTTCAGAAG-1,43,9,1977,7.589842,5859.0,8.675905,HCAHeartST10550732,SP,D3,DBD,...,0.004092,0.001601,0.000808,,,,,,0.000920,0.003387
HCAHeartST10550732_AAACAGGGTCTATATT-1,47,13,1579,7.365180,4535.0,8.419801,HCAHeartST10550732,SP,D3,DBD,...,0.004500,0.002451,0.002876,,,,,,0.004542,0.009823
HCAHeartST10550732_AAACCCGAACGAAATC-1,45,115,2051,7.626570,6727.0,8.814034,HCAHeartST10550732,SP,D3,DBD,...,0.012971,0.014325,0.017519,,,,,,0.022167,0.015366
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HCAHeartST10550732_TTGTGTTTCCCGAAAG-1,51,59,1060,6.966967,2694.0,7.899154,HCAHeartST10550732,SP,D3,DBD,...,0.010816,0.013847,0.008572,,,,,,0.017586,0.012906
HCAHeartST10550732_TTGTTAGCAAATTCGA-1,22,42,1059,6.966024,2427.0,7.794823,HCAHeartST10550732,SP,D3,DBD,...,0.001297,0.001993,0.000795,,,,,,0.001272,0.002152
HCAHeartST10550732_TTGTTCAGTGTGCTAC-1,24,64,1262,7.141245,3458.0,8.148735,HCAHeartST10550732,SP,D3,DBD,...,0.004881,0.004190,0.003463,,,,,,0.002372,0.005539
HCAHeartST10550732_TTGTTGTGTGTCAAGA-1,31,77,1152,7.050123,2863.0,7.959974,HCAHeartST10550732,SP,D3,DBD,...,0.010100,0.010628,0.008103,,,,,,0.003979,0.007415


In [61]:
adata.obs.columns

Index(['array_row', 'array_col', 'n_genes_by_counts',
       'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts',
       'sangerID', 'region', 'donor', 'donor_type',
       ...
       'CD4+T_Th1_abundance', 'CD4+T_Th2_abundance', 'CD4+T_reg_abundance',
       'NC5_glial_abundance', 'aCM5_abundance', 'Adip4_abundance',
       'NC3_glial_abundance', 'NC6_schwann_abundance', 'EC9_FB-like_abundance',
       'gdT_abundance'],
      dtype='object', length=161)

#### **assay_ontology_term_id**

In [62]:
# identify the column in adata which corresponds to assay

In [63]:
list(adata.obs['kit_10x'].unique())

['Visium-v1']

In [64]:
# add the assay_ontology_term_id column

In [65]:
# Every spot in this dataset gets the same assay term, so broadcast a single value
# across the whole column.
adata.obs['assay_ontology_term_id'] = 'EFO:0010961'

In [66]:
# change datatype of the column

In [67]:
adata.obs['assay_ontology_term_id'] = adata.obs['assay_ontology_term_id'].astype('category')

In [68]:
# view adata.obs

In [69]:
adata.obs

Unnamed: 0_level_0,array_row,array_col,n_genes_by_counts,log1p_n_genes_by_counts,total_counts,log1p_total_counts,sangerID,region,donor,donor_type,...,CD4+T_Th2_abundance,CD4+T_reg_abundance,NC5_glial_abundance,aCM5_abundance,Adip4_abundance,NC3_glial_abundance,NC6_schwann_abundance,EC9_FB-like_abundance,gdT_abundance,assay_ontology_term_id
spot_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
HCAHeartST10550732_AAACAAGTATCTCCCA-1,50,102,3434,8.141772,11563.0,9.355652,HCAHeartST10550732,SP,D3,DBD,...,0.013968,0.005701,,,,,,0.002917,0.002645,EFO:0010961
HCAHeartST10550732_AAACAGAGCGACTCCT-1,14,94,1753,7.469654,5269.0,8.569786,HCAHeartST10550732,SP,D3,DBD,...,0.003444,0.002517,,,,,,0.000937,0.001847,EFO:0010961
HCAHeartST10550732_AAACAGCTTTCAGAAG-1,43,9,1977,7.589842,5859.0,8.675905,HCAHeartST10550732,SP,D3,DBD,...,0.001601,0.000808,,,,,,0.000920,0.003387,EFO:0010961
HCAHeartST10550732_AAACAGGGTCTATATT-1,47,13,1579,7.365180,4535.0,8.419801,HCAHeartST10550732,SP,D3,DBD,...,0.002451,0.002876,,,,,,0.004542,0.009823,EFO:0010961
HCAHeartST10550732_AAACCCGAACGAAATC-1,45,115,2051,7.626570,6727.0,8.814034,HCAHeartST10550732,SP,D3,DBD,...,0.014325,0.017519,,,,,,0.022167,0.015366,EFO:0010961
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HCAHeartST10550732_TTGTGTTTCCCGAAAG-1,51,59,1060,6.966967,2694.0,7.899154,HCAHeartST10550732,SP,D3,DBD,...,0.013847,0.008572,,,,,,0.017586,0.012906,EFO:0010961
HCAHeartST10550732_TTGTTAGCAAATTCGA-1,22,42,1059,6.966024,2427.0,7.794823,HCAHeartST10550732,SP,D3,DBD,...,0.001993,0.000795,,,,,,0.001272,0.002152,EFO:0010961
HCAHeartST10550732_TTGTTCAGTGTGCTAC-1,24,64,1262,7.141245,3458.0,8.148735,HCAHeartST10550732,SP,D3,DBD,...,0.004190,0.003463,,,,,,0.002372,0.005539,EFO:0010961
HCAHeartST10550732_TTGTTGTGTGTCAAGA-1,31,77,1152,7.050123,2863.0,7.959974,HCAHeartST10550732,SP,D3,DBD,...,0.010628,0.008103,,,,,,0.003979,0.007415,EFO:0010961


#### **cell_type_ontology_term_id**

In [70]:
# Get the column in adata.obs related to cell type annotation

In [71]:
adata.obs.columns

Index(['array_row', 'array_col', 'n_genes_by_counts',
       'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts',
       'sangerID', 'region', 'donor', 'donor_type',
       ...
       'CD4+T_Th2_abundance', 'CD4+T_reg_abundance', 'NC5_glial_abundance',
       'aCM5_abundance', 'Adip4_abundance', 'NC3_glial_abundance',
       'NC6_schwann_abundance', 'EC9_FB-like_abundance', 'gdT_abundance',
       'assay_ontology_term_id'],
      dtype='object', length=162)

In [72]:
adata.obsm

AxisArrays with keys: MT, X_spatial, means_cell_abundance_w_sf, prop, q05_cell_abundance_w_sf, q95_cell_abundance_w_sf, stds_cell_abundance_w_sf

In [73]:
adata.obsm['means_cell_abundance_w_sf']

Unnamed: 0_level_0,meanscell_abundance_w_sf_Adip1,meanscell_abundance_w_sf_Adip2,meanscell_abundance_w_sf_Adip3,meanscell_abundance_w_sf_B,meanscell_abundance_w_sf_B_plasma,meanscell_abundance_w_sf_CD14+Mo,meanscell_abundance_w_sf_CD16+Mo,meanscell_abundance_w_sf_CD4+T_act,meanscell_abundance_w_sf_CD4+T_naive,meanscell_abundance_w_sf_CD8+T_cytox,...,meanscell_abundance_w_sf_CD4+T_Th1,meanscell_abundance_w_sf_CD4+T_Th2,meanscell_abundance_w_sf_CD4+T_reg,meanscell_abundance_w_sf_NC5_glial,meanscell_abundance_w_sf_aCM5,meanscell_abundance_w_sf_Adip4,meanscell_abundance_w_sf_NC3_glial,meanscell_abundance_w_sf_NC6_schwann,meanscell_abundance_w_sf_EC9_FB-like,meanscell_abundance_w_sf_gdT
spot_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
HCAHeartST10550732_AAACAAGTATCTCCCA-1,0.024324,0.013958,0.043447,0.128722,0.121310,0.361681,0.325113,0.172167,0.440548,0.372169,...,0.082620,0.177026,0.101049,,,,,,0.039128,0.035979
HCAHeartST10550732_AAACAGAGCGACTCCT-1,0.029055,0.023092,0.020183,0.073616,0.069824,0.203564,0.053512,0.131965,0.346047,0.090877,...,0.088193,0.106764,0.069585,,,,,,0.027777,0.039242
HCAHeartST10550732_AAACAGCTTTCAGAAG-1,0.012210,0.005849,0.066210,0.057957,0.145274,0.402248,0.057193,0.086324,0.138732,0.089478,...,0.121983,0.060190,0.036304,,,,,,0.027607,0.050742
HCAHeartST10550732_AAACAGGGTCTATATT-1,0.024904,0.009868,0.029493,0.127333,0.033895,0.187862,0.096377,0.090942,0.234676,0.091306,...,0.105472,0.076437,0.080013,,,,,,0.080598,0.096642
HCAHeartST10550732_AAACCCGAACGAAATC-1,0.060846,0.041189,0.076302,0.237323,0.148456,0.218750,0.172059,0.154376,0.326717,0.157399,...,0.154652,0.132994,0.194973,,,,,,0.166699,0.140817
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HCAHeartST10550732_TTGTGTTTCCCGAAAG-1,0.081614,0.037861,0.063791,0.133414,0.176346,0.264599,0.157576,0.172162,0.157597,0.160009,...,0.170003,0.196602,0.152799,,,,,,0.220545,0.136391
HCAHeartST10550732_TTGTTAGCAAATTCGA-1,0.023616,0.022022,0.031920,0.091403,0.051824,0.091641,0.449385,0.084400,0.156763,0.063980,...,0.056315,0.092466,0.062588,,,,,,0.040993,0.051065
HCAHeartST10550732_TTGTTCAGTGTGCTAC-1,0.073607,0.118512,0.043862,0.151726,0.063661,0.150312,0.086146,0.140412,0.372579,0.139337,...,0.140568,0.106773,0.093865,,,,,,0.049682,0.090086
HCAHeartST10550732_TTGTTGTGTGTCAAGA-1,0.106193,0.255156,0.193277,0.231668,0.192550,0.262049,0.137485,0.173516,0.310522,0.166446,...,0.166566,0.172229,0.132480,,,,,,0.075224,0.102122


In [74]:
def _columns_at_row_max(abundances):
    """Return the labels of every entry in the row that equals the row's maximum."""
    is_max = abundances == abundances.max()
    return abundances[is_max].index.tolist()


# One list of column names per spot (row) of the mean cell-abundance table.
max_columns = adata.obsm['means_cell_abundance_w_sf'].apply(_columns_at_row_max, axis=1)

In [75]:
max_columns

spot_id
HCAHeartST10550732_AAACAAGTATCTCCCA-1    [meanscell_abundance_w_sf_MoMP]
HCAHeartST10550732_AAACAGAGCGACTCCT-1    [meanscell_abundance_w_sf_vCM4]
HCAHeartST10550732_AAACAGCTTTCAGAAG-1    [meanscell_abundance_w_sf_vCM4]
HCAHeartST10550732_AAACAGGGTCTATATT-1    [meanscell_abundance_w_sf_vCM4]
HCAHeartST10550732_AAACCCGAACGAAATC-1    [meanscell_abundance_w_sf_vCM4]
                                                      ...               
HCAHeartST10550732_TTGTGTTTCCCGAAAG-1    [meanscell_abundance_w_sf_vCM4]
HCAHeartST10550732_TTGTTAGCAAATTCGA-1    [meanscell_abundance_w_sf_vCM4]
HCAHeartST10550732_TTGTTCAGTGTGCTAC-1    [meanscell_abundance_w_sf_vCM4]
HCAHeartST10550732_TTGTTGTGTGTCAAGA-1    [meanscell_abundance_w_sf_vCM4]
HCAHeartST10550732_TTGTTTCCATACAACT-1    [meanscell_abundance_w_sf_vCM4]
Length: 2199, dtype: object

In [76]:
# A spot has a tie when more than one column shares its row maximum, i.e. when its
# list in max_columns holds more than one column name. Counting repeated names inside
# that list cannot detect this: the names come from the abundance table's columns, so
# (as long as those column names are unique) no name ever appears twice.
# Keep the tied column names for such spots and an empty list for every other spot.
columns_with_multiple_max = max_columns.apply(
    lambda tied_columns: list(tied_columns) if len(tied_columns) > 1 else []
)

In [77]:
columns_with_multiple_max 

spot_id
HCAHeartST10550732_AAACAAGTATCTCCCA-1    []
HCAHeartST10550732_AAACAGAGCGACTCCT-1    []
HCAHeartST10550732_AAACAGCTTTCAGAAG-1    []
HCAHeartST10550732_AAACAGGGTCTATATT-1    []
HCAHeartST10550732_AAACCCGAACGAAATC-1    []
                                         ..
HCAHeartST10550732_TTGTGTTTCCCGAAAG-1    []
HCAHeartST10550732_TTGTTAGCAAATTCGA-1    []
HCAHeartST10550732_TTGTTCAGTGTGCTAC-1    []
HCAHeartST10550732_TTGTTGTGTGTCAAGA-1    []
HCAHeartST10550732_TTGTTTCCATACAACT-1    []
Length: 2199, dtype: object

In [78]:
# Store the per-spot tie lists in obs so they can be inspected alongside the other metadata.
# NOTE(review): each value in this column is a Python list — confirm this helper
# column is removed before the object is written to disk.
adata.obs['columns_with_multiple_max'] = columns_with_multiple_max

In [79]:
# Spots whose tie list is not empty.
has_entries = columns_with_multiple_max.map(len) > 0
non_empty_columns = columns_with_multiple_max[has_entries]

In [80]:
len(non_empty_columns )

0

In [81]:
# Turn each per-spot list into a plain label: take its first column name and strip the
# abundance prefix; spots with an empty list get an empty string.
abundance_prefix = 'meanscell_abundance_w_sf_'
max_columns = [
    top_columns[0].replace(abundance_prefix, '') if top_columns else ''
    for top_columns in max_columns
]

In [82]:
adata.obs['highest_cell_Density_columns'] = max_columns

In [83]:
adata.obs['highest_cell_Density_columns']

spot_id
HCAHeartST10550732_AAACAAGTATCTCCCA-1    MoMP
HCAHeartST10550732_AAACAGAGCGACTCCT-1    vCM4
HCAHeartST10550732_AAACAGCTTTCAGAAG-1    vCM4
HCAHeartST10550732_AAACAGGGTCTATATT-1    vCM4
HCAHeartST10550732_AAACCCGAACGAAATC-1    vCM4
                                         ... 
HCAHeartST10550732_TTGTGTTTCCCGAAAG-1    vCM4
HCAHeartST10550732_TTGTTAGCAAATTCGA-1    vCM4
HCAHeartST10550732_TTGTTCAGTGTGCTAC-1    vCM4
HCAHeartST10550732_TTGTTGTGTGTCAAGA-1    vCM4
HCAHeartST10550732_TTGTTTCCATACAACT-1    vCM4
Name: highest_cell_Density_columns, Length: 2199, dtype: object

In [84]:
list(adata.obs['highest_cell_Density_columns'].unique())

['MoMP',
 'vCM4',
 'SMC2_art',
 'FB4_activated',
 'EC5_art',
 'DC',
 'CD8+T_te',
 'FB3',
 'LYVE1+IGF1+MP',
 'vCM3_stressed',
 'T/NK_cycling',
 'FB5',
 'CD16+Mo',
 'EC7_endocardial',
 'EC6_ven',
 'PC2_atria',
 'MAIT-like',
 'LYVE1+MP_cycling',
 'Neut',
 'LYVE1+TIMD4+MP',
 'PC3_str',
 'SMC1_basic',
 'EC8_ln',
 'NC1_glial',
 'CD14+Mo']

In [85]:
adata.obs['cell_type_ontology_term_id'] = adata.obs['highest_cell_Density_columns']

In [86]:
# create a dictionary of cell type and ontology term

In [87]:
mapping= {'aCM1': 'CL:0002129',
 'aCM2': 'CL:0002129',
 'aCM3': 'CL:0002129',
 'aCM4': 'CL:0002129',
 'aCM5': 'CL:0002129',
 'Adip1': 'CL:0000136',
 'Adip2': 'CL:0000136',
 'Adip3': 'CL:0000136',
 'Adip4': 'CL:0000136',
 'Adipocyte': 'CL:0000136',
 'Atrial Cardiomyocyte': 'CL:0002129',
 'AVN_bundle_cell': 'CL:0010005',
 'AVN_P_cell': 'CL:1000477',
 'B': 'CL:0000236',
 'B_plasma': 'CL:0000786',
 'CD14+Mo': 'CL:0001054',
 'CD16+Mo': 'CL:0002396',
 'CD4+T_act': 'CL:0000896',
 'CD4+T_naive': 'CL:0000895',
 'CD4+T_Th2': 'CL:0000546',
 'CD8+T_cytox': 'CL:0000794',
 'CD8+T_em': 'CL:0000913',
 'CD8+T_te': 'CL:0000625',
 'CD8+T_trans': 'CL:0000625',
 'DC': 'CL:0001056',
 'EC1_cap': 'CL:0002144',
 'EC10_CMC-like': 'CL:0000115',
 'EC2_cap': 'CL:0002144',
 'EC3_cap': 'CL:0002144',
 'EC4_immune': 'CL:0000115',
 'EC5_art': 'CL:1000413',
 'EC6_ven': 'CL:0002543',
 'EC7_atria': 'CL:0002350',
 'EC7_endocardial': 'CL:0002350',
 'EC8_ln': 'CL:0002138',
 'Endothelial cell': 'CL:0000115',
 'FB1': 'CL:0002548',
 'FB2': 'CL:0002548',
 'FB3': 'CL:0002548',
 'FB4': 'CL:0002548',
 'FB4_activated': 'CL:0002548',
 'FB5': 'CL:0002548',
 'FB6': 'CL:0002548',
 'Fibroblast': 'CL:0000057',
 'ILC': 'CL:0001065',
 'Lymphatic Endothelial cell': 'CL:0002138',
 'Lymphoid': 'CL:0000542',
 'LYVE1+IGF1+MP': 'CL:0000235',
 'LYVE1+MP_cycling': 'CL:0000235',
 'LYVE1+TIMD4+MP': 'CL:0000235',
 'MAIT-like': 'CL:0000940',
 'Mast': 'CL:0000097',
 'Mast cell': 'CL:0000097',
 'Meso': 'CL:0000077',
 'Mesothelial cell': 'CL:0000077',
 'MoMP': 'CL:0000576',
 'Mural cell': 'CL:0008034',
 'Myeloid': 'CL:0000763',
 'NC1': 'CL:0000125',
 'NC1_glial': 'CL:0000125',
 'NC2': 'CL:0000125',
 'NC2_glial_NGF+': 'CL:0000125',
 'Neural cell': 'CL:0002319',
 'Neut': 'CL:0000775',
 'NK_CD16hi': 'CL:0000939',
 'NK_CD56hi': 'CL:0000938',
 'PC1_vent': 'CL:0000669',
 'PC2_atria': 'CL:0000669',
 'PC3_str': 'CL:0000669',
 'PC4_CMC-like': 'CL:0000669',
 'SAN_P_cell': 'CL:1000477',
 'SMC1_basic': 'CL:0000192',
 'SMC2_art': 'CL:0002591',
 'T/NK_cycling': 'CL:0000814',
 'vCM1': 'CL:0002131',
 'vCM3_stressed': 'CL:0002131',
 'vCM4': 'CL:0002131',
 'Ventricular Cardiomyocyte': 'CL:0002131',
 'unclassified': 'CL:0000003'}

In [88]:
# add the cell_type_ontology_term_id column

In [89]:
# Map from the untouched label column instead of remapping this column in place, so that
# re-running the cell gives the same result rather than turning every value into NaN.
cell_type_labels = adata.obs['highest_cell_Density_columns']

# Series.map() silently yields NaN for any label that is not a key of `mapping`;
# fail loudly instead so no spot is left without a cell type ontology term.
unmapped_labels = sorted(set(cell_type_labels.unique()) - set(mapping), key=str)
assert not unmapped_labels, f"No ontology term in `mapping` for: {unmapped_labels}"

adata.obs['cell_type_ontology_term_id'] = cell_type_labels.map(mapping)

In [90]:
# change datatype of the column

In [91]:
adata.obs['cell_type_ontology_term_id'] = adata.obs['cell_type_ontology_term_id'].astype('category')

In [92]:
adata.obs

Unnamed: 0_level_0,array_row,array_col,n_genes_by_counts,log1p_n_genes_by_counts,total_counts,log1p_total_counts,sangerID,region,donor,donor_type,...,aCM5_abundance,Adip4_abundance,NC3_glial_abundance,NC6_schwann_abundance,EC9_FB-like_abundance,gdT_abundance,assay_ontology_term_id,columns_with_multiple_max,highest_cell_Density_columns,cell_type_ontology_term_id
spot_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
HCAHeartST10550732_AAACAAGTATCTCCCA-1,50,102,3434,8.141772,11563.0,9.355652,HCAHeartST10550732,SP,D3,DBD,...,,,,,0.002917,0.002645,EFO:0010961,[],MoMP,CL:0000576
HCAHeartST10550732_AAACAGAGCGACTCCT-1,14,94,1753,7.469654,5269.0,8.569786,HCAHeartST10550732,SP,D3,DBD,...,,,,,0.000937,0.001847,EFO:0010961,[],vCM4,CL:0002131
HCAHeartST10550732_AAACAGCTTTCAGAAG-1,43,9,1977,7.589842,5859.0,8.675905,HCAHeartST10550732,SP,D3,DBD,...,,,,,0.000920,0.003387,EFO:0010961,[],vCM4,CL:0002131
HCAHeartST10550732_AAACAGGGTCTATATT-1,47,13,1579,7.365180,4535.0,8.419801,HCAHeartST10550732,SP,D3,DBD,...,,,,,0.004542,0.009823,EFO:0010961,[],vCM4,CL:0002131
HCAHeartST10550732_AAACCCGAACGAAATC-1,45,115,2051,7.626570,6727.0,8.814034,HCAHeartST10550732,SP,D3,DBD,...,,,,,0.022167,0.015366,EFO:0010961,[],vCM4,CL:0002131
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HCAHeartST10550732_TTGTGTTTCCCGAAAG-1,51,59,1060,6.966967,2694.0,7.899154,HCAHeartST10550732,SP,D3,DBD,...,,,,,0.017586,0.012906,EFO:0010961,[],vCM4,CL:0002131
HCAHeartST10550732_TTGTTAGCAAATTCGA-1,22,42,1059,6.966024,2427.0,7.794823,HCAHeartST10550732,SP,D3,DBD,...,,,,,0.001272,0.002152,EFO:0010961,[],vCM4,CL:0002131
HCAHeartST10550732_TTGTTCAGTGTGCTAC-1,24,64,1262,7.141245,3458.0,8.148735,HCAHeartST10550732,SP,D3,DBD,...,,,,,0.002372,0.005539,EFO:0010961,[],vCM4,CL:0002131
HCAHeartST10550732_TTGTTGTGTGTCAAGA-1,31,77,1152,7.050123,2863.0,7.959974,HCAHeartST10550732,SP,D3,DBD,...,,,,,0.003979,0.007415,EFO:0010961,[],vCM4,CL:0002131


#### **donor_id**

In [93]:
#identify the column in adata.obs which provides donor information

In [94]:
adata.obs.columns

Index(['array_row', 'array_col', 'n_genes_by_counts',
       'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts',
       'sangerID', 'region', 'donor', 'donor_type',
       ...
       'aCM5_abundance', 'Adip4_abundance', 'NC3_glial_abundance',
       'NC6_schwann_abundance', 'EC9_FB-like_abundance', 'gdT_abundance',
       'assay_ontology_term_id', 'columns_with_multiple_max',
       'highest_cell_Density_columns', 'cell_type_ontology_term_id'],
      dtype='object', length=165)

In [95]:
list(adata.obs['donor'].unique())

['D3']

In [96]:
# add the donor_id column

In [97]:
adata.obs['donor_id'] = adata.obs['donor']

In [98]:
# change datatype of the column

In [99]:
adata.obs['donor_id'] = adata.obs['donor_id'].astype('category')

In [100]:
# view unique values of donor_id column

In [101]:
list(adata.obs['donor_id'].unique())

['D3']

In [102]:
#view obs

In [103]:
adata.obs

Unnamed: 0_level_0,array_row,array_col,n_genes_by_counts,log1p_n_genes_by_counts,total_counts,log1p_total_counts,sangerID,region,donor,donor_type,...,Adip4_abundance,NC3_glial_abundance,NC6_schwann_abundance,EC9_FB-like_abundance,gdT_abundance,assay_ontology_term_id,columns_with_multiple_max,highest_cell_Density_columns,cell_type_ontology_term_id,donor_id
spot_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
HCAHeartST10550732_AAACAAGTATCTCCCA-1,50,102,3434,8.141772,11563.0,9.355652,HCAHeartST10550732,SP,D3,DBD,...,,,,0.002917,0.002645,EFO:0010961,[],MoMP,CL:0000576,D3
HCAHeartST10550732_AAACAGAGCGACTCCT-1,14,94,1753,7.469654,5269.0,8.569786,HCAHeartST10550732,SP,D3,DBD,...,,,,0.000937,0.001847,EFO:0010961,[],vCM4,CL:0002131,D3
HCAHeartST10550732_AAACAGCTTTCAGAAG-1,43,9,1977,7.589842,5859.0,8.675905,HCAHeartST10550732,SP,D3,DBD,...,,,,0.000920,0.003387,EFO:0010961,[],vCM4,CL:0002131,D3
HCAHeartST10550732_AAACAGGGTCTATATT-1,47,13,1579,7.365180,4535.0,8.419801,HCAHeartST10550732,SP,D3,DBD,...,,,,0.004542,0.009823,EFO:0010961,[],vCM4,CL:0002131,D3
HCAHeartST10550732_AAACCCGAACGAAATC-1,45,115,2051,7.626570,6727.0,8.814034,HCAHeartST10550732,SP,D3,DBD,...,,,,0.022167,0.015366,EFO:0010961,[],vCM4,CL:0002131,D3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HCAHeartST10550732_TTGTGTTTCCCGAAAG-1,51,59,1060,6.966967,2694.0,7.899154,HCAHeartST10550732,SP,D3,DBD,...,,,,0.017586,0.012906,EFO:0010961,[],vCM4,CL:0002131,D3
HCAHeartST10550732_TTGTTAGCAAATTCGA-1,22,42,1059,6.966024,2427.0,7.794823,HCAHeartST10550732,SP,D3,DBD,...,,,,0.001272,0.002152,EFO:0010961,[],vCM4,CL:0002131,D3
HCAHeartST10550732_TTGTTCAGTGTGCTAC-1,24,64,1262,7.141245,3458.0,8.148735,HCAHeartST10550732,SP,D3,DBD,...,,,,0.002372,0.005539,EFO:0010961,[],vCM4,CL:0002131,D3
HCAHeartST10550732_TTGTTGTGTGTCAAGA-1,31,77,1152,7.050123,2863.0,7.959974,HCAHeartST10550732,SP,D3,DBD,...,,,,0.003979,0.007415,EFO:0010961,[],vCM4,CL:0002131,D3


In [104]:
adata.obs.columns

Index(['array_row', 'array_col', 'n_genes_by_counts',
       'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts',
       'sangerID', 'region', 'donor', 'donor_type',
       ...
       'Adip4_abundance', 'NC3_glial_abundance', 'NC6_schwann_abundance',
       'EC9_FB-like_abundance', 'gdT_abundance', 'assay_ontology_term_id',
       'columns_with_multiple_max', 'highest_cell_Density_columns',
       'cell_type_ontology_term_id', 'donor_id'],
      dtype='object', length=166)

#### **development_stage_ontology_term_id**

In [105]:
# identify the column in adata which corresponds to age

In [106]:
adata.obs.columns

Index(['array_row', 'array_col', 'n_genes_by_counts',
       'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts',
       'sangerID', 'region', 'donor', 'donor_type',
       ...
       'Adip4_abundance', 'NC3_glial_abundance', 'NC6_schwann_abundance',
       'EC9_FB-like_abundance', 'gdT_abundance', 'assay_ontology_term_id',
       'columns_with_multiple_max', 'highest_cell_Density_columns',
       'cell_type_ontology_term_id', 'donor_id'],
      dtype='object', length=166)

In [107]:
list(adata.obs['age'].unique())

['55-60']

In [108]:
# create a dictionary for age and development stage ontology term id

In [109]:
# Age bins grouped under the HsapDv term they share; the comprehension flattens
# the groups back into a plain {age_bin: term_id} lookup.
mapping = {
    age_bin: stage_id
    for stage_id, age_bins in (
        ('HsapDv:0000237', ('20-25',)),
        ('HsapDv:0000239', ('40-45', '45-50')),
        ('HsapDv:0000240', ('50-55', '55-60')),
        ('HsapDv:0000241', ('60-65', '65-70')),
        ('HsapDv:0000242', ('70-75',)),
    )
    for age_bin in age_bins
}

In [110]:
# add the development_stage_ontology_term_id column

In [111]:
# Translate each age bin through `mapping`. Series.map leaves NaN for any bin that
# is not a key of the dict, so stop here instead of writing a column with holes.
stage_terms = adata.obs['age'].map(mapping)
unmapped_ages = adata.obs.loc[stage_terms.isna(), 'age'].unique().tolist()
assert not unmapped_ages, f"age values missing from mapping: {unmapped_ages}"
adata.obs['development_stage_ontology_term_id'] = stage_terms

In [112]:
# change datatype of the column

In [113]:
adata.obs['development_stage_ontology_term_id'] = adata.obs['development_stage_ontology_term_id'].astype('category')

In [114]:
# view unique values of development_stage_ontology_term_id column

In [115]:
list(adata.obs['development_stage_ontology_term_id'].unique())

['HsapDv:0000240']

In [116]:
# view adata.obs

In [117]:
adata.obs

Unnamed: 0_level_0,array_row,array_col,n_genes_by_counts,log1p_n_genes_by_counts,total_counts,log1p_total_counts,sangerID,region,donor,donor_type,...,NC3_glial_abundance,NC6_schwann_abundance,EC9_FB-like_abundance,gdT_abundance,assay_ontology_term_id,columns_with_multiple_max,highest_cell_Density_columns,cell_type_ontology_term_id,donor_id,development_stage_ontology_term_id
spot_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
HCAHeartST10550732_AAACAAGTATCTCCCA-1,50,102,3434,8.141772,11563.0,9.355652,HCAHeartST10550732,SP,D3,DBD,...,,,0.002917,0.002645,EFO:0010961,[],MoMP,CL:0000576,D3,HsapDv:0000240
HCAHeartST10550732_AAACAGAGCGACTCCT-1,14,94,1753,7.469654,5269.0,8.569786,HCAHeartST10550732,SP,D3,DBD,...,,,0.000937,0.001847,EFO:0010961,[],vCM4,CL:0002131,D3,HsapDv:0000240
HCAHeartST10550732_AAACAGCTTTCAGAAG-1,43,9,1977,7.589842,5859.0,8.675905,HCAHeartST10550732,SP,D3,DBD,...,,,0.000920,0.003387,EFO:0010961,[],vCM4,CL:0002131,D3,HsapDv:0000240
HCAHeartST10550732_AAACAGGGTCTATATT-1,47,13,1579,7.365180,4535.0,8.419801,HCAHeartST10550732,SP,D3,DBD,...,,,0.004542,0.009823,EFO:0010961,[],vCM4,CL:0002131,D3,HsapDv:0000240
HCAHeartST10550732_AAACCCGAACGAAATC-1,45,115,2051,7.626570,6727.0,8.814034,HCAHeartST10550732,SP,D3,DBD,...,,,0.022167,0.015366,EFO:0010961,[],vCM4,CL:0002131,D3,HsapDv:0000240
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HCAHeartST10550732_TTGTGTTTCCCGAAAG-1,51,59,1060,6.966967,2694.0,7.899154,HCAHeartST10550732,SP,D3,DBD,...,,,0.017586,0.012906,EFO:0010961,[],vCM4,CL:0002131,D3,HsapDv:0000240
HCAHeartST10550732_TTGTTAGCAAATTCGA-1,22,42,1059,6.966024,2427.0,7.794823,HCAHeartST10550732,SP,D3,DBD,...,,,0.001272,0.002152,EFO:0010961,[],vCM4,CL:0002131,D3,HsapDv:0000240
HCAHeartST10550732_TTGTTCAGTGTGCTAC-1,24,64,1262,7.141245,3458.0,8.148735,HCAHeartST10550732,SP,D3,DBD,...,,,0.002372,0.005539,EFO:0010961,[],vCM4,CL:0002131,D3,HsapDv:0000240
HCAHeartST10550732_TTGTTGTGTGTCAAGA-1,31,77,1152,7.050123,2863.0,7.959974,HCAHeartST10550732,SP,D3,DBD,...,,,0.003979,0.007415,EFO:0010961,[],vCM4,CL:0002131,D3,HsapDv:0000240


#### **disease_ontology_term_id**

In [118]:
# All donors are healthy, so every observation is assigned the 'normal' term (PATO:0000461)

In [119]:
# add the disease_ontology_term_id column

In [120]:
# A scalar on the right-hand side is broadcast to every row of obs
adata.obs['disease_ontology_term_id'] = 'PATO:0000461'

In [121]:
#change data type of column

In [122]:
adata.obs['disease_ontology_term_id'] = adata.obs['disease_ontology_term_id'].astype('category')

In [123]:
# view obs

In [124]:
adata.obs

Unnamed: 0_level_0,array_row,array_col,n_genes_by_counts,log1p_n_genes_by_counts,total_counts,log1p_total_counts,sangerID,region,donor,donor_type,...,NC6_schwann_abundance,EC9_FB-like_abundance,gdT_abundance,assay_ontology_term_id,columns_with_multiple_max,highest_cell_Density_columns,cell_type_ontology_term_id,donor_id,development_stage_ontology_term_id,disease_ontology_term_id
spot_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
HCAHeartST10550732_AAACAAGTATCTCCCA-1,50,102,3434,8.141772,11563.0,9.355652,HCAHeartST10550732,SP,D3,DBD,...,,0.002917,0.002645,EFO:0010961,[],MoMP,CL:0000576,D3,HsapDv:0000240,PATO:0000461
HCAHeartST10550732_AAACAGAGCGACTCCT-1,14,94,1753,7.469654,5269.0,8.569786,HCAHeartST10550732,SP,D3,DBD,...,,0.000937,0.001847,EFO:0010961,[],vCM4,CL:0002131,D3,HsapDv:0000240,PATO:0000461
HCAHeartST10550732_AAACAGCTTTCAGAAG-1,43,9,1977,7.589842,5859.0,8.675905,HCAHeartST10550732,SP,D3,DBD,...,,0.000920,0.003387,EFO:0010961,[],vCM4,CL:0002131,D3,HsapDv:0000240,PATO:0000461
HCAHeartST10550732_AAACAGGGTCTATATT-1,47,13,1579,7.365180,4535.0,8.419801,HCAHeartST10550732,SP,D3,DBD,...,,0.004542,0.009823,EFO:0010961,[],vCM4,CL:0002131,D3,HsapDv:0000240,PATO:0000461
HCAHeartST10550732_AAACCCGAACGAAATC-1,45,115,2051,7.626570,6727.0,8.814034,HCAHeartST10550732,SP,D3,DBD,...,,0.022167,0.015366,EFO:0010961,[],vCM4,CL:0002131,D3,HsapDv:0000240,PATO:0000461
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HCAHeartST10550732_TTGTGTTTCCCGAAAG-1,51,59,1060,6.966967,2694.0,7.899154,HCAHeartST10550732,SP,D3,DBD,...,,0.017586,0.012906,EFO:0010961,[],vCM4,CL:0002131,D3,HsapDv:0000240,PATO:0000461
HCAHeartST10550732_TTGTTAGCAAATTCGA-1,22,42,1059,6.966024,2427.0,7.794823,HCAHeartST10550732,SP,D3,DBD,...,,0.001272,0.002152,EFO:0010961,[],vCM4,CL:0002131,D3,HsapDv:0000240,PATO:0000461
HCAHeartST10550732_TTGTTCAGTGTGCTAC-1,24,64,1262,7.141245,3458.0,8.148735,HCAHeartST10550732,SP,D3,DBD,...,,0.002372,0.005539,EFO:0010961,[],vCM4,CL:0002131,D3,HsapDv:0000240,PATO:0000461
HCAHeartST10550732_TTGTTGTGTGTCAAGA-1,31,77,1152,7.050123,2863.0,7.959974,HCAHeartST10550732,SP,D3,DBD,...,,0.003979,0.007415,EFO:0010961,[],vCM4,CL:0002131,D3,HsapDv:0000240,PATO:0000461


#### **is_primary_data**

In [125]:
# Flag every observation as non-primary; the scalar is broadcast to all rows
adata.obs['is_primary_data'] = False

In [126]:
adata.obs

Unnamed: 0_level_0,array_row,array_col,n_genes_by_counts,log1p_n_genes_by_counts,total_counts,log1p_total_counts,sangerID,region,donor,donor_type,...,EC9_FB-like_abundance,gdT_abundance,assay_ontology_term_id,columns_with_multiple_max,highest_cell_Density_columns,cell_type_ontology_term_id,donor_id,development_stage_ontology_term_id,disease_ontology_term_id,is_primary_data
spot_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
HCAHeartST10550732_AAACAAGTATCTCCCA-1,50,102,3434,8.141772,11563.0,9.355652,HCAHeartST10550732,SP,D3,DBD,...,0.002917,0.002645,EFO:0010961,[],MoMP,CL:0000576,D3,HsapDv:0000240,PATO:0000461,False
HCAHeartST10550732_AAACAGAGCGACTCCT-1,14,94,1753,7.469654,5269.0,8.569786,HCAHeartST10550732,SP,D3,DBD,...,0.000937,0.001847,EFO:0010961,[],vCM4,CL:0002131,D3,HsapDv:0000240,PATO:0000461,False
HCAHeartST10550732_AAACAGCTTTCAGAAG-1,43,9,1977,7.589842,5859.0,8.675905,HCAHeartST10550732,SP,D3,DBD,...,0.000920,0.003387,EFO:0010961,[],vCM4,CL:0002131,D3,HsapDv:0000240,PATO:0000461,False
HCAHeartST10550732_AAACAGGGTCTATATT-1,47,13,1579,7.365180,4535.0,8.419801,HCAHeartST10550732,SP,D3,DBD,...,0.004542,0.009823,EFO:0010961,[],vCM4,CL:0002131,D3,HsapDv:0000240,PATO:0000461,False
HCAHeartST10550732_AAACCCGAACGAAATC-1,45,115,2051,7.626570,6727.0,8.814034,HCAHeartST10550732,SP,D3,DBD,...,0.022167,0.015366,EFO:0010961,[],vCM4,CL:0002131,D3,HsapDv:0000240,PATO:0000461,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HCAHeartST10550732_TTGTGTTTCCCGAAAG-1,51,59,1060,6.966967,2694.0,7.899154,HCAHeartST10550732,SP,D3,DBD,...,0.017586,0.012906,EFO:0010961,[],vCM4,CL:0002131,D3,HsapDv:0000240,PATO:0000461,False
HCAHeartST10550732_TTGTTAGCAAATTCGA-1,22,42,1059,6.966024,2427.0,7.794823,HCAHeartST10550732,SP,D3,DBD,...,0.001272,0.002152,EFO:0010961,[],vCM4,CL:0002131,D3,HsapDv:0000240,PATO:0000461,False
HCAHeartST10550732_TTGTTCAGTGTGCTAC-1,24,64,1262,7.141245,3458.0,8.148735,HCAHeartST10550732,SP,D3,DBD,...,0.002372,0.005539,EFO:0010961,[],vCM4,CL:0002131,D3,HsapDv:0000240,PATO:0000461,False
HCAHeartST10550732_TTGTTGTGTGTCAAGA-1,31,77,1152,7.050123,2863.0,7.959974,HCAHeartST10550732,SP,D3,DBD,...,0.003979,0.007415,EFO:0010961,[],vCM4,CL:0002131,D3,HsapDv:0000240,PATO:0000461,False


In [127]:
#change data type of column

In [128]:
adata.obs['is_primary_data'] = adata.obs['is_primary_data'].astype('bool')

#### **organism_ontology_term_id**

In [129]:
# assign organism id 

In [130]:
# Same organism term for every row; the scalar is broadcast across obs
adata.obs['organism_ontology_term_id'] = 'NCBITaxon:9606'

In [131]:
#change data type of column

In [132]:
adata.obs['organism_ontology_term_id'] = adata.obs['organism_ontology_term_id'].astype('category')

In [133]:
# view obs

In [134]:
adata.obs

Unnamed: 0_level_0,array_row,array_col,n_genes_by_counts,log1p_n_genes_by_counts,total_counts,log1p_total_counts,sangerID,region,donor,donor_type,...,gdT_abundance,assay_ontology_term_id,columns_with_multiple_max,highest_cell_Density_columns,cell_type_ontology_term_id,donor_id,development_stage_ontology_term_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id
spot_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
HCAHeartST10550732_AAACAAGTATCTCCCA-1,50,102,3434,8.141772,11563.0,9.355652,HCAHeartST10550732,SP,D3,DBD,...,0.002645,EFO:0010961,[],MoMP,CL:0000576,D3,HsapDv:0000240,PATO:0000461,False,NCBITaxon:9606
HCAHeartST10550732_AAACAGAGCGACTCCT-1,14,94,1753,7.469654,5269.0,8.569786,HCAHeartST10550732,SP,D3,DBD,...,0.001847,EFO:0010961,[],vCM4,CL:0002131,D3,HsapDv:0000240,PATO:0000461,False,NCBITaxon:9606
HCAHeartST10550732_AAACAGCTTTCAGAAG-1,43,9,1977,7.589842,5859.0,8.675905,HCAHeartST10550732,SP,D3,DBD,...,0.003387,EFO:0010961,[],vCM4,CL:0002131,D3,HsapDv:0000240,PATO:0000461,False,NCBITaxon:9606
HCAHeartST10550732_AAACAGGGTCTATATT-1,47,13,1579,7.365180,4535.0,8.419801,HCAHeartST10550732,SP,D3,DBD,...,0.009823,EFO:0010961,[],vCM4,CL:0002131,D3,HsapDv:0000240,PATO:0000461,False,NCBITaxon:9606
HCAHeartST10550732_AAACCCGAACGAAATC-1,45,115,2051,7.626570,6727.0,8.814034,HCAHeartST10550732,SP,D3,DBD,...,0.015366,EFO:0010961,[],vCM4,CL:0002131,D3,HsapDv:0000240,PATO:0000461,False,NCBITaxon:9606
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HCAHeartST10550732_TTGTGTTTCCCGAAAG-1,51,59,1060,6.966967,2694.0,7.899154,HCAHeartST10550732,SP,D3,DBD,...,0.012906,EFO:0010961,[],vCM4,CL:0002131,D3,HsapDv:0000240,PATO:0000461,False,NCBITaxon:9606
HCAHeartST10550732_TTGTTAGCAAATTCGA-1,22,42,1059,6.966024,2427.0,7.794823,HCAHeartST10550732,SP,D3,DBD,...,0.002152,EFO:0010961,[],vCM4,CL:0002131,D3,HsapDv:0000240,PATO:0000461,False,NCBITaxon:9606
HCAHeartST10550732_TTGTTCAGTGTGCTAC-1,24,64,1262,7.141245,3458.0,8.148735,HCAHeartST10550732,SP,D3,DBD,...,0.005539,EFO:0010961,[],vCM4,CL:0002131,D3,HsapDv:0000240,PATO:0000461,False,NCBITaxon:9606
HCAHeartST10550732_TTGTTGTGTGTCAAGA-1,31,77,1152,7.050123,2863.0,7.959974,HCAHeartST10550732,SP,D3,DBD,...,0.007415,EFO:0010961,[],vCM4,CL:0002131,D3,HsapDv:0000240,PATO:0000461,False,NCBITaxon:9606


#### **self_reported_ethnicity_ontology_term_id**

In [135]:
# create a dictionary mapping each donor to its ethnic origin

In [136]:
# donor ID -> ethnic origin label. Donors sharing the common label are listed once
# via dict.fromkeys; the remaining donors are spelled out individually.
mapping = {
    **dict.fromkeys(
        [
            'D1', 'D2', 'D3', 'D4', 'D5', 'D6', 'D7', 'D8', 'D11',
            'H2', 'H4', 'H5', 'H6', 'H7',
            'A61', 'AH1', 'AH5', 'AH6',
            'AV1', 'AV3', 'AV10', 'AV13', 'AV14',
        ],
        'Caucasian',
    ),
    'H3': 'Asian',
    'AH2': 'South Asian',
    'AH1-A61': 'unknown',
}

In [137]:
# add ethinic_origin column

In [138]:
# Look up each donor's ethnic origin. A donor absent from `mapping` would come back
# as NaN from Series.map, so verify full coverage before storing the column.
# NOTE: the column name 'ethinic_origin' (sic) is kept because the next mapping step reads it.
donor_origins = adata.obs['donor'].map(mapping)
unmapped_donors = adata.obs.loc[donor_origins.isna(), 'donor'].unique().tolist()
assert not unmapped_donors, f"donor values missing from mapping: {unmapped_donors}"
adata.obs['ethinic_origin'] = donor_origins

In [139]:
# create a dictionary of ethinic_origin and self_reported_ethnicity_ontology_term_id

In [140]:
# ethnic origin label -> HANCESTRO term ID; 'unknown' is passed through unchanged
mapping = dict(
    [
        ('Caucasian', 'HANCESTRO:0005'),
        ('Asian', 'HANCESTRO:0008'),
        ('South Asian', 'HANCESTRO:0006'),
        ('unknown', 'unknown'),
    ]
)

In [141]:
# add self_reported_ethnicity_ontology_term_id column

In [142]:
# Convert the ethnic origin label to its ontology term. Labels not present in
# `mapping` would silently become NaN via Series.map, so check before assigning.
ethnicity_terms = adata.obs['ethinic_origin'].map(mapping)
unmapped_origins = adata.obs.loc[ethnicity_terms.isna(), 'ethinic_origin'].unique().tolist()
assert not unmapped_origins, f"ethinic_origin values missing from mapping: {unmapped_origins}"
adata.obs['self_reported_ethnicity_ontology_term_id'] = ethnicity_terms

In [143]:
# change data type

In [144]:
adata.obs['self_reported_ethnicity_ontology_term_id'] = adata.obs['self_reported_ethnicity_ontology_term_id'].astype('category')

In [145]:
# view obs

In [146]:
adata.obs

Unnamed: 0_level_0,array_row,array_col,n_genes_by_counts,log1p_n_genes_by_counts,total_counts,log1p_total_counts,sangerID,region,donor,donor_type,...,columns_with_multiple_max,highest_cell_Density_columns,cell_type_ontology_term_id,donor_id,development_stage_ontology_term_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id,ethinic_origin,self_reported_ethnicity_ontology_term_id
spot_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
HCAHeartST10550732_AAACAAGTATCTCCCA-1,50,102,3434,8.141772,11563.0,9.355652,HCAHeartST10550732,SP,D3,DBD,...,[],MoMP,CL:0000576,D3,HsapDv:0000240,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005
HCAHeartST10550732_AAACAGAGCGACTCCT-1,14,94,1753,7.469654,5269.0,8.569786,HCAHeartST10550732,SP,D3,DBD,...,[],vCM4,CL:0002131,D3,HsapDv:0000240,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005
HCAHeartST10550732_AAACAGCTTTCAGAAG-1,43,9,1977,7.589842,5859.0,8.675905,HCAHeartST10550732,SP,D3,DBD,...,[],vCM4,CL:0002131,D3,HsapDv:0000240,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005
HCAHeartST10550732_AAACAGGGTCTATATT-1,47,13,1579,7.365180,4535.0,8.419801,HCAHeartST10550732,SP,D3,DBD,...,[],vCM4,CL:0002131,D3,HsapDv:0000240,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005
HCAHeartST10550732_AAACCCGAACGAAATC-1,45,115,2051,7.626570,6727.0,8.814034,HCAHeartST10550732,SP,D3,DBD,...,[],vCM4,CL:0002131,D3,HsapDv:0000240,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HCAHeartST10550732_TTGTGTTTCCCGAAAG-1,51,59,1060,6.966967,2694.0,7.899154,HCAHeartST10550732,SP,D3,DBD,...,[],vCM4,CL:0002131,D3,HsapDv:0000240,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005
HCAHeartST10550732_TTGTTAGCAAATTCGA-1,22,42,1059,6.966024,2427.0,7.794823,HCAHeartST10550732,SP,D3,DBD,...,[],vCM4,CL:0002131,D3,HsapDv:0000240,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005
HCAHeartST10550732_TTGTTCAGTGTGCTAC-1,24,64,1262,7.141245,3458.0,8.148735,HCAHeartST10550732,SP,D3,DBD,...,[],vCM4,CL:0002131,D3,HsapDv:0000240,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005
HCAHeartST10550732_TTGTTGTGTGTCAAGA-1,31,77,1152,7.050123,2863.0,7.959974,HCAHeartST10550732,SP,D3,DBD,...,[],vCM4,CL:0002131,D3,HsapDv:0000240,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005


#### **sex_ontology_term_id**

In [147]:
# identify the column in adata.obs which corresponds to sex

In [148]:
adata.obs.columns

Index(['array_row', 'array_col', 'n_genes_by_counts',
       'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts',
       'sangerID', 'region', 'donor', 'donor_type',
       ...
       'columns_with_multiple_max', 'highest_cell_Density_columns',
       'cell_type_ontology_term_id', 'donor_id',
       'development_stage_ontology_term_id', 'disease_ontology_term_id',
       'is_primary_data', 'organism_ontology_term_id', 'ethinic_origin',
       'self_reported_ethnicity_ontology_term_id'],
      dtype='object', length=172)

In [149]:
# list the unique values 

In [150]:
list(adata.obs['gender'].unique())

['Male']

In [151]:
# create a dictionary of sex and sex ontology term id

In [152]:
# 'gender' label -> PATO sex term ID
mapping = dict(
    Female='PATO:0000383',
    Male='PATO:0000384',
)

In [153]:
# add sex_ontology_term_id column

In [154]:
# Map the 'gender' label to its PATO term. Any spelling not covered by `mapping`
# (different case, abbreviations, missing values) would turn into NaN, so fail early.
sex_terms = adata.obs['gender'].map(mapping)
unmapped_genders = adata.obs.loc[sex_terms.isna(), 'gender'].unique().tolist()
assert not unmapped_genders, f"gender values missing from mapping: {unmapped_genders}"
adata.obs['sex_ontology_term_id'] = sex_terms

In [155]:
# change data type

In [156]:
adata.obs['sex_ontology_term_id'] = adata.obs['sex_ontology_term_id'].astype('category')

In [157]:
adata.obs

Unnamed: 0_level_0,array_row,array_col,n_genes_by_counts,log1p_n_genes_by_counts,total_counts,log1p_total_counts,sangerID,region,donor,donor_type,...,highest_cell_Density_columns,cell_type_ontology_term_id,donor_id,development_stage_ontology_term_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id,ethinic_origin,self_reported_ethnicity_ontology_term_id,sex_ontology_term_id
spot_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
HCAHeartST10550732_AAACAAGTATCTCCCA-1,50,102,3434,8.141772,11563.0,9.355652,HCAHeartST10550732,SP,D3,DBD,...,MoMP,CL:0000576,D3,HsapDv:0000240,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000384
HCAHeartST10550732_AAACAGAGCGACTCCT-1,14,94,1753,7.469654,5269.0,8.569786,HCAHeartST10550732,SP,D3,DBD,...,vCM4,CL:0002131,D3,HsapDv:0000240,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000384
HCAHeartST10550732_AAACAGCTTTCAGAAG-1,43,9,1977,7.589842,5859.0,8.675905,HCAHeartST10550732,SP,D3,DBD,...,vCM4,CL:0002131,D3,HsapDv:0000240,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000384
HCAHeartST10550732_AAACAGGGTCTATATT-1,47,13,1579,7.365180,4535.0,8.419801,HCAHeartST10550732,SP,D3,DBD,...,vCM4,CL:0002131,D3,HsapDv:0000240,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000384
HCAHeartST10550732_AAACCCGAACGAAATC-1,45,115,2051,7.626570,6727.0,8.814034,HCAHeartST10550732,SP,D3,DBD,...,vCM4,CL:0002131,D3,HsapDv:0000240,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000384
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HCAHeartST10550732_TTGTGTTTCCCGAAAG-1,51,59,1060,6.966967,2694.0,7.899154,HCAHeartST10550732,SP,D3,DBD,...,vCM4,CL:0002131,D3,HsapDv:0000240,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000384
HCAHeartST10550732_TTGTTAGCAAATTCGA-1,22,42,1059,6.966024,2427.0,7.794823,HCAHeartST10550732,SP,D3,DBD,...,vCM4,CL:0002131,D3,HsapDv:0000240,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000384
HCAHeartST10550732_TTGTTCAGTGTGCTAC-1,24,64,1262,7.141245,3458.0,8.148735,HCAHeartST10550732,SP,D3,DBD,...,vCM4,CL:0002131,D3,HsapDv:0000240,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000384
HCAHeartST10550732_TTGTTGTGTGTCAAGA-1,31,77,1152,7.050123,2863.0,7.959974,HCAHeartST10550732,SP,D3,DBD,...,vCM4,CL:0002131,D3,HsapDv:0000240,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000384


#### **suspension_type**

In [158]:
# the assay is Visium, so suspension_type is set to 'na'

In [159]:
# Every row gets the same value; the scalar is broadcast across obs
adata.obs['suspension_type'] = 'na'

In [160]:
# change data type

In [161]:
adata.obs['suspension_type'] = adata.obs['suspension_type'].astype('category')

In [162]:
# view obs

In [163]:
adata.obs

Unnamed: 0_level_0,array_row,array_col,n_genes_by_counts,log1p_n_genes_by_counts,total_counts,log1p_total_counts,sangerID,region,donor,donor_type,...,cell_type_ontology_term_id,donor_id,development_stage_ontology_term_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id,ethinic_origin,self_reported_ethnicity_ontology_term_id,sex_ontology_term_id,suspension_type
spot_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
HCAHeartST10550732_AAACAAGTATCTCCCA-1,50,102,3434,8.141772,11563.0,9.355652,HCAHeartST10550732,SP,D3,DBD,...,CL:0000576,D3,HsapDv:0000240,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000384,na
HCAHeartST10550732_AAACAGAGCGACTCCT-1,14,94,1753,7.469654,5269.0,8.569786,HCAHeartST10550732,SP,D3,DBD,...,CL:0002131,D3,HsapDv:0000240,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000384,na
HCAHeartST10550732_AAACAGCTTTCAGAAG-1,43,9,1977,7.589842,5859.0,8.675905,HCAHeartST10550732,SP,D3,DBD,...,CL:0002131,D3,HsapDv:0000240,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000384,na
HCAHeartST10550732_AAACAGGGTCTATATT-1,47,13,1579,7.365180,4535.0,8.419801,HCAHeartST10550732,SP,D3,DBD,...,CL:0002131,D3,HsapDv:0000240,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000384,na
HCAHeartST10550732_AAACCCGAACGAAATC-1,45,115,2051,7.626570,6727.0,8.814034,HCAHeartST10550732,SP,D3,DBD,...,CL:0002131,D3,HsapDv:0000240,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000384,na
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HCAHeartST10550732_TTGTGTTTCCCGAAAG-1,51,59,1060,6.966967,2694.0,7.899154,HCAHeartST10550732,SP,D3,DBD,...,CL:0002131,D3,HsapDv:0000240,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000384,na
HCAHeartST10550732_TTGTTAGCAAATTCGA-1,22,42,1059,6.966024,2427.0,7.794823,HCAHeartST10550732,SP,D3,DBD,...,CL:0002131,D3,HsapDv:0000240,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000384,na
HCAHeartST10550732_TTGTTCAGTGTGCTAC-1,24,64,1262,7.141245,3458.0,8.148735,HCAHeartST10550732,SP,D3,DBD,...,CL:0002131,D3,HsapDv:0000240,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000384,na
HCAHeartST10550732_TTGTTGTGTGTCAAGA-1,31,77,1152,7.050123,2863.0,7.959974,HCAHeartST10550732,SP,D3,DBD,...,CL:0002131,D3,HsapDv:0000240,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000384,na


#### **tissue_ontology_term_id**

In [164]:
# identify the column in adata.obs which corresponds to tissue

In [165]:
adata.obs.columns

Index(['array_row', 'array_col', 'n_genes_by_counts',
       'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts',
       'sangerID', 'region', 'donor', 'donor_type',
       ...
       'cell_type_ontology_term_id', 'donor_id',
       'development_stage_ontology_term_id', 'disease_ontology_term_id',
       'is_primary_data', 'organism_ontology_term_id', 'ethinic_origin',
       'self_reported_ethnicity_ontology_term_id', 'sex_ontology_term_id',
       'suspension_type'],
      dtype='object', length=174)

In [166]:
# list unique values

In [167]:
list(adata.obs['region'].unique())

['SP']

In [168]:
# create a dictionary with tissue and corresponding tissue ontology term id

In [169]:
# 'region' code -> UBERON tissue term ID
mapping = dict(
    AX='UBERON:0002098',
    LV='UBERON:0002084',
    RV='UBERON:0002080',
    LA='UBERON:0002079',
    SP='UBERON:0002094',
    RA='UBERON:0002078',
    SAN='UBERON:0002351',
    AVN='UBERON:0002352',
)

In [170]:
# add 'tissue_ontology_term_id' column

In [171]:
# Map each region code to its UBERON term. A region code that is not a key of
# `mapping` would come back as NaN from Series.map, so verify coverage first.
tissue_terms = adata.obs['region'].map(mapping)
unmapped_regions = adata.obs.loc[tissue_terms.isna(), 'region'].unique().tolist()
assert not unmapped_regions, f"region values missing from mapping: {unmapped_regions}"
adata.obs['tissue_ontology_term_id'] = tissue_terms

In [172]:
# change data type of column

In [173]:
adata.obs['tissue_ontology_term_id'] = adata.obs['tissue_ontology_term_id'].astype('category')

In [174]:
# List the unique values in the 'tissue_ontology_term_id' column

In [175]:
list(adata.obs['tissue_ontology_term_id'].unique())

['UBERON:0002094']

In [176]:
# view obs

In [177]:
adata.obs

Unnamed: 0_level_0,array_row,array_col,n_genes_by_counts,log1p_n_genes_by_counts,total_counts,log1p_total_counts,sangerID,region,donor,donor_type,...,donor_id,development_stage_ontology_term_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id,ethinic_origin,self_reported_ethnicity_ontology_term_id,sex_ontology_term_id,suspension_type,tissue_ontology_term_id
spot_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
HCAHeartST10550732_AAACAAGTATCTCCCA-1,50,102,3434,8.141772,11563.0,9.355652,HCAHeartST10550732,SP,D3,DBD,...,D3,HsapDv:0000240,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000384,na,UBERON:0002094
HCAHeartST10550732_AAACAGAGCGACTCCT-1,14,94,1753,7.469654,5269.0,8.569786,HCAHeartST10550732,SP,D3,DBD,...,D3,HsapDv:0000240,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000384,na,UBERON:0002094
HCAHeartST10550732_AAACAGCTTTCAGAAG-1,43,9,1977,7.589842,5859.0,8.675905,HCAHeartST10550732,SP,D3,DBD,...,D3,HsapDv:0000240,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000384,na,UBERON:0002094
HCAHeartST10550732_AAACAGGGTCTATATT-1,47,13,1579,7.365180,4535.0,8.419801,HCAHeartST10550732,SP,D3,DBD,...,D3,HsapDv:0000240,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000384,na,UBERON:0002094
HCAHeartST10550732_AAACCCGAACGAAATC-1,45,115,2051,7.626570,6727.0,8.814034,HCAHeartST10550732,SP,D3,DBD,...,D3,HsapDv:0000240,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000384,na,UBERON:0002094
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HCAHeartST10550732_TTGTGTTTCCCGAAAG-1,51,59,1060,6.966967,2694.0,7.899154,HCAHeartST10550732,SP,D3,DBD,...,D3,HsapDv:0000240,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000384,na,UBERON:0002094
HCAHeartST10550732_TTGTTAGCAAATTCGA-1,22,42,1059,6.966024,2427.0,7.794823,HCAHeartST10550732,SP,D3,DBD,...,D3,HsapDv:0000240,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000384,na,UBERON:0002094
HCAHeartST10550732_TTGTTCAGTGTGCTAC-1,24,64,1262,7.141245,3458.0,8.148735,HCAHeartST10550732,SP,D3,DBD,...,D3,HsapDv:0000240,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000384,na,UBERON:0002094
HCAHeartST10550732_TTGTTGTGTGTCAAGA-1,31,77,1152,7.050123,2863.0,7.959974,HCAHeartST10550732,SP,D3,DBD,...,D3,HsapDv:0000240,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000384,na,UBERON:0002094


In [178]:
# Remove obs columns that are not kept in the curated object.
# NOTE(review): presumably these are superseded by the schema columns added
# earlier (e.g. 'donor' by 'donor_id') — confirm before changing the list.
for obsolete_col in (
    'donor',
    'gender',
    'highest_cell_Density_columns',
    'columns_with_multiple_max',
    'array_row',
    'array_col',
    'modality',
    'kit_10x',
    'ethinic_origin',
):
    del adata.obs[obsolete_col]

#### **obsm (Embeddings)**

In [179]:
adata.obsm

AxisArrays with keys: MT, X_spatial, means_cell_abundance_w_sf, prop, q05_cell_abundance_w_sf, q95_cell_abundance_w_sf, stds_cell_abundance_w_sf

In [180]:
adata.obsm.keys()

KeysView(AxisArrays with keys: MT, X_spatial, means_cell_abundance_w_sf, prop, q05_cell_abundance_w_sf, q95_cell_abundance_w_sf, stds_cell_abundance_w_sf)

In [181]:
# Swap the stored entry for its `.values` array, keeping the same obsm key.
means_abundance = adata.obsm['means_cell_abundance_w_sf']
adata.obsm['means_cell_abundance_w_sf'] = means_abundance.values

In [182]:
# Swap the stored entry for its `.values` array, keeping the same obsm key.
stds_abundance = adata.obsm['stds_cell_abundance_w_sf']
adata.obsm['stds_cell_abundance_w_sf'] = stds_abundance.values

In [183]:
# Swap the stored entry for its `.values` array, keeping the same obsm key.
prop_table = adata.obsm['prop']
adata.obsm['prop'] = prop_table.values

In [184]:
# Swap the stored entry for its `.values` array, keeping the same obsm key.
q05_abundance = adata.obsm['q05_cell_abundance_w_sf']
adata.obsm['q05_cell_abundance_w_sf'] = q05_abundance.values

In [185]:
# Swap the stored entry for its `.values` array, keeping the same obsm key.
q95_abundance = adata.obsm['q95_cell_abundance_w_sf']
adata.obsm['q95_cell_abundance_w_sf'] = q95_abundance.values

In [186]:
# Register each entry a second time under an 'X_'-prefixed key.
# The unprefixed keys are still present after this cell.
for obsm_key in (
    'means_cell_abundance_w_sf',
    'prop',
    'q05_cell_abundance_w_sf',
    'q95_cell_abundance_w_sf',
    'stds_cell_abundance_w_sf',
):
    adata.obsm['X_' + obsm_key] = adata.obsm[obsm_key]

In [187]:
adata.obsm

AxisArrays with keys: MT, X_spatial, means_cell_abundance_w_sf, prop, q05_cell_abundance_w_sf, q95_cell_abundance_w_sf, stds_cell_abundance_w_sf, X_means_cell_abundance_w_sf, X_prop, X_q05_cell_abundance_w_sf, X_q95_cell_abundance_w_sf, X_stds_cell_abundance_w_sf

In [188]:
# Remove 'MT' and the unprefixed entries, leaving only keys that start with 'X_'.
for stale_key in (
    'MT',
    'means_cell_abundance_w_sf',
    'prop',
    'q05_cell_abundance_w_sf',
    'q95_cell_abundance_w_sf',
    'stds_cell_abundance_w_sf',
):
    del adata.obsm[stale_key]

#### **uns (Dataset Metadata)**

In [189]:
adata.uns

OverloadedDict, wrapping:
	{'log1p': {}, 'spatial': {'HCAHeartST10550732': {'images': {'hires': array([[[0.9411765 , 0.9372549 , 0.95686275],
        [0.94509804, 0.9411765 , 0.95686275],
        [0.94509804, 0.9411765 , 0.95686275],
        ...,
        [0.9490196 , 0.94509804, 0.9607843 ],
        [0.9490196 , 0.94509804, 0.9607843 ],
        [0.9529412 , 0.94509804, 0.95686275]],

       [[0.94509804, 0.9411765 , 0.95686275],
        [0.9411765 , 0.9372549 , 0.95686275],
        [0.94509804, 0.9411765 , 0.95686275],
        ...,
        [0.9490196 , 0.94509804, 0.9607843 ],
        [0.9490196 , 0.94509804, 0.95686275],
        [0.9529412 , 0.94509804, 0.95686275]],

       [[0.94509804, 0.9411765 , 0.95686275],
        [0.9411765 , 0.9372549 , 0.95686275],
        [0.9411765 , 0.9411765 , 0.95686275],
        ...,
        [0.9529412 , 0.94509804, 0.9607843 ],
        [0.9529412 , 0.94509804, 0.9607843 ],
        [0.9490196 , 0.94509804, 0.95686275]],

       ...,

       [[0.9372549

In [190]:
# Store a free-text caption describing the tissue image in uns.
adata.uns['image_caption'] = 'Shown here is an image of 10 μm thick slice of the interventricular septum region of the adult human heart stained with H&E'

In [191]:
# Store the dataset title in uns.
adata.uns['title'] = 'Visium spatial - HCAHeartST10550732 (OCT)'

In [192]:
# Record 'X_spatial' (one of the keys present in adata.obsm) as the default embedding.
# NOTE(review): presumably this must match an obsm key for the target schema — confirm.
adata.uns['default_embedding'] = 'X_spatial'

In [193]:
adata.uns.keys()

dict_keys(['log1p', 'spatial', 'image_caption', 'title', 'default_embedding'])

### **Final checks and adjustments**

In [194]:
adata

AnnData object with n_obs × n_vars = 2199 × 33234
    obs: 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'sangerID', 'region', 'donor_type', 'age', 'facility', 'flushed', 'annotation_final', 'Adip1', 'Adip2', 'Adip3', 'B', 'B_plasma', 'CD14+Mo', 'CD16+Mo', 'CD4+T_act', 'CD4+T_naive', 'CD8+T_cytox', 'CD8+T_em', 'CD8+T_te', 'CD8+T_trans', 'DC', 'EC10_CMC-like', 'EC1_cap', 'EC2_cap', 'EC3_cap', 'EC4_immune', 'EC5_art', 'EC6_ven', 'EC7_endocardial', 'EC8_ln', 'FB1', 'FB2', 'FB3', 'FB4_activated', 'FB5', 'FB6', 'ILC', 'LYVE1+IGF1+MP', 'LYVE1+MP_cycling', 'LYVE1+TIMD4+MP', 'MAIT-like', 'Mast', 'Meso', 'MoMP', 'NC1_glial', 'NC2_glial_NGF+', 'NK_CD16hi', 'NK_CD56hi', 'Neut', 'PC1_vent', 'PC2_atria', 'PC3_str', 'SAN_P_cell', 'SMC1_basic', 'SMC2_art', 'T/NK_cycling', 'aCM1', 'aCM2', 'aCM3', 'aCM4', 'AVN_bundle_cell', 'PC4_CMC-like', 'vCM1', 'vCM2', 'vCM3_stressed', 'vCM4', 'vCM5', 'AVN_P_cell', 'CD4+T_Tfh', 'CD4+T_Th1', 'CD4+T_Th2', 'CD4+T_reg', 'NC5_glial

In [195]:
adata.obs.dtypes

n_genes_by_counts                              int32
log1p_n_genes_by_counts                      float64
total_counts                                 float32
log1p_total_counts                           float32
sangerID                                    category
                                              ...   
organism_ontology_term_id                   category
self_reported_ethnicity_ontology_term_id    category
sex_ontology_term_id                        category
suspension_type                             category
tissue_ontology_term_id                     category
Length: 166, dtype: object

In [196]:
# Downcast 64-bit numeric columns of adata.var to their 32-bit counterparts.
# The dtypes are read once up front, so the casts below do not change which
# columns are selected; all float64 columns are handled before any int64 ones.
var_dtypes = adata.var.dtypes
for wide_dtype, narrow_dtype in (('float64', 'float32'), ('int64', 'int32')):
    wide_cols = [name for name, kind in var_dtypes.items() if kind == wide_dtype]
    for col in wide_cols:
        adata.var[col] = adata.var[col].astype(narrow_dtype)
        print(f"changed {col} from {wide_dtype} to {narrow_dtype}")

In [197]:
# Normalise adata.obs column dtypes: float64 -> float32, int64 -> int32,
# object -> category. The dtypes are read once up front, so the casts below do
# not change which columns are selected; conversions run in the order listed.
obs_dtypes = adata.obs.dtypes
conversions = (
    ('float64', 'float32'),
    ('int64', 'int32'),
    ('object', 'category'),
)
for old_dtype, new_dtype in conversions:
    matching_cols = [name for name, kind in obs_dtypes.items() if kind == old_dtype]
    for col in matching_cols:
        adata.obs[col] = adata.obs[col].astype(new_dtype)
        print(f"changed {col} from {old_dtype} to {new_dtype}")

changed log1p_n_genes_by_counts from float64 to float32
changed Adip1 from float64 to float32
changed Adip2 from float64 to float32
changed Adip3 from float64 to float32
changed B from float64 to float32
changed B_plasma from float64 to float32
changed CD14+Mo from float64 to float32
changed CD16+Mo from float64 to float32
changed CD4+T_act from float64 to float32
changed CD4+T_naive from float64 to float32
changed CD8+T_cytox from float64 to float32
changed CD8+T_em from float64 to float32
changed CD8+T_te from float64 to float32
changed CD8+T_trans from float64 to float32
changed DC from float64 to float32
changed EC10_CMC-like from float64 to float32
changed EC1_cap from float64 to float32
changed EC2_cap from float64 to float32
changed EC3_cap from float64 to float32
changed EC4_immune from float64 to float32
changed EC5_art from float64 to float32
changed EC6_ven from float64 to float32
changed EC7_endocardial from float64 to float32
changed EC8_ln from float64 to float32
changed 

In [198]:
adata.obs

Unnamed: 0_level_0,n_genes_by_counts,log1p_n_genes_by_counts,total_counts,log1p_total_counts,sangerID,region,donor_type,age,facility,flushed,...,cell_type_ontology_term_id,donor_id,development_stage_ontology_term_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id,self_reported_ethnicity_ontology_term_id,sex_ontology_term_id,suspension_type,tissue_ontology_term_id
spot_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
HCAHeartST10550732_AAACAAGTATCTCCCA-1,3434,8.141772,11563.0,9.355652,HCAHeartST10550732,SP,DBD,55-60,Sanger,no,...,CL:0000576,D3,HsapDv:0000240,PATO:0000461,False,NCBITaxon:9606,HANCESTRO:0005,PATO:0000384,na,UBERON:0002094
HCAHeartST10550732_AAACAGAGCGACTCCT-1,1753,7.469654,5269.0,8.569786,HCAHeartST10550732,SP,DBD,55-60,Sanger,no,...,CL:0002131,D3,HsapDv:0000240,PATO:0000461,False,NCBITaxon:9606,HANCESTRO:0005,PATO:0000384,na,UBERON:0002094
HCAHeartST10550732_AAACAGCTTTCAGAAG-1,1977,7.589841,5859.0,8.675905,HCAHeartST10550732,SP,DBD,55-60,Sanger,no,...,CL:0002131,D3,HsapDv:0000240,PATO:0000461,False,NCBITaxon:9606,HANCESTRO:0005,PATO:0000384,na,UBERON:0002094
HCAHeartST10550732_AAACAGGGTCTATATT-1,1579,7.365180,4535.0,8.419801,HCAHeartST10550732,SP,DBD,55-60,Sanger,no,...,CL:0002131,D3,HsapDv:0000240,PATO:0000461,False,NCBITaxon:9606,HANCESTRO:0005,PATO:0000384,na,UBERON:0002094
HCAHeartST10550732_AAACCCGAACGAAATC-1,2051,7.626570,6727.0,8.814034,HCAHeartST10550732,SP,DBD,55-60,Sanger,no,...,CL:0002131,D3,HsapDv:0000240,PATO:0000461,False,NCBITaxon:9606,HANCESTRO:0005,PATO:0000384,na,UBERON:0002094
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HCAHeartST10550732_TTGTGTTTCCCGAAAG-1,1060,6.966967,2694.0,7.899154,HCAHeartST10550732,SP,DBD,55-60,Sanger,no,...,CL:0002131,D3,HsapDv:0000240,PATO:0000461,False,NCBITaxon:9606,HANCESTRO:0005,PATO:0000384,na,UBERON:0002094
HCAHeartST10550732_TTGTTAGCAAATTCGA-1,1059,6.966024,2427.0,7.794823,HCAHeartST10550732,SP,DBD,55-60,Sanger,no,...,CL:0002131,D3,HsapDv:0000240,PATO:0000461,False,NCBITaxon:9606,HANCESTRO:0005,PATO:0000384,na,UBERON:0002094
HCAHeartST10550732_TTGTTCAGTGTGCTAC-1,1262,7.141245,3458.0,8.148735,HCAHeartST10550732,SP,DBD,55-60,Sanger,no,...,CL:0002131,D3,HsapDv:0000240,PATO:0000461,False,NCBITaxon:9606,HANCESTRO:0005,PATO:0000384,na,UBERON:0002094
HCAHeartST10550732_TTGTTGTGTGTCAAGA-1,1152,7.050123,2863.0,7.959974,HCAHeartST10550732,SP,DBD,55-60,Sanger,no,...,CL:0002131,D3,HsapDv:0000240,PATO:0000461,False,NCBITaxon:9606,HANCESTRO:0005,PATO:0000384,na,UBERON:0002094


In [199]:
adata.obs.columns

Index(['n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts',
       'log1p_total_counts', 'sangerID', 'region', 'donor_type', 'age',
       'facility', 'flushed',
       ...
       'cell_type_ontology_term_id', 'donor_id',
       'development_stage_ontology_term_id', 'disease_ontology_term_id',
       'is_primary_data', 'organism_ontology_term_id',
       'self_reported_ethnicity_ontology_term_id', 'sex_ontology_term_id',
       'suspension_type', 'tissue_ontology_term_id'],
      dtype='object', length=166)

In [200]:
adata.var

Unnamed: 0_level_0,feature_is_filtered
gene_ids,Unnamed: 1_level_1
ENSG00000243485,False
ENSG00000237613,False
ENSG00000186092,False
ENSG00000238009,False
ENSG00000239945,False
...,...
ENSG00000277856,False
ENSG00000275063,False
ENSG00000271254,False
ENSG00000277475,False


In [201]:
adata.obs

Unnamed: 0_level_0,n_genes_by_counts,log1p_n_genes_by_counts,total_counts,log1p_total_counts,sangerID,region,donor_type,age,facility,flushed,...,cell_type_ontology_term_id,donor_id,development_stage_ontology_term_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id,self_reported_ethnicity_ontology_term_id,sex_ontology_term_id,suspension_type,tissue_ontology_term_id
spot_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
HCAHeartST10550732_AAACAAGTATCTCCCA-1,3434,8.141772,11563.0,9.355652,HCAHeartST10550732,SP,DBD,55-60,Sanger,no,...,CL:0000576,D3,HsapDv:0000240,PATO:0000461,False,NCBITaxon:9606,HANCESTRO:0005,PATO:0000384,na,UBERON:0002094
HCAHeartST10550732_AAACAGAGCGACTCCT-1,1753,7.469654,5269.0,8.569786,HCAHeartST10550732,SP,DBD,55-60,Sanger,no,...,CL:0002131,D3,HsapDv:0000240,PATO:0000461,False,NCBITaxon:9606,HANCESTRO:0005,PATO:0000384,na,UBERON:0002094
HCAHeartST10550732_AAACAGCTTTCAGAAG-1,1977,7.589841,5859.0,8.675905,HCAHeartST10550732,SP,DBD,55-60,Sanger,no,...,CL:0002131,D3,HsapDv:0000240,PATO:0000461,False,NCBITaxon:9606,HANCESTRO:0005,PATO:0000384,na,UBERON:0002094
HCAHeartST10550732_AAACAGGGTCTATATT-1,1579,7.365180,4535.0,8.419801,HCAHeartST10550732,SP,DBD,55-60,Sanger,no,...,CL:0002131,D3,HsapDv:0000240,PATO:0000461,False,NCBITaxon:9606,HANCESTRO:0005,PATO:0000384,na,UBERON:0002094
HCAHeartST10550732_AAACCCGAACGAAATC-1,2051,7.626570,6727.0,8.814034,HCAHeartST10550732,SP,DBD,55-60,Sanger,no,...,CL:0002131,D3,HsapDv:0000240,PATO:0000461,False,NCBITaxon:9606,HANCESTRO:0005,PATO:0000384,na,UBERON:0002094
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HCAHeartST10550732_TTGTGTTTCCCGAAAG-1,1060,6.966967,2694.0,7.899154,HCAHeartST10550732,SP,DBD,55-60,Sanger,no,...,CL:0002131,D3,HsapDv:0000240,PATO:0000461,False,NCBITaxon:9606,HANCESTRO:0005,PATO:0000384,na,UBERON:0002094
HCAHeartST10550732_TTGTTAGCAAATTCGA-1,1059,6.966024,2427.0,7.794823,HCAHeartST10550732,SP,DBD,55-60,Sanger,no,...,CL:0002131,D3,HsapDv:0000240,PATO:0000461,False,NCBITaxon:9606,HANCESTRO:0005,PATO:0000384,na,UBERON:0002094
HCAHeartST10550732_TTGTTCAGTGTGCTAC-1,1262,7.141245,3458.0,8.148735,HCAHeartST10550732,SP,DBD,55-60,Sanger,no,...,CL:0002131,D3,HsapDv:0000240,PATO:0000461,False,NCBITaxon:9606,HANCESTRO:0005,PATO:0000384,na,UBERON:0002094
HCAHeartST10550732_TTGTTGTGTGTCAAGA-1,1152,7.050123,2863.0,7.959974,HCAHeartST10550732,SP,DBD,55-60,Sanger,no,...,CL:0002131,D3,HsapDv:0000240,PATO:0000461,False,NCBITaxon:9606,HANCESTRO:0005,PATO:0000384,na,UBERON:0002094


In [202]:
adata.obs.columns

Index(['n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts',
       'log1p_total_counts', 'sangerID', 'region', 'donor_type', 'age',
       'facility', 'flushed',
       ...
       'cell_type_ontology_term_id', 'donor_id',
       'development_stage_ontology_term_id', 'disease_ontology_term_id',
       'is_primary_data', 'organism_ontology_term_id',
       'self_reported_ethnicity_ontology_term_id', 'sex_ontology_term_id',
       'suspension_type', 'tissue_ontology_term_id'],
      dtype='object', length=166)

In [203]:
# Check the format of the expression matrix

In [204]:
adata.X

<2199x33234 sparse matrix of type '<class 'numpy.float32'>'
	with 3397093 stored elements in Compressed Sparse Row format>

In [205]:
araw.X

<2199x33234 sparse matrix of type '<class 'numpy.float32'>'
	with 3397093 stored elements in Compressed Sparse Row format>

In [206]:
# Copy raw counts to adata.raw

In [207]:
# `araw` is created earlier in the notebook (outside this section); assigning it
# here populates the object's .raw slot.
adata.raw = araw

In [208]:
# Write the curated object to the final_objects folder

In [209]:
# Save the curated AnnData object as a gzip-compressed .h5ad file.
# NOTE(review): the destination is an absolute cluster path — consider reading
# the output directory from a configurable constant.
adata.write('/lustre/scratch127/cellgen/cellgeni/cxgportal_sets/heart_cell_atlas/final_objects/HCAHeartST10550732(OCT).h5ad', compression = 'gzip')