### **Curating OCT_LA.h5ad**

Article: Spatially resolved multiomics of human cardiac niches

DOI: https://doi.org/10.1038/s41586-023-06311-1 

Data Source : https://www.heartcellatlas.org

##### **Mount farm**

mount-farm

##### **Packages required for curation**

In [1]:
#Import all packages required for curation

In [2]:
import numpy as np
import pandas as pd
import scanpy as sc
import scipy
from tqdm import tqdm
from scipy import sparse
from scipy.sparse import csr_matrix
import anndata as ad
import os
import subprocess
import math

### **Curation Schema**

##### **X (Matrix Layers)**

##### **AnnData object**

In [3]:
# Load the AnnData object

In [4]:
# Read the OCT_LA AnnData file from its hard-coded absolute path.
adata = sc.read_h5ad('/lustre/scratch127/cellgen/cellgeni/cxgportal_sets/heart_cell_atlas/data/OCT_LA.h5ad')

In [5]:
# View the AnnData object

In [6]:
adata

AnnData object with n_obs × n_vars = 5822 × 33538
    obs: 'array_row', 'array_col', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'sangerID', 'region', 'donor', 'donor_type', 'age', 'gender', 'facility', 'modality', 'kit_10x', 'flushed', 'annotation_final', 'Adip1', 'Adip2', 'Adip3', 'B', 'B_plasma', 'CD14+Mo', 'CD16+Mo', 'CD4+T_act', 'CD4+T_naive', 'CD8+T_cytox', 'CD8+T_em', 'CD8+T_te', 'CD8+T_trans', 'DC', 'EC10_CMC-like', 'EC1_cap', 'EC2_cap', 'EC3_cap', 'EC4_immune', 'EC5_art', 'EC6_ven', 'EC7_endocardial', 'EC8_ln', 'FB1', 'FB2', 'FB3', 'FB4_activated', 'FB5', 'FB6', 'ILC', 'LYVE1+IGF1+MP', 'LYVE1+MP_cycling', 'LYVE1+TIMD4+MP', 'MAIT-like', 'Mast', 'Meso', 'MoMP', 'NC1_glial', 'NC2_glial_NGF+', 'NK_CD16hi', 'NK_CD56hi', 'Neut', 'PC1_vent', 'PC2_atria', 'PC3_str', 'SAN_P_cell', 'SMC1_basic', 'SMC2_art', 'T/NK_cycling', 'aCM1', 'aCM2', 'aCM3', 'aCM4', 'AVN_bundle_cell', 'PC4_CMC-like', 'vCM1', 'vCM2', 'vCM3_stressed', 'vCM4', 'vCM5', 'AVN_P_c

##### **X- expression matrix**

In [7]:
# View the expression matrix of the anndata object

In [8]:
adata.X

<5822x33538 sparse matrix of type '<class 'numpy.float32'>'
	with 9897606 stored elements in Compressed Sparse Row format>

In [9]:
# Print the matrix to check whether it holds normalized or raw counts: non-integer (floating-point) values indicate normalized counts, whereas integer values indicate raw counts.

In [10]:
print(adata.X)

  (0, 39)	1.3042499
  (0, 46)	1.3042499
  (0, 72)	0.85120255
  (0, 91)	0.85120255
  (0, 113)	0.85120255
  (0, 122)	0.85120255
  (0, 127)	0.85120255
  (0, 154)	1.3042499
  (0, 160)	0.85120255
  (0, 185)	0.85120255
  (0, 190)	1.3042499
  (0, 209)	0.85120255
  (0, 220)	0.85120255
  (0, 225)	0.85120255
  (0, 229)	0.85120255
  (0, 236)	0.85120255
  (0, 256)	0.85120255
  (0, 264)	0.85120255
  (0, 267)	6.6414475
  (0, 268)	2.5712495
  (0, 270)	0.85120255
  (0, 271)	1.3042499
  (0, 275)	0.85120255
  (0, 331)	0.85120255
  (0, 338)	0.85120255
  :	:
  (5821, 32853)	1.3258771
  (5821, 33017)	1.3258771
  (5821, 33064)	1.3258771
  (5821, 33078)	1.3258771
  (5821, 33131)	1.3258771
  (5821, 33133)	1.876556
  (5821, 33220)	1.3258771
  (5821, 33239)	1.876556
  (5821, 33254)	1.876556
  (5821, 33285)	1.3258771
  (5821, 33322)	1.3258771
  (5821, 33376)	1.876556
  (5821, 33406)	1.3258771
  (5821, 33496)	6.341989
  (5821, 33497)	6.010106
  (5821, 33498)	6.7121654
  (5821, 33499)	6.1668115
  (5821, 33500)	3.8

##### **Raw counts matrix**

In [11]:
# If X has normalized counts, check for the raw counts matrix.

In [12]:
#Here the raw counts are provided in a separate object, load the raw counts matrix

In [13]:
# Read the separate raw-counts AnnData file (a combined object; it is subset to this dataset's region in a later cell).
araw = sc.read_h5ad('/lustre/scratch127/cellgen/cellgeni/cxgportal_sets/heart_cell_atlas/Raw_counts/visium-OCT_adult-8reg-revision_raw.h5ad')

In [14]:
# view raw object

In [15]:
araw

AnnData object with n_obs × n_vars = 93788 × 33538
    obs: 'in_tissue', 'array_row', 'array_col', 'sample', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes', 'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes', 'mt_frac', 'n_counts', 'n_genes', 'annotation_JC', 'sangerID', 'Publication', 'combinedID', 'donor', 'donor_type', 'region', 'region_finest', 'age', 'gender', 'facility', 'cell_or_nuclei', 'modality', 'kit_10x', 'flushed', 'region_cell2loc'
    var: 'gene_ids', 'feature_types', 'genome', 'SYMBOL'
    uns: 'spatial'
    obsm: 'MT', 'means_cell_abundance_w_sf', 'q05_cell_abundance_w_sf', 'q95_cell_abundance_w_sf', 'spatial', 'stds_cell_abundance_w_sf'

In [16]:
# view raw matrix

In [17]:
araw.X

<93788x33538 sparse matrix of type '<class 'numpy.float32'>'
	with 147751779 stored elements in Compressed Sparse Row format>

In [18]:
print(araw.X)

  (0, 26)	1.0
  (0, 27)	1.0
  (0, 39)	1.0
  (0, 154)	2.0
  (0, 156)	1.0
  (0, 171)	1.0
  (0, 201)	1.0
  (0, 219)	2.0
  (0, 220)	1.0
  (0, 267)	61.0
  (0, 275)	1.0
  (0, 350)	1.0
  (0, 363)	1.0
  (0, 408)	1.0
  (0, 414)	2.0
  (0, 423)	2.0
  (0, 424)	1.0
  (0, 442)	1.0
  (0, 449)	1.0
  (0, 470)	5.0
  (0, 472)	2.0
  (0, 473)	1.0
  (0, 491)	1.0
  (0, 493)	1.0
  (0, 526)	1.0
  :	:
  (93787, 33249)	1.0
  (93787, 33254)	1.0
  (93787, 33294)	1.0
  (93787, 33375)	1.0
  (93787, 33376)	2.0
  (93787, 33399)	1.0
  (93787, 33413)	1.0
  (93787, 33446)	1.0
  (93787, 33451)	1.0
  (93787, 33474)	2.0
  (93787, 33479)	2.0
  (93787, 33490)	1.0
  (93787, 33492)	1.0
  (93787, 33496)	57.0
  (93787, 33497)	31.0
  (93787, 33498)	101.0
  (93787, 33499)	78.0
  (93787, 33500)	3.0
  (93787, 33501)	70.0
  (93787, 33502)	67.0
  (93787, 33503)	33.0
  (93787, 33504)	3.0
  (93787, 33505)	54.0
  (93787, 33506)	9.0
  (93787, 33508)	40.0


In [19]:
# Since the raw object combines several regions, extract the raw counts for this dataset (region 'LA').

In [20]:
# Keep only the spots whose 'region' annotation is 'LA' (the result is a view of the combined object).
araw = araw[araw.obs['region'].eq('LA')]

In [21]:
araw

View of AnnData object with n_obs × n_vars = 5822 × 33538
    obs: 'in_tissue', 'array_row', 'array_col', 'sample', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes', 'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes', 'mt_frac', 'n_counts', 'n_genes', 'annotation_JC', 'sangerID', 'Publication', 'combinedID', 'donor', 'donor_type', 'region', 'region_finest', 'age', 'gender', 'facility', 'cell_or_nuclei', 'modality', 'kit_10x', 'flushed', 'region_cell2loc'
    var: 'gene_ids', 'feature_types', 'genome', 'SYMBOL'
    uns: 'spatial'
    obsm: 'MT', 'means_cell_abundance_w_sf', 'q05_cell_abundance_w_sf', 'q95_cell_abundance_w_sf', 'spatial', 'stds_cell_abundance_w_sf'

##### **Variables(var)**

In [22]:
#View the var of anndata and raw object

In [23]:
adata.var

Unnamed: 0_level_0,gene_ids,feature_types,genome
SYMBOL,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
MIR1302-2HG,ENSG00000243485,Gene Expression,GRCh38
FAM138A,ENSG00000237613,Gene Expression,GRCh38
OR4F5,ENSG00000186092,Gene Expression,GRCh38
AL627309.1,ENSG00000238009,Gene Expression,GRCh38
AL627309.3,ENSG00000239945,Gene Expression,GRCh38
...,...,...,...
AC233755.2,ENSG00000277856,Gene Expression,GRCh38
AC233755.1,ENSG00000275063,Gene Expression,GRCh38
AC240274.1,ENSG00000271254,Gene Expression,GRCh38
AC213203.1,ENSG00000277475,Gene Expression,GRCh38


In [24]:
araw.var

Unnamed: 0,gene_ids,feature_types,genome,SYMBOL
ENSG00000243485,ENSG00000243485,Gene Expression,GRCh38,MIR1302-2HG
ENSG00000237613,ENSG00000237613,Gene Expression,GRCh38,FAM138A
ENSG00000186092,ENSG00000186092,Gene Expression,GRCh38,OR4F5
ENSG00000238009,ENSG00000238009,Gene Expression,GRCh38,AL627309.1
ENSG00000239945,ENSG00000239945,Gene Expression,GRCh38,AL627309.3
...,...,...,...,...
ENSG00000277856,ENSG00000277856,Gene Expression,GRCh38,AC233755.2
ENSG00000275063,ENSG00000275063,Gene Expression,GRCh38,AC233755.1
ENSG00000271254,ENSG00000271254,Gene Expression,GRCh38,AC240274.1
ENSG00000277475,ENSG00000277475,Gene Expression,GRCh38,AC213203.1


In [25]:
# Check the index column of var. Check whether ensembl ids are provided in the index column of var or not.

In [26]:
# If the Ensembl IDs are not in the index column but are present in another column, set that column as the index.

In [27]:
# Preserve the current var index (gene symbols) in its own column before the index is replaced.
adata.var['gene_symbols'] = adata.var.index

In [28]:
adata.var

Unnamed: 0_level_0,gene_ids,feature_types,genome,gene_symbols
SYMBOL,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
MIR1302-2HG,ENSG00000243485,Gene Expression,GRCh38,MIR1302-2HG
FAM138A,ENSG00000237613,Gene Expression,GRCh38,FAM138A
OR4F5,ENSG00000186092,Gene Expression,GRCh38,OR4F5
AL627309.1,ENSG00000238009,Gene Expression,GRCh38,AL627309.1
AL627309.3,ENSG00000239945,Gene Expression,GRCh38,AL627309.3
...,...,...,...,...
AC233755.2,ENSG00000277856,Gene Expression,GRCh38,AC233755.2
AC233755.1,ENSG00000275063,Gene Expression,GRCh38,AC233755.1
AC240274.1,ENSG00000271254,Gene Expression,GRCh38,AC240274.1
AC213203.1,ENSG00000277475,Gene Expression,GRCh38,AC213203.1


In [29]:
# Re-index var by the Ensembl IDs held in 'gene_ids'; the symbols stay available in 'gene_symbols'.
# NOTE(review): re-running the 'gene_symbols' cell after this one would overwrite the symbols
# with Ensembl IDs, so these two cells must be executed in order exactly once.
adata.var_names = adata.var['gene_ids']

In [30]:
adata.var

Unnamed: 0_level_0,gene_ids,feature_types,genome,gene_symbols
gene_ids,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ENSG00000243485,ENSG00000243485,Gene Expression,GRCh38,MIR1302-2HG
ENSG00000237613,ENSG00000237613,Gene Expression,GRCh38,FAM138A
ENSG00000186092,ENSG00000186092,Gene Expression,GRCh38,OR4F5
ENSG00000238009,ENSG00000238009,Gene Expression,GRCh38,AL627309.1
ENSG00000239945,ENSG00000239945,Gene Expression,GRCh38,AL627309.3
...,...,...,...,...
ENSG00000277856,ENSG00000277856,Gene Expression,GRCh38,AC233755.2
ENSG00000275063,ENSG00000275063,Gene Expression,GRCh38,AC233755.1
ENSG00000271254,ENSG00000271254,Gene Expression,GRCh38,AC240274.1
ENSG00000277475,ENSG00000277475,Gene Expression,GRCh38,AC213203.1


In [31]:
# load the approved genes file

In [32]:
# Load the approved-genes table from its hard-coded path; only its 'feature_id' column is used below.
approved_genes = pd.read_csv('/home/jovyan/CXG_DATASETS_PORTAL/gene_info/genes_approved.csv')

In [33]:
# Create a lookup dictionary keyed by the feature_id column of the approved genes file.

In [34]:
# Membership lookup of approved feature IDs; the value 1 is only a placeholder.
genedict = dict.fromkeys(approved_genes.feature_id, 1)

In [35]:
genedict

{'ERCC-00002': 1,
 'ERCC-00003': 1,
 'ERCC-00004': 1,
 'ERCC-00009': 1,
 'ERCC-00012': 1,
 'ERCC-00013': 1,
 'ERCC-00014': 1,
 'ERCC-00016': 1,
 'ERCC-00017': 1,
 'ERCC-00019': 1,
 'ERCC-00022': 1,
 'ERCC-00024': 1,
 'ERCC-00025': 1,
 'ERCC-00028': 1,
 'ERCC-00031': 1,
 'ERCC-00033': 1,
 'ERCC-00034': 1,
 'ERCC-00035': 1,
 'ERCC-00039': 1,
 'ERCC-00040': 1,
 'ERCC-00041': 1,
 'ERCC-00042': 1,
 'ERCC-00043': 1,
 'ERCC-00044': 1,
 'ERCC-00046': 1,
 'ERCC-00048': 1,
 'ERCC-00051': 1,
 'ERCC-00053': 1,
 'ERCC-00054': 1,
 'ERCC-00057': 1,
 'ERCC-00058': 1,
 'ERCC-00059': 1,
 'ERCC-00060': 1,
 'ERCC-00061': 1,
 'ERCC-00062': 1,
 'ERCC-00067': 1,
 'ERCC-00069': 1,
 'ERCC-00071': 1,
 'ERCC-00073': 1,
 'ERCC-00074': 1,
 'ERCC-00075': 1,
 'ERCC-00076': 1,
 'ERCC-00077': 1,
 'ERCC-00078': 1,
 'ERCC-00079': 1,
 'ERCC-00081': 1,
 'ERCC-00083': 1,
 'ERCC-00084': 1,
 'ERCC-00085': 1,
 'ERCC-00086': 1,
 'ERCC-00092': 1,
 'ERCC-00095': 1,
 'ERCC-00096': 1,
 'ERCC-00097': 1,
 'ERCC-00098': 1,
 'ERCC-000

In [36]:
len(genedict)

116184

In [37]:
# Filter out the genes which are not in the approved genes file

In [38]:
# Collect, in their original order, the feature IDs of each object that appear in the approved list.
var_to_keep_adata = [
    feature_id for feature_id in adata.var_names
    if feature_id in genedict
]
var_to_keep_araw = [
    feature_id for feature_id in araw.var_names
    if feature_id in genedict
]

In [39]:
len(var_to_keep_adata)

33234

In [40]:
len(var_to_keep_araw)

33234

In [41]:
adata.var

Unnamed: 0_level_0,gene_ids,feature_types,genome,gene_symbols
gene_ids,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ENSG00000243485,ENSG00000243485,Gene Expression,GRCh38,MIR1302-2HG
ENSG00000237613,ENSG00000237613,Gene Expression,GRCh38,FAM138A
ENSG00000186092,ENSG00000186092,Gene Expression,GRCh38,OR4F5
ENSG00000238009,ENSG00000238009,Gene Expression,GRCh38,AL627309.1
ENSG00000239945,ENSG00000239945,Gene Expression,GRCh38,AL627309.3
...,...,...,...,...
ENSG00000277856,ENSG00000277856,Gene Expression,GRCh38,AC233755.2
ENSG00000275063,ENSG00000275063,Gene Expression,GRCh38,AC233755.1
ENSG00000271254,ENSG00000271254,Gene Expression,GRCh38,AC240274.1
ENSG00000277475,ENSG00000277475,Gene Expression,GRCh38,AC213203.1


In [42]:
araw.var

Unnamed: 0,gene_ids,feature_types,genome,SYMBOL
ENSG00000243485,ENSG00000243485,Gene Expression,GRCh38,MIR1302-2HG
ENSG00000237613,ENSG00000237613,Gene Expression,GRCh38,FAM138A
ENSG00000186092,ENSG00000186092,Gene Expression,GRCh38,OR4F5
ENSG00000238009,ENSG00000238009,Gene Expression,GRCh38,AL627309.1
ENSG00000239945,ENSG00000239945,Gene Expression,GRCh38,AL627309.3
...,...,...,...,...
ENSG00000277856,ENSG00000277856,Gene Expression,GRCh38,AC233755.2
ENSG00000275063,ENSG00000275063,Gene Expression,GRCh38,AC233755.1
ENSG00000271254,ENSG00000271254,Gene Expression,GRCh38,AC240274.1
ENSG00000277475,ENSG00000277475,Gene Expression,GRCh38,AC213203.1


In [43]:
# Subset both the AnnData object and the raw object to the approved genes. (The original index values were already copied to the gene_symbols column above.)

In [44]:
# Restrict both objects to the approved features. .copy() materialises the subset, so the
# results are independent AnnData objects rather than views.
adata = adata[:, var_to_keep_adata].copy()
araw = araw[:, var_to_keep_araw].copy()

In [45]:
#  View the var

In [46]:
adata.var

Unnamed: 0_level_0,gene_ids,feature_types,genome,gene_symbols
gene_ids,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ENSG00000243485,ENSG00000243485,Gene Expression,GRCh38,MIR1302-2HG
ENSG00000237613,ENSG00000237613,Gene Expression,GRCh38,FAM138A
ENSG00000186092,ENSG00000186092,Gene Expression,GRCh38,OR4F5
ENSG00000238009,ENSG00000238009,Gene Expression,GRCh38,AL627309.1
ENSG00000239945,ENSG00000239945,Gene Expression,GRCh38,AL627309.3
...,...,...,...,...
ENSG00000277856,ENSG00000277856,Gene Expression,GRCh38,AC233755.2
ENSG00000275063,ENSG00000275063,Gene Expression,GRCh38,AC233755.1
ENSG00000271254,ENSG00000271254,Gene Expression,GRCh38,AC240274.1
ENSG00000277475,ENSG00000277475,Gene Expression,GRCh38,AC213203.1


In [47]:
araw.var

Unnamed: 0,gene_ids,feature_types,genome,SYMBOL
ENSG00000243485,ENSG00000243485,Gene Expression,GRCh38,MIR1302-2HG
ENSG00000237613,ENSG00000237613,Gene Expression,GRCh38,FAM138A
ENSG00000186092,ENSG00000186092,Gene Expression,GRCh38,OR4F5
ENSG00000238009,ENSG00000238009,Gene Expression,GRCh38,AL627309.1
ENSG00000239945,ENSG00000239945,Gene Expression,GRCh38,AL627309.3
...,...,...,...,...
ENSG00000277856,ENSG00000277856,Gene Expression,GRCh38,AC233755.2
ENSG00000275063,ENSG00000275063,Gene Expression,GRCh38,AC233755.1
ENSG00000271254,ENSG00000271254,Gene Expression,GRCh38,AC240274.1
ENSG00000277475,ENSG00000277475,Gene Expression,GRCh38,AC213203.1


##### **feature_is_filtered**

In [48]:
# Assign False since the feature was not filtered out in the normalized matrix (X).

In [49]:
# Broadcast a single False to every feature row.
adata.var['feature_is_filtered'] = False

In [50]:
#View var

In [51]:
adata.var

Unnamed: 0_level_0,gene_ids,feature_types,genome,gene_symbols,feature_is_filtered
gene_ids,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ENSG00000243485,ENSG00000243485,Gene Expression,GRCh38,MIR1302-2HG,False
ENSG00000237613,ENSG00000237613,Gene Expression,GRCh38,FAM138A,False
ENSG00000186092,ENSG00000186092,Gene Expression,GRCh38,OR4F5,False
ENSG00000238009,ENSG00000238009,Gene Expression,GRCh38,AL627309.1,False
ENSG00000239945,ENSG00000239945,Gene Expression,GRCh38,AL627309.3,False
...,...,...,...,...,...
ENSG00000277856,ENSG00000277856,Gene Expression,GRCh38,AC233755.2,False
ENSG00000275063,ENSG00000275063,Gene Expression,GRCh38,AC233755.1,False
ENSG00000271254,ENSG00000271254,Gene Expression,GRCh38,AC240274.1,False
ENSG00000277475,ENSG00000277475,Gene Expression,GRCh38,AC213203.1,False


In [52]:
araw.var

Unnamed: 0,gene_ids,feature_types,genome,SYMBOL
ENSG00000243485,ENSG00000243485,Gene Expression,GRCh38,MIR1302-2HG
ENSG00000237613,ENSG00000237613,Gene Expression,GRCh38,FAM138A
ENSG00000186092,ENSG00000186092,Gene Expression,GRCh38,OR4F5
ENSG00000238009,ENSG00000238009,Gene Expression,GRCh38,AL627309.1
ENSG00000239945,ENSG00000239945,Gene Expression,GRCh38,AL627309.3
...,...,...,...,...
ENSG00000277856,ENSG00000277856,Gene Expression,GRCh38,AC233755.2
ENSG00000275063,ENSG00000275063,Gene Expression,GRCh38,AC233755.1
ENSG00000271254,ENSG00000271254,Gene Expression,GRCh38,AC240274.1
ENSG00000277475,ENSG00000277475,Gene Expression,GRCh38,AC213203.1


In [53]:
#  Delete the unwanted columns in adata and araw.

In [54]:
# Remove every annotation column from the raw var table, leaving only its index.
del araw.var['gene_ids'], araw.var['SYMBOL'], araw.var['feature_types'], araw.var['genome']

In [55]:
# Remove the source annotation columns from var; 'feature_is_filtered' is the only column kept.
del adata.var['gene_ids'], adata.var['gene_symbols'], adata.var['feature_types'], adata.var['genome']

In [56]:
# view var

In [57]:
adata.var

Unnamed: 0_level_0,feature_is_filtered
gene_ids,Unnamed: 1_level_1
ENSG00000243485,False
ENSG00000237613,False
ENSG00000186092,False
ENSG00000238009,False
ENSG00000239945,False
...,...
ENSG00000277856,False
ENSG00000275063,False
ENSG00000271254,False
ENSG00000277475,False


In [58]:
araw.var

ENSG00000243485
ENSG00000237613
ENSG00000186092
ENSG00000238009
ENSG00000239945
...
ENSG00000277856
ENSG00000275063
ENSG00000271254
ENSG00000277475
ENSG00000268674


#### **Observations(obs) (Cell metadata)**

In [59]:
#view obs

In [60]:
adata.obs

Unnamed: 0_level_0,array_row,array_col,n_genes_by_counts,log1p_n_genes_by_counts,total_counts,log1p_total_counts,sangerID,region,donor,donor_type,...,CD4+T_Th1,CD4+T_Th2,CD4+T_reg,NC5_glial,aCM5,Adip4,NC3_glial,NC6_schwann,EC9_FB-like,gdT
spot_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
HCAHeartST10238252_AAACACCAATAACTGC-1,59,19,2298,7.740230,7449.0,8.915969,HCAHeartST10238252,LA,D8,DCD,...,0.003228,0.003118,0.004319,0.001773,0.002009,0.000759,0.005056,0.003200,,
HCAHeartST10238252_AAACAGCTTTCAGAAG-1,43,9,2202,7.697575,7383.0,8.907071,HCAHeartST10238252,LA,D8,DCD,...,0.002988,0.003698,0.002001,0.003224,0.003749,0.000812,0.001612,0.001164,,
HCAHeartST10238252_AAACAGGGTCTATATT-1,47,13,2703,7.902487,10462.0,9.255601,HCAHeartST10238252,LA,D8,DCD,...,0.001767,0.002178,0.001709,0.001045,0.004621,0.001874,0.054607,0.001461,,
HCAHeartST10238252_AAACATGGTGAGAGGA-1,62,0,1422,7.260523,3173.0,8.062748,HCAHeartST10238252,LA,D8,DCD,...,0.002429,0.004100,0.001880,0.001305,0.003449,0.070256,0.008251,0.001057,,
HCAHeartST10238252_AAACCGTTCGTCCAGG-1,52,42,2662,7.887209,9018.0,9.107089,HCAHeartST10238252,LA,D8,DCD,...,0.004135,0.009916,0.003853,0.013060,0.009841,0.000929,0.033318,0.011014,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HCAHeartST9341983_TTGTGGTAGGAGGGAT-1,50,28,2049,7.625595,8797.0,9.082280,HCAHeartST9341983,LA,D6,DCD,...,0.002563,0.004093,0.002098,0.002699,0.008795,0.034684,0.006533,0.002001,,
HCAHeartST9341983_TTGTTCAGTGTGCTAC-1,24,64,1806,7.499423,7245.0,8.888205,HCAHeartST9341983,LA,D6,DCD,...,0.002874,0.003963,0.002892,0.001227,0.005235,0.001087,0.003922,0.000738,,
HCAHeartST9341983_TTGTTGTGTGTCAAGA-1,31,77,1303,7.173192,4787.0,8.473868,HCAHeartST9341983,LA,D6,DCD,...,0.001225,0.001704,0.001614,0.000553,0.006996,0.000598,0.005064,0.001347,,
HCAHeartST9341983_TTGTTTCACATCCAGG-1,58,42,479,6.173786,1278.0,7.153834,HCAHeartST9341983,LA,D6,DCD,...,0.001163,0.001498,0.001378,0.000749,0.005012,0.000231,0.003459,0.002808,,


In [61]:
adata.obs.columns

Index(['array_row', 'array_col', 'n_genes_by_counts',
       'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts',
       'sangerID', 'region', 'donor', 'donor_type', 'age', 'gender',
       'facility', 'modality', 'kit_10x', 'flushed', 'annotation_final',
       'Adip1', 'Adip2', 'Adip3', 'B', 'B_plasma', 'CD14+Mo', 'CD16+Mo',
       'CD4+T_act', 'CD4+T_naive', 'CD8+T_cytox', 'CD8+T_em', 'CD8+T_te',
       'CD8+T_trans', 'DC', 'EC10_CMC-like', 'EC1_cap', 'EC2_cap', 'EC3_cap',
       'EC4_immune', 'EC5_art', 'EC6_ven', 'EC7_endocardial', 'EC8_ln', 'FB1',
       'FB2', 'FB3', 'FB4_activated', 'FB5', 'FB6', 'ILC', 'LYVE1+IGF1+MP',
       'LYVE1+MP_cycling', 'LYVE1+TIMD4+MP', 'MAIT-like', 'Mast', 'Meso',
       'MoMP', 'NC1_glial', 'NC2_glial_NGF+', 'NK_CD16hi', 'NK_CD56hi', 'Neut',
       'PC1_vent', 'PC2_atria', 'PC3_str', 'SAN_P_cell', 'SMC1_basic',
       'SMC2_art', 'T/NK_cycling', 'aCM1', 'aCM2', 'aCM3', 'aCM4',
       'AVN_bundle_cell', 'PC4_CMC-like', 'vCM1', 'vCM2', 'v

#### **assay_ontology_term_id**

In [62]:
# identify the column in adata which corresponds to assay

In [63]:
# Distinct kit labels present in this dataset.
adata.obs['kit_10x'].unique().tolist()

['Visium-v1']

In [64]:
# add the assay_ontology_term_id column

In [65]:
# Every spot gets the same assay term, so broadcast the scalar across all rows.
adata.obs['assay_ontology_term_id'] = 'EFO:0010961'

In [66]:
# change datatype of the column

In [67]:
# Store the assay term as a categorical column.
adata.obs['assay_ontology_term_id'] = pd.Categorical(adata.obs['assay_ontology_term_id'])

In [68]:
# view adata.obs

In [69]:
adata.obs

Unnamed: 0_level_0,array_row,array_col,n_genes_by_counts,log1p_n_genes_by_counts,total_counts,log1p_total_counts,sangerID,region,donor,donor_type,...,CD4+T_Th2,CD4+T_reg,NC5_glial,aCM5,Adip4,NC3_glial,NC6_schwann,EC9_FB-like,gdT,assay_ontology_term_id
spot_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
HCAHeartST10238252_AAACACCAATAACTGC-1,59,19,2298,7.740230,7449.0,8.915969,HCAHeartST10238252,LA,D8,DCD,...,0.003118,0.004319,0.001773,0.002009,0.000759,0.005056,0.003200,,,EFO:0010961
HCAHeartST10238252_AAACAGCTTTCAGAAG-1,43,9,2202,7.697575,7383.0,8.907071,HCAHeartST10238252,LA,D8,DCD,...,0.003698,0.002001,0.003224,0.003749,0.000812,0.001612,0.001164,,,EFO:0010961
HCAHeartST10238252_AAACAGGGTCTATATT-1,47,13,2703,7.902487,10462.0,9.255601,HCAHeartST10238252,LA,D8,DCD,...,0.002178,0.001709,0.001045,0.004621,0.001874,0.054607,0.001461,,,EFO:0010961
HCAHeartST10238252_AAACATGGTGAGAGGA-1,62,0,1422,7.260523,3173.0,8.062748,HCAHeartST10238252,LA,D8,DCD,...,0.004100,0.001880,0.001305,0.003449,0.070256,0.008251,0.001057,,,EFO:0010961
HCAHeartST10238252_AAACCGTTCGTCCAGG-1,52,42,2662,7.887209,9018.0,9.107089,HCAHeartST10238252,LA,D8,DCD,...,0.009916,0.003853,0.013060,0.009841,0.000929,0.033318,0.011014,,,EFO:0010961
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HCAHeartST9341983_TTGTGGTAGGAGGGAT-1,50,28,2049,7.625595,8797.0,9.082280,HCAHeartST9341983,LA,D6,DCD,...,0.004093,0.002098,0.002699,0.008795,0.034684,0.006533,0.002001,,,EFO:0010961
HCAHeartST9341983_TTGTTCAGTGTGCTAC-1,24,64,1806,7.499423,7245.0,8.888205,HCAHeartST9341983,LA,D6,DCD,...,0.003963,0.002892,0.001227,0.005235,0.001087,0.003922,0.000738,,,EFO:0010961
HCAHeartST9341983_TTGTTGTGTGTCAAGA-1,31,77,1303,7.173192,4787.0,8.473868,HCAHeartST9341983,LA,D6,DCD,...,0.001704,0.001614,0.000553,0.006996,0.000598,0.005064,0.001347,,,EFO:0010961
HCAHeartST9341983_TTGTTTCACATCCAGG-1,58,42,479,6.173786,1278.0,7.153834,HCAHeartST9341983,LA,D6,DCD,...,0.001498,0.001378,0.000749,0.005012,0.000231,0.003459,0.002808,,,EFO:0010961


#### **cell_type_ontology_term_id**

In [70]:
# Get the column in adata.obs related to cell type annotation

In [71]:
adata.obs.columns

Index(['array_row', 'array_col', 'n_genes_by_counts',
       'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts',
       'sangerID', 'region', 'donor', 'donor_type', 'age', 'gender',
       'facility', 'modality', 'kit_10x', 'flushed', 'annotation_final',
       'Adip1', 'Adip2', 'Adip3', 'B', 'B_plasma', 'CD14+Mo', 'CD16+Mo',
       'CD4+T_act', 'CD4+T_naive', 'CD8+T_cytox', 'CD8+T_em', 'CD8+T_te',
       'CD8+T_trans', 'DC', 'EC10_CMC-like', 'EC1_cap', 'EC2_cap', 'EC3_cap',
       'EC4_immune', 'EC5_art', 'EC6_ven', 'EC7_endocardial', 'EC8_ln', 'FB1',
       'FB2', 'FB3', 'FB4_activated', 'FB5', 'FB6', 'ILC', 'LYVE1+IGF1+MP',
       'LYVE1+MP_cycling', 'LYVE1+TIMD4+MP', 'MAIT-like', 'Mast', 'Meso',
       'MoMP', 'NC1_glial', 'NC2_glial_NGF+', 'NK_CD16hi', 'NK_CD56hi', 'Neut',
       'PC1_vent', 'PC2_atria', 'PC3_str', 'SAN_P_cell', 'SMC1_basic',
       'SMC2_art', 'T/NK_cycling', 'aCM1', 'aCM2', 'aCM3', 'aCM4',
       'AVN_bundle_cell', 'PC4_CMC-like', 'vCM1', 'vCM2', 'v

In [72]:
adata.obsm

AxisArrays with keys: MT, means_cell_abundance_w_sf, prop, q05_cell_abundance_w_sf, q95_cell_abundance_w_sf, spatial, stds_cell_abundance_w_sf

In [73]:
adata.obsm['means_cell_abundance_w_sf']

Unnamed: 0_level_0,meanscell_abundance_w_sf_Adip1,meanscell_abundance_w_sf_Adip2,meanscell_abundance_w_sf_Adip3,meanscell_abundance_w_sf_B,meanscell_abundance_w_sf_B_plasma,meanscell_abundance_w_sf_CD14+Mo,meanscell_abundance_w_sf_CD16+Mo,meanscell_abundance_w_sf_CD4+T_act,meanscell_abundance_w_sf_CD4+T_naive,meanscell_abundance_w_sf_CD8+T_cytox,...,meanscell_abundance_w_sf_CD4+T_Th1,meanscell_abundance_w_sf_CD4+T_Th2,meanscell_abundance_w_sf_CD4+T_reg,meanscell_abundance_w_sf_NC5_glial,meanscell_abundance_w_sf_aCM5,meanscell_abundance_w_sf_Adip4,meanscell_abundance_w_sf_NC3_glial,meanscell_abundance_w_sf_NC6_schwann,meanscell_abundance_w_sf_EC9_FB-like,meanscell_abundance_w_sf_gdT
spot_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
HCAHeartST10238252_AAACACCAATAACTGC-1,0.033481,0.013122,0.021663,0.122123,0.051141,0.055996,0.067064,0.125679,0.170803,0.234475,...,0.074284,0.096848,0.105793,0.044999,0.029828,0.018171,0.046782,0.051292,,
HCAHeartST10238252_AAACAGCTTTCAGAAG-1,0.023015,0.009225,0.036216,0.069983,0.063417,0.238742,0.082563,0.170819,0.120898,0.083198,...,0.086088,0.083302,0.076103,0.059699,0.043791,0.025402,0.024359,0.030093,,
HCAHeartST10238252_AAACAGGGTCTATATT-1,0.023734,0.015191,0.015248,0.131658,0.023650,0.029503,0.213236,0.074517,0.070560,0.059708,...,0.055547,0.057932,0.055989,0.038344,0.045611,0.028907,0.130817,0.034401,,
HCAHeartST10238252_AAACATGGTGAGAGGA-1,0.604246,0.038368,0.066384,0.071262,0.036337,0.121047,0.133060,0.122905,0.093649,0.098526,...,0.083517,0.119052,0.070171,0.058582,0.047222,0.189945,0.074269,0.038426,,
HCAHeartST10238252_AAACCGTTCGTCCAGG-1,0.041989,0.016995,0.021385,0.062727,0.060769,0.059504,0.043281,0.102097,0.110942,0.158537,...,0.082503,0.148186,0.087875,0.121750,0.071651,0.023231,0.108422,0.091605,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HCAHeartST9341983_TTGTGGTAGGAGGGAT-1,0.100245,0.029569,0.042339,0.060568,0.020017,0.024477,0.025185,0.134345,0.104714,0.103722,...,0.087852,0.102155,0.083864,0.060857,0.064547,0.117566,0.051446,0.044107,,
HCAHeartST9341983_TTGTTCAGTGTGCTAC-1,0.024313,0.019209,0.035666,0.059930,0.013087,0.073584,0.022036,0.142127,0.122405,0.083265,...,0.089134,0.102812,0.106704,0.041693,0.064199,0.026737,0.037718,0.029178,,
HCAHeartST9341983_TTGTTGTGTGTCAAGA-1,0.023294,0.013651,0.026383,0.073748,0.019489,0.028292,0.024452,0.095692,0.095461,0.088575,...,0.075137,0.069138,0.059607,0.032415,0.072772,0.021194,0.050751,0.046822,,
HCAHeartST9341983_TTGTTTCACATCCAGG-1,0.033378,0.019419,0.022530,0.080360,0.016376,0.056746,0.028537,0.119122,0.130799,0.093830,...,0.083414,0.094537,0.117734,0.062129,0.077998,0.031150,0.061532,0.092784,,


In [74]:
# For every spot (row), list the abundance column(s) whose value equals that row's maximum.
# NaN entries never compare equal to the maximum, so all-NaN columns are never listed.
max_columns = adata.obsm['means_cell_abundance_w_sf'].apply(lambda x: x[x == x.max()].index.tolist(), axis=1)

In [75]:
max_columns

spot_id
HCAHeartST10238252_AAACACCAATAACTGC-1    [meanscell_abundance_w_sf_aCM4]
HCAHeartST10238252_AAACAGCTTTCAGAAG-1    [meanscell_abundance_w_sf_aCM4]
HCAHeartST10238252_AAACAGGGTCTATATT-1    [meanscell_abundance_w_sf_aCM4]
HCAHeartST10238252_AAACATGGTGAGAGGA-1     [meanscell_abundance_w_sf_FB5]
HCAHeartST10238252_AAACCGTTCGTCCAGG-1    [meanscell_abundance_w_sf_aCM4]
                                                      ...               
HCAHeartST9341983_TTGTGGTAGGAGGGAT-1     [meanscell_abundance_w_sf_aCM4]
HCAHeartST9341983_TTGTTCAGTGTGCTAC-1     [meanscell_abundance_w_sf_aCM4]
HCAHeartST9341983_TTGTTGTGTGTCAAGA-1     [meanscell_abundance_w_sf_aCM4]
HCAHeartST9341983_TTGTTTCACATCCAGG-1     [meanscell_abundance_w_sf_aCM4]
HCAHeartST9341983_TTGTTTCATTAGTCTA-1     [meanscell_abundance_w_sf_aCM4]
Length: 5822, dtype: object

In [76]:
# A spot has a tie when more than one abundance column attains the row maximum.
# Keep the tied column names for such spots and an empty list for all others.
# (Counting how often a name repeats inside one spot's list cannot detect ties:
# each column name appears at most once there, so that count is never above 1.)
columns_with_multiple_max = max_columns.apply(lambda tied: tied if len(tied) > 1 else [])

In [77]:
columns_with_multiple_max 

spot_id
HCAHeartST10238252_AAACACCAATAACTGC-1    []
HCAHeartST10238252_AAACAGCTTTCAGAAG-1    []
HCAHeartST10238252_AAACAGGGTCTATATT-1    []
HCAHeartST10238252_AAACATGGTGAGAGGA-1    []
HCAHeartST10238252_AAACCGTTCGTCCAGG-1    []
                                         ..
HCAHeartST9341983_TTGTGGTAGGAGGGAT-1     []
HCAHeartST9341983_TTGTTCAGTGTGCTAC-1     []
HCAHeartST9341983_TTGTTGTGTGTCAAGA-1     []
HCAHeartST9341983_TTGTTTCACATCCAGG-1     []
HCAHeartST9341983_TTGTTTCATTAGTCTA-1     []
Length: 5822, dtype: object

In [78]:
# Attach the per-spot tie lists to obs for inspection.
# NOTE(review): this column holds Python lists; presumably it has to be dropped before the
# object is written to disk — confirm in the later cells.
adata.obs['columns_with_multiple_max'] = columns_with_multiple_max

In [79]:
# Keep only the spots whose tie list contains at least one entry.
non_empty_columns = columns_with_multiple_max[columns_with_multiple_max.map(len).gt(0)]

In [80]:
len(non_empty_columns )

0

In [81]:
# Reduce each spot's list to its first column name with the obsm prefix removed, leaving the
# bare cell-type label; spots with an empty list get ''.
# Note: this rebinds max_columns from a Series of lists to a plain list of strings.
max_columns = [
    candidates[0].replace('meanscell_abundance_w_sf_', '') if candidates else ''
    for candidates in max_columns
]

In [82]:
# Store the dominant cell-type label per spot; max_columns is a plain list here, so values are assigned by position.
adata.obs['highest_cell_Density_columns'] = max_columns

In [83]:
adata.obs['highest_cell_Density_columns']

spot_id
HCAHeartST10238252_AAACACCAATAACTGC-1    aCM4
HCAHeartST10238252_AAACAGCTTTCAGAAG-1    aCM4
HCAHeartST10238252_AAACAGGGTCTATATT-1    aCM4
HCAHeartST10238252_AAACATGGTGAGAGGA-1     FB5
HCAHeartST10238252_AAACCGTTCGTCCAGG-1    aCM4
                                         ... 
HCAHeartST9341983_TTGTGGTAGGAGGGAT-1     aCM4
HCAHeartST9341983_TTGTTCAGTGTGCTAC-1     aCM4
HCAHeartST9341983_TTGTTGTGTGTCAAGA-1     aCM4
HCAHeartST9341983_TTGTTTCACATCCAGG-1     aCM4
HCAHeartST9341983_TTGTTTCATTAGTCTA-1     aCM4
Name: highest_cell_Density_columns, Length: 5822, dtype: object

In [84]:
# Distinct dominant labels; each one needs an entry in the ontology mapping below.
adata.obs['highest_cell_Density_columns'].unique().tolist()

['aCM4',
 'FB5',
 'LYVE1+IGF1+MP',
 'EC6_ven',
 'EC7_endocardial',
 'NC1_glial',
 'SMC2_art',
 'SMC1_basic',
 'EC3_cap',
 'Meso',
 'FB3',
 'FB4_activated',
 'FB1',
 'CD16+Mo',
 'PC3_str',
 'EC5_art',
 'Adip1',
 'NK_CD16hi',
 'LYVE1+TIMD4+MP',
 'Adip4',
 'EC8_ln',
 'Adip3',
 'B_plasma',
 'Adip2',
 'aCM3']

In [85]:
# Seed the ontology column with the dominant cell-type labels; a later cell translates them to term IDs.
adata.obs['cell_type_ontology_term_id'] = adata.obs['highest_cell_Density_columns']

In [86]:
# create a dictionary of cell type and ontology term

In [87]:
# Lookup from author cell-type labels to Cell Ontology (CL) term IDs.
# Several fine-grained labels share one term (e.g. every aCM* label maps to CL:0002129).
# NOTE(review): the following labels appear as obs/abundance columns in this notebook but have
# no entry here, so they cannot be translated to a term: 'vCM2', 'vCM5', 'CD4+T_Th1',
# 'CD4+T_reg', 'NC3_glial', 'NC5_glial', 'NC6_schwann', 'EC9_FB-like', 'gdT'.
# NOTE(review): 'AVN_P_cell' and 'SAN_P_cell' share CL:1000477 — confirm this is intended.
mapping= {'aCM1': 'CL:0002129',
 'aCM2': 'CL:0002129',
 'aCM3': 'CL:0002129',
 'aCM4': 'CL:0002129',
 'aCM5': 'CL:0002129',
 'Adip1': 'CL:0000136',
 'Adip2': 'CL:0000136',
 'Adip3': 'CL:0000136',
 'Adip4': 'CL:0000136',
 'Adipocyte': 'CL:0000136',
 'Atrial Cardiomyocyte': 'CL:0002129',
 'AVN_bundle_cell': 'CL:0010005',
 'AVN_P_cell': 'CL:1000477',
 'B': 'CL:0000236',
 'B_plasma': 'CL:0000786',
 'CD14+Mo': 'CL:0001054',
 'CD16+Mo': 'CL:0002396',
 'CD4+T_act': 'CL:0000896',
 'CD4+T_naive': 'CL:0000895',
 'CD4+T_Th2': 'CL:0000546',
 'CD8+T_cytox': 'CL:0000794',
 'CD8+T_em': 'CL:0000913',
 'CD8+T_te': 'CL:0000625',
 'CD8+T_trans': 'CL:0000625',
 'DC': 'CL:0001056',
 'EC1_cap': 'CL:0002144',
 'EC10_CMC-like': 'CL:0000115',
 'EC2_cap': 'CL:0002144',
 'EC3_cap': 'CL:0002144',
 'EC4_immune': 'CL:0000115',
 'EC5_art': 'CL:1000413',
 'EC6_ven': 'CL:0002543',
 'EC7_atria': 'CL:0002350',
 'EC7_endocardial': 'CL:0002350',
 'EC8_ln': 'CL:0002138',
 'Endothelial cell': 'CL:0000115',
 'FB1': 'CL:0002548',
 'FB2': 'CL:0002548',
 'FB3': 'CL:0002548',
 'FB4': 'CL:0002548',
 'FB4_activated': 'CL:0002548',
 'FB5': 'CL:0002548',
 'FB6': 'CL:0002548',
 'Fibroblast': 'CL:0000057',
 'ILC': 'CL:0001065',
 'Lymphatic Endothelial cell': 'CL:0002138',
 'Lymphoid': 'CL:0000542',
 'LYVE1+IGF1+MP': 'CL:0000235',
 'LYVE1+MP_cycling': 'CL:0000235',
 'LYVE1+TIMD4+MP': 'CL:0000235',
 'MAIT-like': 'CL:0000940',
 'Mast': 'CL:0000097',
 'Mast cell': 'CL:0000097',
 'Meso': 'CL:0000077',
 'Mesothelial cell': 'CL:0000077',
 'MoMP': 'CL:0000576',
 'Mural cell': 'CL:0008034',
 'Myeloid': 'CL:0000763',
 'NC1': 'CL:0000125',
 'NC1_glial': 'CL:0000125',
 'NC2': 'CL:0000125',
 'NC2_glial_NGF+': 'CL:0000125',
 'Neural cell': 'CL:0002319',
 'Neut': 'CL:0000775',
 'NK_CD16hi': 'CL:0000939',
 'NK_CD56hi': 'CL:0000938',
 'PC1_vent': 'CL:0000669',
 'PC2_atria': 'CL:0000669',
 'PC3_str': 'CL:0000669',
 'PC4_CMC-like': 'CL:0000669',
 'SAN_P_cell': 'CL:1000477',
 'SMC1_basic': 'CL:0000192',
 'SMC2_art': 'CL:0002591',
 'T/NK_cycling': 'CL:0000814',
 'vCM1': 'CL:0002131',
 'vCM3_stressed': 'CL:0002131',
 'vCM4': 'CL:0002131',
 'Ventricular Cardiomyocyte': 'CL:0002131',
 'unclassified': 'CL:0000003'}

In [88]:
# add the cell_type_ontology_term_id column

In [89]:
# Translate each spot's label into its CL term. Reading from the label column
# (rather than mapping the target column onto itself) keeps this cell
# idempotent: re-running it no longer turns every value into NaN.
adata.obs['cell_type_ontology_term_id'] = adata.obs['highest_cell_Density_columns'].map(mapping)

# Series.map() silently yields NaN for labels absent from the dictionary;
# fail loudly instead of writing an incomplete column.
assert adata.obs['cell_type_ontology_term_id'].notna().all(), (
    "cell-type labels without a CL term: "
    f"{set(adata.obs['highest_cell_Density_columns']) - set(mapping)}"
)

In [90]:
# change datatype of the column

In [91]:
# Store the CL term ids as a pandas categorical column.
adata.obs['cell_type_ontology_term_id'] = pd.Categorical(adata.obs['cell_type_ontology_term_id'])

In [92]:
adata.obs

Unnamed: 0_level_0,array_row,array_col,n_genes_by_counts,log1p_n_genes_by_counts,total_counts,log1p_total_counts,sangerID,region,donor,donor_type,...,aCM5,Adip4,NC3_glial,NC6_schwann,EC9_FB-like,gdT,assay_ontology_term_id,columns_with_multiple_max,highest_cell_Density_columns,cell_type_ontology_term_id
spot_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
HCAHeartST10238252_AAACACCAATAACTGC-1,59,19,2298,7.740230,7449.0,8.915969,HCAHeartST10238252,LA,D8,DCD,...,0.002009,0.000759,0.005056,0.003200,,,EFO:0010961,[],aCM4,CL:0002129
HCAHeartST10238252_AAACAGCTTTCAGAAG-1,43,9,2202,7.697575,7383.0,8.907071,HCAHeartST10238252,LA,D8,DCD,...,0.003749,0.000812,0.001612,0.001164,,,EFO:0010961,[],aCM4,CL:0002129
HCAHeartST10238252_AAACAGGGTCTATATT-1,47,13,2703,7.902487,10462.0,9.255601,HCAHeartST10238252,LA,D8,DCD,...,0.004621,0.001874,0.054607,0.001461,,,EFO:0010961,[],aCM4,CL:0002129
HCAHeartST10238252_AAACATGGTGAGAGGA-1,62,0,1422,7.260523,3173.0,8.062748,HCAHeartST10238252,LA,D8,DCD,...,0.003449,0.070256,0.008251,0.001057,,,EFO:0010961,[],FB5,CL:0002548
HCAHeartST10238252_AAACCGTTCGTCCAGG-1,52,42,2662,7.887209,9018.0,9.107089,HCAHeartST10238252,LA,D8,DCD,...,0.009841,0.000929,0.033318,0.011014,,,EFO:0010961,[],aCM4,CL:0002129
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HCAHeartST9341983_TTGTGGTAGGAGGGAT-1,50,28,2049,7.625595,8797.0,9.082280,HCAHeartST9341983,LA,D6,DCD,...,0.008795,0.034684,0.006533,0.002001,,,EFO:0010961,[],aCM4,CL:0002129
HCAHeartST9341983_TTGTTCAGTGTGCTAC-1,24,64,1806,7.499423,7245.0,8.888205,HCAHeartST9341983,LA,D6,DCD,...,0.005235,0.001087,0.003922,0.000738,,,EFO:0010961,[],aCM4,CL:0002129
HCAHeartST9341983_TTGTTGTGTGTCAAGA-1,31,77,1303,7.173192,4787.0,8.473868,HCAHeartST9341983,LA,D6,DCD,...,0.006996,0.000598,0.005064,0.001347,,,EFO:0010961,[],aCM4,CL:0002129
HCAHeartST9341983_TTGTTTCACATCCAGG-1,58,42,479,6.173786,1278.0,7.153834,HCAHeartST9341983,LA,D6,DCD,...,0.005012,0.000231,0.003459,0.002808,,,EFO:0010961,[],aCM4,CL:0002129


#### **donor_id**

In [93]:
#identify the column in adata.obs which provides donor information

In [94]:
adata.obs.columns

Index(['array_row', 'array_col', 'n_genes_by_counts',
       'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts',
       'sangerID', 'region', 'donor', 'donor_type', 'age', 'gender',
       'facility', 'modality', 'kit_10x', 'flushed', 'annotation_final',
       'Adip1', 'Adip2', 'Adip3', 'B', 'B_plasma', 'CD14+Mo', 'CD16+Mo',
       'CD4+T_act', 'CD4+T_naive', 'CD8+T_cytox', 'CD8+T_em', 'CD8+T_te',
       'CD8+T_trans', 'DC', 'EC10_CMC-like', 'EC1_cap', 'EC2_cap', 'EC3_cap',
       'EC4_immune', 'EC5_art', 'EC6_ven', 'EC7_endocardial', 'EC8_ln', 'FB1',
       'FB2', 'FB3', 'FB4_activated', 'FB5', 'FB6', 'ILC', 'LYVE1+IGF1+MP',
       'LYVE1+MP_cycling', 'LYVE1+TIMD4+MP', 'MAIT-like', 'Mast', 'Meso',
       'MoMP', 'NC1_glial', 'NC2_glial_NGF+', 'NK_CD16hi', 'NK_CD56hi', 'Neut',
       'PC1_vent', 'PC2_atria', 'PC3_str', 'SAN_P_cell', 'SMC1_basic',
       'SMC2_art', 'T/NK_cycling', 'aCM1', 'aCM2', 'aCM3', 'aCM4',
       'AVN_bundle_cell', 'PC4_CMC-like', 'vCM1', 'vCM2', 'v

In [95]:
list(adata.obs['donor'].unique())

['D8', 'D5', 'D6']

In [96]:
# add the donor_id column

In [97]:
# donor_id is a straight copy of the existing 'donor' column.
adata.obs['donor_id'] = adata.obs['donor'].copy()

In [98]:
# change datatype of the column

In [99]:
# Store donor ids as a pandas categorical column.
adata.obs['donor_id'] = pd.Categorical(adata.obs['donor_id'])

In [100]:
# view unique values of donor_id column

In [101]:
list(adata.obs['donor_id'].unique())

['D8', 'D5', 'D6']

In [102]:
#view obs

In [103]:
adata.obs

Unnamed: 0_level_0,array_row,array_col,n_genes_by_counts,log1p_n_genes_by_counts,total_counts,log1p_total_counts,sangerID,region,donor,donor_type,...,Adip4,NC3_glial,NC6_schwann,EC9_FB-like,gdT,assay_ontology_term_id,columns_with_multiple_max,highest_cell_Density_columns,cell_type_ontology_term_id,donor_id
spot_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
HCAHeartST10238252_AAACACCAATAACTGC-1,59,19,2298,7.740230,7449.0,8.915969,HCAHeartST10238252,LA,D8,DCD,...,0.000759,0.005056,0.003200,,,EFO:0010961,[],aCM4,CL:0002129,D8
HCAHeartST10238252_AAACAGCTTTCAGAAG-1,43,9,2202,7.697575,7383.0,8.907071,HCAHeartST10238252,LA,D8,DCD,...,0.000812,0.001612,0.001164,,,EFO:0010961,[],aCM4,CL:0002129,D8
HCAHeartST10238252_AAACAGGGTCTATATT-1,47,13,2703,7.902487,10462.0,9.255601,HCAHeartST10238252,LA,D8,DCD,...,0.001874,0.054607,0.001461,,,EFO:0010961,[],aCM4,CL:0002129,D8
HCAHeartST10238252_AAACATGGTGAGAGGA-1,62,0,1422,7.260523,3173.0,8.062748,HCAHeartST10238252,LA,D8,DCD,...,0.070256,0.008251,0.001057,,,EFO:0010961,[],FB5,CL:0002548,D8
HCAHeartST10238252_AAACCGTTCGTCCAGG-1,52,42,2662,7.887209,9018.0,9.107089,HCAHeartST10238252,LA,D8,DCD,...,0.000929,0.033318,0.011014,,,EFO:0010961,[],aCM4,CL:0002129,D8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HCAHeartST9341983_TTGTGGTAGGAGGGAT-1,50,28,2049,7.625595,8797.0,9.082280,HCAHeartST9341983,LA,D6,DCD,...,0.034684,0.006533,0.002001,,,EFO:0010961,[],aCM4,CL:0002129,D6
HCAHeartST9341983_TTGTTCAGTGTGCTAC-1,24,64,1806,7.499423,7245.0,8.888205,HCAHeartST9341983,LA,D6,DCD,...,0.001087,0.003922,0.000738,,,EFO:0010961,[],aCM4,CL:0002129,D6
HCAHeartST9341983_TTGTTGTGTGTCAAGA-1,31,77,1303,7.173192,4787.0,8.473868,HCAHeartST9341983,LA,D6,DCD,...,0.000598,0.005064,0.001347,,,EFO:0010961,[],aCM4,CL:0002129,D6
HCAHeartST9341983_TTGTTTCACATCCAGG-1,58,42,479,6.173786,1278.0,7.153834,HCAHeartST9341983,LA,D6,DCD,...,0.000231,0.003459,0.002808,,,EFO:0010961,[],aCM4,CL:0002129,D6


In [104]:
adata.obs.columns

Index(['array_row', 'array_col', 'n_genes_by_counts',
       'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts',
       'sangerID', 'region', 'donor', 'donor_type', 'age', 'gender',
       'facility', 'modality', 'kit_10x', 'flushed', 'annotation_final',
       'Adip1', 'Adip2', 'Adip3', 'B', 'B_plasma', 'CD14+Mo', 'CD16+Mo',
       'CD4+T_act', 'CD4+T_naive', 'CD8+T_cytox', 'CD8+T_em', 'CD8+T_te',
       'CD8+T_trans', 'DC', 'EC10_CMC-like', 'EC1_cap', 'EC2_cap', 'EC3_cap',
       'EC4_immune', 'EC5_art', 'EC6_ven', 'EC7_endocardial', 'EC8_ln', 'FB1',
       'FB2', 'FB3', 'FB4_activated', 'FB5', 'FB6', 'ILC', 'LYVE1+IGF1+MP',
       'LYVE1+MP_cycling', 'LYVE1+TIMD4+MP', 'MAIT-like', 'Mast', 'Meso',
       'MoMP', 'NC1_glial', 'NC2_glial_NGF+', 'NK_CD16hi', 'NK_CD56hi', 'Neut',
       'PC1_vent', 'PC2_atria', 'PC3_str', 'SAN_P_cell', 'SMC1_basic',
       'SMC2_art', 'T/NK_cycling', 'aCM1', 'aCM2', 'aCM3', 'aCM4',
       'AVN_bundle_cell', 'PC4_CMC-like', 'vCM1', 'vCM2', 'v

#### **development_stage_ontology_term_id**

In [105]:
# identify the column in adata which corresponds to age

In [106]:
adata.obs.columns

Index(['array_row', 'array_col', 'n_genes_by_counts',
       'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts',
       'sangerID', 'region', 'donor', 'donor_type', 'age', 'gender',
       'facility', 'modality', 'kit_10x', 'flushed', 'annotation_final',
       'Adip1', 'Adip2', 'Adip3', 'B', 'B_plasma', 'CD14+Mo', 'CD16+Mo',
       'CD4+T_act', 'CD4+T_naive', 'CD8+T_cytox', 'CD8+T_em', 'CD8+T_te',
       'CD8+T_trans', 'DC', 'EC10_CMC-like', 'EC1_cap', 'EC2_cap', 'EC3_cap',
       'EC4_immune', 'EC5_art', 'EC6_ven', 'EC7_endocardial', 'EC8_ln', 'FB1',
       'FB2', 'FB3', 'FB4_activated', 'FB5', 'FB6', 'ILC', 'LYVE1+IGF1+MP',
       'LYVE1+MP_cycling', 'LYVE1+TIMD4+MP', 'MAIT-like', 'Mast', 'Meso',
       'MoMP', 'NC1_glial', 'NC2_glial_NGF+', 'NK_CD16hi', 'NK_CD56hi', 'Neut',
       'PC1_vent', 'PC2_atria', 'PC3_str', 'SAN_P_cell', 'SMC1_basic',
       'SMC2_art', 'T/NK_cycling', 'aCM1', 'aCM2', 'aCM3', 'aCM4',
       'AVN_bundle_cell', 'PC4_CMC-like', 'vCM1', 'vCM2', 'v

In [107]:
list(adata.obs['age'].unique())

['45-50', '65-70']

In [108]:
# Override the age bracket of donor D6 with '70-75'; every other spot keeps
# the bracket already present in 'age'.
# NOTE(review): the reason for this correction is not recorded here -- confirm
# against the donor metadata.
adata.obs['age'] = np.where(
    adata.obs['donor'].eq('D6'),
    '70-75',
    adata.obs['age'],
)

In [109]:
# Pull the age bracket of the first D6 spot to verify the override above took effect.
age_value = adata.obs.loc[adata.obs['donor'].eq('D6'), 'age'].iloc[0]

In [110]:
age_value

'70-75'

In [111]:
# create a dictionary for age and development stage ontology term id

In [112]:
# Lookup table: five-year age bracket -> HsapDv development-stage term id.
# Brackets are grouped under the term they share; the comprehension flattens
# the groups back into a plain {bracket: term} dictionary.
mapping = {
    age_bracket: stage_term
    for stage_term, age_brackets in (
        ('HsapDv:0000240', ('50-55', '55-60')),
        ('HsapDv:0000242', ('70-75',)),
        ('HsapDv:0000241', ('65-70', '60-65')),
        ('HsapDv:0000239', ('40-45', '45-50')),
        ('HsapDv:0000237', ('20-25',)),
    )
    for age_bracket in age_brackets
}

In [113]:
# add the development_stage_ontology_term_id column

In [114]:
# Translate each spot's age bracket into its HsapDv development-stage term.
adata.obs['development_stage_ontology_term_id'] = adata.obs['age'].map(mapping)

# Series.map() silently yields NaN for brackets absent from the dictionary;
# fail loudly instead of writing an incomplete column.
assert adata.obs['development_stage_ontology_term_id'].notna().all(), (
    "age brackets without a HsapDv term: "
    f"{set(adata.obs['age']) - set(mapping)}"
)

In [115]:
# change datatype of the column

In [116]:
# Store the development-stage term ids as a pandas categorical column.
adata.obs['development_stage_ontology_term_id'] = pd.Categorical(adata.obs['development_stage_ontology_term_id'])

In [117]:
# view unique values of development_stage_ontology_term_id column

In [118]:
list(adata.obs['development_stage_ontology_term_id'].unique())

['HsapDv:0000239', 'HsapDv:0000241', 'HsapDv:0000242']

In [119]:
# view adata.obs

In [120]:
adata.obs

Unnamed: 0_level_0,array_row,array_col,n_genes_by_counts,log1p_n_genes_by_counts,total_counts,log1p_total_counts,sangerID,region,donor,donor_type,...,NC3_glial,NC6_schwann,EC9_FB-like,gdT,assay_ontology_term_id,columns_with_multiple_max,highest_cell_Density_columns,cell_type_ontology_term_id,donor_id,development_stage_ontology_term_id
spot_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
HCAHeartST10238252_AAACACCAATAACTGC-1,59,19,2298,7.740230,7449.0,8.915969,HCAHeartST10238252,LA,D8,DCD,...,0.005056,0.003200,,,EFO:0010961,[],aCM4,CL:0002129,D8,HsapDv:0000239
HCAHeartST10238252_AAACAGCTTTCAGAAG-1,43,9,2202,7.697575,7383.0,8.907071,HCAHeartST10238252,LA,D8,DCD,...,0.001612,0.001164,,,EFO:0010961,[],aCM4,CL:0002129,D8,HsapDv:0000239
HCAHeartST10238252_AAACAGGGTCTATATT-1,47,13,2703,7.902487,10462.0,9.255601,HCAHeartST10238252,LA,D8,DCD,...,0.054607,0.001461,,,EFO:0010961,[],aCM4,CL:0002129,D8,HsapDv:0000239
HCAHeartST10238252_AAACATGGTGAGAGGA-1,62,0,1422,7.260523,3173.0,8.062748,HCAHeartST10238252,LA,D8,DCD,...,0.008251,0.001057,,,EFO:0010961,[],FB5,CL:0002548,D8,HsapDv:0000239
HCAHeartST10238252_AAACCGTTCGTCCAGG-1,52,42,2662,7.887209,9018.0,9.107089,HCAHeartST10238252,LA,D8,DCD,...,0.033318,0.011014,,,EFO:0010961,[],aCM4,CL:0002129,D8,HsapDv:0000239
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HCAHeartST9341983_TTGTGGTAGGAGGGAT-1,50,28,2049,7.625595,8797.0,9.082280,HCAHeartST9341983,LA,D6,DCD,...,0.006533,0.002001,,,EFO:0010961,[],aCM4,CL:0002129,D6,HsapDv:0000242
HCAHeartST9341983_TTGTTCAGTGTGCTAC-1,24,64,1806,7.499423,7245.0,8.888205,HCAHeartST9341983,LA,D6,DCD,...,0.003922,0.000738,,,EFO:0010961,[],aCM4,CL:0002129,D6,HsapDv:0000242
HCAHeartST9341983_TTGTTGTGTGTCAAGA-1,31,77,1303,7.173192,4787.0,8.473868,HCAHeartST9341983,LA,D6,DCD,...,0.005064,0.001347,,,EFO:0010961,[],aCM4,CL:0002129,D6,HsapDv:0000242
HCAHeartST9341983_TTGTTTCACATCCAGG-1,58,42,479,6.173786,1278.0,7.153834,HCAHeartST9341983,LA,D6,DCD,...,0.003459,0.002808,,,EFO:0010961,[],aCM4,CL:0002129,D6,HsapDv:0000242


#### **disease_ontology_term_id**

In [121]:
# Assign 'normal' (PATO:0000461) to every spot, since all donors are healthy

In [122]:
# add the disease_ontology_term_id column

In [123]:
# Every spot gets the same disease term; a scalar is broadcast to all rows.
adata.obs['disease_ontology_term_id'] = 'PATO:0000461'

In [124]:
#change data type of column

In [125]:
# Store the disease term ids as a pandas categorical column.
adata.obs['disease_ontology_term_id'] = pd.Categorical(adata.obs['disease_ontology_term_id'])

In [126]:
# view obs

In [127]:
adata.obs

Unnamed: 0_level_0,array_row,array_col,n_genes_by_counts,log1p_n_genes_by_counts,total_counts,log1p_total_counts,sangerID,region,donor,donor_type,...,NC6_schwann,EC9_FB-like,gdT,assay_ontology_term_id,columns_with_multiple_max,highest_cell_Density_columns,cell_type_ontology_term_id,donor_id,development_stage_ontology_term_id,disease_ontology_term_id
spot_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
HCAHeartST10238252_AAACACCAATAACTGC-1,59,19,2298,7.740230,7449.0,8.915969,HCAHeartST10238252,LA,D8,DCD,...,0.003200,,,EFO:0010961,[],aCM4,CL:0002129,D8,HsapDv:0000239,PATO:0000461
HCAHeartST10238252_AAACAGCTTTCAGAAG-1,43,9,2202,7.697575,7383.0,8.907071,HCAHeartST10238252,LA,D8,DCD,...,0.001164,,,EFO:0010961,[],aCM4,CL:0002129,D8,HsapDv:0000239,PATO:0000461
HCAHeartST10238252_AAACAGGGTCTATATT-1,47,13,2703,7.902487,10462.0,9.255601,HCAHeartST10238252,LA,D8,DCD,...,0.001461,,,EFO:0010961,[],aCM4,CL:0002129,D8,HsapDv:0000239,PATO:0000461
HCAHeartST10238252_AAACATGGTGAGAGGA-1,62,0,1422,7.260523,3173.0,8.062748,HCAHeartST10238252,LA,D8,DCD,...,0.001057,,,EFO:0010961,[],FB5,CL:0002548,D8,HsapDv:0000239,PATO:0000461
HCAHeartST10238252_AAACCGTTCGTCCAGG-1,52,42,2662,7.887209,9018.0,9.107089,HCAHeartST10238252,LA,D8,DCD,...,0.011014,,,EFO:0010961,[],aCM4,CL:0002129,D8,HsapDv:0000239,PATO:0000461
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HCAHeartST9341983_TTGTGGTAGGAGGGAT-1,50,28,2049,7.625595,8797.0,9.082280,HCAHeartST9341983,LA,D6,DCD,...,0.002001,,,EFO:0010961,[],aCM4,CL:0002129,D6,HsapDv:0000242,PATO:0000461
HCAHeartST9341983_TTGTTCAGTGTGCTAC-1,24,64,1806,7.499423,7245.0,8.888205,HCAHeartST9341983,LA,D6,DCD,...,0.000738,,,EFO:0010961,[],aCM4,CL:0002129,D6,HsapDv:0000242,PATO:0000461
HCAHeartST9341983_TTGTTGTGTGTCAAGA-1,31,77,1303,7.173192,4787.0,8.473868,HCAHeartST9341983,LA,D6,DCD,...,0.001347,,,EFO:0010961,[],aCM4,CL:0002129,D6,HsapDv:0000242,PATO:0000461
HCAHeartST9341983_TTGTTTCACATCCAGG-1,58,42,479,6.173786,1278.0,7.153834,HCAHeartST9341983,LA,D6,DCD,...,0.002808,,,EFO:0010961,[],aCM4,CL:0002129,D6,HsapDv:0000242,PATO:0000461


#### **is_primary_data**

In [128]:
# Flag every spot as primary data; a scalar is broadcast to all rows.
adata.obs['is_primary_data'] = True

In [129]:
adata.obs

Unnamed: 0_level_0,array_row,array_col,n_genes_by_counts,log1p_n_genes_by_counts,total_counts,log1p_total_counts,sangerID,region,donor,donor_type,...,EC9_FB-like,gdT,assay_ontology_term_id,columns_with_multiple_max,highest_cell_Density_columns,cell_type_ontology_term_id,donor_id,development_stage_ontology_term_id,disease_ontology_term_id,is_primary_data
spot_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
HCAHeartST10238252_AAACACCAATAACTGC-1,59,19,2298,7.740230,7449.0,8.915969,HCAHeartST10238252,LA,D8,DCD,...,,,EFO:0010961,[],aCM4,CL:0002129,D8,HsapDv:0000239,PATO:0000461,True
HCAHeartST10238252_AAACAGCTTTCAGAAG-1,43,9,2202,7.697575,7383.0,8.907071,HCAHeartST10238252,LA,D8,DCD,...,,,EFO:0010961,[],aCM4,CL:0002129,D8,HsapDv:0000239,PATO:0000461,True
HCAHeartST10238252_AAACAGGGTCTATATT-1,47,13,2703,7.902487,10462.0,9.255601,HCAHeartST10238252,LA,D8,DCD,...,,,EFO:0010961,[],aCM4,CL:0002129,D8,HsapDv:0000239,PATO:0000461,True
HCAHeartST10238252_AAACATGGTGAGAGGA-1,62,0,1422,7.260523,3173.0,8.062748,HCAHeartST10238252,LA,D8,DCD,...,,,EFO:0010961,[],FB5,CL:0002548,D8,HsapDv:0000239,PATO:0000461,True
HCAHeartST10238252_AAACCGTTCGTCCAGG-1,52,42,2662,7.887209,9018.0,9.107089,HCAHeartST10238252,LA,D8,DCD,...,,,EFO:0010961,[],aCM4,CL:0002129,D8,HsapDv:0000239,PATO:0000461,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HCAHeartST9341983_TTGTGGTAGGAGGGAT-1,50,28,2049,7.625595,8797.0,9.082280,HCAHeartST9341983,LA,D6,DCD,...,,,EFO:0010961,[],aCM4,CL:0002129,D6,HsapDv:0000242,PATO:0000461,True
HCAHeartST9341983_TTGTTCAGTGTGCTAC-1,24,64,1806,7.499423,7245.0,8.888205,HCAHeartST9341983,LA,D6,DCD,...,,,EFO:0010961,[],aCM4,CL:0002129,D6,HsapDv:0000242,PATO:0000461,True
HCAHeartST9341983_TTGTTGTGTGTCAAGA-1,31,77,1303,7.173192,4787.0,8.473868,HCAHeartST9341983,LA,D6,DCD,...,,,EFO:0010961,[],aCM4,CL:0002129,D6,HsapDv:0000242,PATO:0000461,True
HCAHeartST9341983_TTGTTTCACATCCAGG-1,58,42,479,6.173786,1278.0,7.153834,HCAHeartST9341983,LA,D6,DCD,...,,,EFO:0010961,[],aCM4,CL:0002129,D6,HsapDv:0000242,PATO:0000461,True


In [130]:
#change data type of column

In [131]:
# Make sure the flag column is stored with boolean dtype.
adata.obs['is_primary_data'] = adata.obs['is_primary_data'].astype(bool)

#### **organism_ontology_term_id**

In [132]:
# assign organism id 

In [133]:
# Every spot gets the same organism term; a scalar is broadcast to all rows.
adata.obs['organism_ontology_term_id'] = 'NCBITaxon:9606'

In [134]:
#change data type of column

In [135]:
# Store the organism term ids as a pandas categorical column.
adata.obs['organism_ontology_term_id'] = pd.Categorical(adata.obs['organism_ontology_term_id'])

In [136]:
# view obs

In [137]:
adata.obs

Unnamed: 0_level_0,array_row,array_col,n_genes_by_counts,log1p_n_genes_by_counts,total_counts,log1p_total_counts,sangerID,region,donor,donor_type,...,gdT,assay_ontology_term_id,columns_with_multiple_max,highest_cell_Density_columns,cell_type_ontology_term_id,donor_id,development_stage_ontology_term_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id
spot_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
HCAHeartST10238252_AAACACCAATAACTGC-1,59,19,2298,7.740230,7449.0,8.915969,HCAHeartST10238252,LA,D8,DCD,...,,EFO:0010961,[],aCM4,CL:0002129,D8,HsapDv:0000239,PATO:0000461,True,NCBITaxon:9606
HCAHeartST10238252_AAACAGCTTTCAGAAG-1,43,9,2202,7.697575,7383.0,8.907071,HCAHeartST10238252,LA,D8,DCD,...,,EFO:0010961,[],aCM4,CL:0002129,D8,HsapDv:0000239,PATO:0000461,True,NCBITaxon:9606
HCAHeartST10238252_AAACAGGGTCTATATT-1,47,13,2703,7.902487,10462.0,9.255601,HCAHeartST10238252,LA,D8,DCD,...,,EFO:0010961,[],aCM4,CL:0002129,D8,HsapDv:0000239,PATO:0000461,True,NCBITaxon:9606
HCAHeartST10238252_AAACATGGTGAGAGGA-1,62,0,1422,7.260523,3173.0,8.062748,HCAHeartST10238252,LA,D8,DCD,...,,EFO:0010961,[],FB5,CL:0002548,D8,HsapDv:0000239,PATO:0000461,True,NCBITaxon:9606
HCAHeartST10238252_AAACCGTTCGTCCAGG-1,52,42,2662,7.887209,9018.0,9.107089,HCAHeartST10238252,LA,D8,DCD,...,,EFO:0010961,[],aCM4,CL:0002129,D8,HsapDv:0000239,PATO:0000461,True,NCBITaxon:9606
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HCAHeartST9341983_TTGTGGTAGGAGGGAT-1,50,28,2049,7.625595,8797.0,9.082280,HCAHeartST9341983,LA,D6,DCD,...,,EFO:0010961,[],aCM4,CL:0002129,D6,HsapDv:0000242,PATO:0000461,True,NCBITaxon:9606
HCAHeartST9341983_TTGTTCAGTGTGCTAC-1,24,64,1806,7.499423,7245.0,8.888205,HCAHeartST9341983,LA,D6,DCD,...,,EFO:0010961,[],aCM4,CL:0002129,D6,HsapDv:0000242,PATO:0000461,True,NCBITaxon:9606
HCAHeartST9341983_TTGTTGTGTGTCAAGA-1,31,77,1303,7.173192,4787.0,8.473868,HCAHeartST9341983,LA,D6,DCD,...,,EFO:0010961,[],aCM4,CL:0002129,D6,HsapDv:0000242,PATO:0000461,True,NCBITaxon:9606
HCAHeartST9341983_TTGTTTCACATCCAGG-1,58,42,479,6.173786,1278.0,7.153834,HCAHeartST9341983,LA,D6,DCD,...,,EFO:0010961,[],aCM4,CL:0002129,D6,HsapDv:0000242,PATO:0000461,True,NCBITaxon:9606


#### **self_reported_ethnicity_ontology_term_id**

In [138]:
# create a dictionary of donor and ethnic origin (stored below in the 'ethinic_origin' column)

In [139]:
# Lookup table: donor id -> recorded ethnic origin.
# All donors default to 'Caucasian'; the explicit entries afterwards overwrite
# the default for the donors whose recorded origin differs (their position in
# the dictionary is unchanged by the overwrite).
mapping = {
    **dict.fromkeys(
        (
            'D1', 'D2', 'D3', 'D4', 'D5', 'D6', 'D7', 'D11',
            'H2', 'H3', 'H4', 'H5', 'H6', 'H7',
            'A61', 'AH1', 'AH1-A61', 'AH2',
            'AV10', 'AV13', 'AV14', 'AV3',
            'D8', 'AH5', 'AH6', 'AV1',
        ),
        'Caucasian',
    ),
    'H3': 'Asian',
    'AH1-A61': 'unknown',
    'AH2': 'South Asian',
}

In [140]:
# add ethinic_origin column

In [141]:
# Look up each spot's donor in the donor -> ethnic origin table.
adata.obs['ethinic_origin'] = adata.obs['donor'].map(mapping)

# Series.map() silently yields NaN for donors absent from the dictionary;
# fail loudly instead of propagating missing values downstream.
assert adata.obs['ethinic_origin'].notna().all(), (
    "donors without an ethnic origin entry: "
    f"{set(adata.obs['donor']) - set(mapping)}"
)

In [142]:
# create a dictionary of ethinic_origin and self_reported_ethnicity_ontology_term_id

In [143]:
# Lookup table: recorded ethnic origin -> HANCESTRO term id
# ('unknown' is passed through unchanged).
mapping = dict([
    ('Caucasian', 'HANCESTRO:0005'),
    ('Asian', 'HANCESTRO:0008'),
    ('unknown', 'unknown'),
    ('South Asian', 'HANCESTRO:0006'),
])

In [144]:
# add self_reported_ethnicity_ontology_term_id column

In [145]:
# Translate each spot's recorded ethnic origin into its HANCESTRO term.
adata.obs['self_reported_ethnicity_ontology_term_id'] = adata.obs['ethinic_origin'].map(mapping)

# Series.map() silently yields NaN for origins absent from the dictionary;
# fail loudly instead of writing an incomplete column.
assert adata.obs['self_reported_ethnicity_ontology_term_id'].notna().all(), (
    "ethnic origins without a HANCESTRO term: "
    f"{set(adata.obs['ethinic_origin']) - set(mapping)}"
)

In [146]:
# change data type

In [147]:
# Store the ethnicity term ids as a pandas categorical column.
adata.obs['self_reported_ethnicity_ontology_term_id'] = pd.Categorical(adata.obs['self_reported_ethnicity_ontology_term_id'])

In [148]:
# view obs

In [149]:
adata.obs

Unnamed: 0_level_0,array_row,array_col,n_genes_by_counts,log1p_n_genes_by_counts,total_counts,log1p_total_counts,sangerID,region,donor,donor_type,...,columns_with_multiple_max,highest_cell_Density_columns,cell_type_ontology_term_id,donor_id,development_stage_ontology_term_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id,ethinic_origin,self_reported_ethnicity_ontology_term_id
spot_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
HCAHeartST10238252_AAACACCAATAACTGC-1,59,19,2298,7.740230,7449.0,8.915969,HCAHeartST10238252,LA,D8,DCD,...,[],aCM4,CL:0002129,D8,HsapDv:0000239,PATO:0000461,True,NCBITaxon:9606,Caucasian,HANCESTRO:0005
HCAHeartST10238252_AAACAGCTTTCAGAAG-1,43,9,2202,7.697575,7383.0,8.907071,HCAHeartST10238252,LA,D8,DCD,...,[],aCM4,CL:0002129,D8,HsapDv:0000239,PATO:0000461,True,NCBITaxon:9606,Caucasian,HANCESTRO:0005
HCAHeartST10238252_AAACAGGGTCTATATT-1,47,13,2703,7.902487,10462.0,9.255601,HCAHeartST10238252,LA,D8,DCD,...,[],aCM4,CL:0002129,D8,HsapDv:0000239,PATO:0000461,True,NCBITaxon:9606,Caucasian,HANCESTRO:0005
HCAHeartST10238252_AAACATGGTGAGAGGA-1,62,0,1422,7.260523,3173.0,8.062748,HCAHeartST10238252,LA,D8,DCD,...,[],FB5,CL:0002548,D8,HsapDv:0000239,PATO:0000461,True,NCBITaxon:9606,Caucasian,HANCESTRO:0005
HCAHeartST10238252_AAACCGTTCGTCCAGG-1,52,42,2662,7.887209,9018.0,9.107089,HCAHeartST10238252,LA,D8,DCD,...,[],aCM4,CL:0002129,D8,HsapDv:0000239,PATO:0000461,True,NCBITaxon:9606,Caucasian,HANCESTRO:0005
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HCAHeartST9341983_TTGTGGTAGGAGGGAT-1,50,28,2049,7.625595,8797.0,9.082280,HCAHeartST9341983,LA,D6,DCD,...,[],aCM4,CL:0002129,D6,HsapDv:0000242,PATO:0000461,True,NCBITaxon:9606,Caucasian,HANCESTRO:0005
HCAHeartST9341983_TTGTTCAGTGTGCTAC-1,24,64,1806,7.499423,7245.0,8.888205,HCAHeartST9341983,LA,D6,DCD,...,[],aCM4,CL:0002129,D6,HsapDv:0000242,PATO:0000461,True,NCBITaxon:9606,Caucasian,HANCESTRO:0005
HCAHeartST9341983_TTGTTGTGTGTCAAGA-1,31,77,1303,7.173192,4787.0,8.473868,HCAHeartST9341983,LA,D6,DCD,...,[],aCM4,CL:0002129,D6,HsapDv:0000242,PATO:0000461,True,NCBITaxon:9606,Caucasian,HANCESTRO:0005
HCAHeartST9341983_TTGTTTCACATCCAGG-1,58,42,479,6.173786,1278.0,7.153834,HCAHeartST9341983,LA,D6,DCD,...,[],aCM4,CL:0002129,D6,HsapDv:0000242,PATO:0000461,True,NCBITaxon:9606,Caucasian,HANCESTRO:0005


#### **sex_ontology_term_id**

In [150]:
# identify the column in adata.obs which corresponds to sex

In [151]:
adata.obs.columns

Index(['array_row', 'array_col', 'n_genes_by_counts',
       'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts',
       'sangerID', 'region', 'donor', 'donor_type', 'age', 'gender',
       'facility', 'modality', 'kit_10x', 'flushed', 'annotation_final',
       'Adip1', 'Adip2', 'Adip3', 'B', 'B_plasma', 'CD14+Mo', 'CD16+Mo',
       'CD4+T_act', 'CD4+T_naive', 'CD8+T_cytox', 'CD8+T_em', 'CD8+T_te',
       'CD8+T_trans', 'DC', 'EC10_CMC-like', 'EC1_cap', 'EC2_cap', 'EC3_cap',
       'EC4_immune', 'EC5_art', 'EC6_ven', 'EC7_endocardial', 'EC8_ln', 'FB1',
       'FB2', 'FB3', 'FB4_activated', 'FB5', 'FB6', 'ILC', 'LYVE1+IGF1+MP',
       'LYVE1+MP_cycling', 'LYVE1+TIMD4+MP', 'MAIT-like', 'Mast', 'Meso',
       'MoMP', 'NC1_glial', 'NC2_glial_NGF+', 'NK_CD16hi', 'NK_CD56hi', 'Neut',
       'PC1_vent', 'PC2_atria', 'PC3_str', 'SAN_P_cell', 'SMC1_basic',
       'SMC2_art', 'T/NK_cycling', 'aCM1', 'aCM2', 'aCM3', 'aCM4',
       'AVN_bundle_cell', 'PC4_CMC-like', 'vCM1', 'vCM2', 'v

In [152]:
# list the unique values 

In [153]:
list(adata.obs['gender'].unique())

['Male', 'Female']

In [154]:
# create a dictionary of sex and sex ontology term id

In [155]:
# Lookup table: value of the 'gender' column -> PATO sex term id.
mapping = dict(
    Female='PATO:0000383',
    Male='PATO:0000384',
)

In [156]:
# add sex_ontology_term_id column

In [157]:
# Translate each spot's 'gender' value into its PATO sex term.
adata.obs['sex_ontology_term_id'] = adata.obs['gender'].map(mapping)

# Series.map() silently yields NaN for values absent from the dictionary;
# fail loudly instead of writing an incomplete column.
assert adata.obs['sex_ontology_term_id'].notna().all(), (
    "gender values without a PATO term: "
    f"{set(adata.obs['gender']) - set(mapping)}"
)

In [158]:
# change data type

In [159]:
# Store the sex term ids as a pandas categorical column.
adata.obs['sex_ontology_term_id'] = pd.Categorical(adata.obs['sex_ontology_term_id'])

In [160]:
adata.obs

Unnamed: 0_level_0,array_row,array_col,n_genes_by_counts,log1p_n_genes_by_counts,total_counts,log1p_total_counts,sangerID,region,donor,donor_type,...,highest_cell_Density_columns,cell_type_ontology_term_id,donor_id,development_stage_ontology_term_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id,ethinic_origin,self_reported_ethnicity_ontology_term_id,sex_ontology_term_id
spot_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
HCAHeartST10238252_AAACACCAATAACTGC-1,59,19,2298,7.740230,7449.0,8.915969,HCAHeartST10238252,LA,D8,DCD,...,aCM4,CL:0002129,D8,HsapDv:0000239,PATO:0000461,True,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000384
HCAHeartST10238252_AAACAGCTTTCAGAAG-1,43,9,2202,7.697575,7383.0,8.907071,HCAHeartST10238252,LA,D8,DCD,...,aCM4,CL:0002129,D8,HsapDv:0000239,PATO:0000461,True,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000384
HCAHeartST10238252_AAACAGGGTCTATATT-1,47,13,2703,7.902487,10462.0,9.255601,HCAHeartST10238252,LA,D8,DCD,...,aCM4,CL:0002129,D8,HsapDv:0000239,PATO:0000461,True,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000384
HCAHeartST10238252_AAACATGGTGAGAGGA-1,62,0,1422,7.260523,3173.0,8.062748,HCAHeartST10238252,LA,D8,DCD,...,FB5,CL:0002548,D8,HsapDv:0000239,PATO:0000461,True,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000384
HCAHeartST10238252_AAACCGTTCGTCCAGG-1,52,42,2662,7.887209,9018.0,9.107089,HCAHeartST10238252,LA,D8,DCD,...,aCM4,CL:0002129,D8,HsapDv:0000239,PATO:0000461,True,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000384
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HCAHeartST9341983_TTGTGGTAGGAGGGAT-1,50,28,2049,7.625595,8797.0,9.082280,HCAHeartST9341983,LA,D6,DCD,...,aCM4,CL:0002129,D6,HsapDv:0000242,PATO:0000461,True,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000384
HCAHeartST9341983_TTGTTCAGTGTGCTAC-1,24,64,1806,7.499423,7245.0,8.888205,HCAHeartST9341983,LA,D6,DCD,...,aCM4,CL:0002129,D6,HsapDv:0000242,PATO:0000461,True,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000384
HCAHeartST9341983_TTGTTGTGTGTCAAGA-1,31,77,1303,7.173192,4787.0,8.473868,HCAHeartST9341983,LA,D6,DCD,...,aCM4,CL:0002129,D6,HsapDv:0000242,PATO:0000461,True,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000384
HCAHeartST9341983_TTGTTTCACATCCAGG-1,58,42,479,6.173786,1278.0,7.153834,HCAHeartST9341983,LA,D6,DCD,...,aCM4,CL:0002129,D6,HsapDv:0000242,PATO:0000461,True,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000384


#### **suspension_type**

In [161]:
# Visium is a spatial assay (no cell suspension), so suspension_type is 'na'

In [162]:
# Every spot gets the same suspension type; a scalar is broadcast to all rows.
adata.obs['suspension_type'] = 'na'

In [163]:
# change data type

In [164]:
# Store the suspension type as a pandas categorical column.
adata.obs['suspension_type'] = pd.Categorical(adata.obs['suspension_type'])

In [165]:
# view obs

In [166]:
adata.obs

Unnamed: 0_level_0,array_row,array_col,n_genes_by_counts,log1p_n_genes_by_counts,total_counts,log1p_total_counts,sangerID,region,donor,donor_type,...,cell_type_ontology_term_id,donor_id,development_stage_ontology_term_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id,ethinic_origin,self_reported_ethnicity_ontology_term_id,sex_ontology_term_id,suspension_type
spot_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
HCAHeartST10238252_AAACACCAATAACTGC-1,59,19,2298,7.740230,7449.0,8.915969,HCAHeartST10238252,LA,D8,DCD,...,CL:0002129,D8,HsapDv:0000239,PATO:0000461,True,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000384,na
HCAHeartST10238252_AAACAGCTTTCAGAAG-1,43,9,2202,7.697575,7383.0,8.907071,HCAHeartST10238252,LA,D8,DCD,...,CL:0002129,D8,HsapDv:0000239,PATO:0000461,True,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000384,na
HCAHeartST10238252_AAACAGGGTCTATATT-1,47,13,2703,7.902487,10462.0,9.255601,HCAHeartST10238252,LA,D8,DCD,...,CL:0002129,D8,HsapDv:0000239,PATO:0000461,True,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000384,na
HCAHeartST10238252_AAACATGGTGAGAGGA-1,62,0,1422,7.260523,3173.0,8.062748,HCAHeartST10238252,LA,D8,DCD,...,CL:0002548,D8,HsapDv:0000239,PATO:0000461,True,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000384,na
HCAHeartST10238252_AAACCGTTCGTCCAGG-1,52,42,2662,7.887209,9018.0,9.107089,HCAHeartST10238252,LA,D8,DCD,...,CL:0002129,D8,HsapDv:0000239,PATO:0000461,True,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000384,na
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HCAHeartST9341983_TTGTGGTAGGAGGGAT-1,50,28,2049,7.625595,8797.0,9.082280,HCAHeartST9341983,LA,D6,DCD,...,CL:0002129,D6,HsapDv:0000242,PATO:0000461,True,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000384,na
HCAHeartST9341983_TTGTTCAGTGTGCTAC-1,24,64,1806,7.499423,7245.0,8.888205,HCAHeartST9341983,LA,D6,DCD,...,CL:0002129,D6,HsapDv:0000242,PATO:0000461,True,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000384,na
HCAHeartST9341983_TTGTTGTGTGTCAAGA-1,31,77,1303,7.173192,4787.0,8.473868,HCAHeartST9341983,LA,D6,DCD,...,CL:0002129,D6,HsapDv:0000242,PATO:0000461,True,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000384,na
HCAHeartST9341983_TTGTTTCACATCCAGG-1,58,42,479,6.173786,1278.0,7.153834,HCAHeartST9341983,LA,D6,DCD,...,CL:0002129,D6,HsapDv:0000242,PATO:0000461,True,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000384,na


#### **tissue_ontology_term_id**

In [167]:
# identify the column in adata.obs which corresponds to tissue

In [168]:
adata.obs.columns

Index(['array_row', 'array_col', 'n_genes_by_counts',
       'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts',
       'sangerID', 'region', 'donor', 'donor_type',
       ...
       'cell_type_ontology_term_id', 'donor_id',
       'development_stage_ontology_term_id', 'disease_ontology_term_id',
       'is_primary_data', 'organism_ontology_term_id', 'ethinic_origin',
       'self_reported_ethnicity_ontology_term_id', 'sex_ontology_term_id',
       'suspension_type'],
      dtype='object', length=102)

In [169]:
# list unique values

In [170]:
list(adata.obs['region'].unique())

['LA']

In [171]:
# create a dictionary with tissue and corresponding tissue ontology term id

In [172]:
# Lookup table: code found in the 'region' column -> UBERON tissue term id.
mapping = dict(
    AX='UBERON:0002098',
    LV='UBERON:0002084',
    RV='UBERON:0002080',
    LA='UBERON:0002079',
    SP='UBERON:0002094',
    RA='UBERON:0002078',
    SAN='UBERON:0002351',
    AVN='UBERON:0002352',
)

In [173]:
# add 'tissue_ontology_term_id' column

In [174]:
# Translate each spot's region code into its UBERON tissue term.
adata.obs['tissue_ontology_term_id'] = adata.obs['region'].map(mapping)

# Series.map() silently yields NaN for regions absent from the dictionary;
# fail loudly instead of writing an incomplete column.
assert adata.obs['tissue_ontology_term_id'].notna().all(), (
    "region codes without an UBERON term: "
    f"{set(adata.obs['region']) - set(mapping)}"
)

In [175]:
# change data type of column

In [176]:
# Store the tissue term ids as a pandas categorical column.
adata.obs['tissue_ontology_term_id'] = pd.Categorical(adata.obs['tissue_ontology_term_id'])

In [177]:
#list the unique values in 'tissue_ontology_term_id' column

In [178]:
list(adata.obs['tissue_ontology_term_id'].unique())

['UBERON:0002079']

In [179]:
# view obs

In [180]:
adata.obs

Unnamed: 0_level_0,array_row,array_col,n_genes_by_counts,log1p_n_genes_by_counts,total_counts,log1p_total_counts,sangerID,region,donor,donor_type,...,donor_id,development_stage_ontology_term_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id,ethinic_origin,self_reported_ethnicity_ontology_term_id,sex_ontology_term_id,suspension_type,tissue_ontology_term_id
spot_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
HCAHeartST10238252_AAACACCAATAACTGC-1,59,19,2298,7.740230,7449.0,8.915969,HCAHeartST10238252,LA,D8,DCD,...,D8,HsapDv:0000239,PATO:0000461,True,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000384,na,UBERON:0002079
HCAHeartST10238252_AAACAGCTTTCAGAAG-1,43,9,2202,7.697575,7383.0,8.907071,HCAHeartST10238252,LA,D8,DCD,...,D8,HsapDv:0000239,PATO:0000461,True,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000384,na,UBERON:0002079
HCAHeartST10238252_AAACAGGGTCTATATT-1,47,13,2703,7.902487,10462.0,9.255601,HCAHeartST10238252,LA,D8,DCD,...,D8,HsapDv:0000239,PATO:0000461,True,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000384,na,UBERON:0002079
HCAHeartST10238252_AAACATGGTGAGAGGA-1,62,0,1422,7.260523,3173.0,8.062748,HCAHeartST10238252,LA,D8,DCD,...,D8,HsapDv:0000239,PATO:0000461,True,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000384,na,UBERON:0002079
HCAHeartST10238252_AAACCGTTCGTCCAGG-1,52,42,2662,7.887209,9018.0,9.107089,HCAHeartST10238252,LA,D8,DCD,...,D8,HsapDv:0000239,PATO:0000461,True,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000384,na,UBERON:0002079
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HCAHeartST9341983_TTGTGGTAGGAGGGAT-1,50,28,2049,7.625595,8797.0,9.082280,HCAHeartST9341983,LA,D6,DCD,...,D6,HsapDv:0000242,PATO:0000461,True,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000384,na,UBERON:0002079
HCAHeartST9341983_TTGTTCAGTGTGCTAC-1,24,64,1806,7.499423,7245.0,8.888205,HCAHeartST9341983,LA,D6,DCD,...,D6,HsapDv:0000242,PATO:0000461,True,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000384,na,UBERON:0002079
HCAHeartST9341983_TTGTTGTGTGTCAAGA-1,31,77,1303,7.173192,4787.0,8.473868,HCAHeartST9341983,LA,D6,DCD,...,D6,HsapDv:0000242,PATO:0000461,True,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000384,na,UBERON:0002079
HCAHeartST9341983_TTGTTTCACATCCAGG-1,58,42,479,6.173786,1278.0,7.153834,HCAHeartST9341983,LA,D6,DCD,...,D6,HsapDv:0000242,PATO:0000461,True,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000384,na,UBERON:0002079


In [181]:
# Remove obs columns that are not kept in the curated object.
# 'donor', 'gender' and 'ethinic_origin' have curated counterparts in obs
# (donor_id, sex_ontology_term_id, self_reported_ethnicity_ontology_term_id);
# the reason for dropping the remaining columns is not visible in this section.
obs_columns_to_drop = [
    'donor',
    'gender',
    'highest_cell_Density_columns',
    'columns_with_multiple_max',
    'array_row',
    'array_col',
    'modality',
    'kit_10x',
    'ethinic_origin',
]
for obs_column in obs_columns_to_drop:
    del adata.obs[obs_column]

#### **obsm (Embeddings)**

In [182]:
# Inspect the obsm entries before they are converted to arrays and renamed.
adata.obsm

AxisArrays with keys: MT, means_cell_abundance_w_sf, prop, q05_cell_abundance_w_sf, q95_cell_abundance_w_sf, spatial, stds_cell_abundance_w_sf

In [183]:
# List the keys currently stored in obsm.
adata.obsm.keys()

KeysView(AxisArrays with keys: MT, means_cell_abundance_w_sf, prop, q05_cell_abundance_w_sf, q95_cell_abundance_w_sf, spatial, stds_cell_abundance_w_sf)

In [184]:
# Replace the stored table with its plain NumPy array
# (the entry is presumably a pandas DataFrame - verify against the input file).
adata.obsm['means_cell_abundance_w_sf'] = adata.obsm['means_cell_abundance_w_sf'].to_numpy()

In [185]:
# Replace the stored table with its plain NumPy array
# (the entry is presumably a pandas DataFrame - verify against the input file).
adata.obsm['stds_cell_abundance_w_sf'] = adata.obsm['stds_cell_abundance_w_sf'].to_numpy()

In [186]:
# Replace the stored table with its plain NumPy array
# (the entry is presumably a pandas DataFrame - verify against the input file).
adata.obsm['prop'] = adata.obsm['prop'].to_numpy()

In [187]:
# Replace the stored table with its plain NumPy array
# (the entry is presumably a pandas DataFrame - verify against the input file).
adata.obsm['q05_cell_abundance_w_sf'] = adata.obsm['q05_cell_abundance_w_sf'].to_numpy()

In [188]:
# Replace the stored table with its plain NumPy array
# (the entry is presumably a pandas DataFrame - verify against the input file).
adata.obsm['q95_cell_abundance_w_sf'] = adata.obsm['q95_cell_abundance_w_sf'].to_numpy()

In [189]:
# Register each obsm entry a second time under an 'X_'-prefixed key.
# The tuple order fixes the insertion order of the new keys; 'MT' is
# intentionally absent here, so it receives no 'X_' alias.
obsm_keys_to_alias = (
    'means_cell_abundance_w_sf',
    'prop',
    'q05_cell_abundance_w_sf',
    'q95_cell_abundance_w_sf',
    'stds_cell_abundance_w_sf',
    'spatial',
)
for obsm_key in obsm_keys_to_alias:
    adata.obsm[f'X_{obsm_key}'] = adata.obsm[obsm_key]

In [190]:
# Confirm that the 'X_'-prefixed entries now sit alongside the original keys.
adata.obsm

AxisArrays with keys: MT, means_cell_abundance_w_sf, prop, q05_cell_abundance_w_sf, q95_cell_abundance_w_sf, spatial, stds_cell_abundance_w_sf, X_means_cell_abundance_w_sf, X_prop, X_q05_cell_abundance_w_sf, X_q95_cell_abundance_w_sf, X_stds_cell_abundance_w_sf, X_spatial

In [191]:
# Drop the un-prefixed obsm entries so only the 'X_' keys remain.
# 'MT' has no 'X_' counterpart, so removing it discards that entry entirely.
obsm_keys_to_remove = (
    'MT',
    'means_cell_abundance_w_sf',
    'prop',
    'spatial',
    'q05_cell_abundance_w_sf',
    'q95_cell_abundance_w_sf',
    'stds_cell_abundance_w_sf',
)
for stale_key in obsm_keys_to_remove:
    del adata.obsm[stale_key]

#### **uns (Dataset Metadata)**

In [192]:
# Inspect uns. The repr prints the image arrays stored under 'spatial',
# so the output is long.
adata.uns

OverloadedDict, wrapping:
	{'log1p': {}, 'spatial': {'HCAHeartST10238252': {'images': {'hires': array([[[0.92941177, 0.9372549 , 0.9490196 ],
        [0.92941177, 0.9372549 , 0.9490196 ],
        [0.92941177, 0.9372549 , 0.9490196 ],
        ...,
        [0.92941177, 0.9411765 , 0.9490196 ],
        [0.92941177, 0.9411765 , 0.9490196 ],
        [0.92941177, 0.9411765 , 0.9490196 ]],

       [[0.92941177, 0.9372549 , 0.9490196 ],
        [0.92941177, 0.9372549 , 0.9490196 ],
        [0.92941177, 0.9372549 , 0.9490196 ],
        ...,
        [0.92941177, 0.9411765 , 0.9490196 ],
        [0.93333334, 0.9411765 , 0.9490196 ],
        [0.93333334, 0.9411765 , 0.9529412 ]],

       [[0.92941177, 0.9372549 , 0.9490196 ],
        [0.92941177, 0.9372549 , 0.94509804],
        [0.92941177, 0.9372549 , 0.9490196 ],
        ...,
        [0.92941177, 0.9411765 , 0.9490196 ],
        [0.93333334, 0.9411765 , 0.9490196 ],
        [0.93333334, 0.9411765 , 0.9490196 ]],

       ...,

       [[0.9333333

In [193]:
# Free-text caption stored as dataset-level metadata in uns.
# NOTE(review): the wording reads "left atria region"; "left atrium" is probably
# what is meant - confirm before changing, since this text is stored in the file.
adata.uns['image_caption'] = 'Shown here is an image of 10 μm thick slice of left atria region of the adult human heart stained with H&E'

In [194]:
# Dataset title stored in uns.
adata.uns['title'] = 'Visium spatial - LA'

In [195]:
# Name the obsm entry to use as the default embedding; 'X_spatial' is the
# alias of the original 'spatial' coordinates created in the obsm section.
adata.uns['default_embedding'] = 'X_spatial'

In [196]:
# Confirm that the three metadata keys were added next to 'log1p' and 'spatial'.
adata.uns.keys()

dict_keys(['log1p', 'spatial', 'image_caption', 'title', 'default_embedding'])

### **Final checks and adjustments**

In [197]:
# Overview of the curated object: dimensions and the names of its annotations.
adata

AnnData object with n_obs × n_vars = 5822 × 33234
    obs: 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'sangerID', 'region', 'donor_type', 'age', 'facility', 'flushed', 'annotation_final', 'Adip1', 'Adip2', 'Adip3', 'B', 'B_plasma', 'CD14+Mo', 'CD16+Mo', 'CD4+T_act', 'CD4+T_naive', 'CD8+T_cytox', 'CD8+T_em', 'CD8+T_te', 'CD8+T_trans', 'DC', 'EC10_CMC-like', 'EC1_cap', 'EC2_cap', 'EC3_cap', 'EC4_immune', 'EC5_art', 'EC6_ven', 'EC7_endocardial', 'EC8_ln', 'FB1', 'FB2', 'FB3', 'FB4_activated', 'FB5', 'FB6', 'ILC', 'LYVE1+IGF1+MP', 'LYVE1+MP_cycling', 'LYVE1+TIMD4+MP', 'MAIT-like', 'Mast', 'Meso', 'MoMP', 'NC1_glial', 'NC2_glial_NGF+', 'NK_CD16hi', 'NK_CD56hi', 'Neut', 'PC1_vent', 'PC2_atria', 'PC3_str', 'SAN_P_cell', 'SMC1_basic', 'SMC2_art', 'T/NK_cycling', 'aCM1', 'aCM2', 'aCM3', 'aCM4', 'AVN_bundle_cell', 'PC4_CMC-like', 'vCM1', 'vCM2', 'vCM3_stressed', 'vCM4', 'vCM5', 'AVN_P_cell', 'CD4+T_Tfh', 'CD4+T_Th1', 'CD4+T_Th2', 'CD4+T_reg', 'NC5_glial

In [198]:
# Check the obs column dtypes before the down-casting cells that follow.
adata.obs.dtypes

n_genes_by_counts                              int32
log1p_n_genes_by_counts                      float64
total_counts                                 float32
log1p_total_counts                           float32
sangerID                                    category
                                              ...   
organism_ontology_term_id                   category
self_reported_ethnicity_ontology_term_id    category
sex_ontology_term_id                        category
suspension_type                             category
tissue_ontology_term_id                     category
Length: 94, dtype: object

In [199]:
# Down-cast 64-bit numeric columns of adata.var to their 32-bit counterparts.
# The dtypes are captured once up front, so the column selection is not
# affected by the conversions performed inside the loop.
var_dtypes = adata.var.dtypes
var_casts = {'float64': 'float32', 'int64': 'int32'}
for wide_dtype, narrow_dtype in var_casts.items():
    for var_column in var_dtypes.index[var_dtypes == wide_dtype]:
        adata.var[var_column] = adata.var[var_column].astype(narrow_dtype)
        print(f"changed {var_column} from {wide_dtype} to {narrow_dtype}")

In [200]:
# Normalise adata.obs dtypes: 64-bit floats and ints become 32-bit, and
# object columns become categoricals. One line is printed per converted column.
# The dtypes are captured once up front, so the column selection is not
# affected by the conversions performed inside the loop.
obs_dtypes = adata.obs.dtypes
obs_casts = {'float64': 'float32', 'int64': 'int32', 'object': 'category'}
for source_dtype, target_dtype in obs_casts.items():
    for obs_column in obs_dtypes.index[obs_dtypes == source_dtype]:
        adata.obs[obs_column] = adata.obs[obs_column].astype(target_dtype)
        print(f"changed {obs_column} from {source_dtype} to {target_dtype}")

changed log1p_n_genes_by_counts from float64 to float32
changed Adip1 from float64 to float32
changed Adip2 from float64 to float32
changed Adip3 from float64 to float32
changed B from float64 to float32
changed B_plasma from float64 to float32
changed CD14+Mo from float64 to float32
changed CD16+Mo from float64 to float32
changed CD4+T_act from float64 to float32
changed CD4+T_naive from float64 to float32
changed CD8+T_cytox from float64 to float32
changed CD8+T_em from float64 to float32
changed CD8+T_te from float64 to float32
changed CD8+T_trans from float64 to float32
changed DC from float64 to float32
changed EC10_CMC-like from float64 to float32
changed EC1_cap from float64 to float32
changed EC2_cap from float64 to float32
changed EC3_cap from float64 to float32
changed EC4_immune from float64 to float32
changed EC5_art from float64 to float32
changed EC6_ven from float64 to float32
changed EC7_endocardial from float64 to float32
changed EC8_ln from float64 to float32
changed 

In [201]:
# Review obs after the dtype conversion.
adata.obs

Unnamed: 0_level_0,n_genes_by_counts,log1p_n_genes_by_counts,total_counts,log1p_total_counts,sangerID,region,donor_type,age,facility,flushed,...,cell_type_ontology_term_id,donor_id,development_stage_ontology_term_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id,self_reported_ethnicity_ontology_term_id,sex_ontology_term_id,suspension_type,tissue_ontology_term_id
spot_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
HCAHeartST10238252_AAACACCAATAACTGC-1,2298,7.740230,7449.0,8.915969,HCAHeartST10238252,LA,DCD,45-50,Sanger,no,...,CL:0002129,D8,HsapDv:0000239,PATO:0000461,True,NCBITaxon:9606,HANCESTRO:0005,PATO:0000384,na,UBERON:0002079
HCAHeartST10238252_AAACAGCTTTCAGAAG-1,2202,7.697576,7383.0,8.907071,HCAHeartST10238252,LA,DCD,45-50,Sanger,no,...,CL:0002129,D8,HsapDv:0000239,PATO:0000461,True,NCBITaxon:9606,HANCESTRO:0005,PATO:0000384,na,UBERON:0002079
HCAHeartST10238252_AAACAGGGTCTATATT-1,2703,7.902487,10462.0,9.255601,HCAHeartST10238252,LA,DCD,45-50,Sanger,no,...,CL:0002129,D8,HsapDv:0000239,PATO:0000461,True,NCBITaxon:9606,HANCESTRO:0005,PATO:0000384,na,UBERON:0002079
HCAHeartST10238252_AAACATGGTGAGAGGA-1,1422,7.260522,3173.0,8.062748,HCAHeartST10238252,LA,DCD,45-50,Sanger,no,...,CL:0002548,D8,HsapDv:0000239,PATO:0000461,True,NCBITaxon:9606,HANCESTRO:0005,PATO:0000384,na,UBERON:0002079
HCAHeartST10238252_AAACCGTTCGTCCAGG-1,2662,7.887208,9018.0,9.107089,HCAHeartST10238252,LA,DCD,45-50,Sanger,no,...,CL:0002129,D8,HsapDv:0000239,PATO:0000461,True,NCBITaxon:9606,HANCESTRO:0005,PATO:0000384,na,UBERON:0002079
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HCAHeartST9341983_TTGTGGTAGGAGGGAT-1,2049,7.625595,8797.0,9.082280,HCAHeartST9341983,LA,DCD,70-75,Sanger,no,...,CL:0002129,D6,HsapDv:0000242,PATO:0000461,True,NCBITaxon:9606,HANCESTRO:0005,PATO:0000384,na,UBERON:0002079
HCAHeartST9341983_TTGTTCAGTGTGCTAC-1,1806,7.499424,7245.0,8.888205,HCAHeartST9341983,LA,DCD,70-75,Sanger,no,...,CL:0002129,D6,HsapDv:0000242,PATO:0000461,True,NCBITaxon:9606,HANCESTRO:0005,PATO:0000384,na,UBERON:0002079
HCAHeartST9341983_TTGTTGTGTGTCAAGA-1,1303,7.173192,4787.0,8.473868,HCAHeartST9341983,LA,DCD,70-75,Sanger,no,...,CL:0002129,D6,HsapDv:0000242,PATO:0000461,True,NCBITaxon:9606,HANCESTRO:0005,PATO:0000384,na,UBERON:0002079
HCAHeartST9341983_TTGTTTCACATCCAGG-1,479,6.173786,1278.0,7.153834,HCAHeartST9341983,LA,DCD,70-75,Sanger,no,...,CL:0002129,D6,HsapDv:0000242,PATO:0000461,True,NCBITaxon:9606,HANCESTRO:0005,PATO:0000384,na,UBERON:0002079


In [202]:
# List the obs columns that remain after curation.
adata.obs.columns

Index(['n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts',
       'log1p_total_counts', 'sangerID', 'region', 'donor_type', 'age',
       'facility', 'flushed', 'annotation_final', 'Adip1', 'Adip2', 'Adip3',
       'B', 'B_plasma', 'CD14+Mo', 'CD16+Mo', 'CD4+T_act', 'CD4+T_naive',
       'CD8+T_cytox', 'CD8+T_em', 'CD8+T_te', 'CD8+T_trans', 'DC',
       'EC10_CMC-like', 'EC1_cap', 'EC2_cap', 'EC3_cap', 'EC4_immune',
       'EC5_art', 'EC6_ven', 'EC7_endocardial', 'EC8_ln', 'FB1', 'FB2', 'FB3',
       'FB4_activated', 'FB5', 'FB6', 'ILC', 'LYVE1+IGF1+MP',
       'LYVE1+MP_cycling', 'LYVE1+TIMD4+MP', 'MAIT-like', 'Mast', 'Meso',
       'MoMP', 'NC1_glial', 'NC2_glial_NGF+', 'NK_CD16hi', 'NK_CD56hi', 'Neut',
       'PC1_vent', 'PC2_atria', 'PC3_str', 'SAN_P_cell', 'SMC1_basic',
       'SMC2_art', 'T/NK_cycling', 'aCM1', 'aCM2', 'aCM3', 'aCM4',
       'AVN_bundle_cell', 'PC4_CMC-like', 'vCM1', 'vCM2', 'vCM3_stressed',
       'vCM4', 'vCM5', 'AVN_P_cell', 'CD4+T_Tfh', 'CD4+T_Th1

In [203]:
# Review var, indexed by gene_ids.
adata.var

Unnamed: 0_level_0,feature_is_filtered
gene_ids,Unnamed: 1_level_1
ENSG00000243485,False
ENSG00000237613,False
ENSG00000186092,False
ENSG00000238009,False
ENSG00000239945,False
...,...
ENSG00000277856,False
ENSG00000275063,False
ENSG00000271254,False
ENSG00000277475,False


In [204]:
# NOTE(review): repeats the obs display from a few cells earlier; nothing
# modifies obs in between, so this cell looks redundant.
adata.obs

Unnamed: 0_level_0,n_genes_by_counts,log1p_n_genes_by_counts,total_counts,log1p_total_counts,sangerID,region,donor_type,age,facility,flushed,...,cell_type_ontology_term_id,donor_id,development_stage_ontology_term_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id,self_reported_ethnicity_ontology_term_id,sex_ontology_term_id,suspension_type,tissue_ontology_term_id
spot_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
HCAHeartST10238252_AAACACCAATAACTGC-1,2298,7.740230,7449.0,8.915969,HCAHeartST10238252,LA,DCD,45-50,Sanger,no,...,CL:0002129,D8,HsapDv:0000239,PATO:0000461,True,NCBITaxon:9606,HANCESTRO:0005,PATO:0000384,na,UBERON:0002079
HCAHeartST10238252_AAACAGCTTTCAGAAG-1,2202,7.697576,7383.0,8.907071,HCAHeartST10238252,LA,DCD,45-50,Sanger,no,...,CL:0002129,D8,HsapDv:0000239,PATO:0000461,True,NCBITaxon:9606,HANCESTRO:0005,PATO:0000384,na,UBERON:0002079
HCAHeartST10238252_AAACAGGGTCTATATT-1,2703,7.902487,10462.0,9.255601,HCAHeartST10238252,LA,DCD,45-50,Sanger,no,...,CL:0002129,D8,HsapDv:0000239,PATO:0000461,True,NCBITaxon:9606,HANCESTRO:0005,PATO:0000384,na,UBERON:0002079
HCAHeartST10238252_AAACATGGTGAGAGGA-1,1422,7.260522,3173.0,8.062748,HCAHeartST10238252,LA,DCD,45-50,Sanger,no,...,CL:0002548,D8,HsapDv:0000239,PATO:0000461,True,NCBITaxon:9606,HANCESTRO:0005,PATO:0000384,na,UBERON:0002079
HCAHeartST10238252_AAACCGTTCGTCCAGG-1,2662,7.887208,9018.0,9.107089,HCAHeartST10238252,LA,DCD,45-50,Sanger,no,...,CL:0002129,D8,HsapDv:0000239,PATO:0000461,True,NCBITaxon:9606,HANCESTRO:0005,PATO:0000384,na,UBERON:0002079
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HCAHeartST9341983_TTGTGGTAGGAGGGAT-1,2049,7.625595,8797.0,9.082280,HCAHeartST9341983,LA,DCD,70-75,Sanger,no,...,CL:0002129,D6,HsapDv:0000242,PATO:0000461,True,NCBITaxon:9606,HANCESTRO:0005,PATO:0000384,na,UBERON:0002079
HCAHeartST9341983_TTGTTCAGTGTGCTAC-1,1806,7.499424,7245.0,8.888205,HCAHeartST9341983,LA,DCD,70-75,Sanger,no,...,CL:0002129,D6,HsapDv:0000242,PATO:0000461,True,NCBITaxon:9606,HANCESTRO:0005,PATO:0000384,na,UBERON:0002079
HCAHeartST9341983_TTGTTGTGTGTCAAGA-1,1303,7.173192,4787.0,8.473868,HCAHeartST9341983,LA,DCD,70-75,Sanger,no,...,CL:0002129,D6,HsapDv:0000242,PATO:0000461,True,NCBITaxon:9606,HANCESTRO:0005,PATO:0000384,na,UBERON:0002079
HCAHeartST9341983_TTGTTTCACATCCAGG-1,479,6.173786,1278.0,7.153834,HCAHeartST9341983,LA,DCD,70-75,Sanger,no,...,CL:0002129,D6,HsapDv:0000242,PATO:0000461,True,NCBITaxon:9606,HANCESTRO:0005,PATO:0000384,na,UBERON:0002079


In [205]:
# NOTE(review): repeats the obs column listing from a few cells earlier; nothing
# modifies obs in between, so this cell looks redundant.
adata.obs.columns

Index(['n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts',
       'log1p_total_counts', 'sangerID', 'region', 'donor_type', 'age',
       'facility', 'flushed', 'annotation_final', 'Adip1', 'Adip2', 'Adip3',
       'B', 'B_plasma', 'CD14+Mo', 'CD16+Mo', 'CD4+T_act', 'CD4+T_naive',
       'CD8+T_cytox', 'CD8+T_em', 'CD8+T_te', 'CD8+T_trans', 'DC',
       'EC10_CMC-like', 'EC1_cap', 'EC2_cap', 'EC3_cap', 'EC4_immune',
       'EC5_art', 'EC6_ven', 'EC7_endocardial', 'EC8_ln', 'FB1', 'FB2', 'FB3',
       'FB4_activated', 'FB5', 'FB6', 'ILC', 'LYVE1+IGF1+MP',
       'LYVE1+MP_cycling', 'LYVE1+TIMD4+MP', 'MAIT-like', 'Mast', 'Meso',
       'MoMP', 'NC1_glial', 'NC2_glial_NGF+', 'NK_CD16hi', 'NK_CD56hi', 'Neut',
       'PC1_vent', 'PC2_atria', 'PC3_str', 'SAN_P_cell', 'SMC1_basic',
       'SMC2_art', 'T/NK_cycling', 'aCM1', 'aCM2', 'aCM3', 'aCM4',
       'AVN_bundle_cell', 'PC4_CMC-like', 'vCM1', 'vCM2', 'vCM3_stressed',
       'vCM4', 'vCM5', 'AVN_P_cell', 'CD4+T_Tfh', 'CD4+T_Th1

In [206]:
#check the format of expression matrix

In [207]:
adata.X

<5822x33234 sparse matrix of type '<class 'numpy.float32'>'
	with 9892326 stored elements in Compressed Sparse Row format>

In [208]:
# Inspect the matrix of `araw` next to adata.X. `araw` is defined earlier in
# the notebook (not in this section) - presumably the raw-count AnnData; confirm
# it matches adata in shape before it is attached as adata.raw.
araw.X

<5822x33234 sparse matrix of type '<class 'numpy.float32'>'
	with 9892326 stored elements in Compressed Sparse Row format>

In [209]:
#Copy raw counts to adata.raw

In [210]:
# Attach `araw` (created earlier in the notebook) as the .raw slot of adata.
adata.raw = araw

In [211]:
#write the curated object to final_objects folder

In [212]:
# Persist the curated object, gzip-compressed, to the final_objects folder.
final_object_path = '/lustre/scratch127/cellgen/cellgeni/cxgportal_sets/heart_cell_atlas/final_objects/OCT_LA.h5ad'
adata.write(final_object_path, compression='gzip')