### **Curating HCAHeartST13233999(OCT).h5ad**

Article: Spatially resolved multiomics of human cardiac niches

DOI: https://doi.org/10.1038/s41586-023-06311-1 

Data Source : https://www.heartcellatlas.org

##### **Mount farm**

mount-farm

##### **Packages required for curation**

In [1]:
#Import all packages required for curation

In [2]:
import numpy as np
import pandas as pd
import scanpy as sc
import scipy
from tqdm import tqdm
from scipy import sparse
from scipy.sparse import csr_matrix
import anndata as ad
import os
import subprocess
import math

### **Curation Schema**

##### **X (Matrix Layers)**

##### **AnnData object**

In [3]:
# Load the AnnData object

In [4]:
# Location of the per-sample object to curate; kept in a variable so the
# path is easy to spot and change.
adata_path = '/lustre/scratch127/cellgen/cellgeni/cxgportal_sets/heart_cell_atlas/data/HCAHeartST13233999.h5ad'
adata = sc.read_h5ad(adata_path)

In [5]:
# View the AnnData object

In [6]:
adata

AnnData object with n_obs × n_vars = 2881 × 33538
    obs: 'array_row', 'array_col', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'sangerID', 'region', 'donor', 'donor_type', 'age', 'gender', 'facility', 'modality', 'kit_10x', 'flushed', 'annotation_final', 'Adip1', 'Adip2', 'Adip3', 'B', 'B_plasma', 'CD14+Mo', 'CD16+Mo', 'CD4+T_act', 'CD4+T_naive', 'CD8+T_cytox', 'CD8+T_em', 'CD8+T_te', 'CD8+T_trans', 'DC', 'EC10_CMC-like', 'EC1_cap', 'EC2_cap', 'EC3_cap', 'EC4_immune', 'EC5_art', 'EC6_ven', 'EC7_endocardial', 'EC8_ln', 'FB1', 'FB2', 'FB3', 'FB4_activated', 'FB5', 'FB6', 'ILC', 'LYVE1+IGF1+MP', 'LYVE1+MP_cycling', 'LYVE1+TIMD4+MP', 'MAIT-like', 'Mast', 'Meso', 'MoMP', 'NC1_glial', 'NC2_glial_NGF+', 'NK_CD16hi', 'NK_CD56hi', 'Neut', 'PC1_vent', 'PC2_atria', 'PC3_str', 'SAN_P_cell', 'SMC1_basic', 'SMC2_art', 'T/NK_cycling', 'aCM1', 'aCM2', 'aCM3', 'aCM4', 'AVN_bundle_cell', 'PC4_CMC-like', 'vCM1', 'vCM2', 'vCM3_stressed', 'vCM4', 'vCM5', 'AVN_P_c

##### **X- expression matrix**

In [7]:
# View the expression matrix of the anndata object

In [8]:
adata.X

<2881x33538 sparse matrix of type '<class 'numpy.float32'>'
	with 6242376 stored elements in Compressed Sparse Row format>

In [9]:
# Print the matrix to check whether it holds normalized or raw counts. If the values have non-zero fractional parts, they are normalized counts; if every value is a whole number (even when stored as a float, e.g. 2.0), they are raw counts.

In [10]:
print(adata.X)

  (0, 21)	1.339231
  (0, 39)	1.8919389
  (0, 52)	1.339231
  (0, 53)	2.5067034
  (0, 61)	1.339231
  (0, 71)	1.339231
  (0, 89)	1.339231
  (0, 100)	1.339231
  (0, 154)	1.339231
  (0, 171)	1.8919389
  (0, 178)	1.339231
  (0, 190)	1.339231
  (0, 201)	3.0307498
  (0, 225)	1.339231
  (0, 244)	1.8919389
  (0, 267)	4.0966463
  (0, 268)	3.699461
  (0, 338)	1.8919389
  (0, 350)	1.8919389
  (0, 390)	1.339231
  (0, 405)	1.339231
  (0, 415)	2.2458375
  (0, 423)	2.7134051
  (0, 424)	1.339231
  (0, 443)	1.339231
  :	:
  (2880, 33249)	1.4541492
  (2880, 33254)	1.4541492
  (2880, 33294)	1.4541492
  (2880, 33375)	1.4541492
  (2880, 33376)	2.0230935
  (2880, 33399)	1.4541492
  (2880, 33413)	1.4541492
  (2880, 33446)	1.4541492
  (2880, 33451)	1.4541492
  (2880, 33474)	2.0230935
  (2880, 33479)	2.0230935
  (2880, 33490)	1.4541492
  (2880, 33492)	1.4541492
  (2880, 33496)	5.236484
  (2880, 33497)	4.6318707
  (2880, 33498)	5.8062334
  (2880, 33499)	5.5487084
  (2880, 33500)	2.3834753
  (2880, 33501)	5.440939

##### **Raw counts matrix**

In [11]:
# If X has normalized counts, check for the raw counts matrix.

In [12]:
#Here the raw counts are provided in a separate object, load the raw counts matrix

In [13]:
# Location of the combined raw-count object (all samples in one file).
araw_path = '/lustre/scratch127/cellgen/cellgeni/cxgportal_sets/heart_cell_atlas/Raw_counts/visium-OCT_adult-8reg-revision_raw.h5ad'
araw = sc.read_h5ad(araw_path)

In [14]:
# view raw object

In [15]:
araw

AnnData object with n_obs × n_vars = 93788 × 33538
    obs: 'in_tissue', 'array_row', 'array_col', 'sample', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes', 'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes', 'mt_frac', 'n_counts', 'n_genes', 'annotation_JC', 'sangerID', 'Publication', 'combinedID', 'donor', 'donor_type', 'region', 'region_finest', 'age', 'gender', 'facility', 'cell_or_nuclei', 'modality', 'kit_10x', 'flushed', 'region_cell2loc'
    var: 'gene_ids', 'feature_types', 'genome', 'SYMBOL'
    uns: 'spatial'
    obsm: 'MT', 'means_cell_abundance_w_sf', 'q05_cell_abundance_w_sf', 'q95_cell_abundance_w_sf', 'spatial', 'stds_cell_abundance_w_sf'

In [16]:
# view raw matrix

In [17]:
araw.X

<93788x33538 sparse matrix of type '<class 'numpy.float32'>'
	with 147751779 stored elements in Compressed Sparse Row format>

In [18]:
print(araw.X)

  (0, 26)	1.0
  (0, 27)	1.0
  (0, 39)	1.0
  (0, 154)	2.0
  (0, 156)	1.0
  (0, 171)	1.0
  (0, 201)	1.0
  (0, 219)	2.0
  (0, 220)	1.0
  (0, 267)	61.0
  (0, 275)	1.0
  (0, 350)	1.0
  (0, 363)	1.0
  (0, 408)	1.0
  (0, 414)	2.0
  (0, 423)	2.0
  (0, 424)	1.0
  (0, 442)	1.0
  (0, 449)	1.0
  (0, 470)	5.0
  (0, 472)	2.0
  (0, 473)	1.0
  (0, 491)	1.0
  (0, 493)	1.0
  (0, 526)	1.0
  :	:
  (93787, 33249)	1.0
  (93787, 33254)	1.0
  (93787, 33294)	1.0
  (93787, 33375)	1.0
  (93787, 33376)	2.0
  (93787, 33399)	1.0
  (93787, 33413)	1.0
  (93787, 33446)	1.0
  (93787, 33451)	1.0
  (93787, 33474)	2.0
  (93787, 33479)	2.0
  (93787, 33490)	1.0
  (93787, 33492)	1.0
  (93787, 33496)	57.0
  (93787, 33497)	31.0
  (93787, 33498)	101.0
  (93787, 33499)	78.0
  (93787, 33500)	3.0
  (93787, 33501)	70.0
  (93787, 33502)	67.0
  (93787, 33503)	33.0
  (93787, 33504)	3.0
  (93787, 33505)	54.0
  (93787, 33506)	9.0
  (93787, 33508)	40.0


In [19]:
# Since the raw object combines all samples, extract the raw counts for this dataset only.

In [20]:
# Keep only the spots whose 'sample' label matches this dataset.
# The result is a view of the combined object; it is materialised later
# when the genes are subset with .copy().
is_this_sample = araw.obs['sample'].eq('HCAHeartST13233999')
araw = araw[is_this_sample]

In [21]:
araw

View of AnnData object with n_obs × n_vars = 2881 × 33538
    obs: 'in_tissue', 'array_row', 'array_col', 'sample', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes', 'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes', 'mt_frac', 'n_counts', 'n_genes', 'annotation_JC', 'sangerID', 'Publication', 'combinedID', 'donor', 'donor_type', 'region', 'region_finest', 'age', 'gender', 'facility', 'cell_or_nuclei', 'modality', 'kit_10x', 'flushed', 'region_cell2loc'
    var: 'gene_ids', 'feature_types', 'genome', 'SYMBOL'
    uns: 'spatial'
    obsm: 'MT', 'means_cell_abundance_w_sf', 'q05_cell_abundance_w_sf', 'q95_cell_abundance_w_sf', 'spatial', 'stds_cell_abundance_w_sf'

##### **Variables(var)**

In [22]:
#View the var of anndata and raw object

In [23]:
adata.var

Unnamed: 0_level_0,gene_ids,feature_types,genome
SYMBOL,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
MIR1302-2HG,ENSG00000243485,Gene Expression,GRCh38
FAM138A,ENSG00000237613,Gene Expression,GRCh38
OR4F5,ENSG00000186092,Gene Expression,GRCh38
AL627309.1,ENSG00000238009,Gene Expression,GRCh38
AL627309.3,ENSG00000239945,Gene Expression,GRCh38
...,...,...,...
AC233755.2,ENSG00000277856,Gene Expression,GRCh38
AC233755.1,ENSG00000275063,Gene Expression,GRCh38
AC240274.1,ENSG00000271254,Gene Expression,GRCh38
AC213203.1,ENSG00000277475,Gene Expression,GRCh38


In [24]:
araw.var

Unnamed: 0,gene_ids,feature_types,genome,SYMBOL
ENSG00000243485,ENSG00000243485,Gene Expression,GRCh38,MIR1302-2HG
ENSG00000237613,ENSG00000237613,Gene Expression,GRCh38,FAM138A
ENSG00000186092,ENSG00000186092,Gene Expression,GRCh38,OR4F5
ENSG00000238009,ENSG00000238009,Gene Expression,GRCh38,AL627309.1
ENSG00000239945,ENSG00000239945,Gene Expression,GRCh38,AL627309.3
...,...,...,...,...
ENSG00000277856,ENSG00000277856,Gene Expression,GRCh38,AC233755.2
ENSG00000275063,ENSG00000275063,Gene Expression,GRCh38,AC233755.1
ENSG00000271254,ENSG00000271254,Gene Expression,GRCh38,AC240274.1
ENSG00000277475,ENSG00000277475,Gene Expression,GRCh38,AC213203.1


In [25]:
# Check the index column of var. Check whether ensembl ids are provided in the index column of var or not.

In [26]:
# If the Ensembl IDs are not in the index column but are present in another column, set that column as the index.

In [27]:
# Preserve the current index (gene symbols) in its own column before the
# index is replaced by Ensembl IDs.
adata.var['gene_symbols'] = adata.var.index

In [28]:
adata.var

Unnamed: 0_level_0,gene_ids,feature_types,genome,gene_symbols
SYMBOL,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
MIR1302-2HG,ENSG00000243485,Gene Expression,GRCh38,MIR1302-2HG
FAM138A,ENSG00000237613,Gene Expression,GRCh38,FAM138A
OR4F5,ENSG00000186092,Gene Expression,GRCh38,OR4F5
AL627309.1,ENSG00000238009,Gene Expression,GRCh38,AL627309.1
AL627309.3,ENSG00000239945,Gene Expression,GRCh38,AL627309.3
...,...,...,...,...
AC233755.2,ENSG00000277856,Gene Expression,GRCh38,AC233755.2
AC233755.1,ENSG00000275063,Gene Expression,GRCh38,AC233755.1
AC240274.1,ENSG00000271254,Gene Expression,GRCh38,AC240274.1
AC213203.1,ENSG00000277475,Gene Expression,GRCh38,AC213203.1


In [29]:
# Use the Ensembl IDs from the 'gene_ids' column as the var index.
# Passing the Series (not its values) also names the index 'gene_ids'.
adata.var_names = adata.var['gene_ids']

In [30]:
adata.var

Unnamed: 0_level_0,gene_ids,feature_types,genome,gene_symbols
gene_ids,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ENSG00000243485,ENSG00000243485,Gene Expression,GRCh38,MIR1302-2HG
ENSG00000237613,ENSG00000237613,Gene Expression,GRCh38,FAM138A
ENSG00000186092,ENSG00000186092,Gene Expression,GRCh38,OR4F5
ENSG00000238009,ENSG00000238009,Gene Expression,GRCh38,AL627309.1
ENSG00000239945,ENSG00000239945,Gene Expression,GRCh38,AL627309.3
...,...,...,...,...
ENSG00000277856,ENSG00000277856,Gene Expression,GRCh38,AC233755.2
ENSG00000275063,ENSG00000275063,Gene Expression,GRCh38,AC233755.1
ENSG00000271254,ENSG00000271254,Gene Expression,GRCh38,AC240274.1
ENSG00000277475,ENSG00000277475,Gene Expression,GRCh38,AC213203.1


In [31]:
# load the approved genes file

In [32]:
# Table of approved features; its 'feature_id' column is used below to
# decide which genes are kept.
approved_genes_path = '/home/jovyan/CXG_DATASETS_PORTAL/gene_info/genes_approved.csv'
approved_genes = pd.read_csv(approved_genes_path)

In [33]:
# Create a lookup dictionary keyed by the feature_id column of the approved genes file (the values are placeholders; only key membership is used).

In [34]:
# Lookup table of approved feature IDs. Only key membership is used
# downstream, so every key carries the same placeholder value 1.
genedict = dict.fromkeys(approved_genes['feature_id'].tolist(), 1)

In [35]:
genedict

{'ERCC-00002': 1,
 'ERCC-00003': 1,
 'ERCC-00004': 1,
 'ERCC-00009': 1,
 'ERCC-00012': 1,
 'ERCC-00013': 1,
 'ERCC-00014': 1,
 'ERCC-00016': 1,
 'ERCC-00017': 1,
 'ERCC-00019': 1,
 'ERCC-00022': 1,
 'ERCC-00024': 1,
 'ERCC-00025': 1,
 'ERCC-00028': 1,
 'ERCC-00031': 1,
 'ERCC-00033': 1,
 'ERCC-00034': 1,
 'ERCC-00035': 1,
 'ERCC-00039': 1,
 'ERCC-00040': 1,
 'ERCC-00041': 1,
 'ERCC-00042': 1,
 'ERCC-00043': 1,
 'ERCC-00044': 1,
 'ERCC-00046': 1,
 'ERCC-00048': 1,
 'ERCC-00051': 1,
 'ERCC-00053': 1,
 'ERCC-00054': 1,
 'ERCC-00057': 1,
 'ERCC-00058': 1,
 'ERCC-00059': 1,
 'ERCC-00060': 1,
 'ERCC-00061': 1,
 'ERCC-00062': 1,
 'ERCC-00067': 1,
 'ERCC-00069': 1,
 'ERCC-00071': 1,
 'ERCC-00073': 1,
 'ERCC-00074': 1,
 'ERCC-00075': 1,
 'ERCC-00076': 1,
 'ERCC-00077': 1,
 'ERCC-00078': 1,
 'ERCC-00079': 1,
 'ERCC-00081': 1,
 'ERCC-00083': 1,
 'ERCC-00084': 1,
 'ERCC-00085': 1,
 'ERCC-00086': 1,
 'ERCC-00092': 1,
 'ERCC-00095': 1,
 'ERCC-00096': 1,
 'ERCC-00097': 1,
 'ERCC-00098': 1,
 'ERCC-000

In [36]:
len(genedict)

116184

In [37]:
# Filter out the genes which are not in the approved genes file

In [38]:
# Keep, in their original order, the feature IDs of each object that are
# present in the approved-genes lookup.
is_approved = genedict.__contains__
var_to_keep_adata = list(filter(is_approved, adata.var_names))
var_to_keep_araw = list(filter(is_approved, araw.var_names))

In [39]:
len(var_to_keep_adata)

33234

In [40]:
len(var_to_keep_araw)

33234

In [41]:
adata.var

Unnamed: 0_level_0,gene_ids,feature_types,genome,gene_symbols
gene_ids,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ENSG00000243485,ENSG00000243485,Gene Expression,GRCh38,MIR1302-2HG
ENSG00000237613,ENSG00000237613,Gene Expression,GRCh38,FAM138A
ENSG00000186092,ENSG00000186092,Gene Expression,GRCh38,OR4F5
ENSG00000238009,ENSG00000238009,Gene Expression,GRCh38,AL627309.1
ENSG00000239945,ENSG00000239945,Gene Expression,GRCh38,AL627309.3
...,...,...,...,...
ENSG00000277856,ENSG00000277856,Gene Expression,GRCh38,AC233755.2
ENSG00000275063,ENSG00000275063,Gene Expression,GRCh38,AC233755.1
ENSG00000271254,ENSG00000271254,Gene Expression,GRCh38,AC240274.1
ENSG00000277475,ENSG00000277475,Gene Expression,GRCh38,AC213203.1


In [42]:
araw.var

Unnamed: 0,gene_ids,feature_types,genome,SYMBOL
ENSG00000243485,ENSG00000243485,Gene Expression,GRCh38,MIR1302-2HG
ENSG00000237613,ENSG00000237613,Gene Expression,GRCh38,FAM138A
ENSG00000186092,ENSG00000186092,Gene Expression,GRCh38,OR4F5
ENSG00000238009,ENSG00000238009,Gene Expression,GRCh38,AL627309.1
ENSG00000239945,ENSG00000239945,Gene Expression,GRCh38,AL627309.3
...,...,...,...,...
ENSG00000277856,ENSG00000277856,Gene Expression,GRCh38,AC233755.2
ENSG00000275063,ENSG00000275063,Gene Expression,GRCh38,AC233755.1
ENSG00000271254,ENSG00000271254,Gene Expression,GRCh38,AC240274.1
ENSG00000277475,ENSG00000277475,Gene Expression,GRCh38,AC213203.1


In [43]:
# Subset both AnnData objects to the approved genes only. The copies replace the original objects, so the unapproved genes are dropped from here on.

In [44]:
# Restrict both objects to the approved feature IDs collected above.
# .copy() turns the resulting views into independent AnnData objects so
# that later in-place edits (e.g. deleting var columns) act on real data.
adata = adata[:, var_to_keep_adata].copy()
araw = araw[:, var_to_keep_araw].copy()

In [45]:
#  View the var

In [46]:
adata.var

Unnamed: 0_level_0,gene_ids,feature_types,genome,gene_symbols
gene_ids,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ENSG00000243485,ENSG00000243485,Gene Expression,GRCh38,MIR1302-2HG
ENSG00000237613,ENSG00000237613,Gene Expression,GRCh38,FAM138A
ENSG00000186092,ENSG00000186092,Gene Expression,GRCh38,OR4F5
ENSG00000238009,ENSG00000238009,Gene Expression,GRCh38,AL627309.1
ENSG00000239945,ENSG00000239945,Gene Expression,GRCh38,AL627309.3
...,...,...,...,...
ENSG00000277856,ENSG00000277856,Gene Expression,GRCh38,AC233755.2
ENSG00000275063,ENSG00000275063,Gene Expression,GRCh38,AC233755.1
ENSG00000271254,ENSG00000271254,Gene Expression,GRCh38,AC240274.1
ENSG00000277475,ENSG00000277475,Gene Expression,GRCh38,AC213203.1


In [47]:
araw.var

Unnamed: 0,gene_ids,feature_types,genome,SYMBOL
ENSG00000243485,ENSG00000243485,Gene Expression,GRCh38,MIR1302-2HG
ENSG00000237613,ENSG00000237613,Gene Expression,GRCh38,FAM138A
ENSG00000186092,ENSG00000186092,Gene Expression,GRCh38,OR4F5
ENSG00000238009,ENSG00000238009,Gene Expression,GRCh38,AL627309.1
ENSG00000239945,ENSG00000239945,Gene Expression,GRCh38,AL627309.3
...,...,...,...,...
ENSG00000277856,ENSG00000277856,Gene Expression,GRCh38,AC233755.2
ENSG00000275063,ENSG00000275063,Gene Expression,GRCh38,AC233755.1
ENSG00000271254,ENSG00000271254,Gene Expression,GRCh38,AC240274.1
ENSG00000277475,ENSG00000277475,Gene Expression,GRCh38,AC213203.1


feature is filtered

In [48]:
# Assign False since the feature was not filtered out in the normalized matrix (X).

In [49]:
# No feature has been filtered out of X, so the flag is False for every
# gene; the scalar is broadcast to all rows of var.
adata.var['feature_is_filtered'] = False

In [50]:
#View var

In [51]:
adata.var

Unnamed: 0_level_0,gene_ids,feature_types,genome,gene_symbols,feature_is_filtered
gene_ids,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ENSG00000243485,ENSG00000243485,Gene Expression,GRCh38,MIR1302-2HG,False
ENSG00000237613,ENSG00000237613,Gene Expression,GRCh38,FAM138A,False
ENSG00000186092,ENSG00000186092,Gene Expression,GRCh38,OR4F5,False
ENSG00000238009,ENSG00000238009,Gene Expression,GRCh38,AL627309.1,False
ENSG00000239945,ENSG00000239945,Gene Expression,GRCh38,AL627309.3,False
...,...,...,...,...,...
ENSG00000277856,ENSG00000277856,Gene Expression,GRCh38,AC233755.2,False
ENSG00000275063,ENSG00000275063,Gene Expression,GRCh38,AC233755.1,False
ENSG00000271254,ENSG00000271254,Gene Expression,GRCh38,AC240274.1,False
ENSG00000277475,ENSG00000277475,Gene Expression,GRCh38,AC213203.1,False


In [52]:
araw.var

Unnamed: 0,gene_ids,feature_types,genome,SYMBOL
ENSG00000243485,ENSG00000243485,Gene Expression,GRCh38,MIR1302-2HG
ENSG00000237613,ENSG00000237613,Gene Expression,GRCh38,FAM138A
ENSG00000186092,ENSG00000186092,Gene Expression,GRCh38,OR4F5
ENSG00000238009,ENSG00000238009,Gene Expression,GRCh38,AL627309.1
ENSG00000239945,ENSG00000239945,Gene Expression,GRCh38,AL627309.3
...,...,...,...,...
ENSG00000277856,ENSG00000277856,Gene Expression,GRCh38,AC233755.2
ENSG00000275063,ENSG00000275063,Gene Expression,GRCh38,AC233755.1
ENSG00000271254,ENSG00000271254,Gene Expression,GRCh38,AC240274.1
ENSG00000277475,ENSG00000277475,Gene Expression,GRCh38,AC213203.1


In [53]:
#  Delete the unwanted columns in adata and araw.

In [54]:
# Remove every annotation column from the raw object's var; only the
# Ensembl-ID index is kept.
for unused_column in ('gene_ids', 'SYMBOL', 'feature_types', 'genome'):
    del araw.var[unused_column]

In [55]:
# Remove the source annotation columns from adata.var, leaving only
# 'feature_is_filtered' next to the Ensembl-ID index.
for unused_column in ('gene_ids', 'gene_symbols', 'feature_types', 'genome'):
    del adata.var[unused_column]

In [56]:
# view var

In [57]:
adata.var

Unnamed: 0_level_0,feature_is_filtered
gene_ids,Unnamed: 1_level_1
ENSG00000243485,False
ENSG00000237613,False
ENSG00000186092,False
ENSG00000238009,False
ENSG00000239945,False
...,...
ENSG00000277856,False
ENSG00000275063,False
ENSG00000271254,False
ENSG00000277475,False


In [58]:
araw.var

ENSG00000243485
ENSG00000237613
ENSG00000186092
ENSG00000238009
ENSG00000239945
...
ENSG00000277856
ENSG00000275063
ENSG00000271254
ENSG00000277475
ENSG00000268674


#### **Observations(obs) (Cell metadata)**

In [59]:
#view obs

In [60]:
adata.obs

Unnamed: 0_level_0,array_row,array_col,n_genes_by_counts,log1p_n_genes_by_counts,total_counts,log1p_total_counts,sangerID,region,donor,donor_type,...,CD4+T_Th1_abundance,CD4+T_Th2_abundance,CD4+T_reg_abundance,NC5_glial_abundance,aCM5_abundance,Adip4_abundance,NC3_glial_abundance,NC6_schwann_abundance,EC9_FB-like_abundance,gdT_abundance
spot_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
HCAHeartST13233999_AAACAACGAATAGTTC-1,0,16,1489,7.306531,3551.0,8.175266,HCAHeartST13233999,SAN,AH5,DCD,...,,,,,,,,,,
HCAHeartST13233999_AAACAAGTATCTCCCA-1,50,102,2361,7.767264,7473.0,8.919186,HCAHeartST13233999,SAN,AH5,DCD,...,,,,,,,,,,
HCAHeartST13233999_AAACAATCTACTAGCA-1,3,43,2616,7.869784,7538.0,8.927845,HCAHeartST13233999,SAN,AH5,DCD,...,,,,,,,,,,
HCAHeartST13233999_AAACAGAGCGACTCCT-1,14,94,4212,8.345930,22758.0,10.032716,HCAHeartST13233999,SAN,AH5,DCD,...,,,,,,,,,,
HCAHeartST13233999_AAACAGCTTTCAGAAG-1,43,9,833,6.726233,1614.0,7.387090,HCAHeartST13233999,SAN,AH5,DCD,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HCAHeartST13233999_TTGTTCAGTGTGCTAC-1,24,64,1979,7.590852,4601.0,8.434246,HCAHeartST13233999,SAN,AH5,DCD,...,,,,,,,,,,
HCAHeartST13233999_TTGTTCTAGATACGCT-1,21,3,1278,7.153834,3435.0,8.142063,HCAHeartST13233999,SAN,AH5,DCD,...,,,,,,,,,,
HCAHeartST13233999_TTGTTGTGTGTCAAGA-1,31,77,2057,7.629490,5594.0,8.629628,HCAHeartST13233999,SAN,AH5,DCD,...,,,,,,,,,,
HCAHeartST13233999_TTGTTTCCATACAACT-1,45,27,1322,7.187657,2916.0,7.978311,HCAHeartST13233999,SAN,AH5,DCD,...,,,,,,,,,,


In [61]:
adata.obs.columns

Index(['array_row', 'array_col', 'n_genes_by_counts',
       'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts',
       'sangerID', 'region', 'donor', 'donor_type',
       ...
       'CD4+T_Th1_abundance', 'CD4+T_Th2_abundance', 'CD4+T_reg_abundance',
       'NC5_glial_abundance', 'aCM5_abundance', 'Adip4_abundance',
       'NC3_glial_abundance', 'NC6_schwann_abundance', 'EC9_FB-like_abundance',
       'gdT_abundance'],
      dtype='object', length=161)

#### **assay_ontology_term_id**

In [62]:
# identify the column in adata which corresponds to assay

In [63]:
list(adata.obs['kit_10x'].unique())

['Visium-v1']

In [64]:
# add the assay_ontology_term_id column

In [65]:
# obs['kit_10x'] holds a single value ('Visium-v1'), so one assay term
# applies to every spot; the scalar is broadcast to all rows.
adata.obs['assay_ontology_term_id'] = 'EFO:0010961'

In [66]:
# change datatype of the column

In [67]:
# Store the assay term as a categorical column.
adata.obs['assay_ontology_term_id'] = pd.Categorical(adata.obs['assay_ontology_term_id'])

In [68]:
# view adata.obs

In [69]:
adata.obs

Unnamed: 0_level_0,array_row,array_col,n_genes_by_counts,log1p_n_genes_by_counts,total_counts,log1p_total_counts,sangerID,region,donor,donor_type,...,CD4+T_Th2_abundance,CD4+T_reg_abundance,NC5_glial_abundance,aCM5_abundance,Adip4_abundance,NC3_glial_abundance,NC6_schwann_abundance,EC9_FB-like_abundance,gdT_abundance,assay_ontology_term_id
spot_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
HCAHeartST13233999_AAACAACGAATAGTTC-1,0,16,1489,7.306531,3551.0,8.175266,HCAHeartST13233999,SAN,AH5,DCD,...,,,,,,,,,,EFO:0010961
HCAHeartST13233999_AAACAAGTATCTCCCA-1,50,102,2361,7.767264,7473.0,8.919186,HCAHeartST13233999,SAN,AH5,DCD,...,,,,,,,,,,EFO:0010961
HCAHeartST13233999_AAACAATCTACTAGCA-1,3,43,2616,7.869784,7538.0,8.927845,HCAHeartST13233999,SAN,AH5,DCD,...,,,,,,,,,,EFO:0010961
HCAHeartST13233999_AAACAGAGCGACTCCT-1,14,94,4212,8.345930,22758.0,10.032716,HCAHeartST13233999,SAN,AH5,DCD,...,,,,,,,,,,EFO:0010961
HCAHeartST13233999_AAACAGCTTTCAGAAG-1,43,9,833,6.726233,1614.0,7.387090,HCAHeartST13233999,SAN,AH5,DCD,...,,,,,,,,,,EFO:0010961
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HCAHeartST13233999_TTGTTCAGTGTGCTAC-1,24,64,1979,7.590852,4601.0,8.434246,HCAHeartST13233999,SAN,AH5,DCD,...,,,,,,,,,,EFO:0010961
HCAHeartST13233999_TTGTTCTAGATACGCT-1,21,3,1278,7.153834,3435.0,8.142063,HCAHeartST13233999,SAN,AH5,DCD,...,,,,,,,,,,EFO:0010961
HCAHeartST13233999_TTGTTGTGTGTCAAGA-1,31,77,2057,7.629490,5594.0,8.629628,HCAHeartST13233999,SAN,AH5,DCD,...,,,,,,,,,,EFO:0010961
HCAHeartST13233999_TTGTTTCCATACAACT-1,45,27,1322,7.187657,2916.0,7.978311,HCAHeartST13233999,SAN,AH5,DCD,...,,,,,,,,,,EFO:0010961


#### **cell_type_ontology_term_id**

In [70]:
# Identify the column in adata.obs (or the matrix in adata.obsm) that relates to cell type annotation.

In [71]:
adata.obs.columns

Index(['array_row', 'array_col', 'n_genes_by_counts',
       'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts',
       'sangerID', 'region', 'donor', 'donor_type',
       ...
       'CD4+T_Th2_abundance', 'CD4+T_reg_abundance', 'NC5_glial_abundance',
       'aCM5_abundance', 'Adip4_abundance', 'NC3_glial_abundance',
       'NC6_schwann_abundance', 'EC9_FB-like_abundance', 'gdT_abundance',
       'assay_ontology_term_id'],
      dtype='object', length=162)

In [72]:
adata.obsm

AxisArrays with keys: MT, X_spatial, means_cell_abundance_w_sf, prop, q05_cell_abundance_w_sf, q95_cell_abundance_w_sf, stds_cell_abundance_w_sf

In [73]:
adata.obsm['means_cell_abundance_w_sf']

Unnamed: 0_level_0,meanscell_abundance_w_sf_Adip1,meanscell_abundance_w_sf_Adip2,meanscell_abundance_w_sf_Adip3,meanscell_abundance_w_sf_B,meanscell_abundance_w_sf_B_plasma,meanscell_abundance_w_sf_CD14+Mo,meanscell_abundance_w_sf_CD16+Mo,meanscell_abundance_w_sf_CD4+T_act,meanscell_abundance_w_sf_CD4+T_naive,meanscell_abundance_w_sf_CD8+T_cytox,...,meanscell_abundance_w_sf_CD4+T_Th1,meanscell_abundance_w_sf_CD4+T_Th2,meanscell_abundance_w_sf_CD4+T_reg,meanscell_abundance_w_sf_NC5_glial,meanscell_abundance_w_sf_aCM5,meanscell_abundance_w_sf_Adip4,meanscell_abundance_w_sf_NC3_glial,meanscell_abundance_w_sf_NC6_schwann,meanscell_abundance_w_sf_EC9_FB-like,meanscell_abundance_w_sf_gdT
spot_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
HCAHeartST13233999_AAACAACGAATAGTTC-1,0.046705,0.049146,0.125617,0.057165,0.264412,0.049860,0.052603,0.078411,0.110199,0.125496,...,,,,,,,,,,
HCAHeartST13233999_AAACAAGTATCTCCCA-1,0.022354,0.015648,0.016667,0.048648,0.515664,0.040484,0.049359,0.115673,0.118317,0.126006,...,,,,,,,,,,
HCAHeartST13233999_AAACAATCTACTAGCA-1,0.062455,0.043552,0.044723,0.105223,0.301360,0.050835,0.080109,0.088667,0.142024,0.215171,...,,,,,,,,,,
HCAHeartST13233999_AAACAGAGCGACTCCT-1,0.018825,0.016354,0.009706,0.067882,0.729717,0.054036,0.048250,0.095662,0.092597,0.112908,...,,,,,,,,,,
HCAHeartST13233999_AAACAGCTTTCAGAAG-1,0.528153,0.053287,0.056078,0.089883,0.055188,0.096593,0.095396,0.116020,0.147023,0.125031,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HCAHeartST13233999_TTGTTCAGTGTGCTAC-1,0.045017,0.040282,0.123358,0.073854,0.039566,0.053058,0.066211,0.109318,0.165586,0.116247,...,,,,,,,,,,
HCAHeartST13233999_TTGTTCTAGATACGCT-1,0.079810,0.283361,0.159237,0.048809,0.147295,0.030586,0.036565,0.057601,0.115652,0.109343,...,,,,,,,,,,
HCAHeartST13233999_TTGTTGTGTGTCAAGA-1,0.026541,0.021953,0.039694,0.058919,0.205768,0.049325,0.059216,0.097200,0.105765,0.054038,...,,,,,,,,,,
HCAHeartST13233999_TTGTTTCCATACAACT-1,0.043556,0.020803,0.097937,0.081409,0.023540,0.073456,0.067056,0.142086,0.210233,0.271397,...,,,,,,,,,,


In [74]:
def _labels_at_row_max(spot_abundance):
    """Return the column labels whose value equals the maximum of the row.

    NaN entries never compare equal to the maximum, so columns without a
    value for this spot are never returned.
    """
    is_row_max = spot_abundance == spot_abundance.max()
    return spot_abundance[is_row_max].index.tolist()


# One list per spot: the abundance column(s) holding the highest value.
max_columns = adata.obsm['means_cell_abundance_w_sf'].apply(_labels_at_row_max, axis=1)

In [75]:
max_columns

spot_id
HCAHeartST13233999_AAACAACGAATAGTTC-1          [meanscell_abundance_w_sf_aCM4]
HCAHeartST13233999_AAACAAGTATCTCCCA-1          [meanscell_abundance_w_sf_aCM4]
HCAHeartST13233999_AAACAATCTACTAGCA-1           [meanscell_abundance_w_sf_FB3]
HCAHeartST13233999_AAACAGAGCGACTCCT-1          [meanscell_abundance_w_sf_aCM4]
HCAHeartST13233999_AAACAGCTTTCAGAAG-1         [meanscell_abundance_w_sf_Adip1]
                                                         ...                  
HCAHeartST13233999_TTGTTCAGTGTGCTAC-1    [meanscell_abundance_w_sf_SAN_P_cell]
HCAHeartST13233999_TTGTTCTAGATACGCT-1          [meanscell_abundance_w_sf_aCM4]
HCAHeartST13233999_TTGTTGTGTGTCAAGA-1          [meanscell_abundance_w_sf_aCM4]
HCAHeartST13233999_TTGTTTCCATACAACT-1           [meanscell_abundance_w_sf_FB3]
HCAHeartST13233999_TTGTTTGTGTAAATTC-1          [meanscell_abundance_w_sf_aCM4]
Length: 2881, dtype: object

In [76]:
# Detect ties: a spot is ambiguous when more than one column shares the row
# maximum, i.e. when its list in max_columns has more than one label.
# (Counting how often a label occurs inside the list can never detect this,
# because each column label appears at most once per row.)
# Spots without a tie get an empty list, so the existing "non-empty" check
# downstream keeps working unchanged.
columns_with_multiple_max = max_columns.apply(
    lambda tied_labels: list(tied_labels) if len(tied_labels) > 1 else []
)

In [77]:
columns_with_multiple_max 

spot_id
HCAHeartST13233999_AAACAACGAATAGTTC-1    []
HCAHeartST13233999_AAACAAGTATCTCCCA-1    []
HCAHeartST13233999_AAACAATCTACTAGCA-1    []
HCAHeartST13233999_AAACAGAGCGACTCCT-1    []
HCAHeartST13233999_AAACAGCTTTCAGAAG-1    []
                                         ..
HCAHeartST13233999_TTGTTCAGTGTGCTAC-1    []
HCAHeartST13233999_TTGTTCTAGATACGCT-1    []
HCAHeartST13233999_TTGTTGTGTGTCAAGA-1    []
HCAHeartST13233999_TTGTTTCCATACAACT-1    []
HCAHeartST13233999_TTGTTTGTGTAAATTC-1    []
Length: 2881, dtype: object

In [78]:
# Keep the per-spot tie lists next to the other spot metadata for inspection.
# NOTE(review): this column holds Python lists; presumably it has to be
# removed before the object is written to .h5ad - confirm it is dropped later.
adata.obs['columns_with_multiple_max'] = columns_with_multiple_max

In [79]:
# Select only the spots whose tie list contains at least one entry.
has_entries = columns_with_multiple_max.map(len) > 0
non_empty_columns = columns_with_multiple_max[has_entries]

In [80]:
len(non_empty_columns )

0

In [81]:
# Reduce each spot to a single cell-type label: take the first column at
# the row maximum and strip the obsm column prefix; spots with no maximum
# get an empty string.
# Note: this rebinds max_columns from a Series of lists to a list of
# strings, so the cell is meant to be run once per kernel session.
abundance_prefix = 'meanscell_abundance_w_sf_'
max_columns = [
    labels[0].replace(abundance_prefix, '') if labels else ''
    for labels in max_columns
]

In [82]:
# Store the per-spot label of the most abundant cell type; the list is
# assigned positionally, in the same row order it was computed in.
adata.obs['highest_cell_Density_columns'] = max_columns

In [83]:
adata.obs['highest_cell_Density_columns']

spot_id
HCAHeartST13233999_AAACAACGAATAGTTC-1          aCM4
HCAHeartST13233999_AAACAAGTATCTCCCA-1          aCM4
HCAHeartST13233999_AAACAATCTACTAGCA-1           FB3
HCAHeartST13233999_AAACAGAGCGACTCCT-1          aCM4
HCAHeartST13233999_AAACAGCTTTCAGAAG-1         Adip1
                                            ...    
HCAHeartST13233999_TTGTTCAGTGTGCTAC-1    SAN_P_cell
HCAHeartST13233999_TTGTTCTAGATACGCT-1          aCM4
HCAHeartST13233999_TTGTTGTGTGTCAAGA-1          aCM4
HCAHeartST13233999_TTGTTTCCATACAACT-1           FB3
HCAHeartST13233999_TTGTTTGTGTAAATTC-1          aCM4
Name: highest_cell_Density_columns, Length: 2881, dtype: object

In [84]:
list(adata.obs['highest_cell_Density_columns'].unique())

['aCM4',
 'FB3',
 'Adip1',
 'FB5',
 'CD8+T_cytox',
 'LYVE1+TIMD4+MP',
 'EC6_ven',
 'LYVE1+IGF1+MP',
 'SAN_P_cell',
 'EC10_CMC-like',
 'Adip2',
 'NC1_glial',
 'FB4_activated',
 'FB6',
 'aCM3',
 'DC',
 'SMC1_basic',
 'EC5_art',
 'MoMP',
 'MAIT-like',
 'EC7_endocardial',
 'CD4+T_naive',
 'B_plasma',
 'FB2',
 'Mast',
 'SMC2_art',
 'Adip3',
 'Neut',
 'CD14+Mo',
 'PC2_atria',
 'EC8_ln',
 'CD16+Mo',
 'B',
 'CD4+T_act',
 'LYVE1+MP_cycling']

In [85]:
# Start the ontology column from the author cell-type labels; the labels
# are translated to CL term IDs with the mapping dictionary further below.
adata.obs['cell_type_ontology_term_id'] = adata.obs['highest_cell_Density_columns']

In [86]:
# create a dictionary of cell type and ontology term

In [87]:
# Author cell-type label -> Cell Ontology (CL) term ID.
# Several fine-grained labels share one CL term (e.g. all aCM* labels).
# NOTE(review): labels absent from this dictionary become NaN when mapped;
# some labels that appear among the obs abundance columns (e.g. vCM2, vCM5,
# gdT) have no entry here - confirm they cannot be the top label of a spot.
mapping= {'aCM1': 'CL:0002129',
 'aCM2': 'CL:0002129',
 'aCM3': 'CL:0002129',
 'aCM4': 'CL:0002129',
 'aCM5': 'CL:0002129',
 'Adip1': 'CL:0000136',
 'Adip2': 'CL:0000136',
 'Adip3': 'CL:0000136',
 'Adip4': 'CL:0000136',
 'Adipocyte': 'CL:0000136',
 'Atrial Cardiomyocyte': 'CL:0002129',
 'AVN_bundle_cell': 'CL:0010005',
 'AVN_P_cell': 'CL:1000477',
 'B': 'CL:0000236',
 'B_plasma': 'CL:0000786',
 'CD14+Mo': 'CL:0001054',
 'CD16+Mo': 'CL:0002396',
 'CD4+T_act': 'CL:0000896',
 'CD4+T_naive': 'CL:0000895',
 'CD4+T_Th2': 'CL:0000546',
 'CD8+T_cytox': 'CL:0000794',
 'CD8+T_em': 'CL:0000913',
 'CD8+T_te': 'CL:0000625',
 'CD8+T_trans': 'CL:0000625',
 'DC': 'CL:0001056',
 'EC1_cap': 'CL:0002144',
 'EC10_CMC-like': 'CL:0000115',
 'EC2_cap': 'CL:0002144',
 'EC3_cap': 'CL:0002144',
 'EC4_immune': 'CL:0000115',
 'EC5_art': 'CL:1000413',
 'EC6_ven': 'CL:0002543',
 'EC7_atria': 'CL:0002350',
 'EC7_endocardial': 'CL:0002350',
 'EC8_ln': 'CL:0002138',
 'Endothelial cell': 'CL:0000115',
 'FB1': 'CL:0002548',
 'FB2': 'CL:0002548',
 'FB3': 'CL:0002548',
 'FB4': 'CL:0002548',
 'FB4_activated': 'CL:0002548',
 'FB5': 'CL:0002548',
 'FB6': 'CL:0002548',
 'Fibroblast': 'CL:0000057',
 'ILC': 'CL:0001065',
 'Lymphatic Endothelial cell': 'CL:0002138',
 'Lymphoid': 'CL:0000542',
 'LYVE1+IGF1+MP': 'CL:0000235',
 'LYVE1+MP_cycling': 'CL:0000235',
 'LYVE1+TIMD4+MP': 'CL:0000235',
 'MAIT-like': 'CL:0000940',
 'Mast': 'CL:0000097',
 'Mast cell': 'CL:0000097',
 'Meso': 'CL:0000077',
 'Mesothelial cell': 'CL:0000077',
 'MoMP': 'CL:0000576',
 'Mural cell': 'CL:0008034',
 'Myeloid': 'CL:0000763',
 'NC1': 'CL:0000125',
 'NC1_glial': 'CL:0000125',
 'NC2': 'CL:0000125',
 'NC2_glial_NGF+': 'CL:0000125',
 'Neural cell': 'CL:0002319',
 'Neut': 'CL:0000775',
 'NK_CD16hi': 'CL:0000939',
 'NK_CD56hi': 'CL:0000938',
 'PC1_vent': 'CL:0000669',
 'PC2_atria': 'CL:0000669',
 'PC3_str': 'CL:0000669',
 'PC4_CMC-like': 'CL:0000669',
 'SAN_P_cell': 'CL:1000477',
 'SMC1_basic': 'CL:0000192',
 'SMC2_art': 'CL:0002591',
 'T/NK_cycling': 'CL:0000814',
 'vCM1': 'CL:0002131',
 'vCM3_stressed': 'CL:0002131',
 'vCM4': 'CL:0002131',
 'Ventricular Cardiomyocyte': 'CL:0002131',
 'unclassified': 'CL:0000003'}

In [88]:
# add the cell_type_ontology_term_id column

In [89]:
# Translate the author cell-type labels into CL ontology term IDs.
# The labels are read from 'highest_cell_Density_columns' (the column the
# ontology column was initialised from) rather than from the ontology column
# itself, so re-running this cell gives the same result instead of mapping
# already-translated term IDs to NaN.
cell_type_labels = adata.obs['highest_cell_Density_columns']

# Series.map silently yields NaN for labels missing from the dictionary, so
# fail loudly instead of producing spots without an ontology term.
unmapped_labels = sorted(set(cell_type_labels.unique()) - set(mapping))
if unmapped_labels:
    raise ValueError(
        f"No cell type ontology term defined in `mapping` for: {unmapped_labels}"
    )

adata.obs['cell_type_ontology_term_id'] = cell_type_labels.map(mapping)

In [90]:
# change datatype of the column

In [91]:
# Store the CL term IDs as a categorical column.
adata.obs['cell_type_ontology_term_id'] = pd.Categorical(adata.obs['cell_type_ontology_term_id'])

In [92]:
adata.obs

Unnamed: 0_level_0,array_row,array_col,n_genes_by_counts,log1p_n_genes_by_counts,total_counts,log1p_total_counts,sangerID,region,donor,donor_type,...,aCM5_abundance,Adip4_abundance,NC3_glial_abundance,NC6_schwann_abundance,EC9_FB-like_abundance,gdT_abundance,assay_ontology_term_id,columns_with_multiple_max,highest_cell_Density_columns,cell_type_ontology_term_id
spot_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
HCAHeartST13233999_AAACAACGAATAGTTC-1,0,16,1489,7.306531,3551.0,8.175266,HCAHeartST13233999,SAN,AH5,DCD,...,,,,,,,EFO:0010961,[],aCM4,CL:0002129
HCAHeartST13233999_AAACAAGTATCTCCCA-1,50,102,2361,7.767264,7473.0,8.919186,HCAHeartST13233999,SAN,AH5,DCD,...,,,,,,,EFO:0010961,[],aCM4,CL:0002129
HCAHeartST13233999_AAACAATCTACTAGCA-1,3,43,2616,7.869784,7538.0,8.927845,HCAHeartST13233999,SAN,AH5,DCD,...,,,,,,,EFO:0010961,[],FB3,CL:0002548
HCAHeartST13233999_AAACAGAGCGACTCCT-1,14,94,4212,8.345930,22758.0,10.032716,HCAHeartST13233999,SAN,AH5,DCD,...,,,,,,,EFO:0010961,[],aCM4,CL:0002129
HCAHeartST13233999_AAACAGCTTTCAGAAG-1,43,9,833,6.726233,1614.0,7.387090,HCAHeartST13233999,SAN,AH5,DCD,...,,,,,,,EFO:0010961,[],Adip1,CL:0000136
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HCAHeartST13233999_TTGTTCAGTGTGCTAC-1,24,64,1979,7.590852,4601.0,8.434246,HCAHeartST13233999,SAN,AH5,DCD,...,,,,,,,EFO:0010961,[],SAN_P_cell,CL:1000477
HCAHeartST13233999_TTGTTCTAGATACGCT-1,21,3,1278,7.153834,3435.0,8.142063,HCAHeartST13233999,SAN,AH5,DCD,...,,,,,,,EFO:0010961,[],aCM4,CL:0002129
HCAHeartST13233999_TTGTTGTGTGTCAAGA-1,31,77,2057,7.629490,5594.0,8.629628,HCAHeartST13233999,SAN,AH5,DCD,...,,,,,,,EFO:0010961,[],aCM4,CL:0002129
HCAHeartST13233999_TTGTTTCCATACAACT-1,45,27,1322,7.187657,2916.0,7.978311,HCAHeartST13233999,SAN,AH5,DCD,...,,,,,,,EFO:0010961,[],FB3,CL:0002548


#### **donor_id**

In [93]:
#identify the column in adata.obs which provides donor information

In [94]:
adata.obs.columns

Index(['array_row', 'array_col', 'n_genes_by_counts',
       'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts',
       'sangerID', 'region', 'donor', 'donor_type',
       ...
       'aCM5_abundance', 'Adip4_abundance', 'NC3_glial_abundance',
       'NC6_schwann_abundance', 'EC9_FB-like_abundance', 'gdT_abundance',
       'assay_ontology_term_id', 'columns_with_multiple_max',
       'highest_cell_Density_columns', 'cell_type_ontology_term_id'],
      dtype='object', length=165)

In [95]:
list(adata.obs['donor'].unique())

['AH5']

In [96]:
# add the donor_id column

In [97]:
# Copy the source 'donor' labels into a new 'donor_id' column; the original 'donor' column is left in place.
adata.obs['donor_id'] = adata.obs['donor']

In [98]:
# change datatype of the column

In [99]:
# Cast donor_id to a pandas categorical; this sample holds a single donor ('AH5'), so one category results.
adata.obs['donor_id'] = adata.obs['donor_id'].astype('category')

In [100]:
# view unique values of donor_id column

In [101]:
list(adata.obs['donor_id'].unique())

['AH5']

In [102]:
#view obs

In [103]:
adata.obs

Unnamed: 0_level_0,array_row,array_col,n_genes_by_counts,log1p_n_genes_by_counts,total_counts,log1p_total_counts,sangerID,region,donor,donor_type,...,Adip4_abundance,NC3_glial_abundance,NC6_schwann_abundance,EC9_FB-like_abundance,gdT_abundance,assay_ontology_term_id,columns_with_multiple_max,highest_cell_Density_columns,cell_type_ontology_term_id,donor_id
spot_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
HCAHeartST13233999_AAACAACGAATAGTTC-1,0,16,1489,7.306531,3551.0,8.175266,HCAHeartST13233999,SAN,AH5,DCD,...,,,,,,EFO:0010961,[],aCM4,CL:0002129,AH5
HCAHeartST13233999_AAACAAGTATCTCCCA-1,50,102,2361,7.767264,7473.0,8.919186,HCAHeartST13233999,SAN,AH5,DCD,...,,,,,,EFO:0010961,[],aCM4,CL:0002129,AH5
HCAHeartST13233999_AAACAATCTACTAGCA-1,3,43,2616,7.869784,7538.0,8.927845,HCAHeartST13233999,SAN,AH5,DCD,...,,,,,,EFO:0010961,[],FB3,CL:0002548,AH5
HCAHeartST13233999_AAACAGAGCGACTCCT-1,14,94,4212,8.345930,22758.0,10.032716,HCAHeartST13233999,SAN,AH5,DCD,...,,,,,,EFO:0010961,[],aCM4,CL:0002129,AH5
HCAHeartST13233999_AAACAGCTTTCAGAAG-1,43,9,833,6.726233,1614.0,7.387090,HCAHeartST13233999,SAN,AH5,DCD,...,,,,,,EFO:0010961,[],Adip1,CL:0000136,AH5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HCAHeartST13233999_TTGTTCAGTGTGCTAC-1,24,64,1979,7.590852,4601.0,8.434246,HCAHeartST13233999,SAN,AH5,DCD,...,,,,,,EFO:0010961,[],SAN_P_cell,CL:1000477,AH5
HCAHeartST13233999_TTGTTCTAGATACGCT-1,21,3,1278,7.153834,3435.0,8.142063,HCAHeartST13233999,SAN,AH5,DCD,...,,,,,,EFO:0010961,[],aCM4,CL:0002129,AH5
HCAHeartST13233999_TTGTTGTGTGTCAAGA-1,31,77,2057,7.629490,5594.0,8.629628,HCAHeartST13233999,SAN,AH5,DCD,...,,,,,,EFO:0010961,[],aCM4,CL:0002129,AH5
HCAHeartST13233999_TTGTTTCCATACAACT-1,45,27,1322,7.187657,2916.0,7.978311,HCAHeartST13233999,SAN,AH5,DCD,...,,,,,,EFO:0010961,[],FB3,CL:0002548,AH5


In [104]:
# Confirm that 'donor_id' has been appended to the obs columns.
adata.obs.columns

Index(['array_row', 'array_col', 'n_genes_by_counts',
       'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts',
       'sangerID', 'region', 'donor', 'donor_type',
       ...
       'Adip4_abundance', 'NC3_glial_abundance', 'NC6_schwann_abundance',
       'EC9_FB-like_abundance', 'gdT_abundance', 'assay_ontology_term_id',
       'columns_with_multiple_max', 'highest_cell_Density_columns',
       'cell_type_ontology_term_id', 'donor_id'],
      dtype='object', length=166)

#### **development_stage_ontology_term_id**

In [105]:
# identify the column in adata which corresponds to age

In [106]:
adata.obs.columns

Index(['array_row', 'array_col', 'n_genes_by_counts',
       'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts',
       'sangerID', 'region', 'donor', 'donor_type',
       ...
       'Adip4_abundance', 'NC3_glial_abundance', 'NC6_schwann_abundance',
       'EC9_FB-like_abundance', 'gdT_abundance', 'assay_ontology_term_id',
       'columns_with_multiple_max', 'highest_cell_Density_columns',
       'cell_type_ontology_term_id', 'donor_id'],
      dtype='object', length=166)

In [107]:
list(adata.obs['age'].unique())

['20-25']

In [108]:
# create a dictionary for age and development stage ontology term id

In [109]:
# Age bracket -> HsapDv development-stage term id.
# Brackets are listed under the term they share: both five-year brackets of a
# decade resolve to the same term.
mapping = {
    age_bracket: stage_term
    for stage_term, age_brackets in [
        ('HsapDv:0000240', ['50-55', '55-60']),
        ('HsapDv:0000242', ['70-75']),
        ('HsapDv:0000241', ['65-70', '60-65']),
        ('HsapDv:0000239', ['40-45', '45-50']),
        ('HsapDv:0000237', ['20-25']),
    ]
    for age_bracket in age_brackets
}

In [110]:
# add the development_stage_ontology_term_id column

In [111]:
# Translate each age bracket into its HsapDv term. Series.map leaves NaN for any
# value missing from `mapping`, and `mapping` is reassigned several times in this
# notebook, so check that every row was mapped before writing the column.
development_stage_terms = adata.obs['age'].map(mapping)
ages_without_term = adata.obs.loc[development_stage_terms.isna(), 'age'].unique().tolist()
assert not ages_without_term, f"age values without a development stage term: {ages_without_term}"
adata.obs['development_stage_ontology_term_id'] = development_stage_terms

In [112]:
# change datatype of the column

In [113]:
# Cast the mapped terms to a pandas categorical; all spots in this sample share one age bracket ('20-25').
adata.obs['development_stage_ontology_term_id'] = adata.obs['development_stage_ontology_term_id'].astype('category')

In [114]:
# view unique values of development_stage_ontology_term_id column

In [115]:
list(adata.obs['development_stage_ontology_term_id'].unique())

['HsapDv:0000237']

In [116]:
# view adata.obs

In [117]:
adata.obs

Unnamed: 0_level_0,array_row,array_col,n_genes_by_counts,log1p_n_genes_by_counts,total_counts,log1p_total_counts,sangerID,region,donor,donor_type,...,NC3_glial_abundance,NC6_schwann_abundance,EC9_FB-like_abundance,gdT_abundance,assay_ontology_term_id,columns_with_multiple_max,highest_cell_Density_columns,cell_type_ontology_term_id,donor_id,development_stage_ontology_term_id
spot_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
HCAHeartST13233999_AAACAACGAATAGTTC-1,0,16,1489,7.306531,3551.0,8.175266,HCAHeartST13233999,SAN,AH5,DCD,...,,,,,EFO:0010961,[],aCM4,CL:0002129,AH5,HsapDv:0000237
HCAHeartST13233999_AAACAAGTATCTCCCA-1,50,102,2361,7.767264,7473.0,8.919186,HCAHeartST13233999,SAN,AH5,DCD,...,,,,,EFO:0010961,[],aCM4,CL:0002129,AH5,HsapDv:0000237
HCAHeartST13233999_AAACAATCTACTAGCA-1,3,43,2616,7.869784,7538.0,8.927845,HCAHeartST13233999,SAN,AH5,DCD,...,,,,,EFO:0010961,[],FB3,CL:0002548,AH5,HsapDv:0000237
HCAHeartST13233999_AAACAGAGCGACTCCT-1,14,94,4212,8.345930,22758.0,10.032716,HCAHeartST13233999,SAN,AH5,DCD,...,,,,,EFO:0010961,[],aCM4,CL:0002129,AH5,HsapDv:0000237
HCAHeartST13233999_AAACAGCTTTCAGAAG-1,43,9,833,6.726233,1614.0,7.387090,HCAHeartST13233999,SAN,AH5,DCD,...,,,,,EFO:0010961,[],Adip1,CL:0000136,AH5,HsapDv:0000237
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HCAHeartST13233999_TTGTTCAGTGTGCTAC-1,24,64,1979,7.590852,4601.0,8.434246,HCAHeartST13233999,SAN,AH5,DCD,...,,,,,EFO:0010961,[],SAN_P_cell,CL:1000477,AH5,HsapDv:0000237
HCAHeartST13233999_TTGTTCTAGATACGCT-1,21,3,1278,7.153834,3435.0,8.142063,HCAHeartST13233999,SAN,AH5,DCD,...,,,,,EFO:0010961,[],aCM4,CL:0002129,AH5,HsapDv:0000237
HCAHeartST13233999_TTGTTGTGTGTCAAGA-1,31,77,2057,7.629490,5594.0,8.629628,HCAHeartST13233999,SAN,AH5,DCD,...,,,,,EFO:0010961,[],aCM4,CL:0002129,AH5,HsapDv:0000237
HCAHeartST13233999_TTGTTTCCATACAACT-1,45,27,1322,7.187657,2916.0,7.978311,HCAHeartST13233999,SAN,AH5,DCD,...,,,,,EFO:0010961,[],FB3,CL:0002548,AH5,HsapDv:0000237


#### **disease_ontology_term_id**

In [118]:
# Assign 'normal' (PATO:0000461) since all donors are healthy

In [119]:
# add the disease_ontology_term_id column

In [120]:
# Same disease term for every row: assign the scalar and let pandas broadcast it
# over all rows of obs.
adata.obs['disease_ontology_term_id'] = 'PATO:0000461'

In [121]:
#change data type of column

In [122]:
# Store the constant disease term as a pandas categorical column, like the other ontology-term columns in this notebook.
adata.obs['disease_ontology_term_id'] = adata.obs['disease_ontology_term_id'].astype('category')

In [123]:
# view obs

In [124]:
adata.obs

Unnamed: 0_level_0,array_row,array_col,n_genes_by_counts,log1p_n_genes_by_counts,total_counts,log1p_total_counts,sangerID,region,donor,donor_type,...,NC6_schwann_abundance,EC9_FB-like_abundance,gdT_abundance,assay_ontology_term_id,columns_with_multiple_max,highest_cell_Density_columns,cell_type_ontology_term_id,donor_id,development_stage_ontology_term_id,disease_ontology_term_id
spot_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
HCAHeartST13233999_AAACAACGAATAGTTC-1,0,16,1489,7.306531,3551.0,8.175266,HCAHeartST13233999,SAN,AH5,DCD,...,,,,EFO:0010961,[],aCM4,CL:0002129,AH5,HsapDv:0000237,PATO:0000461
HCAHeartST13233999_AAACAAGTATCTCCCA-1,50,102,2361,7.767264,7473.0,8.919186,HCAHeartST13233999,SAN,AH5,DCD,...,,,,EFO:0010961,[],aCM4,CL:0002129,AH5,HsapDv:0000237,PATO:0000461
HCAHeartST13233999_AAACAATCTACTAGCA-1,3,43,2616,7.869784,7538.0,8.927845,HCAHeartST13233999,SAN,AH5,DCD,...,,,,EFO:0010961,[],FB3,CL:0002548,AH5,HsapDv:0000237,PATO:0000461
HCAHeartST13233999_AAACAGAGCGACTCCT-1,14,94,4212,8.345930,22758.0,10.032716,HCAHeartST13233999,SAN,AH5,DCD,...,,,,EFO:0010961,[],aCM4,CL:0002129,AH5,HsapDv:0000237,PATO:0000461
HCAHeartST13233999_AAACAGCTTTCAGAAG-1,43,9,833,6.726233,1614.0,7.387090,HCAHeartST13233999,SAN,AH5,DCD,...,,,,EFO:0010961,[],Adip1,CL:0000136,AH5,HsapDv:0000237,PATO:0000461
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HCAHeartST13233999_TTGTTCAGTGTGCTAC-1,24,64,1979,7.590852,4601.0,8.434246,HCAHeartST13233999,SAN,AH5,DCD,...,,,,EFO:0010961,[],SAN_P_cell,CL:1000477,AH5,HsapDv:0000237,PATO:0000461
HCAHeartST13233999_TTGTTCTAGATACGCT-1,21,3,1278,7.153834,3435.0,8.142063,HCAHeartST13233999,SAN,AH5,DCD,...,,,,EFO:0010961,[],aCM4,CL:0002129,AH5,HsapDv:0000237,PATO:0000461
HCAHeartST13233999_TTGTTGTGTGTCAAGA-1,31,77,2057,7.629490,5594.0,8.629628,HCAHeartST13233999,SAN,AH5,DCD,...,,,,EFO:0010961,[],aCM4,CL:0002129,AH5,HsapDv:0000237,PATO:0000461
HCAHeartST13233999_TTGTTTCCATACAACT-1,45,27,1322,7.187657,2916.0,7.978311,HCAHeartST13233999,SAN,AH5,DCD,...,,,,EFO:0010961,[],FB3,CL:0002548,AH5,HsapDv:0000237,PATO:0000461


#### **is_primary_data**

In [125]:
# Flag every row as non-primary data: assign the scalar and let pandas broadcast it.
adata.obs['is_primary_data'] = False

In [126]:
# Display obs to check the new 'is_primary_data' column (shown before the explicit bool cast).
adata.obs

Unnamed: 0_level_0,array_row,array_col,n_genes_by_counts,log1p_n_genes_by_counts,total_counts,log1p_total_counts,sangerID,region,donor,donor_type,...,EC9_FB-like_abundance,gdT_abundance,assay_ontology_term_id,columns_with_multiple_max,highest_cell_Density_columns,cell_type_ontology_term_id,donor_id,development_stage_ontology_term_id,disease_ontology_term_id,is_primary_data
spot_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
HCAHeartST13233999_AAACAACGAATAGTTC-1,0,16,1489,7.306531,3551.0,8.175266,HCAHeartST13233999,SAN,AH5,DCD,...,,,EFO:0010961,[],aCM4,CL:0002129,AH5,HsapDv:0000237,PATO:0000461,False
HCAHeartST13233999_AAACAAGTATCTCCCA-1,50,102,2361,7.767264,7473.0,8.919186,HCAHeartST13233999,SAN,AH5,DCD,...,,,EFO:0010961,[],aCM4,CL:0002129,AH5,HsapDv:0000237,PATO:0000461,False
HCAHeartST13233999_AAACAATCTACTAGCA-1,3,43,2616,7.869784,7538.0,8.927845,HCAHeartST13233999,SAN,AH5,DCD,...,,,EFO:0010961,[],FB3,CL:0002548,AH5,HsapDv:0000237,PATO:0000461,False
HCAHeartST13233999_AAACAGAGCGACTCCT-1,14,94,4212,8.345930,22758.0,10.032716,HCAHeartST13233999,SAN,AH5,DCD,...,,,EFO:0010961,[],aCM4,CL:0002129,AH5,HsapDv:0000237,PATO:0000461,False
HCAHeartST13233999_AAACAGCTTTCAGAAG-1,43,9,833,6.726233,1614.0,7.387090,HCAHeartST13233999,SAN,AH5,DCD,...,,,EFO:0010961,[],Adip1,CL:0000136,AH5,HsapDv:0000237,PATO:0000461,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HCAHeartST13233999_TTGTTCAGTGTGCTAC-1,24,64,1979,7.590852,4601.0,8.434246,HCAHeartST13233999,SAN,AH5,DCD,...,,,EFO:0010961,[],SAN_P_cell,CL:1000477,AH5,HsapDv:0000237,PATO:0000461,False
HCAHeartST13233999_TTGTTCTAGATACGCT-1,21,3,1278,7.153834,3435.0,8.142063,HCAHeartST13233999,SAN,AH5,DCD,...,,,EFO:0010961,[],aCM4,CL:0002129,AH5,HsapDv:0000237,PATO:0000461,False
HCAHeartST13233999_TTGTTGTGTGTCAAGA-1,31,77,2057,7.629490,5594.0,8.629628,HCAHeartST13233999,SAN,AH5,DCD,...,,,EFO:0010961,[],aCM4,CL:0002129,AH5,HsapDv:0000237,PATO:0000461,False
HCAHeartST13233999_TTGTTTCCATACAACT-1,45,27,1322,7.187657,2916.0,7.978311,HCAHeartST13233999,SAN,AH5,DCD,...,,,EFO:0010961,[],FB3,CL:0002548,AH5,HsapDv:0000237,PATO:0000461,False


In [127]:
#change data type of column

In [128]:
# Force a plain boolean dtype. The values assigned earlier are Python bools, so this is
# expected to be a no-op cast that simply makes the dtype explicit.
adata.obs['is_primary_data'] = adata.obs['is_primary_data'].astype('bool')

#### **organism_ontology_term_id**

In [129]:
# assign organism id 

In [130]:
# Single organism for the whole sample: assign the NCBITaxon id as a scalar and
# let pandas broadcast it over all rows of obs.
adata.obs['organism_ontology_term_id'] = 'NCBITaxon:9606'

In [131]:
#change data type of column

In [132]:
# Store the constant organism term as a pandas categorical column.
adata.obs['organism_ontology_term_id'] = adata.obs['organism_ontology_term_id'].astype('category')

In [133]:
# view obs

In [134]:
adata.obs

Unnamed: 0_level_0,array_row,array_col,n_genes_by_counts,log1p_n_genes_by_counts,total_counts,log1p_total_counts,sangerID,region,donor,donor_type,...,gdT_abundance,assay_ontology_term_id,columns_with_multiple_max,highest_cell_Density_columns,cell_type_ontology_term_id,donor_id,development_stage_ontology_term_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id
spot_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
HCAHeartST13233999_AAACAACGAATAGTTC-1,0,16,1489,7.306531,3551.0,8.175266,HCAHeartST13233999,SAN,AH5,DCD,...,,EFO:0010961,[],aCM4,CL:0002129,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606
HCAHeartST13233999_AAACAAGTATCTCCCA-1,50,102,2361,7.767264,7473.0,8.919186,HCAHeartST13233999,SAN,AH5,DCD,...,,EFO:0010961,[],aCM4,CL:0002129,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606
HCAHeartST13233999_AAACAATCTACTAGCA-1,3,43,2616,7.869784,7538.0,8.927845,HCAHeartST13233999,SAN,AH5,DCD,...,,EFO:0010961,[],FB3,CL:0002548,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606
HCAHeartST13233999_AAACAGAGCGACTCCT-1,14,94,4212,8.345930,22758.0,10.032716,HCAHeartST13233999,SAN,AH5,DCD,...,,EFO:0010961,[],aCM4,CL:0002129,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606
HCAHeartST13233999_AAACAGCTTTCAGAAG-1,43,9,833,6.726233,1614.0,7.387090,HCAHeartST13233999,SAN,AH5,DCD,...,,EFO:0010961,[],Adip1,CL:0000136,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HCAHeartST13233999_TTGTTCAGTGTGCTAC-1,24,64,1979,7.590852,4601.0,8.434246,HCAHeartST13233999,SAN,AH5,DCD,...,,EFO:0010961,[],SAN_P_cell,CL:1000477,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606
HCAHeartST13233999_TTGTTCTAGATACGCT-1,21,3,1278,7.153834,3435.0,8.142063,HCAHeartST13233999,SAN,AH5,DCD,...,,EFO:0010961,[],aCM4,CL:0002129,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606
HCAHeartST13233999_TTGTTGTGTGTCAAGA-1,31,77,2057,7.629490,5594.0,8.629628,HCAHeartST13233999,SAN,AH5,DCD,...,,EFO:0010961,[],aCM4,CL:0002129,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606
HCAHeartST13233999_TTGTTTCCATACAACT-1,45,27,1322,7.187657,2916.0,7.978311,HCAHeartST13233999,SAN,AH5,DCD,...,,EFO:0010961,[],FB3,CL:0002548,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606


#### **self_reported_ethnicity_ontology_term_id**

In [135]:
# create a dictionary mapping each donor to its ethnic origin

In [136]:
# Donor -> reported ethnic origin.
# Every donor starts as 'Caucasian'; the few exceptions are overridden afterwards.
# Updating an existing key keeps its original position, so key order is unchanged.
mapping = dict.fromkeys(
    [
        'D1', 'D2', 'D3', 'D4', 'D5', 'D6', 'D7', 'D11',
        'H2', 'H3', 'H4', 'H5', 'H6', 'H7',
        'A61', 'AH1', 'AH1-A61', 'AH2',
        'AV10', 'AV13', 'AV14', 'AV3',
        'D8', 'AH5', 'AH6', 'AV1',
    ],
    'Caucasian',
)
mapping.update({'H3': 'Asian', 'AH1-A61': 'unknown', 'AH2': 'South Asian'})

In [137]:
# add the 'ethinic_origin' column (ethnic origin of each donor)

In [138]:
# Look up each donor's ethnic origin. Series.map leaves NaN for any donor missing
# from `mapping`, and `mapping` is reassigned several times in this notebook, so
# check that every row was mapped before writing the column.
ethnic_origin_labels = adata.obs['donor'].map(mapping)
donors_without_origin = adata.obs.loc[ethnic_origin_labels.isna(), 'donor'].unique().tolist()
assert not donors_without_origin, f"donors without an ethnic origin entry: {donors_without_origin}"
adata.obs['ethinic_origin'] = ethnic_origin_labels

In [139]:
# create a dictionary mapping ethnic origin to self_reported_ethnicity_ontology_term_id

In [140]:
# Ethnic-origin label -> HANCESTRO term id; 'unknown' is passed through unchanged.
mapping = dict([
    ('Caucasian', 'HANCESTRO:0005'),
    ('Asian', 'HANCESTRO:0008'),
    ('unknown', 'unknown'),
    ('South Asian', 'HANCESTRO:0006'),
])

In [141]:
# add self_reported_ethnicity_ontology_term_id column

In [142]:
# Translate the ethnic-origin labels into ontology terms. Series.map leaves NaN for
# any label missing from `mapping`, and `mapping` is reassigned several times in
# this notebook, so check that every row was mapped before writing the column.
ethnicity_terms = adata.obs['ethinic_origin'].map(mapping)
origins_without_term = adata.obs.loc[ethnicity_terms.isna(), 'ethinic_origin'].unique().tolist()
assert not origins_without_term, f"ethnic origin values without an ontology term: {origins_without_term}"
adata.obs['self_reported_ethnicity_ontology_term_id'] = ethnicity_terms

In [143]:
# change data type

In [144]:
# Cast the mapped ethnicity terms to a pandas categorical column.
adata.obs['self_reported_ethnicity_ontology_term_id'] = adata.obs['self_reported_ethnicity_ontology_term_id'].astype('category')

In [145]:
# view obs

In [146]:
adata.obs

Unnamed: 0_level_0,array_row,array_col,n_genes_by_counts,log1p_n_genes_by_counts,total_counts,log1p_total_counts,sangerID,region,donor,donor_type,...,columns_with_multiple_max,highest_cell_Density_columns,cell_type_ontology_term_id,donor_id,development_stage_ontology_term_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id,ethinic_origin,self_reported_ethnicity_ontology_term_id
spot_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
HCAHeartST13233999_AAACAACGAATAGTTC-1,0,16,1489,7.306531,3551.0,8.175266,HCAHeartST13233999,SAN,AH5,DCD,...,[],aCM4,CL:0002129,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005
HCAHeartST13233999_AAACAAGTATCTCCCA-1,50,102,2361,7.767264,7473.0,8.919186,HCAHeartST13233999,SAN,AH5,DCD,...,[],aCM4,CL:0002129,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005
HCAHeartST13233999_AAACAATCTACTAGCA-1,3,43,2616,7.869784,7538.0,8.927845,HCAHeartST13233999,SAN,AH5,DCD,...,[],FB3,CL:0002548,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005
HCAHeartST13233999_AAACAGAGCGACTCCT-1,14,94,4212,8.345930,22758.0,10.032716,HCAHeartST13233999,SAN,AH5,DCD,...,[],aCM4,CL:0002129,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005
HCAHeartST13233999_AAACAGCTTTCAGAAG-1,43,9,833,6.726233,1614.0,7.387090,HCAHeartST13233999,SAN,AH5,DCD,...,[],Adip1,CL:0000136,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HCAHeartST13233999_TTGTTCAGTGTGCTAC-1,24,64,1979,7.590852,4601.0,8.434246,HCAHeartST13233999,SAN,AH5,DCD,...,[],SAN_P_cell,CL:1000477,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005
HCAHeartST13233999_TTGTTCTAGATACGCT-1,21,3,1278,7.153834,3435.0,8.142063,HCAHeartST13233999,SAN,AH5,DCD,...,[],aCM4,CL:0002129,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005
HCAHeartST13233999_TTGTTGTGTGTCAAGA-1,31,77,2057,7.629490,5594.0,8.629628,HCAHeartST13233999,SAN,AH5,DCD,...,[],aCM4,CL:0002129,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005
HCAHeartST13233999_TTGTTTCCATACAACT-1,45,27,1322,7.187657,2916.0,7.978311,HCAHeartST13233999,SAN,AH5,DCD,...,[],FB3,CL:0002548,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005


#### **sex_ontology_term_id**

In [147]:
# identify the column in adata.obs which corresponds to sex

In [148]:
adata.obs.columns

Index(['array_row', 'array_col', 'n_genes_by_counts',
       'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts',
       'sangerID', 'region', 'donor', 'donor_type',
       ...
       'columns_with_multiple_max', 'highest_cell_Density_columns',
       'cell_type_ontology_term_id', 'donor_id',
       'development_stage_ontology_term_id', 'disease_ontology_term_id',
       'is_primary_data', 'organism_ontology_term_id', 'ethinic_origin',
       'self_reported_ethnicity_ontology_term_id'],
      dtype='object', length=172)

In [149]:
# list the unique values 

In [150]:
list(adata.obs['gender'].unique())

['Female']

In [151]:
# create a dictionary of sex and sex ontology term id

In [152]:
# 'gender' label -> PATO sex term id.
mapping = dict(
    Female='PATO:0000383',
    Male='PATO:0000384',
)

In [153]:
# add sex_ontology_term_id column

In [154]:
# Translate the 'gender' labels into PATO sex terms. Series.map leaves NaN for any
# label missing from `mapping`, and `mapping` is reassigned several times in this
# notebook, so check that every row was mapped before writing the column.
sex_terms = adata.obs['gender'].map(mapping)
genders_without_term = adata.obs.loc[sex_terms.isna(), 'gender'].unique().tolist()
assert not genders_without_term, f"gender values without a sex ontology term: {genders_without_term}"
adata.obs['sex_ontology_term_id'] = sex_terms

In [155]:
# change data type

In [156]:
# Cast the mapped sex terms to a pandas categorical column.
adata.obs['sex_ontology_term_id'] = adata.obs['sex_ontology_term_id'].astype('category')

In [157]:
# Display obs to check the new 'sex_ontology_term_id' column.
adata.obs

Unnamed: 0_level_0,array_row,array_col,n_genes_by_counts,log1p_n_genes_by_counts,total_counts,log1p_total_counts,sangerID,region,donor,donor_type,...,highest_cell_Density_columns,cell_type_ontology_term_id,donor_id,development_stage_ontology_term_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id,ethinic_origin,self_reported_ethnicity_ontology_term_id,sex_ontology_term_id
spot_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
HCAHeartST13233999_AAACAACGAATAGTTC-1,0,16,1489,7.306531,3551.0,8.175266,HCAHeartST13233999,SAN,AH5,DCD,...,aCM4,CL:0002129,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000383
HCAHeartST13233999_AAACAAGTATCTCCCA-1,50,102,2361,7.767264,7473.0,8.919186,HCAHeartST13233999,SAN,AH5,DCD,...,aCM4,CL:0002129,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000383
HCAHeartST13233999_AAACAATCTACTAGCA-1,3,43,2616,7.869784,7538.0,8.927845,HCAHeartST13233999,SAN,AH5,DCD,...,FB3,CL:0002548,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000383
HCAHeartST13233999_AAACAGAGCGACTCCT-1,14,94,4212,8.345930,22758.0,10.032716,HCAHeartST13233999,SAN,AH5,DCD,...,aCM4,CL:0002129,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000383
HCAHeartST13233999_AAACAGCTTTCAGAAG-1,43,9,833,6.726233,1614.0,7.387090,HCAHeartST13233999,SAN,AH5,DCD,...,Adip1,CL:0000136,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000383
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HCAHeartST13233999_TTGTTCAGTGTGCTAC-1,24,64,1979,7.590852,4601.0,8.434246,HCAHeartST13233999,SAN,AH5,DCD,...,SAN_P_cell,CL:1000477,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000383
HCAHeartST13233999_TTGTTCTAGATACGCT-1,21,3,1278,7.153834,3435.0,8.142063,HCAHeartST13233999,SAN,AH5,DCD,...,aCM4,CL:0002129,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000383
HCAHeartST13233999_TTGTTGTGTGTCAAGA-1,31,77,2057,7.629490,5594.0,8.629628,HCAHeartST13233999,SAN,AH5,DCD,...,aCM4,CL:0002129,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000383
HCAHeartST13233999_TTGTTTCCATACAACT-1,45,27,1322,7.187657,2916.0,7.978311,HCAHeartST13233999,SAN,AH5,DCD,...,FB3,CL:0002548,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000383


#### **suspension_type**

In [158]:
# Visium data has no cell suspension, so suspension_type is set to 'na'

In [159]:
# Same suspension type for every row: assign the scalar 'na' and let pandas broadcast it.
adata.obs['suspension_type'] = 'na'

In [160]:
# change data type

In [161]:
# Store the constant suspension type as a pandas categorical column.
adata.obs['suspension_type'] = adata.obs['suspension_type'].astype('category')

In [162]:
# view obs

In [163]:
adata.obs

Unnamed: 0_level_0,array_row,array_col,n_genes_by_counts,log1p_n_genes_by_counts,total_counts,log1p_total_counts,sangerID,region,donor,donor_type,...,cell_type_ontology_term_id,donor_id,development_stage_ontology_term_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id,ethinic_origin,self_reported_ethnicity_ontology_term_id,sex_ontology_term_id,suspension_type
spot_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
HCAHeartST13233999_AAACAACGAATAGTTC-1,0,16,1489,7.306531,3551.0,8.175266,HCAHeartST13233999,SAN,AH5,DCD,...,CL:0002129,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000383,na
HCAHeartST13233999_AAACAAGTATCTCCCA-1,50,102,2361,7.767264,7473.0,8.919186,HCAHeartST13233999,SAN,AH5,DCD,...,CL:0002129,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000383,na
HCAHeartST13233999_AAACAATCTACTAGCA-1,3,43,2616,7.869784,7538.0,8.927845,HCAHeartST13233999,SAN,AH5,DCD,...,CL:0002548,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000383,na
HCAHeartST13233999_AAACAGAGCGACTCCT-1,14,94,4212,8.345930,22758.0,10.032716,HCAHeartST13233999,SAN,AH5,DCD,...,CL:0002129,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000383,na
HCAHeartST13233999_AAACAGCTTTCAGAAG-1,43,9,833,6.726233,1614.0,7.387090,HCAHeartST13233999,SAN,AH5,DCD,...,CL:0000136,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000383,na
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HCAHeartST13233999_TTGTTCAGTGTGCTAC-1,24,64,1979,7.590852,4601.0,8.434246,HCAHeartST13233999,SAN,AH5,DCD,...,CL:1000477,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000383,na
HCAHeartST13233999_TTGTTCTAGATACGCT-1,21,3,1278,7.153834,3435.0,8.142063,HCAHeartST13233999,SAN,AH5,DCD,...,CL:0002129,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000383,na
HCAHeartST13233999_TTGTTGTGTGTCAAGA-1,31,77,2057,7.629490,5594.0,8.629628,HCAHeartST13233999,SAN,AH5,DCD,...,CL:0002129,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000383,na
HCAHeartST13233999_TTGTTTCCATACAACT-1,45,27,1322,7.187657,2916.0,7.978311,HCAHeartST13233999,SAN,AH5,DCD,...,CL:0002548,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000383,na


#### **tissue_ontology_term_id**

In [164]:
# identify the column in adata.obs which corresponds to tissue

In [165]:
adata.obs.columns

Index(['array_row', 'array_col', 'n_genes_by_counts',
       'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts',
       'sangerID', 'region', 'donor', 'donor_type',
       ...
       'cell_type_ontology_term_id', 'donor_id',
       'development_stage_ontology_term_id', 'disease_ontology_term_id',
       'is_primary_data', 'organism_ontology_term_id', 'ethinic_origin',
       'self_reported_ethnicity_ontology_term_id', 'sex_ontology_term_id',
       'suspension_type'],
      dtype='object', length=174)

In [166]:
# list unique values

In [167]:
list(adata.obs['region'].unique())

['SAN']

In [168]:
# create a dictionary with tissue and corresponding tissue ontology term id

In [169]:
# Heart region abbreviation (as used in the 'region' column) -> UBERON term id.
mapping = dict(
    AX='UBERON:0002098',
    LV='UBERON:0002084',
    RV='UBERON:0002080',
    LA='UBERON:0002079',
    SP='UBERON:0002094',
    RA='UBERON:0002078',
    SAN='UBERON:0002351',
    AVN='UBERON:0002352',
)

In [170]:
# add 'tissue_ontology_term_id' column

In [171]:
# Translate the 'region' abbreviations into UBERON terms. Series.map leaves NaN for
# any region missing from `mapping`, and `mapping` is reassigned several times in
# this notebook, so check that every row was mapped before writing the column.
tissue_terms = adata.obs['region'].map(mapping)
regions_without_term = adata.obs.loc[tissue_terms.isna(), 'region'].unique().tolist()
assert not regions_without_term, f"region values without a tissue ontology term: {regions_without_term}"
adata.obs['tissue_ontology_term_id'] = tissue_terms

In [172]:
# change data type of column

In [173]:
# Cast the mapped tissue terms to a pandas categorical; this sample covers a single region ('SAN').
adata.obs['tissue_ontology_term_id'] = adata.obs['tissue_ontology_term_id'].astype('category')

In [174]:
# list the unique values in the 'tissue_ontology_term_id' column

In [175]:
list(adata.obs['tissue_ontology_term_id'].unique())

['UBERON:0002351']

In [176]:
# view obs

In [177]:
adata.obs

Unnamed: 0_level_0,array_row,array_col,n_genes_by_counts,log1p_n_genes_by_counts,total_counts,log1p_total_counts,sangerID,region,donor,donor_type,...,donor_id,development_stage_ontology_term_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id,ethinic_origin,self_reported_ethnicity_ontology_term_id,sex_ontology_term_id,suspension_type,tissue_ontology_term_id
spot_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
HCAHeartST13233999_AAACAACGAATAGTTC-1,0,16,1489,7.306531,3551.0,8.175266,HCAHeartST13233999,SAN,AH5,DCD,...,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000383,na,UBERON:0002351
HCAHeartST13233999_AAACAAGTATCTCCCA-1,50,102,2361,7.767264,7473.0,8.919186,HCAHeartST13233999,SAN,AH5,DCD,...,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000383,na,UBERON:0002351
HCAHeartST13233999_AAACAATCTACTAGCA-1,3,43,2616,7.869784,7538.0,8.927845,HCAHeartST13233999,SAN,AH5,DCD,...,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000383,na,UBERON:0002351
HCAHeartST13233999_AAACAGAGCGACTCCT-1,14,94,4212,8.345930,22758.0,10.032716,HCAHeartST13233999,SAN,AH5,DCD,...,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000383,na,UBERON:0002351
HCAHeartST13233999_AAACAGCTTTCAGAAG-1,43,9,833,6.726233,1614.0,7.387090,HCAHeartST13233999,SAN,AH5,DCD,...,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000383,na,UBERON:0002351
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HCAHeartST13233999_TTGTTCAGTGTGCTAC-1,24,64,1979,7.590852,4601.0,8.434246,HCAHeartST13233999,SAN,AH5,DCD,...,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000383,na,UBERON:0002351
HCAHeartST13233999_TTGTTCTAGATACGCT-1,21,3,1278,7.153834,3435.0,8.142063,HCAHeartST13233999,SAN,AH5,DCD,...,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000383,na,UBERON:0002351
HCAHeartST13233999_TTGTTGTGTGTCAAGA-1,31,77,2057,7.629490,5594.0,8.629628,HCAHeartST13233999,SAN,AH5,DCD,...,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000383,na,UBERON:0002351
HCAHeartST13233999_TTGTTTCCATACAACT-1,45,27,1322,7.187657,2916.0,7.978311,HCAHeartST13233999,SAN,AH5,DCD,...,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000383,na,UBERON:0002351


In [178]:
# obs columns removed from the curated object, deleted one at a time in this order.
obs_columns_to_remove = [
    'donor',
    'gender',
    'highest_cell_Density_columns',
    'columns_with_multiple_max',
    'array_row',
    'array_col',
    'modality',
    'kit_10x',
    'ethinic_origin',
]
for obs_column in obs_columns_to_remove:
    del adata.obs[obs_column]

#### **obsm (Embeddings)**

In [179]:
adata.obsm

AxisArrays with keys: MT, X_spatial, means_cell_abundance_w_sf, prop, q05_cell_abundance_w_sf, q95_cell_abundance_w_sf, stds_cell_abundance_w_sf

In [180]:
adata.obsm.keys()

KeysView(AxisArrays with keys: MT, X_spatial, means_cell_abundance_w_sf, prop, q05_cell_abundance_w_sf, q95_cell_abundance_w_sf, stds_cell_abundance_w_sf)

In [181]:
# Store the entry as a plain array. np.asarray accepts both the original table and an
# array that has already been converted, so re-running this cell no longer fails with
# "'numpy.ndarray' object has no attribute 'values'".
adata.obsm['means_cell_abundance_w_sf'] = np.asarray(adata.obsm['means_cell_abundance_w_sf'])

In [182]:
# Store the entry as a plain array. np.asarray accepts both the original table and an
# array that has already been converted, so re-running this cell no longer fails with
# "'numpy.ndarray' object has no attribute 'values'".
adata.obsm['stds_cell_abundance_w_sf'] = np.asarray(adata.obsm['stds_cell_abundance_w_sf'])

In [183]:
# Store the entry as a plain array. np.asarray accepts both the original table and an
# array that has already been converted, so re-running this cell no longer fails with
# "'numpy.ndarray' object has no attribute 'values'".
adata.obsm['prop'] = np.asarray(adata.obsm['prop'])

In [184]:
# Store the entry as a plain array. np.asarray accepts both the original table and an
# array that has already been converted, so re-running this cell no longer fails with
# "'numpy.ndarray' object has no attribute 'values'".
adata.obsm['q05_cell_abundance_w_sf'] = np.asarray(adata.obsm['q05_cell_abundance_w_sf'])

In [185]:
# Store the entry as a plain array. np.asarray accepts both the original table and an
# array that has already been converted, so re-running this cell no longer fails with
# "'numpy.ndarray' object has no attribute 'values'".
adata.obsm['q95_cell_abundance_w_sf'] = np.asarray(adata.obsm['q95_cell_abundance_w_sf'])

In [186]:
# Register each cell-abundance entry a second time under an 'X_'-prefixed key.
# The un-prefixed originals are still present after this cell.
obsm_keys_to_prefix = [
    'means_cell_abundance_w_sf',
    'prop',
    'q05_cell_abundance_w_sf',
    'q95_cell_abundance_w_sf',
    'stds_cell_abundance_w_sf',
]
for obsm_key in obsm_keys_to_prefix:
    adata.obsm['X_' + obsm_key] = adata.obsm[obsm_key]

In [187]:
adata.obsm

AxisArrays with keys: MT, X_spatial, means_cell_abundance_w_sf, prop, q05_cell_abundance_w_sf, q95_cell_abundance_w_sf, stds_cell_abundance_w_sf, X_means_cell_abundance_w_sf, X_prop, X_q05_cell_abundance_w_sf, X_q95_cell_abundance_w_sf, X_stds_cell_abundance_w_sf

In [188]:
# Remove 'MT' and the un-prefixed abundance entries from obsm, in this order.
obsm_keys_to_remove = [
    'MT',
    'means_cell_abundance_w_sf',
    'prop',
    'q05_cell_abundance_w_sf',
    'q95_cell_abundance_w_sf',
    'stds_cell_abundance_w_sf',
]
for obsm_key in obsm_keys_to_remove:
    del adata.obsm[obsm_key]

#### **uns (Dataset Metadata)**

In [189]:
adata.uns

OverloadedDict, wrapping:
	{'log1p': {}, 'spatial': {'HCAHeartST13233999': {'images': {'hires': array([[[0.9490196 , 0.9411765 , 0.9372549 ],
        [0.9529412 , 0.9411765 , 0.9411765 ],
        [0.9529412 , 0.9411765 , 0.9411765 ],
        ...,
        [0.9529412 , 0.9411765 , 0.9411765 ],
        [0.9529412 , 0.9411765 , 0.9411765 ],
        [0.9529412 , 0.9411765 , 0.9411765 ]],

       [[0.9490196 , 0.9411765 , 0.9411765 ],
        [0.9490196 , 0.9411765 , 0.9411765 ],
        [0.9529412 , 0.9372549 , 0.9411765 ],
        ...,
        [0.95686275, 0.9411765 , 0.9411765 ],
        [0.9529412 , 0.94509804, 0.9411765 ],
        [0.9529412 , 0.9411765 , 0.9411765 ]],

       [[0.9529412 , 0.9411765 , 0.9411765 ],
        [0.9490196 , 0.9411765 , 0.9411765 ],
        [0.9529412 , 0.9411765 , 0.9411765 ],
        ...,
        [0.95686275, 0.9411765 , 0.9411765 ],
        [0.9529412 , 0.9411765 , 0.9411765 ],
        [0.95686275, 0.94509804, 0.9411765 ]],

       ...,

       [[0.9450980

In [190]:
# Free-text caption stored as a new top-level uns entry; the image itself sits under
# adata.uns['spatial'] (see the uns dump above).
# NOTE(review): presumably displayed next to the image by the target portal — confirm.
adata.uns['image_caption'] = 'Shown here is an image of 10 μm thick slice of sinoatrial node region of the adult human heart stained with H&E'

In [191]:
# Dataset title: the sample ID is the same value shown in obs['sangerID'], and the
# '(OCT)' suffix matches the name of the output file written at the end of the notebook.
adata.uns['title'] = 'Visium spatial - HCAHeartST13233999 (OCT)'

In [192]:
# Name the embedding to use by default; 'X_spatial' is one of the keys present in
# adata.obsm (see the obsm listing above).
adata.uns['default_embedding'] = 'X_spatial'

In [193]:
adata.uns.keys()

dict_keys(['log1p', 'spatial', 'image_caption', 'title', 'default_embedding'])

### **Final checks and adjustments**

In [194]:
adata

AnnData object with n_obs × n_vars = 2881 × 33234
    obs: 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'sangerID', 'region', 'donor_type', 'age', 'facility', 'flushed', 'annotation_final', 'Adip1', 'Adip2', 'Adip3', 'B', 'B_plasma', 'CD14+Mo', 'CD16+Mo', 'CD4+T_act', 'CD4+T_naive', 'CD8+T_cytox', 'CD8+T_em', 'CD8+T_te', 'CD8+T_trans', 'DC', 'EC10_CMC-like', 'EC1_cap', 'EC2_cap', 'EC3_cap', 'EC4_immune', 'EC5_art', 'EC6_ven', 'EC7_endocardial', 'EC8_ln', 'FB1', 'FB2', 'FB3', 'FB4_activated', 'FB5', 'FB6', 'ILC', 'LYVE1+IGF1+MP', 'LYVE1+MP_cycling', 'LYVE1+TIMD4+MP', 'MAIT-like', 'Mast', 'Meso', 'MoMP', 'NC1_glial', 'NC2_glial_NGF+', 'NK_CD16hi', 'NK_CD56hi', 'Neut', 'PC1_vent', 'PC2_atria', 'PC3_str', 'SAN_P_cell', 'SMC1_basic', 'SMC2_art', 'T/NK_cycling', 'aCM1', 'aCM2', 'aCM3', 'aCM4', 'AVN_bundle_cell', 'PC4_CMC-like', 'vCM1', 'vCM2', 'vCM3_stressed', 'vCM4', 'vCM5', 'AVN_P_cell', 'CD4+T_Tfh', 'CD4+T_Th1', 'CD4+T_Th2', 'CD4+T_reg', 'NC5_glial

In [195]:
adata.obs.dtypes

n_genes_by_counts                              int32
log1p_n_genes_by_counts                      float64
total_counts                                 float32
log1p_total_counts                           float32
sangerID                                    category
                                              ...   
organism_ontology_term_id                   category
self_reported_ethnicity_ontology_term_id    category
sex_ontology_term_id                        category
suspension_type                             category
tissue_ontology_term_id                     category
Length: 166, dtype: object

In [196]:
# Downcast 64-bit numeric columns of adata.var to their 32-bit counterparts.
# Column dtypes are captured once, before any column is rewritten.
var_dtypes = adata.var.dtypes
var_float64_cols = [name for name, kind in var_dtypes.items() if kind == 'float64']
var_int64_cols = [name for name, kind in var_dtypes.items() if kind == 'int64']

# All float columns are handled (and reported) before any int column.
for name in var_float64_cols:
    adata.var[name] = adata.var[name].astype('float32')
    print(f"changed {name} from float64 to float32")

for name in var_int64_cols:
    adata.var[name] = adata.var[name].astype('int32')
    print(f"changed {name} from int64 to int32")

In [197]:
# Normalise adata.obs column dtypes: float64 -> float32, int64 -> int32, object -> category.
# Column dtypes are captured once, before any column is rewritten.
obs_dtypes = adata.obs.dtypes
obs_float64_cols = [name for name, kind in obs_dtypes.items() if kind == 'float64']
obs_int64_cols = [name for name, kind in obs_dtypes.items() if kind == 'int64']
obs_object_cols = [name for name, kind in obs_dtypes.items() if kind == 'object']

# The three passes run in this fixed order, so the printed report lists floats first,
# then ints, then object columns.
for name in obs_float64_cols:
    adata.obs[name] = adata.obs[name].astype('float32')
    print(f"changed {name} from float64 to float32")

for name in obs_int64_cols:
    adata.obs[name] = adata.obs[name].astype('int32')
    print(f"changed {name} from int64 to int32")

for name in obs_object_cols:
    adata.obs[name] = adata.obs[name].astype('category')
    print(f"changed {name} from object to category")

changed log1p_n_genes_by_counts from float64 to float32
changed Adip1 from float64 to float32
changed Adip2 from float64 to float32
changed Adip3 from float64 to float32
changed B from float64 to float32
changed B_plasma from float64 to float32
changed CD14+Mo from float64 to float32
changed CD16+Mo from float64 to float32
changed CD4+T_act from float64 to float32
changed CD4+T_naive from float64 to float32
changed CD8+T_cytox from float64 to float32
changed CD8+T_em from float64 to float32
changed CD8+T_te from float64 to float32
changed CD8+T_trans from float64 to float32
changed DC from float64 to float32
changed EC10_CMC-like from float64 to float32
changed EC1_cap from float64 to float32
changed EC2_cap from float64 to float32
changed EC3_cap from float64 to float32
changed EC4_immune from float64 to float32
changed EC5_art from float64 to float32
changed EC6_ven from float64 to float32
changed EC7_endocardial from float64 to float32
changed EC8_ln from float64 to float32
changed 

In [198]:
adata.obs

Unnamed: 0_level_0,n_genes_by_counts,log1p_n_genes_by_counts,total_counts,log1p_total_counts,sangerID,region,donor_type,age,facility,flushed,...,cell_type_ontology_term_id,donor_id,development_stage_ontology_term_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id,self_reported_ethnicity_ontology_term_id,sex_ontology_term_id,suspension_type,tissue_ontology_term_id
spot_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
HCAHeartST13233999_AAACAACGAATAGTTC-1,1489,7.306531,3551.0,8.175266,HCAHeartST13233999,SAN,DCD,20-25,Sanger,yes,...,CL:0002129,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,HANCESTRO:0005,PATO:0000383,na,UBERON:0002351
HCAHeartST13233999_AAACAAGTATCTCCCA-1,2361,7.767264,7473.0,8.919186,HCAHeartST13233999,SAN,DCD,20-25,Sanger,yes,...,CL:0002129,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,HANCESTRO:0005,PATO:0000383,na,UBERON:0002351
HCAHeartST13233999_AAACAATCTACTAGCA-1,2616,7.869784,7538.0,8.927845,HCAHeartST13233999,SAN,DCD,20-25,Sanger,yes,...,CL:0002548,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,HANCESTRO:0005,PATO:0000383,na,UBERON:0002351
HCAHeartST13233999_AAACAGAGCGACTCCT-1,4212,8.345930,22758.0,10.032716,HCAHeartST13233999,SAN,DCD,20-25,Sanger,yes,...,CL:0002129,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,HANCESTRO:0005,PATO:0000383,na,UBERON:0002351
HCAHeartST13233999_AAACAGCTTTCAGAAG-1,833,6.726233,1614.0,7.387090,HCAHeartST13233999,SAN,DCD,20-25,Sanger,yes,...,CL:0000136,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,HANCESTRO:0005,PATO:0000383,na,UBERON:0002351
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HCAHeartST13233999_TTGTTCAGTGTGCTAC-1,1979,7.590852,4601.0,8.434246,HCAHeartST13233999,SAN,DCD,20-25,Sanger,yes,...,CL:1000477,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,HANCESTRO:0005,PATO:0000383,na,UBERON:0002351
HCAHeartST13233999_TTGTTCTAGATACGCT-1,1278,7.153834,3435.0,8.142063,HCAHeartST13233999,SAN,DCD,20-25,Sanger,yes,...,CL:0002129,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,HANCESTRO:0005,PATO:0000383,na,UBERON:0002351
HCAHeartST13233999_TTGTTGTGTGTCAAGA-1,2057,7.629490,5594.0,8.629628,HCAHeartST13233999,SAN,DCD,20-25,Sanger,yes,...,CL:0002129,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,HANCESTRO:0005,PATO:0000383,na,UBERON:0002351
HCAHeartST13233999_TTGTTTCCATACAACT-1,1322,7.187657,2916.0,7.978311,HCAHeartST13233999,SAN,DCD,20-25,Sanger,yes,...,CL:0002548,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,HANCESTRO:0005,PATO:0000383,na,UBERON:0002351


In [199]:
adata.obs.columns

Index(['n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts',
       'log1p_total_counts', 'sangerID', 'region', 'donor_type', 'age',
       'facility', 'flushed',
       ...
       'cell_type_ontology_term_id', 'donor_id',
       'development_stage_ontology_term_id', 'disease_ontology_term_id',
       'is_primary_data', 'organism_ontology_term_id',
       'self_reported_ethnicity_ontology_term_id', 'sex_ontology_term_id',
       'suspension_type', 'tissue_ontology_term_id'],
      dtype='object', length=166)

In [200]:
adata.var

Unnamed: 0_level_0,feature_is_filtered
gene_ids,Unnamed: 1_level_1
ENSG00000243485,False
ENSG00000237613,False
ENSG00000186092,False
ENSG00000238009,False
ENSG00000239945,False
...,...
ENSG00000277856,False
ENSG00000275063,False
ENSG00000271254,False
ENSG00000277475,False


In [201]:
adata.obs

Unnamed: 0_level_0,n_genes_by_counts,log1p_n_genes_by_counts,total_counts,log1p_total_counts,sangerID,region,donor_type,age,facility,flushed,...,cell_type_ontology_term_id,donor_id,development_stage_ontology_term_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id,self_reported_ethnicity_ontology_term_id,sex_ontology_term_id,suspension_type,tissue_ontology_term_id
spot_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
HCAHeartST13233999_AAACAACGAATAGTTC-1,1489,7.306531,3551.0,8.175266,HCAHeartST13233999,SAN,DCD,20-25,Sanger,yes,...,CL:0002129,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,HANCESTRO:0005,PATO:0000383,na,UBERON:0002351
HCAHeartST13233999_AAACAAGTATCTCCCA-1,2361,7.767264,7473.0,8.919186,HCAHeartST13233999,SAN,DCD,20-25,Sanger,yes,...,CL:0002129,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,HANCESTRO:0005,PATO:0000383,na,UBERON:0002351
HCAHeartST13233999_AAACAATCTACTAGCA-1,2616,7.869784,7538.0,8.927845,HCAHeartST13233999,SAN,DCD,20-25,Sanger,yes,...,CL:0002548,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,HANCESTRO:0005,PATO:0000383,na,UBERON:0002351
HCAHeartST13233999_AAACAGAGCGACTCCT-1,4212,8.345930,22758.0,10.032716,HCAHeartST13233999,SAN,DCD,20-25,Sanger,yes,...,CL:0002129,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,HANCESTRO:0005,PATO:0000383,na,UBERON:0002351
HCAHeartST13233999_AAACAGCTTTCAGAAG-1,833,6.726233,1614.0,7.387090,HCAHeartST13233999,SAN,DCD,20-25,Sanger,yes,...,CL:0000136,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,HANCESTRO:0005,PATO:0000383,na,UBERON:0002351
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HCAHeartST13233999_TTGTTCAGTGTGCTAC-1,1979,7.590852,4601.0,8.434246,HCAHeartST13233999,SAN,DCD,20-25,Sanger,yes,...,CL:1000477,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,HANCESTRO:0005,PATO:0000383,na,UBERON:0002351
HCAHeartST13233999_TTGTTCTAGATACGCT-1,1278,7.153834,3435.0,8.142063,HCAHeartST13233999,SAN,DCD,20-25,Sanger,yes,...,CL:0002129,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,HANCESTRO:0005,PATO:0000383,na,UBERON:0002351
HCAHeartST13233999_TTGTTGTGTGTCAAGA-1,2057,7.629490,5594.0,8.629628,HCAHeartST13233999,SAN,DCD,20-25,Sanger,yes,...,CL:0002129,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,HANCESTRO:0005,PATO:0000383,na,UBERON:0002351
HCAHeartST13233999_TTGTTTCCATACAACT-1,1322,7.187657,2916.0,7.978311,HCAHeartST13233999,SAN,DCD,20-25,Sanger,yes,...,CL:0002548,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,HANCESTRO:0005,PATO:0000383,na,UBERON:0002351


In [202]:
adata.obs.columns

Index(['n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts',
       'log1p_total_counts', 'sangerID', 'region', 'donor_type', 'age',
       'facility', 'flushed',
       ...
       'cell_type_ontology_term_id', 'donor_id',
       'development_stage_ontology_term_id', 'disease_ontology_term_id',
       'is_primary_data', 'organism_ontology_term_id',
       'self_reported_ethnicity_ontology_term_id', 'sex_ontology_term_id',
       'suspension_type', 'tissue_ontology_term_id'],
      dtype='object', length=166)

In [203]:
# check the format of the expression matrix

In [204]:
adata.X

<2881x33234 sparse matrix of type '<class 'numpy.float32'>'
	with 6238311 stored elements in Compressed Sparse Row format>

In [205]:
araw.X

<2881x33234 sparse matrix of type '<class 'numpy.float32'>'
	with 6238311 stored elements in Compressed Sparse Row format>

In [206]:
#Copy raw counts to adata.raw

In [207]:
# `araw` is created earlier in the notebook (not visible in this part). The two cells
# above show araw.X and adata.X with the same 2881 x 33234 shape.
adata.raw = araw

In [208]:
# write the curated object to the final_objects folder

In [209]:
# Destination of the curated object inside the final_objects folder.
curated_h5ad_path = '/lustre/scratch127/cellgen/cellgeni/cxgportal_sets/heart_cell_atlas/final_objects/HCAHeartST13233999(OCT).h5ad'

# Write the object to disk as a gzip-compressed h5ad file.
adata.write(curated_h5ad_path, compression='gzip')