### **Curating visium-OCT_adult-8reg-revision_raw.h5ad**

Article: Spatially resolved multiomics of human cardiac niches

DOI: https://doi.org/10.1038/s41586-023-06311-1 

Data Source : https://www.heartcellatlas.org

##### **Mount farm**

mount-farm

##### **Packages required for curation**

In [1]:
#Import all packages required for curation

In [2]:
import numpy as np
import pandas as pd
import scanpy as sc
import scipy
from tqdm import tqdm
from scipy import sparse
from scipy.sparse import csr_matrix
import anndata as ad
import os
import subprocess
import math

### **Curation Schema**

##### **X (Matrix Layers)**

##### **AnnData object**

In [3]:
# Load the AnnData object

In [4]:
# Location of the input .h5ad file, kept in a named variable so the read call stays short.
raw_h5ad_path = '/lustre/scratch127/cellgen/cellgeni/cxgportal_sets/heart_cell_atlas/Raw_counts/visium-OCT_adult-8reg-revision_raw.h5ad'
adata = sc.read_h5ad(raw_h5ad_path)

In [5]:
# View the AnnData object

In [6]:
adata

AnnData object with n_obs × n_vars = 93788 × 33538
    obs: 'in_tissue', 'array_row', 'array_col', 'sample', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes', 'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes', 'mt_frac', 'n_counts', 'n_genes', 'annotation_JC', 'sangerID', 'Publication', 'combinedID', 'donor', 'donor_type', 'region', 'region_finest', 'age', 'gender', 'facility', 'cell_or_nuclei', 'modality', 'kit_10x', 'flushed', 'region_cell2loc'
    var: 'gene_ids', 'feature_types', 'genome', 'SYMBOL'
    uns: 'spatial'
    obsm: 'MT', 'means_cell_abundance_w_sf', 'q05_cell_abundance_w_sf', 'q95_cell_abundance_w_sf', 'spatial', 'stds_cell_abundance_w_sf'

##### **X- expression matrix**

In [7]:
# View the expression matrix of the anndata object

In [8]:
adata.X

<93788x33538 sparse matrix of type '<class 'numpy.float32'>'
	with 147751779 stored elements in Compressed Sparse Row format>

In [9]:
# Print the matrix to check whether it holds raw or normalized counts. Values with fractional parts indicate normalized counts; whole-number values (even when stored as float32, as here) indicate raw counts.

In [10]:
print(adata.X)

  (0, 26)	1.0
  (0, 27)	1.0
  (0, 39)	1.0
  (0, 154)	2.0
  (0, 156)	1.0
  (0, 171)	1.0
  (0, 201)	1.0
  (0, 219)	2.0
  (0, 220)	1.0
  (0, 267)	61.0
  (0, 275)	1.0
  (0, 350)	1.0
  (0, 363)	1.0
  (0, 408)	1.0
  (0, 414)	2.0
  (0, 423)	2.0
  (0, 424)	1.0
  (0, 442)	1.0
  (0, 449)	1.0
  (0, 470)	5.0
  (0, 472)	2.0
  (0, 473)	1.0
  (0, 491)	1.0
  (0, 493)	1.0
  (0, 526)	1.0
  :	:
  (93787, 33249)	1.0
  (93787, 33254)	1.0
  (93787, 33294)	1.0
  (93787, 33375)	1.0
  (93787, 33376)	2.0
  (93787, 33399)	1.0
  (93787, 33413)	1.0
  (93787, 33446)	1.0
  (93787, 33451)	1.0
  (93787, 33474)	2.0
  (93787, 33479)	2.0
  (93787, 33490)	1.0
  (93787, 33492)	1.0
  (93787, 33496)	57.0
  (93787, 33497)	31.0
  (93787, 33498)	101.0
  (93787, 33499)	78.0
  (93787, 33500)	3.0
  (93787, 33501)	70.0
  (93787, 33502)	67.0
  (93787, 33503)	33.0
  (93787, 33504)	3.0
  (93787, 33505)	54.0
  (93787, 33506)	9.0
  (93787, 33508)	40.0


##### **Variables(var)**

In [11]:
# View the var of the AnnData object

In [12]:
adata.var

Unnamed: 0,gene_ids,feature_types,genome,SYMBOL
ENSG00000243485,ENSG00000243485,Gene Expression,GRCh38,MIR1302-2HG
ENSG00000237613,ENSG00000237613,Gene Expression,GRCh38,FAM138A
ENSG00000186092,ENSG00000186092,Gene Expression,GRCh38,OR4F5
ENSG00000238009,ENSG00000238009,Gene Expression,GRCh38,AL627309.1
ENSG00000239945,ENSG00000239945,Gene Expression,GRCh38,AL627309.3
...,...,...,...,...
ENSG00000277856,ENSG00000277856,Gene Expression,GRCh38,AC233755.2
ENSG00000275063,ENSG00000275063,Gene Expression,GRCh38,AC233755.1
ENSG00000271254,ENSG00000271254,Gene Expression,GRCh38,AC240274.1
ENSG00000277475,ENSG00000277475,Gene Expression,GRCh38,AC213203.1


In [13]:
# load the approved genes file

In [14]:
# Location of the approved-genes table, kept in a named variable so the read call stays short.
approved_genes_csv = '/home/jovyan/CXG_DATASETS_PORTAL/gene_info/genes_approved.csv'
approved_genes = pd.read_csv(approved_genes_csv)

In [15]:
# Create a lookup dictionary keyed by the feature_id column of the approved genes file (every value is 1; only key membership is used).

In [16]:
# Membership lookup keyed by approved feature ID; every key carries the placeholder value 1.
genedict = dict.fromkeys(approved_genes['feature_id'], 1)

In [17]:
genedict

{'ERCC-00002': 1,
 'ERCC-00003': 1,
 'ERCC-00004': 1,
 'ERCC-00009': 1,
 'ERCC-00012': 1,
 'ERCC-00013': 1,
 'ERCC-00014': 1,
 'ERCC-00016': 1,
 'ERCC-00017': 1,
 'ERCC-00019': 1,
 'ERCC-00022': 1,
 'ERCC-00024': 1,
 'ERCC-00025': 1,
 'ERCC-00028': 1,
 'ERCC-00031': 1,
 'ERCC-00033': 1,
 'ERCC-00034': 1,
 'ERCC-00035': 1,
 'ERCC-00039': 1,
 'ERCC-00040': 1,
 'ERCC-00041': 1,
 'ERCC-00042': 1,
 'ERCC-00043': 1,
 'ERCC-00044': 1,
 'ERCC-00046': 1,
 'ERCC-00048': 1,
 'ERCC-00051': 1,
 'ERCC-00053': 1,
 'ERCC-00054': 1,
 'ERCC-00057': 1,
 'ERCC-00058': 1,
 'ERCC-00059': 1,
 'ERCC-00060': 1,
 'ERCC-00061': 1,
 'ERCC-00062': 1,
 'ERCC-00067': 1,
 'ERCC-00069': 1,
 'ERCC-00071': 1,
 'ERCC-00073': 1,
 'ERCC-00074': 1,
 'ERCC-00075': 1,
 'ERCC-00076': 1,
 'ERCC-00077': 1,
 'ERCC-00078': 1,
 'ERCC-00079': 1,
 'ERCC-00081': 1,
 'ERCC-00083': 1,
 'ERCC-00084': 1,
 'ERCC-00085': 1,
 'ERCC-00086': 1,
 'ERCC-00092': 1,
 'ERCC-00095': 1,
 'ERCC-00096': 1,
 'ERCC-00097': 1,
 'ERCC-00098': 1,
 'ERCC-000

In [18]:
len(genedict)

116184

In [19]:
# Filter out the genes which are not in the approved genes file

In [20]:
# Collect, in their original order, the var_names that are present in the approved-gene lookup.
var_to_keep_adata = []
for feature_id in adata.var_names:
    if feature_id in genedict:
        var_to_keep_adata.append(feature_id)

In [21]:
len(var_to_keep_adata)

33234

In [22]:
adata.var

Unnamed: 0,gene_ids,feature_types,genome,SYMBOL
ENSG00000243485,ENSG00000243485,Gene Expression,GRCh38,MIR1302-2HG
ENSG00000237613,ENSG00000237613,Gene Expression,GRCh38,FAM138A
ENSG00000186092,ENSG00000186092,Gene Expression,GRCh38,OR4F5
ENSG00000238009,ENSG00000238009,Gene Expression,GRCh38,AL627309.1
ENSG00000239945,ENSG00000239945,Gene Expression,GRCh38,AL627309.3
...,...,...,...,...
ENSG00000277856,ENSG00000277856,Gene Expression,GRCh38,AC233755.2
ENSG00000275063,ENSG00000275063,Gene Expression,GRCh38,AC233755.1
ENSG00000271254,ENSG00000271254,Gene Expression,GRCh38,AC240274.1
ENSG00000277475,ENSG00000277475,Gene Expression,GRCh38,AC213203.1


In [23]:
# Subset the AnnData object so that only the approved genes (var_to_keep_adata) are kept.

In [24]:
# Restrict the object to the approved genes collected in var_to_keep_adata;
# .copy() is called on the sliced result before it is bound back to `adata`.
adata = adata[:, var_to_keep_adata].copy()

In [25]:
#  View the var

In [26]:
adata.var

Unnamed: 0,gene_ids,feature_types,genome,SYMBOL
ENSG00000243485,ENSG00000243485,Gene Expression,GRCh38,MIR1302-2HG
ENSG00000237613,ENSG00000237613,Gene Expression,GRCh38,FAM138A
ENSG00000186092,ENSG00000186092,Gene Expression,GRCh38,OR4F5
ENSG00000238009,ENSG00000238009,Gene Expression,GRCh38,AL627309.1
ENSG00000239945,ENSG00000239945,Gene Expression,GRCh38,AL627309.3
...,...,...,...,...
ENSG00000277856,ENSG00000277856,Gene Expression,GRCh38,AC233755.2
ENSG00000275063,ENSG00000275063,Gene Expression,GRCh38,AC233755.1
ENSG00000271254,ENSG00000271254,Gene Expression,GRCh38,AC240274.1
ENSG00000277475,ENSG00000277475,Gene Expression,GRCh38,AC213203.1


##### **feature_is_filtered**

In [27]:
# Assign False for every gene: none of the retained features has been filtered out of the expression matrix (X).

In [28]:
# Mark every gene as not filtered; the scalar is broadcast to all rows of var.
adata.var['feature_is_filtered'] = False

In [29]:
#View var

In [30]:
adata.var

Unnamed: 0,gene_ids,feature_types,genome,SYMBOL,feature_is_filtered
ENSG00000243485,ENSG00000243485,Gene Expression,GRCh38,MIR1302-2HG,False
ENSG00000237613,ENSG00000237613,Gene Expression,GRCh38,FAM138A,False
ENSG00000186092,ENSG00000186092,Gene Expression,GRCh38,OR4F5,False
ENSG00000238009,ENSG00000238009,Gene Expression,GRCh38,AL627309.1,False
ENSG00000239945,ENSG00000239945,Gene Expression,GRCh38,AL627309.3,False
...,...,...,...,...,...
ENSG00000277856,ENSG00000277856,Gene Expression,GRCh38,AC233755.2,False
ENSG00000275063,ENSG00000275063,Gene Expression,GRCh38,AC233755.1,False
ENSG00000271254,ENSG00000271254,Gene Expression,GRCh38,AC240274.1,False
ENSG00000277475,ENSG00000277475,Gene Expression,GRCh38,AC213203.1,False


In [31]:
# Delete the unwanted columns from adata.var.

In [32]:
# Remove the original var annotation columns that are no longer wanted.
for obsolete_column in ('gene_ids', 'feature_types', 'genome', 'SYMBOL'):
    del adata.var[obsolete_column]

In [33]:
# view var

In [34]:
adata.var

Unnamed: 0,feature_is_filtered
ENSG00000243485,False
ENSG00000237613,False
ENSG00000186092,False
ENSG00000238009,False
ENSG00000239945,False
...,...
ENSG00000277856,False
ENSG00000275063,False
ENSG00000271254,False
ENSG00000277475,False


#### **Observations(obs) (Cell metadata)**

In [35]:
#view obs

In [36]:
adata.obs

Unnamed: 0_level_0,in_tissue,array_row,array_col,sample,n_genes_by_counts,log1p_n_genes_by_counts,total_counts,log1p_total_counts,pct_counts_in_top_50_genes,pct_counts_in_top_100_genes,...,region,region_finest,age,gender,facility,cell_or_nuclei,modality,kit_10x,flushed,region_cell2loc
spot_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
HCAHeartST10238251_AAACATTTCCCGGATT-1,1,61,97,HCAHeartST10238251,1252,7.133296,3161.0,8.058960,47.390066,53.843720,...,RA,RA,45-50,Male,Sanger,na,Visium,Visium-v1,no,RA
HCAHeartST10238251_AAACCTAAGCAGCCGG-1,1,65,83,HCAHeartST10238251,1568,7.358194,4652.0,8.445268,50.322442,56.491831,...,RA,RA,45-50,Male,Sanger,na,Visium,Visium-v1,no,RA
HCAHeartST10238251_AAACGAGACGGTTGAT-1,1,35,79,HCAHeartST10238251,1689,7.432484,4137.0,8.327968,31.182016,40.802514,...,RA,RA,45-50,Male,Sanger,na,Visium,Visium-v1,no,RA
HCAHeartST10238251_AAACGGGCGTACGGGT-1,1,65,91,HCAHeartST10238251,1511,7.321189,4116.0,8.322880,49.781341,55.782313,...,RA,RA,45-50,Male,Sanger,na,Visium,Visium-v1,no,RA
HCAHeartST10238251_AAACGGTTGCGAACTG-1,1,67,59,HCAHeartST10238251,1173,7.068172,3142.0,8.052933,51.750477,57.702101,...,RA,RA,45-50,Male,Sanger,na,Visium,Visium-v1,no,RA
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HCAHeartST13233999_TTGTTCAGTGTGCTAC-1,1,24,64,HCAHeartST13233999,1979,7.590852,4601.0,8.434246,31.971311,40.056509,...,SAN,SAN,20-25,Female,Sanger,na,Visium,Visium-v1,yes,SAN
HCAHeartST13233999_TTGTTCTAGATACGCT-1,1,21,3,HCAHeartST13233999,1278,7.153834,3435.0,8.142063,46.841339,54.497817,...,SAN,SAN,20-25,Female,Sanger,na,Visium,Visium-v1,yes,SAN
HCAHeartST13233999_TTGTTGTGTGTCAAGA-1,1,31,77,HCAHeartST13233999,2057,7.629490,5594.0,8.629628,39.238470,46.228102,...,SAN,SAN,20-25,Female,Sanger,na,Visium,Visium-v1,yes,SAN
HCAHeartST13233999_TTGTTTCCATACAACT-1,1,45,27,HCAHeartST13233999,1322,7.187657,2916.0,7.978311,32.407407,42.661180,...,SAN,SAN,20-25,Female,Sanger,na,Visium,Visium-v1,yes,SAN


In [37]:
adata.obs.columns

Index(['in_tissue', 'array_row', 'array_col', 'sample', 'n_genes_by_counts',
       'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts',
       'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes',
       'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes', 'mt_frac',
       'n_counts', 'n_genes', 'annotation_JC', 'sangerID', 'Publication',
       'combinedID', 'donor', 'donor_type', 'region', 'region_finest', 'age',
       'gender', 'facility', 'cell_or_nuclei', 'modality', 'kit_10x',
       'flushed', 'region_cell2loc'],
      dtype='object')

#### **assay_ontology_term_id**

In [38]:
# identify the column in adata which corresponds to assay

In [39]:
list(adata.obs['kit_10x'].unique())

['Visium-v1']

In [40]:
# add the assay_ontology_term_id column

In [41]:
# 'kit_10x' has a single value for all spots, so the same assay term is broadcast to every row.
adata.obs['assay_ontology_term_id'] = 'EFO:0010961'

In [42]:
# change datatype of the column

In [43]:
# Re-store the assay term column with the pandas 'category' dtype.
assay_terms = adata.obs['assay_ontology_term_id']
adata.obs['assay_ontology_term_id'] = assay_terms.astype('category')

In [44]:
# view adata.obs

In [45]:
adata.obs

Unnamed: 0_level_0,in_tissue,array_row,array_col,sample,n_genes_by_counts,log1p_n_genes_by_counts,total_counts,log1p_total_counts,pct_counts_in_top_50_genes,pct_counts_in_top_100_genes,...,region_finest,age,gender,facility,cell_or_nuclei,modality,kit_10x,flushed,region_cell2loc,assay_ontology_term_id
spot_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
HCAHeartST10238251_AAACATTTCCCGGATT-1,1,61,97,HCAHeartST10238251,1252,7.133296,3161.0,8.058960,47.390066,53.843720,...,RA,45-50,Male,Sanger,na,Visium,Visium-v1,no,RA,EFO:0010961
HCAHeartST10238251_AAACCTAAGCAGCCGG-1,1,65,83,HCAHeartST10238251,1568,7.358194,4652.0,8.445268,50.322442,56.491831,...,RA,45-50,Male,Sanger,na,Visium,Visium-v1,no,RA,EFO:0010961
HCAHeartST10238251_AAACGAGACGGTTGAT-1,1,35,79,HCAHeartST10238251,1689,7.432484,4137.0,8.327968,31.182016,40.802514,...,RA,45-50,Male,Sanger,na,Visium,Visium-v1,no,RA,EFO:0010961
HCAHeartST10238251_AAACGGGCGTACGGGT-1,1,65,91,HCAHeartST10238251,1511,7.321189,4116.0,8.322880,49.781341,55.782313,...,RA,45-50,Male,Sanger,na,Visium,Visium-v1,no,RA,EFO:0010961
HCAHeartST10238251_AAACGGTTGCGAACTG-1,1,67,59,HCAHeartST10238251,1173,7.068172,3142.0,8.052933,51.750477,57.702101,...,RA,45-50,Male,Sanger,na,Visium,Visium-v1,no,RA,EFO:0010961
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HCAHeartST13233999_TTGTTCAGTGTGCTAC-1,1,24,64,HCAHeartST13233999,1979,7.590852,4601.0,8.434246,31.971311,40.056509,...,SAN,20-25,Female,Sanger,na,Visium,Visium-v1,yes,SAN,EFO:0010961
HCAHeartST13233999_TTGTTCTAGATACGCT-1,1,21,3,HCAHeartST13233999,1278,7.153834,3435.0,8.142063,46.841339,54.497817,...,SAN,20-25,Female,Sanger,na,Visium,Visium-v1,yes,SAN,EFO:0010961
HCAHeartST13233999_TTGTTGTGTGTCAAGA-1,1,31,77,HCAHeartST13233999,2057,7.629490,5594.0,8.629628,39.238470,46.228102,...,SAN,20-25,Female,Sanger,na,Visium,Visium-v1,yes,SAN,EFO:0010961
HCAHeartST13233999_TTGTTTCCATACAACT-1,1,45,27,HCAHeartST13233999,1322,7.187657,2916.0,7.978311,32.407407,42.661180,...,SAN,20-25,Female,Sanger,na,Visium,Visium-v1,yes,SAN,EFO:0010961


#### **cell_type_ontology_term_id**

In [46]:
# Get the column in adata.obs related to cell type annotation

In [47]:
adata.obs.columns

Index(['in_tissue', 'array_row', 'array_col', 'sample', 'n_genes_by_counts',
       'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts',
       'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes',
       'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes', 'mt_frac',
       'n_counts', 'n_genes', 'annotation_JC', 'sangerID', 'Publication',
       'combinedID', 'donor', 'donor_type', 'region', 'region_finest', 'age',
       'gender', 'facility', 'cell_or_nuclei', 'modality', 'kit_10x',
       'flushed', 'region_cell2loc', 'assay_ontology_term_id'],
      dtype='object')

In [48]:
adata.obsm

AxisArrays with keys: MT, means_cell_abundance_w_sf, q05_cell_abundance_w_sf, q95_cell_abundance_w_sf, spatial, stds_cell_abundance_w_sf

In [49]:
adata.obsm['means_cell_abundance_w_sf']

Unnamed: 0_level_0,meanscell_abundance_w_sf_Adip1,meanscell_abundance_w_sf_Adip2,meanscell_abundance_w_sf_Adip3,meanscell_abundance_w_sf_B,meanscell_abundance_w_sf_B_plasma,meanscell_abundance_w_sf_CD14+Mo,meanscell_abundance_w_sf_CD16+Mo,meanscell_abundance_w_sf_CD4+T_act,meanscell_abundance_w_sf_CD4+T_naive,meanscell_abundance_w_sf_CD8+T_cytox,...,meanscell_abundance_w_sf_CD4+T_Th1,meanscell_abundance_w_sf_CD4+T_Th2,meanscell_abundance_w_sf_CD4+T_reg,meanscell_abundance_w_sf_NC5_glial,meanscell_abundance_w_sf_aCM5,meanscell_abundance_w_sf_Adip4,meanscell_abundance_w_sf_NC3_glial,meanscell_abundance_w_sf_NC6_schwann,meanscell_abundance_w_sf_EC9_FB-like,meanscell_abundance_w_sf_gdT
spot_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
HCAHeartST10238251_AAACATTTCCCGGATT-1,0.041823,0.032165,0.034121,0.091965,0.164414,0.126206,0.135767,0.157274,0.139191,0.140752,...,0.155389,0.155898,0.091258,0.080868,0.065659,,,,,
HCAHeartST10238251_AAACCTAAGCAGCCGG-1,0.041537,0.024649,0.034493,0.146124,0.016041,0.065448,0.053411,0.139449,0.139479,0.090318,...,0.120184,0.172016,0.082086,0.104564,0.056001,,,,,
HCAHeartST10238251_AAACGAGACGGTTGAT-1,0.009075,0.005007,0.016471,0.061628,0.039088,0.133698,0.202981,0.131842,0.100715,0.102017,...,0.060015,1.771402,0.048682,0.014493,0.009115,,,,,
HCAHeartST10238251_AAACGGGCGTACGGGT-1,0.064591,0.065265,0.062102,0.108766,0.041947,0.123384,0.094140,0.220553,0.191648,0.182115,...,0.256267,0.197216,0.166708,0.134696,0.089384,,,,,
HCAHeartST10238251_AAACGGTTGCGAACTG-1,0.055311,0.014334,0.030682,0.143198,0.029537,0.067491,0.072974,0.143014,0.126148,0.137224,...,0.102078,0.144617,0.108127,0.084996,0.111481,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HCAHeartST13233999_TTGTTCAGTGTGCTAC-1,0.045017,0.040282,0.123358,0.073854,0.039566,0.053058,0.066211,0.109318,0.165586,0.116247,...,,,,,,,,,,
HCAHeartST13233999_TTGTTCTAGATACGCT-1,0.079810,0.283361,0.159237,0.048809,0.147295,0.030586,0.036565,0.057601,0.115652,0.109343,...,,,,,,,,,,
HCAHeartST13233999_TTGTTGTGTGTCAAGA-1,0.026541,0.021953,0.039694,0.058919,0.205768,0.049325,0.059216,0.097200,0.105765,0.054038,...,,,,,,,,,,
HCAHeartST13233999_TTGTTTCCATACAACT-1,0.043556,0.020803,0.097937,0.081409,0.023540,0.073456,0.067056,0.142086,0.210233,0.271397,...,,,,,,,,,,


In [50]:
def _columns_at_row_max(abundance_row):
    """Return the labels of every entry in the row that equals the row's maximum value."""
    return abundance_row[abundance_row == abundance_row.max()].index.tolist()


# For each spot (row), list the abundance column(s) holding the largest value.
max_columns = adata.obsm['means_cell_abundance_w_sf'].apply(_columns_at_row_max, axis=1)

In [51]:
max_columns

spot_id
HCAHeartST10238251_AAACATTTCCCGGATT-1          [meanscell_abundance_w_sf_aCM4]
HCAHeartST10238251_AAACCTAAGCAGCCGG-1          [meanscell_abundance_w_sf_aCM4]
HCAHeartST10238251_AAACGAGACGGTTGAT-1           [meanscell_abundance_w_sf_FB3]
HCAHeartST10238251_AAACGGGCGTACGGGT-1          [meanscell_abundance_w_sf_aCM4]
HCAHeartST10238251_AAACGGTTGCGAACTG-1          [meanscell_abundance_w_sf_aCM4]
                                                         ...                  
HCAHeartST13233999_TTGTTCAGTGTGCTAC-1    [meanscell_abundance_w_sf_SAN_P_cell]
HCAHeartST13233999_TTGTTCTAGATACGCT-1          [meanscell_abundance_w_sf_aCM4]
HCAHeartST13233999_TTGTTGTGTGTCAAGA-1          [meanscell_abundance_w_sf_aCM4]
HCAHeartST13233999_TTGTTTCCATACAACT-1           [meanscell_abundance_w_sf_FB3]
HCAHeartST13233999_TTGTTTGTGTAAATTC-1          [meanscell_abundance_w_sf_aCM4]
Length: 93788, dtype: object

In [52]:
# Flag spots whose maximum abundance is shared by more than one cell type.
# Each entry of `max_columns` already lists every column that attains the row
# maximum, so a tie is simply a list with more than one entry. Counting how often
# a column name repeats inside that list cannot detect ties: as long as the
# abundance column names are distinct, every name occurs exactly once and the
# result would always be empty.
columns_with_multiple_max = max_columns.apply(
    lambda tied_columns: list(tied_columns) if len(tied_columns) > 1 else []
)

In [53]:
columns_with_multiple_max 

spot_id
HCAHeartST10238251_AAACATTTCCCGGATT-1    []
HCAHeartST10238251_AAACCTAAGCAGCCGG-1    []
HCAHeartST10238251_AAACGAGACGGTTGAT-1    []
HCAHeartST10238251_AAACGGGCGTACGGGT-1    []
HCAHeartST10238251_AAACGGTTGCGAACTG-1    []
                                         ..
HCAHeartST13233999_TTGTTCAGTGTGCTAC-1    []
HCAHeartST13233999_TTGTTCTAGATACGCT-1    []
HCAHeartST13233999_TTGTTGTGTGTCAAGA-1    []
HCAHeartST13233999_TTGTTTCCATACAACT-1    []
HCAHeartST13233999_TTGTTTGTGTAAATTC-1    []
Length: 93788, dtype: object

In [54]:
# Keep the per-spot tie lists alongside the other spot metadata in obs.
# NOTE(review): each value is a Python list — confirm this helper column is removed
# (or converted) before the object is written out.
adata.obs['columns_with_multiple_max'] = columns_with_multiple_max

In [55]:
def _has_entries(tied_columns):
    """Return True when the list of tied columns is not empty."""
    return len(tied_columns) > 0


# Keep only the spots whose tie list contains at least one column.
non_empty_columns = columns_with_multiple_max[columns_with_multiple_max.apply(_has_entries)]

In [56]:
len(non_empty_columns )

0

In [57]:
# Reduce each per-spot list to a single bare label: take the first column name and
# strip the abundance prefix, or use '' when the list is empty.
# `max_columns` is rebound from a Series of lists to a plain list of strings here.
top_labels = []
for candidates in max_columns:
    if len(candidates) > 0:
        top_labels.append(candidates[0].replace('meanscell_abundance_w_sf_', ''))
    else:
        top_labels.append('')
max_columns = top_labels

In [58]:
# Store, per spot, the label of the cell type with the highest abundance value.
adata.obs['highest_cell_Density_columns'] = max_columns

In [59]:
adata.obs['highest_cell_Density_columns']

spot_id
HCAHeartST10238251_AAACATTTCCCGGATT-1          aCM4
HCAHeartST10238251_AAACCTAAGCAGCCGG-1          aCM4
HCAHeartST10238251_AAACGAGACGGTTGAT-1           FB3
HCAHeartST10238251_AAACGGGCGTACGGGT-1          aCM4
HCAHeartST10238251_AAACGGTTGCGAACTG-1          aCM4
                                            ...    
HCAHeartST13233999_TTGTTCAGTGTGCTAC-1    SAN_P_cell
HCAHeartST13233999_TTGTTCTAGATACGCT-1          aCM4
HCAHeartST13233999_TTGTTGTGTGTCAAGA-1          aCM4
HCAHeartST13233999_TTGTTTCCATACAACT-1           FB3
HCAHeartST13233999_TTGTTTGTGTAAATTC-1          aCM4
Name: highest_cell_Density_columns, Length: 93788, dtype: object

In [60]:
list(adata.obs['highest_cell_Density_columns'].unique())

['aCM4',
 'FB3',
 'DC',
 'FB4_activated',
 'EC7_endocardial',
 'FB5',
 'LYVE1+TIMD4+MP',
 'SMC1_basic',
 'NC1_glial',
 'EC5_art',
 'SMC2_art',
 'CD4+T_Th2',
 'LYVE1+IGF1+MP',
 'EC6_ven',
 'EC3_cap',
 'Meso',
 'FB1',
 'CD16+Mo',
 'PC3_str',
 'Adip1',
 'NK_CD16hi',
 'vCM4',
 'B',
 'EC8_ln',
 'vCM1',
 'vCM3_stressed',
 'FB6',
 'PC2_atria',
 'EC2_cap',
 'EC10_CMC-like',
 'MoMP',
 'CD8+T_te',
 'T/NK_cycling',
 'MAIT-like',
 'LYVE1+MP_cycling',
 'Neut',
 'CD14+Mo',
 'FB2',
 'aCM3',
 'SAN_P_cell',
 'Adip2',
 'NC2_glial_NGF+',
 'Mast',
 'CD8+T_cytox',
 'AVN_P_cell',
 'aCM1',
 'aCM2',
 'AVN_bundle_cell',
 'Adip3',
 'B_plasma',
 'Adip4',
 'PC4_CMC-like',
 'EC4_immune',
 'CD4+T_naive',
 'aCM5',
 'ILC',
 'PC1_vent',
 'CD4+T_act',
 'NK_CD56hi',
 'CD8+T_trans']

In [61]:
# Start the ontology column as a copy of the highest-abundance cell-type labels;
# a later cell replaces these labels with ontology term IDs.
adata.obs['cell_type_ontology_term_id'] = adata.obs['highest_cell_Density_columns']

In [62]:
# create a dictionary of cell type and ontology term

In [63]:
# Lookup from the dataset's cell-type labels to ontology term IDs ('CL:...').
# Several fine-grained labels intentionally share one term.
# NOTE(review): 'AVN_P_cell' and 'SAN_P_cell' resolve to the same term — confirm this is intended.
mapping = {
    # atrial cardiomyocyte states
    'aCM1': 'CL:0002129',
    'aCM2': 'CL:0002129',
    'aCM3': 'CL:0002129',
    'aCM4': 'CL:0002129',
    'aCM5': 'CL:0002129',
    # adipocytes
    'Adip1': 'CL:0000136',
    'Adip2': 'CL:0000136',
    'Adip3': 'CL:0000136',
    'Adip4': 'CL:0000136',
    'Adipocyte': 'CL:0000136',
    'Atrial Cardiomyocyte': 'CL:0002129',
    'AVN_bundle_cell': 'CL:0010005',
    'AVN_P_cell': 'CL:1000477',
    # B, monocyte and T-cell labels
    'B': 'CL:0000236',
    'B_plasma': 'CL:0000786',
    'CD14+Mo': 'CL:0001054',
    'CD16+Mo': 'CL:0002396',
    'CD4+T_act': 'CL:0000896',
    'CD4+T_naive': 'CL:0000895',
    'CD4+T_Th2': 'CL:0000546',
    'CD8+T_cytox': 'CL:0000794',
    'CD8+T_em': 'CL:0000913',
    'CD8+T_te': 'CL:0000625',
    'CD8+T_trans': 'CL:0000625',
    'DC': 'CL:0001056',
    # endothelial labels
    'EC1_cap': 'CL:0002144',
    'EC10_CMC-like': 'CL:0000115',
    'EC2_cap': 'CL:0002144',
    'EC3_cap': 'CL:0002144',
    'EC4_immune': 'CL:0000115',
    'EC5_art': 'CL:1000413',
    'EC6_ven': 'CL:0002543',
    'EC7_atria': 'CL:0002350',
    'EC7_endocardial': 'CL:0002350',
    'EC8_ln': 'CL:0002138',
    'Endothelial cell': 'CL:0000115',
    # fibroblast labels
    'FB1': 'CL:0002548',
    'FB2': 'CL:0002548',
    'FB3': 'CL:0002548',
    'FB4': 'CL:0002548',
    'FB4_activated': 'CL:0002548',
    'FB5': 'CL:0002548',
    'FB6': 'CL:0002548',
    'Fibroblast': 'CL:0000057',
    'ILC': 'CL:0001065',
    'Lymphatic Endothelial cell': 'CL:0002138',
    'Lymphoid': 'CL:0000542',
    'LYVE1+IGF1+MP': 'CL:0000235',
    'LYVE1+MP_cycling': 'CL:0000235',
    'LYVE1+TIMD4+MP': 'CL:0000235',
    'MAIT-like': 'CL:0000940',
    'Mast': 'CL:0000097',
    'Mast cell': 'CL:0000097',
    'Meso': 'CL:0000077',
    'Mesothelial cell': 'CL:0000077',
    'MoMP': 'CL:0000576',
    'Mural cell': 'CL:0008034',
    'Myeloid': 'CL:0000763',
    # neural labels
    'NC1': 'CL:0000125',
    'NC1_glial': 'CL:0000125',
    'NC2': 'CL:0000125',
    'NC2_glial_NGF+': 'CL:0000125',
    'Neural cell': 'CL:0002319',
    'Neut': 'CL:0000775',
    'NK_CD16hi': 'CL:0000939',
    'NK_CD56hi': 'CL:0000938',
    # pericyte and smooth-muscle labels
    'PC1_vent': 'CL:0000669',
    'PC2_atria': 'CL:0000669',
    'PC3_str': 'CL:0000669',
    'PC4_CMC-like': 'CL:0000669',
    'SAN_P_cell': 'CL:1000477',
    'SMC1_basic': 'CL:0000192',
    'SMC2_art': 'CL:0002591',
    'T/NK_cycling': 'CL:0000814',
    # ventricular cardiomyocyte states
    'vCM1': 'CL:0002131',
    'vCM3_stressed': 'CL:0002131',
    'vCM4': 'CL:0002131',
    'Ventricular Cardiomyocyte': 'CL:0002131',
    'unclassified': 'CL:0000003',
}

In [64]:
# add the cell_type_ontology_term_id column

In [65]:
# Translate each spot's highest-abundance cell-type label into its ontology term ID.
# The lookup reads the untouched label column rather than the column being written,
# so re-running this cell gives the same result instead of mapping term IDs a second time.
cell_type_labels = adata.obs['highest_cell_Density_columns']
cell_type_terms = cell_type_labels.map(mapping)

# Series.map leaves NaN for any label that is missing from `mapping`; stop here and
# report those labels instead of carrying empty ontology terms forward.
unmapped_labels = sorted(set(cell_type_labels[cell_type_terms.isna()]))
if unmapped_labels:
    raise ValueError(f"No ontology term defined for cell-type labels: {unmapped_labels}")

adata.obs['cell_type_ontology_term_id'] = cell_type_terms

In [66]:
# change datatype of the column

In [67]:
# Re-store the cell-type term column with the pandas 'category' dtype.
cell_type_term_column = adata.obs['cell_type_ontology_term_id']
adata.obs['cell_type_ontology_term_id'] = cell_type_term_column.astype('category')

In [68]:
adata.obs

Unnamed: 0_level_0,in_tissue,array_row,array_col,sample,n_genes_by_counts,log1p_n_genes_by_counts,total_counts,log1p_total_counts,pct_counts_in_top_50_genes,pct_counts_in_top_100_genes,...,facility,cell_or_nuclei,modality,kit_10x,flushed,region_cell2loc,assay_ontology_term_id,columns_with_multiple_max,highest_cell_Density_columns,cell_type_ontology_term_id
spot_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
HCAHeartST10238251_AAACATTTCCCGGATT-1,1,61,97,HCAHeartST10238251,1252,7.133296,3161.0,8.058960,47.390066,53.843720,...,Sanger,na,Visium,Visium-v1,no,RA,EFO:0010961,[],aCM4,CL:0002129
HCAHeartST10238251_AAACCTAAGCAGCCGG-1,1,65,83,HCAHeartST10238251,1568,7.358194,4652.0,8.445268,50.322442,56.491831,...,Sanger,na,Visium,Visium-v1,no,RA,EFO:0010961,[],aCM4,CL:0002129
HCAHeartST10238251_AAACGAGACGGTTGAT-1,1,35,79,HCAHeartST10238251,1689,7.432484,4137.0,8.327968,31.182016,40.802514,...,Sanger,na,Visium,Visium-v1,no,RA,EFO:0010961,[],FB3,CL:0002548
HCAHeartST10238251_AAACGGGCGTACGGGT-1,1,65,91,HCAHeartST10238251,1511,7.321189,4116.0,8.322880,49.781341,55.782313,...,Sanger,na,Visium,Visium-v1,no,RA,EFO:0010961,[],aCM4,CL:0002129
HCAHeartST10238251_AAACGGTTGCGAACTG-1,1,67,59,HCAHeartST10238251,1173,7.068172,3142.0,8.052933,51.750477,57.702101,...,Sanger,na,Visium,Visium-v1,no,RA,EFO:0010961,[],aCM4,CL:0002129
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HCAHeartST13233999_TTGTTCAGTGTGCTAC-1,1,24,64,HCAHeartST13233999,1979,7.590852,4601.0,8.434246,31.971311,40.056509,...,Sanger,na,Visium,Visium-v1,yes,SAN,EFO:0010961,[],SAN_P_cell,CL:1000477
HCAHeartST13233999_TTGTTCTAGATACGCT-1,1,21,3,HCAHeartST13233999,1278,7.153834,3435.0,8.142063,46.841339,54.497817,...,Sanger,na,Visium,Visium-v1,yes,SAN,EFO:0010961,[],aCM4,CL:0002129
HCAHeartST13233999_TTGTTGTGTGTCAAGA-1,1,31,77,HCAHeartST13233999,2057,7.629490,5594.0,8.629628,39.238470,46.228102,...,Sanger,na,Visium,Visium-v1,yes,SAN,EFO:0010961,[],aCM4,CL:0002129
HCAHeartST13233999_TTGTTTCCATACAACT-1,1,45,27,HCAHeartST13233999,1322,7.187657,2916.0,7.978311,32.407407,42.661180,...,Sanger,na,Visium,Visium-v1,yes,SAN,EFO:0010961,[],FB3,CL:0002548


#### **donor_id**

In [69]:
#identify the column in adata.obs which provides donor information

In [70]:
adata.obs.columns

Index(['in_tissue', 'array_row', 'array_col', 'sample', 'n_genes_by_counts',
       'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts',
       'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes',
       'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes', 'mt_frac',
       'n_counts', 'n_genes', 'annotation_JC', 'sangerID', 'Publication',
       'combinedID', 'donor', 'donor_type', 'region', 'region_finest', 'age',
       'gender', 'facility', 'cell_or_nuclei', 'modality', 'kit_10x',
       'flushed', 'region_cell2loc', 'assay_ontology_term_id',
       'columns_with_multiple_max', 'highest_cell_Density_columns',
       'cell_type_ontology_term_id'],
      dtype='object')

In [71]:
list(adata.obs['donor'].unique())

['D8', 'D3', 'AH1', 'A61', 'D5', 'D6', 'D7', 'AH2', 'AH6', 'AV14', 'AH5']

In [72]:
# add the donor_id column

In [73]:
# Use the values of the existing 'donor' column as the donor identifier.
adata.obs['donor_id'] = adata.obs['donor']

In [74]:
# change datatype of the column

In [75]:
# Re-store the donor identifier column with the pandas 'category' dtype.
donor_id_column = adata.obs['donor_id']
adata.obs['donor_id'] = donor_id_column.astype('category')

In [76]:
# view unique values of donor_id column

In [77]:
list(adata.obs['donor_id'].unique())

['D8', 'D3', 'AH1', 'A61', 'D5', 'D6', 'D7', 'AH2', 'AH6', 'AV14', 'AH5']

In [78]:
#view obs

In [79]:
# Display obs; donor_id should now appear as the last column.
adata.obs

Unnamed: 0_level_0,in_tissue,array_row,array_col,sample,n_genes_by_counts,log1p_n_genes_by_counts,total_counts,log1p_total_counts,pct_counts_in_top_50_genes,pct_counts_in_top_100_genes,...,cell_or_nuclei,modality,kit_10x,flushed,region_cell2loc,assay_ontology_term_id,columns_with_multiple_max,highest_cell_Density_columns,cell_type_ontology_term_id,donor_id
spot_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
HCAHeartST10238251_AAACATTTCCCGGATT-1,1,61,97,HCAHeartST10238251,1252,7.133296,3161.0,8.058960,47.390066,53.843720,...,na,Visium,Visium-v1,no,RA,EFO:0010961,[],aCM4,CL:0002129,D8
HCAHeartST10238251_AAACCTAAGCAGCCGG-1,1,65,83,HCAHeartST10238251,1568,7.358194,4652.0,8.445268,50.322442,56.491831,...,na,Visium,Visium-v1,no,RA,EFO:0010961,[],aCM4,CL:0002129,D8
HCAHeartST10238251_AAACGAGACGGTTGAT-1,1,35,79,HCAHeartST10238251,1689,7.432484,4137.0,8.327968,31.182016,40.802514,...,na,Visium,Visium-v1,no,RA,EFO:0010961,[],FB3,CL:0002548,D8
HCAHeartST10238251_AAACGGGCGTACGGGT-1,1,65,91,HCAHeartST10238251,1511,7.321189,4116.0,8.322880,49.781341,55.782313,...,na,Visium,Visium-v1,no,RA,EFO:0010961,[],aCM4,CL:0002129,D8
HCAHeartST10238251_AAACGGTTGCGAACTG-1,1,67,59,HCAHeartST10238251,1173,7.068172,3142.0,8.052933,51.750477,57.702101,...,na,Visium,Visium-v1,no,RA,EFO:0010961,[],aCM4,CL:0002129,D8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HCAHeartST13233999_TTGTTCAGTGTGCTAC-1,1,24,64,HCAHeartST13233999,1979,7.590852,4601.0,8.434246,31.971311,40.056509,...,na,Visium,Visium-v1,yes,SAN,EFO:0010961,[],SAN_P_cell,CL:1000477,AH5
HCAHeartST13233999_TTGTTCTAGATACGCT-1,1,21,3,HCAHeartST13233999,1278,7.153834,3435.0,8.142063,46.841339,54.497817,...,na,Visium,Visium-v1,yes,SAN,EFO:0010961,[],aCM4,CL:0002129,AH5
HCAHeartST13233999_TTGTTGTGTGTCAAGA-1,1,31,77,HCAHeartST13233999,2057,7.629490,5594.0,8.629628,39.238470,46.228102,...,na,Visium,Visium-v1,yes,SAN,EFO:0010961,[],aCM4,CL:0002129,AH5
HCAHeartST13233999_TTGTTTCCATACAACT-1,1,45,27,HCAHeartST13233999,1322,7.187657,2916.0,7.978311,32.407407,42.661180,...,na,Visium,Visium-v1,yes,SAN,EFO:0010961,[],FB3,CL:0002548,AH5


In [80]:
# List the obs column names; 'donor_id' should now be present.
adata.obs.columns

Index(['in_tissue', 'array_row', 'array_col', 'sample', 'n_genes_by_counts',
       'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts',
       'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes',
       'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes', 'mt_frac',
       'n_counts', 'n_genes', 'annotation_JC', 'sangerID', 'Publication',
       'combinedID', 'donor', 'donor_type', 'region', 'region_finest', 'age',
       'gender', 'facility', 'cell_or_nuclei', 'modality', 'kit_10x',
       'flushed', 'region_cell2loc', 'assay_ontology_term_id',
       'columns_with_multiple_max', 'highest_cell_Density_columns',
       'cell_type_ontology_term_id', 'donor_id'],
      dtype='object')

#### **development_stage_ontology_term_id**

In [81]:
# identify the column in adata which corresponds to age

In [82]:
# List the obs column names; 'age' is the column used for development stage below.
adata.obs.columns

Index(['in_tissue', 'array_row', 'array_col', 'sample', 'n_genes_by_counts',
       'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts',
       'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes',
       'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes', 'mt_frac',
       'n_counts', 'n_genes', 'annotation_JC', 'sangerID', 'Publication',
       'combinedID', 'donor', 'donor_type', 'region', 'region_finest', 'age',
       'gender', 'facility', 'cell_or_nuclei', 'modality', 'kit_10x',
       'flushed', 'region_cell2loc', 'assay_ontology_term_id',
       'columns_with_multiple_max', 'highest_cell_Density_columns',
       'cell_type_ontology_term_id', 'donor_id'],
      dtype='object')

In [83]:
# Distinct age bins present in the data; each one needs an entry in the mapping.
adata.obs['age'].unique().tolist()

['45-50', '55-60', '70-75', '65-70', '60-65', '40-45', '50-55', '20-25']

In [84]:
# create a dictionary for age and development stage ontology term id

In [85]:
# Age bin -> development stage term, grouped by the term the bins share.
# NOTE(review): the HsapDv IDs are presumably decade-stage terms — verify
# against the ontology.
mapping = {
    age_bin: stage_term
    for stage_term, age_bins in (
        ('HsapDv:0000240', ('50-55', '55-60')),
        ('HsapDv:0000242', ('70-75',)),
        ('HsapDv:0000241', ('65-70', '60-65')),
        ('HsapDv:0000239', ('40-45', '45-50')),
        ('HsapDv:0000237', ('20-25',)),
    )
    for age_bin in age_bins
}

In [86]:
# add the development_stage_ontology_term_id column

In [87]:
# Translate each age bin to its development stage term via `mapping`.
adata.obs['development_stage_ontology_term_id'] = adata.obs['age'].map(mapping)
# Series.map leaves NaN for any value missing from the dict; fail fast here
# rather than carrying a missing ontology term into the curated object.
assert adata.obs['development_stage_ontology_term_id'].notna().all(), (
    "age values without a development stage mapping: "
    f"{sorted(set(adata.obs['age'].astype(str)) - set(mapping))}"
)

In [88]:
# change datatype of the column

In [89]:
# Store the development stage terms as a categorical column.
adata.obs['development_stage_ontology_term_id'] = pd.Categorical(
    adata.obs['development_stage_ontology_term_id']
)

In [90]:
# view unique values of development_stage_ontology_term_id column

In [91]:
# Distinct development stage terms actually assigned.
adata.obs['development_stage_ontology_term_id'].unique().tolist()

['HsapDv:0000239',
 'HsapDv:0000240',
 'HsapDv:0000242',
 'HsapDv:0000241',
 'HsapDv:0000237']

In [92]:
# view adata.obs

In [93]:
# Display obs; development_stage_ontology_term_id should now be the last column.
adata.obs

Unnamed: 0_level_0,in_tissue,array_row,array_col,sample,n_genes_by_counts,log1p_n_genes_by_counts,total_counts,log1p_total_counts,pct_counts_in_top_50_genes,pct_counts_in_top_100_genes,...,modality,kit_10x,flushed,region_cell2loc,assay_ontology_term_id,columns_with_multiple_max,highest_cell_Density_columns,cell_type_ontology_term_id,donor_id,development_stage_ontology_term_id
spot_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
HCAHeartST10238251_AAACATTTCCCGGATT-1,1,61,97,HCAHeartST10238251,1252,7.133296,3161.0,8.058960,47.390066,53.843720,...,Visium,Visium-v1,no,RA,EFO:0010961,[],aCM4,CL:0002129,D8,HsapDv:0000239
HCAHeartST10238251_AAACCTAAGCAGCCGG-1,1,65,83,HCAHeartST10238251,1568,7.358194,4652.0,8.445268,50.322442,56.491831,...,Visium,Visium-v1,no,RA,EFO:0010961,[],aCM4,CL:0002129,D8,HsapDv:0000239
HCAHeartST10238251_AAACGAGACGGTTGAT-1,1,35,79,HCAHeartST10238251,1689,7.432484,4137.0,8.327968,31.182016,40.802514,...,Visium,Visium-v1,no,RA,EFO:0010961,[],FB3,CL:0002548,D8,HsapDv:0000239
HCAHeartST10238251_AAACGGGCGTACGGGT-1,1,65,91,HCAHeartST10238251,1511,7.321189,4116.0,8.322880,49.781341,55.782313,...,Visium,Visium-v1,no,RA,EFO:0010961,[],aCM4,CL:0002129,D8,HsapDv:0000239
HCAHeartST10238251_AAACGGTTGCGAACTG-1,1,67,59,HCAHeartST10238251,1173,7.068172,3142.0,8.052933,51.750477,57.702101,...,Visium,Visium-v1,no,RA,EFO:0010961,[],aCM4,CL:0002129,D8,HsapDv:0000239
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HCAHeartST13233999_TTGTTCAGTGTGCTAC-1,1,24,64,HCAHeartST13233999,1979,7.590852,4601.0,8.434246,31.971311,40.056509,...,Visium,Visium-v1,yes,SAN,EFO:0010961,[],SAN_P_cell,CL:1000477,AH5,HsapDv:0000237
HCAHeartST13233999_TTGTTCTAGATACGCT-1,1,21,3,HCAHeartST13233999,1278,7.153834,3435.0,8.142063,46.841339,54.497817,...,Visium,Visium-v1,yes,SAN,EFO:0010961,[],aCM4,CL:0002129,AH5,HsapDv:0000237
HCAHeartST13233999_TTGTTGTGTGTCAAGA-1,1,31,77,HCAHeartST13233999,2057,7.629490,5594.0,8.629628,39.238470,46.228102,...,Visium,Visium-v1,yes,SAN,EFO:0010961,[],aCM4,CL:0002129,AH5,HsapDv:0000237
HCAHeartST13233999_TTGTTTCCATACAACT-1,1,45,27,HCAHeartST13233999,1322,7.187657,2916.0,7.978311,32.407407,42.661180,...,Visium,Visium-v1,yes,SAN,EFO:0010961,[],FB3,CL:0002548,AH5,HsapDv:0000237


#### **disease_ontology_term_id**

In [94]:
# Assign the 'normal' term (PATO:0000461) to every observation, since all patients are healthy

In [95]:
# add the disease_ontology_term_id column

In [96]:
# Every observation gets the same disease term; the scalar is broadcast to all rows.
adata.obs['disease_ontology_term_id'] = 'PATO:0000461'

In [97]:
#change data type of column

In [98]:
# Store the disease term as a categorical column.
adata.obs['disease_ontology_term_id'] = pd.Categorical(
    adata.obs['disease_ontology_term_id']
)

In [99]:
# view obs

In [100]:
# Display obs; disease_ontology_term_id should now be the last column.
adata.obs

Unnamed: 0_level_0,in_tissue,array_row,array_col,sample,n_genes_by_counts,log1p_n_genes_by_counts,total_counts,log1p_total_counts,pct_counts_in_top_50_genes,pct_counts_in_top_100_genes,...,kit_10x,flushed,region_cell2loc,assay_ontology_term_id,columns_with_multiple_max,highest_cell_Density_columns,cell_type_ontology_term_id,donor_id,development_stage_ontology_term_id,disease_ontology_term_id
spot_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
HCAHeartST10238251_AAACATTTCCCGGATT-1,1,61,97,HCAHeartST10238251,1252,7.133296,3161.0,8.058960,47.390066,53.843720,...,Visium-v1,no,RA,EFO:0010961,[],aCM4,CL:0002129,D8,HsapDv:0000239,PATO:0000461
HCAHeartST10238251_AAACCTAAGCAGCCGG-1,1,65,83,HCAHeartST10238251,1568,7.358194,4652.0,8.445268,50.322442,56.491831,...,Visium-v1,no,RA,EFO:0010961,[],aCM4,CL:0002129,D8,HsapDv:0000239,PATO:0000461
HCAHeartST10238251_AAACGAGACGGTTGAT-1,1,35,79,HCAHeartST10238251,1689,7.432484,4137.0,8.327968,31.182016,40.802514,...,Visium-v1,no,RA,EFO:0010961,[],FB3,CL:0002548,D8,HsapDv:0000239,PATO:0000461
HCAHeartST10238251_AAACGGGCGTACGGGT-1,1,65,91,HCAHeartST10238251,1511,7.321189,4116.0,8.322880,49.781341,55.782313,...,Visium-v1,no,RA,EFO:0010961,[],aCM4,CL:0002129,D8,HsapDv:0000239,PATO:0000461
HCAHeartST10238251_AAACGGTTGCGAACTG-1,1,67,59,HCAHeartST10238251,1173,7.068172,3142.0,8.052933,51.750477,57.702101,...,Visium-v1,no,RA,EFO:0010961,[],aCM4,CL:0002129,D8,HsapDv:0000239,PATO:0000461
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HCAHeartST13233999_TTGTTCAGTGTGCTAC-1,1,24,64,HCAHeartST13233999,1979,7.590852,4601.0,8.434246,31.971311,40.056509,...,Visium-v1,yes,SAN,EFO:0010961,[],SAN_P_cell,CL:1000477,AH5,HsapDv:0000237,PATO:0000461
HCAHeartST13233999_TTGTTCTAGATACGCT-1,1,21,3,HCAHeartST13233999,1278,7.153834,3435.0,8.142063,46.841339,54.497817,...,Visium-v1,yes,SAN,EFO:0010961,[],aCM4,CL:0002129,AH5,HsapDv:0000237,PATO:0000461
HCAHeartST13233999_TTGTTGTGTGTCAAGA-1,1,31,77,HCAHeartST13233999,2057,7.629490,5594.0,8.629628,39.238470,46.228102,...,Visium-v1,yes,SAN,EFO:0010961,[],aCM4,CL:0002129,AH5,HsapDv:0000237,PATO:0000461
HCAHeartST13233999_TTGTTTCCATACAACT-1,1,45,27,HCAHeartST13233999,1322,7.187657,2916.0,7.978311,32.407407,42.661180,...,Visium-v1,yes,SAN,EFO:0010961,[],FB3,CL:0002548,AH5,HsapDv:0000237,PATO:0000461


#### **is_primary_data**

In [101]:
# Flag every observation as non-primary; the scalar is broadcast to all rows.
# NOTE(review): the reason for False is not stated in this notebook — confirm.
adata.obs['is_primary_data'] = False

In [102]:
# Display obs; is_primary_data should now be the last column.
adata.obs

Unnamed: 0_level_0,in_tissue,array_row,array_col,sample,n_genes_by_counts,log1p_n_genes_by_counts,total_counts,log1p_total_counts,pct_counts_in_top_50_genes,pct_counts_in_top_100_genes,...,flushed,region_cell2loc,assay_ontology_term_id,columns_with_multiple_max,highest_cell_Density_columns,cell_type_ontology_term_id,donor_id,development_stage_ontology_term_id,disease_ontology_term_id,is_primary_data
spot_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
HCAHeartST10238251_AAACATTTCCCGGATT-1,1,61,97,HCAHeartST10238251,1252,7.133296,3161.0,8.058960,47.390066,53.843720,...,no,RA,EFO:0010961,[],aCM4,CL:0002129,D8,HsapDv:0000239,PATO:0000461,False
HCAHeartST10238251_AAACCTAAGCAGCCGG-1,1,65,83,HCAHeartST10238251,1568,7.358194,4652.0,8.445268,50.322442,56.491831,...,no,RA,EFO:0010961,[],aCM4,CL:0002129,D8,HsapDv:0000239,PATO:0000461,False
HCAHeartST10238251_AAACGAGACGGTTGAT-1,1,35,79,HCAHeartST10238251,1689,7.432484,4137.0,8.327968,31.182016,40.802514,...,no,RA,EFO:0010961,[],FB3,CL:0002548,D8,HsapDv:0000239,PATO:0000461,False
HCAHeartST10238251_AAACGGGCGTACGGGT-1,1,65,91,HCAHeartST10238251,1511,7.321189,4116.0,8.322880,49.781341,55.782313,...,no,RA,EFO:0010961,[],aCM4,CL:0002129,D8,HsapDv:0000239,PATO:0000461,False
HCAHeartST10238251_AAACGGTTGCGAACTG-1,1,67,59,HCAHeartST10238251,1173,7.068172,3142.0,8.052933,51.750477,57.702101,...,no,RA,EFO:0010961,[],aCM4,CL:0002129,D8,HsapDv:0000239,PATO:0000461,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HCAHeartST13233999_TTGTTCAGTGTGCTAC-1,1,24,64,HCAHeartST13233999,1979,7.590852,4601.0,8.434246,31.971311,40.056509,...,yes,SAN,EFO:0010961,[],SAN_P_cell,CL:1000477,AH5,HsapDv:0000237,PATO:0000461,False
HCAHeartST13233999_TTGTTCTAGATACGCT-1,1,21,3,HCAHeartST13233999,1278,7.153834,3435.0,8.142063,46.841339,54.497817,...,yes,SAN,EFO:0010961,[],aCM4,CL:0002129,AH5,HsapDv:0000237,PATO:0000461,False
HCAHeartST13233999_TTGTTGTGTGTCAAGA-1,1,31,77,HCAHeartST13233999,2057,7.629490,5594.0,8.629628,39.238470,46.228102,...,yes,SAN,EFO:0010961,[],aCM4,CL:0002129,AH5,HsapDv:0000237,PATO:0000461,False
HCAHeartST13233999_TTGTTTCCATACAACT-1,1,45,27,HCAHeartST13233999,1322,7.187657,2916.0,7.978311,32.407407,42.661180,...,yes,SAN,EFO:0010961,[],FB3,CL:0002548,AH5,HsapDv:0000237,PATO:0000461,False


In [103]:
#change data type of column

In [104]:
# Make the boolean dtype of is_primary_data explicit.
adata.obs['is_primary_data'] = adata.obs['is_primary_data'].astype(bool)

#### **organism_ontology_term_id**

In [105]:
# assign organism id 

In [106]:
# Every observation gets the same organism term; the scalar is broadcast to all rows.
adata.obs['organism_ontology_term_id'] = 'NCBITaxon:9606'

In [107]:
#change data type of column

In [108]:
# Store the organism term as a categorical column.
adata.obs['organism_ontology_term_id'] = pd.Categorical(
    adata.obs['organism_ontology_term_id']
)

In [109]:
# view obs

In [110]:
# Display obs; organism_ontology_term_id should now be the last column.
adata.obs

Unnamed: 0_level_0,in_tissue,array_row,array_col,sample,n_genes_by_counts,log1p_n_genes_by_counts,total_counts,log1p_total_counts,pct_counts_in_top_50_genes,pct_counts_in_top_100_genes,...,region_cell2loc,assay_ontology_term_id,columns_with_multiple_max,highest_cell_Density_columns,cell_type_ontology_term_id,donor_id,development_stage_ontology_term_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id
spot_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
HCAHeartST10238251_AAACATTTCCCGGATT-1,1,61,97,HCAHeartST10238251,1252,7.133296,3161.0,8.058960,47.390066,53.843720,...,RA,EFO:0010961,[],aCM4,CL:0002129,D8,HsapDv:0000239,PATO:0000461,False,NCBITaxon:9606
HCAHeartST10238251_AAACCTAAGCAGCCGG-1,1,65,83,HCAHeartST10238251,1568,7.358194,4652.0,8.445268,50.322442,56.491831,...,RA,EFO:0010961,[],aCM4,CL:0002129,D8,HsapDv:0000239,PATO:0000461,False,NCBITaxon:9606
HCAHeartST10238251_AAACGAGACGGTTGAT-1,1,35,79,HCAHeartST10238251,1689,7.432484,4137.0,8.327968,31.182016,40.802514,...,RA,EFO:0010961,[],FB3,CL:0002548,D8,HsapDv:0000239,PATO:0000461,False,NCBITaxon:9606
HCAHeartST10238251_AAACGGGCGTACGGGT-1,1,65,91,HCAHeartST10238251,1511,7.321189,4116.0,8.322880,49.781341,55.782313,...,RA,EFO:0010961,[],aCM4,CL:0002129,D8,HsapDv:0000239,PATO:0000461,False,NCBITaxon:9606
HCAHeartST10238251_AAACGGTTGCGAACTG-1,1,67,59,HCAHeartST10238251,1173,7.068172,3142.0,8.052933,51.750477,57.702101,...,RA,EFO:0010961,[],aCM4,CL:0002129,D8,HsapDv:0000239,PATO:0000461,False,NCBITaxon:9606
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HCAHeartST13233999_TTGTTCAGTGTGCTAC-1,1,24,64,HCAHeartST13233999,1979,7.590852,4601.0,8.434246,31.971311,40.056509,...,SAN,EFO:0010961,[],SAN_P_cell,CL:1000477,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606
HCAHeartST13233999_TTGTTCTAGATACGCT-1,1,21,3,HCAHeartST13233999,1278,7.153834,3435.0,8.142063,46.841339,54.497817,...,SAN,EFO:0010961,[],aCM4,CL:0002129,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606
HCAHeartST13233999_TTGTTGTGTGTCAAGA-1,1,31,77,HCAHeartST13233999,2057,7.629490,5594.0,8.629628,39.238470,46.228102,...,SAN,EFO:0010961,[],aCM4,CL:0002129,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606
HCAHeartST13233999_TTGTTTCCATACAACT-1,1,45,27,HCAHeartST13233999,1322,7.187657,2916.0,7.978311,32.407407,42.661180,...,SAN,EFO:0010961,[],FB3,CL:0002548,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606


#### **self_reported_ethnicity_ontology_term_id**

In [111]:
# create a dictionary mapping each donor to their ethnic origin

In [112]:
# Donor -> ethnic origin label. Every donor defaults to 'Caucasian'; the
# exceptions are overridden afterwards (an override keeps the key's original
# position in the dict).
mapping = {
    **dict.fromkeys(
        [
            'D1', 'D2', 'D3', 'D4', 'D5', 'D6', 'D7', 'D11',
            'H2', 'H3', 'H4', 'H5', 'H6', 'H7',
            'A61', 'AH1', 'AH1-A61', 'AH2',
            'AV10', 'AV13', 'AV14', 'AV3',
            'D8', 'AH5', 'AH6', 'AV1',
        ],
        'Caucasian',
    ),
    'H3': 'Asian',
    'AH1-A61': 'unknown',
    'AH2': 'South Asian',
}

In [113]:
# add ethinic_origin column

In [114]:
# Look up each donor's ethnic origin label via `mapping`.
adata.obs['ethinic_origin'] = adata.obs['donor'].map(mapping)
# Series.map leaves NaN for any donor missing from the dict; fail fast here
# rather than carrying a missing label into the ontology lookup that follows.
assert adata.obs['ethinic_origin'].notna().all(), (
    "donors without an ethnic origin mapping: "
    f"{sorted(set(adata.obs['donor'].astype(str)) - set(mapping))}"
)

In [115]:
# create a dictionary mapping each ethnic origin label to its self_reported_ethnicity_ontology_term_id

In [116]:
# Ethnic origin label -> self-reported ethnicity term ('unknown' passes through).
# NOTE(review): term meanings are not shown here — verify the IDs against HANCESTRO.
mapping = dict([
    ('Caucasian', 'HANCESTRO:0005'),
    ('Asian', 'HANCESTRO:0008'),
    ('unknown', 'unknown'),
    ('South Asian', 'HANCESTRO:0006'),
])

In [117]:
# add self_reported_ethnicity_ontology_term_id column

In [118]:
# Translate each ethnic origin label to its ontology term via `mapping`.
adata.obs['self_reported_ethnicity_ontology_term_id'] = adata.obs['ethinic_origin'].map(mapping)
# Series.map leaves NaN for any label missing from the dict; fail fast here
# rather than carrying a missing ontology term into the curated object.
assert adata.obs['self_reported_ethnicity_ontology_term_id'].notna().all(), (
    "ethinic_origin values without an ontology term mapping: "
    f"{sorted(set(adata.obs['ethinic_origin'].astype(str)) - set(mapping))}"
)

In [119]:
# change data type

In [120]:
# Store the ethnicity terms as a categorical column.
adata.obs['self_reported_ethnicity_ontology_term_id'] = pd.Categorical(
    adata.obs['self_reported_ethnicity_ontology_term_id']
)

In [121]:
# view obs

In [122]:
# Display obs; ethinic_origin and self_reported_ethnicity_ontology_term_id
# should now be the last two columns.
adata.obs

Unnamed: 0_level_0,in_tissue,array_row,array_col,sample,n_genes_by_counts,log1p_n_genes_by_counts,total_counts,log1p_total_counts,pct_counts_in_top_50_genes,pct_counts_in_top_100_genes,...,columns_with_multiple_max,highest_cell_Density_columns,cell_type_ontology_term_id,donor_id,development_stage_ontology_term_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id,ethinic_origin,self_reported_ethnicity_ontology_term_id
spot_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
HCAHeartST10238251_AAACATTTCCCGGATT-1,1,61,97,HCAHeartST10238251,1252,7.133296,3161.0,8.058960,47.390066,53.843720,...,[],aCM4,CL:0002129,D8,HsapDv:0000239,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005
HCAHeartST10238251_AAACCTAAGCAGCCGG-1,1,65,83,HCAHeartST10238251,1568,7.358194,4652.0,8.445268,50.322442,56.491831,...,[],aCM4,CL:0002129,D8,HsapDv:0000239,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005
HCAHeartST10238251_AAACGAGACGGTTGAT-1,1,35,79,HCAHeartST10238251,1689,7.432484,4137.0,8.327968,31.182016,40.802514,...,[],FB3,CL:0002548,D8,HsapDv:0000239,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005
HCAHeartST10238251_AAACGGGCGTACGGGT-1,1,65,91,HCAHeartST10238251,1511,7.321189,4116.0,8.322880,49.781341,55.782313,...,[],aCM4,CL:0002129,D8,HsapDv:0000239,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005
HCAHeartST10238251_AAACGGTTGCGAACTG-1,1,67,59,HCAHeartST10238251,1173,7.068172,3142.0,8.052933,51.750477,57.702101,...,[],aCM4,CL:0002129,D8,HsapDv:0000239,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HCAHeartST13233999_TTGTTCAGTGTGCTAC-1,1,24,64,HCAHeartST13233999,1979,7.590852,4601.0,8.434246,31.971311,40.056509,...,[],SAN_P_cell,CL:1000477,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005
HCAHeartST13233999_TTGTTCTAGATACGCT-1,1,21,3,HCAHeartST13233999,1278,7.153834,3435.0,8.142063,46.841339,54.497817,...,[],aCM4,CL:0002129,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005
HCAHeartST13233999_TTGTTGTGTGTCAAGA-1,1,31,77,HCAHeartST13233999,2057,7.629490,5594.0,8.629628,39.238470,46.228102,...,[],aCM4,CL:0002129,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005
HCAHeartST13233999_TTGTTTCCATACAACT-1,1,45,27,HCAHeartST13233999,1322,7.187657,2916.0,7.978311,32.407407,42.661180,...,[],FB3,CL:0002548,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005


#### **sex_ontology_term_id**

In [123]:
# identify the column in adata.obs which corresponds to sex

In [124]:
# List the obs column names; 'gender' is the column used for sex below.
adata.obs.columns

Index(['in_tissue', 'array_row', 'array_col', 'sample', 'n_genes_by_counts',
       'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts',
       'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes',
       'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes', 'mt_frac',
       'n_counts', 'n_genes', 'annotation_JC', 'sangerID', 'Publication',
       'combinedID', 'donor', 'donor_type', 'region', 'region_finest', 'age',
       'gender', 'facility', 'cell_or_nuclei', 'modality', 'kit_10x',
       'flushed', 'region_cell2loc', 'assay_ontology_term_id',
       'columns_with_multiple_max', 'highest_cell_Density_columns',
       'cell_type_ontology_term_id', 'donor_id',
       'development_stage_ontology_term_id', 'disease_ontology_term_id',
       'is_primary_data', 'organism_ontology_term_id', 'ethinic_origin',
       'self_reported_ethnicity_ontology_term_id'],
      dtype='object')

In [125]:
# list the unique values 

In [126]:
# Distinct gender labels present in the data; each one needs an entry in the mapping.
adata.obs['gender'].unique().tolist()

['Male', 'Female']

In [127]:
# create a dictionary of sex and sex ontology term id

In [128]:
# Gender label -> sex ontology term.
mapping = dict(
    Female='PATO:0000383',
    Male='PATO:0000384',
)

In [129]:
# add sex_ontology_term_id column

In [130]:
# Translate each gender label to its sex ontology term via `mapping`.
adata.obs['sex_ontology_term_id'] = adata.obs['gender'].map(mapping)
# Series.map leaves NaN for any label missing from the dict; fail fast here
# rather than carrying a missing ontology term into the curated object.
assert adata.obs['sex_ontology_term_id'].notna().all(), (
    "gender values without a sex ontology mapping: "
    f"{sorted(set(adata.obs['gender'].astype(str)) - set(mapping))}"
)

In [131]:
# change data type

In [132]:
# Store the sex terms as a categorical column.
adata.obs['sex_ontology_term_id'] = pd.Categorical(
    adata.obs['sex_ontology_term_id']
)

In [133]:
# Display obs; sex_ontology_term_id should now be the last column.
adata.obs

Unnamed: 0_level_0,in_tissue,array_row,array_col,sample,n_genes_by_counts,log1p_n_genes_by_counts,total_counts,log1p_total_counts,pct_counts_in_top_50_genes,pct_counts_in_top_100_genes,...,highest_cell_Density_columns,cell_type_ontology_term_id,donor_id,development_stage_ontology_term_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id,ethinic_origin,self_reported_ethnicity_ontology_term_id,sex_ontology_term_id
spot_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
HCAHeartST10238251_AAACATTTCCCGGATT-1,1,61,97,HCAHeartST10238251,1252,7.133296,3161.0,8.058960,47.390066,53.843720,...,aCM4,CL:0002129,D8,HsapDv:0000239,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000384
HCAHeartST10238251_AAACCTAAGCAGCCGG-1,1,65,83,HCAHeartST10238251,1568,7.358194,4652.0,8.445268,50.322442,56.491831,...,aCM4,CL:0002129,D8,HsapDv:0000239,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000384
HCAHeartST10238251_AAACGAGACGGTTGAT-1,1,35,79,HCAHeartST10238251,1689,7.432484,4137.0,8.327968,31.182016,40.802514,...,FB3,CL:0002548,D8,HsapDv:0000239,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000384
HCAHeartST10238251_AAACGGGCGTACGGGT-1,1,65,91,HCAHeartST10238251,1511,7.321189,4116.0,8.322880,49.781341,55.782313,...,aCM4,CL:0002129,D8,HsapDv:0000239,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000384
HCAHeartST10238251_AAACGGTTGCGAACTG-1,1,67,59,HCAHeartST10238251,1173,7.068172,3142.0,8.052933,51.750477,57.702101,...,aCM4,CL:0002129,D8,HsapDv:0000239,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000384
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HCAHeartST13233999_TTGTTCAGTGTGCTAC-1,1,24,64,HCAHeartST13233999,1979,7.590852,4601.0,8.434246,31.971311,40.056509,...,SAN_P_cell,CL:1000477,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000383
HCAHeartST13233999_TTGTTCTAGATACGCT-1,1,21,3,HCAHeartST13233999,1278,7.153834,3435.0,8.142063,46.841339,54.497817,...,aCM4,CL:0002129,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000383
HCAHeartST13233999_TTGTTGTGTGTCAAGA-1,1,31,77,HCAHeartST13233999,2057,7.629490,5594.0,8.629628,39.238470,46.228102,...,aCM4,CL:0002129,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000383
HCAHeartST13233999_TTGTTTCCATACAACT-1,1,45,27,HCAHeartST13233999,1322,7.187657,2916.0,7.978311,32.407407,42.661180,...,FB3,CL:0002548,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000383


#### **suspension_type**

In [134]:
# suspension_type is 'na' for every observation because the assay is Visium

In [135]:
# Every observation gets suspension_type 'na'; the scalar is broadcast to all rows.
adata.obs['suspension_type'] = 'na'

In [136]:
# change data type

In [137]:
# Store suspension_type as a categorical column.
adata.obs['suspension_type'] = pd.Categorical(adata.obs['suspension_type'])

In [138]:
# view obs

In [139]:
# Display obs; suspension_type should now be the last column.
adata.obs

Unnamed: 0_level_0,in_tissue,array_row,array_col,sample,n_genes_by_counts,log1p_n_genes_by_counts,total_counts,log1p_total_counts,pct_counts_in_top_50_genes,pct_counts_in_top_100_genes,...,cell_type_ontology_term_id,donor_id,development_stage_ontology_term_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id,ethinic_origin,self_reported_ethnicity_ontology_term_id,sex_ontology_term_id,suspension_type
spot_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
HCAHeartST10238251_AAACATTTCCCGGATT-1,1,61,97,HCAHeartST10238251,1252,7.133296,3161.0,8.058960,47.390066,53.843720,...,CL:0002129,D8,HsapDv:0000239,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000384,na
HCAHeartST10238251_AAACCTAAGCAGCCGG-1,1,65,83,HCAHeartST10238251,1568,7.358194,4652.0,8.445268,50.322442,56.491831,...,CL:0002129,D8,HsapDv:0000239,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000384,na
HCAHeartST10238251_AAACGAGACGGTTGAT-1,1,35,79,HCAHeartST10238251,1689,7.432484,4137.0,8.327968,31.182016,40.802514,...,CL:0002548,D8,HsapDv:0000239,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000384,na
HCAHeartST10238251_AAACGGGCGTACGGGT-1,1,65,91,HCAHeartST10238251,1511,7.321189,4116.0,8.322880,49.781341,55.782313,...,CL:0002129,D8,HsapDv:0000239,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000384,na
HCAHeartST10238251_AAACGGTTGCGAACTG-1,1,67,59,HCAHeartST10238251,1173,7.068172,3142.0,8.052933,51.750477,57.702101,...,CL:0002129,D8,HsapDv:0000239,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000384,na
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HCAHeartST13233999_TTGTTCAGTGTGCTAC-1,1,24,64,HCAHeartST13233999,1979,7.590852,4601.0,8.434246,31.971311,40.056509,...,CL:1000477,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000383,na
HCAHeartST13233999_TTGTTCTAGATACGCT-1,1,21,3,HCAHeartST13233999,1278,7.153834,3435.0,8.142063,46.841339,54.497817,...,CL:0002129,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000383,na
HCAHeartST13233999_TTGTTGTGTGTCAAGA-1,1,31,77,HCAHeartST13233999,2057,7.629490,5594.0,8.629628,39.238470,46.228102,...,CL:0002129,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000383,na
HCAHeartST13233999_TTGTTTCCATACAACT-1,1,45,27,HCAHeartST13233999,1322,7.187657,2916.0,7.978311,32.407407,42.661180,...,CL:0002548,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000383,na


#### **tissue_ontology_term_id**

In [140]:
# identify the column in adata.obs which corresponds to tissue

In [141]:
# List the obs column names; 'region' is the column used for tissue below.
adata.obs.columns

Index(['in_tissue', 'array_row', 'array_col', 'sample', 'n_genes_by_counts',
       'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts',
       'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes',
       'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes', 'mt_frac',
       'n_counts', 'n_genes', 'annotation_JC', 'sangerID', 'Publication',
       'combinedID', 'donor', 'donor_type', 'region', 'region_finest', 'age',
       'gender', 'facility', 'cell_or_nuclei', 'modality', 'kit_10x',
       'flushed', 'region_cell2loc', 'assay_ontology_term_id',
       'columns_with_multiple_max', 'highest_cell_Density_columns',
       'cell_type_ontology_term_id', 'donor_id',
       'development_stage_ontology_term_id', 'disease_ontology_term_id',
       'is_primary_data', 'organism_ontology_term_id', 'ethinic_origin',
       'self_reported_ethnicity_ontology_term_id', 'sex_ontology_term_id',
       'suspension_type'],
      dtype='object')

In [142]:
# list unique values

In [143]:
# Distinct region codes present in the data; each one needs an entry in the mapping.
adata.obs['region'].unique().tolist()

['RA', 'LA', 'AX', 'RV', 'SP', 'SAN', 'AVN', 'LV']

In [144]:
# create a dictionary with tissue and corresponding tissue ontology term id

In [145]:
# Region code -> tissue ontology term.
# NOTE(review): term labels are not shown here — verify each ID against UBERON.
mapping = dict(
    AX='UBERON:0002098',
    LV='UBERON:0002084',
    RV='UBERON:0002080',
    LA='UBERON:0002079',
    SP='UBERON:0002094',
    RA='UBERON:0002078',
    SAN='UBERON:0002351',
    AVN='UBERON:0002352',
)

In [146]:
# add 'tissue_ontology_term_id' column

In [147]:
# Translate each region code to its tissue ontology term via `mapping`.
adata.obs['tissue_ontology_term_id'] = adata.obs['region'].map(mapping)
# Series.map leaves NaN for any code missing from the dict; fail fast here
# rather than carrying a missing ontology term into the curated object.
assert adata.obs['tissue_ontology_term_id'].notna().all(), (
    "region values without a tissue ontology mapping: "
    f"{sorted(set(adata.obs['region'].astype(str)) - set(mapping))}"
)

In [148]:
# change data type of column

In [149]:
# Store the tissue terms as a categorical column.
adata.obs['tissue_ontology_term_id'] = pd.Categorical(
    adata.obs['tissue_ontology_term_id']
)

In [150]:
#list the unique values in 'tissue_ontology_term_id' column

In [151]:
# Check which UBERON terms ended up in the new column.
list(adata.obs['tissue_ontology_term_id'].unique())

['UBERON:0002078',
 'UBERON:0002079',
 'UBERON:0002098',
 'UBERON:0002080',
 'UBERON:0002094',
 'UBERON:0002351',
 'UBERON:0002352',
 'UBERON:0002084']

In [152]:
# view obs

In [153]:
# Display obs to inspect the newly added tissue_ontology_term_id column (last column).
adata.obs

Unnamed: 0_level_0,in_tissue,array_row,array_col,sample,n_genes_by_counts,log1p_n_genes_by_counts,total_counts,log1p_total_counts,pct_counts_in_top_50_genes,pct_counts_in_top_100_genes,...,donor_id,development_stage_ontology_term_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id,ethinic_origin,self_reported_ethnicity_ontology_term_id,sex_ontology_term_id,suspension_type,tissue_ontology_term_id
spot_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
HCAHeartST10238251_AAACATTTCCCGGATT-1,1,61,97,HCAHeartST10238251,1252,7.133296,3161.0,8.058960,47.390066,53.843720,...,D8,HsapDv:0000239,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000384,na,UBERON:0002078
HCAHeartST10238251_AAACCTAAGCAGCCGG-1,1,65,83,HCAHeartST10238251,1568,7.358194,4652.0,8.445268,50.322442,56.491831,...,D8,HsapDv:0000239,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000384,na,UBERON:0002078
HCAHeartST10238251_AAACGAGACGGTTGAT-1,1,35,79,HCAHeartST10238251,1689,7.432484,4137.0,8.327968,31.182016,40.802514,...,D8,HsapDv:0000239,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000384,na,UBERON:0002078
HCAHeartST10238251_AAACGGGCGTACGGGT-1,1,65,91,HCAHeartST10238251,1511,7.321189,4116.0,8.322880,49.781341,55.782313,...,D8,HsapDv:0000239,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000384,na,UBERON:0002078
HCAHeartST10238251_AAACGGTTGCGAACTG-1,1,67,59,HCAHeartST10238251,1173,7.068172,3142.0,8.052933,51.750477,57.702101,...,D8,HsapDv:0000239,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000384,na,UBERON:0002078
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HCAHeartST13233999_TTGTTCAGTGTGCTAC-1,1,24,64,HCAHeartST13233999,1979,7.590852,4601.0,8.434246,31.971311,40.056509,...,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000383,na,UBERON:0002351
HCAHeartST13233999_TTGTTCTAGATACGCT-1,1,21,3,HCAHeartST13233999,1278,7.153834,3435.0,8.142063,46.841339,54.497817,...,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000383,na,UBERON:0002351
HCAHeartST13233999_TTGTTGTGTGTCAAGA-1,1,31,77,HCAHeartST13233999,2057,7.629490,5594.0,8.629628,39.238470,46.228102,...,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000383,na,UBERON:0002351
HCAHeartST13233999_TTGTTTCCATACAACT-1,1,45,27,HCAHeartST13233999,1322,7.187657,2916.0,7.978311,32.407407,42.661180,...,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,Caucasian,HANCESTRO:0005,PATO:0000383,na,UBERON:0002351


In [154]:
# List all obs columns ahead of the clean-up cell that deletes the ones not kept.
adata.obs.columns

Index(['in_tissue', 'array_row', 'array_col', 'sample', 'n_genes_by_counts',
       'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts',
       'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes',
       'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes', 'mt_frac',
       'n_counts', 'n_genes', 'annotation_JC', 'sangerID', 'Publication',
       'combinedID', 'donor', 'donor_type', 'region', 'region_finest', 'age',
       'gender', 'facility', 'cell_or_nuclei', 'modality', 'kit_10x',
       'flushed', 'region_cell2loc', 'assay_ontology_term_id',
       'columns_with_multiple_max', 'highest_cell_Density_columns',
       'cell_type_ontology_term_id', 'donor_id',
       'development_stage_ontology_term_id', 'disease_ontology_term_id',
       'is_primary_data', 'organism_ontology_term_id', 'ethinic_origin',
       'self_reported_ethnicity_ontology_term_id', 'sex_ontology_term_id',
       'suspension_type', 'tissue_ontology_term_id'],
      dtype='object')

In [155]:
# List the distinct values of 'combinedID'; the column is deleted in the next cell.
list(adata.obs['combinedID'].unique())

['na']

In [156]:
# Source columns that are not carried over into the curated object.
obsolete_obs_columns = [
    'donor',
    'gender',
    'highest_cell_Density_columns',
    'columns_with_multiple_max',
    'array_row',
    'array_col',
    'modality',
    'kit_10x',
    'ethinic_origin',
    'sangerID',
    'combinedID',
    'region',
    'cell_or_nuclei',
    'region_cell2loc',
]

# Delete them from obs one at a time, in the listed order; a missing column raises KeyError.
for column_name in obsolete_obs_columns:
    del adata.obs[column_name]

In [157]:
# Confirm which obs columns remain after the deletions.
adata.obs.columns

Index(['in_tissue', 'sample', 'n_genes_by_counts', 'log1p_n_genes_by_counts',
       'total_counts', 'log1p_total_counts', 'pct_counts_in_top_50_genes',
       'pct_counts_in_top_100_genes', 'pct_counts_in_top_200_genes',
       'pct_counts_in_top_500_genes', 'mt_frac', 'n_counts', 'n_genes',
       'annotation_JC', 'Publication', 'donor_type', 'region_finest', 'age',
       'facility', 'flushed', 'assay_ontology_term_id',
       'cell_type_ontology_term_id', 'donor_id',
       'development_stage_ontology_term_id', 'disease_ontology_term_id',
       'is_primary_data', 'organism_ontology_term_id',
       'self_reported_ethnicity_ontology_term_id', 'sex_ontology_term_id',
       'suspension_type', 'tissue_ontology_term_id'],
      dtype='object')

#### **obsm (Embeddings)**

In [158]:
# Show the obsm container and the keys it currently holds.
adata.obsm

AxisArrays with keys: MT, means_cell_abundance_w_sf, q05_cell_abundance_w_sf, q95_cell_abundance_w_sf, spatial, stds_cell_abundance_w_sf

In [159]:
# List the obsm keys.
adata.obsm.keys()

KeysView(AxisArrays with keys: MT, means_cell_abundance_w_sf, q05_cell_abundance_w_sf, q95_cell_abundance_w_sf, spatial, stds_cell_abundance_w_sf)

In [160]:
# Replace the stored object with its underlying array via `.values`
# (presumably it was loaded as a pandas DataFrame — TODO confirm).
adata.obsm['means_cell_abundance_w_sf']= adata.obsm['means_cell_abundance_w_sf'].values

In [161]:
# Replace the stored object with its underlying array via `.values`
# (presumably it was loaded as a pandas DataFrame — TODO confirm).
adata.obsm['stds_cell_abundance_w_sf'] = adata.obsm['stds_cell_abundance_w_sf'].values

In [162]:
# Replace the stored object with its underlying array via `.values`
# (presumably it was loaded as a pandas DataFrame — TODO confirm).
adata.obsm['q05_cell_abundance_w_sf'] = adata.obsm['q05_cell_abundance_w_sf'].values

In [163]:
# Replace the stored object with its underlying array via `.values`
# (presumably it was loaded as a pandas DataFrame — TODO confirm).
adata.obsm['q95_cell_abundance_w_sf']= adata.obsm['q95_cell_abundance_w_sf'].values

In [164]:
# Re-register each retained obsm entry under an 'X_'-prefixed key.
# NOTE(review): the prefix is presumably what the target schema expects for
# embeddings — confirm against the schema version being curated for.
# The unprefixed originals are deleted in a later cell.
for obsm_key in (
    'means_cell_abundance_w_sf',
    'q05_cell_abundance_w_sf',
    'q95_cell_abundance_w_sf',
    'stds_cell_abundance_w_sf',
    'spatial',
):
    adata.obsm[f'X_{obsm_key}'] = adata.obsm[obsm_key]

In [165]:
# Check that the X_-prefixed keys now exist alongside the original ones.
adata.obsm

AxisArrays with keys: MT, means_cell_abundance_w_sf, q05_cell_abundance_w_sf, q95_cell_abundance_w_sf, spatial, stds_cell_abundance_w_sf, X_means_cell_abundance_w_sf, X_q05_cell_abundance_w_sf, X_q95_cell_abundance_w_sf, X_stds_cell_abundance_w_sf, X_spatial

In [166]:
# Drop 'MT' and the unprefixed obsm entries; the X_-prefixed copies are the ones kept.
for stale_key in (
    'MT',
    'means_cell_abundance_w_sf',
    'q05_cell_abundance_w_sf',
    'q95_cell_abundance_w_sf',
    'stds_cell_abundance_w_sf',
    'spatial',
):
    del adata.obsm[stale_key]

In [167]:
# Confirm that only the X_-prefixed entries remain in obsm.
adata.obsm

AxisArrays with keys: X_means_cell_abundance_w_sf, X_q05_cell_abundance_w_sf, X_q95_cell_abundance_w_sf, X_stds_cell_abundance_w_sf, X_spatial

#### **uns (Dataset Metadata)**

In [168]:
# Inspect the unstructured metadata (uns) before adding dataset-level fields.
adata.uns

OverloadedDict, wrapping:
	{'spatial': {'HCAHeartST10238251': {'images': {'hires': array([[[0.93333334, 0.9411765 , 0.9529412 ],
        [0.92941177, 0.9411765 , 0.9490196 ],
        [0.92941177, 0.9411765 , 0.9490196 ],
        ...,
        [0.93333334, 0.9411765 , 0.9490196 ],
        [0.92941177, 0.9372549 , 0.9490196 ],
        [0.93333334, 0.9411765 , 0.9490196 ]],

       [[0.92941177, 0.9411765 , 0.9529412 ],
        [0.92941177, 0.9411765 , 0.9490196 ],
        [0.92941177, 0.9411765 , 0.9529412 ],
        ...,
        [0.93333334, 0.9411765 , 0.9490196 ],
        [0.93333334, 0.9411765 , 0.9490196 ],
        [0.93333334, 0.9411765 , 0.9490196 ]],

       [[0.92941177, 0.9411765 , 0.9490196 ],
        [0.92941177, 0.9411765 , 0.9490196 ],
        [0.93333334, 0.9411765 , 0.9490196 ],
        ...,
        [0.93333334, 0.9411765 , 0.9490196 ],
        [0.92941177, 0.9411765 , 0.9490196 ],
        [0.92941177, 0.9372549 , 0.9490196 ]],

       ...,

       [[0.92941177, 0.9411765 

In [169]:
# Store a free-text caption for the tissue image under uns['image_caption'].
adata.uns['image_caption'] = 'Shown here is an image of 10 μm thick slice of the adult human heart stained with H&E'

In [170]:
# Dataset title stored in uns.
adata.uns['title'] = 'Visium spatial - OCT'

In [171]:
# Name the obsm key to use as the default embedding; 'X_spatial' was created in the obsm section.
adata.uns['default_embedding'] = 'X_spatial'

In [172]:
# Verify the keys now present in uns.
adata.uns.keys()

dict_keys(['spatial', 'image_caption', 'title', 'default_embedding'])

### **Final checks and adjustments**

In [173]:
# Overview of the curated object: dimensions plus the obs / var / uns / obsm fields.
adata

AnnData object with n_obs × n_vars = 93788 × 33234
    obs: 'in_tissue', 'sample', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes', 'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes', 'mt_frac', 'n_counts', 'n_genes', 'annotation_JC', 'Publication', 'donor_type', 'region_finest', 'age', 'facility', 'flushed', 'assay_ontology_term_id', 'cell_type_ontology_term_id', 'donor_id', 'development_stage_ontology_term_id', 'disease_ontology_term_id', 'is_primary_data', 'organism_ontology_term_id', 'self_reported_ethnicity_ontology_term_id', 'sex_ontology_term_id', 'suspension_type', 'tissue_ontology_term_id'
    var: 'feature_is_filtered'
    uns: 'spatial', 'image_caption', 'title', 'default_embedding'
    obsm: 'X_means_cell_abundance_w_sf', 'X_q05_cell_abundance_w_sf', 'X_q95_cell_abundance_w_sf', 'X_stds_cell_abundance_w_sf', 'X_spatial'

In [174]:
# Inspect obs column dtypes ahead of the dtype conversions in the following cells.
adata.obs.dtypes

in_tissue                                      int64
sample                                      category
n_genes_by_counts                              int32
log1p_n_genes_by_counts                      float64
total_counts                                 float32
log1p_total_counts                           float32
pct_counts_in_top_50_genes                   float64
pct_counts_in_top_100_genes                  float64
pct_counts_in_top_200_genes                  float64
pct_counts_in_top_500_genes                  float64
mt_frac                                      float32
n_counts                                     float32
n_genes                                        int64
annotation_JC                               category
Publication                                 category
donor_type                                  category
region_finest                               category
age                                         category
facility                                    ca

In [175]:
# Downcast 64-bit numeric columns of var to their 32-bit counterparts.
# The dtypes are snapshotted once up front, so a column converted in the first
# pass cannot be picked up again by the second.
var_dtypes = adata.var.dtypes
for source_dtype, target_dtype in (('float64', 'float32'), ('int64', 'int32')):
    for column_name in var_dtypes[var_dtypes == source_dtype].index:
        adata.var[column_name] = adata.var[column_name].astype(target_dtype)
        print(f"changed {column_name} from {source_dtype} to {target_dtype}")

In [176]:
# Normalise obs column dtypes: 64-bit numerics become 32-bit and plain object
# columns become categoricals. The dtypes are snapshotted once up front, and the
# passes run in the order float -> int -> object, printing one line per converted column.
obs_dtypes = adata.obs.dtypes
obs_conversions = (
    ('float64', 'float32'),
    ('int64', 'int32'),
    ('object', 'category'),
)
for source_dtype, target_dtype in obs_conversions:
    for column_name in obs_dtypes[obs_dtypes == source_dtype].index:
        adata.obs[column_name] = adata.obs[column_name].astype(target_dtype)
        print(f"changed {column_name} from {source_dtype} to {target_dtype}")

changed log1p_n_genes_by_counts from float64 to float32
changed pct_counts_in_top_50_genes from float64 to float32
changed pct_counts_in_top_100_genes from float64 to float32
changed pct_counts_in_top_200_genes from float64 to float32
changed pct_counts_in_top_500_genes from float64 to float32
changed in_tissue from int64 to int32
changed n_genes from int64 to int32


In [177]:
# Display obs after the dtype conversions.
adata.obs

Unnamed: 0_level_0,in_tissue,sample,n_genes_by_counts,log1p_n_genes_by_counts,total_counts,log1p_total_counts,pct_counts_in_top_50_genes,pct_counts_in_top_100_genes,pct_counts_in_top_200_genes,pct_counts_in_top_500_genes,...,cell_type_ontology_term_id,donor_id,development_stage_ontology_term_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id,self_reported_ethnicity_ontology_term_id,sex_ontology_term_id,suspension_type,tissue_ontology_term_id
spot_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
HCAHeartST10238251_AAACATTTCCCGGATT-1,1,HCAHeartST10238251,1252,7.133296,3161.0,8.058960,47.390068,53.843719,61.689339,76.210060,...,CL:0002129,D8,HsapDv:0000239,PATO:0000461,False,NCBITaxon:9606,HANCESTRO:0005,PATO:0000384,na,UBERON:0002078
HCAHeartST10238251_AAACCTAAGCAGCCGG-1,1,HCAHeartST10238251,1568,7.358194,4652.0,8.445268,50.322441,56.491833,64.251938,77.042130,...,CL:0002129,D8,HsapDv:0000239,PATO:0000461,False,NCBITaxon:9606,HANCESTRO:0005,PATO:0000384,na,UBERON:0002078
HCAHeartST10238251_AAACGAGACGGTTGAT-1,1,HCAHeartST10238251,1689,7.432484,4137.0,8.327968,31.182016,40.802513,52.646847,69.954071,...,CL:0002548,D8,HsapDv:0000239,PATO:0000461,False,NCBITaxon:9606,HANCESTRO:0005,PATO:0000384,na,UBERON:0002078
HCAHeartST10238251_AAACGGGCGTACGGGT-1,1,HCAHeartST10238251,1511,7.321188,4116.0,8.322880,49.781342,55.782314,63.289600,75.437317,...,CL:0002129,D8,HsapDv:0000239,PATO:0000461,False,NCBITaxon:9606,HANCESTRO:0005,PATO:0000384,na,UBERON:0002078
HCAHeartST10238251_AAACGGTTGCGAACTG-1,1,HCAHeartST10238251,1173,7.068172,3142.0,8.052933,51.750477,57.702099,65.531509,78.580521,...,CL:0002129,D8,HsapDv:0000239,PATO:0000461,False,NCBITaxon:9606,HANCESTRO:0005,PATO:0000384,na,UBERON:0002078
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HCAHeartST13233999_TTGTTCAGTGTGCTAC-1,1,HCAHeartST13233999,1979,7.590852,4601.0,8.434246,31.971310,40.056511,50.228210,65.550964,...,CL:1000477,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,HANCESTRO:0005,PATO:0000383,na,UBERON:0002351
HCAHeartST13233999_TTGTTCTAGATACGCT-1,1,HCAHeartST13233999,1278,7.153834,3435.0,8.142063,46.841339,54.497818,63.871906,77.350800,...,CL:0002129,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,HANCESTRO:0005,PATO:0000383,na,UBERON:0002351
HCAHeartST13233999_TTGTTGTGTGTCAAGA-1,1,HCAHeartST13233999,2057,7.629490,5594.0,8.629628,39.238468,46.228100,55.362888,69.646049,...,CL:0002129,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,HANCESTRO:0005,PATO:0000383,na,UBERON:0002351
HCAHeartST13233999_TTGTTTCCATACAACT-1,1,HCAHeartST13233999,1322,7.187657,2916.0,7.978311,32.407406,42.661179,54.423870,71.810699,...,CL:0002548,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,HANCESTRO:0005,PATO:0000383,na,UBERON:0002351


In [178]:
# List the obs columns after the dtype conversions.
adata.obs.columns

Index(['in_tissue', 'sample', 'n_genes_by_counts', 'log1p_n_genes_by_counts',
       'total_counts', 'log1p_total_counts', 'pct_counts_in_top_50_genes',
       'pct_counts_in_top_100_genes', 'pct_counts_in_top_200_genes',
       'pct_counts_in_top_500_genes', 'mt_frac', 'n_counts', 'n_genes',
       'annotation_JC', 'Publication', 'donor_type', 'region_finest', 'age',
       'facility', 'flushed', 'assay_ontology_term_id',
       'cell_type_ontology_term_id', 'donor_id',
       'development_stage_ontology_term_id', 'disease_ontology_term_id',
       'is_primary_data', 'organism_ontology_term_id',
       'self_reported_ethnicity_ontology_term_id', 'sex_ontology_term_id',
       'suspension_type', 'tissue_ontology_term_id'],
      dtype='object')

In [179]:
# Display var (the per-feature annotation table).
adata.var

Unnamed: 0,feature_is_filtered
ENSG00000243485,False
ENSG00000237613,False
ENSG00000186092,False
ENSG00000238009,False
ENSG00000239945,False
...,...
ENSG00000277856,False
ENSG00000275063,False
ENSG00000271254,False
ENSG00000277475,False


In [180]:
# Display obs once more as a last look before writing (same view as the earlier obs cell).
adata.obs

Unnamed: 0_level_0,in_tissue,sample,n_genes_by_counts,log1p_n_genes_by_counts,total_counts,log1p_total_counts,pct_counts_in_top_50_genes,pct_counts_in_top_100_genes,pct_counts_in_top_200_genes,pct_counts_in_top_500_genes,...,cell_type_ontology_term_id,donor_id,development_stage_ontology_term_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id,self_reported_ethnicity_ontology_term_id,sex_ontology_term_id,suspension_type,tissue_ontology_term_id
spot_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
HCAHeartST10238251_AAACATTTCCCGGATT-1,1,HCAHeartST10238251,1252,7.133296,3161.0,8.058960,47.390068,53.843719,61.689339,76.210060,...,CL:0002129,D8,HsapDv:0000239,PATO:0000461,False,NCBITaxon:9606,HANCESTRO:0005,PATO:0000384,na,UBERON:0002078
HCAHeartST10238251_AAACCTAAGCAGCCGG-1,1,HCAHeartST10238251,1568,7.358194,4652.0,8.445268,50.322441,56.491833,64.251938,77.042130,...,CL:0002129,D8,HsapDv:0000239,PATO:0000461,False,NCBITaxon:9606,HANCESTRO:0005,PATO:0000384,na,UBERON:0002078
HCAHeartST10238251_AAACGAGACGGTTGAT-1,1,HCAHeartST10238251,1689,7.432484,4137.0,8.327968,31.182016,40.802513,52.646847,69.954071,...,CL:0002548,D8,HsapDv:0000239,PATO:0000461,False,NCBITaxon:9606,HANCESTRO:0005,PATO:0000384,na,UBERON:0002078
HCAHeartST10238251_AAACGGGCGTACGGGT-1,1,HCAHeartST10238251,1511,7.321188,4116.0,8.322880,49.781342,55.782314,63.289600,75.437317,...,CL:0002129,D8,HsapDv:0000239,PATO:0000461,False,NCBITaxon:9606,HANCESTRO:0005,PATO:0000384,na,UBERON:0002078
HCAHeartST10238251_AAACGGTTGCGAACTG-1,1,HCAHeartST10238251,1173,7.068172,3142.0,8.052933,51.750477,57.702099,65.531509,78.580521,...,CL:0002129,D8,HsapDv:0000239,PATO:0000461,False,NCBITaxon:9606,HANCESTRO:0005,PATO:0000384,na,UBERON:0002078
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HCAHeartST13233999_TTGTTCAGTGTGCTAC-1,1,HCAHeartST13233999,1979,7.590852,4601.0,8.434246,31.971310,40.056511,50.228210,65.550964,...,CL:1000477,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,HANCESTRO:0005,PATO:0000383,na,UBERON:0002351
HCAHeartST13233999_TTGTTCTAGATACGCT-1,1,HCAHeartST13233999,1278,7.153834,3435.0,8.142063,46.841339,54.497818,63.871906,77.350800,...,CL:0002129,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,HANCESTRO:0005,PATO:0000383,na,UBERON:0002351
HCAHeartST13233999_TTGTTGTGTGTCAAGA-1,1,HCAHeartST13233999,2057,7.629490,5594.0,8.629628,39.238468,46.228100,55.362888,69.646049,...,CL:0002129,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,HANCESTRO:0005,PATO:0000383,na,UBERON:0002351
HCAHeartST13233999_TTGTTTCCATACAACT-1,1,HCAHeartST13233999,1322,7.187657,2916.0,7.978311,32.407406,42.661179,54.423870,71.810699,...,CL:0002548,AH5,HsapDv:0000237,PATO:0000461,False,NCBITaxon:9606,HANCESTRO:0005,PATO:0000383,na,UBERON:0002351


In [181]:
# Final listing of the obs columns that will be written out.
adata.obs.columns

Index(['in_tissue', 'sample', 'n_genes_by_counts', 'log1p_n_genes_by_counts',
       'total_counts', 'log1p_total_counts', 'pct_counts_in_top_50_genes',
       'pct_counts_in_top_100_genes', 'pct_counts_in_top_200_genes',
       'pct_counts_in_top_500_genes', 'mt_frac', 'n_counts', 'n_genes',
       'annotation_JC', 'Publication', 'donor_type', 'region_finest', 'age',
       'facility', 'flushed', 'assay_ontology_term_id',
       'cell_type_ontology_term_id', 'donor_id',
       'development_stage_ontology_term_id', 'disease_ontology_term_id',
       'is_primary_data', 'organism_ontology_term_id',
       'self_reported_ethnicity_ontology_term_id', 'sex_ontology_term_id',
       'suspension_type', 'tissue_ontology_term_id'],
      dtype='object')

In [182]:
# check the format of the expression matrix

In [183]:
# Show the storage format, dtype, shape and number of stored elements of X.
adata.X

<93788x33234 sparse matrix of type '<class 'numpy.float32'>'
	with 147669634 stored elements in Compressed Sparse Row format>

In [184]:
#write the curated object to final_objects folder

In [185]:
# Persist the curated AnnData object as a gzip-compressed .h5ad file in the final_objects folder.
adata.write('/lustre/scratch127/cellgen/cellgeni/cxgportal_sets/heart_cell_atlas/final_objects/visium_OCT.h5ad', compression = 'gzip')