### **Curating visium_slide4.h5ad**

Article:   A human embryonic limb cell atlas resolved in space and time

DOI: https://doi.org/10.1038/s41586-023-06806-x

Data Source : https://developmental.cellatlas.io/embryonic-limb

##### **Mount farm**

mount-farm

##### **Packages required for curation**

In [1]:
#Import all packages required for curation

In [2]:
import numpy as np
import pandas as pd
import scanpy as sc
import scipy
from tqdm import tqdm
from scipy import sparse
from scipy.sparse import csr_matrix
import anndata as ad
import os
import subprocess
import math

### **Curation Schema**

##### **X (Matrix Layers)**

##### **AnnData object**

In [3]:
# Load the AnnData object

In [4]:
adata = sc.read_h5ad('/lustre/scratch127/cellgen/cellgeni/cxgportal_sets/limb_cell_atlas/Data/New_data/visium_slide4.h5ad')

In [5]:
# View the AnnData object

In [6]:
adata

AnnData object with n_obs × n_vars = 2884 × 33538
    obs: 'in_tissue', 'array_row', 'array_col', 'barcode', 'library_id', 'percent_mito', 'C2L: ADH+Fibro', 'C2L: AER-Basal', 'C2L: ArterialEndo', 'C2L: ArtiChon', 'C2L: B', 'C2L: Basal', 'C2L: CMP/GMP', 'C2L: ChondroProg', 'C2L: DC2', 'C2L: DefErythro', 'C2L: DefReticulo', 'C2L: DermFibro', 'C2L: DistalMes', 'C2L: F10+DermFibroProg', 'C2L: HOXC5+DermFibroProg', 'C2L: HyperChon', 'C2L: ISL1+Mes', 'C2L: InterMusFibro', 'C2L: InterZone', 'C2L: LMPP/ELP', 'C2L: LymphEndo', 'C2L: MFAP5+Fibro', 'C2L: MYH3+MyoC', 'C2L: MYL3+MyoC', 'C2L: Macro', 'C2L: Mast', 'C2L: Megakaryo', 'C2L: Melano', 'C2L: Mes1', 'C2L: Mes2', 'C2L: Mes3', 'C2L: Mes4', 'C2L: MesCond', 'C2L: Monocyte', 'C2L: Myelocyte', 'C2L: MyoB1', 'C2L: MyoB2', 'C2L: MyoC1', 'C2L: MyoC2', 'C2L: NK', 'C2L: NeuralFibro', 'C2L: Neuronal', 'C2L: OCP', 'C2L: OsteoB', 'C2L: PAX3+MyoProg', 'C2L: PAX7+MyoProg', 'C2L: Perichon', 'C2L: Pericyte', 'C2L: Periderm', 'C2L: Perimysium', 'C2L: Prehyper

##### **X- expression matrix**

In [7]:
# View the expression matrix of the anndata object

In [8]:
adata.X

<2884x33538 sparse matrix of type '<class 'numpy.float32'>'
	with 3666181 stored elements in Compressed Sparse Row format>

In [9]:
# Print the matrix to check whether they are normalized counts or raw counts. if the matrix has floating numbers,they are normalized counts.if they are integers, they are raw counts.

In [10]:
print(adata.X)

  (0, 21)	1.0
  (0, 29)	2.0
  (0, 38)	1.0
  (0, 48)	1.0
  (0, 51)	1.0
  (0, 53)	1.0
  (0, 66)	1.0
  (0, 69)	1.0
  (0, 76)	1.0
  (0, 114)	1.0
  (0, 122)	1.0
  (0, 130)	1.0
  (0, 154)	17.0
  (0, 160)	1.0
  (0, 166)	1.0
  (0, 167)	1.0
  (0, 190)	1.0
  (0, 210)	1.0
  (0, 229)	2.0
  (0, 240)	2.0
  (0, 244)	1.0
  (0, 259)	1.0
  (0, 264)	1.0
  (0, 346)	1.0
  (0, 378)	1.0
  :	:
  (2883, 32860)	1.0
  (2883, 32877)	1.0
  (2883, 32883)	1.0
  (2883, 32962)	1.0
  (2883, 33242)	1.0
  (2883, 33249)	1.0
  (2883, 33277)	1.0
  (2883, 33279)	1.0
  (2883, 33313)	1.0
  (2883, 33326)	2.0
  (2883, 33334)	1.0
  (2883, 33394)	1.0
  (2883, 33396)	1.0
  (2883, 33412)	1.0
  (2883, 33445)	2.0
  (2883, 33479)	2.0
  (2883, 33487)	2.0
  (2883, 33496)	5.0
  (2883, 33498)	1.0
  (2883, 33499)	3.0
  (2883, 33501)	8.0
  (2883, 33502)	7.0
  (2883, 33503)	4.0
  (2883, 33505)	7.0
  (2883, 33508)	2.0


##### **Variables(var)**

In [11]:
#View the var of anndata and raw object

In [12]:
adata.var

Unnamed: 0_level_0,feature_types,genome,ensg_ids
SYMBOL,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
MIR1302-2HG,Gene Expression,GRCh38,ENSG00000243485
FAM138A,Gene Expression,GRCh38,ENSG00000237613
OR4F5,Gene Expression,GRCh38,ENSG00000186092
AL627309.1,Gene Expression,GRCh38,ENSG00000238009
AL627309.3,Gene Expression,GRCh38,ENSG00000239945
...,...,...,...
AC233755.2,Gene Expression,GRCh38,ENSG00000277856
AC233755.1,Gene Expression,GRCh38,ENSG00000275063
AC240274.1,Gene Expression,GRCh38,ENSG00000271254
AC213203.1,Gene Expression,GRCh38,ENSG00000277475


In [13]:
#Store ensembl ids in a new column in adata.var by matching gene symbols and ensembl ids from the gene information file

In [14]:
adata.var['gene_symbols'] = adata.var_names

In [15]:
adata.var_names = adata.var['ensg_ids']

In [16]:
adata.var

Unnamed: 0_level_0,feature_types,genome,ensg_ids,gene_symbols
ensg_ids,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ENSG00000243485,Gene Expression,GRCh38,ENSG00000243485,MIR1302-2HG
ENSG00000237613,Gene Expression,GRCh38,ENSG00000237613,FAM138A
ENSG00000186092,Gene Expression,GRCh38,ENSG00000186092,OR4F5
ENSG00000238009,Gene Expression,GRCh38,ENSG00000238009,AL627309.1
ENSG00000239945,Gene Expression,GRCh38,ENSG00000239945,AL627309.3
...,...,...,...,...
ENSG00000277856,Gene Expression,GRCh38,ENSG00000277856,AC233755.2
ENSG00000275063,Gene Expression,GRCh38,ENSG00000275063,AC233755.1
ENSG00000271254,Gene Expression,GRCh38,ENSG00000271254,AC240274.1
ENSG00000277475,Gene Expression,GRCh38,ENSG00000277475,AC213203.1


In [17]:
# load the approved genes file

In [18]:
approved_genes = pd.read_csv('/lustre/scratch127/cellgen/cellgeni/cxgportal_sets/Endometrium_reference_integrated_atlas/genes_approved.csv')

In [19]:
# Create a dictionary from the approved genes file using the symbols and feature id columns.

In [20]:
genedict = {key: 1 for key in list(approved_genes.feature_id)}

In [21]:
genedict

{'ERCC-00002': 1,
 'ERCC-00003': 1,
 'ERCC-00004': 1,
 'ERCC-00009': 1,
 'ERCC-00012': 1,
 'ERCC-00013': 1,
 'ERCC-00014': 1,
 'ERCC-00016': 1,
 'ERCC-00017': 1,
 'ERCC-00019': 1,
 'ERCC-00022': 1,
 'ERCC-00024': 1,
 'ERCC-00025': 1,
 'ERCC-00028': 1,
 'ERCC-00031': 1,
 'ERCC-00033': 1,
 'ERCC-00034': 1,
 'ERCC-00035': 1,
 'ERCC-00039': 1,
 'ERCC-00040': 1,
 'ERCC-00041': 1,
 'ERCC-00042': 1,
 'ERCC-00043': 1,
 'ERCC-00044': 1,
 'ERCC-00046': 1,
 'ERCC-00048': 1,
 'ERCC-00051': 1,
 'ERCC-00053': 1,
 'ERCC-00054': 1,
 'ERCC-00057': 1,
 'ERCC-00058': 1,
 'ERCC-00059': 1,
 'ERCC-00060': 1,
 'ERCC-00061': 1,
 'ERCC-00062': 1,
 'ERCC-00067': 1,
 'ERCC-00069': 1,
 'ERCC-00071': 1,
 'ERCC-00073': 1,
 'ERCC-00074': 1,
 'ERCC-00075': 1,
 'ERCC-00076': 1,
 'ERCC-00077': 1,
 'ERCC-00078': 1,
 'ERCC-00079': 1,
 'ERCC-00081': 1,
 'ERCC-00083': 1,
 'ERCC-00084': 1,
 'ERCC-00085': 1,
 'ERCC-00086': 1,
 'ERCC-00092': 1,
 'ERCC-00095': 1,
 'ERCC-00096': 1,
 'ERCC-00097': 1,
 'ERCC-00098': 1,
 'ERCC-000

In [22]:
len(genedict)

119799

In [23]:
# Filter out the genes which are not in the approved genes file

In [24]:
var_to_keep_adata = [x for x in adata.var_names if (x in genedict)]

In [25]:
len(var_to_keep_adata)

33137

In [26]:
# Modify the anndata object by filtering out the filtered genes. copy the index column values to a new column called gene_symbols

In [27]:
adata = adata[:, var_to_keep_adata].copy()

In [28]:
#  View the var

In [29]:
adata.var

Unnamed: 0_level_0,feature_types,genome,ensg_ids,gene_symbols
ensg_ids,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ENSG00000243485,Gene Expression,GRCh38,ENSG00000243485,MIR1302-2HG
ENSG00000237613,Gene Expression,GRCh38,ENSG00000237613,FAM138A
ENSG00000186092,Gene Expression,GRCh38,ENSG00000186092,OR4F5
ENSG00000238009,Gene Expression,GRCh38,ENSG00000238009,AL627309.1
ENSG00000239945,Gene Expression,GRCh38,ENSG00000239945,AL627309.3
...,...,...,...,...
ENSG00000277856,Gene Expression,GRCh38,ENSG00000277856,AC233755.2
ENSG00000275063,Gene Expression,GRCh38,ENSG00000275063,AC233755.1
ENSG00000271254,Gene Expression,GRCh38,ENSG00000271254,AC240274.1
ENSG00000277475,Gene Expression,GRCh38,ENSG00000277475,AC213203.1


feature is filtered

In [30]:
# view var

In [31]:
adata.var['feature_is_filtered'] = [False] * len(adata.var)

In [32]:
adata.var

Unnamed: 0_level_0,feature_types,genome,ensg_ids,gene_symbols,feature_is_filtered
ensg_ids,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ENSG00000243485,Gene Expression,GRCh38,ENSG00000243485,MIR1302-2HG,False
ENSG00000237613,Gene Expression,GRCh38,ENSG00000237613,FAM138A,False
ENSG00000186092,Gene Expression,GRCh38,ENSG00000186092,OR4F5,False
ENSG00000238009,Gene Expression,GRCh38,ENSG00000238009,AL627309.1,False
ENSG00000239945,Gene Expression,GRCh38,ENSG00000239945,AL627309.3,False
...,...,...,...,...,...
ENSG00000277856,Gene Expression,GRCh38,ENSG00000277856,AC233755.2,False
ENSG00000275063,Gene Expression,GRCh38,ENSG00000275063,AC233755.1,False
ENSG00000271254,Gene Expression,GRCh38,ENSG00000271254,AC240274.1,False
ENSG00000277475,Gene Expression,GRCh38,ENSG00000277475,AC213203.1,False


In [33]:
del adata.var['ensg_ids']
del adata.var['gene_symbols']

#### **Observations(obs) (Cell metadata)**

In [34]:
#view obs

In [35]:
adata.obs

Unnamed: 0,in_tissue,array_row,array_col,barcode,library_id,percent_mito,C2L: ADH+Fibro,C2L: AER-Basal,C2L: ArterialEndo,C2L: ArtiChon,...,C2L: SMC,C2L: SMProg,C2L: STMN2+Fibro,C2L: Schwann,C2L: SchwannProg,C2L: SynapSchwann,C2L: Teno,C2L: TenoProg,C2L: TransMes,C2L: VenousEndo
AAACAAGTATCTCCCA-1_WSSS_THYst9383362,1,50,102,AAACAAGTATCTCCCA-1,WSSS_THYst9383362,0.0,2.218564,0.010439,0.133704,0.018929,...,0.135793,0.088756,0.010351,0.011605,0.011769,0.007822,0.034527,0.006111,0.013211,0.133460
AAACAGAGCGACTCCT-1_WSSS_THYst9383362,1,14,94,AAACAGAGCGACTCCT-1,WSSS_THYst9383362,0.0,0.001227,0.000503,0.001423,1.043135,...,0.006821,0.000908,0.002018,0.000799,0.000975,0.004365,2.598514,0.014163,0.006748,0.000287
AAACATTTCCCGGATT-1_WSSS_THYst9383362,1,61,97,AAACATTTCCCGGATT-1,WSSS_THYst9383362,0.0,0.087009,0.004649,0.018821,0.101900,...,0.152286,0.024667,0.024501,0.017384,0.006300,0.010542,0.124252,0.009752,0.012791,0.019356
AAACCCGAACGAAATC-1_WSSS_THYst9383362,1,45,115,AAACCCGAACGAAATC-1,WSSS_THYst9383362,0.0,1.935481,0.008130,0.479325,0.059956,...,0.609848,0.305564,0.022923,0.072731,0.031010,0.020083,0.142549,0.017242,0.025883,0.809012
AAACCGGAAATGTTAA-1_WSSS_THYst9383362,1,54,124,AAACCGGAAATGTTAA-1,WSSS_THYst9383362,0.0,1.796638,0.007400,0.056878,0.006483,...,0.062173,0.022973,0.004813,0.008480,0.007687,0.008087,0.023354,0.002875,0.005616,0.035984
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTGTTAGCAAATTCGA-1_WSSS_THYst9383362,1,22,42,TTGTTAGCAAATTCGA-1,WSSS_THYst9383362,0.0,0.156103,0.005125,3.687505,0.043005,...,10.768473,0.575757,0.015881,0.103607,0.043263,0.012829,0.018054,0.009941,0.027356,1.095297
TTGTTCAGTGTGCTAC-1_WSSS_THYst9383362,1,24,64,TTGTTCAGTGTGCTAC-1,WSSS_THYst9383362,0.0,0.456833,0.010483,0.074283,0.084770,...,0.121389,0.050335,0.032287,0.022685,0.012596,0.016812,0.219800,0.019495,0.022607,0.077236
TTGTTCTAGATACGCT-1_WSSS_THYst9383362,1,21,3,TTGTTCTAGATACGCT-1,WSSS_THYst9383362,0.0,0.006329,0.000652,0.007203,0.016021,...,0.005470,0.001746,0.002808,0.000849,0.000625,0.000774,0.183933,0.001021,0.002328,0.002279
TTGTTGTGTGTCAAGA-1_WSSS_THYst9383362,1,31,77,TTGTTGTGTGTCAAGA-1,WSSS_THYst9383362,0.0,0.671205,0.004617,0.004422,0.014033,...,0.012821,0.006812,0.009767,0.003383,0.003804,0.005791,0.036483,0.003499,0.009625,0.002990


In [36]:
adata.obs.columns

Index(['in_tissue', 'array_row', 'array_col', 'barcode', 'library_id',
       'percent_mito', 'C2L: ADH+Fibro', 'C2L: AER-Basal', 'C2L: ArterialEndo',
       'C2L: ArtiChon', 'C2L: B', 'C2L: Basal', 'C2L: CMP/GMP',
       'C2L: ChondroProg', 'C2L: DC2', 'C2L: DefErythro', 'C2L: DefReticulo',
       'C2L: DermFibro', 'C2L: DistalMes', 'C2L: F10+DermFibroProg',
       'C2L: HOXC5+DermFibroProg', 'C2L: HyperChon', 'C2L: ISL1+Mes',
       'C2L: InterMusFibro', 'C2L: InterZone', 'C2L: LMPP/ELP',
       'C2L: LymphEndo', 'C2L: MFAP5+Fibro', 'C2L: MYH3+MyoC',
       'C2L: MYL3+MyoC', 'C2L: Macro', 'C2L: Mast', 'C2L: Megakaryo',
       'C2L: Melano', 'C2L: Mes1', 'C2L: Mes2', 'C2L: Mes3', 'C2L: Mes4',
       'C2L: MesCond', 'C2L: Monocyte', 'C2L: Myelocyte', 'C2L: MyoB1',
       'C2L: MyoB2', 'C2L: MyoC1', 'C2L: MyoC2', 'C2L: NK', 'C2L: NeuralFibro',
       'C2L: Neuronal', 'C2L: OCP', 'C2L: OsteoB', 'C2L: PAX3+MyoProg',
       'C2L: PAX7+MyoProg', 'C2L: Perichon', 'C2L: Pericyte', 'C2L: Perid

#### **assay_ontology_term_id**

In [37]:
# add the assay_ontology_term_id column

In [38]:
adata.obs['assay_ontology_term_id'] = ['EFO:0010961'] * len(adata.obs)

In [39]:
# change datatype of the column

In [40]:
adata.obs['assay_ontology_term_id'] = adata.obs['assay_ontology_term_id'].astype('category')

In [41]:
# view adata.obs

In [42]:
adata.obs

Unnamed: 0,in_tissue,array_row,array_col,barcode,library_id,percent_mito,C2L: ADH+Fibro,C2L: AER-Basal,C2L: ArterialEndo,C2L: ArtiChon,...,C2L: SMProg,C2L: STMN2+Fibro,C2L: Schwann,C2L: SchwannProg,C2L: SynapSchwann,C2L: Teno,C2L: TenoProg,C2L: TransMes,C2L: VenousEndo,assay_ontology_term_id
AAACAAGTATCTCCCA-1_WSSS_THYst9383362,1,50,102,AAACAAGTATCTCCCA-1,WSSS_THYst9383362,0.0,2.218564,0.010439,0.133704,0.018929,...,0.088756,0.010351,0.011605,0.011769,0.007822,0.034527,0.006111,0.013211,0.133460,EFO:0010961
AAACAGAGCGACTCCT-1_WSSS_THYst9383362,1,14,94,AAACAGAGCGACTCCT-1,WSSS_THYst9383362,0.0,0.001227,0.000503,0.001423,1.043135,...,0.000908,0.002018,0.000799,0.000975,0.004365,2.598514,0.014163,0.006748,0.000287,EFO:0010961
AAACATTTCCCGGATT-1_WSSS_THYst9383362,1,61,97,AAACATTTCCCGGATT-1,WSSS_THYst9383362,0.0,0.087009,0.004649,0.018821,0.101900,...,0.024667,0.024501,0.017384,0.006300,0.010542,0.124252,0.009752,0.012791,0.019356,EFO:0010961
AAACCCGAACGAAATC-1_WSSS_THYst9383362,1,45,115,AAACCCGAACGAAATC-1,WSSS_THYst9383362,0.0,1.935481,0.008130,0.479325,0.059956,...,0.305564,0.022923,0.072731,0.031010,0.020083,0.142549,0.017242,0.025883,0.809012,EFO:0010961
AAACCGGAAATGTTAA-1_WSSS_THYst9383362,1,54,124,AAACCGGAAATGTTAA-1,WSSS_THYst9383362,0.0,1.796638,0.007400,0.056878,0.006483,...,0.022973,0.004813,0.008480,0.007687,0.008087,0.023354,0.002875,0.005616,0.035984,EFO:0010961
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTGTTAGCAAATTCGA-1_WSSS_THYst9383362,1,22,42,TTGTTAGCAAATTCGA-1,WSSS_THYst9383362,0.0,0.156103,0.005125,3.687505,0.043005,...,0.575757,0.015881,0.103607,0.043263,0.012829,0.018054,0.009941,0.027356,1.095297,EFO:0010961
TTGTTCAGTGTGCTAC-1_WSSS_THYst9383362,1,24,64,TTGTTCAGTGTGCTAC-1,WSSS_THYst9383362,0.0,0.456833,0.010483,0.074283,0.084770,...,0.050335,0.032287,0.022685,0.012596,0.016812,0.219800,0.019495,0.022607,0.077236,EFO:0010961
TTGTTCTAGATACGCT-1_WSSS_THYst9383362,1,21,3,TTGTTCTAGATACGCT-1,WSSS_THYst9383362,0.0,0.006329,0.000652,0.007203,0.016021,...,0.001746,0.002808,0.000849,0.000625,0.000774,0.183933,0.001021,0.002328,0.002279,EFO:0010961
TTGTTGTGTGTCAAGA-1_WSSS_THYst9383362,1,31,77,TTGTTGTGTGTCAAGA-1,WSSS_THYst9383362,0.0,0.671205,0.004617,0.004422,0.014033,...,0.006812,0.009767,0.003383,0.003804,0.005791,0.036483,0.003499,0.009625,0.002990,EFO:0010961


#### **cell_type_ontology_term_id**

In [43]:
#get the column in adata.obs related. to cell type annotation

In [44]:
adata.obs.columns

Index(['in_tissue', 'array_row', 'array_col', 'barcode', 'library_id',
       'percent_mito', 'C2L: ADH+Fibro', 'C2L: AER-Basal', 'C2L: ArterialEndo',
       'C2L: ArtiChon', 'C2L: B', 'C2L: Basal', 'C2L: CMP/GMP',
       'C2L: ChondroProg', 'C2L: DC2', 'C2L: DefErythro', 'C2L: DefReticulo',
       'C2L: DermFibro', 'C2L: DistalMes', 'C2L: F10+DermFibroProg',
       'C2L: HOXC5+DermFibroProg', 'C2L: HyperChon', 'C2L: ISL1+Mes',
       'C2L: InterMusFibro', 'C2L: InterZone', 'C2L: LMPP/ELP',
       'C2L: LymphEndo', 'C2L: MFAP5+Fibro', 'C2L: MYH3+MyoC',
       'C2L: MYL3+MyoC', 'C2L: Macro', 'C2L: Mast', 'C2L: Megakaryo',
       'C2L: Melano', 'C2L: Mes1', 'C2L: Mes2', 'C2L: Mes3', 'C2L: Mes4',
       'C2L: MesCond', 'C2L: Monocyte', 'C2L: Myelocyte', 'C2L: MyoB1',
       'C2L: MyoB2', 'C2L: MyoC1', 'C2L: MyoC2', 'C2L: NK', 'C2L: NeuralFibro',
       'C2L: Neuronal', 'C2L: OCP', 'C2L: OsteoB', 'C2L: PAX3+MyoProg',
       'C2L: PAX7+MyoProg', 'C2L: Perichon', 'C2L: Pericyte', 'C2L: Perid

In [45]:
adata.obsm

AxisArrays with keys: X_spatial

In [46]:
c2l_columns = [col for col in adata.obs.columns if col.startswith('C2L')]

In [47]:
c2l_columns

['C2L: ADH+Fibro',
 'C2L: AER-Basal',
 'C2L: ArterialEndo',
 'C2L: ArtiChon',
 'C2L: B',
 'C2L: Basal',
 'C2L: CMP/GMP',
 'C2L: ChondroProg',
 'C2L: DC2',
 'C2L: DefErythro',
 'C2L: DefReticulo',
 'C2L: DermFibro',
 'C2L: DistalMes',
 'C2L: F10+DermFibroProg',
 'C2L: HOXC5+DermFibroProg',
 'C2L: HyperChon',
 'C2L: ISL1+Mes',
 'C2L: InterMusFibro',
 'C2L: InterZone',
 'C2L: LMPP/ELP',
 'C2L: LymphEndo',
 'C2L: MFAP5+Fibro',
 'C2L: MYH3+MyoC',
 'C2L: MYL3+MyoC',
 'C2L: Macro',
 'C2L: Mast',
 'C2L: Megakaryo',
 'C2L: Melano',
 'C2L: Mes1',
 'C2L: Mes2',
 'C2L: Mes3',
 'C2L: Mes4',
 'C2L: MesCond',
 'C2L: Monocyte',
 'C2L: Myelocyte',
 'C2L: MyoB1',
 'C2L: MyoB2',
 'C2L: MyoC1',
 'C2L: MyoC2',
 'C2L: NK',
 'C2L: NeuralFibro',
 'C2L: Neuronal',
 'C2L: OCP',
 'C2L: OsteoB',
 'C2L: PAX3+MyoProg',
 'C2L: PAX7+MyoProg',
 'C2L: Perichon',
 'C2L: Pericyte',
 'C2L: Periderm',
 'C2L: Perimysium',
 'C2L: PrehyperChon',
 'C2L: PrimErythro1',
 'C2L: PrimErythro2',
 'C2L: ProlifChon',
 'C2L: ProxMes',


In [48]:
adata.obs['max_c2l_column'] = adata.obs[c2l_columns].idxmax(axis=1)

In [49]:
adata.obs['max_c2l_column_value'] = adata.obs[c2l_columns].max(axis=1)

In [50]:
adata.obs

Unnamed: 0,in_tissue,array_row,array_col,barcode,library_id,percent_mito,C2L: ADH+Fibro,C2L: AER-Basal,C2L: ArterialEndo,C2L: ArtiChon,...,C2L: Schwann,C2L: SchwannProg,C2L: SynapSchwann,C2L: Teno,C2L: TenoProg,C2L: TransMes,C2L: VenousEndo,assay_ontology_term_id,max_c2l_column,max_c2l_column_value
AAACAAGTATCTCCCA-1_WSSS_THYst9383362,1,50,102,AAACAAGTATCTCCCA-1,WSSS_THYst9383362,0.0,2.218564,0.010439,0.133704,0.018929,...,0.011605,0.011769,0.007822,0.034527,0.006111,0.013211,0.133460,EFO:0010961,C2L: MYL3+MyoC,14.048070
AAACAGAGCGACTCCT-1_WSSS_THYst9383362,1,14,94,AAACAGAGCGACTCCT-1,WSSS_THYst9383362,0.0,0.001227,0.000503,0.001423,1.043135,...,0.000799,0.000975,0.004365,2.598514,0.014163,0.006748,0.000287,EFO:0010961,C2L: RestingChon,3.953318
AAACATTTCCCGGATT-1_WSSS_THYst9383362,1,61,97,AAACATTTCCCGGATT-1,WSSS_THYst9383362,0.0,0.087009,0.004649,0.018821,0.101900,...,0.017384,0.006300,0.010542,0.124252,0.009752,0.012791,0.019356,EFO:0010961,C2L: MFAP5+Fibro,8.850656
AAACCCGAACGAAATC-1_WSSS_THYst9383362,1,45,115,AAACCCGAACGAAATC-1,WSSS_THYst9383362,0.0,1.935481,0.008130,0.479325,0.059956,...,0.072731,0.031010,0.020083,0.142549,0.017242,0.025883,0.809012,EFO:0010961,C2L: MYL3+MyoC,8.408436
AAACCGGAAATGTTAA-1_WSSS_THYst9383362,1,54,124,AAACCGGAAATGTTAA-1,WSSS_THYst9383362,0.0,1.796638,0.007400,0.056878,0.006483,...,0.008480,0.007687,0.008087,0.023354,0.002875,0.005616,0.035984,EFO:0010961,C2L: MYL3+MyoC,8.520630
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTGTTAGCAAATTCGA-1_WSSS_THYst9383362,1,22,42,TTGTTAGCAAATTCGA-1,WSSS_THYst9383362,0.0,0.156103,0.005125,3.687505,0.043005,...,0.103607,0.043263,0.012829,0.018054,0.009941,0.027356,1.095297,EFO:0010961,C2L: SMC,10.768473
TTGTTCAGTGTGCTAC-1_WSSS_THYst9383362,1,24,64,TTGTTCAGTGTGCTAC-1,WSSS_THYst9383362,0.0,0.456833,0.010483,0.074283,0.084770,...,0.022685,0.012596,0.016812,0.219800,0.019495,0.022607,0.077236,EFO:0010961,C2L: MFAP5+Fibro,6.152310
TTGTTCTAGATACGCT-1_WSSS_THYst9383362,1,21,3,TTGTTCTAGATACGCT-1,WSSS_THYst9383362,0.0,0.006329,0.000652,0.007203,0.016021,...,0.000849,0.000625,0.000774,0.183933,0.001021,0.002328,0.002279,EFO:0010961,C2L: Teno,0.183933
TTGTTGTGTGTCAAGA-1_WSSS_THYst9383362,1,31,77,TTGTTGTGTGTCAAGA-1,WSSS_THYst9383362,0.0,0.671205,0.004617,0.004422,0.014033,...,0.003383,0.003804,0.005791,0.036483,0.003499,0.009625,0.002990,EFO:0010961,C2L: MYL3+MyoC,5.594432


In [51]:
list(adata.obs['max_c2l_column'].unique())

['C2L: MYL3+MyoC',
 'C2L: RestingChon',
 'C2L: MFAP5+Fibro',
 'C2L: Teno',
 'C2L: F10+DermFibroProg',
 'C2L: ArterialEndo',
 'C2L: HyperChon',
 'C2L: MYH3+MyoC',
 'C2L: DC2',
 'C2L: Perimysium',
 'C2L: HOXC5+DermFibroProg',
 'C2L: SMC',
 'C2L: DermFibro',
 'C2L: DefReticulo',
 'C2L: ArtiChon',
 'C2L: Schwann',
 'C2L: Periderm',
 'C2L: PAX7+MyoProg',
 'C2L: Pericyte',
 'C2L: NeuralFibro',
 'C2L: Macro',
 'C2L: VenousEndo',
 'C2L: OsteoB',
 'C2L: Megakaryo',
 'C2L: MesCond',
 'C2L: ChondroProg',
 'C2L: Basal',
 'C2L: LymphEndo',
 'C2L: SchwannProg',
 'C2L: DistalMes']

In [52]:
import pandas as pd

# Assuming adata.obs['col'] is a pandas Series object
adata.obs['max_c2l_column'] = adata.obs['max_c2l_column'].fillna('NaN').astype(str)


In [53]:
list(adata.obs['max_c2l_column'].unique())

['C2L: MYL3+MyoC',
 'C2L: RestingChon',
 'C2L: MFAP5+Fibro',
 'C2L: Teno',
 'C2L: F10+DermFibroProg',
 'C2L: ArterialEndo',
 'C2L: HyperChon',
 'C2L: MYH3+MyoC',
 'C2L: DC2',
 'C2L: Perimysium',
 'C2L: HOXC5+DermFibroProg',
 'C2L: SMC',
 'C2L: DermFibro',
 'C2L: DefReticulo',
 'C2L: ArtiChon',
 'C2L: Schwann',
 'C2L: Periderm',
 'C2L: PAX7+MyoProg',
 'C2L: Pericyte',
 'C2L: NeuralFibro',
 'C2L: Macro',
 'C2L: VenousEndo',
 'C2L: OsteoB',
 'C2L: Megakaryo',
 'C2L: MesCond',
 'C2L: ChondroProg',
 'C2L: Basal',
 'C2L: LymphEndo',
 'C2L: SchwannProg',
 'C2L: DistalMes']

In [54]:
mapping= {'C2L: Teno' :'CL:0000388',
 'C2L: SMC':'CL:0000192',
 'C2L: F10+DermFibroProg':'CL:0002551',
 'C2L: MYL3+MyoC':'CL:0000187',
 'C2L: HOXC5+DermFibroProg':'CL:0002551',
 'C2L: MFAP5+Fibro':'CL:0000057',
 'C2L: Pericyte':'CL:0000669',
 'C2L: HyperChon':'CL:0000743',
 'C2L: MYH3+MyoC':'CL:0000187',
 'C2L: Periderm':'CL:0000078',
 'C2L: Perimysium':'CL:0002320',
 'C2L: DermFibro':'CL:0002551',
 'C2L: Schwann':'CL:0002573',
 'C2L: ADH+Fibro':'CL:1001609',
 'C2L: RestingChon':'CL:0000138',
 'C2L: MesCond':'CL:0000138',
 'C2L: DefErythro':'CL:0000232',
 'C2L: ArtiChon':'CL:1001607',
 'C2L: Basal':'CL:0000646',
 'C2L: PAX7+MyoProg':'CL:0000187',
 'C2L: Macro':'CL:0000235',
 'C2L: ArterialEndo':'CL:1000413',
 'C2L: DefReticulo':'CL:0000558',
 'C2L: VenousEndo':'CL:0002543',
 'C2L: OsteoB':'CL:0000062',
 'C2L: NeuralFibro':'CL:0000057',
 'C2L: PrimErythro1':'CL:0002355',
 'C2L: AER-Basal':'CL:0000646',
 'C2L: ChondroProg':'CL:0000138',
 'C2L: SMProg':'CL:0000192',
 'NaN':'unknown',
 'C2L: DistalMes':'CL:0008019',
 'C2L: Neuronal':'CL:0000540',
 'C2L: PrehyperChon':'CL:0000138', 
 'C2L: Megakaryo' :'CL:0000556',
 'C2L: LymphEndo' : 'CL:0002138',
 'C2L: Mes4':'CL:0008019',
 'C2L: STMN2+Fibro':'CL:0002551',
 'C2L: InterZone':'CL:0008019',
 'C2L: SynapSchwann':'CL:0002573',
 'C2L: DC2':'CL:0000990',
 'C2L: SchwannProg':'CL:0002573',
 'C2L: PAX3+MyoProg':'CL:0000515',
 'C2L: Mes3':'CL:0008019',
 'C2L: Perichon':'CL:0000058',
 'C2L: TenoProg':'CL:0000388',
 'C2L: ProlifChon':'CL:0000138',
 'C2L: RDH10+DistalMes':'CL:0008019',
 'C2L: PrimErythro2':'CL:0002355',
 'C2L: TransMes':'CL:0008019',
 'C2L: Mes2':'CL:0008019'}

In [55]:
# create a dictionary of cell type and ontology term

In [56]:
# add the cell_type_ontology_term_id column

In [57]:
adata.obs['cell_type_ontology_term_id'] = adata.obs['max_c2l_column'].map(mapping)

In [58]:
# change datatype of the column

In [59]:
adata.obs['cell_type_ontology_term_id'] = adata.obs['cell_type_ontology_term_id'].astype('category')

In [60]:
list(adata.obs['cell_type_ontology_term_id'].unique())

['CL:0000187',
 'CL:0000138',
 'CL:0000057',
 'CL:0000388',
 'CL:0002551',
 'CL:1000413',
 'CL:0000743',
 'CL:0000990',
 'CL:0002320',
 'CL:0000192',
 'CL:0000558',
 'CL:1001607',
 'CL:0002573',
 'CL:0000078',
 'CL:0000669',
 'CL:0000235',
 'CL:0002543',
 'CL:0000062',
 'CL:0000556',
 'CL:0000646',
 'CL:0002138',
 'CL:0008019']

In [61]:
adata.obs

Unnamed: 0,in_tissue,array_row,array_col,barcode,library_id,percent_mito,C2L: ADH+Fibro,C2L: AER-Basal,C2L: ArterialEndo,C2L: ArtiChon,...,C2L: SchwannProg,C2L: SynapSchwann,C2L: Teno,C2L: TenoProg,C2L: TransMes,C2L: VenousEndo,assay_ontology_term_id,max_c2l_column,max_c2l_column_value,cell_type_ontology_term_id
AAACAAGTATCTCCCA-1_WSSS_THYst9383362,1,50,102,AAACAAGTATCTCCCA-1,WSSS_THYst9383362,0.0,2.218564,0.010439,0.133704,0.018929,...,0.011769,0.007822,0.034527,0.006111,0.013211,0.133460,EFO:0010961,C2L: MYL3+MyoC,14.048070,CL:0000187
AAACAGAGCGACTCCT-1_WSSS_THYst9383362,1,14,94,AAACAGAGCGACTCCT-1,WSSS_THYst9383362,0.0,0.001227,0.000503,0.001423,1.043135,...,0.000975,0.004365,2.598514,0.014163,0.006748,0.000287,EFO:0010961,C2L: RestingChon,3.953318,CL:0000138
AAACATTTCCCGGATT-1_WSSS_THYst9383362,1,61,97,AAACATTTCCCGGATT-1,WSSS_THYst9383362,0.0,0.087009,0.004649,0.018821,0.101900,...,0.006300,0.010542,0.124252,0.009752,0.012791,0.019356,EFO:0010961,C2L: MFAP5+Fibro,8.850656,CL:0000057
AAACCCGAACGAAATC-1_WSSS_THYst9383362,1,45,115,AAACCCGAACGAAATC-1,WSSS_THYst9383362,0.0,1.935481,0.008130,0.479325,0.059956,...,0.031010,0.020083,0.142549,0.017242,0.025883,0.809012,EFO:0010961,C2L: MYL3+MyoC,8.408436,CL:0000187
AAACCGGAAATGTTAA-1_WSSS_THYst9383362,1,54,124,AAACCGGAAATGTTAA-1,WSSS_THYst9383362,0.0,1.796638,0.007400,0.056878,0.006483,...,0.007687,0.008087,0.023354,0.002875,0.005616,0.035984,EFO:0010961,C2L: MYL3+MyoC,8.520630,CL:0000187
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTGTTAGCAAATTCGA-1_WSSS_THYst9383362,1,22,42,TTGTTAGCAAATTCGA-1,WSSS_THYst9383362,0.0,0.156103,0.005125,3.687505,0.043005,...,0.043263,0.012829,0.018054,0.009941,0.027356,1.095297,EFO:0010961,C2L: SMC,10.768473,CL:0000192
TTGTTCAGTGTGCTAC-1_WSSS_THYst9383362,1,24,64,TTGTTCAGTGTGCTAC-1,WSSS_THYst9383362,0.0,0.456833,0.010483,0.074283,0.084770,...,0.012596,0.016812,0.219800,0.019495,0.022607,0.077236,EFO:0010961,C2L: MFAP5+Fibro,6.152310,CL:0000057
TTGTTCTAGATACGCT-1_WSSS_THYst9383362,1,21,3,TTGTTCTAGATACGCT-1,WSSS_THYst9383362,0.0,0.006329,0.000652,0.007203,0.016021,...,0.000625,0.000774,0.183933,0.001021,0.002328,0.002279,EFO:0010961,C2L: Teno,0.183933,CL:0000388
TTGTTGTGTGTCAAGA-1_WSSS_THYst9383362,1,31,77,TTGTTGTGTGTCAAGA-1,WSSS_THYst9383362,0.0,0.671205,0.004617,0.004422,0.014033,...,0.003804,0.005791,0.036483,0.003499,0.009625,0.002990,EFO:0010961,C2L: MYL3+MyoC,5.594432,CL:0000187


#### **donor_id**

In [62]:
#identify the column in adata.obs which provides donor information

In [63]:
adata.obs.columns

Index(['in_tissue', 'array_row', 'array_col', 'barcode', 'library_id',
       'percent_mito', 'C2L: ADH+Fibro', 'C2L: AER-Basal', 'C2L: ArterialEndo',
       'C2L: ArtiChon', 'C2L: B', 'C2L: Basal', 'C2L: CMP/GMP',
       'C2L: ChondroProg', 'C2L: DC2', 'C2L: DefErythro', 'C2L: DefReticulo',
       'C2L: DermFibro', 'C2L: DistalMes', 'C2L: F10+DermFibroProg',
       'C2L: HOXC5+DermFibroProg', 'C2L: HyperChon', 'C2L: ISL1+Mes',
       'C2L: InterMusFibro', 'C2L: InterZone', 'C2L: LMPP/ELP',
       'C2L: LymphEndo', 'C2L: MFAP5+Fibro', 'C2L: MYH3+MyoC',
       'C2L: MYL3+MyoC', 'C2L: Macro', 'C2L: Mast', 'C2L: Megakaryo',
       'C2L: Melano', 'C2L: Mes1', 'C2L: Mes2', 'C2L: Mes3', 'C2L: Mes4',
       'C2L: MesCond', 'C2L: Monocyte', 'C2L: Myelocyte', 'C2L: MyoB1',
       'C2L: MyoB2', 'C2L: MyoC1', 'C2L: MyoC2', 'C2L: NK', 'C2L: NeuralFibro',
       'C2L: Neuronal', 'C2L: OCP', 'C2L: OsteoB', 'C2L: PAX3+MyoProg',
       'C2L: PAX7+MyoProg', 'C2L: Perichon', 'C2L: Pericyte', 'C2L: Perid

In [64]:
list(adata.obs['library_id'].unique())

['WSSS_THYst9383362']

In [65]:
# add the donor_id column

In [66]:
adata.obs['donor_id'] = adata.obs['library_id']

In [67]:
# change datatype of the column

In [68]:
adata.obs['donor_id'] = adata.obs['donor_id'].astype('category')

In [69]:
# view unique values of donor_id column

In [70]:
list(adata.obs['donor_id'].unique())

['WSSS_THYst9383362']

In [71]:
#view obs

In [72]:
adata.obs

Unnamed: 0,in_tissue,array_row,array_col,barcode,library_id,percent_mito,C2L: ADH+Fibro,C2L: AER-Basal,C2L: ArterialEndo,C2L: ArtiChon,...,C2L: SynapSchwann,C2L: Teno,C2L: TenoProg,C2L: TransMes,C2L: VenousEndo,assay_ontology_term_id,max_c2l_column,max_c2l_column_value,cell_type_ontology_term_id,donor_id
AAACAAGTATCTCCCA-1_WSSS_THYst9383362,1,50,102,AAACAAGTATCTCCCA-1,WSSS_THYst9383362,0.0,2.218564,0.010439,0.133704,0.018929,...,0.007822,0.034527,0.006111,0.013211,0.133460,EFO:0010961,C2L: MYL3+MyoC,14.048070,CL:0000187,WSSS_THYst9383362
AAACAGAGCGACTCCT-1_WSSS_THYst9383362,1,14,94,AAACAGAGCGACTCCT-1,WSSS_THYst9383362,0.0,0.001227,0.000503,0.001423,1.043135,...,0.004365,2.598514,0.014163,0.006748,0.000287,EFO:0010961,C2L: RestingChon,3.953318,CL:0000138,WSSS_THYst9383362
AAACATTTCCCGGATT-1_WSSS_THYst9383362,1,61,97,AAACATTTCCCGGATT-1,WSSS_THYst9383362,0.0,0.087009,0.004649,0.018821,0.101900,...,0.010542,0.124252,0.009752,0.012791,0.019356,EFO:0010961,C2L: MFAP5+Fibro,8.850656,CL:0000057,WSSS_THYst9383362
AAACCCGAACGAAATC-1_WSSS_THYst9383362,1,45,115,AAACCCGAACGAAATC-1,WSSS_THYst9383362,0.0,1.935481,0.008130,0.479325,0.059956,...,0.020083,0.142549,0.017242,0.025883,0.809012,EFO:0010961,C2L: MYL3+MyoC,8.408436,CL:0000187,WSSS_THYst9383362
AAACCGGAAATGTTAA-1_WSSS_THYst9383362,1,54,124,AAACCGGAAATGTTAA-1,WSSS_THYst9383362,0.0,1.796638,0.007400,0.056878,0.006483,...,0.008087,0.023354,0.002875,0.005616,0.035984,EFO:0010961,C2L: MYL3+MyoC,8.520630,CL:0000187,WSSS_THYst9383362
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTGTTAGCAAATTCGA-1_WSSS_THYst9383362,1,22,42,TTGTTAGCAAATTCGA-1,WSSS_THYst9383362,0.0,0.156103,0.005125,3.687505,0.043005,...,0.012829,0.018054,0.009941,0.027356,1.095297,EFO:0010961,C2L: SMC,10.768473,CL:0000192,WSSS_THYst9383362
TTGTTCAGTGTGCTAC-1_WSSS_THYst9383362,1,24,64,TTGTTCAGTGTGCTAC-1,WSSS_THYst9383362,0.0,0.456833,0.010483,0.074283,0.084770,...,0.016812,0.219800,0.019495,0.022607,0.077236,EFO:0010961,C2L: MFAP5+Fibro,6.152310,CL:0000057,WSSS_THYst9383362
TTGTTCTAGATACGCT-1_WSSS_THYst9383362,1,21,3,TTGTTCTAGATACGCT-1,WSSS_THYst9383362,0.0,0.006329,0.000652,0.007203,0.016021,...,0.000774,0.183933,0.001021,0.002328,0.002279,EFO:0010961,C2L: Teno,0.183933,CL:0000388,WSSS_THYst9383362
TTGTTGTGTGTCAAGA-1_WSSS_THYst9383362,1,31,77,TTGTTGTGTGTCAAGA-1,WSSS_THYst9383362,0.0,0.671205,0.004617,0.004422,0.014033,...,0.005791,0.036483,0.003499,0.009625,0.002990,EFO:0010961,C2L: MYL3+MyoC,5.594432,CL:0000187,WSSS_THYst9383362


In [73]:
adata.obs.columns

Index(['in_tissue', 'array_row', 'array_col', 'barcode', 'library_id',
       'percent_mito', 'C2L: ADH+Fibro', 'C2L: AER-Basal', 'C2L: ArterialEndo',
       'C2L: ArtiChon', 'C2L: B', 'C2L: Basal', 'C2L: CMP/GMP',
       'C2L: ChondroProg', 'C2L: DC2', 'C2L: DefErythro', 'C2L: DefReticulo',
       'C2L: DermFibro', 'C2L: DistalMes', 'C2L: F10+DermFibroProg',
       'C2L: HOXC5+DermFibroProg', 'C2L: HyperChon', 'C2L: ISL1+Mes',
       'C2L: InterMusFibro', 'C2L: InterZone', 'C2L: LMPP/ELP',
       'C2L: LymphEndo', 'C2L: MFAP5+Fibro', 'C2L: MYH3+MyoC',
       'C2L: MYL3+MyoC', 'C2L: Macro', 'C2L: Mast', 'C2L: Megakaryo',
       'C2L: Melano', 'C2L: Mes1', 'C2L: Mes2', 'C2L: Mes3', 'C2L: Mes4',
       'C2L: MesCond', 'C2L: Monocyte', 'C2L: Myelocyte', 'C2L: MyoB1',
       'C2L: MyoB2', 'C2L: MyoC1', 'C2L: MyoC2', 'C2L: NK', 'C2L: NeuralFibro',
       'C2L: Neuronal', 'C2L: OCP', 'C2L: OsteoB', 'C2L: PAX3+MyoProg',
       'C2L: PAX7+MyoProg', 'C2L: Perichon', 'C2L: Pericyte', 'C2L: Perid

In [74]:
adata.obs

Unnamed: 0,in_tissue,array_row,array_col,barcode,library_id,percent_mito,C2L: ADH+Fibro,C2L: AER-Basal,C2L: ArterialEndo,C2L: ArtiChon,...,C2L: SynapSchwann,C2L: Teno,C2L: TenoProg,C2L: TransMes,C2L: VenousEndo,assay_ontology_term_id,max_c2l_column,max_c2l_column_value,cell_type_ontology_term_id,donor_id
AAACAAGTATCTCCCA-1_WSSS_THYst9383362,1,50,102,AAACAAGTATCTCCCA-1,WSSS_THYst9383362,0.0,2.218564,0.010439,0.133704,0.018929,...,0.007822,0.034527,0.006111,0.013211,0.133460,EFO:0010961,C2L: MYL3+MyoC,14.048070,CL:0000187,WSSS_THYst9383362
AAACAGAGCGACTCCT-1_WSSS_THYst9383362,1,14,94,AAACAGAGCGACTCCT-1,WSSS_THYst9383362,0.0,0.001227,0.000503,0.001423,1.043135,...,0.004365,2.598514,0.014163,0.006748,0.000287,EFO:0010961,C2L: RestingChon,3.953318,CL:0000138,WSSS_THYst9383362
AAACATTTCCCGGATT-1_WSSS_THYst9383362,1,61,97,AAACATTTCCCGGATT-1,WSSS_THYst9383362,0.0,0.087009,0.004649,0.018821,0.101900,...,0.010542,0.124252,0.009752,0.012791,0.019356,EFO:0010961,C2L: MFAP5+Fibro,8.850656,CL:0000057,WSSS_THYst9383362
AAACCCGAACGAAATC-1_WSSS_THYst9383362,1,45,115,AAACCCGAACGAAATC-1,WSSS_THYst9383362,0.0,1.935481,0.008130,0.479325,0.059956,...,0.020083,0.142549,0.017242,0.025883,0.809012,EFO:0010961,C2L: MYL3+MyoC,8.408436,CL:0000187,WSSS_THYst9383362
AAACCGGAAATGTTAA-1_WSSS_THYst9383362,1,54,124,AAACCGGAAATGTTAA-1,WSSS_THYst9383362,0.0,1.796638,0.007400,0.056878,0.006483,...,0.008087,0.023354,0.002875,0.005616,0.035984,EFO:0010961,C2L: MYL3+MyoC,8.520630,CL:0000187,WSSS_THYst9383362
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTGTTAGCAAATTCGA-1_WSSS_THYst9383362,1,22,42,TTGTTAGCAAATTCGA-1,WSSS_THYst9383362,0.0,0.156103,0.005125,3.687505,0.043005,...,0.012829,0.018054,0.009941,0.027356,1.095297,EFO:0010961,C2L: SMC,10.768473,CL:0000192,WSSS_THYst9383362
TTGTTCAGTGTGCTAC-1_WSSS_THYst9383362,1,24,64,TTGTTCAGTGTGCTAC-1,WSSS_THYst9383362,0.0,0.456833,0.010483,0.074283,0.084770,...,0.016812,0.219800,0.019495,0.022607,0.077236,EFO:0010961,C2L: MFAP5+Fibro,6.152310,CL:0000057,WSSS_THYst9383362
TTGTTCTAGATACGCT-1_WSSS_THYst9383362,1,21,3,TTGTTCTAGATACGCT-1,WSSS_THYst9383362,0.0,0.006329,0.000652,0.007203,0.016021,...,0.000774,0.183933,0.001021,0.002328,0.002279,EFO:0010961,C2L: Teno,0.183933,CL:0000388,WSSS_THYst9383362
TTGTTGTGTGTCAAGA-1_WSSS_THYst9383362,1,31,77,TTGTTGTGTGTCAAGA-1,WSSS_THYst9383362,0.0,0.671205,0.004617,0.004422,0.014033,...,0.005791,0.036483,0.003499,0.009625,0.002990,EFO:0010961,C2L: MYL3+MyoC,5.594432,CL:0000187,WSSS_THYst9383362


#### **development_stage_ontology_term_id**

In [75]:
# identify the column in adata which corresponds to age

In [76]:
# add the development_stage_ontology_term_id column

In [77]:
mapping = {'WSSS_THYst9383359':'PCW8.1',
'WSSS_THYst9383360':'PCW8.1',
'WSSS_THYst9383361':'PCW8.1',
'WSSS_THYst9383362':'PCW8.1',
'WSSS_THYst9699523':'PCW7.0',
'WSSS_THYst9699524':'PCW7.0',
'WSSS_THYst9699525':'PCW6.2',
'WSSS_THYst9699526':'PCW5.6'}

In [78]:
adata.obs['stage'] = adata.obs['library_id'].map(mapping)

In [79]:
mapping= {'PCW8.1' :'HsapDv:0000030','PCW7.0':'HsapDv:0000026','PCW6.2':'HsapDv:0000024','PCW5.6':'HsapDv:0000023'}

In [80]:
adata.obs['development_stage_ontology_term_id'] = adata.obs['stage'].map(mapping)

In [81]:
# change datatype of the column

In [82]:
adata.obs['development_stage_ontology_term_id'] = adata.obs['development_stage_ontology_term_id'].astype('category')

In [83]:
# view unique values of development_stage_ontology_term_id column

In [84]:
list(adata.obs['development_stage_ontology_term_id'].unique())

['HsapDv:0000030']

In [85]:
# view adata.obs

In [86]:
adata.obs

Unnamed: 0,in_tissue,array_row,array_col,barcode,library_id,percent_mito,C2L: ADH+Fibro,C2L: AER-Basal,C2L: ArterialEndo,C2L: ArtiChon,...,C2L: TenoProg,C2L: TransMes,C2L: VenousEndo,assay_ontology_term_id,max_c2l_column,max_c2l_column_value,cell_type_ontology_term_id,donor_id,stage,development_stage_ontology_term_id
AAACAAGTATCTCCCA-1_WSSS_THYst9383362,1,50,102,AAACAAGTATCTCCCA-1,WSSS_THYst9383362,0.0,2.218564,0.010439,0.133704,0.018929,...,0.006111,0.013211,0.133460,EFO:0010961,C2L: MYL3+MyoC,14.048070,CL:0000187,WSSS_THYst9383362,PCW8.1,HsapDv:0000030
AAACAGAGCGACTCCT-1_WSSS_THYst9383362,1,14,94,AAACAGAGCGACTCCT-1,WSSS_THYst9383362,0.0,0.001227,0.000503,0.001423,1.043135,...,0.014163,0.006748,0.000287,EFO:0010961,C2L: RestingChon,3.953318,CL:0000138,WSSS_THYst9383362,PCW8.1,HsapDv:0000030
AAACATTTCCCGGATT-1_WSSS_THYst9383362,1,61,97,AAACATTTCCCGGATT-1,WSSS_THYst9383362,0.0,0.087009,0.004649,0.018821,0.101900,...,0.009752,0.012791,0.019356,EFO:0010961,C2L: MFAP5+Fibro,8.850656,CL:0000057,WSSS_THYst9383362,PCW8.1,HsapDv:0000030
AAACCCGAACGAAATC-1_WSSS_THYst9383362,1,45,115,AAACCCGAACGAAATC-1,WSSS_THYst9383362,0.0,1.935481,0.008130,0.479325,0.059956,...,0.017242,0.025883,0.809012,EFO:0010961,C2L: MYL3+MyoC,8.408436,CL:0000187,WSSS_THYst9383362,PCW8.1,HsapDv:0000030
AAACCGGAAATGTTAA-1_WSSS_THYst9383362,1,54,124,AAACCGGAAATGTTAA-1,WSSS_THYst9383362,0.0,1.796638,0.007400,0.056878,0.006483,...,0.002875,0.005616,0.035984,EFO:0010961,C2L: MYL3+MyoC,8.520630,CL:0000187,WSSS_THYst9383362,PCW8.1,HsapDv:0000030
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTGTTAGCAAATTCGA-1_WSSS_THYst9383362,1,22,42,TTGTTAGCAAATTCGA-1,WSSS_THYst9383362,0.0,0.156103,0.005125,3.687505,0.043005,...,0.009941,0.027356,1.095297,EFO:0010961,C2L: SMC,10.768473,CL:0000192,WSSS_THYst9383362,PCW8.1,HsapDv:0000030
TTGTTCAGTGTGCTAC-1_WSSS_THYst9383362,1,24,64,TTGTTCAGTGTGCTAC-1,WSSS_THYst9383362,0.0,0.456833,0.010483,0.074283,0.084770,...,0.019495,0.022607,0.077236,EFO:0010961,C2L: MFAP5+Fibro,6.152310,CL:0000057,WSSS_THYst9383362,PCW8.1,HsapDv:0000030
TTGTTCTAGATACGCT-1_WSSS_THYst9383362,1,21,3,TTGTTCTAGATACGCT-1,WSSS_THYst9383362,0.0,0.006329,0.000652,0.007203,0.016021,...,0.001021,0.002328,0.002279,EFO:0010961,C2L: Teno,0.183933,CL:0000388,WSSS_THYst9383362,PCW8.1,HsapDv:0000030
TTGTTGTGTGTCAAGA-1_WSSS_THYst9383362,1,31,77,TTGTTGTGTGTCAAGA-1,WSSS_THYst9383362,0.0,0.671205,0.004617,0.004422,0.014033,...,0.003499,0.009625,0.002990,EFO:0010961,C2L: MYL3+MyoC,5.594432,CL:0000187,WSSS_THYst9383362,PCW8.1,HsapDv:0000030


#### **disease_ontology_term_id**

In [87]:
# Assign normal since all are healthy patients

In [88]:
# add the disease_ontology_term_id column

In [89]:
adata.obs['disease_ontology_term_id'] = ['PATO:0000461'] * len(adata.obs)

In [90]:
#change data type of column

In [91]:
adata.obs['disease_ontology_term_id'] = adata.obs['disease_ontology_term_id'].astype('category')

In [92]:
# view obs

In [93]:
adata.obs

Unnamed: 0,in_tissue,array_row,array_col,barcode,library_id,percent_mito,C2L: ADH+Fibro,C2L: AER-Basal,C2L: ArterialEndo,C2L: ArtiChon,...,C2L: TransMes,C2L: VenousEndo,assay_ontology_term_id,max_c2l_column,max_c2l_column_value,cell_type_ontology_term_id,donor_id,stage,development_stage_ontology_term_id,disease_ontology_term_id
AAACAAGTATCTCCCA-1_WSSS_THYst9383362,1,50,102,AAACAAGTATCTCCCA-1,WSSS_THYst9383362,0.0,2.218564,0.010439,0.133704,0.018929,...,0.013211,0.133460,EFO:0010961,C2L: MYL3+MyoC,14.048070,CL:0000187,WSSS_THYst9383362,PCW8.1,HsapDv:0000030,PATO:0000461
AAACAGAGCGACTCCT-1_WSSS_THYst9383362,1,14,94,AAACAGAGCGACTCCT-1,WSSS_THYst9383362,0.0,0.001227,0.000503,0.001423,1.043135,...,0.006748,0.000287,EFO:0010961,C2L: RestingChon,3.953318,CL:0000138,WSSS_THYst9383362,PCW8.1,HsapDv:0000030,PATO:0000461
AAACATTTCCCGGATT-1_WSSS_THYst9383362,1,61,97,AAACATTTCCCGGATT-1,WSSS_THYst9383362,0.0,0.087009,0.004649,0.018821,0.101900,...,0.012791,0.019356,EFO:0010961,C2L: MFAP5+Fibro,8.850656,CL:0000057,WSSS_THYst9383362,PCW8.1,HsapDv:0000030,PATO:0000461
AAACCCGAACGAAATC-1_WSSS_THYst9383362,1,45,115,AAACCCGAACGAAATC-1,WSSS_THYst9383362,0.0,1.935481,0.008130,0.479325,0.059956,...,0.025883,0.809012,EFO:0010961,C2L: MYL3+MyoC,8.408436,CL:0000187,WSSS_THYst9383362,PCW8.1,HsapDv:0000030,PATO:0000461
AAACCGGAAATGTTAA-1_WSSS_THYst9383362,1,54,124,AAACCGGAAATGTTAA-1,WSSS_THYst9383362,0.0,1.796638,0.007400,0.056878,0.006483,...,0.005616,0.035984,EFO:0010961,C2L: MYL3+MyoC,8.520630,CL:0000187,WSSS_THYst9383362,PCW8.1,HsapDv:0000030,PATO:0000461
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTGTTAGCAAATTCGA-1_WSSS_THYst9383362,1,22,42,TTGTTAGCAAATTCGA-1,WSSS_THYst9383362,0.0,0.156103,0.005125,3.687505,0.043005,...,0.027356,1.095297,EFO:0010961,C2L: SMC,10.768473,CL:0000192,WSSS_THYst9383362,PCW8.1,HsapDv:0000030,PATO:0000461
TTGTTCAGTGTGCTAC-1_WSSS_THYst9383362,1,24,64,TTGTTCAGTGTGCTAC-1,WSSS_THYst9383362,0.0,0.456833,0.010483,0.074283,0.084770,...,0.022607,0.077236,EFO:0010961,C2L: MFAP5+Fibro,6.152310,CL:0000057,WSSS_THYst9383362,PCW8.1,HsapDv:0000030,PATO:0000461
TTGTTCTAGATACGCT-1_WSSS_THYst9383362,1,21,3,TTGTTCTAGATACGCT-1,WSSS_THYst9383362,0.0,0.006329,0.000652,0.007203,0.016021,...,0.002328,0.002279,EFO:0010961,C2L: Teno,0.183933,CL:0000388,WSSS_THYst9383362,PCW8.1,HsapDv:0000030,PATO:0000461
TTGTTGTGTGTCAAGA-1_WSSS_THYst9383362,1,31,77,TTGTTGTGTGTCAAGA-1,WSSS_THYst9383362,0.0,0.671205,0.004617,0.004422,0.014033,...,0.009625,0.002990,EFO:0010961,C2L: MYL3+MyoC,5.594432,CL:0000187,WSSS_THYst9383362,PCW8.1,HsapDv:0000030,PATO:0000461


#### **is_primary_data**

In [94]:
adata.obs['is_primary_data'] = [True] * len(adata.obs)

In [95]:
adata.obs

Unnamed: 0,in_tissue,array_row,array_col,barcode,library_id,percent_mito,C2L: ADH+Fibro,C2L: AER-Basal,C2L: ArterialEndo,C2L: ArtiChon,...,C2L: VenousEndo,assay_ontology_term_id,max_c2l_column,max_c2l_column_value,cell_type_ontology_term_id,donor_id,stage,development_stage_ontology_term_id,disease_ontology_term_id,is_primary_data
AAACAAGTATCTCCCA-1_WSSS_THYst9383362,1,50,102,AAACAAGTATCTCCCA-1,WSSS_THYst9383362,0.0,2.218564,0.010439,0.133704,0.018929,...,0.133460,EFO:0010961,C2L: MYL3+MyoC,14.048070,CL:0000187,WSSS_THYst9383362,PCW8.1,HsapDv:0000030,PATO:0000461,True
AAACAGAGCGACTCCT-1_WSSS_THYst9383362,1,14,94,AAACAGAGCGACTCCT-1,WSSS_THYst9383362,0.0,0.001227,0.000503,0.001423,1.043135,...,0.000287,EFO:0010961,C2L: RestingChon,3.953318,CL:0000138,WSSS_THYst9383362,PCW8.1,HsapDv:0000030,PATO:0000461,True
AAACATTTCCCGGATT-1_WSSS_THYst9383362,1,61,97,AAACATTTCCCGGATT-1,WSSS_THYst9383362,0.0,0.087009,0.004649,0.018821,0.101900,...,0.019356,EFO:0010961,C2L: MFAP5+Fibro,8.850656,CL:0000057,WSSS_THYst9383362,PCW8.1,HsapDv:0000030,PATO:0000461,True
AAACCCGAACGAAATC-1_WSSS_THYst9383362,1,45,115,AAACCCGAACGAAATC-1,WSSS_THYst9383362,0.0,1.935481,0.008130,0.479325,0.059956,...,0.809012,EFO:0010961,C2L: MYL3+MyoC,8.408436,CL:0000187,WSSS_THYst9383362,PCW8.1,HsapDv:0000030,PATO:0000461,True
AAACCGGAAATGTTAA-1_WSSS_THYst9383362,1,54,124,AAACCGGAAATGTTAA-1,WSSS_THYst9383362,0.0,1.796638,0.007400,0.056878,0.006483,...,0.035984,EFO:0010961,C2L: MYL3+MyoC,8.520630,CL:0000187,WSSS_THYst9383362,PCW8.1,HsapDv:0000030,PATO:0000461,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTGTTAGCAAATTCGA-1_WSSS_THYst9383362,1,22,42,TTGTTAGCAAATTCGA-1,WSSS_THYst9383362,0.0,0.156103,0.005125,3.687505,0.043005,...,1.095297,EFO:0010961,C2L: SMC,10.768473,CL:0000192,WSSS_THYst9383362,PCW8.1,HsapDv:0000030,PATO:0000461,True
TTGTTCAGTGTGCTAC-1_WSSS_THYst9383362,1,24,64,TTGTTCAGTGTGCTAC-1,WSSS_THYst9383362,0.0,0.456833,0.010483,0.074283,0.084770,...,0.077236,EFO:0010961,C2L: MFAP5+Fibro,6.152310,CL:0000057,WSSS_THYst9383362,PCW8.1,HsapDv:0000030,PATO:0000461,True
TTGTTCTAGATACGCT-1_WSSS_THYst9383362,1,21,3,TTGTTCTAGATACGCT-1,WSSS_THYst9383362,0.0,0.006329,0.000652,0.007203,0.016021,...,0.002279,EFO:0010961,C2L: Teno,0.183933,CL:0000388,WSSS_THYst9383362,PCW8.1,HsapDv:0000030,PATO:0000461,True
TTGTTGTGTGTCAAGA-1_WSSS_THYst9383362,1,31,77,TTGTTGTGTGTCAAGA-1,WSSS_THYst9383362,0.0,0.671205,0.004617,0.004422,0.014033,...,0.002990,EFO:0010961,C2L: MYL3+MyoC,5.594432,CL:0000187,WSSS_THYst9383362,PCW8.1,HsapDv:0000030,PATO:0000461,True


In [96]:
#change data type of column

In [97]:
adata.obs['is_primary_data'] = adata.obs['is_primary_data'].astype('bool')

#### **organism_ontology_term_id**

In [98]:
# assign organism id 

In [99]:
adata.obs['organism_ontology_term_id'] = ['NCBITaxon:9606'] * len(adata.obs)

In [100]:
#change data type of column

In [101]:
adata.obs['organism_ontology_term_id'] = adata.obs['organism_ontology_term_id'].astype('category')

In [102]:
# view obs

In [103]:
adata.obs

Unnamed: 0,in_tissue,array_row,array_col,barcode,library_id,percent_mito,C2L: ADH+Fibro,C2L: AER-Basal,C2L: ArterialEndo,C2L: ArtiChon,...,assay_ontology_term_id,max_c2l_column,max_c2l_column_value,cell_type_ontology_term_id,donor_id,stage,development_stage_ontology_term_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id
AAACAAGTATCTCCCA-1_WSSS_THYst9383362,1,50,102,AAACAAGTATCTCCCA-1,WSSS_THYst9383362,0.0,2.218564,0.010439,0.133704,0.018929,...,EFO:0010961,C2L: MYL3+MyoC,14.048070,CL:0000187,WSSS_THYst9383362,PCW8.1,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606
AAACAGAGCGACTCCT-1_WSSS_THYst9383362,1,14,94,AAACAGAGCGACTCCT-1,WSSS_THYst9383362,0.0,0.001227,0.000503,0.001423,1.043135,...,EFO:0010961,C2L: RestingChon,3.953318,CL:0000138,WSSS_THYst9383362,PCW8.1,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606
AAACATTTCCCGGATT-1_WSSS_THYst9383362,1,61,97,AAACATTTCCCGGATT-1,WSSS_THYst9383362,0.0,0.087009,0.004649,0.018821,0.101900,...,EFO:0010961,C2L: MFAP5+Fibro,8.850656,CL:0000057,WSSS_THYst9383362,PCW8.1,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606
AAACCCGAACGAAATC-1_WSSS_THYst9383362,1,45,115,AAACCCGAACGAAATC-1,WSSS_THYst9383362,0.0,1.935481,0.008130,0.479325,0.059956,...,EFO:0010961,C2L: MYL3+MyoC,8.408436,CL:0000187,WSSS_THYst9383362,PCW8.1,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606
AAACCGGAAATGTTAA-1_WSSS_THYst9383362,1,54,124,AAACCGGAAATGTTAA-1,WSSS_THYst9383362,0.0,1.796638,0.007400,0.056878,0.006483,...,EFO:0010961,C2L: MYL3+MyoC,8.520630,CL:0000187,WSSS_THYst9383362,PCW8.1,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTGTTAGCAAATTCGA-1_WSSS_THYst9383362,1,22,42,TTGTTAGCAAATTCGA-1,WSSS_THYst9383362,0.0,0.156103,0.005125,3.687505,0.043005,...,EFO:0010961,C2L: SMC,10.768473,CL:0000192,WSSS_THYst9383362,PCW8.1,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606
TTGTTCAGTGTGCTAC-1_WSSS_THYst9383362,1,24,64,TTGTTCAGTGTGCTAC-1,WSSS_THYst9383362,0.0,0.456833,0.010483,0.074283,0.084770,...,EFO:0010961,C2L: MFAP5+Fibro,6.152310,CL:0000057,WSSS_THYst9383362,PCW8.1,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606
TTGTTCTAGATACGCT-1_WSSS_THYst9383362,1,21,3,TTGTTCTAGATACGCT-1,WSSS_THYst9383362,0.0,0.006329,0.000652,0.007203,0.016021,...,EFO:0010961,C2L: Teno,0.183933,CL:0000388,WSSS_THYst9383362,PCW8.1,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606
TTGTTGTGTGTCAAGA-1_WSSS_THYst9383362,1,31,77,TTGTTGTGTGTCAAGA-1,WSSS_THYst9383362,0.0,0.671205,0.004617,0.004422,0.014033,...,EFO:0010961,C2L: MYL3+MyoC,5.594432,CL:0000187,WSSS_THYst9383362,PCW8.1,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606


#### **self_reported_ethnicity_ontology_term_id**

In [104]:
adata.obs['self_reported_ethnicity_ontology_term_id'] = ['unknown'] * len(adata.obs)

In [105]:
# change data type

In [106]:
adata.obs['self_reported_ethnicity_ontology_term_id'] = adata.obs['self_reported_ethnicity_ontology_term_id'].astype('category')

In [107]:
# view obs

In [108]:
adata.obs

Unnamed: 0,in_tissue,array_row,array_col,barcode,library_id,percent_mito,C2L: ADH+Fibro,C2L: AER-Basal,C2L: ArterialEndo,C2L: ArtiChon,...,max_c2l_column,max_c2l_column_value,cell_type_ontology_term_id,donor_id,stage,development_stage_ontology_term_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id,self_reported_ethnicity_ontology_term_id
AAACAAGTATCTCCCA-1_WSSS_THYst9383362,1,50,102,AAACAAGTATCTCCCA-1,WSSS_THYst9383362,0.0,2.218564,0.010439,0.133704,0.018929,...,C2L: MYL3+MyoC,14.048070,CL:0000187,WSSS_THYst9383362,PCW8.1,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown
AAACAGAGCGACTCCT-1_WSSS_THYst9383362,1,14,94,AAACAGAGCGACTCCT-1,WSSS_THYst9383362,0.0,0.001227,0.000503,0.001423,1.043135,...,C2L: RestingChon,3.953318,CL:0000138,WSSS_THYst9383362,PCW8.1,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown
AAACATTTCCCGGATT-1_WSSS_THYst9383362,1,61,97,AAACATTTCCCGGATT-1,WSSS_THYst9383362,0.0,0.087009,0.004649,0.018821,0.101900,...,C2L: MFAP5+Fibro,8.850656,CL:0000057,WSSS_THYst9383362,PCW8.1,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown
AAACCCGAACGAAATC-1_WSSS_THYst9383362,1,45,115,AAACCCGAACGAAATC-1,WSSS_THYst9383362,0.0,1.935481,0.008130,0.479325,0.059956,...,C2L: MYL3+MyoC,8.408436,CL:0000187,WSSS_THYst9383362,PCW8.1,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown
AAACCGGAAATGTTAA-1_WSSS_THYst9383362,1,54,124,AAACCGGAAATGTTAA-1,WSSS_THYst9383362,0.0,1.796638,0.007400,0.056878,0.006483,...,C2L: MYL3+MyoC,8.520630,CL:0000187,WSSS_THYst9383362,PCW8.1,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTGTTAGCAAATTCGA-1_WSSS_THYst9383362,1,22,42,TTGTTAGCAAATTCGA-1,WSSS_THYst9383362,0.0,0.156103,0.005125,3.687505,0.043005,...,C2L: SMC,10.768473,CL:0000192,WSSS_THYst9383362,PCW8.1,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown
TTGTTCAGTGTGCTAC-1_WSSS_THYst9383362,1,24,64,TTGTTCAGTGTGCTAC-1,WSSS_THYst9383362,0.0,0.456833,0.010483,0.074283,0.084770,...,C2L: MFAP5+Fibro,6.152310,CL:0000057,WSSS_THYst9383362,PCW8.1,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown
TTGTTCTAGATACGCT-1_WSSS_THYst9383362,1,21,3,TTGTTCTAGATACGCT-1,WSSS_THYst9383362,0.0,0.006329,0.000652,0.007203,0.016021,...,C2L: Teno,0.183933,CL:0000388,WSSS_THYst9383362,PCW8.1,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown
TTGTTGTGTGTCAAGA-1_WSSS_THYst9383362,1,31,77,TTGTTGTGTGTCAAGA-1,WSSS_THYst9383362,0.0,0.671205,0.004617,0.004422,0.014033,...,C2L: MYL3+MyoC,5.594432,CL:0000187,WSSS_THYst9383362,PCW8.1,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown


In [109]:
list(adata.obs['self_reported_ethnicity_ontology_term_id'].unique())

['unknown']

#### **sex_ontology_term_id**

In [110]:
mapping = {'5386STDY7537944':'F',
           '5478STDY7717491':'M',
'5478STDY7717492':'M',
'5478STDY7652318':'M',
'5386STDY7557336':'M',
'5386STDY7557337':'M',
'5386STDY7557335':'F',
'FCAImmP7536758':'F',
'FCAImmP7536759':'F',
'5478STDY7980348':'M',
'5478STDY7980349':'M',
'5478STDY7935101':'F',
'5478STDY7935102':'M',
'WSSS_THYst9384953':'M',
'WSSS_THYst9384954':'M',
'WSSS_THYst9384955':'M',
'WSSS_THYst9384956':'M',
'WSSS_THYst9384957':'M',
'WSSS_THYst9384958':'M',
'WSSS_THYst8796437':'M',
'WSSS_THYst8796438':'M',
'WSSS_THYst8796439':'M',
'WSSS_THYst8796440':'M',
'WSSS_THYst8796441':'M',
'WSSS_THYst8796442':'M',
'WSSS_THYst9383359':'M',
'WSSS_THYst9383360':'M',
'WSSS_THYst9383361':'M',
'WSSS_THYst9383362':'M',
'WSSS_THYst9699523':'F',
'WSSS_THYst9699524':'F',
'WSSS_THYst9699525':'F',
'WSSS_THYst9699526':'unknown'}

In [111]:
adata.obs['sex'] = adata.obs['library_id'].map(mapping)

In [112]:
mapping= {'F': 'PATO:0000383', 'M': 'PATO:0000384', 'unknown':'unknown'}

In [113]:
adata.obs['sex_ontology_term_id'] = adata.obs['sex'].map(mapping)

In [114]:
adata.obs['sex_ontology_term_id'] = ['PATO:0000384'] * len(adata.obs)

In [115]:
# change data type

In [116]:
adata.obs['sex_ontology_term_id'] = adata.obs['sex_ontology_term_id'].astype('category')

In [117]:
adata.obs

Unnamed: 0,in_tissue,array_row,array_col,barcode,library_id,percent_mito,C2L: ADH+Fibro,C2L: AER-Basal,C2L: ArterialEndo,C2L: ArtiChon,...,cell_type_ontology_term_id,donor_id,stage,development_stage_ontology_term_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id,self_reported_ethnicity_ontology_term_id,sex,sex_ontology_term_id
AAACAAGTATCTCCCA-1_WSSS_THYst9383362,1,50,102,AAACAAGTATCTCCCA-1,WSSS_THYst9383362,0.0,2.218564,0.010439,0.133704,0.018929,...,CL:0000187,WSSS_THYst9383362,PCW8.1,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown,M,PATO:0000384
AAACAGAGCGACTCCT-1_WSSS_THYst9383362,1,14,94,AAACAGAGCGACTCCT-1,WSSS_THYst9383362,0.0,0.001227,0.000503,0.001423,1.043135,...,CL:0000138,WSSS_THYst9383362,PCW8.1,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown,M,PATO:0000384
AAACATTTCCCGGATT-1_WSSS_THYst9383362,1,61,97,AAACATTTCCCGGATT-1,WSSS_THYst9383362,0.0,0.087009,0.004649,0.018821,0.101900,...,CL:0000057,WSSS_THYst9383362,PCW8.1,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown,M,PATO:0000384
AAACCCGAACGAAATC-1_WSSS_THYst9383362,1,45,115,AAACCCGAACGAAATC-1,WSSS_THYst9383362,0.0,1.935481,0.008130,0.479325,0.059956,...,CL:0000187,WSSS_THYst9383362,PCW8.1,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown,M,PATO:0000384
AAACCGGAAATGTTAA-1_WSSS_THYst9383362,1,54,124,AAACCGGAAATGTTAA-1,WSSS_THYst9383362,0.0,1.796638,0.007400,0.056878,0.006483,...,CL:0000187,WSSS_THYst9383362,PCW8.1,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown,M,PATO:0000384
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTGTTAGCAAATTCGA-1_WSSS_THYst9383362,1,22,42,TTGTTAGCAAATTCGA-1,WSSS_THYst9383362,0.0,0.156103,0.005125,3.687505,0.043005,...,CL:0000192,WSSS_THYst9383362,PCW8.1,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown,M,PATO:0000384
TTGTTCAGTGTGCTAC-1_WSSS_THYst9383362,1,24,64,TTGTTCAGTGTGCTAC-1,WSSS_THYst9383362,0.0,0.456833,0.010483,0.074283,0.084770,...,CL:0000057,WSSS_THYst9383362,PCW8.1,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown,M,PATO:0000384
TTGTTCTAGATACGCT-1_WSSS_THYst9383362,1,21,3,TTGTTCTAGATACGCT-1,WSSS_THYst9383362,0.0,0.006329,0.000652,0.007203,0.016021,...,CL:0000388,WSSS_THYst9383362,PCW8.1,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown,M,PATO:0000384
TTGTTGTGTGTCAAGA-1_WSSS_THYst9383362,1,31,77,TTGTTGTGTGTCAAGA-1,WSSS_THYst9383362,0.0,0.671205,0.004617,0.004422,0.014033,...,CL:0000187,WSSS_THYst9383362,PCW8.1,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown,M,PATO:0000384


#### **suspension_type**

In [118]:
# since visium suspension type is 'na'

In [119]:
adata.obs['suspension_type'] = ['na'] * len(adata.obs)

In [120]:
# change data type

In [121]:
adata.obs['suspension_type'] = adata.obs['suspension_type'].astype('category')

In [122]:
# view obs

In [123]:
adata.obs

Unnamed: 0,in_tissue,array_row,array_col,barcode,library_id,percent_mito,C2L: ADH+Fibro,C2L: AER-Basal,C2L: ArterialEndo,C2L: ArtiChon,...,donor_id,stage,development_stage_ontology_term_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id,self_reported_ethnicity_ontology_term_id,sex,sex_ontology_term_id,suspension_type
AAACAAGTATCTCCCA-1_WSSS_THYst9383362,1,50,102,AAACAAGTATCTCCCA-1,WSSS_THYst9383362,0.0,2.218564,0.010439,0.133704,0.018929,...,WSSS_THYst9383362,PCW8.1,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown,M,PATO:0000384,na
AAACAGAGCGACTCCT-1_WSSS_THYst9383362,1,14,94,AAACAGAGCGACTCCT-1,WSSS_THYst9383362,0.0,0.001227,0.000503,0.001423,1.043135,...,WSSS_THYst9383362,PCW8.1,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown,M,PATO:0000384,na
AAACATTTCCCGGATT-1_WSSS_THYst9383362,1,61,97,AAACATTTCCCGGATT-1,WSSS_THYst9383362,0.0,0.087009,0.004649,0.018821,0.101900,...,WSSS_THYst9383362,PCW8.1,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown,M,PATO:0000384,na
AAACCCGAACGAAATC-1_WSSS_THYst9383362,1,45,115,AAACCCGAACGAAATC-1,WSSS_THYst9383362,0.0,1.935481,0.008130,0.479325,0.059956,...,WSSS_THYst9383362,PCW8.1,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown,M,PATO:0000384,na
AAACCGGAAATGTTAA-1_WSSS_THYst9383362,1,54,124,AAACCGGAAATGTTAA-1,WSSS_THYst9383362,0.0,1.796638,0.007400,0.056878,0.006483,...,WSSS_THYst9383362,PCW8.1,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown,M,PATO:0000384,na
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTGTTAGCAAATTCGA-1_WSSS_THYst9383362,1,22,42,TTGTTAGCAAATTCGA-1,WSSS_THYst9383362,0.0,0.156103,0.005125,3.687505,0.043005,...,WSSS_THYst9383362,PCW8.1,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown,M,PATO:0000384,na
TTGTTCAGTGTGCTAC-1_WSSS_THYst9383362,1,24,64,TTGTTCAGTGTGCTAC-1,WSSS_THYst9383362,0.0,0.456833,0.010483,0.074283,0.084770,...,WSSS_THYst9383362,PCW8.1,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown,M,PATO:0000384,na
TTGTTCTAGATACGCT-1_WSSS_THYst9383362,1,21,3,TTGTTCTAGATACGCT-1,WSSS_THYst9383362,0.0,0.006329,0.000652,0.007203,0.016021,...,WSSS_THYst9383362,PCW8.1,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown,M,PATO:0000384,na
TTGTTGTGTGTCAAGA-1_WSSS_THYst9383362,1,31,77,TTGTTGTGTGTCAAGA-1,WSSS_THYst9383362,0.0,0.671205,0.004617,0.004422,0.014033,...,WSSS_THYst9383362,PCW8.1,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown,M,PATO:0000384,na


#### **tissue_type**

In [124]:
adata.obs['tissue_type'] = ['tissue'] * len(adata.obs)

In [125]:
adata.obs['tissue_type'] = adata.obs['tissue_type'].astype('category')

#### **tissue_ontology_term_id**

In [126]:
list(adata.obs['library_id'].unique())

['WSSS_THYst9383362']

In [127]:
mapping= {'WSSS_THYst9383360':'UBERON:0000978', 'WSSS_THYst9383361':'UBERON:0000978', 'WSSS_THYst9383362':'UBERON:0000978'}

In [128]:
adata.obs['tissue_ontology_term_id'] = adata.obs['library_id'].map(mapping)

In [129]:
adata.obs['tissue_ontology_term_id'] = adata.obs['tissue_ontology_term_id'].astype('category')

In [130]:
list(adata.obs['tissue_ontology_term_id'].unique())

['UBERON:0000978']

In [131]:
# view obs

In [132]:
adata.obs

Unnamed: 0,in_tissue,array_row,array_col,barcode,library_id,percent_mito,C2L: ADH+Fibro,C2L: AER-Basal,C2L: ArterialEndo,C2L: ArtiChon,...,development_stage_ontology_term_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id,self_reported_ethnicity_ontology_term_id,sex,sex_ontology_term_id,suspension_type,tissue_type,tissue_ontology_term_id
AAACAAGTATCTCCCA-1_WSSS_THYst9383362,1,50,102,AAACAAGTATCTCCCA-1,WSSS_THYst9383362,0.0,2.218564,0.010439,0.133704,0.018929,...,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown,M,PATO:0000384,na,tissue,UBERON:0000978
AAACAGAGCGACTCCT-1_WSSS_THYst9383362,1,14,94,AAACAGAGCGACTCCT-1,WSSS_THYst9383362,0.0,0.001227,0.000503,0.001423,1.043135,...,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown,M,PATO:0000384,na,tissue,UBERON:0000978
AAACATTTCCCGGATT-1_WSSS_THYst9383362,1,61,97,AAACATTTCCCGGATT-1,WSSS_THYst9383362,0.0,0.087009,0.004649,0.018821,0.101900,...,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown,M,PATO:0000384,na,tissue,UBERON:0000978
AAACCCGAACGAAATC-1_WSSS_THYst9383362,1,45,115,AAACCCGAACGAAATC-1,WSSS_THYst9383362,0.0,1.935481,0.008130,0.479325,0.059956,...,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown,M,PATO:0000384,na,tissue,UBERON:0000978
AAACCGGAAATGTTAA-1_WSSS_THYst9383362,1,54,124,AAACCGGAAATGTTAA-1,WSSS_THYst9383362,0.0,1.796638,0.007400,0.056878,0.006483,...,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown,M,PATO:0000384,na,tissue,UBERON:0000978
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTGTTAGCAAATTCGA-1_WSSS_THYst9383362,1,22,42,TTGTTAGCAAATTCGA-1,WSSS_THYst9383362,0.0,0.156103,0.005125,3.687505,0.043005,...,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown,M,PATO:0000384,na,tissue,UBERON:0000978
TTGTTCAGTGTGCTAC-1_WSSS_THYst9383362,1,24,64,TTGTTCAGTGTGCTAC-1,WSSS_THYst9383362,0.0,0.456833,0.010483,0.074283,0.084770,...,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown,M,PATO:0000384,na,tissue,UBERON:0000978
TTGTTCTAGATACGCT-1_WSSS_THYst9383362,1,21,3,TTGTTCTAGATACGCT-1,WSSS_THYst9383362,0.0,0.006329,0.000652,0.007203,0.016021,...,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown,M,PATO:0000384,na,tissue,UBERON:0000978
TTGTTGTGTGTCAAGA-1_WSSS_THYst9383362,1,31,77,TTGTTGTGTGTCAAGA-1,WSSS_THYst9383362,0.0,0.671205,0.004617,0.004422,0.014033,...,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown,M,PATO:0000384,na,tissue,UBERON:0000978


In [133]:
adata.obs.columns

Index(['in_tissue', 'array_row', 'array_col', 'barcode', 'library_id',
       'percent_mito', 'C2L: ADH+Fibro', 'C2L: AER-Basal', 'C2L: ArterialEndo',
       'C2L: ArtiChon', 'C2L: B', 'C2L: Basal', 'C2L: CMP/GMP',
       'C2L: ChondroProg', 'C2L: DC2', 'C2L: DefErythro', 'C2L: DefReticulo',
       'C2L: DermFibro', 'C2L: DistalMes', 'C2L: F10+DermFibroProg',
       'C2L: HOXC5+DermFibroProg', 'C2L: HyperChon', 'C2L: ISL1+Mes',
       'C2L: InterMusFibro', 'C2L: InterZone', 'C2L: LMPP/ELP',
       'C2L: LymphEndo', 'C2L: MFAP5+Fibro', 'C2L: MYH3+MyoC',
       'C2L: MYL3+MyoC', 'C2L: Macro', 'C2L: Mast', 'C2L: Megakaryo',
       'C2L: Melano', 'C2L: Mes1', 'C2L: Mes2', 'C2L: Mes3', 'C2L: Mes4',
       'C2L: MesCond', 'C2L: Monocyte', 'C2L: Myelocyte', 'C2L: MyoB1',
       'C2L: MyoB2', 'C2L: MyoC1', 'C2L: MyoC2', 'C2L: NK', 'C2L: NeuralFibro',
       'C2L: Neuronal', 'C2L: OCP', 'C2L: OsteoB', 'C2L: PAX3+MyoProg',
       'C2L: PAX7+MyoProg', 'C2L: Perichon', 'C2L: Pericyte', 'C2L: Perid

In [134]:
del adata.obs['barcode']
del adata.obs['max_c2l_column']
del adata.obs['max_c2l_column_value']
del adata.obs['array_row']
del adata.obs['array_col']
del adata.obs['library_id']
del adata.obs['sex']

#### **obsm (Embeddings)**

In [135]:
adata.obsm

AxisArrays with keys: X_spatial

In [136]:
adata.obsm.keys()

KeysView(AxisArrays with keys: X_spatial)

#### **uns (Dataset Metadata)**

In [137]:
adata.uns

OverloadedDict, wrapping:
	{'spatial': {'WSSS_THYst9383362': {'images': {'hires': array([[[0.9372549 , 0.9490196 , 0.95686275],
        [0.9372549 , 0.9490196 , 0.95686275],
        [0.9372549 , 0.9490196 , 0.95686275],
        ...,
        [0.9372549 , 0.9490196 , 0.95686275],
        [0.9372549 , 0.9490196 , 0.95686275],
        [0.9372549 , 0.9490196 , 0.95686275]],

       [[0.9372549 , 0.9490196 , 0.95686275],
        [0.9372549 , 0.9490196 , 0.95686275],
        [0.9372549 , 0.9490196 , 0.95686275],
        ...,
        [0.9372549 , 0.9490196 , 0.95686275],
        [0.9372549 , 0.9490196 , 0.95686275],
        [0.9372549 , 0.9490196 , 0.95686275]],

       [[0.9372549 , 0.9490196 , 0.95686275],
        [0.9372549 , 0.9490196 , 0.95686275],
        [0.9372549 , 0.9490196 , 0.95686275],
        ...,
        [0.9372549 , 0.9490196 , 0.95686275],
        [0.9372549 , 0.9490196 , 0.95686275],
        [0.9372549 , 0.9490196 , 0.95686275]],

       ...,

       [[0.9372549 , 0.9490196 ,

In [138]:
adata.uns['image_caption'] = 'Shown here is an image of ten micron thick cryosections of whole embryonic limb samples stained with H&E'

In [139]:
adata.uns['title'] = 'visium_slide4'

In [140]:
adata.uns['default_embedding'] = 'X_spatial'

In [141]:
adata.uns.keys()

dict_keys(['spatial', 'image_caption', 'title', 'default_embedding'])

### **Final checks and adjustments**

In [142]:
adata

AnnData object with n_obs × n_vars = 2884 × 33137
    obs: 'in_tissue', 'percent_mito', 'C2L: ADH+Fibro', 'C2L: AER-Basal', 'C2L: ArterialEndo', 'C2L: ArtiChon', 'C2L: B', 'C2L: Basal', 'C2L: CMP/GMP', 'C2L: ChondroProg', 'C2L: DC2', 'C2L: DefErythro', 'C2L: DefReticulo', 'C2L: DermFibro', 'C2L: DistalMes', 'C2L: F10+DermFibroProg', 'C2L: HOXC5+DermFibroProg', 'C2L: HyperChon', 'C2L: ISL1+Mes', 'C2L: InterMusFibro', 'C2L: InterZone', 'C2L: LMPP/ELP', 'C2L: LymphEndo', 'C2L: MFAP5+Fibro', 'C2L: MYH3+MyoC', 'C2L: MYL3+MyoC', 'C2L: Macro', 'C2L: Mast', 'C2L: Megakaryo', 'C2L: Melano', 'C2L: Mes1', 'C2L: Mes2', 'C2L: Mes3', 'C2L: Mes4', 'C2L: MesCond', 'C2L: Monocyte', 'C2L: Myelocyte', 'C2L: MyoB1', 'C2L: MyoB2', 'C2L: MyoC1', 'C2L: MyoC2', 'C2L: NK', 'C2L: NeuralFibro', 'C2L: Neuronal', 'C2L: OCP', 'C2L: OsteoB', 'C2L: PAX3+MyoProg', 'C2L: PAX7+MyoProg', 'C2L: Perichon', 'C2L: Pericyte', 'C2L: Periderm', 'C2L: Perimysium', 'C2L: PrehyperChon', 'C2L: PrimErythro1', 'C2L: PrimErythro2', 'C

In [143]:
adata.obs.dtypes

in_tissue                                      int64
percent_mito                                 float64
C2L: ADH+Fibro                               float64
C2L: AER-Basal                               float64
C2L: ArterialEndo                            float64
                                              ...   
self_reported_ethnicity_ontology_term_id    category
sex_ontology_term_id                        category
suspension_type                             category
tissue_type                                 category
tissue_ontology_term_id                     category
Length: 82, dtype: object

In [144]:
dty = pd.DataFrame(adata.var.dtypes, columns = ['dtype'])
for c in dty[dty['dtype'] == 'float64'].index.values:
    adata.var[c] = adata.var[c].astype('float32')
    print(f"changed {c} from float64 to float32")
for c in dty[dty['dtype'] == 'int64'].index.values:
    adata.var[c] = adata.var[c].astype('int32') 
    print(f"changed {c} from int64 to int32")

In [145]:
dty = pd.DataFrame(adata.obs.dtypes, columns = ['dtype'])
for c in dty[dty['dtype'] == 'float64'].index.values:
    adata.obs[c] = adata.obs[c].astype('float32')
    print(f"changed {c} from float64 to float32")
for c in dty[dty['dtype'] == 'int64'].index.values:
    adata.obs[c] = adata.obs[c].astype('int32') 
    print(f"changed {c} from int64 to int32")
for c in dty[dty['dtype'] == 'object'].index.values:
    adata.obs[c] = adata.obs[c].astype('category') 
    print(f"changed {c} from object to category")

changed percent_mito from float64 to float32
changed C2L: ADH+Fibro from float64 to float32
changed C2L: AER-Basal from float64 to float32
changed C2L: ArterialEndo from float64 to float32
changed C2L: ArtiChon from float64 to float32
changed C2L: B from float64 to float32
changed C2L: Basal from float64 to float32
changed C2L: CMP/GMP from float64 to float32
changed C2L: ChondroProg from float64 to float32
changed C2L: DC2 from float64 to float32
changed C2L: DefErythro from float64 to float32
changed C2L: DefReticulo from float64 to float32
changed C2L: DermFibro from float64 to float32
changed C2L: DistalMes from float64 to float32
changed C2L: F10+DermFibroProg from float64 to float32
changed C2L: HOXC5+DermFibroProg from float64 to float32
changed C2L: HyperChon from float64 to float32
changed C2L: ISL1+Mes from float64 to float32
changed C2L: InterMusFibro from float64 to float32
changed C2L: InterZone from float64 to float32
changed C2L: LMPP/ELP from float64 to float32
changed 

In [146]:
adata.obs

Unnamed: 0,in_tissue,percent_mito,C2L: ADH+Fibro,C2L: AER-Basal,C2L: ArterialEndo,C2L: ArtiChon,C2L: B,C2L: Basal,C2L: CMP/GMP,C2L: ChondroProg,...,stage,development_stage_ontology_term_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id,self_reported_ethnicity_ontology_term_id,sex_ontology_term_id,suspension_type,tissue_type,tissue_ontology_term_id
AAACAAGTATCTCCCA-1_WSSS_THYst9383362,1,0.0,2.218565,0.010439,0.133704,0.018929,0.004846,0.003178,0.008269,0.013478,...,PCW8.1,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000384,na,tissue,UBERON:0000978
AAACAGAGCGACTCCT-1_WSSS_THYst9383362,1,0.0,0.001227,0.000503,0.001423,1.043135,0.007392,0.000522,0.001944,2.952307,...,PCW8.1,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000384,na,tissue,UBERON:0000978
AAACATTTCCCGGATT-1_WSSS_THYst9383362,1,0.0,0.087009,0.004649,0.018821,0.101900,0.010278,0.012468,0.010037,0.018406,...,PCW8.1,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000384,na,tissue,UBERON:0000978
AAACCCGAACGAAATC-1_WSSS_THYst9383362,1,0.0,1.935481,0.008130,0.479325,0.059956,0.013418,0.003734,0.019167,0.017114,...,PCW8.1,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000384,na,tissue,UBERON:0000978
AAACCGGAAATGTTAA-1_WSSS_THYst9383362,1,0.0,1.796638,0.007400,0.056878,0.006483,0.002821,0.004752,0.004584,0.006229,...,PCW8.1,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000384,na,tissue,UBERON:0000978
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTGTTAGCAAATTCGA-1_WSSS_THYst9383362,1,0.0,0.156103,0.005125,3.687505,0.043005,0.012913,0.001314,0.019932,0.018588,...,PCW8.1,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000384,na,tissue,UBERON:0000978
TTGTTCAGTGTGCTAC-1_WSSS_THYst9383362,1,0.0,0.456833,0.010483,0.074283,0.084770,0.014000,0.007803,0.155755,0.028578,...,PCW8.1,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000384,na,tissue,UBERON:0000978
TTGTTCTAGATACGCT-1_WSSS_THYst9383362,1,0.0,0.006329,0.000652,0.007203,0.016021,0.000708,0.000735,0.000634,0.017554,...,PCW8.1,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000384,na,tissue,UBERON:0000978
TTGTTGTGTGTCAAGA-1_WSSS_THYst9383362,1,0.0,0.671205,0.004617,0.004422,0.014033,0.002987,0.002781,0.003661,0.008560,...,PCW8.1,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000384,na,tissue,UBERON:0000978


In [147]:
adata.obs.columns

Index(['in_tissue', 'percent_mito', 'C2L: ADH+Fibro', 'C2L: AER-Basal',
       'C2L: ArterialEndo', 'C2L: ArtiChon', 'C2L: B', 'C2L: Basal',
       'C2L: CMP/GMP', 'C2L: ChondroProg', 'C2L: DC2', 'C2L: DefErythro',
       'C2L: DefReticulo', 'C2L: DermFibro', 'C2L: DistalMes',
       'C2L: F10+DermFibroProg', 'C2L: HOXC5+DermFibroProg', 'C2L: HyperChon',
       'C2L: ISL1+Mes', 'C2L: InterMusFibro', 'C2L: InterZone',
       'C2L: LMPP/ELP', 'C2L: LymphEndo', 'C2L: MFAP5+Fibro', 'C2L: MYH3+MyoC',
       'C2L: MYL3+MyoC', 'C2L: Macro', 'C2L: Mast', 'C2L: Megakaryo',
       'C2L: Melano', 'C2L: Mes1', 'C2L: Mes2', 'C2L: Mes3', 'C2L: Mes4',
       'C2L: MesCond', 'C2L: Monocyte', 'C2L: Myelocyte', 'C2L: MyoB1',
       'C2L: MyoB2', 'C2L: MyoC1', 'C2L: MyoC2', 'C2L: NK', 'C2L: NeuralFibro',
       'C2L: Neuronal', 'C2L: OCP', 'C2L: OsteoB', 'C2L: PAX3+MyoProg',
       'C2L: PAX7+MyoProg', 'C2L: Perichon', 'C2L: Pericyte', 'C2L: Periderm',
       'C2L: Perimysium', 'C2L: PrehyperChon', 'C2L:

In [148]:
adata.var

Unnamed: 0_level_0,feature_types,genome,feature_is_filtered
ensg_ids,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ENSG00000243485,Gene Expression,GRCh38,False
ENSG00000237613,Gene Expression,GRCh38,False
ENSG00000186092,Gene Expression,GRCh38,False
ENSG00000238009,Gene Expression,GRCh38,False
ENSG00000239945,Gene Expression,GRCh38,False
...,...,...,...
ENSG00000277856,Gene Expression,GRCh38,False
ENSG00000275063,Gene Expression,GRCh38,False
ENSG00000271254,Gene Expression,GRCh38,False
ENSG00000277475,Gene Expression,GRCh38,False


In [149]:
adata.obs

Unnamed: 0,in_tissue,percent_mito,C2L: ADH+Fibro,C2L: AER-Basal,C2L: ArterialEndo,C2L: ArtiChon,C2L: B,C2L: Basal,C2L: CMP/GMP,C2L: ChondroProg,...,stage,development_stage_ontology_term_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id,self_reported_ethnicity_ontology_term_id,sex_ontology_term_id,suspension_type,tissue_type,tissue_ontology_term_id
AAACAAGTATCTCCCA-1_WSSS_THYst9383362,1,0.0,2.218565,0.010439,0.133704,0.018929,0.004846,0.003178,0.008269,0.013478,...,PCW8.1,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000384,na,tissue,UBERON:0000978
AAACAGAGCGACTCCT-1_WSSS_THYst9383362,1,0.0,0.001227,0.000503,0.001423,1.043135,0.007392,0.000522,0.001944,2.952307,...,PCW8.1,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000384,na,tissue,UBERON:0000978
AAACATTTCCCGGATT-1_WSSS_THYst9383362,1,0.0,0.087009,0.004649,0.018821,0.101900,0.010278,0.012468,0.010037,0.018406,...,PCW8.1,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000384,na,tissue,UBERON:0000978
AAACCCGAACGAAATC-1_WSSS_THYst9383362,1,0.0,1.935481,0.008130,0.479325,0.059956,0.013418,0.003734,0.019167,0.017114,...,PCW8.1,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000384,na,tissue,UBERON:0000978
AAACCGGAAATGTTAA-1_WSSS_THYst9383362,1,0.0,1.796638,0.007400,0.056878,0.006483,0.002821,0.004752,0.004584,0.006229,...,PCW8.1,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000384,na,tissue,UBERON:0000978
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTGTTAGCAAATTCGA-1_WSSS_THYst9383362,1,0.0,0.156103,0.005125,3.687505,0.043005,0.012913,0.001314,0.019932,0.018588,...,PCW8.1,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000384,na,tissue,UBERON:0000978
TTGTTCAGTGTGCTAC-1_WSSS_THYst9383362,1,0.0,0.456833,0.010483,0.074283,0.084770,0.014000,0.007803,0.155755,0.028578,...,PCW8.1,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000384,na,tissue,UBERON:0000978
TTGTTCTAGATACGCT-1_WSSS_THYst9383362,1,0.0,0.006329,0.000652,0.007203,0.016021,0.000708,0.000735,0.000634,0.017554,...,PCW8.1,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000384,na,tissue,UBERON:0000978
TTGTTGTGTGTCAAGA-1_WSSS_THYst9383362,1,0.0,0.671205,0.004617,0.004422,0.014033,0.002987,0.002781,0.003661,0.008560,...,PCW8.1,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000384,na,tissue,UBERON:0000978


In [150]:
adata.obs.columns

Index(['in_tissue', 'percent_mito', 'C2L: ADH+Fibro', 'C2L: AER-Basal',
       'C2L: ArterialEndo', 'C2L: ArtiChon', 'C2L: B', 'C2L: Basal',
       'C2L: CMP/GMP', 'C2L: ChondroProg', 'C2L: DC2', 'C2L: DefErythro',
       'C2L: DefReticulo', 'C2L: DermFibro', 'C2L: DistalMes',
       'C2L: F10+DermFibroProg', 'C2L: HOXC5+DermFibroProg', 'C2L: HyperChon',
       'C2L: ISL1+Mes', 'C2L: InterMusFibro', 'C2L: InterZone',
       'C2L: LMPP/ELP', 'C2L: LymphEndo', 'C2L: MFAP5+Fibro', 'C2L: MYH3+MyoC',
       'C2L: MYL3+MyoC', 'C2L: Macro', 'C2L: Mast', 'C2L: Megakaryo',
       'C2L: Melano', 'C2L: Mes1', 'C2L: Mes2', 'C2L: Mes3', 'C2L: Mes4',
       'C2L: MesCond', 'C2L: Monocyte', 'C2L: Myelocyte', 'C2L: MyoB1',
       'C2L: MyoB2', 'C2L: MyoC1', 'C2L: MyoC2', 'C2L: NK', 'C2L: NeuralFibro',
       'C2L: Neuronal', 'C2L: OCP', 'C2L: OsteoB', 'C2L: PAX3+MyoProg',
       'C2L: PAX7+MyoProg', 'C2L: Perichon', 'C2L: Pericyte', 'C2L: Periderm',
       'C2L: Perimysium', 'C2L: PrehyperChon', 'C2L:

In [151]:
#check the format of expression matrix

In [152]:
adata.X

<2884x33137 sparse matrix of type '<class 'numpy.float32'>'
	with 3663077 stored elements in Compressed Sparse Row format>

In [153]:
#write the curated object to final_objects folder

In [154]:
adata.write('/lustre/scratch127/cellgen/cellgeni/cxgportal_sets/limb_cell_atlas/Final_objects/visium_slide4.h5ad', compression = 'gzip')