### **Curating Combined_human_and_mouse_limb.h5ad**

Article: A human embryonic limb cell atlas resolved in space and time

DOI: https://doi.org/10.1038/s41586-023-06806-x

Data Source : https://developmental.cellatlas.io/embryonic-limb

##### **Mount farm**

mount-farm

##### **Packages required for curation**

In [1]:
#Import all packages required for curation

In [2]:
import numpy as np
import pandas as pd
import scanpy as sc
import scipy
from tqdm import tqdm
from scipy import sparse
from scipy.sparse import csr_matrix
import anndata as ad
import os
import subprocess
import math
import re
from collections import Counter

### **Curation Schema**

##### **X (Matrix Layers)**

##### **AnnData object**

In [3]:
# Load the AnnData object

In [4]:
adata = sc.read_h5ad('/lustre/scratch127/cellgen/cellgeni/cxgportal_sets/limb_cell_atlas/Data/New_data/Combined_human_and_mouse_limb.h5ad')

In [5]:
# View the AnnData object

In [6]:
adata

AnnData object with n_obs × n_vars = 341022 × 35849
    obs: 'batch', 'percent_mito', 'n_counts', 'n_genes', 'bh_pval', 'region', 'S_score', 'G2M_score', 'leiden_R', 'sequencing_center', 'anatomy', 'project', 'organism', 'celltype_integrated', 'human_stage', 'mouse_stage', 'chemistry', 'celltype_per_organism', 'tissue', 'multimap_density_organism'
    uns: 'batch_colors', 'celltype_colors', 'celltype_integrated_colors', 'celltype_per_organism_colors', 'human_stage_colors', 'leiden', 'leiden_R_colors', 'leiden_colors', 'mouse_stage_colors', 'multimap_density_organism_params', 'neighbors', 'organism_colors', 'rank_genes_groups', 'tissue_colors'
    obsm: 'X_multimap', 'X_multimap_separate', 'X_pca', 'X_umap', 'X_umap_separate'
    obsp: 'connectivities'

##### **X - expression matrix**

In [7]:
# View the expression matrix of the anndata object.

In [8]:
adata.X

<341022x35849 sparse matrix of type '<class 'numpy.float32'>'
	with 1009014321 stored elements in Compressed Sparse Row format>

In [9]:
# Print the matrix to check whether they are normalized counts or raw counts. if the matrix has floating numbers,they are normalized counts.if they are integers, they are raw counts.

In [10]:
print(adata.X)

  (0, 25136)	2.4003232
  (0, 25140)	3.0470657
  (0, 25141)	1.4683918
  (0, 25139)	2.665072
  (0, 25135)	2.0392044
  (0, 25131)	1.4683918
  (0, 25134)	2.874199
  (0, 25133)	3.322802
  (0, 25138)	2.4003232
  (0, 25137)	1.4683918
  (0, 24484)	1.4683918
  (0, 11695)	1.4683918
  (0, 31819)	1.4683918
  (0, 20965)	1.4683918
  (0, 9682)	1.4683918
  (0, 25076)	2.0392044
  (0, 9003)	2.4003232
  (0, 11950)	1.4683918
  (0, 31260)	1.4683918
  (0, 21364)	2.0392044
  (0, 21358)	1.4683918
  (0, 21212)	1.4683918
  (0, 31251)	2.4003232
  (0, 9002)	1.4683918
  (0, 24789)	2.0392044
  :	:
  (341021, 9969)	0.59506035
  (341021, 22135)	0.59506035
  (341021, 454)	0.59506035
  (341021, 443)	0.59506035
  (341021, 442)	0.59506035
  (341021, 438)	0.59506035
  (341021, 435)	0.59506035
  (341021, 426)	1.6224927
  (341021, 9856)	0.59506035
  (341021, 10057)	0.59506035
  (341021, 361)	0.96556866
  (341021, 338)	0.96556866
  (341021, 332)	1.2353032
  (341021, 12145)	0.59506035
  (341021, 10043)	0.59506035
  (341021, 1

##### **Raw counts matrix**

In [11]:
araw = sc.read_h5ad('/lustre/scratch127/cellgen/cellgeni/cxgportal_sets/limb_cell_atlas/Data/Raw_data/human_mouse_limb_raw.h5ad')

In [12]:
araw.var

ENSG00000000003
ENSG00000000005
ENSG00000000419
ENSG00000000457
ENSG00000000460
...
ENSMUSG00000118522
ENSMUSG00000118537
ENSMUSG00000118550
ENSMUSG00000118560
ENSMUSG00000118578


In [13]:
print(araw.X)

  (0, 16146)	3.0
  (0, 16227)	6.0
  (0, 16848)	1.0
  (0, 16203)	4.0
  (0, 16257)	2.0
  (0, 16235)	1.0
  (0, 16138)	5.0
  (0, 16182)	8.0
  (0, 16164)	3.0
  (0, 16229)	1.0
  (0, 17116)	1.0
  (0, 7892)	1.0
  (0, 14599)	1.0
  (0, 16689)	1.0
  (0, 14763)	1.0
  (0, 20129)	2.0
  (0, 19979)	3.0
  (0, 16716)	1.0
  (0, 9900)	1.0
  (0, 9898)	2.0
  (0, 7893)	1.0
  (0, 7891)	1.0
  (0, 7894)	3.0
  (0, 9417)	1.0
  (0, 17113)	2.0
  :	:
  (341021, 30525)	1.0
  (341021, 35653)	1.0
  (341021, 46887)	1.0
  (341021, 34213)	1.0
  (341021, 40499)	1.0
  (341021, 42429)	1.0
  (341021, 43257)	1.0
  (341021, 45271)	5.0
  (341021, 34705)	1.0
  (341021, 44049)	1.0
  (341021, 42168)	2.0
  (341021, 43245)	2.0
  (341021, 40804)	3.0
  (341021, 38748)	1.0
  (341021, 50897)	1.0
  (341021, 42995)	1.0
  (341021, 35042)	1.0
  (341021, 43168)	2.0
  (341021, 35349)	1.0
  (341021, 43843)	3.0
  (341021, 46193)	1.0
  (341021, 37286)	1.0
  (341021, 33929)	5.0
  (341021, 45779)	1.0
  (341021, 38274)	1.0


In [14]:
araw.obs

Unnamed: 0,batch,percent_mito,n_counts,n_genes,doublet_scores,bh_pval,region,tissue,adj_stage,adj_sample,...,phase,leiden_R,celltype,sequencing_center,stage,dissection,anatomy,final_leiden_R,project,barcode
AAACCTGCACATTTCT_5386STDY7537944,5386STDY7537944,0.011364,2992.0,1152,0.286585,0.872782,WholeLimb,LowerLimb,Pcw5.1,Pcw5.1_WholeLimb_LowerLimb,...,G1,2,Mes1,,,,,,,AAACCTGCACATTTCT-5386STDY7537944-0
AAACCTGCAGGGATTG_5386STDY7537944,5386STDY7537944,0.006805,2939.0,1187,0.194631,0.872782,WholeLimb,LowerLimb,Pcw5.1,Pcw5.1_WholeLimb_LowerLimb,...,G1,40,OCP,,,,,,,AAACCTGCAGGGATTG-5386STDY7537944-0
AAACCTGCATCGGGTC_5386STDY7537944,5386STDY7537944,0.011667,6857.0,2329,0.098748,0.872782,WholeLimb,LowerLimb,Pcw5.1,Pcw5.1_WholeLimb_LowerLimb,...,G2M,2,Mes1,,,,,,,AAACCTGCATCGGGTC-5386STDY7537944-0
AAACCTGCATGGGACA_5386STDY7537944,5386STDY7537944,0.009933,10974.0,2967,0.185345,0.872782,WholeLimb,LowerLimb,Pcw5.1,Pcw5.1_WholeLimb_LowerLimb,...,G2M,2,Mes1,,,,,,,AAACCTGCATGGGACA-5386STDY7537944-0
AAACCTGGTGATGCCC_5386STDY7537944,5386STDY7537944,0.015003,6532.0,2083,0.130000,0.927508,WholeLimb,LowerLimb,Pcw5.1,Pcw5.1_WholeLimb_LowerLimb,...,G1,140,VenousEndo,,,,,,,AAACCTGGTGATGCCC-5386STDY7537944-0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTTGTCAGTCAGGACA_GSM4227227,GSM4227227,0.055120,8164.0,2389,,,,,,,...,G1,231,Pericyte,Kelly,18.5,whole,hindlimb,,Kelly,TTTGTCAGTCAGGACA-GSM4227227-Kelly-1
TTTGTCAGTTCCGTCT_GSM4227227,GSM4227227,0.054932,5878.0,1818,,,,,,,...,G2M,83,Pax7+MyoProg,Kelly,18.5,whole,hindlimb,,Kelly,TTTGTCAGTTCCGTCT-GSM4227227-Kelly-1
TTTGTCATCCACGTGG_GSM4227227,GSM4227227,0.053890,5270.0,1783,,,,,,,...,G1,213,Basal,Kelly,18.5,whole,hindlimb,,Kelly,TTTGTCATCCACGTGG-GSM4227227-Kelly-1
TTTGTCATCCCAAGTA_GSM4227227,GSM4227227,0.064262,10037.0,2650,,,,,,,...,G2M,83,Pax7+MyoProg,Kelly,18.5,whole,hindlimb,,Kelly,TTTGTCATCCCAAGTA-GSM4227227-Kelly-1


##### **Variables(var)**

In [15]:
# View the var of anndata and raw object

In [16]:
adata.var

0610005C13Rik
0610006L08Rik
0610009B22Rik
0610009E02Rik
0610009L18Rik
...
Znrd1as
Znrd2
Zranb2
Zrsr1
mt-Atp8


In [17]:
araw.var

ENSG00000000003
ENSG00000000005
ENSG00000000419
ENSG00000000457
ENSG00000000460
...
ENSMUSG00000118522
ENSMUSG00000118537
ENSMUSG00000118550
ENSMUSG00000118560
ENSMUSG00000118578


In [18]:
import pandas as pd

# Load CSV file into DataFrame
df = pd.read_csv('/lustre/scratch127/cellgen/cellgeni/cxgportal_sets/limb_cell_atlas/Suppl_info/combined_gene_id_mapping.csv')

# Convert DataFrame to dictionary
dict_gene_id = df.set_index('gene_symbol')['Gene_ID'].to_dict()


In [19]:
adata.var['gene_ids'] = adata.var_names.map(dict_gene_id)

In [20]:
adata.var.index = adata.var['gene_ids'] 

In [21]:
adata.var

Unnamed: 0_level_0,gene_ids
gene_ids,Unnamed: 1_level_1
ENSMUSG00000109644,ENSMUSG00000109644
ENSMUSG00000108652,ENSMUSG00000108652
ENSMUSG00000007777,ENSMUSG00000007777
ENSMUSG00000086714,ENSMUSG00000086714
ENSMUSG00000043644,ENSMUSG00000043644
...,...
ENSMUSG00000036214,ENSMUSG00000036214
ENSMUSG00000079478,ENSMUSG00000079478
ENSMUSG00000028180,ENSMUSG00000028180
ENSMUSG00000044068,ENSMUSG00000044068


In [22]:
del adata.var['gene_ids']

In [23]:
adata.var

ENSMUSG00000109644
ENSMUSG00000108652
ENSMUSG00000007777
ENSMUSG00000086714
ENSMUSG00000043644
...
ENSMUSG00000036214
ENSMUSG00000079478
ENSMUSG00000028180
ENSMUSG00000044068
ENSMUSG00000064356


In [24]:
araw.var

ENSG00000000003
ENSG00000000005
ENSG00000000419
ENSG00000000457
ENSG00000000460
...
ENSMUSG00000118522
ENSMUSG00000118537
ENSMUSG00000118550
ENSMUSG00000118560
ENSMUSG00000118578


In [25]:
genes_only_in_adata = set(adata.var_names) - set(araw.var_names)
# Filter adata.var to keep only the genes present in both datasets
adata = adata[:, ~adata.var_names.isin(genes_only_in_adata)]

In [26]:
adata

View of AnnData object with n_obs × n_vars = 341022 × 34981
    obs: 'batch', 'percent_mito', 'n_counts', 'n_genes', 'bh_pval', 'region', 'S_score', 'G2M_score', 'leiden_R', 'sequencing_center', 'anatomy', 'project', 'organism', 'celltype_integrated', 'human_stage', 'mouse_stage', 'chemistry', 'celltype_per_organism', 'tissue', 'multimap_density_organism'
    uns: 'batch_colors', 'celltype_colors', 'human_stage_colors', 'leiden', 'leiden_R_colors', 'leiden_colors', 'mouse_stage_colors', 'multimap_density_organism_params', 'neighbors', 'organism_colors', 'rank_genes_groups', 'tissue_colors'
    obsm: 'X_multimap', 'X_multimap_separate', 'X_pca', 'X_umap', 'X_umap_separate'
    obsp: 'connectivities'

In [27]:
adata.var

ENSMUSG00000109644
ENSMUSG00000108652
ENSMUSG00000007777
ENSMUSG00000086714
ENSMUSG00000043644
...
ENSMUSG00000036214
ENSMUSG00000079478
ENSMUSG00000028180
ENSMUSG00000044068
ENSMUSG00000064356


In [28]:
def add_zero():
	global adata
	global araw
	if araw.shape[1] > adata.shape[1]:
		genes_add = [x for x in araw.var.index.to_list() if x not in adata.var.index.to_list()]
		new_matrix = sparse.csr_matrix((adata.X.data, adata.X.indices, adata.X.indptr), shape = araw.shape)
		all_genes = adata.var.index.to_list()
		all_genes.extend(genes_add)
		new_var = pd.DataFrame(index=all_genes)
		new_var = pd.merge(new_var, araw.var, left_index=True, right_index=True, how='left')
		new_var['feature_is_filtered'] = False
		new_var.loc[genes_add, 'feature_is_filtered'] = True
		new_adata = ad.AnnData(X=new_matrix, obs=adata.obs, var=new_var, uns=adata.uns, obsm=adata.obsm)
		if adata.layers:
			for layer in adata.layers:
				new_layer = sparse.csr_matrix((adata.layers[layer].data, adata.layers[layer].indices, adata.layers[layer].indptr), shape = araw.shape)
				new_adata.layers[layer] = new_layer
		new_adata = new_adata[:,araw.var.index.to_list()]
		new_adata.var = new_adata.var.merge(adata.var, left_index=True, right_index=True, how='left')
		adata = new_adata
	else:
		adata.var['feature_is_filtered'] = False

In [29]:
add_zero()

In [30]:
adata.var

Unnamed: 0,feature_is_filtered
ENSG00000000003,False
ENSG00000000005,False
ENSG00000000419,False
ENSG00000000457,False
ENSG00000000460,False
...,...
ENSMUSG00000118522,False
ENSMUSG00000118537,False
ENSMUSG00000118550,False
ENSMUSG00000118560,True


In [31]:
araw.var

ENSG00000000003
ENSG00000000005
ENSG00000000419
ENSG00000000457
ENSG00000000460
...
ENSMUSG00000118522
ENSMUSG00000118537
ENSMUSG00000118550
ENSMUSG00000118560
ENSMUSG00000118578


In [32]:
adata.X

<341022x50903 sparse matrix of type '<class 'numpy.float32'>'
	with 1004077818 stored elements in Compressed Sparse Row format>

In [33]:
false_count = (adata.var['feature_is_filtered']== False).sum()

In [34]:
false_count

34981

In [35]:
adata.var

Unnamed: 0,feature_is_filtered
ENSG00000000003,False
ENSG00000000005,False
ENSG00000000419,False
ENSG00000000457,False
ENSG00000000460,False
...,...
ENSMUSG00000118522,False
ENSMUSG00000118537,False
ENSMUSG00000118550,False
ENSMUSG00000118560,True


In [36]:
araw.var

ENSG00000000003
ENSG00000000005
ENSG00000000419
ENSG00000000457
ENSG00000000460
...
ENSMUSG00000118522
ENSMUSG00000118537
ENSMUSG00000118550
ENSMUSG00000118560
ENSMUSG00000118578


#### **obs (Cell metadata)**

In [37]:
#view obs

In [38]:
adata.obs

Unnamed: 0,batch,percent_mito,n_counts,n_genes,bh_pval,region,S_score,G2M_score,leiden_R,sequencing_center,anatomy,project,organism,celltype_integrated,human_stage,mouse_stage,chemistry,celltype_per_organism,tissue,multimap_density_organism
AAACCTGCACATTTCT-5386STDY7537944,5386STDY7537944,0.011364,2992.0,1152,0.872782,WholeLimb,-0.117172,-0.204391,00,Sanger,hindlimb,Zhang,human,EarlyProxMes,Pcw5.1,,3v2,Mes1,SMC & Fibro,0.509896
AAACCTGCAGGGATTG-5386STDY7537944,5386STDY7537944,0.006805,2939.0,1187,0.872782,WholeLimb,-0.104853,-0.170856,00,Sanger,hindlimb,Zhang,human,EarlyProxMes,Pcw5.1,,3v2,OCP,SMC & Fibro,0.656522
AAACCTGCATCGGGTC-5386STDY7537944,5386STDY7537944,0.011667,6857.0,2329,0.872782,WholeLimb,0.033006,0.277833,00,Sanger,hindlimb,Zhang,human,EarlyProxMes,Pcw5.1,,3v2,Mes1,SMC & Fibro,0.022699
AAACCTGCATGGGACA-5386STDY7537944,5386STDY7537944,0.009933,10974.0,2967,0.872782,WholeLimb,0.106430,0.407822,00,Sanger,hindlimb,Zhang,human,EarlyProxMes,Pcw5.1,,3v2,Mes1,SMC & Fibro,0.148222
AAACCTGGTGATGCCC-5386STDY7537944,5386STDY7537944,0.015003,6532.0,2083,0.927508,WholeLimb,-0.120746,-0.143064,140,Sanger,hindlimb,Zhang,human,VenousEndo,Pcw5.1,,3v2,VenousEndo,Endothelial,0.108576
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTTGTCAGTCAGGACA-GSM4227227-Kelly,GSM4227227,0.055120,8164.0,2389,,WholeLimb,-0.126943,-0.117261,172,Kelly,hindlimb,Kelly,mouse,Pericyte,,18.5,3v2,Pericyte,SMC & Fibro,0.080580
TTTGTCAGTTCCGTCT-GSM4227227-Kelly,GSM4227227,0.054932,5878.0,1818,,WholeLimb,0.149806,0.512961,60,Kelly,hindlimb,Kelly,mouse,PAX7+MyoProg,,18.5,3v2,Pax7+MyoProg,Skeletal muscle,0.260227
TTTGTCATCCACGTGG-GSM4227227-Kelly,GSM4227227,0.053890,5270.0,1783,,WholeLimb,-0.121107,-0.090475,103,Kelly,hindlimb,Kelly,mouse,Suprabasal,,18.5,3v2,Basal,Epithelial,0.114437
TTTGTCATCCCAAGTA-GSM4227227-Kelly,GSM4227227,0.064262,10037.0,2650,,WholeLimb,0.063508,0.542747,60,Kelly,hindlimb,Kelly,mouse,PAX7+MyoProg,,18.5,3v2,Pax7+MyoProg,Skeletal muscle,0.144948


In [39]:
# view the column names in obs

In [40]:
adata.obs.columns

Index(['batch', 'percent_mito', 'n_counts', 'n_genes', 'bh_pval', 'region',
       'S_score', 'G2M_score', 'leiden_R', 'sequencing_center', 'anatomy',
       'project', 'organism', 'celltype_integrated', 'human_stage',
       'mouse_stage', 'chemistry', 'celltype_per_organism', 'tissue',
       'multimap_density_organism'],
      dtype='object')

In [41]:
adata.obs

Unnamed: 0,batch,percent_mito,n_counts,n_genes,bh_pval,region,S_score,G2M_score,leiden_R,sequencing_center,anatomy,project,organism,celltype_integrated,human_stage,mouse_stage,chemistry,celltype_per_organism,tissue,multimap_density_organism
AAACCTGCACATTTCT-5386STDY7537944,5386STDY7537944,0.011364,2992.0,1152,0.872782,WholeLimb,-0.117172,-0.204391,00,Sanger,hindlimb,Zhang,human,EarlyProxMes,Pcw5.1,,3v2,Mes1,SMC & Fibro,0.509896
AAACCTGCAGGGATTG-5386STDY7537944,5386STDY7537944,0.006805,2939.0,1187,0.872782,WholeLimb,-0.104853,-0.170856,00,Sanger,hindlimb,Zhang,human,EarlyProxMes,Pcw5.1,,3v2,OCP,SMC & Fibro,0.656522
AAACCTGCATCGGGTC-5386STDY7537944,5386STDY7537944,0.011667,6857.0,2329,0.872782,WholeLimb,0.033006,0.277833,00,Sanger,hindlimb,Zhang,human,EarlyProxMes,Pcw5.1,,3v2,Mes1,SMC & Fibro,0.022699
AAACCTGCATGGGACA-5386STDY7537944,5386STDY7537944,0.009933,10974.0,2967,0.872782,WholeLimb,0.106430,0.407822,00,Sanger,hindlimb,Zhang,human,EarlyProxMes,Pcw5.1,,3v2,Mes1,SMC & Fibro,0.148222
AAACCTGGTGATGCCC-5386STDY7537944,5386STDY7537944,0.015003,6532.0,2083,0.927508,WholeLimb,-0.120746,-0.143064,140,Sanger,hindlimb,Zhang,human,VenousEndo,Pcw5.1,,3v2,VenousEndo,Endothelial,0.108576
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTTGTCAGTCAGGACA-GSM4227227-Kelly,GSM4227227,0.055120,8164.0,2389,,WholeLimb,-0.126943,-0.117261,172,Kelly,hindlimb,Kelly,mouse,Pericyte,,18.5,3v2,Pericyte,SMC & Fibro,0.080580
TTTGTCAGTTCCGTCT-GSM4227227-Kelly,GSM4227227,0.054932,5878.0,1818,,WholeLimb,0.149806,0.512961,60,Kelly,hindlimb,Kelly,mouse,PAX7+MyoProg,,18.5,3v2,Pax7+MyoProg,Skeletal muscle,0.260227
TTTGTCATCCACGTGG-GSM4227227-Kelly,GSM4227227,0.053890,5270.0,1783,,WholeLimb,-0.121107,-0.090475,103,Kelly,hindlimb,Kelly,mouse,Suprabasal,,18.5,3v2,Basal,Epithelial,0.114437
TTTGTCATCCCAAGTA-GSM4227227-Kelly,GSM4227227,0.064262,10037.0,2650,,WholeLimb,0.063508,0.542747,60,Kelly,hindlimb,Kelly,mouse,PAX7+MyoProg,,18.5,3v2,Pax7+MyoProg,Skeletal muscle,0.144948


#### **assay_ontology_term_id**

In [42]:
list(adata.obs['chemistry'].unique())

['3v2', '5v1', '3v3']

In [43]:
mapping= {'3v3' : 'EFO:0009922', '3v2':'EFO:0009899', '5v1' :'EFO:0011025'
         }

In [44]:
adata.obs['assay_ontology_term_id']  = adata.obs['chemistry'].map(mapping)

In [45]:
adata.obs['assay_ontology_term_id'] = adata.obs['assay_ontology_term_id'].astype('category')

In [46]:
# view adata.obs

In [47]:
adata.obs

Unnamed: 0,batch,percent_mito,n_counts,n_genes,bh_pval,region,S_score,G2M_score,leiden_R,sequencing_center,...,project,organism,celltype_integrated,human_stage,mouse_stage,chemistry,celltype_per_organism,tissue,multimap_density_organism,assay_ontology_term_id
AAACCTGCACATTTCT-5386STDY7537944,5386STDY7537944,0.011364,2992.0,1152,0.872782,WholeLimb,-0.117172,-0.204391,00,Sanger,...,Zhang,human,EarlyProxMes,Pcw5.1,,3v2,Mes1,SMC & Fibro,0.509896,EFO:0009899
AAACCTGCAGGGATTG-5386STDY7537944,5386STDY7537944,0.006805,2939.0,1187,0.872782,WholeLimb,-0.104853,-0.170856,00,Sanger,...,Zhang,human,EarlyProxMes,Pcw5.1,,3v2,OCP,SMC & Fibro,0.656522,EFO:0009899
AAACCTGCATCGGGTC-5386STDY7537944,5386STDY7537944,0.011667,6857.0,2329,0.872782,WholeLimb,0.033006,0.277833,00,Sanger,...,Zhang,human,EarlyProxMes,Pcw5.1,,3v2,Mes1,SMC & Fibro,0.022699,EFO:0009899
AAACCTGCATGGGACA-5386STDY7537944,5386STDY7537944,0.009933,10974.0,2967,0.872782,WholeLimb,0.106430,0.407822,00,Sanger,...,Zhang,human,EarlyProxMes,Pcw5.1,,3v2,Mes1,SMC & Fibro,0.148222,EFO:0009899
AAACCTGGTGATGCCC-5386STDY7537944,5386STDY7537944,0.015003,6532.0,2083,0.927508,WholeLimb,-0.120746,-0.143064,140,Sanger,...,Zhang,human,VenousEndo,Pcw5.1,,3v2,VenousEndo,Endothelial,0.108576,EFO:0009899
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTTGTCAGTCAGGACA-GSM4227227-Kelly,GSM4227227,0.055120,8164.0,2389,,WholeLimb,-0.126943,-0.117261,172,Kelly,...,Kelly,mouse,Pericyte,,18.5,3v2,Pericyte,SMC & Fibro,0.080580,EFO:0009899
TTTGTCAGTTCCGTCT-GSM4227227-Kelly,GSM4227227,0.054932,5878.0,1818,,WholeLimb,0.149806,0.512961,60,Kelly,...,Kelly,mouse,PAX7+MyoProg,,18.5,3v2,Pax7+MyoProg,Skeletal muscle,0.260227,EFO:0009899
TTTGTCATCCACGTGG-GSM4227227-Kelly,GSM4227227,0.053890,5270.0,1783,,WholeLimb,-0.121107,-0.090475,103,Kelly,...,Kelly,mouse,Suprabasal,,18.5,3v2,Basal,Epithelial,0.114437,EFO:0009899
TTTGTCATCCCAAGTA-GSM4227227-Kelly,GSM4227227,0.064262,10037.0,2650,,WholeLimb,0.063508,0.542747,60,Kelly,...,Kelly,mouse,PAX7+MyoProg,,18.5,3v2,Pax7+MyoProg,Skeletal muscle,0.144948,EFO:0009899


#### **cell_type_ontology_term_id**

In [48]:
#identify the column in adata.obs related. to cell type annotation

In [49]:
adata.obs.columns

Index(['batch', 'percent_mito', 'n_counts', 'n_genes', 'bh_pval', 'region',
       'S_score', 'G2M_score', 'leiden_R', 'sequencing_center', 'anatomy',
       'project', 'organism', 'celltype_integrated', 'human_stage',
       'mouse_stage', 'chemistry', 'celltype_per_organism', 'tissue',
       'multimap_density_organism', 'assay_ontology_term_id'],
      dtype='object')

In [50]:
list(adata.obs['celltype_integrated'].unique())

['EarlyProxMes',
 'VenousEndo',
 'EarlyDistMes',
 'PrimErythro',
 'Mes1',
 'PAX3+MyoProg',
 'Basal',
 'Prox/Mes2',
 'TransMes',
 'PAX3/7+MyoProg',
 'HOXC5+DermFibroProg',
 'Periderm',
 'MyoB1',
 'DefErythro',
 'OCP',
 'Mes3/4',
 'ArterialEndo',
 'SchwannProg',
 'AER-Basal',
 'MyoC',
 'InterZone',
 'Suprabasal',
 'MYL3+MyoC',
 'Melano',
 'Monocyte',
 'Megakaryo',
 'PAX7+MyoProg',
 'SMProg',
 'ADH+Fibro',
 'DermFibro',
 'Macro',
 'MFAP5+Fibro',
 'Teno',
 'Pericyte',
 'MyoB2',
 'Schwann',
 'PeriChon',
 'PrehyperChon',
 'Lympho',
 'SMC',
 'LymphEndo',
 'DC2',
 'SynapSchwann',
 'ProlifChon',
 'MYH3+MyoC',
 'HyperChon',
 'ArtiChon',
 'OsteoB',
 'Granulo/NK',
 'DistalMes',
 'RestingChon',
 'DefReticulo',
 'ChondroProg',
 'Neuronal',
 'RDH10+DistalMes',
 'CyclingMes',
 'ProxMes']

In [51]:
# create a dictionary of cell type and ontology term

In [52]:
mapping= {
'Mes1':'CL:0008019',
'OCP':'CL:0000062',
'VenousEndo':'CL:0002543',
'Mes4':'CL:0008019',
'TransMes':'CL:0008019',
'PrimErythro2':'CL:0002355',
'STMN2+Fibro':'CL:0002551',
'PAX3+MyoProg':'CL:0000515',
'OsteoB':'CL:0000062',
'AER-Basal':'CL:0000646',
'MesCond':'CL:0000138',
'DistalMes':'CL:0008019',
'Periderm':'CL:0000078',
'MyoB1':'CL:0000056',
'ISL1+Mes':'CL:0008019',
'ChondroProg':'CL:0000138',
'Mes2':'CL:0008019',
'InterZone':'CL:0008019',
'ProxMes':'CL:0008019',
'Prox/Mes2':'CL:0008019',
'RDH10+DistalMes':'CL:0008019',
'ArterialEndo':'CL:1000413',
'SchwannProg':'CL:0002375',
'HOXC5+DermFibroProg':'CL:0002551',
'MyoC1':'CL:0000187',
'PrimErythro1':'CL:0002355',
'MYL3+MyoC':'CL:0000187',
'Mes3':'CL:0008019',
'Megakaryo':'CL:0000556',
'Monocyte':'CL:0000576',
'SynapSchwann':'CL:0002573',
'PAX7+MyoProg':'CL:0000515',
'Pericyte':'CL:0000669',
'ADH+Fibro':'CL:1001609',
'DermFibro':'CL:0002551',
'Macro':'CL:0000235',
'MFAP5+Fibro':'CL:0000057',
'TenoProg':'CL:0000388',
'SMC':'CL:0000192',
'Perimysium':'CL:0002320',
'SMProg':'CL:0000192',
'MyoB2':'CL:0000056',
'Schwann':'CL:0002573',
'F10+DermFibroProg':'CL:0002551',
'NeuralFibro':'CL:0000057',
'Teno':'CL:0000388',
'PrehyperChon':'CL:0000138',
'NK':'CL:0000623',
'MyoC2':'CL:0000187',
'B':'CL:0000236',
'PeriChon':'CL:0000058',
'InterMusFibro':'CL:1001609',
'Basal':'CL:0000646',
'LymphEndo':'CL:0002138',
'DC2':'CL:0000990',
'LMPP/ELP':'CL:0000936',
'Melano':'CL:0000148',
'MYH3+MyoC':'CL:0000187',
'CMP/GMP':'CL:0000049',
'ProlifChon':'CL:0000138',
'Mast':'CL:0000097',
'Myelocyte':'CL:0002193',
'ArtiChon':'CL:1001607',
'HyperChon':'CL:0000743',
'RestingChon':'CL:0000138',
'DefReticulo':'CL:0000558',
'DefErythro':'CL:0000232',
'Neuronal':'CL:0000540',
'MyoB':'CL:0000056',
'EarlyDistalMes':'CL:0008019',
'EarlyDistMes':'CL:0008019',
'Meox2+Mes':'CL:0008019',
'PrimErythro':'CL:0002355',
'Dpt+Fibro':'CL:0002551',
'EarlyProxMes':'CL:0008019',
'Basophil':'CL:0000767',
'ILC':'CL:0001065',
'SupraBasal1':'CL:0000066',
'Placode':'CL:0002483',
'SupraBasal2':'CL:0000066',
'Nail':'CL:4033056',
'PAX3+PAX7+MyoProg':'CL:0000187',
'PAX3/7+MyoProg':'CL:0000187',
'PAX7+SPON2+MyoProg':'CL:0000187',
'PAX7+NTN5+MyoProg':'CL:0000187',
'CyclingMes':'CL:0008019',
'Mes3/4':'CL:0008019',
'Granulo/NK':'CL:0000623',
'Lympho':'CL:0000542',
'MyoC':'CL:0000187',
'Suprabasal':'CL:0000066'
}

In [53]:
# add the cell_type_ontology_term_id column

In [54]:
adata.obs['cell_type_ontology_term_id'] = adata.obs['celltype_integrated'].map(mapping)

In [55]:
# change datatype of the column

In [56]:
adata.obs['cell_type_ontology_term_id'] = adata.obs['cell_type_ontology_term_id'].astype('category')

In [57]:
# view adata.obs

In [58]:
adata.obs

Unnamed: 0,batch,percent_mito,n_counts,n_genes,bh_pval,region,S_score,G2M_score,leiden_R,sequencing_center,...,organism,celltype_integrated,human_stage,mouse_stage,chemistry,celltype_per_organism,tissue,multimap_density_organism,assay_ontology_term_id,cell_type_ontology_term_id
AAACCTGCACATTTCT-5386STDY7537944,5386STDY7537944,0.011364,2992.0,1152,0.872782,WholeLimb,-0.117172,-0.204391,00,Sanger,...,human,EarlyProxMes,Pcw5.1,,3v2,Mes1,SMC & Fibro,0.509896,EFO:0009899,CL:0008019
AAACCTGCAGGGATTG-5386STDY7537944,5386STDY7537944,0.006805,2939.0,1187,0.872782,WholeLimb,-0.104853,-0.170856,00,Sanger,...,human,EarlyProxMes,Pcw5.1,,3v2,OCP,SMC & Fibro,0.656522,EFO:0009899,CL:0008019
AAACCTGCATCGGGTC-5386STDY7537944,5386STDY7537944,0.011667,6857.0,2329,0.872782,WholeLimb,0.033006,0.277833,00,Sanger,...,human,EarlyProxMes,Pcw5.1,,3v2,Mes1,SMC & Fibro,0.022699,EFO:0009899,CL:0008019
AAACCTGCATGGGACA-5386STDY7537944,5386STDY7537944,0.009933,10974.0,2967,0.872782,WholeLimb,0.106430,0.407822,00,Sanger,...,human,EarlyProxMes,Pcw5.1,,3v2,Mes1,SMC & Fibro,0.148222,EFO:0009899,CL:0008019
AAACCTGGTGATGCCC-5386STDY7537944,5386STDY7537944,0.015003,6532.0,2083,0.927508,WholeLimb,-0.120746,-0.143064,140,Sanger,...,human,VenousEndo,Pcw5.1,,3v2,VenousEndo,Endothelial,0.108576,EFO:0009899,CL:0002543
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTTGTCAGTCAGGACA-GSM4227227-Kelly,GSM4227227,0.055120,8164.0,2389,,WholeLimb,-0.126943,-0.117261,172,Kelly,...,mouse,Pericyte,,18.5,3v2,Pericyte,SMC & Fibro,0.080580,EFO:0009899,CL:0000669
TTTGTCAGTTCCGTCT-GSM4227227-Kelly,GSM4227227,0.054932,5878.0,1818,,WholeLimb,0.149806,0.512961,60,Kelly,...,mouse,PAX7+MyoProg,,18.5,3v2,Pax7+MyoProg,Skeletal muscle,0.260227,EFO:0009899,CL:0000515
TTTGTCATCCACGTGG-GSM4227227-Kelly,GSM4227227,0.053890,5270.0,1783,,WholeLimb,-0.121107,-0.090475,103,Kelly,...,mouse,Suprabasal,,18.5,3v2,Basal,Epithelial,0.114437,EFO:0009899,CL:0000066
TTTGTCATCCCAAGTA-GSM4227227-Kelly,GSM4227227,0.064262,10037.0,2650,,WholeLimb,0.063508,0.542747,60,Kelly,...,mouse,PAX7+MyoProg,,18.5,3v2,Pax7+MyoProg,Skeletal muscle,0.144948,EFO:0009899,CL:0000515


In [59]:
list(adata.obs['cell_type_ontology_term_id'].unique())

['CL:0008019',
 'CL:0002543',
 'CL:0002355',
 'CL:0000515',
 'CL:0000646',
 'CL:0000187',
 'CL:0002551',
 'CL:0000078',
 'CL:0000056',
 'CL:0000232',
 'CL:0000062',
 'CL:1000413',
 'CL:0002375',
 'CL:0000066',
 'CL:0000148',
 'CL:0000576',
 'CL:0000556',
 'CL:0000192',
 'CL:1001609',
 'CL:0000235',
 'CL:0000057',
 'CL:0000388',
 'CL:0000669',
 'CL:0002573',
 'CL:0000058',
 'CL:0000138',
 'CL:0000542',
 'CL:0002138',
 'CL:0000990',
 'CL:0000743',
 'CL:1001607',
 'CL:0000623',
 'CL:0000558',
 'CL:0000540']

#### **development_stage_ontology_term_id**

In [60]:
# identify the column in adata which corresponds to age

In [61]:
adata.obs.columns

Index(['batch', 'percent_mito', 'n_counts', 'n_genes', 'bh_pval', 'region',
       'S_score', 'G2M_score', 'leiden_R', 'sequencing_center', 'anatomy',
       'project', 'organism', 'celltype_integrated', 'human_stage',
       'mouse_stage', 'chemistry', 'celltype_per_organism', 'tissue',
       'multimap_density_organism', 'assay_ontology_term_id',
       'cell_type_ontology_term_id'],
      dtype='object')

In [62]:
list(adata.obs['human_stage'].unique())

['Pcw5.1',
 'Pcw8.4',
 'Pcw8.0',
 'Pcw7.2',
 'Pcw6.5',
 'Pcw5.4',
 'Pcw9.3',
 'Pcw9.0',
 'Pcw6.1',
 'Pcw5.6',
 '']

In [63]:
list(adata.obs['mouse_stage'].unique())

['',
 '12.5',
 '13.5',
 '16.5',
 '11.0',
 '12.0',
 '13.0',
 '15.0',
 '10.5',
 '14.0',
 '9.5',
 '11.5',
 '15.5',
 '18.5']

In [64]:
import pandas as pd

# Assuming adata is your AnnData object
# Extract the 'human_stage' and 'mouse_stage' columns
human_stage = adata.obs['human_stage']
mouse_stage = adata.obs['mouse_stage']

# Concatenate the two columns
stage = human_stage.astype(str) + '_' + mouse_stage.astype(str)

# Add the new 'stage' column to adata.obs
adata.obs['stage'] = stage


In [65]:
list(adata.obs['stage'].unique())

['Pcw5.1_',
 'Pcw8.4_',
 'Pcw8.0_',
 'Pcw7.2_',
 'Pcw6.5_',
 'Pcw5.4_',
 'Pcw9.3_',
 'Pcw9.0_',
 'Pcw6.1_',
 'Pcw5.6_',
 '_12.5',
 '_13.5',
 '_16.5',
 '_11.0',
 '_12.0',
 '_13.0',
 '_15.0',
 '_10.5',
 '_14.0',
 '_9.5',
 '_11.5',
 '_15.5',
 '_18.5']

In [66]:
adata.obs['stage'] = adata.obs['stage'].str.replace('_', '')

In [67]:
list(adata.obs['stage'].unique())

['Pcw5.1',
 'Pcw8.4',
 'Pcw8.0',
 'Pcw7.2',
 'Pcw6.5',
 'Pcw5.4',
 'Pcw9.3',
 'Pcw9.0',
 'Pcw6.1',
 'Pcw5.6',
 '12.5',
 '13.5',
 '16.5',
 '11.0',
 '12.0',
 '13.0',
 '15.0',
 '10.5',
 '14.0',
 '9.5',
 '11.5',
 '15.5',
 '18.5']

In [68]:
mapping= {'Pcw5.1':'HsapDv:0000022',
 'Pcw8.4':'HsapDv:0000030',
 'Pcw8.0':'HsapDv:0000030',
 'Pcw7.2':'HsapDv:0000026',
 'Pcw6.5':'HsapDv:0000025',
 'Pcw5.4':'HsapDv:0000023',
 'Pcw9.3':'HsapDv:0000047',
 'Pcw9.0':'HsapDv:0000046',
 'Pcw6.1':'HsapDv:0000024',
 'Pcw5.6':'HsapDv:0000023',
 '12.5':'MmusDv:0000028', 
 '13.5':'MmusDv:0000029',
 '16.5':'MmusDv:0000033', 
 '11.0':'MmusDv:0000026', 
 '12.0':'MmusDv:0000027', 
 '13.0':'MmusDv:0000028', 
 '15.0':'MmusDv:0000032',
 '10.5':'MmusDv:0000025', 
 '14.0':'MmusDv:0000029', 
 '9.5':'MmusDv:0000023',
 '11.5':'MmusDv:0000027', 
 '15.5':'MmusDv:0000032',
 '18.5':'MmusDv:0000035'
         }

In [69]:
adata.obs['development_stage_ontology_term_id'] = adata.obs['stage'].map(mapping)

In [70]:
# change datatype of the column

In [71]:
adata.obs['development_stage_ontology_term_id'] = adata.obs['development_stage_ontology_term_id'].astype('category')

In [72]:
# view unique values of development_stage_ontology_term_id column

In [73]:
list(adata.obs['development_stage_ontology_term_id'].unique())

['HsapDv:0000022',
 'HsapDv:0000030',
 'HsapDv:0000026',
 'HsapDv:0000025',
 'HsapDv:0000023',
 'HsapDv:0000047',
 'HsapDv:0000046',
 'HsapDv:0000024',
 'MmusDv:0000028',
 'MmusDv:0000029',
 'MmusDv:0000033',
 'MmusDv:0000026',
 'MmusDv:0000027',
 'MmusDv:0000032',
 'MmusDv:0000025',
 'MmusDv:0000023',
 'MmusDv:0000035']

In [74]:
# view adata.obs

In [75]:
adata.obs

Unnamed: 0,batch,percent_mito,n_counts,n_genes,bh_pval,region,S_score,G2M_score,leiden_R,sequencing_center,...,human_stage,mouse_stage,chemistry,celltype_per_organism,tissue,multimap_density_organism,assay_ontology_term_id,cell_type_ontology_term_id,stage,development_stage_ontology_term_id
AAACCTGCACATTTCT-5386STDY7537944,5386STDY7537944,0.011364,2992.0,1152,0.872782,WholeLimb,-0.117172,-0.204391,00,Sanger,...,Pcw5.1,,3v2,Mes1,SMC & Fibro,0.509896,EFO:0009899,CL:0008019,Pcw5.1,HsapDv:0000022
AAACCTGCAGGGATTG-5386STDY7537944,5386STDY7537944,0.006805,2939.0,1187,0.872782,WholeLimb,-0.104853,-0.170856,00,Sanger,...,Pcw5.1,,3v2,OCP,SMC & Fibro,0.656522,EFO:0009899,CL:0008019,Pcw5.1,HsapDv:0000022
AAACCTGCATCGGGTC-5386STDY7537944,5386STDY7537944,0.011667,6857.0,2329,0.872782,WholeLimb,0.033006,0.277833,00,Sanger,...,Pcw5.1,,3v2,Mes1,SMC & Fibro,0.022699,EFO:0009899,CL:0008019,Pcw5.1,HsapDv:0000022
AAACCTGCATGGGACA-5386STDY7537944,5386STDY7537944,0.009933,10974.0,2967,0.872782,WholeLimb,0.106430,0.407822,00,Sanger,...,Pcw5.1,,3v2,Mes1,SMC & Fibro,0.148222,EFO:0009899,CL:0008019,Pcw5.1,HsapDv:0000022
AAACCTGGTGATGCCC-5386STDY7537944,5386STDY7537944,0.015003,6532.0,2083,0.927508,WholeLimb,-0.120746,-0.143064,140,Sanger,...,Pcw5.1,,3v2,VenousEndo,Endothelial,0.108576,EFO:0009899,CL:0002543,Pcw5.1,HsapDv:0000022
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTTGTCAGTCAGGACA-GSM4227227-Kelly,GSM4227227,0.055120,8164.0,2389,,WholeLimb,-0.126943,-0.117261,172,Kelly,...,,18.5,3v2,Pericyte,SMC & Fibro,0.080580,EFO:0009899,CL:0000669,18.5,MmusDv:0000035
TTTGTCAGTTCCGTCT-GSM4227227-Kelly,GSM4227227,0.054932,5878.0,1818,,WholeLimb,0.149806,0.512961,60,Kelly,...,,18.5,3v2,Pax7+MyoProg,Skeletal muscle,0.260227,EFO:0009899,CL:0000515,18.5,MmusDv:0000035
TTTGTCATCCACGTGG-GSM4227227-Kelly,GSM4227227,0.053890,5270.0,1783,,WholeLimb,-0.121107,-0.090475,103,Kelly,...,,18.5,3v2,Basal,Epithelial,0.114437,EFO:0009899,CL:0000066,18.5,MmusDv:0000035
TTTGTCATCCCAAGTA-GSM4227227-Kelly,GSM4227227,0.064262,10037.0,2650,,WholeLimb,0.063508,0.542747,60,Kelly,...,,18.5,3v2,Pax7+MyoProg,Skeletal muscle,0.144948,EFO:0009899,CL:0000515,18.5,MmusDv:0000035


#### **donor_id**

In [76]:
#identify the column in adata.obs which provides donor information

In [77]:
adata.obs.columns

Index(['batch', 'percent_mito', 'n_counts', 'n_genes', 'bh_pval', 'region',
       'S_score', 'G2M_score', 'leiden_R', 'sequencing_center', 'anatomy',
       'project', 'organism', 'celltype_integrated', 'human_stage',
       'mouse_stage', 'chemistry', 'celltype_per_organism', 'tissue',
       'multimap_density_organism', 'assay_ontology_term_id',
       'cell_type_ontology_term_id', 'stage',
       'development_stage_ontology_term_id'],
      dtype='object')

In [78]:
adata.obs

Unnamed: 0,batch,percent_mito,n_counts,n_genes,bh_pval,region,S_score,G2M_score,leiden_R,sequencing_center,...,human_stage,mouse_stage,chemistry,celltype_per_organism,tissue,multimap_density_organism,assay_ontology_term_id,cell_type_ontology_term_id,stage,development_stage_ontology_term_id
AAACCTGCACATTTCT-5386STDY7537944,5386STDY7537944,0.011364,2992.0,1152,0.872782,WholeLimb,-0.117172,-0.204391,00,Sanger,...,Pcw5.1,,3v2,Mes1,SMC & Fibro,0.509896,EFO:0009899,CL:0008019,Pcw5.1,HsapDv:0000022
AAACCTGCAGGGATTG-5386STDY7537944,5386STDY7537944,0.006805,2939.0,1187,0.872782,WholeLimb,-0.104853,-0.170856,00,Sanger,...,Pcw5.1,,3v2,OCP,SMC & Fibro,0.656522,EFO:0009899,CL:0008019,Pcw5.1,HsapDv:0000022
AAACCTGCATCGGGTC-5386STDY7537944,5386STDY7537944,0.011667,6857.0,2329,0.872782,WholeLimb,0.033006,0.277833,00,Sanger,...,Pcw5.1,,3v2,Mes1,SMC & Fibro,0.022699,EFO:0009899,CL:0008019,Pcw5.1,HsapDv:0000022
AAACCTGCATGGGACA-5386STDY7537944,5386STDY7537944,0.009933,10974.0,2967,0.872782,WholeLimb,0.106430,0.407822,00,Sanger,...,Pcw5.1,,3v2,Mes1,SMC & Fibro,0.148222,EFO:0009899,CL:0008019,Pcw5.1,HsapDv:0000022
AAACCTGGTGATGCCC-5386STDY7537944,5386STDY7537944,0.015003,6532.0,2083,0.927508,WholeLimb,-0.120746,-0.143064,140,Sanger,...,Pcw5.1,,3v2,VenousEndo,Endothelial,0.108576,EFO:0009899,CL:0002543,Pcw5.1,HsapDv:0000022
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTTGTCAGTCAGGACA-GSM4227227-Kelly,GSM4227227,0.055120,8164.0,2389,,WholeLimb,-0.126943,-0.117261,172,Kelly,...,,18.5,3v2,Pericyte,SMC & Fibro,0.080580,EFO:0009899,CL:0000669,18.5,MmusDv:0000035
TTTGTCAGTTCCGTCT-GSM4227227-Kelly,GSM4227227,0.054932,5878.0,1818,,WholeLimb,0.149806,0.512961,60,Kelly,...,,18.5,3v2,Pax7+MyoProg,Skeletal muscle,0.260227,EFO:0009899,CL:0000515,18.5,MmusDv:0000035
TTTGTCATCCACGTGG-GSM4227227-Kelly,GSM4227227,0.053890,5270.0,1783,,WholeLimb,-0.121107,-0.090475,103,Kelly,...,,18.5,3v2,Basal,Epithelial,0.114437,EFO:0009899,CL:0000066,18.5,MmusDv:0000035
TTTGTCATCCCAAGTA-GSM4227227-Kelly,GSM4227227,0.064262,10037.0,2650,,WholeLimb,0.063508,0.542747,60,Kelly,...,,18.5,3v2,Pax7+MyoProg,Skeletal muscle,0.144948,EFO:0009899,CL:0000515,18.5,MmusDv:0000035


In [79]:
list(adata.obs['organism'].unique())

['human', 'mouse']

In [80]:
# add the donor_id column

In [81]:
adata.obs['donor_id'] = adata.obs['batch']

In [82]:
#adata.obs['donor_id'] = ['unknown'] * len(adata.obs['names'])

In [83]:
# change datatype of the column

In [84]:
adata.obs['donor_id'] = adata.obs['donor_id'].astype('category')

In [85]:
# view unique values of donor_id column

In [86]:
list(adata.obs['donor_id'].unique())

['5386STDY7537944',
 'FCAImmP7536758',
 'FCAImmP7536759',
 '5386STDY7557335',
 '5386STDY7557336',
 '5386STDY7557337',
 '5478STDY7652318',
 '5478STDY7717491',
 '5478STDY7717492',
 '5478STDY7935101',
 '5478STDY7935102',
 '5478STDY7980348',
 '5478STDY7980349',
 'WSSS_THYst8796437',
 'WSSS_THYst8796438',
 'WSSS_THYst8796439',
 'WSSS_THYst8796440',
 'WSSS_THYst8796441',
 'WSSS_THYst8796442',
 'WSSS_THYst9384953',
 'WSSS_THYst9384954',
 'WSSS_THYst9384955',
 'WSSS_THYst9384956',
 'WSSS_THYst9384957',
 'WSSS_THYst9384958',
 'WSSS_THYst9807808',
 'WSSS_THYst9807809',
 'WSSS_THYst9807810',
 'WSSS_THYst9807811',
 'WSSS_THYst9807812',
 'WSSS_THYst9807813',
 'WSSS_THYst9807814',
 'WSSS_THYst9807815',
 'WSSS_THYst9807816',
 'WSSS_THYst9807817',
 'WSSS_THYst9807818',
 'WSSS_THYst9807819',
 'WSSS_THYst9807820',
 '1_e13_5',
 '3_e11',
 '4_e12',
 '5_e13',
 '6_e15',
 '7_e10_5',
 '8_e15_whole',
 '9_e15_prox',
 '10_e15_mid',
 '11_e15_dist',
 '12_e13',
 '13_e14',
 'GSM4498677',
 'GSM4498678',
 'GSM4227224',

In [87]:
#view obs

In [88]:
adata.obs

Unnamed: 0,batch,percent_mito,n_counts,n_genes,bh_pval,region,S_score,G2M_score,leiden_R,sequencing_center,...,mouse_stage,chemistry,celltype_per_organism,tissue,multimap_density_organism,assay_ontology_term_id,cell_type_ontology_term_id,stage,development_stage_ontology_term_id,donor_id
AAACCTGCACATTTCT-5386STDY7537944,5386STDY7537944,0.011364,2992.0,1152,0.872782,WholeLimb,-0.117172,-0.204391,00,Sanger,...,,3v2,Mes1,SMC & Fibro,0.509896,EFO:0009899,CL:0008019,Pcw5.1,HsapDv:0000022,5386STDY7537944
AAACCTGCAGGGATTG-5386STDY7537944,5386STDY7537944,0.006805,2939.0,1187,0.872782,WholeLimb,-0.104853,-0.170856,00,Sanger,...,,3v2,OCP,SMC & Fibro,0.656522,EFO:0009899,CL:0008019,Pcw5.1,HsapDv:0000022,5386STDY7537944
AAACCTGCATCGGGTC-5386STDY7537944,5386STDY7537944,0.011667,6857.0,2329,0.872782,WholeLimb,0.033006,0.277833,00,Sanger,...,,3v2,Mes1,SMC & Fibro,0.022699,EFO:0009899,CL:0008019,Pcw5.1,HsapDv:0000022,5386STDY7537944
AAACCTGCATGGGACA-5386STDY7537944,5386STDY7537944,0.009933,10974.0,2967,0.872782,WholeLimb,0.106430,0.407822,00,Sanger,...,,3v2,Mes1,SMC & Fibro,0.148222,EFO:0009899,CL:0008019,Pcw5.1,HsapDv:0000022,5386STDY7537944
AAACCTGGTGATGCCC-5386STDY7537944,5386STDY7537944,0.015003,6532.0,2083,0.927508,WholeLimb,-0.120746,-0.143064,140,Sanger,...,,3v2,VenousEndo,Endothelial,0.108576,EFO:0009899,CL:0002543,Pcw5.1,HsapDv:0000022,5386STDY7537944
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTTGTCAGTCAGGACA-GSM4227227-Kelly,GSM4227227,0.055120,8164.0,2389,,WholeLimb,-0.126943,-0.117261,172,Kelly,...,18.5,3v2,Pericyte,SMC & Fibro,0.080580,EFO:0009899,CL:0000669,18.5,MmusDv:0000035,GSM4227227
TTTGTCAGTTCCGTCT-GSM4227227-Kelly,GSM4227227,0.054932,5878.0,1818,,WholeLimb,0.149806,0.512961,60,Kelly,...,18.5,3v2,Pax7+MyoProg,Skeletal muscle,0.260227,EFO:0009899,CL:0000515,18.5,MmusDv:0000035,GSM4227227
TTTGTCATCCACGTGG-GSM4227227-Kelly,GSM4227227,0.053890,5270.0,1783,,WholeLimb,-0.121107,-0.090475,103,Kelly,...,18.5,3v2,Basal,Epithelial,0.114437,EFO:0009899,CL:0000066,18.5,MmusDv:0000035,GSM4227227
TTTGTCATCCCAAGTA-GSM4227227-Kelly,GSM4227227,0.064262,10037.0,2650,,WholeLimb,0.063508,0.542747,60,Kelly,...,18.5,3v2,Pax7+MyoProg,Skeletal muscle,0.144948,EFO:0009899,CL:0000515,18.5,MmusDv:0000035,GSM4227227


In [89]:
adata.obs.columns

Index(['batch', 'percent_mito', 'n_counts', 'n_genes', 'bh_pval', 'region',
       'S_score', 'G2M_score', 'leiden_R', 'sequencing_center', 'anatomy',
       'project', 'organism', 'celltype_integrated', 'human_stage',
       'mouse_stage', 'chemistry', 'celltype_per_organism', 'tissue',
       'multimap_density_organism', 'assay_ontology_term_id',
       'cell_type_ontology_term_id', 'stage',
       'development_stage_ontology_term_id', 'donor_id'],
      dtype='object')

#### **disease_ontology_term_id**

In [90]:
adata.obs['disease_ontology_term_id']= ['PATO:0000461'] * len(adata.obs)

In [91]:
adata.obs

Unnamed: 0,batch,percent_mito,n_counts,n_genes,bh_pval,region,S_score,G2M_score,leiden_R,sequencing_center,...,chemistry,celltype_per_organism,tissue,multimap_density_organism,assay_ontology_term_id,cell_type_ontology_term_id,stage,development_stage_ontology_term_id,donor_id,disease_ontology_term_id
AAACCTGCACATTTCT-5386STDY7537944,5386STDY7537944,0.011364,2992.0,1152,0.872782,WholeLimb,-0.117172,-0.204391,00,Sanger,...,3v2,Mes1,SMC & Fibro,0.509896,EFO:0009899,CL:0008019,Pcw5.1,HsapDv:0000022,5386STDY7537944,PATO:0000461
AAACCTGCAGGGATTG-5386STDY7537944,5386STDY7537944,0.006805,2939.0,1187,0.872782,WholeLimb,-0.104853,-0.170856,00,Sanger,...,3v2,OCP,SMC & Fibro,0.656522,EFO:0009899,CL:0008019,Pcw5.1,HsapDv:0000022,5386STDY7537944,PATO:0000461
AAACCTGCATCGGGTC-5386STDY7537944,5386STDY7537944,0.011667,6857.0,2329,0.872782,WholeLimb,0.033006,0.277833,00,Sanger,...,3v2,Mes1,SMC & Fibro,0.022699,EFO:0009899,CL:0008019,Pcw5.1,HsapDv:0000022,5386STDY7537944,PATO:0000461
AAACCTGCATGGGACA-5386STDY7537944,5386STDY7537944,0.009933,10974.0,2967,0.872782,WholeLimb,0.106430,0.407822,00,Sanger,...,3v2,Mes1,SMC & Fibro,0.148222,EFO:0009899,CL:0008019,Pcw5.1,HsapDv:0000022,5386STDY7537944,PATO:0000461
AAACCTGGTGATGCCC-5386STDY7537944,5386STDY7537944,0.015003,6532.0,2083,0.927508,WholeLimb,-0.120746,-0.143064,140,Sanger,...,3v2,VenousEndo,Endothelial,0.108576,EFO:0009899,CL:0002543,Pcw5.1,HsapDv:0000022,5386STDY7537944,PATO:0000461
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTTGTCAGTCAGGACA-GSM4227227-Kelly,GSM4227227,0.055120,8164.0,2389,,WholeLimb,-0.126943,-0.117261,172,Kelly,...,3v2,Pericyte,SMC & Fibro,0.080580,EFO:0009899,CL:0000669,18.5,MmusDv:0000035,GSM4227227,PATO:0000461
TTTGTCAGTTCCGTCT-GSM4227227-Kelly,GSM4227227,0.054932,5878.0,1818,,WholeLimb,0.149806,0.512961,60,Kelly,...,3v2,Pax7+MyoProg,Skeletal muscle,0.260227,EFO:0009899,CL:0000515,18.5,MmusDv:0000035,GSM4227227,PATO:0000461
TTTGTCATCCACGTGG-GSM4227227-Kelly,GSM4227227,0.053890,5270.0,1783,,WholeLimb,-0.121107,-0.090475,103,Kelly,...,3v2,Basal,Epithelial,0.114437,EFO:0009899,CL:0000066,18.5,MmusDv:0000035,GSM4227227,PATO:0000461
TTTGTCATCCCAAGTA-GSM4227227-Kelly,GSM4227227,0.064262,10037.0,2650,,WholeLimb,0.063508,0.542747,60,Kelly,...,3v2,Pax7+MyoProg,Skeletal muscle,0.144948,EFO:0009899,CL:0000515,18.5,MmusDv:0000035,GSM4227227,PATO:0000461


In [92]:
# change datatype of the column

In [93]:
adata.obs['disease_ontology_term_id'] = adata.obs['disease_ontology_term_id'].astype('category')

In [94]:
# view obs

In [95]:
adata.obs

Unnamed: 0,batch,percent_mito,n_counts,n_genes,bh_pval,region,S_score,G2M_score,leiden_R,sequencing_center,...,chemistry,celltype_per_organism,tissue,multimap_density_organism,assay_ontology_term_id,cell_type_ontology_term_id,stage,development_stage_ontology_term_id,donor_id,disease_ontology_term_id
AAACCTGCACATTTCT-5386STDY7537944,5386STDY7537944,0.011364,2992.0,1152,0.872782,WholeLimb,-0.117172,-0.204391,00,Sanger,...,3v2,Mes1,SMC & Fibro,0.509896,EFO:0009899,CL:0008019,Pcw5.1,HsapDv:0000022,5386STDY7537944,PATO:0000461
AAACCTGCAGGGATTG-5386STDY7537944,5386STDY7537944,0.006805,2939.0,1187,0.872782,WholeLimb,-0.104853,-0.170856,00,Sanger,...,3v2,OCP,SMC & Fibro,0.656522,EFO:0009899,CL:0008019,Pcw5.1,HsapDv:0000022,5386STDY7537944,PATO:0000461
AAACCTGCATCGGGTC-5386STDY7537944,5386STDY7537944,0.011667,6857.0,2329,0.872782,WholeLimb,0.033006,0.277833,00,Sanger,...,3v2,Mes1,SMC & Fibro,0.022699,EFO:0009899,CL:0008019,Pcw5.1,HsapDv:0000022,5386STDY7537944,PATO:0000461
AAACCTGCATGGGACA-5386STDY7537944,5386STDY7537944,0.009933,10974.0,2967,0.872782,WholeLimb,0.106430,0.407822,00,Sanger,...,3v2,Mes1,SMC & Fibro,0.148222,EFO:0009899,CL:0008019,Pcw5.1,HsapDv:0000022,5386STDY7537944,PATO:0000461
AAACCTGGTGATGCCC-5386STDY7537944,5386STDY7537944,0.015003,6532.0,2083,0.927508,WholeLimb,-0.120746,-0.143064,140,Sanger,...,3v2,VenousEndo,Endothelial,0.108576,EFO:0009899,CL:0002543,Pcw5.1,HsapDv:0000022,5386STDY7537944,PATO:0000461
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTTGTCAGTCAGGACA-GSM4227227-Kelly,GSM4227227,0.055120,8164.0,2389,,WholeLimb,-0.126943,-0.117261,172,Kelly,...,3v2,Pericyte,SMC & Fibro,0.080580,EFO:0009899,CL:0000669,18.5,MmusDv:0000035,GSM4227227,PATO:0000461
TTTGTCAGTTCCGTCT-GSM4227227-Kelly,GSM4227227,0.054932,5878.0,1818,,WholeLimb,0.149806,0.512961,60,Kelly,...,3v2,Pax7+MyoProg,Skeletal muscle,0.260227,EFO:0009899,CL:0000515,18.5,MmusDv:0000035,GSM4227227,PATO:0000461
TTTGTCATCCACGTGG-GSM4227227-Kelly,GSM4227227,0.053890,5270.0,1783,,WholeLimb,-0.121107,-0.090475,103,Kelly,...,3v2,Basal,Epithelial,0.114437,EFO:0009899,CL:0000066,18.5,MmusDv:0000035,GSM4227227,PATO:0000461
TTTGTCATCCCAAGTA-GSM4227227-Kelly,GSM4227227,0.064262,10037.0,2650,,WholeLimb,0.063508,0.542747,60,Kelly,...,3v2,Pax7+MyoProg,Skeletal muscle,0.144948,EFO:0009899,CL:0000515,18.5,MmusDv:0000035,GSM4227227,PATO:0000461


#### **is_primary_data**

In [96]:
#change data type of column

In [97]:
adata.obs['is_primary_data']= [False] * len(adata.obs)

In [98]:
adata.obs['is_primary_data'] = adata.obs['is_primary_data'].astype('bool')

In [99]:
# view obs

In [100]:
adata.obs

Unnamed: 0,batch,percent_mito,n_counts,n_genes,bh_pval,region,S_score,G2M_score,leiden_R,sequencing_center,...,celltype_per_organism,tissue,multimap_density_organism,assay_ontology_term_id,cell_type_ontology_term_id,stage,development_stage_ontology_term_id,donor_id,disease_ontology_term_id,is_primary_data
AAACCTGCACATTTCT-5386STDY7537944,5386STDY7537944,0.011364,2992.0,1152,0.872782,WholeLimb,-0.117172,-0.204391,00,Sanger,...,Mes1,SMC & Fibro,0.509896,EFO:0009899,CL:0008019,Pcw5.1,HsapDv:0000022,5386STDY7537944,PATO:0000461,False
AAACCTGCAGGGATTG-5386STDY7537944,5386STDY7537944,0.006805,2939.0,1187,0.872782,WholeLimb,-0.104853,-0.170856,00,Sanger,...,OCP,SMC & Fibro,0.656522,EFO:0009899,CL:0008019,Pcw5.1,HsapDv:0000022,5386STDY7537944,PATO:0000461,False
AAACCTGCATCGGGTC-5386STDY7537944,5386STDY7537944,0.011667,6857.0,2329,0.872782,WholeLimb,0.033006,0.277833,00,Sanger,...,Mes1,SMC & Fibro,0.022699,EFO:0009899,CL:0008019,Pcw5.1,HsapDv:0000022,5386STDY7537944,PATO:0000461,False
AAACCTGCATGGGACA-5386STDY7537944,5386STDY7537944,0.009933,10974.0,2967,0.872782,WholeLimb,0.106430,0.407822,00,Sanger,...,Mes1,SMC & Fibro,0.148222,EFO:0009899,CL:0008019,Pcw5.1,HsapDv:0000022,5386STDY7537944,PATO:0000461,False
AAACCTGGTGATGCCC-5386STDY7537944,5386STDY7537944,0.015003,6532.0,2083,0.927508,WholeLimb,-0.120746,-0.143064,140,Sanger,...,VenousEndo,Endothelial,0.108576,EFO:0009899,CL:0002543,Pcw5.1,HsapDv:0000022,5386STDY7537944,PATO:0000461,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTTGTCAGTCAGGACA-GSM4227227-Kelly,GSM4227227,0.055120,8164.0,2389,,WholeLimb,-0.126943,-0.117261,172,Kelly,...,Pericyte,SMC & Fibro,0.080580,EFO:0009899,CL:0000669,18.5,MmusDv:0000035,GSM4227227,PATO:0000461,False
TTTGTCAGTTCCGTCT-GSM4227227-Kelly,GSM4227227,0.054932,5878.0,1818,,WholeLimb,0.149806,0.512961,60,Kelly,...,Pax7+MyoProg,Skeletal muscle,0.260227,EFO:0009899,CL:0000515,18.5,MmusDv:0000035,GSM4227227,PATO:0000461,False
TTTGTCATCCACGTGG-GSM4227227-Kelly,GSM4227227,0.053890,5270.0,1783,,WholeLimb,-0.121107,-0.090475,103,Kelly,...,Basal,Epithelial,0.114437,EFO:0009899,CL:0000066,18.5,MmusDv:0000035,GSM4227227,PATO:0000461,False
TTTGTCATCCCAAGTA-GSM4227227-Kelly,GSM4227227,0.064262,10037.0,2650,,WholeLimb,0.063508,0.542747,60,Kelly,...,Pax7+MyoProg,Skeletal muscle,0.144948,EFO:0009899,CL:0000515,18.5,MmusDv:0000035,GSM4227227,PATO:0000461,False


#### **organism_ontology_term_id**

In [101]:
# assign organism id 

In [102]:
mapping={'human':'NCBITaxon:9606', 'mouse':'NCBITaxon:10090'}

In [103]:
adata.obs['organism_ontology_term_id'] = adata.obs['organism'].map(mapping)

In [104]:
#change data type of column

In [105]:
adata.obs['organism_ontology_term_id'] = adata.obs['organism_ontology_term_id'].astype('category')

In [106]:
# view obs

In [107]:
adata.obs

Unnamed: 0,batch,percent_mito,n_counts,n_genes,bh_pval,region,S_score,G2M_score,leiden_R,sequencing_center,...,tissue,multimap_density_organism,assay_ontology_term_id,cell_type_ontology_term_id,stage,development_stage_ontology_term_id,donor_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id
AAACCTGCACATTTCT-5386STDY7537944,5386STDY7537944,0.011364,2992.0,1152,0.872782,WholeLimb,-0.117172,-0.204391,00,Sanger,...,SMC & Fibro,0.509896,EFO:0009899,CL:0008019,Pcw5.1,HsapDv:0000022,5386STDY7537944,PATO:0000461,False,NCBITaxon:9606
AAACCTGCAGGGATTG-5386STDY7537944,5386STDY7537944,0.006805,2939.0,1187,0.872782,WholeLimb,-0.104853,-0.170856,00,Sanger,...,SMC & Fibro,0.656522,EFO:0009899,CL:0008019,Pcw5.1,HsapDv:0000022,5386STDY7537944,PATO:0000461,False,NCBITaxon:9606
AAACCTGCATCGGGTC-5386STDY7537944,5386STDY7537944,0.011667,6857.0,2329,0.872782,WholeLimb,0.033006,0.277833,00,Sanger,...,SMC & Fibro,0.022699,EFO:0009899,CL:0008019,Pcw5.1,HsapDv:0000022,5386STDY7537944,PATO:0000461,False,NCBITaxon:9606
AAACCTGCATGGGACA-5386STDY7537944,5386STDY7537944,0.009933,10974.0,2967,0.872782,WholeLimb,0.106430,0.407822,00,Sanger,...,SMC & Fibro,0.148222,EFO:0009899,CL:0008019,Pcw5.1,HsapDv:0000022,5386STDY7537944,PATO:0000461,False,NCBITaxon:9606
AAACCTGGTGATGCCC-5386STDY7537944,5386STDY7537944,0.015003,6532.0,2083,0.927508,WholeLimb,-0.120746,-0.143064,140,Sanger,...,Endothelial,0.108576,EFO:0009899,CL:0002543,Pcw5.1,HsapDv:0000022,5386STDY7537944,PATO:0000461,False,NCBITaxon:9606
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTTGTCAGTCAGGACA-GSM4227227-Kelly,GSM4227227,0.055120,8164.0,2389,,WholeLimb,-0.126943,-0.117261,172,Kelly,...,SMC & Fibro,0.080580,EFO:0009899,CL:0000669,18.5,MmusDv:0000035,GSM4227227,PATO:0000461,False,NCBITaxon:10090
TTTGTCAGTTCCGTCT-GSM4227227-Kelly,GSM4227227,0.054932,5878.0,1818,,WholeLimb,0.149806,0.512961,60,Kelly,...,Skeletal muscle,0.260227,EFO:0009899,CL:0000515,18.5,MmusDv:0000035,GSM4227227,PATO:0000461,False,NCBITaxon:10090
TTTGTCATCCACGTGG-GSM4227227-Kelly,GSM4227227,0.053890,5270.0,1783,,WholeLimb,-0.121107,-0.090475,103,Kelly,...,Epithelial,0.114437,EFO:0009899,CL:0000066,18.5,MmusDv:0000035,GSM4227227,PATO:0000461,False,NCBITaxon:10090
TTTGTCATCCCAAGTA-GSM4227227-Kelly,GSM4227227,0.064262,10037.0,2650,,WholeLimb,0.063508,0.542747,60,Kelly,...,Skeletal muscle,0.144948,EFO:0009899,CL:0000515,18.5,MmusDv:0000035,GSM4227227,PATO:0000461,False,NCBITaxon:10090


#### **self_reported_ethnicity_ontology_term_id**

In [108]:
adata.obs['self_reported_ethnicity_ontology_term_id'] = ['unknown'] * len(adata.obs)

In [109]:
adata.obs['self_reported_ethnicity_ontology_term_id'][adata.obs['organism_ontology_term_id'] != 'NCBITaxon:9606'] = 'na'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  adata.obs['self_reported_ethnicity_ontology_term_id'][adata.obs['organism_ontology_term_id'] != 'NCBITaxon:9606'] = 'na'


In [110]:
# change data type

In [111]:
adata.obs['self_reported_ethnicity_ontology_term_id'] = adata.obs['self_reported_ethnicity_ontology_term_id'].astype('category')

In [112]:
# view obs

In [113]:
adata.obs

Unnamed: 0,batch,percent_mito,n_counts,n_genes,bh_pval,region,S_score,G2M_score,leiden_R,sequencing_center,...,multimap_density_organism,assay_ontology_term_id,cell_type_ontology_term_id,stage,development_stage_ontology_term_id,donor_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id,self_reported_ethnicity_ontology_term_id
AAACCTGCACATTTCT-5386STDY7537944,5386STDY7537944,0.011364,2992.0,1152,0.872782,WholeLimb,-0.117172,-0.204391,00,Sanger,...,0.509896,EFO:0009899,CL:0008019,Pcw5.1,HsapDv:0000022,5386STDY7537944,PATO:0000461,False,NCBITaxon:9606,unknown
AAACCTGCAGGGATTG-5386STDY7537944,5386STDY7537944,0.006805,2939.0,1187,0.872782,WholeLimb,-0.104853,-0.170856,00,Sanger,...,0.656522,EFO:0009899,CL:0008019,Pcw5.1,HsapDv:0000022,5386STDY7537944,PATO:0000461,False,NCBITaxon:9606,unknown
AAACCTGCATCGGGTC-5386STDY7537944,5386STDY7537944,0.011667,6857.0,2329,0.872782,WholeLimb,0.033006,0.277833,00,Sanger,...,0.022699,EFO:0009899,CL:0008019,Pcw5.1,HsapDv:0000022,5386STDY7537944,PATO:0000461,False,NCBITaxon:9606,unknown
AAACCTGCATGGGACA-5386STDY7537944,5386STDY7537944,0.009933,10974.0,2967,0.872782,WholeLimb,0.106430,0.407822,00,Sanger,...,0.148222,EFO:0009899,CL:0008019,Pcw5.1,HsapDv:0000022,5386STDY7537944,PATO:0000461,False,NCBITaxon:9606,unknown
AAACCTGGTGATGCCC-5386STDY7537944,5386STDY7537944,0.015003,6532.0,2083,0.927508,WholeLimb,-0.120746,-0.143064,140,Sanger,...,0.108576,EFO:0009899,CL:0002543,Pcw5.1,HsapDv:0000022,5386STDY7537944,PATO:0000461,False,NCBITaxon:9606,unknown
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTTGTCAGTCAGGACA-GSM4227227-Kelly,GSM4227227,0.055120,8164.0,2389,,WholeLimb,-0.126943,-0.117261,172,Kelly,...,0.080580,EFO:0009899,CL:0000669,18.5,MmusDv:0000035,GSM4227227,PATO:0000461,False,NCBITaxon:10090,na
TTTGTCAGTTCCGTCT-GSM4227227-Kelly,GSM4227227,0.054932,5878.0,1818,,WholeLimb,0.149806,0.512961,60,Kelly,...,0.260227,EFO:0009899,CL:0000515,18.5,MmusDv:0000035,GSM4227227,PATO:0000461,False,NCBITaxon:10090,na
TTTGTCATCCACGTGG-GSM4227227-Kelly,GSM4227227,0.053890,5270.0,1783,,WholeLimb,-0.121107,-0.090475,103,Kelly,...,0.114437,EFO:0009899,CL:0000066,18.5,MmusDv:0000035,GSM4227227,PATO:0000461,False,NCBITaxon:10090,na
TTTGTCATCCCAAGTA-GSM4227227-Kelly,GSM4227227,0.064262,10037.0,2650,,WholeLimb,0.063508,0.542747,60,Kelly,...,0.144948,EFO:0009899,CL:0000515,18.5,MmusDv:0000035,GSM4227227,PATO:0000461,False,NCBITaxon:10090,na


In [114]:
adata.obs.columns

Index(['batch', 'percent_mito', 'n_counts', 'n_genes', 'bh_pval', 'region',
       'S_score', 'G2M_score', 'leiden_R', 'sequencing_center', 'anatomy',
       'project', 'organism', 'celltype_integrated', 'human_stage',
       'mouse_stage', 'chemistry', 'celltype_per_organism', 'tissue',
       'multimap_density_organism', 'assay_ontology_term_id',
       'cell_type_ontology_term_id', 'stage',
       'development_stage_ontology_term_id', 'donor_id',
       'disease_ontology_term_id', 'is_primary_data',
       'organism_ontology_term_id',
       'self_reported_ethnicity_ontology_term_id'],
      dtype='object')

In [115]:
values_for_human = adata.obs['self_reported_ethnicity_ontology_term_id'][adata.obs['organism'] == 'mouse']

# Display the values
print(values_for_human)

AAACCCAAGAAGGATG-1-WSSS_THYst9807808-He    na
AAACCCAAGACTCGAG-1-WSSS_THYst9807808-He    na
AAACCCACACTCTGCT-1-WSSS_THYst9807808-He    na
AAACCCAGTATCAGCT-1-WSSS_THYst9807808-He    na
AAACCCAGTTTCCCAC-1-WSSS_THYst9807808-He    na
                                           ..
TTTGTCAGTCAGGACA-GSM4227227-Kelly          na
TTTGTCAGTTCCGTCT-GSM4227227-Kelly          na
TTTGTCATCCACGTGG-GSM4227227-Kelly          na
TTTGTCATCCCAAGTA-GSM4227227-Kelly          na
TTTGTCATCCTTTACA-GSM4227227-Kelly          na
Name: self_reported_ethnicity_ontology_term_id, Length: 215067, dtype: category
Categories (2, object): ['na', 'unknown']


#### **sex_ontology_term_id**

In [116]:
# identify the column in adata.obs which corresponds to sex

In [117]:
adata.obs.columns

Index(['batch', 'percent_mito', 'n_counts', 'n_genes', 'bh_pval', 'region',
       'S_score', 'G2M_score', 'leiden_R', 'sequencing_center', 'anatomy',
       'project', 'organism', 'celltype_integrated', 'human_stage',
       'mouse_stage', 'chemistry', 'celltype_per_organism', 'tissue',
       'multimap_density_organism', 'assay_ontology_term_id',
       'cell_type_ontology_term_id', 'stage',
       'development_stage_ontology_term_id', 'donor_id',
       'disease_ontology_term_id', 'is_primary_data',
       'organism_ontology_term_id',
       'self_reported_ethnicity_ontology_term_id'],
      dtype='object')

In [118]:
adata.obs

Unnamed: 0,batch,percent_mito,n_counts,n_genes,bh_pval,region,S_score,G2M_score,leiden_R,sequencing_center,...,multimap_density_organism,assay_ontology_term_id,cell_type_ontology_term_id,stage,development_stage_ontology_term_id,donor_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id,self_reported_ethnicity_ontology_term_id
AAACCTGCACATTTCT-5386STDY7537944,5386STDY7537944,0.011364,2992.0,1152,0.872782,WholeLimb,-0.117172,-0.204391,00,Sanger,...,0.509896,EFO:0009899,CL:0008019,Pcw5.1,HsapDv:0000022,5386STDY7537944,PATO:0000461,False,NCBITaxon:9606,unknown
AAACCTGCAGGGATTG-5386STDY7537944,5386STDY7537944,0.006805,2939.0,1187,0.872782,WholeLimb,-0.104853,-0.170856,00,Sanger,...,0.656522,EFO:0009899,CL:0008019,Pcw5.1,HsapDv:0000022,5386STDY7537944,PATO:0000461,False,NCBITaxon:9606,unknown
AAACCTGCATCGGGTC-5386STDY7537944,5386STDY7537944,0.011667,6857.0,2329,0.872782,WholeLimb,0.033006,0.277833,00,Sanger,...,0.022699,EFO:0009899,CL:0008019,Pcw5.1,HsapDv:0000022,5386STDY7537944,PATO:0000461,False,NCBITaxon:9606,unknown
AAACCTGCATGGGACA-5386STDY7537944,5386STDY7537944,0.009933,10974.0,2967,0.872782,WholeLimb,0.106430,0.407822,00,Sanger,...,0.148222,EFO:0009899,CL:0008019,Pcw5.1,HsapDv:0000022,5386STDY7537944,PATO:0000461,False,NCBITaxon:9606,unknown
AAACCTGGTGATGCCC-5386STDY7537944,5386STDY7537944,0.015003,6532.0,2083,0.927508,WholeLimb,-0.120746,-0.143064,140,Sanger,...,0.108576,EFO:0009899,CL:0002543,Pcw5.1,HsapDv:0000022,5386STDY7537944,PATO:0000461,False,NCBITaxon:9606,unknown
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTTGTCAGTCAGGACA-GSM4227227-Kelly,GSM4227227,0.055120,8164.0,2389,,WholeLimb,-0.126943,-0.117261,172,Kelly,...,0.080580,EFO:0009899,CL:0000669,18.5,MmusDv:0000035,GSM4227227,PATO:0000461,False,NCBITaxon:10090,na
TTTGTCAGTTCCGTCT-GSM4227227-Kelly,GSM4227227,0.054932,5878.0,1818,,WholeLimb,0.149806,0.512961,60,Kelly,...,0.260227,EFO:0009899,CL:0000515,18.5,MmusDv:0000035,GSM4227227,PATO:0000461,False,NCBITaxon:10090,na
TTTGTCATCCACGTGG-GSM4227227-Kelly,GSM4227227,0.053890,5270.0,1783,,WholeLimb,-0.121107,-0.090475,103,Kelly,...,0.114437,EFO:0009899,CL:0000066,18.5,MmusDv:0000035,GSM4227227,PATO:0000461,False,NCBITaxon:10090,na
TTTGTCATCCCAAGTA-GSM4227227-Kelly,GSM4227227,0.064262,10037.0,2650,,WholeLimb,0.063508,0.542747,60,Kelly,...,0.144948,EFO:0009899,CL:0000515,18.5,MmusDv:0000035,GSM4227227,PATO:0000461,False,NCBITaxon:10090,na


In [119]:
# list the unique values 

In [120]:
mapping = {'5386STDY7537944':'F',
           '5478STDY7717491':'M',
'5478STDY7717492':'M',
'5478STDY7652318':'M',
'5386STDY7557336':'M',
'5386STDY7557337':'M',
'5386STDY7557335':'F',
'FCAImmP7536758':'F',
'FCAImmP7536759':'F',
'5478STDY7980348':'M',
'5478STDY7980349':'M',
'5478STDY7935101':'F',
'5478STDY7935102':'M',
'WSSS_THYst9384953':'M',
'WSSS_THYst9384954':'M',
'WSSS_THYst9384955':'M',
'WSSS_THYst9384956':'M',
'WSSS_THYst9384957':'M',
'WSSS_THYst9384958':'M',
'WSSS_THYst8796437':'M',
'WSSS_THYst8796438':'M',
'WSSS_THYst8796439':'M',
'WSSS_THYst8796440':'M',
'WSSS_THYst8796441':'M',
'WSSS_THYst8796442':'M',
'WSSS_THYst9383359':'M',
'WSSS_THYst9383360':'M',
'WSSS_THYst9383361':'M',
'WSSS_THYst9383362':'M',
'WSSS_THYst9699523':'F',
'WSSS_THYst9699524':'F',
'WSSS_THYst9699525':'F',
'WSSS_THYst9699526':'unknown'}

In [121]:
adata.obs['sex'] = adata.obs['batch'].map(mapping)

In [122]:
# create a dictionary of sex and sex ontology term id

In [123]:
mapping= {'F': 'PATO:0000383', 'M': 'PATO:0000384', 'unknown':'unknown'}

In [124]:
# add sex_ontology_term_id column

In [125]:
adata.obs['sex_ontology_term_id'] = adata.obs['sex'].map(mapping)

In [126]:
# Assuming adata is your AnnData object

# Replace 'NaN' with 'unknown' in the 'sex_ontology_term_id' column
adata.obs['sex_ontology_term_id'].fillna('unknown', inplace=True)


In [127]:
# change data type

In [128]:
adata.obs['sex_ontology_term_id'] = adata.obs['sex_ontology_term_id'].astype('category')

In [129]:
adata.obs

Unnamed: 0,batch,percent_mito,n_counts,n_genes,bh_pval,region,S_score,G2M_score,leiden_R,sequencing_center,...,cell_type_ontology_term_id,stage,development_stage_ontology_term_id,donor_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id,self_reported_ethnicity_ontology_term_id,sex,sex_ontology_term_id
AAACCTGCACATTTCT-5386STDY7537944,5386STDY7537944,0.011364,2992.0,1152,0.872782,WholeLimb,-0.117172,-0.204391,00,Sanger,...,CL:0008019,Pcw5.1,HsapDv:0000022,5386STDY7537944,PATO:0000461,False,NCBITaxon:9606,unknown,F,PATO:0000383
AAACCTGCAGGGATTG-5386STDY7537944,5386STDY7537944,0.006805,2939.0,1187,0.872782,WholeLimb,-0.104853,-0.170856,00,Sanger,...,CL:0008019,Pcw5.1,HsapDv:0000022,5386STDY7537944,PATO:0000461,False,NCBITaxon:9606,unknown,F,PATO:0000383
AAACCTGCATCGGGTC-5386STDY7537944,5386STDY7537944,0.011667,6857.0,2329,0.872782,WholeLimb,0.033006,0.277833,00,Sanger,...,CL:0008019,Pcw5.1,HsapDv:0000022,5386STDY7537944,PATO:0000461,False,NCBITaxon:9606,unknown,F,PATO:0000383
AAACCTGCATGGGACA-5386STDY7537944,5386STDY7537944,0.009933,10974.0,2967,0.872782,WholeLimb,0.106430,0.407822,00,Sanger,...,CL:0008019,Pcw5.1,HsapDv:0000022,5386STDY7537944,PATO:0000461,False,NCBITaxon:9606,unknown,F,PATO:0000383
AAACCTGGTGATGCCC-5386STDY7537944,5386STDY7537944,0.015003,6532.0,2083,0.927508,WholeLimb,-0.120746,-0.143064,140,Sanger,...,CL:0002543,Pcw5.1,HsapDv:0000022,5386STDY7537944,PATO:0000461,False,NCBITaxon:9606,unknown,F,PATO:0000383
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTTGTCAGTCAGGACA-GSM4227227-Kelly,GSM4227227,0.055120,8164.0,2389,,WholeLimb,-0.126943,-0.117261,172,Kelly,...,CL:0000669,18.5,MmusDv:0000035,GSM4227227,PATO:0000461,False,NCBITaxon:10090,na,,unknown
TTTGTCAGTTCCGTCT-GSM4227227-Kelly,GSM4227227,0.054932,5878.0,1818,,WholeLimb,0.149806,0.512961,60,Kelly,...,CL:0000515,18.5,MmusDv:0000035,GSM4227227,PATO:0000461,False,NCBITaxon:10090,na,,unknown
TTTGTCATCCACGTGG-GSM4227227-Kelly,GSM4227227,0.053890,5270.0,1783,,WholeLimb,-0.121107,-0.090475,103,Kelly,...,CL:0000066,18.5,MmusDv:0000035,GSM4227227,PATO:0000461,False,NCBITaxon:10090,na,,unknown
TTTGTCATCCCAAGTA-GSM4227227-Kelly,GSM4227227,0.064262,10037.0,2650,,WholeLimb,0.063508,0.542747,60,Kelly,...,CL:0000515,18.5,MmusDv:0000035,GSM4227227,PATO:0000461,False,NCBITaxon:10090,na,,unknown


#### **suspension_type**

In [130]:
adata.obs

Unnamed: 0,batch,percent_mito,n_counts,n_genes,bh_pval,region,S_score,G2M_score,leiden_R,sequencing_center,...,cell_type_ontology_term_id,stage,development_stage_ontology_term_id,donor_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id,self_reported_ethnicity_ontology_term_id,sex,sex_ontology_term_id
AAACCTGCACATTTCT-5386STDY7537944,5386STDY7537944,0.011364,2992.0,1152,0.872782,WholeLimb,-0.117172,-0.204391,00,Sanger,...,CL:0008019,Pcw5.1,HsapDv:0000022,5386STDY7537944,PATO:0000461,False,NCBITaxon:9606,unknown,F,PATO:0000383
AAACCTGCAGGGATTG-5386STDY7537944,5386STDY7537944,0.006805,2939.0,1187,0.872782,WholeLimb,-0.104853,-0.170856,00,Sanger,...,CL:0008019,Pcw5.1,HsapDv:0000022,5386STDY7537944,PATO:0000461,False,NCBITaxon:9606,unknown,F,PATO:0000383
AAACCTGCATCGGGTC-5386STDY7537944,5386STDY7537944,0.011667,6857.0,2329,0.872782,WholeLimb,0.033006,0.277833,00,Sanger,...,CL:0008019,Pcw5.1,HsapDv:0000022,5386STDY7537944,PATO:0000461,False,NCBITaxon:9606,unknown,F,PATO:0000383
AAACCTGCATGGGACA-5386STDY7537944,5386STDY7537944,0.009933,10974.0,2967,0.872782,WholeLimb,0.106430,0.407822,00,Sanger,...,CL:0008019,Pcw5.1,HsapDv:0000022,5386STDY7537944,PATO:0000461,False,NCBITaxon:9606,unknown,F,PATO:0000383
AAACCTGGTGATGCCC-5386STDY7537944,5386STDY7537944,0.015003,6532.0,2083,0.927508,WholeLimb,-0.120746,-0.143064,140,Sanger,...,CL:0002543,Pcw5.1,HsapDv:0000022,5386STDY7537944,PATO:0000461,False,NCBITaxon:9606,unknown,F,PATO:0000383
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTTGTCAGTCAGGACA-GSM4227227-Kelly,GSM4227227,0.055120,8164.0,2389,,WholeLimb,-0.126943,-0.117261,172,Kelly,...,CL:0000669,18.5,MmusDv:0000035,GSM4227227,PATO:0000461,False,NCBITaxon:10090,na,,unknown
TTTGTCAGTTCCGTCT-GSM4227227-Kelly,GSM4227227,0.054932,5878.0,1818,,WholeLimb,0.149806,0.512961,60,Kelly,...,CL:0000515,18.5,MmusDv:0000035,GSM4227227,PATO:0000461,False,NCBITaxon:10090,na,,unknown
TTTGTCATCCACGTGG-GSM4227227-Kelly,GSM4227227,0.053890,5270.0,1783,,WholeLimb,-0.121107,-0.090475,103,Kelly,...,CL:0000066,18.5,MmusDv:0000035,GSM4227227,PATO:0000461,False,NCBITaxon:10090,na,,unknown
TTTGTCATCCCAAGTA-GSM4227227-Kelly,GSM4227227,0.064262,10037.0,2650,,WholeLimb,0.063508,0.542747,60,Kelly,...,CL:0000515,18.5,MmusDv:0000035,GSM4227227,PATO:0000461,False,NCBITaxon:10090,na,,unknown


In [131]:
adata.obs['suspension_type'] = ['cell'] * len(adata.obs)

In [132]:
# change data type of column

In [133]:
adata.obs['suspension_type'] = adata.obs['suspension_type'].astype('category')

In [134]:
# view obs

In [135]:
adata.obs

Unnamed: 0,batch,percent_mito,n_counts,n_genes,bh_pval,region,S_score,G2M_score,leiden_R,sequencing_center,...,stage,development_stage_ontology_term_id,donor_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id,self_reported_ethnicity_ontology_term_id,sex,sex_ontology_term_id,suspension_type
AAACCTGCACATTTCT-5386STDY7537944,5386STDY7537944,0.011364,2992.0,1152,0.872782,WholeLimb,-0.117172,-0.204391,00,Sanger,...,Pcw5.1,HsapDv:0000022,5386STDY7537944,PATO:0000461,False,NCBITaxon:9606,unknown,F,PATO:0000383,cell
AAACCTGCAGGGATTG-5386STDY7537944,5386STDY7537944,0.006805,2939.0,1187,0.872782,WholeLimb,-0.104853,-0.170856,00,Sanger,...,Pcw5.1,HsapDv:0000022,5386STDY7537944,PATO:0000461,False,NCBITaxon:9606,unknown,F,PATO:0000383,cell
AAACCTGCATCGGGTC-5386STDY7537944,5386STDY7537944,0.011667,6857.0,2329,0.872782,WholeLimb,0.033006,0.277833,00,Sanger,...,Pcw5.1,HsapDv:0000022,5386STDY7537944,PATO:0000461,False,NCBITaxon:9606,unknown,F,PATO:0000383,cell
AAACCTGCATGGGACA-5386STDY7537944,5386STDY7537944,0.009933,10974.0,2967,0.872782,WholeLimb,0.106430,0.407822,00,Sanger,...,Pcw5.1,HsapDv:0000022,5386STDY7537944,PATO:0000461,False,NCBITaxon:9606,unknown,F,PATO:0000383,cell
AAACCTGGTGATGCCC-5386STDY7537944,5386STDY7537944,0.015003,6532.0,2083,0.927508,WholeLimb,-0.120746,-0.143064,140,Sanger,...,Pcw5.1,HsapDv:0000022,5386STDY7537944,PATO:0000461,False,NCBITaxon:9606,unknown,F,PATO:0000383,cell
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTTGTCAGTCAGGACA-GSM4227227-Kelly,GSM4227227,0.055120,8164.0,2389,,WholeLimb,-0.126943,-0.117261,172,Kelly,...,18.5,MmusDv:0000035,GSM4227227,PATO:0000461,False,NCBITaxon:10090,na,,unknown,cell
TTTGTCAGTTCCGTCT-GSM4227227-Kelly,GSM4227227,0.054932,5878.0,1818,,WholeLimb,0.149806,0.512961,60,Kelly,...,18.5,MmusDv:0000035,GSM4227227,PATO:0000461,False,NCBITaxon:10090,na,,unknown,cell
TTTGTCATCCACGTGG-GSM4227227-Kelly,GSM4227227,0.053890,5270.0,1783,,WholeLimb,-0.121107,-0.090475,103,Kelly,...,18.5,MmusDv:0000035,GSM4227227,PATO:0000461,False,NCBITaxon:10090,na,,unknown,cell
TTTGTCATCCCAAGTA-GSM4227227-Kelly,GSM4227227,0.064262,10037.0,2650,,WholeLimb,0.063508,0.542747,60,Kelly,...,18.5,MmusDv:0000035,GSM4227227,PATO:0000461,False,NCBITaxon:10090,na,,unknown,cell


#### **tissue_type**

In [136]:
adata.obs['tissue_type'] = ['tissue'] * len(adata.obs)

In [137]:
adata.obs['tissue_type'] = adata.obs['tissue_type'].astype('category')

#### **tissue_ontology_term_id**

In [138]:
# identify the column in adata.obs which corresponds to tissue

In [139]:
adata.obs.columns

Index(['batch', 'percent_mito', 'n_counts', 'n_genes', 'bh_pval', 'region',
       'S_score', 'G2M_score', 'leiden_R', 'sequencing_center', 'anatomy',
       'project', 'organism', 'celltype_integrated', 'human_stage',
       'mouse_stage', 'chemistry', 'celltype_per_organism', 'tissue',
       'multimap_density_organism', 'assay_ontology_term_id',
       'cell_type_ontology_term_id', 'stage',
       'development_stage_ontology_term_id', 'donor_id',
       'disease_ontology_term_id', 'is_primary_data',
       'organism_ontology_term_id', 'self_reported_ethnicity_ontology_term_id',
       'sex', 'sex_ontology_term_id', 'suspension_type', 'tissue_type'],
      dtype='object')

In [140]:
list(adata.obs['region'].unique())

['WholeLimb', 'Thigh', 'Proximal', 'Middle', 'Distal']

In [141]:
mapping= {'WholeLimb':'UBERON:0002101', 'Thigh':'UBERON:0000376', 'Proximal':'UBERON:0002472', 'Middle':'UBERON:0002471', 'Distal':'UBERON:0002470'}

In [142]:
# add 'tissue_ontology_term_id' column

In [143]:
adata.obs['tissue_ontology_term_id'] =adata.obs['region'].map(mapping)

In [144]:
# change data type of column

In [145]:
adata.obs['tissue_ontology_term_id'] = adata.obs['tissue_ontology_term_id'].astype('category')

In [146]:
#list the unique values in 'tissue_ontology_term_id' column

In [147]:
list(adata.obs['tissue_ontology_term_id'].unique())

['UBERON:0002101',
 'UBERON:0000376',
 'UBERON:0002472',
 'UBERON:0002471',
 'UBERON:0002470']

In [148]:
# view obs

In [149]:
adata.obs

Unnamed: 0,batch,percent_mito,n_counts,n_genes,bh_pval,region,S_score,G2M_score,leiden_R,sequencing_center,...,donor_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id,self_reported_ethnicity_ontology_term_id,sex,sex_ontology_term_id,suspension_type,tissue_type,tissue_ontology_term_id
AAACCTGCACATTTCT-5386STDY7537944,5386STDY7537944,0.011364,2992.0,1152,0.872782,WholeLimb,-0.117172,-0.204391,00,Sanger,...,5386STDY7537944,PATO:0000461,False,NCBITaxon:9606,unknown,F,PATO:0000383,cell,tissue,UBERON:0002101
AAACCTGCAGGGATTG-5386STDY7537944,5386STDY7537944,0.006805,2939.0,1187,0.872782,WholeLimb,-0.104853,-0.170856,00,Sanger,...,5386STDY7537944,PATO:0000461,False,NCBITaxon:9606,unknown,F,PATO:0000383,cell,tissue,UBERON:0002101
AAACCTGCATCGGGTC-5386STDY7537944,5386STDY7537944,0.011667,6857.0,2329,0.872782,WholeLimb,0.033006,0.277833,00,Sanger,...,5386STDY7537944,PATO:0000461,False,NCBITaxon:9606,unknown,F,PATO:0000383,cell,tissue,UBERON:0002101
AAACCTGCATGGGACA-5386STDY7537944,5386STDY7537944,0.009933,10974.0,2967,0.872782,WholeLimb,0.106430,0.407822,00,Sanger,...,5386STDY7537944,PATO:0000461,False,NCBITaxon:9606,unknown,F,PATO:0000383,cell,tissue,UBERON:0002101
AAACCTGGTGATGCCC-5386STDY7537944,5386STDY7537944,0.015003,6532.0,2083,0.927508,WholeLimb,-0.120746,-0.143064,140,Sanger,...,5386STDY7537944,PATO:0000461,False,NCBITaxon:9606,unknown,F,PATO:0000383,cell,tissue,UBERON:0002101
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTTGTCAGTCAGGACA-GSM4227227-Kelly,GSM4227227,0.055120,8164.0,2389,,WholeLimb,-0.126943,-0.117261,172,Kelly,...,GSM4227227,PATO:0000461,False,NCBITaxon:10090,na,,unknown,cell,tissue,UBERON:0002101
TTTGTCAGTTCCGTCT-GSM4227227-Kelly,GSM4227227,0.054932,5878.0,1818,,WholeLimb,0.149806,0.512961,60,Kelly,...,GSM4227227,PATO:0000461,False,NCBITaxon:10090,na,,unknown,cell,tissue,UBERON:0002101
TTTGTCATCCACGTGG-GSM4227227-Kelly,GSM4227227,0.053890,5270.0,1783,,WholeLimb,-0.121107,-0.090475,103,Kelly,...,GSM4227227,PATO:0000461,False,NCBITaxon:10090,na,,unknown,cell,tissue,UBERON:0002101
TTTGTCATCCCAAGTA-GSM4227227-Kelly,GSM4227227,0.064262,10037.0,2650,,WholeLimb,0.063508,0.542747,60,Kelly,...,GSM4227227,PATO:0000461,False,NCBITaxon:10090,na,,unknown,cell,tissue,UBERON:0002101


In [150]:
adata.obs.columns

Index(['batch', 'percent_mito', 'n_counts', 'n_genes', 'bh_pval', 'region',
       'S_score', 'G2M_score', 'leiden_R', 'sequencing_center', 'anatomy',
       'project', 'organism', 'celltype_integrated', 'human_stage',
       'mouse_stage', 'chemistry', 'celltype_per_organism', 'tissue',
       'multimap_density_organism', 'assay_ontology_term_id',
       'cell_type_ontology_term_id', 'stage',
       'development_stage_ontology_term_id', 'donor_id',
       'disease_ontology_term_id', 'is_primary_data',
       'organism_ontology_term_id', 'self_reported_ethnicity_ontology_term_id',
       'sex', 'sex_ontology_term_id', 'suspension_type', 'tissue_type',
       'tissue_ontology_term_id'],
      dtype='object')

#### **obsm (Embeddings)**

In [151]:
# view obsm

In [152]:
# check whether all columns are prefixed with X

In [153]:
adata.obsm

AxisArrays with keys: X_multimap, X_multimap_separate, X_pca, X_umap, X_umap_separate

#### **uns (Dataset Metadata)**

In [154]:
# View

In [155]:
adata.uns

OverloadedDict, wrapping:
	{'batch_colors': array(['#ffff00', '#1ce6ff', '#ff34ff', '#ff4a46', '#008941', '#006fa6',
       '#a30059', '#ffdbe5', '#7a4900', '#0000a6', '#63ffac', '#b79762',
       '#004d43', '#8fb0ff', '#997d87', '#5a0007', '#809693', '#6a3a4c',
       '#1b4400', '#4fc601', '#3b5dff', '#4a3b53', '#ff2f80', '#61615a',
       '#ba0900', '#6b7900', '#00c2a0', '#ffaa92', '#ff90c9', '#b903aa',
       '#d16100', '#ddefff', '#000035', '#7b4f4b', '#a1c299', '#300018',
       '#0aa6d8', '#013349', '#00846f', '#372101', '#ffb500', '#c2ffed',
       '#a079bf', '#cc0744', '#c0b9b2', '#c2ff99', '#001e09', '#00489c',
       '#6f0062', '#0cbd66', '#eec3ff', '#456d75', '#b77b68', '#7a87a1',
       '#788d66', '#885578'], dtype=object), 'celltype_colors': array(['#ffff00', '#82D900', '#ffff00', '#9AFF02', '#5E005E', '#008941',
       '#00E3E3', '#006fa6', '#a30059', '#C6A300', '#0000a6', '#B15BFF',
       '#b79762', '#005757', '#6C3365', '#00AEAE', '#c4adc1', '#99bb99',
       '#00AEAE'

In [156]:
adata.uns.keys

<bound method OverloadedDict.keys of OverloadedDict, wrapping:
	{'batch_colors': array(['#ffff00', '#1ce6ff', '#ff34ff', '#ff4a46', '#008941', '#006fa6',
       '#a30059', '#ffdbe5', '#7a4900', '#0000a6', '#63ffac', '#b79762',
       '#004d43', '#8fb0ff', '#997d87', '#5a0007', '#809693', '#6a3a4c',
       '#1b4400', '#4fc601', '#3b5dff', '#4a3b53', '#ff2f80', '#61615a',
       '#ba0900', '#6b7900', '#00c2a0', '#ffaa92', '#ff90c9', '#b903aa',
       '#d16100', '#ddefff', '#000035', '#7b4f4b', '#a1c299', '#300018',
       '#0aa6d8', '#013349', '#00846f', '#372101', '#ffb500', '#c2ffed',
       '#a079bf', '#cc0744', '#c0b9b2', '#c2ff99', '#001e09', '#00489c',
       '#6f0062', '#0cbd66', '#eec3ff', '#456d75', '#b77b68', '#7a87a1',
       '#788d66', '#885578'], dtype=object), 'celltype_colors': array(['#ffff00', '#82D900', '#ffff00', '#9AFF02', '#5E005E', '#008941',
       '#00E3E3', '#006fa6', '#a30059', '#C6A300', '#0000a6', '#B15BFF',
       '#b79762', '#005757', '#6C3365', '#00AEAE', '

In [157]:
# Give a title for the dataset

In [158]:
adata.uns['title'] = 'Combined_human_and_mouse_limb_scRNAseq'

In [159]:
# Set the default embedding

In [160]:
adata.uns['default_embedding'] = 'X_umap'

In [161]:
del adata.uns['celltype_colors']
del adata.uns['leiden_colors']
del adata.uns['organism_colors']
del adata.uns['tissue_colors']

### **Final check**

In [162]:
# view anndata object

In [163]:
adata

AnnData object with n_obs × n_vars = 341022 × 50903
    obs: 'batch', 'percent_mito', 'n_counts', 'n_genes', 'bh_pval', 'region', 'S_score', 'G2M_score', 'leiden_R', 'sequencing_center', 'anatomy', 'project', 'organism', 'celltype_integrated', 'human_stage', 'mouse_stage', 'chemistry', 'celltype_per_organism', 'tissue', 'multimap_density_organism', 'assay_ontology_term_id', 'cell_type_ontology_term_id', 'stage', 'development_stage_ontology_term_id', 'donor_id', 'disease_ontology_term_id', 'is_primary_data', 'organism_ontology_term_id', 'self_reported_ethnicity_ontology_term_id', 'sex', 'sex_ontology_term_id', 'suspension_type', 'tissue_type', 'tissue_ontology_term_id'
    var: 'feature_is_filtered'
    uns: 'batch_colors', 'human_stage_colors', 'leiden', 'leiden_R_colors', 'mouse_stage_colors', 'multimap_density_organism_params', 'neighbors', 'rank_genes_groups', 'title', 'default_embedding'
    obsm: 'X_multimap', 'X_multimap_separate', 'X_pca', 'X_umap', 'X_umap_separate'

In [164]:
# view obs and var data types

In [165]:
adata.obs.dtypes

batch                                       category
percent_mito                                 float64
n_counts                                     float64
n_genes                                        int64
bh_pval                                      float64
region                                      category
S_score                                      float64
G2M_score                                    float64
leiden_R                                    category
sequencing_center                           category
anatomy                                     category
project                                     category
organism                                    category
celltype_integrated                         category
human_stage                                 category
mouse_stage                                 category
chemistry                                   category
celltype_per_organism                       category
tissue                                      ca

In [166]:
dty = pd.DataFrame(adata.var.dtypes, columns = ['dtype'])
for c in dty[dty['dtype'] == 'float64'].index.values:
    adata.var[c] = adata.var[c].astype('float32')
    print(f"changed {c} from float64 to float32")
for c in dty[dty['dtype'] == 'int64'].index.values:
    adata.var[c] = adata.var[c].astype('int32') 
    print(f"changed {c} from int64 to int32")

In [167]:
dty = pd.DataFrame(adata.obs.dtypes, columns = ['dtype'])
for c in dty[dty['dtype'] == 'float64'].index.values:
    adata.obs[c] = adata.obs[c].astype('float32')
    print(f"changed {c} from float64 to float32")
for c in dty[dty['dtype'] == 'int64'].index.values:
    adata.obs[c] = adata.obs[c].astype('int32') 
    print(f"changed {c} from int64 to int32")
for c in dty[dty['dtype'] == 'object'].index.values:
    adata.obs[c] = adata.obs[c].astype('category') 
    print(f"changed {c} from object to category")

changed percent_mito from float64 to float32
changed n_counts from float64 to float32
changed bh_pval from float64 to float32
changed S_score from float64 to float32
changed G2M_score from float64 to float32
changed multimap_density_organism from float64 to float32
changed n_genes from int64 to int32
changed stage from object to category
changed sex from object to category


In [168]:
# view obs

In [169]:
adata.obs

Unnamed: 0,batch,percent_mito,n_counts,n_genes,bh_pval,region,S_score,G2M_score,leiden_R,sequencing_center,...,donor_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id,self_reported_ethnicity_ontology_term_id,sex,sex_ontology_term_id,suspension_type,tissue_type,tissue_ontology_term_id
AAACCTGCACATTTCT-5386STDY7537944,5386STDY7537944,0.011364,2992.0,1152,0.872782,WholeLimb,-0.117172,-0.204391,00,Sanger,...,5386STDY7537944,PATO:0000461,False,NCBITaxon:9606,unknown,F,PATO:0000383,cell,tissue,UBERON:0002101
AAACCTGCAGGGATTG-5386STDY7537944,5386STDY7537944,0.006805,2939.0,1187,0.872782,WholeLimb,-0.104853,-0.170856,00,Sanger,...,5386STDY7537944,PATO:0000461,False,NCBITaxon:9606,unknown,F,PATO:0000383,cell,tissue,UBERON:0002101
AAACCTGCATCGGGTC-5386STDY7537944,5386STDY7537944,0.011667,6857.0,2329,0.872782,WholeLimb,0.033006,0.277833,00,Sanger,...,5386STDY7537944,PATO:0000461,False,NCBITaxon:9606,unknown,F,PATO:0000383,cell,tissue,UBERON:0002101
AAACCTGCATGGGACA-5386STDY7537944,5386STDY7537944,0.009933,10974.0,2967,0.872782,WholeLimb,0.106430,0.407822,00,Sanger,...,5386STDY7537944,PATO:0000461,False,NCBITaxon:9606,unknown,F,PATO:0000383,cell,tissue,UBERON:0002101
AAACCTGGTGATGCCC-5386STDY7537944,5386STDY7537944,0.015003,6532.0,2083,0.927508,WholeLimb,-0.120746,-0.143064,140,Sanger,...,5386STDY7537944,PATO:0000461,False,NCBITaxon:9606,unknown,F,PATO:0000383,cell,tissue,UBERON:0002101
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTTGTCAGTCAGGACA-GSM4227227-Kelly,GSM4227227,0.055120,8164.0,2389,,WholeLimb,-0.126943,-0.117261,172,Kelly,...,GSM4227227,PATO:0000461,False,NCBITaxon:10090,na,,unknown,cell,tissue,UBERON:0002101
TTTGTCAGTTCCGTCT-GSM4227227-Kelly,GSM4227227,0.054932,5878.0,1818,,WholeLimb,0.149806,0.512961,60,Kelly,...,GSM4227227,PATO:0000461,False,NCBITaxon:10090,na,,unknown,cell,tissue,UBERON:0002101
TTTGTCATCCACGTGG-GSM4227227-Kelly,GSM4227227,0.053890,5270.0,1783,,WholeLimb,-0.121107,-0.090475,103,Kelly,...,GSM4227227,PATO:0000461,False,NCBITaxon:10090,na,,unknown,cell,tissue,UBERON:0002101
TTTGTCATCCCAAGTA-GSM4227227-Kelly,GSM4227227,0.064262,10037.0,2650,,WholeLimb,0.063508,0.542747,60,Kelly,...,GSM4227227,PATO:0000461,False,NCBITaxon:10090,na,,unknown,cell,tissue,UBERON:0002101


In [170]:
adata.obs.columns

Index(['batch', 'percent_mito', 'n_counts', 'n_genes', 'bh_pval', 'region',
       'S_score', 'G2M_score', 'leiden_R', 'sequencing_center', 'anatomy',
       'project', 'organism', 'celltype_integrated', 'human_stage',
       'mouse_stage', 'chemistry', 'celltype_per_organism', 'tissue',
       'multimap_density_organism', 'assay_ontology_term_id',
       'cell_type_ontology_term_id', 'stage',
       'development_stage_ontology_term_id', 'donor_id',
       'disease_ontology_term_id', 'is_primary_data',
       'organism_ontology_term_id', 'self_reported_ethnicity_ontology_term_id',
       'sex', 'sex_ontology_term_id', 'suspension_type', 'tissue_type',
       'tissue_ontology_term_id'],
      dtype='object')

In [171]:
# delete unwanted columns in obs

In [172]:
del adata.obs['tissue']
del adata.obs['sex']
del adata.obs['stage']
del adata.obs['organism']

In [173]:
# view obs

In [174]:
adata.obs

Unnamed: 0,batch,percent_mito,n_counts,n_genes,bh_pval,region,S_score,G2M_score,leiden_R,sequencing_center,...,development_stage_ontology_term_id,donor_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id,self_reported_ethnicity_ontology_term_id,sex_ontology_term_id,suspension_type,tissue_type,tissue_ontology_term_id
AAACCTGCACATTTCT-5386STDY7537944,5386STDY7537944,0.011364,2992.0,1152,0.872782,WholeLimb,-0.117172,-0.204391,00,Sanger,...,HsapDv:0000022,5386STDY7537944,PATO:0000461,False,NCBITaxon:9606,unknown,PATO:0000383,cell,tissue,UBERON:0002101
AAACCTGCAGGGATTG-5386STDY7537944,5386STDY7537944,0.006805,2939.0,1187,0.872782,WholeLimb,-0.104853,-0.170856,00,Sanger,...,HsapDv:0000022,5386STDY7537944,PATO:0000461,False,NCBITaxon:9606,unknown,PATO:0000383,cell,tissue,UBERON:0002101
AAACCTGCATCGGGTC-5386STDY7537944,5386STDY7537944,0.011667,6857.0,2329,0.872782,WholeLimb,0.033006,0.277833,00,Sanger,...,HsapDv:0000022,5386STDY7537944,PATO:0000461,False,NCBITaxon:9606,unknown,PATO:0000383,cell,tissue,UBERON:0002101
AAACCTGCATGGGACA-5386STDY7537944,5386STDY7537944,0.009933,10974.0,2967,0.872782,WholeLimb,0.106430,0.407822,00,Sanger,...,HsapDv:0000022,5386STDY7537944,PATO:0000461,False,NCBITaxon:9606,unknown,PATO:0000383,cell,tissue,UBERON:0002101
AAACCTGGTGATGCCC-5386STDY7537944,5386STDY7537944,0.015003,6532.0,2083,0.927508,WholeLimb,-0.120746,-0.143064,140,Sanger,...,HsapDv:0000022,5386STDY7537944,PATO:0000461,False,NCBITaxon:9606,unknown,PATO:0000383,cell,tissue,UBERON:0002101
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTTGTCAGTCAGGACA-GSM4227227-Kelly,GSM4227227,0.055120,8164.0,2389,,WholeLimb,-0.126943,-0.117261,172,Kelly,...,MmusDv:0000035,GSM4227227,PATO:0000461,False,NCBITaxon:10090,na,unknown,cell,tissue,UBERON:0002101
TTTGTCAGTTCCGTCT-GSM4227227-Kelly,GSM4227227,0.054932,5878.0,1818,,WholeLimb,0.149806,0.512961,60,Kelly,...,MmusDv:0000035,GSM4227227,PATO:0000461,False,NCBITaxon:10090,na,unknown,cell,tissue,UBERON:0002101
TTTGTCATCCACGTGG-GSM4227227-Kelly,GSM4227227,0.053890,5270.0,1783,,WholeLimb,-0.121107,-0.090475,103,Kelly,...,MmusDv:0000035,GSM4227227,PATO:0000461,False,NCBITaxon:10090,na,unknown,cell,tissue,UBERON:0002101
TTTGTCATCCCAAGTA-GSM4227227-Kelly,GSM4227227,0.064262,10037.0,2650,,WholeLimb,0.063508,0.542747,60,Kelly,...,MmusDv:0000035,GSM4227227,PATO:0000461,False,NCBITaxon:10090,na,unknown,cell,tissue,UBERON:0002101


In [175]:
# view var

In [176]:
adata.var

Unnamed: 0,feature_is_filtered
ENSG00000000003,False
ENSG00000000005,False
ENSG00000000419,False
ENSG00000000457,False
ENSG00000000460,False
...,...
ENSMUSG00000118522,False
ENSMUSG00000118537,False
ENSMUSG00000118550,False
ENSMUSG00000118560,True


In [177]:
araw.var

ENSG00000000003
ENSG00000000005
ENSG00000000419
ENSG00000000457
ENSG00000000460
...
ENSMUSG00000118522
ENSMUSG00000118537
ENSMUSG00000118550
ENSMUSG00000118560
ENSMUSG00000118578


In [178]:
#view uns

In [179]:
adata.uns

OverloadedDict, wrapping:
	{'batch_colors': array(['#ffff00', '#1ce6ff', '#ff34ff', '#ff4a46', '#008941', '#006fa6',
       '#a30059', '#ffdbe5', '#7a4900', '#0000a6', '#63ffac', '#b79762',
       '#004d43', '#8fb0ff', '#997d87', '#5a0007', '#809693', '#6a3a4c',
       '#1b4400', '#4fc601', '#3b5dff', '#4a3b53', '#ff2f80', '#61615a',
       '#ba0900', '#6b7900', '#00c2a0', '#ffaa92', '#ff90c9', '#b903aa',
       '#d16100', '#ddefff', '#000035', '#7b4f4b', '#a1c299', '#300018',
       '#0aa6d8', '#013349', '#00846f', '#372101', '#ffb500', '#c2ffed',
       '#a079bf', '#cc0744', '#c0b9b2', '#c2ff99', '#001e09', '#00489c',
       '#6f0062', '#0cbd66', '#eec3ff', '#456d75', '#b77b68', '#7a87a1',
       '#788d66', '#885578'], dtype=object), 'human_stage_colors': array(['#000080', '#0000f1', '#004cff', '#00b0ff', '#29ffce', '#7dff7a',
       '#ceff29', '#ffc400', '#ff6800', '#f10800', '#800000'],
      dtype=object), 'leiden': {'params': {'n_iterations': -1, 'random_state': 0, 'resolution': 

In [180]:
list(adata.uns.keys())

['batch_colors',
 'human_stage_colors',
 'leiden',
 'leiden_R_colors',
 'mouse_stage_colors',
 'multimap_density_organism_params',
 'neighbors',
 'rank_genes_groups',
 'title',
 'default_embedding']

In [181]:
adata.obs.columns

Index(['batch', 'percent_mito', 'n_counts', 'n_genes', 'bh_pval', 'region',
       'S_score', 'G2M_score', 'leiden_R', 'sequencing_center', 'anatomy',
       'project', 'celltype_integrated', 'human_stage', 'mouse_stage',
       'chemistry', 'celltype_per_organism', 'multimap_density_organism',
       'assay_ontology_term_id', 'cell_type_ontology_term_id',
       'development_stage_ontology_term_id', 'donor_id',
       'disease_ontology_term_id', 'is_primary_data',
       'organism_ontology_term_id', 'self_reported_ethnicity_ontology_term_id',
       'sex_ontology_term_id', 'suspension_type', 'tissue_type',
       'tissue_ontology_term_id'],
      dtype='object')

In [182]:
# Remove unwanted columns in uns

In [183]:
#check the format of expression matrix

In [184]:
adata.X

<341022x50903 sparse matrix of type '<class 'numpy.float32'>'
	with 1004077818 stored elements in Compressed Sparse Row format>

In [185]:
araw.X

<341022x50903 sparse matrix of type '<class 'numpy.float32'>'
	with 1007938179 stored elements in Compressed Sparse Row format>

In [186]:
araw.var

ENSG00000000003
ENSG00000000005
ENSG00000000419
ENSG00000000457
ENSG00000000460
...
ENSMUSG00000118522
ENSMUSG00000118537
ENSMUSG00000118550
ENSMUSG00000118560
ENSMUSG00000118578


In [187]:
#Copy raw counts to adata.raw

In [188]:
adata.raw = araw

In [189]:
obs_dtype = adata.obs.dtypes

In [190]:
obs_dtype

batch                                       category
percent_mito                                 float32
n_counts                                     float32
n_genes                                        int32
bh_pval                                      float32
region                                      category
S_score                                      float32
G2M_score                                    float32
leiden_R                                    category
sequencing_center                           category
anatomy                                     category
project                                     category
celltype_integrated                         category
human_stage                                 category
mouse_stage                                 category
chemistry                                   category
celltype_per_organism                       category
multimap_density_organism                    float32
assay_ontology_term_id                      ca

In [191]:
adata.write('/lustre/scratch127/cellgen/cellgeni/cxgportal_sets/limb_cell_atlas/Final_objects/Combined_human_and_mouse_limb.h5ad', compression = 'gzip')

In [192]:
adata.obs

Unnamed: 0,batch,percent_mito,n_counts,n_genes,bh_pval,region,S_score,G2M_score,leiden_R,sequencing_center,...,development_stage_ontology_term_id,donor_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id,self_reported_ethnicity_ontology_term_id,sex_ontology_term_id,suspension_type,tissue_type,tissue_ontology_term_id
AAACCTGCACATTTCT-5386STDY7537944,5386STDY7537944,0.011364,2992.0,1152,0.872782,WholeLimb,-0.117172,-0.204391,00,Sanger,...,HsapDv:0000022,5386STDY7537944,PATO:0000461,False,NCBITaxon:9606,unknown,PATO:0000383,cell,tissue,UBERON:0002101
AAACCTGCAGGGATTG-5386STDY7537944,5386STDY7537944,0.006805,2939.0,1187,0.872782,WholeLimb,-0.104853,-0.170856,00,Sanger,...,HsapDv:0000022,5386STDY7537944,PATO:0000461,False,NCBITaxon:9606,unknown,PATO:0000383,cell,tissue,UBERON:0002101
AAACCTGCATCGGGTC-5386STDY7537944,5386STDY7537944,0.011667,6857.0,2329,0.872782,WholeLimb,0.033006,0.277833,00,Sanger,...,HsapDv:0000022,5386STDY7537944,PATO:0000461,False,NCBITaxon:9606,unknown,PATO:0000383,cell,tissue,UBERON:0002101
AAACCTGCATGGGACA-5386STDY7537944,5386STDY7537944,0.009933,10974.0,2967,0.872782,WholeLimb,0.106430,0.407822,00,Sanger,...,HsapDv:0000022,5386STDY7537944,PATO:0000461,False,NCBITaxon:9606,unknown,PATO:0000383,cell,tissue,UBERON:0002101
AAACCTGGTGATGCCC-5386STDY7537944,5386STDY7537944,0.015003,6532.0,2083,0.927508,WholeLimb,-0.120746,-0.143064,140,Sanger,...,HsapDv:0000022,5386STDY7537944,PATO:0000461,False,NCBITaxon:9606,unknown,PATO:0000383,cell,tissue,UBERON:0002101
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTTGTCAGTCAGGACA-GSM4227227-Kelly,GSM4227227,0.055120,8164.0,2389,,WholeLimb,-0.126943,-0.117261,172,Kelly,...,MmusDv:0000035,GSM4227227,PATO:0000461,False,NCBITaxon:10090,na,unknown,cell,tissue,UBERON:0002101
TTTGTCAGTTCCGTCT-GSM4227227-Kelly,GSM4227227,0.054932,5878.0,1818,,WholeLimb,0.149806,0.512961,60,Kelly,...,MmusDv:0000035,GSM4227227,PATO:0000461,False,NCBITaxon:10090,na,unknown,cell,tissue,UBERON:0002101
TTTGTCATCCACGTGG-GSM4227227-Kelly,GSM4227227,0.053890,5270.0,1783,,WholeLimb,-0.121107,-0.090475,103,Kelly,...,MmusDv:0000035,GSM4227227,PATO:0000461,False,NCBITaxon:10090,na,unknown,cell,tissue,UBERON:0002101
TTTGTCATCCCAAGTA-GSM4227227-Kelly,GSM4227227,0.064262,10037.0,2650,,WholeLimb,0.063508,0.542747,60,Kelly,...,MmusDv:0000035,GSM4227227,PATO:0000461,False,NCBITaxon:10090,na,unknown,cell,tissue,UBERON:0002101


In [193]:
adata.obs.columns

Index(['batch', 'percent_mito', 'n_counts', 'n_genes', 'bh_pval', 'region',
       'S_score', 'G2M_score', 'leiden_R', 'sequencing_center', 'anatomy',
       'project', 'celltype_integrated', 'human_stage', 'mouse_stage',
       'chemistry', 'celltype_per_organism', 'multimap_density_organism',
       'assay_ontology_term_id', 'cell_type_ontology_term_id',
       'development_stage_ontology_term_id', 'donor_id',
       'disease_ontology_term_id', 'is_primary_data',
       'organism_ontology_term_id', 'self_reported_ethnicity_ontology_term_id',
       'sex_ontology_term_id', 'suspension_type', 'tissue_type',
       'tissue_ontology_term_id'],
      dtype='object')

In [194]:
adata.raw.var

ENSG00000000003
ENSG00000000005
ENSG00000000419
ENSG00000000457
ENSG00000000460
...
ENSMUSG00000118522
ENSMUSG00000118537
ENSMUSG00000118550
ENSMUSG00000118560
ENSMUSG00000118578


In [195]:
adata.var

Unnamed: 0,feature_is_filtered
ENSG00000000003,False
ENSG00000000005,False
ENSG00000000419,False
ENSG00000000457,False
ENSG00000000460,False
...,...
ENSMUSG00000118522,False
ENSMUSG00000118537,False
ENSMUSG00000118550,False
ENSMUSG00000118560,True
