### **Curating Mouse_limb.h5ad**

Article: A human embryonic limb cell atlas resolved in space and time

DOI: https://doi.org/10.1038/s41586-023-06806-x

Data Source : https://developmental.cellatlas.io/embryonic-limb

##### **Mount farm**

mount-farm

##### **Packages required for curation**

In [1]:
#Import all packages required for curation

In [2]:
import numpy as np
import pandas as pd
import scanpy as sc
import scipy
from tqdm import tqdm
from scipy import sparse
from scipy.sparse import csr_matrix
import anndata as ad
import os
import subprocess
import math
import re
from collections import Counter

### **Curation Schema**

##### **X (Matrix Layers)**

##### **AnnData object**

In [3]:
# Load the AnnData object

In [4]:
adata = sc.read_h5ad('/lustre/scratch127/cellgen/cellgeni/cxgportal_sets/limb_cell_atlas/Data/New_data/Mouse_limb.h5ad')

In [5]:
# View the AnnData object

In [6]:
adata

AnnData object with n_obs × n_vars = 215067 × 24858
    obs: 'sequencing_center', 'batch', 'stage', 'dissection', 'anatomy', 'percent_mito', 'n_counts', 'n_genes', 'doublet_scores', 'bh_pval', 'leiden', 'leiden_R', 'final_leiden_R', 'celltype', 'project', 'S_score', 'G2M_score', 'phase'
    var: 'gene_ids-Allou', 'n_cells-Allou', 'feature_types-Allou', 'gene_ids-He', 'feature_types-WSSS_THYst9807808-He', 'feature_types-WSSS_THYst9807809-He', 'feature_types-WSSS_THYst9807810-He', 'feature_types-WSSS_THYst9807811-He', 'feature_types-WSSS_THYst9807812-He', 'feature_types-WSSS_THYst9807813-He', 'feature_types-WSSS_THYst9807814-He', 'feature_types-WSSS_THYst9807815-He', 'feature_types-WSSS_THYst9807816-He', 'feature_types-WSSS_THYst9807817-He', 'feature_types-WSSS_THYst9807818-He', 'feature_types-WSSS_THYst9807819-He', 'feature_types-WSSS_THYst9807820-He', 'n_cells-He', 'highly_variable3_e11-He', 'highly_variable9_e15_prox-He', 'highly_variable10_e15_mid-He', 'highly_variable11_e15_dist-He'

##### **X - expression matrix**

In [7]:
# View the expression matrix of the anndata object.

In [8]:
adata.X

<215067x24858 sparse matrix of type '<class 'numpy.float32'>'
	with 710122292 stored elements in Compressed Sparse Row format>

In [9]:
# Print the matrix to check whether they are normalized counts or raw counts. if the matrix has floating numbers,they are normalized counts.if they are integers, they are raw counts.

In [10]:
print(adata.X)

  (0, 2)	0.60125244
  (0, 8)	1.4580588
  (0, 15)	0.60125244
  (0, 23)	0.60125244
  (0, 25)	0.97410846
  (0, 28)	0.60125244
  (0, 29)	0.60125244
  (0, 42)	0.97410846
  (0, 45)	0.60125244
  (0, 48)	0.97410846
  (0, 61)	0.97410846
  (0, 150)	0.60125244
  (0, 166)	0.60125244
  (0, 270)	0.60125244
  (0, 306)	0.60125244
  (0, 333)	0.60125244
  (0, 374)	0.60125244
  (0, 399)	0.60125244
  (0, 400)	0.60125244
  (0, 403)	0.97410846
  (0, 411)	0.60125244
  (0, 447)	0.60125244
  (0, 484)	0.60125244
  (0, 492)	0.60125244
  (0, 494)	0.60125244
  :	:
  (215066, 24763)	0.59506035
  (215066, 24770)	0.96556866
  (215066, 24774)	0.59506035
  (215066, 24785)	0.59506035
  (215066, 24793)	0.59506035
  (215066, 24794)	0.59506035
  (215066, 24799)	0.59506035
  (215066, 24809)	0.59506035
  (215066, 24810)	0.59506035
  (215066, 24813)	0.59506035
  (215066, 24834)	0.59506035
  (215066, 24839)	0.59506035
  (215066, 24840)	0.96556866
  (215066, 24845)	4.4203715
  (215066, 24847)	3.9556158
  (215066, 24848)	3.51230

##### **Raw counts matrix**

In [11]:
araw = sc.read_h5ad('/lustre/scratch127/cellgen/cellgeni/cxgportal_sets/limb_cell_atlas/Data/Raw_data/mouse_limb_raw.h5ad')

In [12]:
araw 

AnnData object with n_obs × n_vars = 215067 × 24858
    obs: 'sequencing_center', 'batch', 'stage', 'dissection', 'anatomy', 'percent_mito', 'n_counts', 'n_genes', 'doublet_scores', 'bh_pval', 'leiden', 'leiden_R', 'final_leiden_R', 'celltype', 'project', 'S_score', 'G2M_score', 'phase'
    var: 'gene_ids-Allou', 'n_cells-Allou', 'feature_types-Allou', 'gene_ids-He', 'feature_types-WSSS_THYst9807808-He', 'feature_types-WSSS_THYst9807809-He', 'feature_types-WSSS_THYst9807810-He', 'feature_types-WSSS_THYst9807811-He', 'feature_types-WSSS_THYst9807812-He', 'feature_types-WSSS_THYst9807813-He', 'feature_types-WSSS_THYst9807814-He', 'feature_types-WSSS_THYst9807815-He', 'feature_types-WSSS_THYst9807816-He', 'feature_types-WSSS_THYst9807817-He', 'feature_types-WSSS_THYst9807818-He', 'feature_types-WSSS_THYst9807819-He', 'feature_types-WSSS_THYst9807820-He', 'n_cells-He', 'highly_variable3_e11-He', 'highly_variable9_e15_prox-He', 'highly_variable10_e15_mid-He', 'highly_variable11_e15_dist-He'

In [13]:
araw.layers.keys()

KeysView(Layers with keys: counts)

In [14]:
araw = ad.AnnData(X=araw.layers['counts'].copy(), obs=araw.obs.copy(), var=araw.var.copy())

In [15]:
print(araw.X)

  (0, 2)	1.0
  (0, 8)	4.0
  (0, 15)	1.0
  (0, 23)	1.0
  (0, 25)	2.0
  (0, 28)	1.0
  (0, 29)	1.0
  (0, 42)	2.0
  (0, 45)	1.0
  (0, 48)	2.0
  (0, 61)	2.0
  (0, 150)	1.0
  (0, 166)	1.0
  (0, 270)	1.0
  (0, 306)	1.0
  (0, 333)	1.0
  (0, 374)	1.0
  (0, 399)	1.0
  (0, 400)	1.0
  (0, 403)	2.0
  (0, 411)	1.0
  (0, 447)	1.0
  (0, 484)	1.0
  (0, 492)	1.0
  (0, 494)	1.0
  :	:
  (215066, 24763)	1.0
  (215066, 24770)	2.0
  (215066, 24774)	1.0
  (215066, 24785)	1.0
  (215066, 24793)	1.0
  (215066, 24794)	1.0
  (215066, 24799)	1.0
  (215066, 24809)	1.0
  (215066, 24810)	1.0
  (215066, 24813)	1.0
  (215066, 24834)	1.0
  (215066, 24839)	1.0
  (215066, 24840)	2.0
  (215066, 24845)	101.0
  (215066, 24847)	63.0
  (215066, 24848)	40.0
  (215066, 24849)	76.0
  (215066, 24850)	53.0
  (215066, 24851)	63.0
  (215066, 24852)	24.0
  (215066, 24853)	7.0
  (215066, 24854)	25.0
  (215066, 24855)	1.0
  (215066, 24856)	6.0
  (215066, 24857)	1.0


##### **Variables(var)**

In [16]:
# View the var of anndata and raw object

In [17]:
adata.var

Unnamed: 0,gene_ids-Allou,n_cells-Allou,feature_types-Allou,gene_ids-He,feature_types-WSSS_THYst9807808-He,feature_types-WSSS_THYst9807809-He,feature_types-WSSS_THYst9807810-He,feature_types-WSSS_THYst9807811-He,feature_types-WSSS_THYst9807812-He,feature_types-WSSS_THYst9807813-He,...,Deep_WSSS_THYst9807819,Deep_1_e13_5,Deep_WSSS_THYst9807810,Deep_WSSS_THYst9807815,Deep_WSSS_THYst9807820,Deep_GSM4227225,Deep_GSM4227224,Deep_GSM4227226,Deep_GSM4227227,Deep_n
0610005C13Rik,ENSMUSG00000109644,108.0,Gene Expression,ENSMUSG00000109644,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,False,0
0610006L08Rik,,,,ENSMUSG00000108652,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,False,0
0610009B22Rik,ENSMUSG00000007777,8616.0,Gene Expression,ENSMUSG00000007777,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,False,0
0610009E02Rik,ENSMUSG00000086714,410.0,Gene Expression,ENSMUSG00000086714,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,False,0
0610009L18Rik,ENSMUSG00000043644,1002.0,Gene Expression,ENSMUSG00000043644,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,False,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
mt-Nd3,ENSMUSG00000064360,22967.0,Gene Expression,ENSMUSG00000064360,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,False,0
mt-Nd4,ENSMUSG00000064363,34136.0,Gene Expression,ENSMUSG00000064363,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,False,1
mt-Nd4l,ENSMUSG00000065947,11768.0,Gene Expression,ENSMUSG00000065947,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,False,0
mt-Nd5,ENSMUSG00000064367,23436.0,Gene Expression,ENSMUSG00000064367,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,False,0


In [18]:
araw.var

Unnamed: 0,gene_ids-Allou,n_cells-Allou,feature_types-Allou,gene_ids-He,feature_types-WSSS_THYst9807808-He,feature_types-WSSS_THYst9807809-He,feature_types-WSSS_THYst9807810-He,feature_types-WSSS_THYst9807811-He,feature_types-WSSS_THYst9807812-He,feature_types-WSSS_THYst9807813-He,...,Deep_WSSS_THYst9807819,Deep_1_e13_5,Deep_WSSS_THYst9807810,Deep_WSSS_THYst9807815,Deep_WSSS_THYst9807820,Deep_GSM4227225,Deep_GSM4227224,Deep_GSM4227226,Deep_GSM4227227,Deep_n
0610005C13Rik,ENSMUSG00000109644,108.0,Gene Expression,ENSMUSG00000109644,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,False,0
0610006L08Rik,,,,ENSMUSG00000108652,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,False,0
0610009B22Rik,ENSMUSG00000007777,8616.0,Gene Expression,ENSMUSG00000007777,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,False,0
0610009E02Rik,ENSMUSG00000086714,410.0,Gene Expression,ENSMUSG00000086714,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,False,0
0610009L18Rik,ENSMUSG00000043644,1002.0,Gene Expression,ENSMUSG00000043644,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,False,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
mt-Nd3,ENSMUSG00000064360,22967.0,Gene Expression,ENSMUSG00000064360,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,False,0
mt-Nd4,ENSMUSG00000064363,34136.0,Gene Expression,ENSMUSG00000064363,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,False,1
mt-Nd4l,ENSMUSG00000065947,11768.0,Gene Expression,ENSMUSG00000065947,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,False,0
mt-Nd5,ENSMUSG00000064367,23436.0,Gene Expression,ENSMUSG00000064367,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,False,0


In [19]:
nan_count = np.sum(pd.isna(adata.var['gene_ids-He']))

print("Number of NaN values in adata.var['gene_id']: ", nan_count)

Number of NaN values in adata.var['gene_id']:  137


In [20]:
adata.var['gene_name'] = adata.var.index
araw.var['gene_name'] = araw.var.index

In [21]:
adata.var.index = adata.var['gene_ids-He'] 
araw.var.index = araw.var['gene_ids-He']

In [22]:
adata.var

Unnamed: 0_level_0,gene_ids-Allou,n_cells-Allou,feature_types-Allou,gene_ids-He,feature_types-WSSS_THYst9807808-He,feature_types-WSSS_THYst9807809-He,feature_types-WSSS_THYst9807810-He,feature_types-WSSS_THYst9807811-He,feature_types-WSSS_THYst9807812-He,feature_types-WSSS_THYst9807813-He,...,Deep_1_e13_5,Deep_WSSS_THYst9807810,Deep_WSSS_THYst9807815,Deep_WSSS_THYst9807820,Deep_GSM4227225,Deep_GSM4227224,Deep_GSM4227226,Deep_GSM4227227,Deep_n,gene_name
gene_ids-He,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ENSMUSG00000109644,ENSMUSG00000109644,108.0,Gene Expression,ENSMUSG00000109644,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,0,0610005C13Rik
ENSMUSG00000108652,,,,ENSMUSG00000108652,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,0,0610006L08Rik
ENSMUSG00000007777,ENSMUSG00000007777,8616.0,Gene Expression,ENSMUSG00000007777,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,0,0610009B22Rik
ENSMUSG00000086714,ENSMUSG00000086714,410.0,Gene Expression,ENSMUSG00000086714,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,0,0610009E02Rik
ENSMUSG00000043644,ENSMUSG00000043644,1002.0,Gene Expression,ENSMUSG00000043644,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,0,0610009L18Rik
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ENSMUSG00000064360,ENSMUSG00000064360,22967.0,Gene Expression,ENSMUSG00000064360,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,0,mt-Nd3
ENSMUSG00000064363,ENSMUSG00000064363,34136.0,Gene Expression,ENSMUSG00000064363,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,1,mt-Nd4
ENSMUSG00000065947,ENSMUSG00000065947,11768.0,Gene Expression,ENSMUSG00000065947,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,0,mt-Nd4l
ENSMUSG00000064367,ENSMUSG00000064367,23436.0,Gene Expression,ENSMUSG00000064367,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,0,mt-Nd5


In [23]:
araw.var

Unnamed: 0_level_0,gene_ids-Allou,n_cells-Allou,feature_types-Allou,gene_ids-He,feature_types-WSSS_THYst9807808-He,feature_types-WSSS_THYst9807809-He,feature_types-WSSS_THYst9807810-He,feature_types-WSSS_THYst9807811-He,feature_types-WSSS_THYst9807812-He,feature_types-WSSS_THYst9807813-He,...,Deep_1_e13_5,Deep_WSSS_THYst9807810,Deep_WSSS_THYst9807815,Deep_WSSS_THYst9807820,Deep_GSM4227225,Deep_GSM4227224,Deep_GSM4227226,Deep_GSM4227227,Deep_n,gene_name
gene_ids-He,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ENSMUSG00000109644,ENSMUSG00000109644,108.0,Gene Expression,ENSMUSG00000109644,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,0,0610005C13Rik
ENSMUSG00000108652,,,,ENSMUSG00000108652,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,0,0610006L08Rik
ENSMUSG00000007777,ENSMUSG00000007777,8616.0,Gene Expression,ENSMUSG00000007777,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,0,0610009B22Rik
ENSMUSG00000086714,ENSMUSG00000086714,410.0,Gene Expression,ENSMUSG00000086714,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,0,0610009E02Rik
ENSMUSG00000043644,ENSMUSG00000043644,1002.0,Gene Expression,ENSMUSG00000043644,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,0,0610009L18Rik
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ENSMUSG00000064360,ENSMUSG00000064360,22967.0,Gene Expression,ENSMUSG00000064360,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,0,mt-Nd3
ENSMUSG00000064363,ENSMUSG00000064363,34136.0,Gene Expression,ENSMUSG00000064363,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,1,mt-Nd4
ENSMUSG00000065947,ENSMUSG00000065947,11768.0,Gene Expression,ENSMUSG00000065947,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,0,mt-Nd4l
ENSMUSG00000064367,ENSMUSG00000064367,23436.0,Gene Expression,ENSMUSG00000064367,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,0,mt-Nd5


In [24]:
approved_genes = pd.read_csv('/lustre/scratch127/cellgen/cellgeni/cxgportal_sets/Endometrium_reference_integrated_atlas/genes_approved.csv')

In [25]:
genedict = {key: 1 for key in list(approved_genes.feature_id)}

In [26]:
var_to_keep_adata = [x for x in adata.var_names if (x in genedict)]
var_to_keep_araw = [x for x in araw.var_names if (x in genedict)]

In [27]:
adata = adata[:, adata.var.index.isin(var_to_keep_adata)]
araw = araw[:, araw.var.index.isin(var_to_keep_araw)]

In [28]:
adata.var

Unnamed: 0_level_0,gene_ids-Allou,n_cells-Allou,feature_types-Allou,gene_ids-He,feature_types-WSSS_THYst9807808-He,feature_types-WSSS_THYst9807809-He,feature_types-WSSS_THYst9807810-He,feature_types-WSSS_THYst9807811-He,feature_types-WSSS_THYst9807812-He,feature_types-WSSS_THYst9807813-He,...,Deep_1_e13_5,Deep_WSSS_THYst9807810,Deep_WSSS_THYst9807815,Deep_WSSS_THYst9807820,Deep_GSM4227225,Deep_GSM4227224,Deep_GSM4227226,Deep_GSM4227227,Deep_n,gene_name
gene_ids-He,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ENSMUSG00000109644,ENSMUSG00000109644,108.0,Gene Expression,ENSMUSG00000109644,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,0,0610005C13Rik
ENSMUSG00000108652,,,,ENSMUSG00000108652,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,0,0610006L08Rik
ENSMUSG00000007777,ENSMUSG00000007777,8616.0,Gene Expression,ENSMUSG00000007777,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,0,0610009B22Rik
ENSMUSG00000086714,ENSMUSG00000086714,410.0,Gene Expression,ENSMUSG00000086714,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,0,0610009E02Rik
ENSMUSG00000043644,ENSMUSG00000043644,1002.0,Gene Expression,ENSMUSG00000043644,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,0,0610009L18Rik
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ENSMUSG00000064360,ENSMUSG00000064360,22967.0,Gene Expression,ENSMUSG00000064360,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,0,mt-Nd3
ENSMUSG00000064363,ENSMUSG00000064363,34136.0,Gene Expression,ENSMUSG00000064363,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,1,mt-Nd4
ENSMUSG00000065947,ENSMUSG00000065947,11768.0,Gene Expression,ENSMUSG00000065947,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,0,mt-Nd4l
ENSMUSG00000064367,ENSMUSG00000064367,23436.0,Gene Expression,ENSMUSG00000064367,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,0,mt-Nd5


In [29]:
araw.var

Unnamed: 0_level_0,gene_ids-Allou,n_cells-Allou,feature_types-Allou,gene_ids-He,feature_types-WSSS_THYst9807808-He,feature_types-WSSS_THYst9807809-He,feature_types-WSSS_THYst9807810-He,feature_types-WSSS_THYst9807811-He,feature_types-WSSS_THYst9807812-He,feature_types-WSSS_THYst9807813-He,...,Deep_1_e13_5,Deep_WSSS_THYst9807810,Deep_WSSS_THYst9807815,Deep_WSSS_THYst9807820,Deep_GSM4227225,Deep_GSM4227224,Deep_GSM4227226,Deep_GSM4227227,Deep_n,gene_name
gene_ids-He,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ENSMUSG00000109644,ENSMUSG00000109644,108.0,Gene Expression,ENSMUSG00000109644,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,0,0610005C13Rik
ENSMUSG00000108652,,,,ENSMUSG00000108652,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,0,0610006L08Rik
ENSMUSG00000007777,ENSMUSG00000007777,8616.0,Gene Expression,ENSMUSG00000007777,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,0,0610009B22Rik
ENSMUSG00000086714,ENSMUSG00000086714,410.0,Gene Expression,ENSMUSG00000086714,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,0,0610009E02Rik
ENSMUSG00000043644,ENSMUSG00000043644,1002.0,Gene Expression,ENSMUSG00000043644,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,0,0610009L18Rik
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ENSMUSG00000064360,ENSMUSG00000064360,22967.0,Gene Expression,ENSMUSG00000064360,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,0,mt-Nd3
ENSMUSG00000064363,ENSMUSG00000064363,34136.0,Gene Expression,ENSMUSG00000064363,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,1,mt-Nd4
ENSMUSG00000065947,ENSMUSG00000065947,11768.0,Gene Expression,ENSMUSG00000065947,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,0,mt-Nd4l
ENSMUSG00000064367,ENSMUSG00000064367,23436.0,Gene Expression,ENSMUSG00000064367,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,0,mt-Nd5


In [30]:
adata.var.drop(columns=adata.var.columns, inplace=True)

feature is filtered

In [31]:
adata.var['feature_is_filtered'] = [False] * len(adata.var)

In [32]:
adata.var

Unnamed: 0_level_0,feature_is_filtered
gene_ids-He,Unnamed: 1_level_1
ENSMUSG00000109644,False
ENSMUSG00000108652,False
ENSMUSG00000007777,False
ENSMUSG00000086714,False
ENSMUSG00000043644,False
...,...
ENSMUSG00000064360,False
ENSMUSG00000064363,False
ENSMUSG00000065947,False
ENSMUSG00000064367,False


In [33]:
araw.var

Unnamed: 0_level_0,gene_ids-Allou,n_cells-Allou,feature_types-Allou,gene_ids-He,feature_types-WSSS_THYst9807808-He,feature_types-WSSS_THYst9807809-He,feature_types-WSSS_THYst9807810-He,feature_types-WSSS_THYst9807811-He,feature_types-WSSS_THYst9807812-He,feature_types-WSSS_THYst9807813-He,...,Deep_1_e13_5,Deep_WSSS_THYst9807810,Deep_WSSS_THYst9807815,Deep_WSSS_THYst9807820,Deep_GSM4227225,Deep_GSM4227224,Deep_GSM4227226,Deep_GSM4227227,Deep_n,gene_name
gene_ids-He,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ENSMUSG00000109644,ENSMUSG00000109644,108.0,Gene Expression,ENSMUSG00000109644,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,0,0610005C13Rik
ENSMUSG00000108652,,,,ENSMUSG00000108652,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,0,0610006L08Rik
ENSMUSG00000007777,ENSMUSG00000007777,8616.0,Gene Expression,ENSMUSG00000007777,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,0,0610009B22Rik
ENSMUSG00000086714,ENSMUSG00000086714,410.0,Gene Expression,ENSMUSG00000086714,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,0,0610009E02Rik
ENSMUSG00000043644,ENSMUSG00000043644,1002.0,Gene Expression,ENSMUSG00000043644,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,0,0610009L18Rik
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ENSMUSG00000064360,ENSMUSG00000064360,22967.0,Gene Expression,ENSMUSG00000064360,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,0,mt-Nd3
ENSMUSG00000064363,ENSMUSG00000064363,34136.0,Gene Expression,ENSMUSG00000064363,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,1,mt-Nd4
ENSMUSG00000065947,ENSMUSG00000065947,11768.0,Gene Expression,ENSMUSG00000065947,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,0,mt-Nd4l
ENSMUSG00000064367,ENSMUSG00000064367,23436.0,Gene Expression,ENSMUSG00000064367,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,0,mt-Nd5


In [34]:
#araw.write('/lustre/scratch127/cellgen/cellgeni/cxgportal_sets/limb_cell_atlas/Data/Raw_data/mouse_limb_approved_genes_raw.h5ad', compression = 'gzip')

#### **obs (Cell metadata)**

In [35]:
#view obs

In [36]:
adata.obs

Unnamed: 0,sequencing_center,batch,stage,dissection,anatomy,percent_mito,n_counts,n_genes,doublet_scores,bh_pval,leiden,leiden_R,final_leiden_R,celltype,project,S_score,G2M_score,phase
AAACCCAAGAAGGATG-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.045589,12130.0,3729,0.0,,2,2,70,ProxMes,He,-0.153993,-0.254805,G1
AAACCCAAGACTCGAG-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.045908,20149.0,4626,0.0,,4,41,40,DermFibro,He,0.177538,-0.123043,S
AAACCCACACTCTGCT-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.064638,14883.0,3974,0.0,,2,2,70,ProxMes,He,0.283361,-0.021256,S
AAACCCAGTATCAGCT-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.052659,11356.0,3442,0.0,,2,2,70,ProxMes,He,-0.195050,-0.201147,G1
AAACCCAGTTTCCCAC-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.056154,30381.0,5028,0.0,,8,82,120,MyoB,He,0.234431,-0.168458,S
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTTGTCAGTCAGGACA-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,hindlimb,0.055120,8164.0,2389,,,23,231,,Pericyte,Kelly,-0.126943,-0.117261,G1
TTTGTCAGTTCCGTCT-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,hindlimb,0.054932,5878.0,1818,,,8,83,,Pax7+MyoProg,Kelly,0.149806,0.512961,G2M
TTTGTCATCCACGTGG-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,hindlimb,0.053890,5270.0,1783,,,21,213,,Basal,Kelly,-0.121107,-0.090475,G1
TTTGTCATCCCAAGTA-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,hindlimb,0.064262,10037.0,2650,,,8,83,,Pax7+MyoProg,Kelly,0.063508,0.542747,G2M


In [37]:
# view the column names in obs

In [38]:
adata.obs.columns

Index(['sequencing_center', 'batch', 'stage', 'dissection', 'anatomy',
       'percent_mito', 'n_counts', 'n_genes', 'doublet_scores', 'bh_pval',
       'leiden', 'leiden_R', 'final_leiden_R', 'celltype', 'project',
       'S_score', 'G2M_score', 'phase'],
      dtype='object')

In [39]:
adata.obs

Unnamed: 0,sequencing_center,batch,stage,dissection,anatomy,percent_mito,n_counts,n_genes,doublet_scores,bh_pval,leiden,leiden_R,final_leiden_R,celltype,project,S_score,G2M_score,phase
AAACCCAAGAAGGATG-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.045589,12130.0,3729,0.0,,2,2,70,ProxMes,He,-0.153993,-0.254805,G1
AAACCCAAGACTCGAG-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.045908,20149.0,4626,0.0,,4,41,40,DermFibro,He,0.177538,-0.123043,S
AAACCCACACTCTGCT-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.064638,14883.0,3974,0.0,,2,2,70,ProxMes,He,0.283361,-0.021256,S
AAACCCAGTATCAGCT-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.052659,11356.0,3442,0.0,,2,2,70,ProxMes,He,-0.195050,-0.201147,G1
AAACCCAGTTTCCCAC-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.056154,30381.0,5028,0.0,,8,82,120,MyoB,He,0.234431,-0.168458,S
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTTGTCAGTCAGGACA-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,hindlimb,0.055120,8164.0,2389,,,23,231,,Pericyte,Kelly,-0.126943,-0.117261,G1
TTTGTCAGTTCCGTCT-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,hindlimb,0.054932,5878.0,1818,,,8,83,,Pax7+MyoProg,Kelly,0.149806,0.512961,G2M
TTTGTCATCCACGTGG-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,hindlimb,0.053890,5270.0,1783,,,21,213,,Basal,Kelly,-0.121107,-0.090475,G1
TTTGTCATCCCAAGTA-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,hindlimb,0.064262,10037.0,2650,,,8,83,,Pax7+MyoProg,Kelly,0.063508,0.542747,G2M


#### **assay_ontology_term_id**

In [40]:
adata.obs['barcodes'] = adata.obs_names

In [41]:
adata.obs['barcodes'] = adata.obs['barcodes'].str.extract(r'([ATGC]{10,16})', expand=False)

In [42]:
assay_info = pd.read_csv('/lustre/scratch127/cellgen/cellgeni/cxgportal_sets/covid_cell_atlas_ALI_organoids/10X_barcode_table_assay.csv')

In [43]:
mapping = dict(zip(assay_info['barcode'], assay_info['assay']))

In [44]:
adata.obs['assay'] = adata.obs['barcodes'].map(mapping)

In [45]:
mapping = {'GSM4227224':'3v2',
'WSSS_THYst9807811':'3v3',
'WSSS_THYst9807812':'3v3',
'WSSS_THYst9807813':'3v3',
'WSSS_THYst9807817':'3v3',
'WSSS_THYst9807818':'3v3',
'WSSS_THYst9807819':'3v3',
'GSM4227225':'3v2',
'GSM4227226':'3v2',
'WSSS_THYst9807820':'3v3',
'GSM4227227':'3v2',
'GSM4498677':'3v2',
'GSM4498678':'3v2',
'7_e10_5':'3v2',
'3_e11':'3v2',
'4_e12':'3v2',
'WSSS_THYst9807808':'3v3',
'WSSS_THYst9807809':'3v3',
'WSSS_THYst9807810':'3v3',
'12_e13':'3v2',
'5_e13':'3v2',
'1_e13_5':'3v2',
'WSSS_THYst9807814':'3v3',
'WSSS_THYst9807815':'3v3',
'WSSS_THYst9807816':'3v3',
'13_e14':'3v2',
'8_e15_whole':'3v2',
'6_e15':'3v2',
'9_e15_prox':'3v2',
'10_e15_mid':'3v2',
'11_e15_dist':'3v2'}

In [46]:
mapping={'5386STDY7537944':'3v2',
'FCAImmP7536758':'3v2',
'FCAImmP7536759':'3v2',
'5386STDY7557335':'3v2',
'5386STDY7557336':'3v2',
'5386STDY7557337':'3v2',
'5478STDY7652318':'3v2',
'5478STDY7717491':'3v2',
'5478STDY7717492':'3v2',
'5478STDY7935101':'3v2',
'5478STDY7935102':'3v2',
'5478STDY7980348':'3v2',
'5478STDY7980349':'3v2',
'WSSS_THYst8796437':'3v2',
'WSSS_THYst8796438':'3v2',
'WSSS_THYst8796439':'3v2',
'WSSS_THYst8796440':'3v2',
'WSSS_THYst8796441':'3v2',
'WSSS_THYst8796442':'3v2',
'WSSS_THYst9384953':'5v1',
'WSSS_THYst9384954':'5v1',
'WSSS_THYst9384955':'5v1',
'WSSS_THYst9384956':'5v1',
'WSSS_THYst9384957':'5v1',
'WSSS_THYst9384958':'5v1',
'WSSS_THYst9807808':'3v3',
'WSSS_THYst9807809':'3v3',
'WSSS_THYst9807810':'3v3',
'WSSS_THYst9807811':'3v3',
'WSSS_THYst9807812':'3v3',
'WSSS_THYst9807813':'3v3',
'WSSS_THYst9807814':'3v3',
'WSSS_THYst9807815':'3v3',
'WSSS_THYst9807816':'3v3',
'WSSS_THYst9807817':'3v3',
'WSSS_THYst9807818':'3v3',
'WSSS_THYst9807819':'3v3',
'WSSS_THYst9807820':'3v3',
'1_e13_5':'3v2',
'3_e11':'3v2',
'4_e12':'3v2',
'5_e13':'3v2',
'6_e15':'3v2',
'7_e10_5':'3v2',
'8_e15_whole':'3v2',
'9_e15_prox':'3v2',
'10_e15_mid':'3v2',
'11_e15_dist':'3v2',
'12_e13':'3v2',
'13_e14':'3v2',
'GSM4498677':'3v2',
'GSM4498678':'3v2',
'GSM4227224':'3v2',
'GSM4227225':'3v2',
'GSM4227226':'3v2',
'GSM4227227':'3v2'}

In [47]:
adata.obs['assays']  = adata.obs['batch'].map(mapping)

In [48]:
list(adata.obs['sequencing_center'].unique())

['Sanger', 'Caltech', 'Allou', 'Kelly']

In [49]:
list(adata.obs['assays'].unique())

['3v3', '3v2']

In [50]:
import pandas as pd
import scanpy as sc

# Suppose 'adata' is your AnnData object already loaded.

# Step 1: Check unique values in 'assay'
unique_assays = adata.obs['assay'].unique()
print("Unique values in 'assay':", unique_assays)

# Step 2: Display unique 'assays' for each unique 'assay'
for assay in unique_assays:
    unique_assays_for_group = adata.obs.loc[adata.obs['assay'] == assay, 'assays'].unique()
    print(f"Unique 'assays' for assay {assay}:", unique_assays_for_group)

Unique values in 'assay': ['3pv3' '3pv2_5pv1_5pv2+3pv3' '3pv3+multiome' '3pv2_5pv1_5pv2'
 '3pv2_5pv1_5pv2+multiome' nan]
Unique 'assays' for assay 3pv3: ['3v3']
Unique 'assays' for assay 3pv2_5pv1_5pv2+3pv3: ['3v3' '3v2']
Unique 'assays' for assay 3pv3+multiome: ['3v3']
Unique 'assays' for assay 3pv2_5pv1_5pv2: ['3v2']
Unique 'assays' for assay 3pv2_5pv1_5pv2+multiome: ['3v2']
Unique 'assays' for assay nan: []


In [51]:
mapping= {'3v2':'EFO:0009899', '3v3' : 'EFO:0009922'}

In [52]:
adata.obs['assay_ontology_term_id']  = adata.obs['assays'].map(mapping)

In [53]:
adata.obs['assay_ontology_term_id'] = adata.obs['assay_ontology_term_id'].astype('category')

In [54]:
# view adata.obs

In [55]:
adata.obs

Unnamed: 0,sequencing_center,batch,stage,dissection,anatomy,percent_mito,n_counts,n_genes,doublet_scores,bh_pval,...,final_leiden_R,celltype,project,S_score,G2M_score,phase,barcodes,assay,assays,assay_ontology_term_id
AAACCCAAGAAGGATG-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.045589,12130.0,3729,0.0,,...,70,ProxMes,He,-0.153993,-0.254805,G1,AAACCCAAGAAGGATG,3pv3,3v3,EFO:0009922
AAACCCAAGACTCGAG-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.045908,20149.0,4626,0.0,,...,40,DermFibro,He,0.177538,-0.123043,S,AAACCCAAGACTCGAG,3pv3,3v3,EFO:0009922
AAACCCACACTCTGCT-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.064638,14883.0,3974,0.0,,...,70,ProxMes,He,0.283361,-0.021256,S,AAACCCACACTCTGCT,3pv3,3v3,EFO:0009922
AAACCCAGTATCAGCT-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.052659,11356.0,3442,0.0,,...,70,ProxMes,He,-0.195050,-0.201147,G1,AAACCCAGTATCAGCT,3pv3,3v3,EFO:0009922
AAACCCAGTTTCCCAC-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.056154,30381.0,5028,0.0,,...,120,MyoB,He,0.234431,-0.168458,S,AAACCCAGTTTCCCAC,3pv3,3v3,EFO:0009922
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTTGTCAGTCAGGACA-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,hindlimb,0.055120,8164.0,2389,,,...,,Pericyte,Kelly,-0.126943,-0.117261,G1,TTTGTCAGTCAGGACA,3pv2_5pv1_5pv2,3v2,EFO:0009899
TTTGTCAGTTCCGTCT-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,hindlimb,0.054932,5878.0,1818,,,...,,Pax7+MyoProg,Kelly,0.149806,0.512961,G2M,TTTGTCAGTTCCGTCT,3pv2_5pv1_5pv2,3v2,EFO:0009899
TTTGTCATCCACGTGG-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,hindlimb,0.053890,5270.0,1783,,,...,,Basal,Kelly,-0.121107,-0.090475,G1,TTTGTCATCCACGTGG,3pv2_5pv1_5pv2,3v2,EFO:0009899
TTTGTCATCCCAAGTA-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,hindlimb,0.064262,10037.0,2650,,,...,,Pax7+MyoProg,Kelly,0.063508,0.542747,G2M,TTTGTCATCCCAAGTA,3pv2_5pv1_5pv2,3v2,EFO:0009899


#### **cell_type_ontology_term_id**

In [56]:
#identify the column in adata.obs related. to cell type annotation

In [57]:
adata.obs.columns

Index(['sequencing_center', 'batch', 'stage', 'dissection', 'anatomy',
       'percent_mito', 'n_counts', 'n_genes', 'doublet_scores', 'bh_pval',
       'leiden', 'leiden_R', 'final_leiden_R', 'celltype', 'project',
       'S_score', 'G2M_score', 'phase', 'barcodes', 'assay', 'assays',
       'assay_ontology_term_id'],
      dtype='object')

In [58]:
list(adata.obs['celltype'].unique())

['ProxMes',
 'DermFibro',
 'MyoB',
 'Hoxc5+DermFibroProg',
 'EarlyDistalMes',
 'MyoC1',
 'RestingChon',
 'SMProg',
 'Pax7+MyoProg',
 'ProlifChon',
 'VenousEndo',
 'MesCond',
 'NK',
 'Myh3+MyoC',
 'Meox2+Mes',
 'PrimErythro',
 'SchwannProg',
 'Perichon',
 'Dpt+Fibro',
 'LymphEndo',
 'DC2',
 'PrehyperChon',
 'Mfap5+Fibro',
 'ArterialEndo',
 'Myl3+MyoC',
 'InterZone',
 'EarlyProxMes',
 'CMP/GMP',
 'Basal',
 'SynapSchwann',
 'SMC',
 'Melano',
 'Macro',
 'Basophil',
 'OsteoB',
 'Schwann',
 'Adh+Fibro',
 'Periderm',
 'Megakaryo',
 'ILC',
 'Monocyte',
 'Teno',
 'DefErythro',
 'Pax3+MyoProg',
 'Mast',
 'DistalMes',
 'TransMes',
 'AER-Basal',
 'InterMusFibro',
 'Rdh10+DistalMes',
 'SupraBasal1',
 'MyoC2',
 'Myelocyte',
 'Pericyte',
 'B',
 'HyperChon',
 'Placode',
 'SupraBasal2',
 'Nail']

In [59]:
# create a dictionary of cell type and ontology term

In [60]:
mapping= {
'Mes1':'CL:0008019',
'OCP':'CL:0000062',
'VenousEndo':'CL:0002543',
'Mes4':'CL:0008019',
'TransMes':'CL:0008019',
'PrimErythro2':'CL:0002355',
'STMN2+Fibro':'CL:0002551',
'PAX3+MyoProg':'CL:0000515',
'OsteoB':'CL:0000062',
'AER-Basal':'CL:0000646',
'MesCond':'CL:0000138',
'DistalMes':'CL:0008019',
'Periderm':'CL:0000078',
'MyoB1':'CL:0000056',
'ISL1+Mes':'CL:0008019',
'ChondroProg':'CL:0000138',
'Mes2':'CL:0008019',
'InterZone':'CL:0008019',
'ProxMes':'CL:0008019',
'RDH10+DistalMes':'CL:0008019',
'ArterialEndo':'CL:1000413',
'SchwannProg':'CL:0002375',
'HOXC5+DermFibroProg':'CL:0002551',
'MyoC1':'CL:0000187',
'PrimErythro1':'CL:0002355',
'MYL3+MyoC':'CL:0000187',
'Mes3':'CL:0008019',
'Megakaryo':'CL:0000556',
'Monocyte':'CL:0000576',
'SynapSchwann':'CL:0002573',
'PAX7+MyoProg':'CL:0000515',
'Pericyte':'CL:0000669',
'ADH+Fibro':'CL:1001609',
'DermFibro':'CL:0002551',
'Macro':'CL:0000235',
'MFAP5+Fibro':'CL:0000057',
'TenoProg':'CL:0000388',
'SMC':'CL:0000192',
'Perimysium':'CL:0002320',
'SMProg':'CL:0000192',
'MyoB2':'CL:0000056',
'Schwann':'CL:0002573',
'F10+DermFibroProg':'CL:0002551',
'NeuralFibro':'CL:0000057',
'Teno':'CL:0000388',
'PrehyperChon':'CL:0000138',
'NK':'CL:0000623',
'MyoC2':'CL:0000187',
'B':'CL:0000236',
'PeriChon':'CL:0000058',
'InterMusFibro':'CL:1001609',
'Basal':'CL:0000646',
'LymphEndo':'CL:0002138',
'DC2':'CL:0000990',
'LMPP/ELP':'CL:0000936',
'Melano':'CL:0000148',
'MYH3+MyoC':'CL:0000187',
'CMP/GMP':'CL:0000049',
'ProlifChon':'CL:0000138',
'Mast':'CL:0000097',
'Myelocyte':'CL:0002193',
'ArtiChon':'CL:1001607',
'HyperChon':'CL:0000743',
'RestingChon':'CL:0000138',
'DefReticulo':'CL:0000558',
'DefErythro':'CL:0000232',
'Neuronal':'CL:0000540',
'MyoB':'CL:0000056',
'EarlyDistalMes':'CL:0008019',
'Meox2+Mes':'CL:0008019',
'PrimErythro':'CL:0002355',
'Dpt+Fibro':'CL:0002551',
'EarlyProxMes':'CL:0008019',
'Basophil':'CL:0000767',
'ILC':'CL:0001065',
'SupraBasal1':'CL:0000066',
'Placode':'CL:0002483',
'SupraBasal2':'CL:0000066',
'Nail':'CL:4033056',
'PAX3+PAX7+MyoProg':'CL:0000187',
'PAX7+SPON2+MyoProg':'CL:0000187',
'PAX7+NTN5+MyoProg':'CL:0000187',
'ArtiChon':'CL:1001607',
'Myl3+MyoC' :'CL:0000187',
'Myh3+MyoC' :'CL:0000187',
'Mfap5+Fibro':'CL:0000057',
'Hoxc5+DermFibroProg':'CL:0002551',
'Adh+Fibro':'CL:1001609',
'Pax3+MyoProg':'CL:0000515',
'Pax7+MyoProg':'CL:0000515',
'Rdh10+DistalMes':'CL:0008019',
'Perichon':'CL:0000058'
}

In [61]:
# add the cell_type_ontology_term_id column

In [62]:
adata.obs['cell_type_ontology_term_id'] = adata.obs['celltype'].map(mapping)

In [63]:
# change datatype of the column

In [64]:
adata.obs['cell_type_ontology_term_id'] = adata.obs['cell_type_ontology_term_id'].astype('category')

In [65]:
# view adata.obs

In [66]:
adata.obs

Unnamed: 0,sequencing_center,batch,stage,dissection,anatomy,percent_mito,n_counts,n_genes,doublet_scores,bh_pval,...,celltype,project,S_score,G2M_score,phase,barcodes,assay,assays,assay_ontology_term_id,cell_type_ontology_term_id
AAACCCAAGAAGGATG-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.045589,12130.0,3729,0.0,,...,ProxMes,He,-0.153993,-0.254805,G1,AAACCCAAGAAGGATG,3pv3,3v3,EFO:0009922,CL:0008019
AAACCCAAGACTCGAG-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.045908,20149.0,4626,0.0,,...,DermFibro,He,0.177538,-0.123043,S,AAACCCAAGACTCGAG,3pv3,3v3,EFO:0009922,CL:0002551
AAACCCACACTCTGCT-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.064638,14883.0,3974,0.0,,...,ProxMes,He,0.283361,-0.021256,S,AAACCCACACTCTGCT,3pv3,3v3,EFO:0009922,CL:0008019
AAACCCAGTATCAGCT-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.052659,11356.0,3442,0.0,,...,ProxMes,He,-0.195050,-0.201147,G1,AAACCCAGTATCAGCT,3pv3,3v3,EFO:0009922,CL:0008019
AAACCCAGTTTCCCAC-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.056154,30381.0,5028,0.0,,...,MyoB,He,0.234431,-0.168458,S,AAACCCAGTTTCCCAC,3pv3,3v3,EFO:0009922,CL:0000056
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTTGTCAGTCAGGACA-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,hindlimb,0.055120,8164.0,2389,,,...,Pericyte,Kelly,-0.126943,-0.117261,G1,TTTGTCAGTCAGGACA,3pv2_5pv1_5pv2,3v2,EFO:0009899,CL:0000669
TTTGTCAGTTCCGTCT-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,hindlimb,0.054932,5878.0,1818,,,...,Pax7+MyoProg,Kelly,0.149806,0.512961,G2M,TTTGTCAGTTCCGTCT,3pv2_5pv1_5pv2,3v2,EFO:0009899,CL:0000515
TTTGTCATCCACGTGG-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,hindlimb,0.053890,5270.0,1783,,,...,Basal,Kelly,-0.121107,-0.090475,G1,TTTGTCATCCACGTGG,3pv2_5pv1_5pv2,3v2,EFO:0009899,CL:0000646
TTTGTCATCCCAAGTA-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,hindlimb,0.064262,10037.0,2650,,,...,Pax7+MyoProg,Kelly,0.063508,0.542747,G2M,TTTGTCATCCCAAGTA,3pv2_5pv1_5pv2,3v2,EFO:0009899,CL:0000515


In [67]:
list(adata.obs['cell_type_ontology_term_id'].unique())

['CL:0008019',
 'CL:0002551',
 'CL:0000056',
 'CL:0000187',
 'CL:0000138',
 'CL:0000192',
 'CL:0000515',
 'CL:0002543',
 'CL:0000623',
 'CL:0002355',
 'CL:0002375',
 'CL:0000058',
 'CL:0002138',
 'CL:0000990',
 'CL:0000057',
 'CL:1000413',
 'CL:0000049',
 'CL:0000646',
 'CL:0002573',
 'CL:0000148',
 'CL:0000235',
 'CL:0000767',
 'CL:0000062',
 'CL:1001609',
 'CL:0000078',
 'CL:0000556',
 'CL:0001065',
 'CL:0000576',
 'CL:0000388',
 'CL:0000232',
 'CL:0000097',
 'CL:0000066',
 'CL:0002193',
 'CL:0000669',
 'CL:0000236',
 'CL:0000743',
 'CL:0002483',
 'CL:4033056']

In [68]:
adata.obs['celltype'] = adata.obs['celltype'].astype(str)
adata.obs['cell_type_ontology_term_id']= adata.obs['cell_type_ontology_term_id'].astype(str)
combined_data = adata.obs['celltype'] + ' - ' + adata.obs['cell_type_ontology_term_id']

# Get unique combinations
unique_combinations = combined_data.unique()

# Display unique combinations
print("Unique combinations of 'assay' and 'celltype':")
for combination in unique_combinations:
    print(combination)

Unique combinations of 'assay' and 'celltype':
ProxMes - CL:0008019
DermFibro - CL:0002551
MyoB - CL:0000056
Hoxc5+DermFibroProg - CL:0002551
EarlyDistalMes - CL:0008019
MyoC1 - CL:0000187
RestingChon - CL:0000138
SMProg - CL:0000192
Pax7+MyoProg - CL:0000515
ProlifChon - CL:0000138
VenousEndo - CL:0002543
MesCond - CL:0000138
NK - CL:0000623
Myh3+MyoC - CL:0000187
Meox2+Mes - CL:0008019
PrimErythro - CL:0002355
SchwannProg - CL:0002375
Perichon - CL:0000058
Dpt+Fibro - CL:0002551
LymphEndo - CL:0002138
DC2 - CL:0000990
PrehyperChon - CL:0000138
Mfap5+Fibro - CL:0000057
ArterialEndo - CL:1000413
Myl3+MyoC - CL:0000187
InterZone - CL:0008019
EarlyProxMes - CL:0008019
CMP/GMP - CL:0000049
Basal - CL:0000646
SynapSchwann - CL:0002573
SMC - CL:0000192
Melano - CL:0000148
Macro - CL:0000235
Basophil - CL:0000767
OsteoB - CL:0000062
Schwann - CL:0002573
Adh+Fibro - CL:1001609
Periderm - CL:0000078
Megakaryo - CL:0000556
ILC - CL:0001065
Monocyte - CL:0000576
Teno - CL:0000388
DefErythro - CL

#### **development_stage_ontology_term_id**

In [69]:
# identify the column in adata which corresponds to age

In [70]:
adata.obs.columns

Index(['sequencing_center', 'batch', 'stage', 'dissection', 'anatomy',
       'percent_mito', 'n_counts', 'n_genes', 'doublet_scores', 'bh_pval',
       'leiden', 'leiden_R', 'final_leiden_R', 'celltype', 'project',
       'S_score', 'G2M_score', 'phase', 'barcodes', 'assay', 'assays',
       'assay_ontology_term_id', 'cell_type_ontology_term_id'],
      dtype='object')

In [71]:
list(adata.obs['stage'].unique())

[12.5, 13.5, 16.5, 11.0, 12.0, 13.0, 15.0, 10.5, 14.0, 9.5, 11.5, 15.5, 18.5]

In [72]:
adata.obs['stage'].dtype

CategoricalDtype(categories=[9.5, 10.5, 11.0, 11.5, 12.0, 12.5, 13.0, 13.5, 14.0, 15.0,
                  15.5, 16.5, 18.5],
, ordered=False, categories_dtype=float64)

In [73]:
adata.obs['stage'] = adata.obs['stage'].astype('str')

In [74]:
#adata.obs['stage'].dtype()

In [75]:
mapping= {'12.5':'MmusDv:0000028', '13.5':'MmusDv:0000029', '16.5':'MmusDv:0000033', '11.0':'MmusDv:0000026', '12.0':'MmusDv:0000027', '13.0':'MmusDv:0000028', '15.0':'MmusDv:0000032', '10.5':'MmusDv:0000025', '14.0':'MmusDv:0000029', '9.5':'MmusDv:0000023', '11.5':'MmusDv:0000027', '15.5':'MmusDv:0000032', '18.5':'MmusDv:0000035'}

In [76]:
adata.obs['development_stage_ontology_term_id'] = adata.obs['stage'].map(mapping)

In [77]:
# change datatype of the column

In [78]:
adata.obs['development_stage_ontology_term_id'] = adata.obs['development_stage_ontology_term_id'].astype('category')

In [79]:
# view unique values of development_stage_ontology_term_id column

In [80]:
list(adata.obs['development_stage_ontology_term_id'].unique())

['MmusDv:0000028',
 'MmusDv:0000029',
 'MmusDv:0000033',
 'MmusDv:0000026',
 'MmusDv:0000027',
 'MmusDv:0000032',
 'MmusDv:0000025',
 'MmusDv:0000023',
 'MmusDv:0000035']

In [81]:
# view adata.obs

In [82]:
adata.obs

Unnamed: 0,sequencing_center,batch,stage,dissection,anatomy,percent_mito,n_counts,n_genes,doublet_scores,bh_pval,...,project,S_score,G2M_score,phase,barcodes,assay,assays,assay_ontology_term_id,cell_type_ontology_term_id,development_stage_ontology_term_id
AAACCCAAGAAGGATG-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.045589,12130.0,3729,0.0,,...,He,-0.153993,-0.254805,G1,AAACCCAAGAAGGATG,3pv3,3v3,EFO:0009922,CL:0008019,MmusDv:0000028
AAACCCAAGACTCGAG-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.045908,20149.0,4626,0.0,,...,He,0.177538,-0.123043,S,AAACCCAAGACTCGAG,3pv3,3v3,EFO:0009922,CL:0002551,MmusDv:0000028
AAACCCACACTCTGCT-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.064638,14883.0,3974,0.0,,...,He,0.283361,-0.021256,S,AAACCCACACTCTGCT,3pv3,3v3,EFO:0009922,CL:0008019,MmusDv:0000028
AAACCCAGTATCAGCT-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.052659,11356.0,3442,0.0,,...,He,-0.195050,-0.201147,G1,AAACCCAGTATCAGCT,3pv3,3v3,EFO:0009922,CL:0008019,MmusDv:0000028
AAACCCAGTTTCCCAC-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.056154,30381.0,5028,0.0,,...,He,0.234431,-0.168458,S,AAACCCAGTTTCCCAC,3pv3,3v3,EFO:0009922,CL:0000056,MmusDv:0000028
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTTGTCAGTCAGGACA-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,hindlimb,0.055120,8164.0,2389,,,...,Kelly,-0.126943,-0.117261,G1,TTTGTCAGTCAGGACA,3pv2_5pv1_5pv2,3v2,EFO:0009899,CL:0000669,MmusDv:0000035
TTTGTCAGTTCCGTCT-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,hindlimb,0.054932,5878.0,1818,,,...,Kelly,0.149806,0.512961,G2M,TTTGTCAGTTCCGTCT,3pv2_5pv1_5pv2,3v2,EFO:0009899,CL:0000515,MmusDv:0000035
TTTGTCATCCACGTGG-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,hindlimb,0.053890,5270.0,1783,,,...,Kelly,-0.121107,-0.090475,G1,TTTGTCATCCACGTGG,3pv2_5pv1_5pv2,3v2,EFO:0009899,CL:0000646,MmusDv:0000035
TTTGTCATCCCAAGTA-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,hindlimb,0.064262,10037.0,2650,,,...,Kelly,0.063508,0.542747,G2M,TTTGTCATCCCAAGTA,3pv2_5pv1_5pv2,3v2,EFO:0009899,CL:0000515,MmusDv:0000035


#### **donor_id**

In [83]:
#identify the column in adata.obs which provides donor information

In [84]:
adata.obs.columns

Index(['sequencing_center', 'batch', 'stage', 'dissection', 'anatomy',
       'percent_mito', 'n_counts', 'n_genes', 'doublet_scores', 'bh_pval',
       'leiden', 'leiden_R', 'final_leiden_R', 'celltype', 'project',
       'S_score', 'G2M_score', 'phase', 'barcodes', 'assay', 'assays',
       'assay_ontology_term_id', 'cell_type_ontology_term_id',
       'development_stage_ontology_term_id'],
      dtype='object')

In [85]:
list(adata.obs['batch'].unique())

['WSSS_THYst9807808',
 'WSSS_THYst9807809',
 'WSSS_THYst9807810',
 'WSSS_THYst9807811',
 'WSSS_THYst9807812',
 'WSSS_THYst9807813',
 'WSSS_THYst9807814',
 'WSSS_THYst9807815',
 'WSSS_THYst9807816',
 'WSSS_THYst9807817',
 'WSSS_THYst9807818',
 'WSSS_THYst9807819',
 'WSSS_THYst9807820',
 '1_e13_5',
 '3_e11',
 '4_e12',
 '5_e13',
 '6_e15',
 '7_e10_5',
 '8_e15_whole',
 '9_e15_prox',
 '10_e15_mid',
 '11_e15_dist',
 '12_e13',
 '13_e14',
 'GSM4498677',
 'GSM4498678',
 'GSM4227224',
 'GSM4227225',
 'GSM4227226',
 'GSM4227227']

In [86]:
# add the donor_id column

In [87]:
adata.obs['donor_id'] = adata.obs['batch']

In [88]:
#adata.obs['donor_id'] = ['unknown'] * len(adata.obs['names'])

In [89]:
# change datatype of the column

In [90]:
adata.obs['donor_id'] = adata.obs['donor_id'].astype('category')

In [91]:
# view unique values of donor_id column

In [92]:
list(adata.obs['donor_id'].unique())

['WSSS_THYst9807808',
 'WSSS_THYst9807809',
 'WSSS_THYst9807810',
 'WSSS_THYst9807811',
 'WSSS_THYst9807812',
 'WSSS_THYst9807813',
 'WSSS_THYst9807814',
 'WSSS_THYst9807815',
 'WSSS_THYst9807816',
 'WSSS_THYst9807817',
 'WSSS_THYst9807818',
 'WSSS_THYst9807819',
 'WSSS_THYst9807820',
 '1_e13_5',
 '3_e11',
 '4_e12',
 '5_e13',
 '6_e15',
 '7_e10_5',
 '8_e15_whole',
 '9_e15_prox',
 '10_e15_mid',
 '11_e15_dist',
 '12_e13',
 '13_e14',
 'GSM4498677',
 'GSM4498678',
 'GSM4227224',
 'GSM4227225',
 'GSM4227226',
 'GSM4227227']

In [93]:
#view obs

In [94]:
adata.obs

Unnamed: 0,sequencing_center,batch,stage,dissection,anatomy,percent_mito,n_counts,n_genes,doublet_scores,bh_pval,...,S_score,G2M_score,phase,barcodes,assay,assays,assay_ontology_term_id,cell_type_ontology_term_id,development_stage_ontology_term_id,donor_id
AAACCCAAGAAGGATG-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.045589,12130.0,3729,0.0,,...,-0.153993,-0.254805,G1,AAACCCAAGAAGGATG,3pv3,3v3,EFO:0009922,CL:0008019,MmusDv:0000028,WSSS_THYst9807808
AAACCCAAGACTCGAG-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.045908,20149.0,4626,0.0,,...,0.177538,-0.123043,S,AAACCCAAGACTCGAG,3pv3,3v3,EFO:0009922,CL:0002551,MmusDv:0000028,WSSS_THYst9807808
AAACCCACACTCTGCT-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.064638,14883.0,3974,0.0,,...,0.283361,-0.021256,S,AAACCCACACTCTGCT,3pv3,3v3,EFO:0009922,CL:0008019,MmusDv:0000028,WSSS_THYst9807808
AAACCCAGTATCAGCT-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.052659,11356.0,3442,0.0,,...,-0.195050,-0.201147,G1,AAACCCAGTATCAGCT,3pv3,3v3,EFO:0009922,CL:0008019,MmusDv:0000028,WSSS_THYst9807808
AAACCCAGTTTCCCAC-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.056154,30381.0,5028,0.0,,...,0.234431,-0.168458,S,AAACCCAGTTTCCCAC,3pv3,3v3,EFO:0009922,CL:0000056,MmusDv:0000028,WSSS_THYst9807808
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTTGTCAGTCAGGACA-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,hindlimb,0.055120,8164.0,2389,,,...,-0.126943,-0.117261,G1,TTTGTCAGTCAGGACA,3pv2_5pv1_5pv2,3v2,EFO:0009899,CL:0000669,MmusDv:0000035,GSM4227227
TTTGTCAGTTCCGTCT-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,hindlimb,0.054932,5878.0,1818,,,...,0.149806,0.512961,G2M,TTTGTCAGTTCCGTCT,3pv2_5pv1_5pv2,3v2,EFO:0009899,CL:0000515,MmusDv:0000035,GSM4227227
TTTGTCATCCACGTGG-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,hindlimb,0.053890,5270.0,1783,,,...,-0.121107,-0.090475,G1,TTTGTCATCCACGTGG,3pv2_5pv1_5pv2,3v2,EFO:0009899,CL:0000646,MmusDv:0000035,GSM4227227
TTTGTCATCCCAAGTA-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,hindlimb,0.064262,10037.0,2650,,,...,0.063508,0.542747,G2M,TTTGTCATCCCAAGTA,3pv2_5pv1_5pv2,3v2,EFO:0009899,CL:0000515,MmusDv:0000035,GSM4227227


In [95]:
adata.obs.columns

Index(['sequencing_center', 'batch', 'stage', 'dissection', 'anatomy',
       'percent_mito', 'n_counts', 'n_genes', 'doublet_scores', 'bh_pval',
       'leiden', 'leiden_R', 'final_leiden_R', 'celltype', 'project',
       'S_score', 'G2M_score', 'phase', 'barcodes', 'assay', 'assays',
       'assay_ontology_term_id', 'cell_type_ontology_term_id',
       'development_stage_ontology_term_id', 'donor_id'],
      dtype='object')

#### **disease_ontology_term_id**

In [96]:
adata.obs['disease_ontology_term_id']= ['PATO:0000461'] * len(adata.obs)

In [97]:
adata.obs

Unnamed: 0,sequencing_center,batch,stage,dissection,anatomy,percent_mito,n_counts,n_genes,doublet_scores,bh_pval,...,G2M_score,phase,barcodes,assay,assays,assay_ontology_term_id,cell_type_ontology_term_id,development_stage_ontology_term_id,donor_id,disease_ontology_term_id
AAACCCAAGAAGGATG-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.045589,12130.0,3729,0.0,,...,-0.254805,G1,AAACCCAAGAAGGATG,3pv3,3v3,EFO:0009922,CL:0008019,MmusDv:0000028,WSSS_THYst9807808,PATO:0000461
AAACCCAAGACTCGAG-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.045908,20149.0,4626,0.0,,...,-0.123043,S,AAACCCAAGACTCGAG,3pv3,3v3,EFO:0009922,CL:0002551,MmusDv:0000028,WSSS_THYst9807808,PATO:0000461
AAACCCACACTCTGCT-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.064638,14883.0,3974,0.0,,...,-0.021256,S,AAACCCACACTCTGCT,3pv3,3v3,EFO:0009922,CL:0008019,MmusDv:0000028,WSSS_THYst9807808,PATO:0000461
AAACCCAGTATCAGCT-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.052659,11356.0,3442,0.0,,...,-0.201147,G1,AAACCCAGTATCAGCT,3pv3,3v3,EFO:0009922,CL:0008019,MmusDv:0000028,WSSS_THYst9807808,PATO:0000461
AAACCCAGTTTCCCAC-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.056154,30381.0,5028,0.0,,...,-0.168458,S,AAACCCAGTTTCCCAC,3pv3,3v3,EFO:0009922,CL:0000056,MmusDv:0000028,WSSS_THYst9807808,PATO:0000461
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTTGTCAGTCAGGACA-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,hindlimb,0.055120,8164.0,2389,,,...,-0.117261,G1,TTTGTCAGTCAGGACA,3pv2_5pv1_5pv2,3v2,EFO:0009899,CL:0000669,MmusDv:0000035,GSM4227227,PATO:0000461
TTTGTCAGTTCCGTCT-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,hindlimb,0.054932,5878.0,1818,,,...,0.512961,G2M,TTTGTCAGTTCCGTCT,3pv2_5pv1_5pv2,3v2,EFO:0009899,CL:0000515,MmusDv:0000035,GSM4227227,PATO:0000461
TTTGTCATCCACGTGG-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,hindlimb,0.053890,5270.0,1783,,,...,-0.090475,G1,TTTGTCATCCACGTGG,3pv2_5pv1_5pv2,3v2,EFO:0009899,CL:0000646,MmusDv:0000035,GSM4227227,PATO:0000461
TTTGTCATCCCAAGTA-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,hindlimb,0.064262,10037.0,2650,,,...,0.542747,G2M,TTTGTCATCCCAAGTA,3pv2_5pv1_5pv2,3v2,EFO:0009899,CL:0000515,MmusDv:0000035,GSM4227227,PATO:0000461


In [98]:
# change datatype of the column

In [99]:
adata.obs['disease_ontology_term_id'] = adata.obs['disease_ontology_term_id'].astype('category')

In [100]:
# view obs

In [101]:
adata.obs

Unnamed: 0,sequencing_center,batch,stage,dissection,anatomy,percent_mito,n_counts,n_genes,doublet_scores,bh_pval,...,G2M_score,phase,barcodes,assay,assays,assay_ontology_term_id,cell_type_ontology_term_id,development_stage_ontology_term_id,donor_id,disease_ontology_term_id
AAACCCAAGAAGGATG-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.045589,12130.0,3729,0.0,,...,-0.254805,G1,AAACCCAAGAAGGATG,3pv3,3v3,EFO:0009922,CL:0008019,MmusDv:0000028,WSSS_THYst9807808,PATO:0000461
AAACCCAAGACTCGAG-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.045908,20149.0,4626,0.0,,...,-0.123043,S,AAACCCAAGACTCGAG,3pv3,3v3,EFO:0009922,CL:0002551,MmusDv:0000028,WSSS_THYst9807808,PATO:0000461
AAACCCACACTCTGCT-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.064638,14883.0,3974,0.0,,...,-0.021256,S,AAACCCACACTCTGCT,3pv3,3v3,EFO:0009922,CL:0008019,MmusDv:0000028,WSSS_THYst9807808,PATO:0000461
AAACCCAGTATCAGCT-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.052659,11356.0,3442,0.0,,...,-0.201147,G1,AAACCCAGTATCAGCT,3pv3,3v3,EFO:0009922,CL:0008019,MmusDv:0000028,WSSS_THYst9807808,PATO:0000461
AAACCCAGTTTCCCAC-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.056154,30381.0,5028,0.0,,...,-0.168458,S,AAACCCAGTTTCCCAC,3pv3,3v3,EFO:0009922,CL:0000056,MmusDv:0000028,WSSS_THYst9807808,PATO:0000461
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTTGTCAGTCAGGACA-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,hindlimb,0.055120,8164.0,2389,,,...,-0.117261,G1,TTTGTCAGTCAGGACA,3pv2_5pv1_5pv2,3v2,EFO:0009899,CL:0000669,MmusDv:0000035,GSM4227227,PATO:0000461
TTTGTCAGTTCCGTCT-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,hindlimb,0.054932,5878.0,1818,,,...,0.512961,G2M,TTTGTCAGTTCCGTCT,3pv2_5pv1_5pv2,3v2,EFO:0009899,CL:0000515,MmusDv:0000035,GSM4227227,PATO:0000461
TTTGTCATCCACGTGG-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,hindlimb,0.053890,5270.0,1783,,,...,-0.090475,G1,TTTGTCATCCACGTGG,3pv2_5pv1_5pv2,3v2,EFO:0009899,CL:0000646,MmusDv:0000035,GSM4227227,PATO:0000461
TTTGTCATCCCAAGTA-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,hindlimb,0.064262,10037.0,2650,,,...,0.542747,G2M,TTTGTCATCCCAAGTA,3pv2_5pv1_5pv2,3v2,EFO:0009899,CL:0000515,MmusDv:0000035,GSM4227227,PATO:0000461


#### **is_primary_data**

In [102]:
#change data type of column

In [103]:
adata.obs['is_primary_data']= [True] * len(adata.obs)

In [104]:
adata.obs['is_primary_data'] = adata.obs['is_primary_data'].astype('bool')

In [105]:
# view obs

In [106]:
adata.obs

Unnamed: 0,sequencing_center,batch,stage,dissection,anatomy,percent_mito,n_counts,n_genes,doublet_scores,bh_pval,...,phase,barcodes,assay,assays,assay_ontology_term_id,cell_type_ontology_term_id,development_stage_ontology_term_id,donor_id,disease_ontology_term_id,is_primary_data
AAACCCAAGAAGGATG-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.045589,12130.0,3729,0.0,,...,G1,AAACCCAAGAAGGATG,3pv3,3v3,EFO:0009922,CL:0008019,MmusDv:0000028,WSSS_THYst9807808,PATO:0000461,True
AAACCCAAGACTCGAG-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.045908,20149.0,4626,0.0,,...,S,AAACCCAAGACTCGAG,3pv3,3v3,EFO:0009922,CL:0002551,MmusDv:0000028,WSSS_THYst9807808,PATO:0000461,True
AAACCCACACTCTGCT-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.064638,14883.0,3974,0.0,,...,S,AAACCCACACTCTGCT,3pv3,3v3,EFO:0009922,CL:0008019,MmusDv:0000028,WSSS_THYst9807808,PATO:0000461,True
AAACCCAGTATCAGCT-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.052659,11356.0,3442,0.0,,...,G1,AAACCCAGTATCAGCT,3pv3,3v3,EFO:0009922,CL:0008019,MmusDv:0000028,WSSS_THYst9807808,PATO:0000461,True
AAACCCAGTTTCCCAC-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.056154,30381.0,5028,0.0,,...,S,AAACCCAGTTTCCCAC,3pv3,3v3,EFO:0009922,CL:0000056,MmusDv:0000028,WSSS_THYst9807808,PATO:0000461,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTTGTCAGTCAGGACA-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,hindlimb,0.055120,8164.0,2389,,,...,G1,TTTGTCAGTCAGGACA,3pv2_5pv1_5pv2,3v2,EFO:0009899,CL:0000669,MmusDv:0000035,GSM4227227,PATO:0000461,True
TTTGTCAGTTCCGTCT-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,hindlimb,0.054932,5878.0,1818,,,...,G2M,TTTGTCAGTTCCGTCT,3pv2_5pv1_5pv2,3v2,EFO:0009899,CL:0000515,MmusDv:0000035,GSM4227227,PATO:0000461,True
TTTGTCATCCACGTGG-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,hindlimb,0.053890,5270.0,1783,,,...,G1,TTTGTCATCCACGTGG,3pv2_5pv1_5pv2,3v2,EFO:0009899,CL:0000646,MmusDv:0000035,GSM4227227,PATO:0000461,True
TTTGTCATCCCAAGTA-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,hindlimb,0.064262,10037.0,2650,,,...,G2M,TTTGTCATCCCAAGTA,3pv2_5pv1_5pv2,3v2,EFO:0009899,CL:0000515,MmusDv:0000035,GSM4227227,PATO:0000461,True


#### **organism_ontology_term_id**

In [107]:
# assign organism id 

In [108]:
adata.obs['organism_ontology_term_id'] = ['NCBITaxon:10090'] * len(adata.obs)

In [109]:
#change data type of column

In [110]:
adata.obs['organism_ontology_term_id'] = adata.obs['organism_ontology_term_id'].astype('category')

In [111]:
# view obs

In [112]:
adata.obs

Unnamed: 0,sequencing_center,batch,stage,dissection,anatomy,percent_mito,n_counts,n_genes,doublet_scores,bh_pval,...,barcodes,assay,assays,assay_ontology_term_id,cell_type_ontology_term_id,development_stage_ontology_term_id,donor_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id
AAACCCAAGAAGGATG-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.045589,12130.0,3729,0.0,,...,AAACCCAAGAAGGATG,3pv3,3v3,EFO:0009922,CL:0008019,MmusDv:0000028,WSSS_THYst9807808,PATO:0000461,True,NCBITaxon:10090
AAACCCAAGACTCGAG-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.045908,20149.0,4626,0.0,,...,AAACCCAAGACTCGAG,3pv3,3v3,EFO:0009922,CL:0002551,MmusDv:0000028,WSSS_THYst9807808,PATO:0000461,True,NCBITaxon:10090
AAACCCACACTCTGCT-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.064638,14883.0,3974,0.0,,...,AAACCCACACTCTGCT,3pv3,3v3,EFO:0009922,CL:0008019,MmusDv:0000028,WSSS_THYst9807808,PATO:0000461,True,NCBITaxon:10090
AAACCCAGTATCAGCT-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.052659,11356.0,3442,0.0,,...,AAACCCAGTATCAGCT,3pv3,3v3,EFO:0009922,CL:0008019,MmusDv:0000028,WSSS_THYst9807808,PATO:0000461,True,NCBITaxon:10090
AAACCCAGTTTCCCAC-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.056154,30381.0,5028,0.0,,...,AAACCCAGTTTCCCAC,3pv3,3v3,EFO:0009922,CL:0000056,MmusDv:0000028,WSSS_THYst9807808,PATO:0000461,True,NCBITaxon:10090
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTTGTCAGTCAGGACA-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,hindlimb,0.055120,8164.0,2389,,,...,TTTGTCAGTCAGGACA,3pv2_5pv1_5pv2,3v2,EFO:0009899,CL:0000669,MmusDv:0000035,GSM4227227,PATO:0000461,True,NCBITaxon:10090
TTTGTCAGTTCCGTCT-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,hindlimb,0.054932,5878.0,1818,,,...,TTTGTCAGTTCCGTCT,3pv2_5pv1_5pv2,3v2,EFO:0009899,CL:0000515,MmusDv:0000035,GSM4227227,PATO:0000461,True,NCBITaxon:10090
TTTGTCATCCACGTGG-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,hindlimb,0.053890,5270.0,1783,,,...,TTTGTCATCCACGTGG,3pv2_5pv1_5pv2,3v2,EFO:0009899,CL:0000646,MmusDv:0000035,GSM4227227,PATO:0000461,True,NCBITaxon:10090
TTTGTCATCCCAAGTA-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,hindlimb,0.064262,10037.0,2650,,,...,TTTGTCATCCCAAGTA,3pv2_5pv1_5pv2,3v2,EFO:0009899,CL:0000515,MmusDv:0000035,GSM4227227,PATO:0000461,True,NCBITaxon:10090


#### **self_reported_ethnicity_ontology_term_id**

In [113]:
adata.obs['self_reported_ethnicity_ontology_term_id'] = ['na'] * len(adata.obs)

In [114]:
# change data type

In [115]:
adata.obs['self_reported_ethnicity_ontology_term_id'] = adata.obs['self_reported_ethnicity_ontology_term_id'].astype('category')

In [116]:
# view obs

In [117]:
adata.obs

Unnamed: 0,sequencing_center,batch,stage,dissection,anatomy,percent_mito,n_counts,n_genes,doublet_scores,bh_pval,...,assay,assays,assay_ontology_term_id,cell_type_ontology_term_id,development_stage_ontology_term_id,donor_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id,self_reported_ethnicity_ontology_term_id
AAACCCAAGAAGGATG-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.045589,12130.0,3729,0.0,,...,3pv3,3v3,EFO:0009922,CL:0008019,MmusDv:0000028,WSSS_THYst9807808,PATO:0000461,True,NCBITaxon:10090,na
AAACCCAAGACTCGAG-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.045908,20149.0,4626,0.0,,...,3pv3,3v3,EFO:0009922,CL:0002551,MmusDv:0000028,WSSS_THYst9807808,PATO:0000461,True,NCBITaxon:10090,na
AAACCCACACTCTGCT-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.064638,14883.0,3974,0.0,,...,3pv3,3v3,EFO:0009922,CL:0008019,MmusDv:0000028,WSSS_THYst9807808,PATO:0000461,True,NCBITaxon:10090,na
AAACCCAGTATCAGCT-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.052659,11356.0,3442,0.0,,...,3pv3,3v3,EFO:0009922,CL:0008019,MmusDv:0000028,WSSS_THYst9807808,PATO:0000461,True,NCBITaxon:10090,na
AAACCCAGTTTCCCAC-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.056154,30381.0,5028,0.0,,...,3pv3,3v3,EFO:0009922,CL:0000056,MmusDv:0000028,WSSS_THYst9807808,PATO:0000461,True,NCBITaxon:10090,na
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTTGTCAGTCAGGACA-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,hindlimb,0.055120,8164.0,2389,,,...,3pv2_5pv1_5pv2,3v2,EFO:0009899,CL:0000669,MmusDv:0000035,GSM4227227,PATO:0000461,True,NCBITaxon:10090,na
TTTGTCAGTTCCGTCT-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,hindlimb,0.054932,5878.0,1818,,,...,3pv2_5pv1_5pv2,3v2,EFO:0009899,CL:0000515,MmusDv:0000035,GSM4227227,PATO:0000461,True,NCBITaxon:10090,na
TTTGTCATCCACGTGG-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,hindlimb,0.053890,5270.0,1783,,,...,3pv2_5pv1_5pv2,3v2,EFO:0009899,CL:0000646,MmusDv:0000035,GSM4227227,PATO:0000461,True,NCBITaxon:10090,na
TTTGTCATCCCAAGTA-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,hindlimb,0.064262,10037.0,2650,,,...,3pv2_5pv1_5pv2,3v2,EFO:0009899,CL:0000515,MmusDv:0000035,GSM4227227,PATO:0000461,True,NCBITaxon:10090,na


In [118]:
adata.obs.columns

Index(['sequencing_center', 'batch', 'stage', 'dissection', 'anatomy',
       'percent_mito', 'n_counts', 'n_genes', 'doublet_scores', 'bh_pval',
       'leiden', 'leiden_R', 'final_leiden_R', 'celltype', 'project',
       'S_score', 'G2M_score', 'phase', 'barcodes', 'assay', 'assays',
       'assay_ontology_term_id', 'cell_type_ontology_term_id',
       'development_stage_ontology_term_id', 'donor_id',
       'disease_ontology_term_id', 'is_primary_data',
       'organism_ontology_term_id',
       'self_reported_ethnicity_ontology_term_id'],
      dtype='object')

#### **sex_ontology_term_id**

In [119]:
# identify the column in adata.obs which corresponds to sex

In [120]:
adata.obs.columns

Index(['sequencing_center', 'batch', 'stage', 'dissection', 'anatomy',
       'percent_mito', 'n_counts', 'n_genes', 'doublet_scores', 'bh_pval',
       'leiden', 'leiden_R', 'final_leiden_R', 'celltype', 'project',
       'S_score', 'G2M_score', 'phase', 'barcodes', 'assay', 'assays',
       'assay_ontology_term_id', 'cell_type_ontology_term_id',
       'development_stage_ontology_term_id', 'donor_id',
       'disease_ontology_term_id', 'is_primary_data',
       'organism_ontology_term_id',
       'self_reported_ethnicity_ontology_term_id'],
      dtype='object')

In [121]:
adata.obs

Unnamed: 0,sequencing_center,batch,stage,dissection,anatomy,percent_mito,n_counts,n_genes,doublet_scores,bh_pval,...,assay,assays,assay_ontology_term_id,cell_type_ontology_term_id,development_stage_ontology_term_id,donor_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id,self_reported_ethnicity_ontology_term_id
AAACCCAAGAAGGATG-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.045589,12130.0,3729,0.0,,...,3pv3,3v3,EFO:0009922,CL:0008019,MmusDv:0000028,WSSS_THYst9807808,PATO:0000461,True,NCBITaxon:10090,na
AAACCCAAGACTCGAG-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.045908,20149.0,4626,0.0,,...,3pv3,3v3,EFO:0009922,CL:0002551,MmusDv:0000028,WSSS_THYst9807808,PATO:0000461,True,NCBITaxon:10090,na
AAACCCACACTCTGCT-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.064638,14883.0,3974,0.0,,...,3pv3,3v3,EFO:0009922,CL:0008019,MmusDv:0000028,WSSS_THYst9807808,PATO:0000461,True,NCBITaxon:10090,na
AAACCCAGTATCAGCT-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.052659,11356.0,3442,0.0,,...,3pv3,3v3,EFO:0009922,CL:0008019,MmusDv:0000028,WSSS_THYst9807808,PATO:0000461,True,NCBITaxon:10090,na
AAACCCAGTTTCCCAC-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.056154,30381.0,5028,0.0,,...,3pv3,3v3,EFO:0009922,CL:0000056,MmusDv:0000028,WSSS_THYst9807808,PATO:0000461,True,NCBITaxon:10090,na
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTTGTCAGTCAGGACA-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,hindlimb,0.055120,8164.0,2389,,,...,3pv2_5pv1_5pv2,3v2,EFO:0009899,CL:0000669,MmusDv:0000035,GSM4227227,PATO:0000461,True,NCBITaxon:10090,na
TTTGTCAGTTCCGTCT-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,hindlimb,0.054932,5878.0,1818,,,...,3pv2_5pv1_5pv2,3v2,EFO:0009899,CL:0000515,MmusDv:0000035,GSM4227227,PATO:0000461,True,NCBITaxon:10090,na
TTTGTCATCCACGTGG-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,hindlimb,0.053890,5270.0,1783,,,...,3pv2_5pv1_5pv2,3v2,EFO:0009899,CL:0000646,MmusDv:0000035,GSM4227227,PATO:0000461,True,NCBITaxon:10090,na
TTTGTCATCCCAAGTA-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,hindlimb,0.064262,10037.0,2650,,,...,3pv2_5pv1_5pv2,3v2,EFO:0009899,CL:0000515,MmusDv:0000035,GSM4227227,PATO:0000461,True,NCBITaxon:10090,na


In [122]:
# list the unique values 

In [123]:
# create a dictionary of sex and sex ontology term id

In [124]:
# add sex_ontology_term_id column

In [125]:
adata.obs['sex_ontology_term_id'] = ['unknown'] * len(adata.obs)

In [126]:
# change data type

In [127]:
adata.obs['sex_ontology_term_id'] = adata.obs['sex_ontology_term_id'].astype('category')

In [128]:
adata.obs

Unnamed: 0,sequencing_center,batch,stage,dissection,anatomy,percent_mito,n_counts,n_genes,doublet_scores,bh_pval,...,assays,assay_ontology_term_id,cell_type_ontology_term_id,development_stage_ontology_term_id,donor_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id,self_reported_ethnicity_ontology_term_id,sex_ontology_term_id
AAACCCAAGAAGGATG-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.045589,12130.0,3729,0.0,,...,3v3,EFO:0009922,CL:0008019,MmusDv:0000028,WSSS_THYst9807808,PATO:0000461,True,NCBITaxon:10090,na,unknown
AAACCCAAGACTCGAG-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.045908,20149.0,4626,0.0,,...,3v3,EFO:0009922,CL:0002551,MmusDv:0000028,WSSS_THYst9807808,PATO:0000461,True,NCBITaxon:10090,na,unknown
AAACCCACACTCTGCT-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.064638,14883.0,3974,0.0,,...,3v3,EFO:0009922,CL:0008019,MmusDv:0000028,WSSS_THYst9807808,PATO:0000461,True,NCBITaxon:10090,na,unknown
AAACCCAGTATCAGCT-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.052659,11356.0,3442,0.0,,...,3v3,EFO:0009922,CL:0008019,MmusDv:0000028,WSSS_THYst9807808,PATO:0000461,True,NCBITaxon:10090,na,unknown
AAACCCAGTTTCCCAC-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.056154,30381.0,5028,0.0,,...,3v3,EFO:0009922,CL:0000056,MmusDv:0000028,WSSS_THYst9807808,PATO:0000461,True,NCBITaxon:10090,na,unknown
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTTGTCAGTCAGGACA-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,hindlimb,0.055120,8164.0,2389,,,...,3v2,EFO:0009899,CL:0000669,MmusDv:0000035,GSM4227227,PATO:0000461,True,NCBITaxon:10090,na,unknown
TTTGTCAGTTCCGTCT-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,hindlimb,0.054932,5878.0,1818,,,...,3v2,EFO:0009899,CL:0000515,MmusDv:0000035,GSM4227227,PATO:0000461,True,NCBITaxon:10090,na,unknown
TTTGTCATCCACGTGG-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,hindlimb,0.053890,5270.0,1783,,,...,3v2,EFO:0009899,CL:0000646,MmusDv:0000035,GSM4227227,PATO:0000461,True,NCBITaxon:10090,na,unknown
TTTGTCATCCCAAGTA-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,hindlimb,0.064262,10037.0,2650,,,...,3v2,EFO:0009899,CL:0000515,MmusDv:0000035,GSM4227227,PATO:0000461,True,NCBITaxon:10090,na,unknown


#### **suspension_type**

In [129]:
adata.obs

Unnamed: 0,sequencing_center,batch,stage,dissection,anatomy,percent_mito,n_counts,n_genes,doublet_scores,bh_pval,...,assays,assay_ontology_term_id,cell_type_ontology_term_id,development_stage_ontology_term_id,donor_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id,self_reported_ethnicity_ontology_term_id,sex_ontology_term_id
AAACCCAAGAAGGATG-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.045589,12130.0,3729,0.0,,...,3v3,EFO:0009922,CL:0008019,MmusDv:0000028,WSSS_THYst9807808,PATO:0000461,True,NCBITaxon:10090,na,unknown
AAACCCAAGACTCGAG-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.045908,20149.0,4626,0.0,,...,3v3,EFO:0009922,CL:0002551,MmusDv:0000028,WSSS_THYst9807808,PATO:0000461,True,NCBITaxon:10090,na,unknown
AAACCCACACTCTGCT-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.064638,14883.0,3974,0.0,,...,3v3,EFO:0009922,CL:0008019,MmusDv:0000028,WSSS_THYst9807808,PATO:0000461,True,NCBITaxon:10090,na,unknown
AAACCCAGTATCAGCT-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.052659,11356.0,3442,0.0,,...,3v3,EFO:0009922,CL:0008019,MmusDv:0000028,WSSS_THYst9807808,PATO:0000461,True,NCBITaxon:10090,na,unknown
AAACCCAGTTTCCCAC-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.056154,30381.0,5028,0.0,,...,3v3,EFO:0009922,CL:0000056,MmusDv:0000028,WSSS_THYst9807808,PATO:0000461,True,NCBITaxon:10090,na,unknown
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTTGTCAGTCAGGACA-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,hindlimb,0.055120,8164.0,2389,,,...,3v2,EFO:0009899,CL:0000669,MmusDv:0000035,GSM4227227,PATO:0000461,True,NCBITaxon:10090,na,unknown
TTTGTCAGTTCCGTCT-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,hindlimb,0.054932,5878.0,1818,,,...,3v2,EFO:0009899,CL:0000515,MmusDv:0000035,GSM4227227,PATO:0000461,True,NCBITaxon:10090,na,unknown
TTTGTCATCCACGTGG-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,hindlimb,0.053890,5270.0,1783,,,...,3v2,EFO:0009899,CL:0000646,MmusDv:0000035,GSM4227227,PATO:0000461,True,NCBITaxon:10090,na,unknown
TTTGTCATCCCAAGTA-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,hindlimb,0.064262,10037.0,2650,,,...,3v2,EFO:0009899,CL:0000515,MmusDv:0000035,GSM4227227,PATO:0000461,True,NCBITaxon:10090,na,unknown


In [130]:
adata.obs['suspension_type'] = ['cell'] * len(adata.obs)

In [131]:
# change data type of column

In [132]:
adata.obs['suspension_type'] = adata.obs['suspension_type'].astype('category')

In [133]:
# view obs

In [134]:
adata.obs

Unnamed: 0,sequencing_center,batch,stage,dissection,anatomy,percent_mito,n_counts,n_genes,doublet_scores,bh_pval,...,assay_ontology_term_id,cell_type_ontology_term_id,development_stage_ontology_term_id,donor_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id,self_reported_ethnicity_ontology_term_id,sex_ontology_term_id,suspension_type
AAACCCAAGAAGGATG-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.045589,12130.0,3729,0.0,,...,EFO:0009922,CL:0008019,MmusDv:0000028,WSSS_THYst9807808,PATO:0000461,True,NCBITaxon:10090,na,unknown,cell
AAACCCAAGACTCGAG-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.045908,20149.0,4626,0.0,,...,EFO:0009922,CL:0002551,MmusDv:0000028,WSSS_THYst9807808,PATO:0000461,True,NCBITaxon:10090,na,unknown,cell
AAACCCACACTCTGCT-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.064638,14883.0,3974,0.0,,...,EFO:0009922,CL:0008019,MmusDv:0000028,WSSS_THYst9807808,PATO:0000461,True,NCBITaxon:10090,na,unknown,cell
AAACCCAGTATCAGCT-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.052659,11356.0,3442,0.0,,...,EFO:0009922,CL:0008019,MmusDv:0000028,WSSS_THYst9807808,PATO:0000461,True,NCBITaxon:10090,na,unknown,cell
AAACCCAGTTTCCCAC-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.056154,30381.0,5028,0.0,,...,EFO:0009922,CL:0000056,MmusDv:0000028,WSSS_THYst9807808,PATO:0000461,True,NCBITaxon:10090,na,unknown,cell
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTTGTCAGTCAGGACA-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,hindlimb,0.055120,8164.0,2389,,,...,EFO:0009899,CL:0000669,MmusDv:0000035,GSM4227227,PATO:0000461,True,NCBITaxon:10090,na,unknown,cell
TTTGTCAGTTCCGTCT-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,hindlimb,0.054932,5878.0,1818,,,...,EFO:0009899,CL:0000515,MmusDv:0000035,GSM4227227,PATO:0000461,True,NCBITaxon:10090,na,unknown,cell
TTTGTCATCCACGTGG-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,hindlimb,0.053890,5270.0,1783,,,...,EFO:0009899,CL:0000646,MmusDv:0000035,GSM4227227,PATO:0000461,True,NCBITaxon:10090,na,unknown,cell
TTTGTCATCCCAAGTA-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,hindlimb,0.064262,10037.0,2650,,,...,EFO:0009899,CL:0000515,MmusDv:0000035,GSM4227227,PATO:0000461,True,NCBITaxon:10090,na,unknown,cell


#### **tissue_type**

In [135]:
adata.obs['tissue_type'] = ['tissue'] * len(adata.obs)

In [136]:
adata.obs['tissue_type'] = adata.obs['tissue_type'].astype('category')

#### **tissue_ontology_term_id**

In [137]:
# identify the column in adata.obs which corresponds to tissue

In [138]:
adata.obs.columns

Index(['sequencing_center', 'batch', 'stage', 'dissection', 'anatomy',
       'percent_mito', 'n_counts', 'n_genes', 'doublet_scores', 'bh_pval',
       'leiden', 'leiden_R', 'final_leiden_R', 'celltype', 'project',
       'S_score', 'G2M_score', 'phase', 'barcodes', 'assay', 'assays',
       'assay_ontology_term_id', 'cell_type_ontology_term_id',
       'development_stage_ontology_term_id', 'donor_id',
       'disease_ontology_term_id', 'is_primary_data',
       'organism_ontology_term_id', 'self_reported_ethnicity_ontology_term_id',
       'sex_ontology_term_id', 'suspension_type', 'tissue_type'],
      dtype='object')

In [139]:
list(adata.obs['anatomy'].unique())

['forelimb', 'hindlimb']

In [140]:
# Ensure 'barcode' and 'cell_type_ontology_term_id' are columns in adata.obs
if 'batch' in adata.obs.columns and 'anatomy' in adata.obs.columns:
    # Create the dictionary
    mapping = dict(zip(adata.obs['batch'], adata.obs['anatomy']))
else:
    raise KeyError("Columns 'barcode' and/or 'anatomy' not found in adata.obs")

In [141]:
mapping

{'WSSS_THYst9807808': 'forelimb',
 'WSSS_THYst9807809': 'forelimb',
 'WSSS_THYst9807810': 'forelimb',
 'WSSS_THYst9807811': 'hindlimb',
 'WSSS_THYst9807812': 'hindlimb',
 'WSSS_THYst9807813': 'hindlimb',
 'WSSS_THYst9807814': 'forelimb',
 'WSSS_THYst9807815': 'forelimb',
 'WSSS_THYst9807816': 'forelimb',
 'WSSS_THYst9807817': 'hindlimb',
 'WSSS_THYst9807818': 'hindlimb',
 'WSSS_THYst9807819': 'hindlimb',
 'WSSS_THYst9807820': 'hindlimb',
 '1_e13_5': 'forelimb',
 '3_e11': 'forelimb',
 '4_e12': 'forelimb',
 '5_e13': 'forelimb',
 '6_e15': 'forelimb',
 '7_e10_5': 'forelimb',
 '8_e15_whole': 'forelimb',
 '9_e15_prox': 'forelimb',
 '10_e15_mid': 'forelimb',
 '11_e15_dist': 'forelimb',
 '12_e13': 'forelimb',
 '13_e14': 'forelimb',
 'GSM4498677': 'forelimb',
 'GSM4498678': 'forelimb',
 'GSM4227224': 'hindlimb',
 'GSM4227225': 'hindlimb',
 'GSM4227226': 'hindlimb',
 'GSM4227227': 'hindlimb'}

In [142]:
mapping= {'hindlimb':'UBERON:0002103', 'forelimb':'UBERON:0002102'}

In [143]:
# add 'tissue_ontology_term_id' column

In [144]:
adata.obs['tissue_ontology_term_id'] = adata.obs['anatomy'].map(mapping)

In [145]:
# change data type of column

In [146]:
adata.obs['tissue_ontology_term_id'] = adata.obs['tissue_ontology_term_id'].astype('category')

In [147]:
#list the unique values in 'tissue_ontology_term_id' column

In [148]:
list(adata.obs['tissue_ontology_term_id'].unique())

['UBERON:0002102', 'UBERON:0002103']

In [149]:
# view obs

In [150]:
adata.obs

Unnamed: 0,sequencing_center,batch,stage,dissection,anatomy,percent_mito,n_counts,n_genes,doublet_scores,bh_pval,...,development_stage_ontology_term_id,donor_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id,self_reported_ethnicity_ontology_term_id,sex_ontology_term_id,suspension_type,tissue_type,tissue_ontology_term_id
AAACCCAAGAAGGATG-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.045589,12130.0,3729,0.0,,...,MmusDv:0000028,WSSS_THYst9807808,PATO:0000461,True,NCBITaxon:10090,na,unknown,cell,tissue,UBERON:0002102
AAACCCAAGACTCGAG-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.045908,20149.0,4626,0.0,,...,MmusDv:0000028,WSSS_THYst9807808,PATO:0000461,True,NCBITaxon:10090,na,unknown,cell,tissue,UBERON:0002102
AAACCCACACTCTGCT-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.064638,14883.0,3974,0.0,,...,MmusDv:0000028,WSSS_THYst9807808,PATO:0000461,True,NCBITaxon:10090,na,unknown,cell,tissue,UBERON:0002102
AAACCCAGTATCAGCT-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.052659,11356.0,3442,0.0,,...,MmusDv:0000028,WSSS_THYst9807808,PATO:0000461,True,NCBITaxon:10090,na,unknown,cell,tissue,UBERON:0002102
AAACCCAGTTTCCCAC-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.056154,30381.0,5028,0.0,,...,MmusDv:0000028,WSSS_THYst9807808,PATO:0000461,True,NCBITaxon:10090,na,unknown,cell,tissue,UBERON:0002102
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTTGTCAGTCAGGACA-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,hindlimb,0.055120,8164.0,2389,,,...,MmusDv:0000035,GSM4227227,PATO:0000461,True,NCBITaxon:10090,na,unknown,cell,tissue,UBERON:0002103
TTTGTCAGTTCCGTCT-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,hindlimb,0.054932,5878.0,1818,,,...,MmusDv:0000035,GSM4227227,PATO:0000461,True,NCBITaxon:10090,na,unknown,cell,tissue,UBERON:0002103
TTTGTCATCCACGTGG-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,hindlimb,0.053890,5270.0,1783,,,...,MmusDv:0000035,GSM4227227,PATO:0000461,True,NCBITaxon:10090,na,unknown,cell,tissue,UBERON:0002103
TTTGTCATCCCAAGTA-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,hindlimb,0.064262,10037.0,2650,,,...,MmusDv:0000035,GSM4227227,PATO:0000461,True,NCBITaxon:10090,na,unknown,cell,tissue,UBERON:0002103


In [151]:
adata.obs.columns

Index(['sequencing_center', 'batch', 'stage', 'dissection', 'anatomy',
       'percent_mito', 'n_counts', 'n_genes', 'doublet_scores', 'bh_pval',
       'leiden', 'leiden_R', 'final_leiden_R', 'celltype', 'project',
       'S_score', 'G2M_score', 'phase', 'barcodes', 'assay', 'assays',
       'assay_ontology_term_id', 'cell_type_ontology_term_id',
       'development_stage_ontology_term_id', 'donor_id',
       'disease_ontology_term_id', 'is_primary_data',
       'organism_ontology_term_id', 'self_reported_ethnicity_ontology_term_id',
       'sex_ontology_term_id', 'suspension_type', 'tissue_type',
       'tissue_ontology_term_id'],
      dtype='object')

#### **obsm (Embeddings)**

In [152]:
# view obsm

In [153]:
# check whether all columns are prefixed with X

In [154]:
adata.obsm

AxisArrays with keys: X_pca, X_umap

#### **uns (Dataset Metadata)**

In [155]:
# View

In [156]:
adata.uns

{'anatomy_colors': array(['#1f77b4', '#ff7f0e'], dtype=object),
 'batch_colors': array(['#ffff00', '#1ce6ff', '#ff34ff', '#ff4a46', '#008941', '#006fa6',
        '#a30059', '#ffdbe5', '#7a4900', '#0000a6', '#63ffac', '#b79762',
        '#004d43', '#8fb0ff', '#997d87', '#5a0007', '#809693', '#6a3a4c',
        '#1b4400', '#4fc601', '#3b5dff', '#4a3b53', '#ff2f80', '#61615a',
        '#ba0900', '#6b7900', '#00c2a0', '#ffaa92', '#ff90c9', '#b903aa',
        '#d16100'], dtype=object),
 'celltype_colors': array(['#82D900', '#ffff00', '#9AFF02', '#008941', '#00E3E3', '#006fa6',
        '#a30059', '#0000a6', '#B15BFF', '#6C3365', '#00AEAE', '#005AB5',
        '#c4adc1', '#99bb99', '#ff0000', '#5a0007', '#484891', '#737300',
        '#005757', '#BF0060', '#ddefff', '#000035', '#7b4f4b', '#95CACA',
        '#ddefff', '#424200', '#00FFFF', '#c2ffed', '#a079bf', '#B8B8DC',
        '#AE00AE', '#FF44FF', '#AE0000', '#844200', '#00489c', '#cc0744',
        '#AD5A5A', '#ffa6ff', '#ff7575', '#4F9D9D', 

In [157]:
adata.uns.keys

<function dict.keys>

In [158]:
# Give a title for the dataset

In [159]:
adata.uns['title'] = 'mouse_limb_scRNAseq'

In [160]:
# Set the default embedding

In [161]:
adata.uns['default_embedding'] = 'X_umap'

### **Final check**

In [162]:
# view anndata object

In [163]:
adata

AnnData object with n_obs × n_vars = 215067 × 24651
    obs: 'sequencing_center', 'batch', 'stage', 'dissection', 'anatomy', 'percent_mito', 'n_counts', 'n_genes', 'doublet_scores', 'bh_pval', 'leiden', 'leiden_R', 'final_leiden_R', 'celltype', 'project', 'S_score', 'G2M_score', 'phase', 'barcodes', 'assay', 'assays', 'assay_ontology_term_id', 'cell_type_ontology_term_id', 'development_stage_ontology_term_id', 'donor_id', 'disease_ontology_term_id', 'is_primary_data', 'organism_ontology_term_id', 'self_reported_ethnicity_ontology_term_id', 'sex_ontology_term_id', 'suspension_type', 'tissue_type', 'tissue_ontology_term_id'
    var: 'feature_is_filtered'
    uns: 'anatomy_colors', 'batch_colors', 'celltype_colors', 'celltype_sizes', 'dissection_colors', 'leiden', 'leiden_R_colors', 'louvain', 'neighbors', 'paga', 'pca', 'predicted_hi_colors', 'project_colors', 'sequencing_center_colors', 'umap', 'title', 'default_embedding'
    obsm: 'X_pca', 'X_umap'
    obsp: 'connectivities', 'distanc

In [164]:
# view obs and var data types

In [165]:
adata.obs.dtypes

sequencing_center                           category
batch                                       category
stage                                         object
dissection                                  category
anatomy                                     category
percent_mito                                 float64
n_counts                                     float64
n_genes                                        int64
doublet_scores                               float64
bh_pval                                      float64
leiden                                         int64
leiden_R                                    category
final_leiden_R                              category
celltype                                      object
project                                     category
S_score                                      float64
G2M_score                                    float64
phase                                       category
barcodes                                      

In [166]:
dty = pd.DataFrame(adata.var.dtypes, columns = ['dtype'])
for c in dty[dty['dtype'] == 'float64'].index.values:
    adata.var[c] = adata.var[c].astype('float32')
    print(f"changed {c} from float64 to float32")
for c in dty[dty['dtype'] == 'int64'].index.values:
    adata.var[c] = adata.var[c].astype('int32') 
    print(f"changed {c} from int64 to int32")

In [167]:
dty = pd.DataFrame(adata.obs.dtypes, columns = ['dtype'])
for c in dty[dty['dtype'] == 'float64'].index.values:
    adata.obs[c] = adata.obs[c].astype('float32')
    print(f"changed {c} from float64 to float32")
for c in dty[dty['dtype'] == 'int64'].index.values:
    adata.obs[c] = adata.obs[c].astype('int32') 
    print(f"changed {c} from int64 to int32")
for c in dty[dty['dtype'] == 'object'].index.values:
    adata.obs[c] = adata.obs[c].astype('category') 
    print(f"changed {c} from object to category")

changed percent_mito from float64 to float32
changed n_counts from float64 to float32
changed doublet_scores from float64 to float32
changed bh_pval from float64 to float32
changed S_score from float64 to float32
changed G2M_score from float64 to float32
changed n_genes from int64 to int32
changed leiden from int64 to int32
changed stage from object to category
changed celltype from object to category
changed barcodes from object to category
changed assay from object to category
changed assays from object to category
changed cell_type_ontology_term_id from object to category


In [168]:
# view obs

In [169]:
adata.obs

Unnamed: 0,sequencing_center,batch,stage,dissection,anatomy,percent_mito,n_counts,n_genes,doublet_scores,bh_pval,...,development_stage_ontology_term_id,donor_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id,self_reported_ethnicity_ontology_term_id,sex_ontology_term_id,suspension_type,tissue_type,tissue_ontology_term_id
AAACCCAAGAAGGATG-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.045589,12130.0,3729,0.0,,...,MmusDv:0000028,WSSS_THYst9807808,PATO:0000461,True,NCBITaxon:10090,na,unknown,cell,tissue,UBERON:0002102
AAACCCAAGACTCGAG-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.045908,20149.0,4626,0.0,,...,MmusDv:0000028,WSSS_THYst9807808,PATO:0000461,True,NCBITaxon:10090,na,unknown,cell,tissue,UBERON:0002102
AAACCCACACTCTGCT-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.064638,14883.0,3974,0.0,,...,MmusDv:0000028,WSSS_THYst9807808,PATO:0000461,True,NCBITaxon:10090,na,unknown,cell,tissue,UBERON:0002102
AAACCCAGTATCAGCT-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.052659,11356.0,3442,0.0,,...,MmusDv:0000028,WSSS_THYst9807808,PATO:0000461,True,NCBITaxon:10090,na,unknown,cell,tissue,UBERON:0002102
AAACCCAGTTTCCCAC-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,forelimb,0.056154,30381.0,5028,0.0,,...,MmusDv:0000028,WSSS_THYst9807808,PATO:0000461,True,NCBITaxon:10090,na,unknown,cell,tissue,UBERON:0002102
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTTGTCAGTCAGGACA-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,hindlimb,0.055120,8164.0,2389,,,...,MmusDv:0000035,GSM4227227,PATO:0000461,True,NCBITaxon:10090,na,unknown,cell,tissue,UBERON:0002103
TTTGTCAGTTCCGTCT-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,hindlimb,0.054932,5878.0,1818,,,...,MmusDv:0000035,GSM4227227,PATO:0000461,True,NCBITaxon:10090,na,unknown,cell,tissue,UBERON:0002103
TTTGTCATCCACGTGG-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,hindlimb,0.053890,5270.0,1783,,,...,MmusDv:0000035,GSM4227227,PATO:0000461,True,NCBITaxon:10090,na,unknown,cell,tissue,UBERON:0002103
TTTGTCATCCCAAGTA-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,hindlimb,0.064262,10037.0,2650,,,...,MmusDv:0000035,GSM4227227,PATO:0000461,True,NCBITaxon:10090,na,unknown,cell,tissue,UBERON:0002103


In [170]:
adata.obs.columns

Index(['sequencing_center', 'batch', 'stage', 'dissection', 'anatomy',
       'percent_mito', 'n_counts', 'n_genes', 'doublet_scores', 'bh_pval',
       'leiden', 'leiden_R', 'final_leiden_R', 'celltype', 'project',
       'S_score', 'G2M_score', 'phase', 'barcodes', 'assay', 'assays',
       'assay_ontology_term_id', 'cell_type_ontology_term_id',
       'development_stage_ontology_term_id', 'donor_id',
       'disease_ontology_term_id', 'is_primary_data',
       'organism_ontology_term_id', 'self_reported_ethnicity_ontology_term_id',
       'sex_ontology_term_id', 'suspension_type', 'tissue_type',
       'tissue_ontology_term_id'],
      dtype='object')

In [171]:
# delete unwanted columns in obs

In [172]:
del adata.obs['assay']
del adata.obs['anatomy']
del adata.obs['assays']
del adata.obs['barcodes']
del adata.uns['batch_colors']

In [173]:
# view obs

In [174]:
adata.obs

Unnamed: 0,sequencing_center,batch,stage,dissection,percent_mito,n_counts,n_genes,doublet_scores,bh_pval,leiden,...,development_stage_ontology_term_id,donor_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id,self_reported_ethnicity_ontology_term_id,sex_ontology_term_id,suspension_type,tissue_type,tissue_ontology_term_id
AAACCCAAGAAGGATG-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,0.045589,12130.0,3729,0.0,,2,...,MmusDv:0000028,WSSS_THYst9807808,PATO:0000461,True,NCBITaxon:10090,na,unknown,cell,tissue,UBERON:0002102
AAACCCAAGACTCGAG-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,0.045908,20149.0,4626,0.0,,4,...,MmusDv:0000028,WSSS_THYst9807808,PATO:0000461,True,NCBITaxon:10090,na,unknown,cell,tissue,UBERON:0002102
AAACCCACACTCTGCT-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,0.064638,14883.0,3974,0.0,,2,...,MmusDv:0000028,WSSS_THYst9807808,PATO:0000461,True,NCBITaxon:10090,na,unknown,cell,tissue,UBERON:0002102
AAACCCAGTATCAGCT-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,0.052659,11356.0,3442,0.0,,2,...,MmusDv:0000028,WSSS_THYst9807808,PATO:0000461,True,NCBITaxon:10090,na,unknown,cell,tissue,UBERON:0002102
AAACCCAGTTTCCCAC-1-WSSS_THYst9807808-He,Sanger,WSSS_THYst9807808,12.5,prox,0.056154,30381.0,5028,0.0,,8,...,MmusDv:0000028,WSSS_THYst9807808,PATO:0000461,True,NCBITaxon:10090,na,unknown,cell,tissue,UBERON:0002102
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTTGTCAGTCAGGACA-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,0.055120,8164.0,2389,,,23,...,MmusDv:0000035,GSM4227227,PATO:0000461,True,NCBITaxon:10090,na,unknown,cell,tissue,UBERON:0002103
TTTGTCAGTTCCGTCT-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,0.054932,5878.0,1818,,,8,...,MmusDv:0000035,GSM4227227,PATO:0000461,True,NCBITaxon:10090,na,unknown,cell,tissue,UBERON:0002103
TTTGTCATCCACGTGG-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,0.053890,5270.0,1783,,,21,...,MmusDv:0000035,GSM4227227,PATO:0000461,True,NCBITaxon:10090,na,unknown,cell,tissue,UBERON:0002103
TTTGTCATCCCAAGTA-GSM4227227-Kelly,Kelly,GSM4227227,18.5,whole,0.064262,10037.0,2650,,,8,...,MmusDv:0000035,GSM4227227,PATO:0000461,True,NCBITaxon:10090,na,unknown,cell,tissue,UBERON:0002103


In [175]:
# view var

In [176]:
adata.var

Unnamed: 0_level_0,feature_is_filtered
gene_ids-He,Unnamed: 1_level_1
ENSMUSG00000109644,False
ENSMUSG00000108652,False
ENSMUSG00000007777,False
ENSMUSG00000086714,False
ENSMUSG00000043644,False
...,...
ENSMUSG00000064360,False
ENSMUSG00000064363,False
ENSMUSG00000065947,False
ENSMUSG00000064367,False


In [177]:
araw.var

Unnamed: 0_level_0,gene_ids-Allou,n_cells-Allou,feature_types-Allou,gene_ids-He,feature_types-WSSS_THYst9807808-He,feature_types-WSSS_THYst9807809-He,feature_types-WSSS_THYst9807810-He,feature_types-WSSS_THYst9807811-He,feature_types-WSSS_THYst9807812-He,feature_types-WSSS_THYst9807813-He,...,Deep_1_e13_5,Deep_WSSS_THYst9807810,Deep_WSSS_THYst9807815,Deep_WSSS_THYst9807820,Deep_GSM4227225,Deep_GSM4227224,Deep_GSM4227226,Deep_GSM4227227,Deep_n,gene_name
gene_ids-He,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ENSMUSG00000109644,ENSMUSG00000109644,108.0,Gene Expression,ENSMUSG00000109644,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,0,0610005C13Rik
ENSMUSG00000108652,,,,ENSMUSG00000108652,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,0,0610006L08Rik
ENSMUSG00000007777,ENSMUSG00000007777,8616.0,Gene Expression,ENSMUSG00000007777,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,0,0610009B22Rik
ENSMUSG00000086714,ENSMUSG00000086714,410.0,Gene Expression,ENSMUSG00000086714,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,0,0610009E02Rik
ENSMUSG00000043644,ENSMUSG00000043644,1002.0,Gene Expression,ENSMUSG00000043644,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,0,0610009L18Rik
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ENSMUSG00000064360,ENSMUSG00000064360,22967.0,Gene Expression,ENSMUSG00000064360,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,0,mt-Nd3
ENSMUSG00000064363,ENSMUSG00000064363,34136.0,Gene Expression,ENSMUSG00000064363,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,1,mt-Nd4
ENSMUSG00000065947,ENSMUSG00000065947,11768.0,Gene Expression,ENSMUSG00000065947,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,0,mt-Nd4l
ENSMUSG00000064367,ENSMUSG00000064367,23436.0,Gene Expression,ENSMUSG00000064367,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,Gene Expression,...,False,False,False,False,False,False,False,False,0,mt-Nd5


In [178]:
#view uns

In [179]:
adata.uns

{'anatomy_colors': array(['#1f77b4', '#ff7f0e'], dtype=object),
 'celltype_colors': array(['#82D900', '#ffff00', '#9AFF02', '#008941', '#00E3E3', '#006fa6',
        '#a30059', '#0000a6', '#B15BFF', '#6C3365', '#00AEAE', '#005AB5',
        '#c4adc1', '#99bb99', '#ff0000', '#5a0007', '#484891', '#737300',
        '#005757', '#BF0060', '#ddefff', '#000035', '#7b4f4b', '#95CACA',
        '#ddefff', '#424200', '#00FFFF', '#c2ffed', '#a079bf', '#B8B8DC',
        '#AE00AE', '#FF44FF', '#AE0000', '#844200', '#00489c', '#cc0744',
        '#AD5A5A', '#ffa6ff', '#ff7575', '#4F9D9D', '#FF8040', '#8600FF',
        '#eec3ff', '#616130', '#FF359A', '#FFDC35', '#005500', '#b86bae',
        '#977C00', '#F75000', '#CF9E9E', '#467500', '#272727', '#886f4c',
        '#34362d', '#ffdbe5', '#743A3A', '#8F4586', '#73BF00'],
       dtype=object),
 'celltype_sizes': array([  873,  6647,   191,    34,  7446,    34,   175,   195,  1964,
        11862,  4251,  6693, 34183, 10651,   222,   137,   121,  4256,
     

In [180]:
list(adata.uns.keys())

['anatomy_colors',
 'celltype_colors',
 'celltype_sizes',
 'dissection_colors',
 'leiden',
 'leiden_R_colors',
 'louvain',
 'neighbors',
 'paga',
 'pca',
 'predicted_hi_colors',
 'project_colors',
 'sequencing_center_colors',
 'umap',
 'title',
 'default_embedding']

In [181]:
del adata.uns['anatomy_colors']
del adata.uns['predicted_hi_colors']
del adata.obs['batch']

In [182]:
adata.obs.columns

Index(['sequencing_center', 'stage', 'dissection', 'percent_mito', 'n_counts',
       'n_genes', 'doublet_scores', 'bh_pval', 'leiden', 'leiden_R',
       'final_leiden_R', 'celltype', 'project', 'S_score', 'G2M_score',
       'phase', 'assay_ontology_term_id', 'cell_type_ontology_term_id',
       'development_stage_ontology_term_id', 'donor_id',
       'disease_ontology_term_id', 'is_primary_data',
       'organism_ontology_term_id', 'self_reported_ethnicity_ontology_term_id',
       'sex_ontology_term_id', 'suspension_type', 'tissue_type',
       'tissue_ontology_term_id'],
      dtype='object')

In [183]:
# Remove unwanted columns in uns

In [184]:
#check the format of expression matrix

In [185]:
adata.X

<215067x24651 sparse matrix of type '<class 'numpy.float32'>'
	with 709378722 stored elements in Compressed Sparse Row format>

In [186]:
araw.X

<215067x24651 sparse matrix of type '<class 'numpy.float32'>'
	with 709378722 stored elements in Compressed Sparse Row format>

In [187]:
#Copy raw counts to adata.raw

In [188]:
adata.raw = araw

In [189]:
obs_dtype = adata.obs.dtypes

In [190]:
obs_dtype

sequencing_center                           category
stage                                       category
dissection                                  category
percent_mito                                 float32
n_counts                                     float32
n_genes                                        int32
doublet_scores                               float32
bh_pval                                      float32
leiden                                         int32
leiden_R                                    category
final_leiden_R                              category
celltype                                    category
project                                     category
S_score                                      float32
G2M_score                                    float32
phase                                       category
assay_ontology_term_id                      category
cell_type_ontology_term_id                  category
development_stage_ontology_term_id          ca

In [191]:
adata.var.columns

Index(['feature_is_filtered'], dtype='object')

In [192]:
adata.raw.var.drop(columns=adata.raw.var.columns, inplace=True)

In [193]:
adata.raw.var

ENSMUSG00000109644
ENSMUSG00000108652
ENSMUSG00000007777
ENSMUSG00000086714
ENSMUSG00000043644
...
ENSMUSG00000064360
ENSMUSG00000064363
ENSMUSG00000065947
ENSMUSG00000064367
ENSMUSG00000064368


In [194]:
adata.var.index.name = None
adata.raw.var.index.name = None

In [195]:
adata.var

Unnamed: 0,feature_is_filtered
ENSMUSG00000109644,False
ENSMUSG00000108652,False
ENSMUSG00000007777,False
ENSMUSG00000086714,False
ENSMUSG00000043644,False
...,...
ENSMUSG00000064360,False
ENSMUSG00000064363,False
ENSMUSG00000065947,False
ENSMUSG00000064367,False


In [196]:
adata.write('/lustre/scratch127/cellgen/cellgeni/cxgportal_sets/limb_cell_atlas/Final_objects/to_upload/mouse_limb.h5ad', compression = 'gzip')

In [197]:
adata.obs

Unnamed: 0,sequencing_center,stage,dissection,percent_mito,n_counts,n_genes,doublet_scores,bh_pval,leiden,leiden_R,...,development_stage_ontology_term_id,donor_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id,self_reported_ethnicity_ontology_term_id,sex_ontology_term_id,suspension_type,tissue_type,tissue_ontology_term_id
AAACCCAAGAAGGATG-1-WSSS_THYst9807808-He,Sanger,12.5,prox,0.045589,12130.0,3729,0.0,,2,2,...,MmusDv:0000028,WSSS_THYst9807808,PATO:0000461,True,NCBITaxon:10090,na,unknown,cell,tissue,UBERON:0002102
AAACCCAAGACTCGAG-1-WSSS_THYst9807808-He,Sanger,12.5,prox,0.045908,20149.0,4626,0.0,,4,41,...,MmusDv:0000028,WSSS_THYst9807808,PATO:0000461,True,NCBITaxon:10090,na,unknown,cell,tissue,UBERON:0002102
AAACCCACACTCTGCT-1-WSSS_THYst9807808-He,Sanger,12.5,prox,0.064638,14883.0,3974,0.0,,2,2,...,MmusDv:0000028,WSSS_THYst9807808,PATO:0000461,True,NCBITaxon:10090,na,unknown,cell,tissue,UBERON:0002102
AAACCCAGTATCAGCT-1-WSSS_THYst9807808-He,Sanger,12.5,prox,0.052659,11356.0,3442,0.0,,2,2,...,MmusDv:0000028,WSSS_THYst9807808,PATO:0000461,True,NCBITaxon:10090,na,unknown,cell,tissue,UBERON:0002102
AAACCCAGTTTCCCAC-1-WSSS_THYst9807808-He,Sanger,12.5,prox,0.056154,30381.0,5028,0.0,,8,82,...,MmusDv:0000028,WSSS_THYst9807808,PATO:0000461,True,NCBITaxon:10090,na,unknown,cell,tissue,UBERON:0002102
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTTGTCAGTCAGGACA-GSM4227227-Kelly,Kelly,18.5,whole,0.055120,8164.0,2389,,,23,231,...,MmusDv:0000035,GSM4227227,PATO:0000461,True,NCBITaxon:10090,na,unknown,cell,tissue,UBERON:0002103
TTTGTCAGTTCCGTCT-GSM4227227-Kelly,Kelly,18.5,whole,0.054932,5878.0,1818,,,8,83,...,MmusDv:0000035,GSM4227227,PATO:0000461,True,NCBITaxon:10090,na,unknown,cell,tissue,UBERON:0002103
TTTGTCATCCACGTGG-GSM4227227-Kelly,Kelly,18.5,whole,0.053890,5270.0,1783,,,21,213,...,MmusDv:0000035,GSM4227227,PATO:0000461,True,NCBITaxon:10090,na,unknown,cell,tissue,UBERON:0002103
TTTGTCATCCCAAGTA-GSM4227227-Kelly,Kelly,18.5,whole,0.064262,10037.0,2650,,,8,83,...,MmusDv:0000035,GSM4227227,PATO:0000461,True,NCBITaxon:10090,na,unknown,cell,tissue,UBERON:0002103


In [198]:
adata.obs.columns

Index(['sequencing_center', 'stage', 'dissection', 'percent_mito', 'n_counts',
       'n_genes', 'doublet_scores', 'bh_pval', 'leiden', 'leiden_R',
       'final_leiden_R', 'celltype', 'project', 'S_score', 'G2M_score',
       'phase', 'assay_ontology_term_id', 'cell_type_ontology_term_id',
       'development_stage_ontology_term_id', 'donor_id',
       'disease_ontology_term_id', 'is_primary_data',
       'organism_ontology_term_id', 'self_reported_ethnicity_ontology_term_id',
       'sex_ontology_term_id', 'suspension_type', 'tissue_type',
       'tissue_ontology_term_id'],
      dtype='object')

In [199]:
adata.raw.var

ENSMUSG00000109644
ENSMUSG00000108652
ENSMUSG00000007777
ENSMUSG00000086714
ENSMUSG00000043644
...
ENSMUSG00000064360
ENSMUSG00000064363
ENSMUSG00000065947
ENSMUSG00000064367
ENSMUSG00000064368


In [200]:
adata.var

Unnamed: 0,feature_is_filtered
ENSMUSG00000109644,False
ENSMUSG00000108652,False
ENSMUSG00000007777,False
ENSMUSG00000086714,False
ENSMUSG00000043644,False
...,...
ENSMUSG00000064360,False
ENSMUSG00000064363,False
ENSMUSG00000065947,False
ENSMUSG00000064367,False


In [201]:
adata.raw.X

<215067x24651 sparse matrix of type '<class 'numpy.float32'>'
	with 709378722 stored elements in Compressed Sparse Row format>