### **Curating Human_fetal_10x_data.h5ad**

Article: Developmental cell programs are co-opted in inflammatory skin disease

DOI: 10.1126/science.aba6500

Data Source : https://developmental.cellatlas.io/diseased-skin

##### **Mount farm**

mount-farm

##### **Packages required for curation**

In [1]:
#Import all packages required for curation

In [2]:
import numpy as np
import pandas as pd
import scanpy as sc
import scipy
from tqdm import tqdm
from scipy import sparse
from scipy.sparse import csr_matrix
import anndata as ad
import os
import subprocess
import math
import re

### **Curation Schema**

##### **X (Matrix Layers)**

##### **AnnData object**

In [3]:
# Load the AnnData object

In [4]:
adata = sc.read_h5ad('/lustre/scratch127/cellgen/cellgeni/cxgportal_sets/inflammatory_skin_disease/Data/Human_fetal_10x_data.h5ad')

In [5]:
# View the AnnData object

In [6]:
adata

AnnData object with n_obs × n_vars = 44298 × 33538
    obs: 'donor_id', 'Location', 'Sex', 'anno_final'
    var: 'gene_ids-4834STDY7002879', 'feature_types-4834STDY7002879', 'gene_ids-4834STDY7002880', 'feature_types-4834STDY7002880', 'gene_ids-4834STDY7038752', 'feature_types-4834STDY7038752', 'gene_ids-4834STDY7038753', 'feature_types-4834STDY7038753', 'gene_ids-4834STDY7002883', 'feature_types-4834STDY7002883', 'gene_ids-FCAImmP7241240', 'feature_types-FCAImmP7241240', 'gene_ids-FCAImmP7241241', 'feature_types-FCAImmP7241241', 'gene_ids-FCAImmP7316896', 'feature_types-FCAImmP7316896', 'gene_ids-FCAImmP7316897', 'feature_types-FCAImmP7316897', 'gene_ids-FCAImmP7316886', 'feature_types-FCAImmP7316886', 'gene_ids-FCAImmP7316887', 'feature_types-FCAImmP7316887', 'gene_ids-FCAImmP7316888', 'feature_types-FCAImmP7316888', 'gene_ids-FCAImmP7352189', 'feature_types-FCAImmP7352189', 'gene_ids-FCAImmP7352191', 'feature_types-FCAImmP7352191', 'gene_ids-FCAImmP7352190', 'feature_types-FCAImmP73

##### **X - expression matrix**

In [7]:
# View the expression matrix of the anndata object.

In [8]:
adata.X

<44298x33538 sparse matrix of type '<class 'numpy.float32'>'
	with 105235655 stored elements in Compressed Sparse Row format>

In [9]:
# Print the matrix to check whether they are normalized counts or raw counts. if the matrix has floating numbers,they are normalized counts.if they are integers, they are raw counts.

In [10]:
print(adata.X)

  (0, 27)	3.0
  (0, 53)	1.0
  (0, 55)	2.0
  (0, 78)	3.0
  (0, 89)	1.0
  (0, 93)	1.0
  (0, 154)	6.0
  (0, 190)	1.0
  (0, 201)	2.0
  (0, 216)	1.0
  (0, 258)	1.0
  (0, 261)	1.0
  (0, 270)	1.0
  (0, 274)	2.0
  (0, 331)	1.0
  (0, 378)	1.0
  (0, 381)	1.0
  (0, 412)	1.0
  (0, 414)	3.0
  (0, 449)	1.0
  (0, 465)	3.0
  (0, 470)	11.0
  (0, 471)	12.0
  (0, 472)	12.0
  (0, 493)	17.0
  :	:
  (44297, 33254)	5.0
  (44297, 33257)	2.0
  (44297, 33279)	1.0
  (44297, 33297)	2.0
  (44297, 33305)	1.0
  (44297, 33326)	3.0
  (44297, 33338)	1.0
  (44297, 33396)	1.0
  (44297, 33412)	1.0
  (44297, 33445)	1.0
  (44297, 33446)	2.0
  (44297, 33474)	7.0
  (44297, 33479)	8.0
  (44297, 33490)	1.0
  (44297, 33495)	1.0
  (44297, 33496)	6.0
  (44297, 33497)	7.0
  (44297, 33498)	33.0
  (44297, 33499)	8.0
  (44297, 33501)	4.0
  (44297, 33502)	19.0
  (44297, 33503)	4.0
  (44297, 33505)	10.0
  (44297, 33506)	1.0
  (44297, 33508)	8.0


##### **Raw counts matrix**

In [11]:
# If X has normalized counts, check for the raw counts matrix.

In [12]:
# check whether raw counts are present in adata.raw

In [13]:
print(adata.raw.X)

  (0, 27)	3.0
  (0, 53)	1.0
  (0, 55)	2.0
  (0, 78)	3.0
  (0, 89)	1.0
  (0, 93)	1.0
  (0, 154)	6.0
  (0, 190)	1.0
  (0, 201)	2.0
  (0, 216)	1.0
  (0, 258)	1.0
  (0, 261)	1.0
  (0, 270)	1.0
  (0, 274)	2.0
  (0, 331)	1.0
  (0, 378)	1.0
  (0, 381)	1.0
  (0, 412)	1.0
  (0, 414)	3.0
  (0, 449)	1.0
  (0, 465)	3.0
  (0, 470)	11.0
  (0, 471)	12.0
  (0, 472)	12.0
  (0, 493)	17.0
  :	:
  (44297, 33254)	5.0
  (44297, 33257)	2.0
  (44297, 33279)	1.0
  (44297, 33297)	2.0
  (44297, 33305)	1.0
  (44297, 33326)	3.0
  (44297, 33338)	1.0
  (44297, 33396)	1.0
  (44297, 33412)	1.0
  (44297, 33445)	1.0
  (44297, 33446)	2.0
  (44297, 33474)	7.0
  (44297, 33479)	8.0
  (44297, 33490)	1.0
  (44297, 33495)	1.0
  (44297, 33496)	6.0
  (44297, 33497)	7.0
  (44297, 33498)	33.0
  (44297, 33499)	8.0
  (44297, 33501)	4.0
  (44297, 33502)	19.0
  (44297, 33503)	4.0
  (44297, 33505)	10.0
  (44297, 33506)	1.0
  (44297, 33508)	8.0


In [14]:
del adata.raw

In [15]:
# Check whether adata and araw has same dimensions.

##### **Variables(var)**

In [16]:
# View the var of anndata and raw object

In [17]:
adata.var

Unnamed: 0_level_0,gene_ids-4834STDY7002879,feature_types-4834STDY7002879,gene_ids-4834STDY7002880,feature_types-4834STDY7002880,gene_ids-4834STDY7038752,feature_types-4834STDY7038752,gene_ids-4834STDY7038753,feature_types-4834STDY7038753,gene_ids-4834STDY7002883,feature_types-4834STDY7002883,...,gene_ids-FCAImmP7352189,feature_types-FCAImmP7352189,gene_ids-FCAImmP7352191,feature_types-FCAImmP7352191,gene_ids-FCAImmP7352190,feature_types-FCAImmP7352190,gene_ids-FCAImmP7462240,feature_types-FCAImmP7462240,gene_ids-FCAImmP7462241,feature_types-FCAImmP7462241
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
MIR1302-2HG,ENSG00000243485,Gene Expression,ENSG00000243485,Gene Expression,ENSG00000243485,Gene Expression,ENSG00000243485,Gene Expression,ENSG00000243485,Gene Expression,...,ENSG00000243485,Gene Expression,ENSG00000243485,Gene Expression,ENSG00000243485,Gene Expression,ENSG00000243485,Gene Expression,ENSG00000243485,Gene Expression
FAM138A,ENSG00000237613,Gene Expression,ENSG00000237613,Gene Expression,ENSG00000237613,Gene Expression,ENSG00000237613,Gene Expression,ENSG00000237613,Gene Expression,...,ENSG00000237613,Gene Expression,ENSG00000237613,Gene Expression,ENSG00000237613,Gene Expression,ENSG00000237613,Gene Expression,ENSG00000237613,Gene Expression
OR4F5,ENSG00000186092,Gene Expression,ENSG00000186092,Gene Expression,ENSG00000186092,Gene Expression,ENSG00000186092,Gene Expression,ENSG00000186092,Gene Expression,...,ENSG00000186092,Gene Expression,ENSG00000186092,Gene Expression,ENSG00000186092,Gene Expression,ENSG00000186092,Gene Expression,ENSG00000186092,Gene Expression
AL627309.1,ENSG00000238009,Gene Expression,ENSG00000238009,Gene Expression,ENSG00000238009,Gene Expression,ENSG00000238009,Gene Expression,ENSG00000238009,Gene Expression,...,ENSG00000238009,Gene Expression,ENSG00000238009,Gene Expression,ENSG00000238009,Gene Expression,ENSG00000238009,Gene Expression,ENSG00000238009,Gene Expression
AL627309.3,ENSG00000239945,Gene Expression,ENSG00000239945,Gene Expression,ENSG00000239945,Gene Expression,ENSG00000239945,Gene Expression,ENSG00000239945,Gene Expression,...,ENSG00000239945,Gene Expression,ENSG00000239945,Gene Expression,ENSG00000239945,Gene Expression,ENSG00000239945,Gene Expression,ENSG00000239945,Gene Expression
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
AC233755.2,ENSG00000277856,Gene Expression,ENSG00000277856,Gene Expression,ENSG00000277856,Gene Expression,ENSG00000277856,Gene Expression,ENSG00000277856,Gene Expression,...,ENSG00000277856,Gene Expression,ENSG00000277856,Gene Expression,ENSG00000277856,Gene Expression,ENSG00000277856,Gene Expression,ENSG00000277856,Gene Expression
AC233755.1,ENSG00000275063,Gene Expression,ENSG00000275063,Gene Expression,ENSG00000275063,Gene Expression,ENSG00000275063,Gene Expression,ENSG00000275063,Gene Expression,...,ENSG00000275063,Gene Expression,ENSG00000275063,Gene Expression,ENSG00000275063,Gene Expression,ENSG00000275063,Gene Expression,ENSG00000275063,Gene Expression
AC240274.1,ENSG00000271254,Gene Expression,ENSG00000271254,Gene Expression,ENSG00000271254,Gene Expression,ENSG00000271254,Gene Expression,ENSG00000271254,Gene Expression,...,ENSG00000271254,Gene Expression,ENSG00000271254,Gene Expression,ENSG00000271254,Gene Expression,ENSG00000271254,Gene Expression,ENSG00000271254,Gene Expression
AC213203.1,ENSG00000277475,Gene Expression,ENSG00000277475,Gene Expression,ENSG00000277475,Gene Expression,ENSG00000277475,Gene Expression,ENSG00000277475,Gene Expression,...,ENSG00000277475,Gene Expression,ENSG00000277475,Gene Expression,ENSG00000277475,Gene Expression,ENSG00000277475,Gene Expression,ENSG00000277475,Gene Expression


In [18]:
gene_names1 = adata.var.loc[:,adata.var.columns.str.startswith('gene_ids')]

In [19]:
len(gene_names1)

33538

In [20]:
ensg1 = []

for k in tqdm(range(len(gene_names1))):
    gene_valcount = gene_names1.iloc[k].value_counts()
    ensg_values = gene_valcount.index[gene_valcount.index.str.startswith('ENSG')]
    if len(ensg_values) > 0:
        ensg1.append(ensg_values[0])
    else:
        ensg1.append('nan')

100%|██████████| 33538/33538 [00:20<00:00, 1674.97it/s]


In [21]:
len(ensg1)

33538

In [22]:
# copy the index column values to a new column called gene_symbols

In [23]:
adata.var['gene_symbols'] = adata.var_names


In [24]:
#set ensembl ids as index column

In [25]:
adata.var_names = ensg1

In [26]:
# Load the approved genes file.

In [27]:
approved_genes = pd.read_csv('/home/jovyan/CXG_DATASETS_PORTAL/gene_info/genes_approved.csv')

In [28]:
#Create a dictionary from the approved genes file 

In [29]:
genedict = {key: 1 for key in list(approved_genes.feature_id)}

In [30]:
genedict

{'ERCC-00002': 1,
 'ERCC-00003': 1,
 'ERCC-00004': 1,
 'ERCC-00009': 1,
 'ERCC-00012': 1,
 'ERCC-00013': 1,
 'ERCC-00014': 1,
 'ERCC-00016': 1,
 'ERCC-00017': 1,
 'ERCC-00019': 1,
 'ERCC-00022': 1,
 'ERCC-00024': 1,
 'ERCC-00025': 1,
 'ERCC-00028': 1,
 'ERCC-00031': 1,
 'ERCC-00033': 1,
 'ERCC-00034': 1,
 'ERCC-00035': 1,
 'ERCC-00039': 1,
 'ERCC-00040': 1,
 'ERCC-00041': 1,
 'ERCC-00042': 1,
 'ERCC-00043': 1,
 'ERCC-00044': 1,
 'ERCC-00046': 1,
 'ERCC-00048': 1,
 'ERCC-00051': 1,
 'ERCC-00053': 1,
 'ERCC-00054': 1,
 'ERCC-00057': 1,
 'ERCC-00058': 1,
 'ERCC-00059': 1,
 'ERCC-00060': 1,
 'ERCC-00061': 1,
 'ERCC-00062': 1,
 'ERCC-00067': 1,
 'ERCC-00069': 1,
 'ERCC-00071': 1,
 'ERCC-00073': 1,
 'ERCC-00074': 1,
 'ERCC-00075': 1,
 'ERCC-00076': 1,
 'ERCC-00077': 1,
 'ERCC-00078': 1,
 'ERCC-00079': 1,
 'ERCC-00081': 1,
 'ERCC-00083': 1,
 'ERCC-00084': 1,
 'ERCC-00085': 1,
 'ERCC-00086': 1,
 'ERCC-00092': 1,
 'ERCC-00095': 1,
 'ERCC-00096': 1,
 'ERCC-00097': 1,
 'ERCC-00098': 1,
 'ERCC-000

In [31]:
len(genedict)

116184

In [32]:
#Filter out the genes which are not in the approved genes file.

In [33]:
var_to_keep_adata = [x for x in adata.var_names if (x in genedict)]


In [34]:
len(var_to_keep_adata)

33234

In [35]:
adata.var

Unnamed: 0,gene_ids-4834STDY7002879,feature_types-4834STDY7002879,gene_ids-4834STDY7002880,feature_types-4834STDY7002880,gene_ids-4834STDY7038752,feature_types-4834STDY7038752,gene_ids-4834STDY7038753,feature_types-4834STDY7038753,gene_ids-4834STDY7002883,feature_types-4834STDY7002883,...,feature_types-FCAImmP7352189,gene_ids-FCAImmP7352191,feature_types-FCAImmP7352191,gene_ids-FCAImmP7352190,feature_types-FCAImmP7352190,gene_ids-FCAImmP7462240,feature_types-FCAImmP7462240,gene_ids-FCAImmP7462241,feature_types-FCAImmP7462241,gene_symbols
ENSG00000243485,ENSG00000243485,Gene Expression,ENSG00000243485,Gene Expression,ENSG00000243485,Gene Expression,ENSG00000243485,Gene Expression,ENSG00000243485,Gene Expression,...,Gene Expression,ENSG00000243485,Gene Expression,ENSG00000243485,Gene Expression,ENSG00000243485,Gene Expression,ENSG00000243485,Gene Expression,MIR1302-2HG
ENSG00000237613,ENSG00000237613,Gene Expression,ENSG00000237613,Gene Expression,ENSG00000237613,Gene Expression,ENSG00000237613,Gene Expression,ENSG00000237613,Gene Expression,...,Gene Expression,ENSG00000237613,Gene Expression,ENSG00000237613,Gene Expression,ENSG00000237613,Gene Expression,ENSG00000237613,Gene Expression,FAM138A
ENSG00000186092,ENSG00000186092,Gene Expression,ENSG00000186092,Gene Expression,ENSG00000186092,Gene Expression,ENSG00000186092,Gene Expression,ENSG00000186092,Gene Expression,...,Gene Expression,ENSG00000186092,Gene Expression,ENSG00000186092,Gene Expression,ENSG00000186092,Gene Expression,ENSG00000186092,Gene Expression,OR4F5
ENSG00000238009,ENSG00000238009,Gene Expression,ENSG00000238009,Gene Expression,ENSG00000238009,Gene Expression,ENSG00000238009,Gene Expression,ENSG00000238009,Gene Expression,...,Gene Expression,ENSG00000238009,Gene Expression,ENSG00000238009,Gene Expression,ENSG00000238009,Gene Expression,ENSG00000238009,Gene Expression,AL627309.1
ENSG00000239945,ENSG00000239945,Gene Expression,ENSG00000239945,Gene Expression,ENSG00000239945,Gene Expression,ENSG00000239945,Gene Expression,ENSG00000239945,Gene Expression,...,Gene Expression,ENSG00000239945,Gene Expression,ENSG00000239945,Gene Expression,ENSG00000239945,Gene Expression,ENSG00000239945,Gene Expression,AL627309.3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ENSG00000277856,ENSG00000277856,Gene Expression,ENSG00000277856,Gene Expression,ENSG00000277856,Gene Expression,ENSG00000277856,Gene Expression,ENSG00000277856,Gene Expression,...,Gene Expression,ENSG00000277856,Gene Expression,ENSG00000277856,Gene Expression,ENSG00000277856,Gene Expression,ENSG00000277856,Gene Expression,AC233755.2
ENSG00000275063,ENSG00000275063,Gene Expression,ENSG00000275063,Gene Expression,ENSG00000275063,Gene Expression,ENSG00000275063,Gene Expression,ENSG00000275063,Gene Expression,...,Gene Expression,ENSG00000275063,Gene Expression,ENSG00000275063,Gene Expression,ENSG00000275063,Gene Expression,ENSG00000275063,Gene Expression,AC233755.1
ENSG00000271254,ENSG00000271254,Gene Expression,ENSG00000271254,Gene Expression,ENSG00000271254,Gene Expression,ENSG00000271254,Gene Expression,ENSG00000271254,Gene Expression,...,Gene Expression,ENSG00000271254,Gene Expression,ENSG00000271254,Gene Expression,ENSG00000271254,Gene Expression,ENSG00000271254,Gene Expression,AC240274.1
ENSG00000277475,ENSG00000277475,Gene Expression,ENSG00000277475,Gene Expression,ENSG00000277475,Gene Expression,ENSG00000277475,Gene Expression,ENSG00000277475,Gene Expression,...,Gene Expression,ENSG00000277475,Gene Expression,ENSG00000277475,Gene Expression,ENSG00000277475,Gene Expression,ENSG00000277475,Gene Expression,AC213203.1


In [36]:
# Modify the anndata object by filtering out the filtered genes.

In [37]:
adata = adata[:, var_to_keep_adata].copy()


In [38]:
adata.var

Unnamed: 0,gene_ids-4834STDY7002879,feature_types-4834STDY7002879,gene_ids-4834STDY7002880,feature_types-4834STDY7002880,gene_ids-4834STDY7038752,feature_types-4834STDY7038752,gene_ids-4834STDY7038753,feature_types-4834STDY7038753,gene_ids-4834STDY7002883,feature_types-4834STDY7002883,...,feature_types-FCAImmP7352189,gene_ids-FCAImmP7352191,feature_types-FCAImmP7352191,gene_ids-FCAImmP7352190,feature_types-FCAImmP7352190,gene_ids-FCAImmP7462240,feature_types-FCAImmP7462240,gene_ids-FCAImmP7462241,feature_types-FCAImmP7462241,gene_symbols
ENSG00000243485,ENSG00000243485,Gene Expression,ENSG00000243485,Gene Expression,ENSG00000243485,Gene Expression,ENSG00000243485,Gene Expression,ENSG00000243485,Gene Expression,...,Gene Expression,ENSG00000243485,Gene Expression,ENSG00000243485,Gene Expression,ENSG00000243485,Gene Expression,ENSG00000243485,Gene Expression,MIR1302-2HG
ENSG00000237613,ENSG00000237613,Gene Expression,ENSG00000237613,Gene Expression,ENSG00000237613,Gene Expression,ENSG00000237613,Gene Expression,ENSG00000237613,Gene Expression,...,Gene Expression,ENSG00000237613,Gene Expression,ENSG00000237613,Gene Expression,ENSG00000237613,Gene Expression,ENSG00000237613,Gene Expression,FAM138A
ENSG00000186092,ENSG00000186092,Gene Expression,ENSG00000186092,Gene Expression,ENSG00000186092,Gene Expression,ENSG00000186092,Gene Expression,ENSG00000186092,Gene Expression,...,Gene Expression,ENSG00000186092,Gene Expression,ENSG00000186092,Gene Expression,ENSG00000186092,Gene Expression,ENSG00000186092,Gene Expression,OR4F5
ENSG00000238009,ENSG00000238009,Gene Expression,ENSG00000238009,Gene Expression,ENSG00000238009,Gene Expression,ENSG00000238009,Gene Expression,ENSG00000238009,Gene Expression,...,Gene Expression,ENSG00000238009,Gene Expression,ENSG00000238009,Gene Expression,ENSG00000238009,Gene Expression,ENSG00000238009,Gene Expression,AL627309.1
ENSG00000239945,ENSG00000239945,Gene Expression,ENSG00000239945,Gene Expression,ENSG00000239945,Gene Expression,ENSG00000239945,Gene Expression,ENSG00000239945,Gene Expression,...,Gene Expression,ENSG00000239945,Gene Expression,ENSG00000239945,Gene Expression,ENSG00000239945,Gene Expression,ENSG00000239945,Gene Expression,AL627309.3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ENSG00000277856,ENSG00000277856,Gene Expression,ENSG00000277856,Gene Expression,ENSG00000277856,Gene Expression,ENSG00000277856,Gene Expression,ENSG00000277856,Gene Expression,...,Gene Expression,ENSG00000277856,Gene Expression,ENSG00000277856,Gene Expression,ENSG00000277856,Gene Expression,ENSG00000277856,Gene Expression,AC233755.2
ENSG00000275063,ENSG00000275063,Gene Expression,ENSG00000275063,Gene Expression,ENSG00000275063,Gene Expression,ENSG00000275063,Gene Expression,ENSG00000275063,Gene Expression,...,Gene Expression,ENSG00000275063,Gene Expression,ENSG00000275063,Gene Expression,ENSG00000275063,Gene Expression,ENSG00000275063,Gene Expression,AC233755.1
ENSG00000271254,ENSG00000271254,Gene Expression,ENSG00000271254,Gene Expression,ENSG00000271254,Gene Expression,ENSG00000271254,Gene Expression,ENSG00000271254,Gene Expression,...,Gene Expression,ENSG00000271254,Gene Expression,ENSG00000271254,Gene Expression,ENSG00000271254,Gene Expression,ENSG00000271254,Gene Expression,AC240274.1
ENSG00000277475,ENSG00000277475,Gene Expression,ENSG00000277475,Gene Expression,ENSG00000277475,Gene Expression,ENSG00000277475,Gene Expression,ENSG00000277475,Gene Expression,...,Gene Expression,ENSG00000277475,Gene Expression,ENSG00000277475,Gene Expression,ENSG00000277475,Gene Expression,ENSG00000277475,Gene Expression,AC213203.1


In [39]:
adata.var = adata.var.iloc[:, 0:0]


In [40]:
# View var

In [41]:
adata.var

ENSG00000243485
ENSG00000237613
ENSG00000186092
ENSG00000238009
ENSG00000239945
...
ENSG00000277856
ENSG00000275063
ENSG00000271254
ENSG00000277475
ENSG00000268674


In [42]:
adata.var['feature_is_filtered'] = [False] * len(adata.var)

In [43]:
adata.var

Unnamed: 0,feature_is_filtered
ENSG00000243485,False
ENSG00000237613,False
ENSG00000186092,False
ENSG00000238009,False
ENSG00000239945,False
...,...
ENSG00000277856,False
ENSG00000275063,False
ENSG00000271254,False
ENSG00000277475,False


#### **obs (Cell metadata)**

In [44]:
#view obs

In [45]:
adata.obs

Unnamed: 0_level_0,donor_id,Location,Sex,anno_final
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AAACCTGGTCAGTGGA-1-4834STDY7002879,F16,Trunk_Limb,Male,M_fs_Macro
AAAGCAAAGATGTGGC-1-4834STDY7002879,F16,Trunk_Limb,Male,M_fs_Macro
AAAGTAGCAGATCGGA-1-4834STDY7002879,F16,Trunk_Limb,Male,G_fs_Mast
AAAGTAGTCCGCATCT-1-4834STDY7002879,F16,Trunk_Limb,Male,M_fs_Macro
AAATGCCAGCTGCCCA-1-4834STDY7002879,F16,Trunk_Limb,Male,M_fs_Macro
...,...,...,...,...
TTTGTCATCAAACAAG-1-FCAImmP7462241,F35,Trunk_Limb,Female,D_fs_FB
TTTGTCATCAAGGTAA-1-FCAImmP7462241,F35,Trunk_Limb,Female,D_fs_FB
TTTGTCATCATTGCGA-1-FCAImmP7462241,F35,Trunk_Limb,Female,D_fs_FB
TTTGTCATCGGACAAG-1-FCAImmP7462241,F35,Trunk_Limb,Female,D_fs_FB


In [46]:
# view the column names in obs

In [47]:
adata.obs.columns

Index(['donor_id', 'Location', 'Sex', 'anno_final'], dtype='object')

#### **assay_ontology_term_id**

In [48]:
adata.obs['assay_ontology_term_id'] = ['EFO:0009899']* len(adata.obs)

In [49]:
adata.obs['assay_ontology_term_id'] = adata.obs['assay_ontology_term_id'].astype('category')

In [50]:
# view adata.obs

In [51]:
adata.obs

Unnamed: 0_level_0,donor_id,Location,Sex,anno_final,assay_ontology_term_id
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AAACCTGGTCAGTGGA-1-4834STDY7002879,F16,Trunk_Limb,Male,M_fs_Macro,EFO:0009899
AAAGCAAAGATGTGGC-1-4834STDY7002879,F16,Trunk_Limb,Male,M_fs_Macro,EFO:0009899
AAAGTAGCAGATCGGA-1-4834STDY7002879,F16,Trunk_Limb,Male,G_fs_Mast,EFO:0009899
AAAGTAGTCCGCATCT-1-4834STDY7002879,F16,Trunk_Limb,Male,M_fs_Macro,EFO:0009899
AAATGCCAGCTGCCCA-1-4834STDY7002879,F16,Trunk_Limb,Male,M_fs_Macro,EFO:0009899
...,...,...,...,...,...
TTTGTCATCAAACAAG-1-FCAImmP7462241,F35,Trunk_Limb,Female,D_fs_FB,EFO:0009899
TTTGTCATCAAGGTAA-1-FCAImmP7462241,F35,Trunk_Limb,Female,D_fs_FB,EFO:0009899
TTTGTCATCATTGCGA-1-FCAImmP7462241,F35,Trunk_Limb,Female,D_fs_FB,EFO:0009899
TTTGTCATCGGACAAG-1-FCAImmP7462241,F35,Trunk_Limb,Female,D_fs_FB,EFO:0009899


#### **cell_type_ontology_term_id**

In [52]:
#identify the column in adata.obs related. to cell type annotation

In [53]:
adata.obs.columns

Index(['donor_id', 'Location', 'Sex', 'anno_final', 'assay_ontology_term_id'], dtype='object')

In [54]:
list(adata.obs['anno_final'].unique())

['M_fs_Macro',
 'G_fs_Mast',
 'L_fs_DC2',
 'D_fs_FB',
 'H_fs_NK',
 'I_fs_ILC',
 'F_fs_VE',
 'E_fs_LE',
 'K_fs_DC1',
 'N_fs_pericyte',
 'J_fs_LC',
 'A_fs_KC',
 'C_fs_Schwann',
 'B_fs_Melanocyte']

In [55]:
mapping = {'M_fs_Macro' :'CL:0000235',
 'G_fs_Mast' :'CL:0000097',
 'L_fs_DC2':'CL:0000451',
 'D_fs_FB':'CL:0002620',
 'H_fs_NK':'CL:0000623',
 'I_fs_ILC':'CL:0001065',
 'F_fs_VE':'CL:0002139',
 'E_fs_LE':'CL:0002138',
 'K_fs_DC1':'CL:0000990',
 'N_fs_pericyte':'CL:0000669',
 'J_fs_LC':'CL:0000453',
 'A_fs_KC':'CL:0000312',
 'C_fs_Schwann':'CL:0002573',
 'B_fs_Melanocyte': 'CL:0000148'}

In [56]:
# add the cell_type_ontology_term_id column

In [57]:
adata.obs['cell_type_ontology_term_id'] = adata.obs['anno_final'].map(mapping)

In [58]:
# change datatype of the column

In [59]:
adata.obs['cell_type_ontology_term_id'] = adata.obs['cell_type_ontology_term_id'].astype('category')

In [60]:
# view adata.obs

In [61]:
adata.obs

Unnamed: 0_level_0,donor_id,Location,Sex,anno_final,assay_ontology_term_id,cell_type_ontology_term_id
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
AAACCTGGTCAGTGGA-1-4834STDY7002879,F16,Trunk_Limb,Male,M_fs_Macro,EFO:0009899,CL:0000235
AAAGCAAAGATGTGGC-1-4834STDY7002879,F16,Trunk_Limb,Male,M_fs_Macro,EFO:0009899,CL:0000235
AAAGTAGCAGATCGGA-1-4834STDY7002879,F16,Trunk_Limb,Male,G_fs_Mast,EFO:0009899,CL:0000097
AAAGTAGTCCGCATCT-1-4834STDY7002879,F16,Trunk_Limb,Male,M_fs_Macro,EFO:0009899,CL:0000235
AAATGCCAGCTGCCCA-1-4834STDY7002879,F16,Trunk_Limb,Male,M_fs_Macro,EFO:0009899,CL:0000235
...,...,...,...,...,...,...
TTTGTCATCAAACAAG-1-FCAImmP7462241,F35,Trunk_Limb,Female,D_fs_FB,EFO:0009899,CL:0002620
TTTGTCATCAAGGTAA-1-FCAImmP7462241,F35,Trunk_Limb,Female,D_fs_FB,EFO:0009899,CL:0002620
TTTGTCATCATTGCGA-1-FCAImmP7462241,F35,Trunk_Limb,Female,D_fs_FB,EFO:0009899,CL:0002620
TTTGTCATCGGACAAG-1-FCAImmP7462241,F35,Trunk_Limb,Female,D_fs_FB,EFO:0009899,CL:0002620


#### **development_stage_ontology_term_id**

In [62]:
# identify the column in adata which corresponds to age

In [63]:
#adata.obs['study'] = adata.obs.index.str.split('-').str[2]

In [64]:
adata.obs

Unnamed: 0_level_0,donor_id,Location,Sex,anno_final,assay_ontology_term_id,cell_type_ontology_term_id
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
AAACCTGGTCAGTGGA-1-4834STDY7002879,F16,Trunk_Limb,Male,M_fs_Macro,EFO:0009899,CL:0000235
AAAGCAAAGATGTGGC-1-4834STDY7002879,F16,Trunk_Limb,Male,M_fs_Macro,EFO:0009899,CL:0000235
AAAGTAGCAGATCGGA-1-4834STDY7002879,F16,Trunk_Limb,Male,G_fs_Mast,EFO:0009899,CL:0000097
AAAGTAGTCCGCATCT-1-4834STDY7002879,F16,Trunk_Limb,Male,M_fs_Macro,EFO:0009899,CL:0000235
AAATGCCAGCTGCCCA-1-4834STDY7002879,F16,Trunk_Limb,Male,M_fs_Macro,EFO:0009899,CL:0000235
...,...,...,...,...,...,...
TTTGTCATCAAACAAG-1-FCAImmP7462241,F35,Trunk_Limb,Female,D_fs_FB,EFO:0009899,CL:0002620
TTTGTCATCAAGGTAA-1-FCAImmP7462241,F35,Trunk_Limb,Female,D_fs_FB,EFO:0009899,CL:0002620
TTTGTCATCATTGCGA-1-FCAImmP7462241,F35,Trunk_Limb,Female,D_fs_FB,EFO:0009899,CL:0002620
TTTGTCATCGGACAAG-1-FCAImmP7462241,F35,Trunk_Limb,Female,D_fs_FB,EFO:0009899,CL:0002620


In [65]:
adata.obs['study'] = adata.obs.index.str.split('-', n=2).str[2]

In [66]:
list(adata.obs['study'].unique())

['4834STDY7002879',
 '4834STDY7002880',
 '4834STDY7038752',
 '4834STDY7038753',
 '4834STDY7002883',
 'FCAImmP7241240',
 'FCAImmP7241241',
 'FCAImmP7316896',
 'FCAImmP7316897',
 'FCAImmP7316886',
 'FCAImmP7316887',
 'FCAImmP7316888',
 'FCAImmP7352189',
 'FCAImmP7352191',
 'FCAImmP7352190',
 'FCAImmP7462240',
 'FCAImmP7462241']

In [67]:
mapping = {'4834STDY7002879':'8 weeks gestation',
'4834STDY7002880':'8 weeks gestation',
'4834STDY7002883':'9 weeks gestation',
'4834STDY7038752':'9 weeks gestation',
'4834STDY7038753':'9 weeks gestation',
'FCAImmP7241240':'10 weeks gestation',
'FCAImmP7241241':'10 weeks gestation',
'FCAImmP7316886':'9 weeks gestation',
'FCAImmP7316887':'9 weeks gestation',
'FCAImmP7316888':'9 weeks gestation',
'FCAImmP7316896':'7 weeks gestation- cs22',
'FCAImmP7316897':'7 weeks gestation- cs22',
'FCAImmP7352189':'8 weeks gestation',
'FCAImmP7352190':'8 weeks gestation',
'FCAImmP7352191':'8 weeks gestation',
'FCAImmP7462240':'7 weeks gestation- cs23',
'FCAImmP7462241':'7 weeks gestation- cs23'}

In [68]:
adata.obs['development_stage_ontology_term_id'] = adata.obs['study'].map(mapping)

In [69]:
mapping = {'8 weeks gestation':'HsapDv:0000046',
 '9 weeks gestation':'HsapDv:0000047',
 '10 weeks gestation':'HsapDv:0000048',
 '7 weeks gestation- cs22':'HsapDv:0000029', 
 '7 weeks gestation- cs23':'HsapDv:0000030'}

In [70]:
adata.obs['development_stage_ontology_term_id'] = adata.obs['development_stage_ontology_term_id'].map(mapping)

In [71]:
# change datatype of the column

In [72]:
adata.obs['development_stage_ontology_term_id'] = adata.obs['development_stage_ontology_term_id'].astype('category')

In [73]:
# view unique values of development_stage_ontology_term_id column

In [74]:
list(adata.obs['development_stage_ontology_term_id'].unique())

['HsapDv:0000046',
 'HsapDv:0000047',
 'HsapDv:0000048',
 'HsapDv:0000029',
 'HsapDv:0000030']

In [75]:
# view adata.obs

In [76]:
adata.obs

Unnamed: 0_level_0,donor_id,Location,Sex,anno_final,assay_ontology_term_id,cell_type_ontology_term_id,study,development_stage_ontology_term_id
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
AAACCTGGTCAGTGGA-1-4834STDY7002879,F16,Trunk_Limb,Male,M_fs_Macro,EFO:0009899,CL:0000235,4834STDY7002879,HsapDv:0000046
AAAGCAAAGATGTGGC-1-4834STDY7002879,F16,Trunk_Limb,Male,M_fs_Macro,EFO:0009899,CL:0000235,4834STDY7002879,HsapDv:0000046
AAAGTAGCAGATCGGA-1-4834STDY7002879,F16,Trunk_Limb,Male,G_fs_Mast,EFO:0009899,CL:0000097,4834STDY7002879,HsapDv:0000046
AAAGTAGTCCGCATCT-1-4834STDY7002879,F16,Trunk_Limb,Male,M_fs_Macro,EFO:0009899,CL:0000235,4834STDY7002879,HsapDv:0000046
AAATGCCAGCTGCCCA-1-4834STDY7002879,F16,Trunk_Limb,Male,M_fs_Macro,EFO:0009899,CL:0000235,4834STDY7002879,HsapDv:0000046
...,...,...,...,...,...,...,...,...
TTTGTCATCAAACAAG-1-FCAImmP7462241,F35,Trunk_Limb,Female,D_fs_FB,EFO:0009899,CL:0002620,FCAImmP7462241,HsapDv:0000030
TTTGTCATCAAGGTAA-1-FCAImmP7462241,F35,Trunk_Limb,Female,D_fs_FB,EFO:0009899,CL:0002620,FCAImmP7462241,HsapDv:0000030
TTTGTCATCATTGCGA-1-FCAImmP7462241,F35,Trunk_Limb,Female,D_fs_FB,EFO:0009899,CL:0002620,FCAImmP7462241,HsapDv:0000030
TTTGTCATCGGACAAG-1-FCAImmP7462241,F35,Trunk_Limb,Female,D_fs_FB,EFO:0009899,CL:0002620,FCAImmP7462241,HsapDv:0000030


#### **donor_id**

In [77]:
#identify the column in adata.obs which provides donor information

In [78]:
adata.obs.columns

Index(['donor_id', 'Location', 'Sex', 'anno_final', 'assay_ontology_term_id',
       'cell_type_ontology_term_id', 'study',
       'development_stage_ontology_term_id'],
      dtype='object')

In [79]:
list(adata.obs['donor_id'].unique())

['F16', 'F17', 'F19', 'F32', 'F33', 'F34', 'F35']

In [80]:
# add the donor_id column

In [81]:
adata.obs['donor_id'] = adata.obs['donor_id']

In [82]:
# change datatype of the column

In [83]:
adata.obs['donor_id'] = adata.obs['donor_id'].astype('category')

In [84]:
# view unique values of donor_id column

In [85]:
list(adata.obs['donor_id'].unique())

['F16', 'F17', 'F19', 'F32', 'F33', 'F34', 'F35']

In [86]:
#view obs

In [87]:
adata.obs

Unnamed: 0_level_0,donor_id,Location,Sex,anno_final,assay_ontology_term_id,cell_type_ontology_term_id,study,development_stage_ontology_term_id
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
AAACCTGGTCAGTGGA-1-4834STDY7002879,F16,Trunk_Limb,Male,M_fs_Macro,EFO:0009899,CL:0000235,4834STDY7002879,HsapDv:0000046
AAAGCAAAGATGTGGC-1-4834STDY7002879,F16,Trunk_Limb,Male,M_fs_Macro,EFO:0009899,CL:0000235,4834STDY7002879,HsapDv:0000046
AAAGTAGCAGATCGGA-1-4834STDY7002879,F16,Trunk_Limb,Male,G_fs_Mast,EFO:0009899,CL:0000097,4834STDY7002879,HsapDv:0000046
AAAGTAGTCCGCATCT-1-4834STDY7002879,F16,Trunk_Limb,Male,M_fs_Macro,EFO:0009899,CL:0000235,4834STDY7002879,HsapDv:0000046
AAATGCCAGCTGCCCA-1-4834STDY7002879,F16,Trunk_Limb,Male,M_fs_Macro,EFO:0009899,CL:0000235,4834STDY7002879,HsapDv:0000046
...,...,...,...,...,...,...,...,...
TTTGTCATCAAACAAG-1-FCAImmP7462241,F35,Trunk_Limb,Female,D_fs_FB,EFO:0009899,CL:0002620,FCAImmP7462241,HsapDv:0000030
TTTGTCATCAAGGTAA-1-FCAImmP7462241,F35,Trunk_Limb,Female,D_fs_FB,EFO:0009899,CL:0002620,FCAImmP7462241,HsapDv:0000030
TTTGTCATCATTGCGA-1-FCAImmP7462241,F35,Trunk_Limb,Female,D_fs_FB,EFO:0009899,CL:0002620,FCAImmP7462241,HsapDv:0000030
TTTGTCATCGGACAAG-1-FCAImmP7462241,F35,Trunk_Limb,Female,D_fs_FB,EFO:0009899,CL:0002620,FCAImmP7462241,HsapDv:0000030


#### **disease_ontology_term_id**

In [88]:
# Assign normal since all are healthy patients

In [89]:
# add the disease_ontology_term_id column

In [90]:
adata.obs['disease_ontology_term_id'] = ['PATO:0000461']* len(adata.obs)

In [91]:
# change datatype of the column

In [92]:
adata.obs['disease_ontology_term_id'] = adata.obs['disease_ontology_term_id'].astype('category')

In [93]:
# view obs

In [94]:
adata.obs

Unnamed: 0_level_0,donor_id,Location,Sex,anno_final,assay_ontology_term_id,cell_type_ontology_term_id,study,development_stage_ontology_term_id,disease_ontology_term_id
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
AAACCTGGTCAGTGGA-1-4834STDY7002879,F16,Trunk_Limb,Male,M_fs_Macro,EFO:0009899,CL:0000235,4834STDY7002879,HsapDv:0000046,PATO:0000461
AAAGCAAAGATGTGGC-1-4834STDY7002879,F16,Trunk_Limb,Male,M_fs_Macro,EFO:0009899,CL:0000235,4834STDY7002879,HsapDv:0000046,PATO:0000461
AAAGTAGCAGATCGGA-1-4834STDY7002879,F16,Trunk_Limb,Male,G_fs_Mast,EFO:0009899,CL:0000097,4834STDY7002879,HsapDv:0000046,PATO:0000461
AAAGTAGTCCGCATCT-1-4834STDY7002879,F16,Trunk_Limb,Male,M_fs_Macro,EFO:0009899,CL:0000235,4834STDY7002879,HsapDv:0000046,PATO:0000461
AAATGCCAGCTGCCCA-1-4834STDY7002879,F16,Trunk_Limb,Male,M_fs_Macro,EFO:0009899,CL:0000235,4834STDY7002879,HsapDv:0000046,PATO:0000461
...,...,...,...,...,...,...,...,...,...
TTTGTCATCAAACAAG-1-FCAImmP7462241,F35,Trunk_Limb,Female,D_fs_FB,EFO:0009899,CL:0002620,FCAImmP7462241,HsapDv:0000030,PATO:0000461
TTTGTCATCAAGGTAA-1-FCAImmP7462241,F35,Trunk_Limb,Female,D_fs_FB,EFO:0009899,CL:0002620,FCAImmP7462241,HsapDv:0000030,PATO:0000461
TTTGTCATCATTGCGA-1-FCAImmP7462241,F35,Trunk_Limb,Female,D_fs_FB,EFO:0009899,CL:0002620,FCAImmP7462241,HsapDv:0000030,PATO:0000461
TTTGTCATCGGACAAG-1-FCAImmP7462241,F35,Trunk_Limb,Female,D_fs_FB,EFO:0009899,CL:0002620,FCAImmP7462241,HsapDv:0000030,PATO:0000461


#### **is_primary_data**

In [95]:
adata.obs['is_primary_data'] = [True] * len(adata.obs)

In [96]:
adata.obs['is_primary_data']

index
AAACCTGGTCAGTGGA-1-4834STDY7002879    True
AAAGCAAAGATGTGGC-1-4834STDY7002879    True
AAAGTAGCAGATCGGA-1-4834STDY7002879    True
AAAGTAGTCCGCATCT-1-4834STDY7002879    True
AAATGCCAGCTGCCCA-1-4834STDY7002879    True
                                      ... 
TTTGTCATCAAACAAG-1-FCAImmP7462241     True
TTTGTCATCAAGGTAA-1-FCAImmP7462241     True
TTTGTCATCATTGCGA-1-FCAImmP7462241     True
TTTGTCATCGGACAAG-1-FCAImmP7462241     True
TTTGTCATCGGTCCGA-1-FCAImmP7462241     True
Name: is_primary_data, Length: 44298, dtype: bool

In [97]:
list(adata.obs['is_primary_data'].unique())

[True]

In [98]:
#change data type of column

In [99]:
adata.obs['is_primary_data'] = adata.obs['is_primary_data'].astype('bool')

In [100]:
# view obs

In [101]:
adata.obs

Unnamed: 0_level_0,donor_id,Location,Sex,anno_final,assay_ontology_term_id,cell_type_ontology_term_id,study,development_stage_ontology_term_id,disease_ontology_term_id,is_primary_data
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
AAACCTGGTCAGTGGA-1-4834STDY7002879,F16,Trunk_Limb,Male,M_fs_Macro,EFO:0009899,CL:0000235,4834STDY7002879,HsapDv:0000046,PATO:0000461,True
AAAGCAAAGATGTGGC-1-4834STDY7002879,F16,Trunk_Limb,Male,M_fs_Macro,EFO:0009899,CL:0000235,4834STDY7002879,HsapDv:0000046,PATO:0000461,True
AAAGTAGCAGATCGGA-1-4834STDY7002879,F16,Trunk_Limb,Male,G_fs_Mast,EFO:0009899,CL:0000097,4834STDY7002879,HsapDv:0000046,PATO:0000461,True
AAAGTAGTCCGCATCT-1-4834STDY7002879,F16,Trunk_Limb,Male,M_fs_Macro,EFO:0009899,CL:0000235,4834STDY7002879,HsapDv:0000046,PATO:0000461,True
AAATGCCAGCTGCCCA-1-4834STDY7002879,F16,Trunk_Limb,Male,M_fs_Macro,EFO:0009899,CL:0000235,4834STDY7002879,HsapDv:0000046,PATO:0000461,True
...,...,...,...,...,...,...,...,...,...,...
TTTGTCATCAAACAAG-1-FCAImmP7462241,F35,Trunk_Limb,Female,D_fs_FB,EFO:0009899,CL:0002620,FCAImmP7462241,HsapDv:0000030,PATO:0000461,True
TTTGTCATCAAGGTAA-1-FCAImmP7462241,F35,Trunk_Limb,Female,D_fs_FB,EFO:0009899,CL:0002620,FCAImmP7462241,HsapDv:0000030,PATO:0000461,True
TTTGTCATCATTGCGA-1-FCAImmP7462241,F35,Trunk_Limb,Female,D_fs_FB,EFO:0009899,CL:0002620,FCAImmP7462241,HsapDv:0000030,PATO:0000461,True
TTTGTCATCGGACAAG-1-FCAImmP7462241,F35,Trunk_Limb,Female,D_fs_FB,EFO:0009899,CL:0002620,FCAImmP7462241,HsapDv:0000030,PATO:0000461,True


#### **organism_ontology_term_id**

In [102]:
# assign organism id 

In [103]:
adata.obs['organism_ontology_term_id'] = ['NCBITaxon:9606'] * len(adata.obs)

In [104]:
#change data type of column

In [105]:
adata.obs['organism_ontology_term_id'] = adata.obs['organism_ontology_term_id'].astype('category')

In [106]:
# view obs

In [107]:
adata.obs

Unnamed: 0_level_0,donor_id,Location,Sex,anno_final,assay_ontology_term_id,cell_type_ontology_term_id,study,development_stage_ontology_term_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
AAACCTGGTCAGTGGA-1-4834STDY7002879,F16,Trunk_Limb,Male,M_fs_Macro,EFO:0009899,CL:0000235,4834STDY7002879,HsapDv:0000046,PATO:0000461,True,NCBITaxon:9606
AAAGCAAAGATGTGGC-1-4834STDY7002879,F16,Trunk_Limb,Male,M_fs_Macro,EFO:0009899,CL:0000235,4834STDY7002879,HsapDv:0000046,PATO:0000461,True,NCBITaxon:9606
AAAGTAGCAGATCGGA-1-4834STDY7002879,F16,Trunk_Limb,Male,G_fs_Mast,EFO:0009899,CL:0000097,4834STDY7002879,HsapDv:0000046,PATO:0000461,True,NCBITaxon:9606
AAAGTAGTCCGCATCT-1-4834STDY7002879,F16,Trunk_Limb,Male,M_fs_Macro,EFO:0009899,CL:0000235,4834STDY7002879,HsapDv:0000046,PATO:0000461,True,NCBITaxon:9606
AAATGCCAGCTGCCCA-1-4834STDY7002879,F16,Trunk_Limb,Male,M_fs_Macro,EFO:0009899,CL:0000235,4834STDY7002879,HsapDv:0000046,PATO:0000461,True,NCBITaxon:9606
...,...,...,...,...,...,...,...,...,...,...,...
TTTGTCATCAAACAAG-1-FCAImmP7462241,F35,Trunk_Limb,Female,D_fs_FB,EFO:0009899,CL:0002620,FCAImmP7462241,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606
TTTGTCATCAAGGTAA-1-FCAImmP7462241,F35,Trunk_Limb,Female,D_fs_FB,EFO:0009899,CL:0002620,FCAImmP7462241,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606
TTTGTCATCATTGCGA-1-FCAImmP7462241,F35,Trunk_Limb,Female,D_fs_FB,EFO:0009899,CL:0002620,FCAImmP7462241,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606
TTTGTCATCGGACAAG-1-FCAImmP7462241,F35,Trunk_Limb,Female,D_fs_FB,EFO:0009899,CL:0002620,FCAImmP7462241,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606


#### **self_reported_ethnicity_ontology_term_id**

In [108]:
adata.obs['self_reported_ethnicity_ontology_term_id'] = ['unknown'] * len(adata.obs)

In [109]:
# change data type

In [110]:
adata.obs['self_reported_ethnicity_ontology_term_id'] = adata.obs['self_reported_ethnicity_ontology_term_id'].astype('category')

In [111]:
# view obs

In [112]:
adata.obs

Unnamed: 0_level_0,donor_id,Location,Sex,anno_final,assay_ontology_term_id,cell_type_ontology_term_id,study,development_stage_ontology_term_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id,self_reported_ethnicity_ontology_term_id
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
AAACCTGGTCAGTGGA-1-4834STDY7002879,F16,Trunk_Limb,Male,M_fs_Macro,EFO:0009899,CL:0000235,4834STDY7002879,HsapDv:0000046,PATO:0000461,True,NCBITaxon:9606,unknown
AAAGCAAAGATGTGGC-1-4834STDY7002879,F16,Trunk_Limb,Male,M_fs_Macro,EFO:0009899,CL:0000235,4834STDY7002879,HsapDv:0000046,PATO:0000461,True,NCBITaxon:9606,unknown
AAAGTAGCAGATCGGA-1-4834STDY7002879,F16,Trunk_Limb,Male,G_fs_Mast,EFO:0009899,CL:0000097,4834STDY7002879,HsapDv:0000046,PATO:0000461,True,NCBITaxon:9606,unknown
AAAGTAGTCCGCATCT-1-4834STDY7002879,F16,Trunk_Limb,Male,M_fs_Macro,EFO:0009899,CL:0000235,4834STDY7002879,HsapDv:0000046,PATO:0000461,True,NCBITaxon:9606,unknown
AAATGCCAGCTGCCCA-1-4834STDY7002879,F16,Trunk_Limb,Male,M_fs_Macro,EFO:0009899,CL:0000235,4834STDY7002879,HsapDv:0000046,PATO:0000461,True,NCBITaxon:9606,unknown
...,...,...,...,...,...,...,...,...,...,...,...,...
TTTGTCATCAAACAAG-1-FCAImmP7462241,F35,Trunk_Limb,Female,D_fs_FB,EFO:0009899,CL:0002620,FCAImmP7462241,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown
TTTGTCATCAAGGTAA-1-FCAImmP7462241,F35,Trunk_Limb,Female,D_fs_FB,EFO:0009899,CL:0002620,FCAImmP7462241,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown
TTTGTCATCATTGCGA-1-FCAImmP7462241,F35,Trunk_Limb,Female,D_fs_FB,EFO:0009899,CL:0002620,FCAImmP7462241,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown
TTTGTCATCGGACAAG-1-FCAImmP7462241,F35,Trunk_Limb,Female,D_fs_FB,EFO:0009899,CL:0002620,FCAImmP7462241,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown


#### **sex_ontology_term_id**

In [113]:
# identify the column in adata.obs which corresponds to sex

In [114]:
adata.obs.columns

Index(['donor_id', 'Location', 'Sex', 'anno_final', 'assay_ontology_term_id',
       'cell_type_ontology_term_id', 'study',
       'development_stage_ontology_term_id', 'disease_ontology_term_id',
       'is_primary_data', 'organism_ontology_term_id',
       'self_reported_ethnicity_ontology_term_id'],
      dtype='object')

In [115]:
# list the unique values 

In [116]:
list(adata.obs['Sex'].unique())

['Male', 'Female']

In [117]:
# create a dictionary of sex and sex ontology term id

In [118]:
mapping= {'Female': 'PATO:0000383', 'Male': 'PATO:0000384'}

In [119]:
# add sex_ontology_term_id column

In [120]:
adata.obs['sex_ontology_term_id'] = adata.obs['Sex'].map(mapping)

In [121]:
# change data type

In [122]:
adata.obs['sex_ontology_term_id'] = adata.obs['sex_ontology_term_id'].astype('category')

In [123]:
adata.obs

Unnamed: 0_level_0,donor_id,Location,Sex,anno_final,assay_ontology_term_id,cell_type_ontology_term_id,study,development_stage_ontology_term_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id,self_reported_ethnicity_ontology_term_id,sex_ontology_term_id
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
AAACCTGGTCAGTGGA-1-4834STDY7002879,F16,Trunk_Limb,Male,M_fs_Macro,EFO:0009899,CL:0000235,4834STDY7002879,HsapDv:0000046,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000384
AAAGCAAAGATGTGGC-1-4834STDY7002879,F16,Trunk_Limb,Male,M_fs_Macro,EFO:0009899,CL:0000235,4834STDY7002879,HsapDv:0000046,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000384
AAAGTAGCAGATCGGA-1-4834STDY7002879,F16,Trunk_Limb,Male,G_fs_Mast,EFO:0009899,CL:0000097,4834STDY7002879,HsapDv:0000046,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000384
AAAGTAGTCCGCATCT-1-4834STDY7002879,F16,Trunk_Limb,Male,M_fs_Macro,EFO:0009899,CL:0000235,4834STDY7002879,HsapDv:0000046,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000384
AAATGCCAGCTGCCCA-1-4834STDY7002879,F16,Trunk_Limb,Male,M_fs_Macro,EFO:0009899,CL:0000235,4834STDY7002879,HsapDv:0000046,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000384
...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTTGTCATCAAACAAG-1-FCAImmP7462241,F35,Trunk_Limb,Female,D_fs_FB,EFO:0009899,CL:0002620,FCAImmP7462241,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000383
TTTGTCATCAAGGTAA-1-FCAImmP7462241,F35,Trunk_Limb,Female,D_fs_FB,EFO:0009899,CL:0002620,FCAImmP7462241,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000383
TTTGTCATCATTGCGA-1-FCAImmP7462241,F35,Trunk_Limb,Female,D_fs_FB,EFO:0009899,CL:0002620,FCAImmP7462241,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000383
TTTGTCATCGGACAAG-1-FCAImmP7462241,F35,Trunk_Limb,Female,D_fs_FB,EFO:0009899,CL:0002620,FCAImmP7462241,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000383


#### **suspension_type**

In [124]:
adata.obs['suspension_type'] = ['cell'] * len(adata.obs)

In [125]:
# change data type of column

In [126]:
adata.obs['suspension_type'] = adata.obs['suspension_type'].astype('category')

In [127]:
# view obs

In [128]:
adata.obs

Unnamed: 0_level_0,donor_id,Location,Sex,anno_final,assay_ontology_term_id,cell_type_ontology_term_id,study,development_stage_ontology_term_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id,self_reported_ethnicity_ontology_term_id,sex_ontology_term_id,suspension_type
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
AAACCTGGTCAGTGGA-1-4834STDY7002879,F16,Trunk_Limb,Male,M_fs_Macro,EFO:0009899,CL:0000235,4834STDY7002879,HsapDv:0000046,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000384,cell
AAAGCAAAGATGTGGC-1-4834STDY7002879,F16,Trunk_Limb,Male,M_fs_Macro,EFO:0009899,CL:0000235,4834STDY7002879,HsapDv:0000046,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000384,cell
AAAGTAGCAGATCGGA-1-4834STDY7002879,F16,Trunk_Limb,Male,G_fs_Mast,EFO:0009899,CL:0000097,4834STDY7002879,HsapDv:0000046,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000384,cell
AAAGTAGTCCGCATCT-1-4834STDY7002879,F16,Trunk_Limb,Male,M_fs_Macro,EFO:0009899,CL:0000235,4834STDY7002879,HsapDv:0000046,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000384,cell
AAATGCCAGCTGCCCA-1-4834STDY7002879,F16,Trunk_Limb,Male,M_fs_Macro,EFO:0009899,CL:0000235,4834STDY7002879,HsapDv:0000046,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000384,cell
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTTGTCATCAAACAAG-1-FCAImmP7462241,F35,Trunk_Limb,Female,D_fs_FB,EFO:0009899,CL:0002620,FCAImmP7462241,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000383,cell
TTTGTCATCAAGGTAA-1-FCAImmP7462241,F35,Trunk_Limb,Female,D_fs_FB,EFO:0009899,CL:0002620,FCAImmP7462241,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000383,cell
TTTGTCATCATTGCGA-1-FCAImmP7462241,F35,Trunk_Limb,Female,D_fs_FB,EFO:0009899,CL:0002620,FCAImmP7462241,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000383,cell
TTTGTCATCGGACAAG-1-FCAImmP7462241,F35,Trunk_Limb,Female,D_fs_FB,EFO:0009899,CL:0002620,FCAImmP7462241,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000383,cell


#### **tissue_ontology_term_id**

In [129]:
# identify the column in adata.obs which corresponds to tissue

In [130]:
adata.obs.columns

Index(['donor_id', 'Location', 'Sex', 'anno_final', 'assay_ontology_term_id',
       'cell_type_ontology_term_id', 'study',
       'development_stage_ontology_term_id', 'disease_ontology_term_id',
       'is_primary_data', 'organism_ontology_term_id',
       'self_reported_ethnicity_ontology_term_id', 'sex_ontology_term_id',
       'suspension_type'],
      dtype='object')

In [131]:
# add 'tissue_ontology_term_id' column

In [132]:
adata.obs['tissue_ontology_term_id'] = ['UBERON:0002097'] * len(adata.obs)

In [133]:
# change data type of column

In [134]:
adata.obs['tissue_ontology_term_id'] = adata.obs['tissue_ontology_term_id'].astype('category')

In [135]:
#list the unique values in 'tissue_ontology_term_id' column

In [136]:
list(adata.obs['tissue_ontology_term_id'].unique())

['UBERON:0002097']

In [137]:
# view obs

In [138]:
adata.obs

Unnamed: 0_level_0,donor_id,Location,Sex,anno_final,assay_ontology_term_id,cell_type_ontology_term_id,study,development_stage_ontology_term_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id,self_reported_ethnicity_ontology_term_id,sex_ontology_term_id,suspension_type,tissue_ontology_term_id
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
AAACCTGGTCAGTGGA-1-4834STDY7002879,F16,Trunk_Limb,Male,M_fs_Macro,EFO:0009899,CL:0000235,4834STDY7002879,HsapDv:0000046,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000384,cell,UBERON:0002097
AAAGCAAAGATGTGGC-1-4834STDY7002879,F16,Trunk_Limb,Male,M_fs_Macro,EFO:0009899,CL:0000235,4834STDY7002879,HsapDv:0000046,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000384,cell,UBERON:0002097
AAAGTAGCAGATCGGA-1-4834STDY7002879,F16,Trunk_Limb,Male,G_fs_Mast,EFO:0009899,CL:0000097,4834STDY7002879,HsapDv:0000046,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000384,cell,UBERON:0002097
AAAGTAGTCCGCATCT-1-4834STDY7002879,F16,Trunk_Limb,Male,M_fs_Macro,EFO:0009899,CL:0000235,4834STDY7002879,HsapDv:0000046,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000384,cell,UBERON:0002097
AAATGCCAGCTGCCCA-1-4834STDY7002879,F16,Trunk_Limb,Male,M_fs_Macro,EFO:0009899,CL:0000235,4834STDY7002879,HsapDv:0000046,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000384,cell,UBERON:0002097
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTTGTCATCAAACAAG-1-FCAImmP7462241,F35,Trunk_Limb,Female,D_fs_FB,EFO:0009899,CL:0002620,FCAImmP7462241,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000383,cell,UBERON:0002097
TTTGTCATCAAGGTAA-1-FCAImmP7462241,F35,Trunk_Limb,Female,D_fs_FB,EFO:0009899,CL:0002620,FCAImmP7462241,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000383,cell,UBERON:0002097
TTTGTCATCATTGCGA-1-FCAImmP7462241,F35,Trunk_Limb,Female,D_fs_FB,EFO:0009899,CL:0002620,FCAImmP7462241,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000383,cell,UBERON:0002097
TTTGTCATCGGACAAG-1-FCAImmP7462241,F35,Trunk_Limb,Female,D_fs_FB,EFO:0009899,CL:0002620,FCAImmP7462241,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000383,cell,UBERON:0002097


#### **obsm (Embeddings)**

In [139]:
# view obsm

In [140]:
# check whether all columns are prefixed with X

In [141]:
adata.obsm

AxisArrays with keys: 

#### **uns (Dataset Metadata)**

In [142]:
# View

In [143]:
adata.uns

OverloadedDict, wrapping:
	OrderedDict()
With overloaded keys:
	['neighbors'].

In [144]:
adata.uns.keys

<bound method OverloadedDict.keys of OverloadedDict, wrapping:
	OrderedDict()
With overloaded keys:
	['neighbors'].>

In [145]:
# Give a title for the dataset

In [146]:
adata.uns['title'] = 'Human fetal skin scRNA-seq data'

In [147]:
# Set the default embedding

In [148]:
#adata.uns['default_embedding'] = 'X_umap'

### **Final check**

In [149]:
# view anndata object

In [150]:
adata

AnnData object with n_obs × n_vars = 44298 × 33234
    obs: 'donor_id', 'Location', 'Sex', 'anno_final', 'assay_ontology_term_id', 'cell_type_ontology_term_id', 'study', 'development_stage_ontology_term_id', 'disease_ontology_term_id', 'is_primary_data', 'organism_ontology_term_id', 'self_reported_ethnicity_ontology_term_id', 'sex_ontology_term_id', 'suspension_type', 'tissue_ontology_term_id'
    var: 'feature_is_filtered'
    uns: 'title'

In [151]:
# view obs and var data types

In [152]:
adata.obs.dtypes

donor_id                                    category
Location                                    category
Sex                                         category
anno_final                                  category
assay_ontology_term_id                      category
cell_type_ontology_term_id                  category
study                                         object
development_stage_ontology_term_id          category
disease_ontology_term_id                    category
is_primary_data                                 bool
organism_ontology_term_id                   category
self_reported_ethnicity_ontology_term_id    category
sex_ontology_term_id                        category
suspension_type                             category
tissue_ontology_term_id                     category
dtype: object

In [153]:
dty = pd.DataFrame(adata.var.dtypes, columns = ['dtype'])
for c in dty[dty['dtype'] == 'float64'].index.values:
    adata.var[c] = adata.var[c].astype('float32')
    print(f"changed {c} from float64 to float32")
for c in dty[dty['dtype'] == 'int64'].index.values:
    adata.var[c] = adata.var[c].astype('int32') 
    print(f"changed {c} from int64 to int32")

In [154]:
dty = pd.DataFrame(adata.obs.dtypes, columns = ['dtype'])
for c in dty[dty['dtype'] == 'float64'].index.values:
    adata.obs[c] = adata.obs[c].astype('float32')
    print(f"changed {c} from float64 to float32")
for c in dty[dty['dtype'] == 'int64'].index.values:
    adata.obs[c] = adata.obs[c].astype('int32') 
    print(f"changed {c} from int64 to int32")
for c in dty[dty['dtype'] == 'object'].index.values:
    adata.obs[c] = adata.obs[c].astype('category') 
    print(f"changed {c} from object to category")

changed study from object to category


In [155]:
# view obs

In [156]:
adata.obs

Unnamed: 0_level_0,donor_id,Location,Sex,anno_final,assay_ontology_term_id,cell_type_ontology_term_id,study,development_stage_ontology_term_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id,self_reported_ethnicity_ontology_term_id,sex_ontology_term_id,suspension_type,tissue_ontology_term_id
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
AAACCTGGTCAGTGGA-1-4834STDY7002879,F16,Trunk_Limb,Male,M_fs_Macro,EFO:0009899,CL:0000235,4834STDY7002879,HsapDv:0000046,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000384,cell,UBERON:0002097
AAAGCAAAGATGTGGC-1-4834STDY7002879,F16,Trunk_Limb,Male,M_fs_Macro,EFO:0009899,CL:0000235,4834STDY7002879,HsapDv:0000046,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000384,cell,UBERON:0002097
AAAGTAGCAGATCGGA-1-4834STDY7002879,F16,Trunk_Limb,Male,G_fs_Mast,EFO:0009899,CL:0000097,4834STDY7002879,HsapDv:0000046,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000384,cell,UBERON:0002097
AAAGTAGTCCGCATCT-1-4834STDY7002879,F16,Trunk_Limb,Male,M_fs_Macro,EFO:0009899,CL:0000235,4834STDY7002879,HsapDv:0000046,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000384,cell,UBERON:0002097
AAATGCCAGCTGCCCA-1-4834STDY7002879,F16,Trunk_Limb,Male,M_fs_Macro,EFO:0009899,CL:0000235,4834STDY7002879,HsapDv:0000046,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000384,cell,UBERON:0002097
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTTGTCATCAAACAAG-1-FCAImmP7462241,F35,Trunk_Limb,Female,D_fs_FB,EFO:0009899,CL:0002620,FCAImmP7462241,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000383,cell,UBERON:0002097
TTTGTCATCAAGGTAA-1-FCAImmP7462241,F35,Trunk_Limb,Female,D_fs_FB,EFO:0009899,CL:0002620,FCAImmP7462241,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000383,cell,UBERON:0002097
TTTGTCATCATTGCGA-1-FCAImmP7462241,F35,Trunk_Limb,Female,D_fs_FB,EFO:0009899,CL:0002620,FCAImmP7462241,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000383,cell,UBERON:0002097
TTTGTCATCGGACAAG-1-FCAImmP7462241,F35,Trunk_Limb,Female,D_fs_FB,EFO:0009899,CL:0002620,FCAImmP7462241,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000383,cell,UBERON:0002097


In [157]:
adata.obs.columns

Index(['donor_id', 'Location', 'Sex', 'anno_final', 'assay_ontology_term_id',
       'cell_type_ontology_term_id', 'study',
       'development_stage_ontology_term_id', 'disease_ontology_term_id',
       'is_primary_data', 'organism_ontology_term_id',
       'self_reported_ethnicity_ontology_term_id', 'sex_ontology_term_id',
       'suspension_type', 'tissue_ontology_term_id'],
      dtype='object')

In [158]:
# delete unwanted columns in obs

In [159]:
del adata.obs['Sex']

In [160]:
# view obs

In [161]:
adata.obs

Unnamed: 0_level_0,donor_id,Location,anno_final,assay_ontology_term_id,cell_type_ontology_term_id,study,development_stage_ontology_term_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id,self_reported_ethnicity_ontology_term_id,sex_ontology_term_id,suspension_type,tissue_ontology_term_id
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
AAACCTGGTCAGTGGA-1-4834STDY7002879,F16,Trunk_Limb,M_fs_Macro,EFO:0009899,CL:0000235,4834STDY7002879,HsapDv:0000046,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000384,cell,UBERON:0002097
AAAGCAAAGATGTGGC-1-4834STDY7002879,F16,Trunk_Limb,M_fs_Macro,EFO:0009899,CL:0000235,4834STDY7002879,HsapDv:0000046,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000384,cell,UBERON:0002097
AAAGTAGCAGATCGGA-1-4834STDY7002879,F16,Trunk_Limb,G_fs_Mast,EFO:0009899,CL:0000097,4834STDY7002879,HsapDv:0000046,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000384,cell,UBERON:0002097
AAAGTAGTCCGCATCT-1-4834STDY7002879,F16,Trunk_Limb,M_fs_Macro,EFO:0009899,CL:0000235,4834STDY7002879,HsapDv:0000046,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000384,cell,UBERON:0002097
AAATGCCAGCTGCCCA-1-4834STDY7002879,F16,Trunk_Limb,M_fs_Macro,EFO:0009899,CL:0000235,4834STDY7002879,HsapDv:0000046,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000384,cell,UBERON:0002097
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTTGTCATCAAACAAG-1-FCAImmP7462241,F35,Trunk_Limb,D_fs_FB,EFO:0009899,CL:0002620,FCAImmP7462241,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000383,cell,UBERON:0002097
TTTGTCATCAAGGTAA-1-FCAImmP7462241,F35,Trunk_Limb,D_fs_FB,EFO:0009899,CL:0002620,FCAImmP7462241,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000383,cell,UBERON:0002097
TTTGTCATCATTGCGA-1-FCAImmP7462241,F35,Trunk_Limb,D_fs_FB,EFO:0009899,CL:0002620,FCAImmP7462241,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000383,cell,UBERON:0002097
TTTGTCATCGGACAAG-1-FCAImmP7462241,F35,Trunk_Limb,D_fs_FB,EFO:0009899,CL:0002620,FCAImmP7462241,HsapDv:0000030,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000383,cell,UBERON:0002097


In [162]:
adata.obs.columns

Index(['donor_id', 'Location', 'anno_final', 'assay_ontology_term_id',
       'cell_type_ontology_term_id', 'study',
       'development_stage_ontology_term_id', 'disease_ontology_term_id',
       'is_primary_data', 'organism_ontology_term_id',
       'self_reported_ethnicity_ontology_term_id', 'sex_ontology_term_id',
       'suspension_type', 'tissue_ontology_term_id'],
      dtype='object')

In [163]:
# view var

In [164]:
adata.var

Unnamed: 0,feature_is_filtered
ENSG00000243485,False
ENSG00000237613,False
ENSG00000186092,False
ENSG00000238009,False
ENSG00000239945,False
...,...
ENSG00000277856,False
ENSG00000275063,False
ENSG00000271254,False
ENSG00000277475,False


In [165]:
#view uns

In [166]:
adata.uns

OverloadedDict, wrapping:
	OrderedDict([('title', 'Human fetal skin scRNA-seq data')])
With overloaded keys:
	['neighbors'].

In [167]:
list(adata.uns.keys())

['title']

In [168]:
adata.obs.columns

Index(['donor_id', 'Location', 'anno_final', 'assay_ontology_term_id',
       'cell_type_ontology_term_id', 'study',
       'development_stage_ontology_term_id', 'disease_ontology_term_id',
       'is_primary_data', 'organism_ontology_term_id',
       'self_reported_ethnicity_ontology_term_id', 'sex_ontology_term_id',
       'suspension_type', 'tissue_ontology_term_id'],
      dtype='object')

In [169]:
# Remove unwanted columns in uns

In [170]:
#check the format of expression matrix

In [171]:
adata.X

<44298x33234 sparse matrix of type '<class 'numpy.float32'>'
	with 105173159 stored elements in Compressed Sparse Row format>

In [172]:
#Copy raw counts to adata.raw

In [173]:
adata.write('/lustre/scratch127/cellgen/cellgeni/cxgportal_sets/inflammatory_skin_disease/Final_objects/Human_fetal_10x_data.h5ad', compression = 'gzip')