#  Curating donor_p13_trophoblasts dataset 

In [1]:
Article: 'Spatial multiomics map of trophoblast development in early pregnancy'

In [2]:
DOI : 'https://doi.org/10.1038/s41586-023-05869-0'

In [3]:
Data_Source : 'https://www.reproductivecellatlas.org/mfi.html'

### Mount farm

mount-farm

### Packages required for curation

#### Import all packages required for the curation

In [4]:
import numpy as np
import pandas as pd
import scanpy as sc
import scipy
from tqdm import tqdm
from scipy import sparse
from scipy.sparse import csr_matrix
import anndata as ad
import os
import subprocess
import math

### Curation Schema

#### X (Matrix Layers)

#### AnnData object

In [5]:
# Load the AnnData object

In [6]:
adata = sc.read_h5ad('/lustre/scratch127/cellgen/cellgeni/cxgportal_sets/trophoblast/donor_p13_trophoblasts.h5ad')

In [7]:
# View the AnnData object

In [8]:
adata

AnnData object with n_obs × n_vars = 37675 × 29058
    obs: 'n_genes', 'donor', 'tissue_block', 'age', 'sample', 'n_counts', 'dataset', 'technique', 'batch', 'barcode', 'S_score', 'G2M_score', 'phase', 'origin_M_F', 'annotation_prev_or_removed', 'final_annot_all_troph_corrected'
    var: 'gene_ids-0', 'feature_types-0', 'genome-0', 'n_cells-0', 'gene_ids-1', 'feature_types-1', 'genome-1', 'n_cells-1', 'gene_ids-2', 'feature_types-2', 'genome-2', 'n_cells-2', 'gene_ids-3', 'feature_types-3', 'genome-3', 'n_cells-3', 'gene_ids-4', 'feature_types-4', 'genome-4', 'n_cells-4', 'gene_ids-5', 'feature_types-5', 'genome-5', 'n_cells-5', 'gene_ids-6', 'feature_types-6', 'genome-6', 'n_cells-6', 'gene_ids-7', 'feature_types-7', 'genome-7', 'n_cells-7'
    uns: 'age_colors', 'annotation_prev_or_removed_colors', 'donor_colors', 'final_annot_all_troph_colors', 'final_annot_all_troph_corrected_colors', 'final_annot_inv_troph_colors', 'louvain', 'louvain_scvi_n_latent_8_colors', 'louvain_scvi_troph_r

##### Raw Counts matrix

In [9]:
# Check whether adata has raw counts or normalized counts

In [10]:
print(adata.X)

  (666, 0)	1.5060438
  (746, 0)	1.3161347
  (1132, 0)	1.0537868
  (1496, 0)	1.5909654
  (1551, 0)	1.0210154
  (1585, 0)	1.4167948
  (1766, 0)	1.035753
  (1774, 0)	1.8001465
  (2128, 0)	0.46555716
  (2204, 0)	1.0278051
  (2230, 0)	1.3731703
  (2385, 0)	0.56066865
  (2394, 0)	1.9896199
  (2484, 0)	1.7025269
  (2567, 0)	1.1694117
  (2595, 0)	0.7557831
  (2632, 0)	0.3468709
  (3079, 0)	1.9655236
  (3213, 0)	0.6158167
  (3270, 0)	1.0406817
  (3489, 0)	1.7946229
  (3585, 0)	1.070102
  (3749, 0)	1.4181118
  (3852, 0)	1.2571706
  (4003, 0)	1.174123
  :	:
  (37632, 29057)	1.4350809
  (37633, 29057)	1.5511724
  (37635, 29057)	1.6426113
  (37636, 29057)	1.6699781
  (37639, 29057)	0.9726335
  (37641, 29057)	1.6526458
  (37642, 29057)	1.330122
  (37644, 29057)	1.8305576
  (37645, 29057)	1.9252099
  (37646, 29057)	1.6392559
  (37647, 29057)	1.6116563
  (37651, 29057)	1.8952024
  (37652, 29057)	1.1548103
  (37655, 29057)	1.3462334
  (37659, 29057)	1.4697279
  (37661, 29057)	1.4210142
  (37664, 29057)

In [11]:
print(adata.raw.X)

  (0, 19700)	1.0
  (0, 19698)	2.0
  (0, 19693)	3.0
  (0, 19689)	3.0
  (0, 19692)	2.0
  (0, 19691)	2.0
  (0, 19696)	2.0
  (0, 19695)	1.0
  (0, 12383)	1.0
  (0, 21392)	2.0
  (0, 19031)	3.0
  (0, 18661)	2.0
  (0, 21573)	1.0
  (0, 26909)	1.0
  (0, 21818)	1.0
  (0, 20184)	1.0
  (0, 24694)	1.0
  (0, 21850)	20.0
  (0, 9333)	2.0
  (0, 21401)	1.0
  (0, 15788)	3.0
  (0, 9612)	3.0
  (0, 12763)	4.0
  (0, 27202)	2.0
  (0, 15336)	4.0
  :	:
  (37674, 27467)	1.0
  (37674, 13759)	2.0
  (37674, 13704)	1.0
  (37674, 24580)	1.0
  (37674, 21923)	1.0
  (37674, 12513)	2.0
  (37674, 27786)	2.0
  (37674, 26729)	1.0
  (37674, 19280)	2.0
  (37674, 19202)	1.0
  (37674, 19740)	1.0
  (37674, 5819)	1.0
  (37674, 21551)	1.0
  (37674, 16464)	1.0
  (37674, 23194)	4.0
  (37674, 13236)	5.0
  (37674, 27703)	1.0
  (37674, 15180)	1.0
  (37674, 19125)	1.0
  (37674, 23193)	1.0
  (37674, 22400)	5.0
  (37674, 10612)	1.0
  (37674, 19826)	1.0
  (37674, 27418)	1.0
  (37674, 24008)	1.0


In [12]:
#since raw counts are present in adata.raw, copy the counts to araw

In [13]:
araw = adata.raw.to_adata()

In [14]:
araw

AnnData object with n_obs × n_vars = 37675 × 29058
    obs: 'n_genes', 'donor', 'tissue_block', 'age', 'sample', 'n_counts', 'dataset', 'technique', 'batch', 'barcode', 'S_score', 'G2M_score', 'phase', 'origin_M_F', 'annotation_prev_or_removed', 'final_annot_all_troph_corrected'
    var: 'gene_ids-0', 'feature_types-0', 'genome-0', 'n_cells-0', 'gene_ids-1', 'feature_types-1', 'genome-1', 'n_cells-1', 'gene_ids-2', 'feature_types-2', 'genome-2', 'n_cells-2', 'gene_ids-3', 'feature_types-3', 'genome-3', 'n_cells-3', 'gene_ids-4', 'feature_types-4', 'genome-4', 'n_cells-4', 'gene_ids-5', 'feature_types-5', 'genome-5', 'n_cells-5', 'gene_ids-6', 'feature_types-6', 'genome-6', 'n_cells-6', 'gene_ids-7', 'feature_types-7', 'genome-7', 'n_cells-7'
    uns: 'age_colors', 'annotation_prev_or_removed_colors', 'donor_colors', 'final_annot_all_troph_colors', 'final_annot_all_troph_corrected_colors', 'final_annot_inv_troph_colors', 'louvain', 'louvain_scvi_n_latent_8_colors', 'louvain_scvi_troph_r

#### Variables (var)

In [15]:
# View var

In [16]:
adata.var

Unnamed: 0,gene_ids-0,feature_types-0,genome-0,n_cells-0,gene_ids-1,feature_types-1,genome-1,n_cells-1,gene_ids-2,feature_types-2,...,genome-5,n_cells-5,gene_ids-6,feature_types-6,genome-6,n_cells-6,gene_ids-7,feature_types-7,genome-7,n_cells-7
A1BG,ENSG00000121410,Gene Expression,GRCh38-3.0.0_premrna,105.0,ENSG00000121410,Gene Expression,GRCh38-3.0.0_premrna,119.0,ENSG00000121410,Gene Expression,...,GRCh38-3.0.0_premrna,101.0,ENSG00000121410,Gene Expression,GRCh38,159.0,ENSG00000121410,Gene Expression,GRCh38,160.0
A1BG-AS1,ENSG00000268895,Gene Expression,GRCh38-3.0.0_premrna,162.0,ENSG00000268895,Gene Expression,GRCh38-3.0.0_premrna,211.0,ENSG00000268895,Gene Expression,...,GRCh38-3.0.0_premrna,152.0,ENSG00000268895,Gene Expression,GRCh38,223.0,ENSG00000268895,Gene Expression,GRCh38,217.0
A1CF,ENSG00000148584,Gene Expression,GRCh38-3.0.0_premrna,12.0,ENSG00000148584,Gene Expression,GRCh38-3.0.0_premrna,11.0,ENSG00000148584,Gene Expression,...,GRCh38-3.0.0_premrna,9.0,ENSG00000148584,Gene Expression,GRCh38,12.0,ENSG00000148584,Gene Expression,GRCh38,15.0
A2M,ENSG00000175899,Gene Expression,GRCh38-3.0.0_premrna,1302.0,ENSG00000175899,Gene Expression,GRCh38-3.0.0_premrna,1356.0,ENSG00000175899,Gene Expression,...,GRCh38-3.0.0_premrna,1074.0,ENSG00000175899,Gene Expression,GRCh38,1429.0,ENSG00000175899,Gene Expression,GRCh38,1333.0
A2M-AS1,ENSG00000245105,Gene Expression,GRCh38-3.0.0_premrna,20.0,ENSG00000245105,Gene Expression,GRCh38-3.0.0_premrna,18.0,ENSG00000245105,Gene Expression,...,GRCh38-3.0.0_premrna,10.0,ENSG00000245105,Gene Expression,GRCh38,36.0,ENSG00000245105,Gene Expression,GRCh38,25.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZXDC,ENSG00000070476,Gene Expression,GRCh38-3.0.0_premrna,1994.0,ENSG00000070476,Gene Expression,GRCh38-3.0.0_premrna,2274.0,ENSG00000070476,Gene Expression,...,GRCh38-3.0.0_premrna,2410.0,ENSG00000070476,Gene Expression,GRCh38,2541.0,ENSG00000070476,Gene Expression,GRCh38,2641.0
ZYG11A,ENSG00000203995,Gene Expression,GRCh38-3.0.0_premrna,218.0,ENSG00000203995,Gene Expression,GRCh38-3.0.0_premrna,276.0,ENSG00000203995,Gene Expression,...,GRCh38-3.0.0_premrna,229.0,ENSG00000203995,Gene Expression,GRCh38,216.0,ENSG00000203995,Gene Expression,GRCh38,226.0
ZYG11B,ENSG00000162378,Gene Expression,GRCh38-3.0.0_premrna,2320.0,ENSG00000162378,Gene Expression,GRCh38-3.0.0_premrna,2526.0,ENSG00000162378,Gene Expression,...,GRCh38-3.0.0_premrna,2920.0,ENSG00000162378,Gene Expression,GRCh38,2735.0,ENSG00000162378,Gene Expression,GRCh38,2810.0
ZYX,ENSG00000159840,Gene Expression,GRCh38-3.0.0_premrna,1274.0,ENSG00000159840,Gene Expression,GRCh38-3.0.0_premrna,1402.0,ENSG00000159840,Gene Expression,...,GRCh38-3.0.0_premrna,1124.0,ENSG00000159840,Gene Expression,GRCh38,1795.0,ENSG00000159840,Gene Expression,GRCh38,1690.0


In [17]:
araw.var

Unnamed: 0,gene_ids-0,feature_types-0,genome-0,n_cells-0,gene_ids-1,feature_types-1,genome-1,n_cells-1,gene_ids-2,feature_types-2,...,genome-5,n_cells-5,gene_ids-6,feature_types-6,genome-6,n_cells-6,gene_ids-7,feature_types-7,genome-7,n_cells-7
A1BG,ENSG00000121410,Gene Expression,GRCh38-3.0.0_premrna,105.0,ENSG00000121410,Gene Expression,GRCh38-3.0.0_premrna,119.0,ENSG00000121410,Gene Expression,...,GRCh38-3.0.0_premrna,101.0,ENSG00000121410,Gene Expression,GRCh38,159.0,ENSG00000121410,Gene Expression,GRCh38,160.0
A1BG-AS1,ENSG00000268895,Gene Expression,GRCh38-3.0.0_premrna,162.0,ENSG00000268895,Gene Expression,GRCh38-3.0.0_premrna,211.0,ENSG00000268895,Gene Expression,...,GRCh38-3.0.0_premrna,152.0,ENSG00000268895,Gene Expression,GRCh38,223.0,ENSG00000268895,Gene Expression,GRCh38,217.0
A1CF,ENSG00000148584,Gene Expression,GRCh38-3.0.0_premrna,12.0,ENSG00000148584,Gene Expression,GRCh38-3.0.0_premrna,11.0,ENSG00000148584,Gene Expression,...,GRCh38-3.0.0_premrna,9.0,ENSG00000148584,Gene Expression,GRCh38,12.0,ENSG00000148584,Gene Expression,GRCh38,15.0
A2M,ENSG00000175899,Gene Expression,GRCh38-3.0.0_premrna,1302.0,ENSG00000175899,Gene Expression,GRCh38-3.0.0_premrna,1356.0,ENSG00000175899,Gene Expression,...,GRCh38-3.0.0_premrna,1074.0,ENSG00000175899,Gene Expression,GRCh38,1429.0,ENSG00000175899,Gene Expression,GRCh38,1333.0
A2M-AS1,ENSG00000245105,Gene Expression,GRCh38-3.0.0_premrna,20.0,ENSG00000245105,Gene Expression,GRCh38-3.0.0_premrna,18.0,ENSG00000245105,Gene Expression,...,GRCh38-3.0.0_premrna,10.0,ENSG00000245105,Gene Expression,GRCh38,36.0,ENSG00000245105,Gene Expression,GRCh38,25.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZXDC,ENSG00000070476,Gene Expression,GRCh38-3.0.0_premrna,1994.0,ENSG00000070476,Gene Expression,GRCh38-3.0.0_premrna,2274.0,ENSG00000070476,Gene Expression,...,GRCh38-3.0.0_premrna,2410.0,ENSG00000070476,Gene Expression,GRCh38,2541.0,ENSG00000070476,Gene Expression,GRCh38,2641.0
ZYG11A,ENSG00000203995,Gene Expression,GRCh38-3.0.0_premrna,218.0,ENSG00000203995,Gene Expression,GRCh38-3.0.0_premrna,276.0,ENSG00000203995,Gene Expression,...,GRCh38-3.0.0_premrna,229.0,ENSG00000203995,Gene Expression,GRCh38,216.0,ENSG00000203995,Gene Expression,GRCh38,226.0
ZYG11B,ENSG00000162378,Gene Expression,GRCh38-3.0.0_premrna,2320.0,ENSG00000162378,Gene Expression,GRCh38-3.0.0_premrna,2526.0,ENSG00000162378,Gene Expression,...,GRCh38-3.0.0_premrna,2920.0,ENSG00000162378,Gene Expression,GRCh38,2735.0,ENSG00000162378,Gene Expression,GRCh38,2810.0
ZYX,ENSG00000159840,Gene Expression,GRCh38-3.0.0_premrna,1274.0,ENSG00000159840,Gene Expression,GRCh38-3.0.0_premrna,1402.0,ENSG00000159840,Gene Expression,...,GRCh38-3.0.0_premrna,1124.0,ENSG00000159840,Gene Expression,GRCh38,1795.0,ENSG00000159840,Gene Expression,GRCh38,1690.0


### Ensembl IDs

In [18]:
#Ensembl ids are provided in multiple columns in adata.var

In [19]:
#Select columns from adata.var that have column name starting with gene_ids and assigns them to a variable gene_names

In [20]:
gene_names = adata.var.loc[:,adata.var.columns.str.startswith('gene_ids')]

In [21]:
gene_names

Unnamed: 0,gene_ids-0,gene_ids-1,gene_ids-2,gene_ids-3,gene_ids-4,gene_ids-5,gene_ids-6,gene_ids-7
A1BG,ENSG00000121410,ENSG00000121410,ENSG00000121410,ENSG00000121410,ENSG00000121410,ENSG00000121410,ENSG00000121410,ENSG00000121410
A1BG-AS1,ENSG00000268895,ENSG00000268895,ENSG00000268895,ENSG00000268895,ENSG00000268895,ENSG00000268895,ENSG00000268895,ENSG00000268895
A1CF,ENSG00000148584,ENSG00000148584,ENSG00000148584,ENSG00000148584,ENSG00000148584,ENSG00000148584,ENSG00000148584,ENSG00000148584
A2M,ENSG00000175899,ENSG00000175899,ENSG00000175899,ENSG00000175899,ENSG00000175899,ENSG00000175899,ENSG00000175899,ENSG00000175899
A2M-AS1,ENSG00000245105,ENSG00000245105,ENSG00000245105,ENSG00000245105,ENSG00000245105,ENSG00000245105,ENSG00000245105,ENSG00000245105
...,...,...,...,...,...,...,...,...
ZXDC,ENSG00000070476,ENSG00000070476,ENSG00000070476,ENSG00000070476,ENSG00000070476,ENSG00000070476,ENSG00000070476,ENSG00000070476
ZYG11A,ENSG00000203995,ENSG00000203995,ENSG00000203995,ENSG00000203995,ENSG00000203995,ENSG00000203995,ENSG00000203995,ENSG00000203995
ZYG11B,ENSG00000162378,ENSG00000162378,ENSG00000162378,ENSG00000162378,ENSG00000162378,ENSG00000162378,ENSG00000162378,ENSG00000162378
ZYX,ENSG00000159840,ENSG00000159840,ENSG00000159840,ENSG00000159840,ENSG00000159840,ENSG00000159840,ENSG00000159840,ENSG00000159840


In [22]:
len(gene_names)

29058

In [23]:
#Creates an empty list to store extracted gene names

In [24]:
ensg = []

for k in tqdm(range(0,len(adata.var))):
    gene_valcount = gene_names.iloc[k].value_counts()
    if sum(gene_names.iloc[k].value_counts().index.str.startswith('ENSG')) > 0:
        ensg.append(gene_valcount[gene_valcount.index.str.startswith('ENSG')].index[0])
    else:
        ensg.append('nan')

100%|██████████| 29058/29058 [00:30<00:00, 966.38it/s] 


In [25]:
ensg

['ENSG00000121410',
 'ENSG00000268895',
 'ENSG00000148584',
 'ENSG00000175899',
 'ENSG00000245105',
 'ENSG00000166535',
 'ENSG00000256661',
 'ENSG00000184389',
 'ENSG00000128274',
 'ENSG00000118017',
 'ENSG00000094914',
 'ENSG00000081760',
 'ENSG00000114771',
 'ENSG00000197953',
 'ENSG00000242908',
 'ENSG00000188984',
 'ENSG00000204518',
 'ENSG00000109576',
 'ENSG00000158122',
 'ENSG00000103591',
 'ENSG00000115977',
 'ENSG00000087884',
 'ENSG00000127837',
 'ENSG00000129673',
 'ENSG00000131043',
 'ENSG00000205002',
 'ENSG00000090861',
 'ENSG00000124608',
 'ENSG00000266967',
 'ENSG00000157426',
 'ENSG00000149313',
 'ENSG00000008311',
 'ENSG00000215458',
 'ENSG00000275700',
 'ENSG00000181409',
 'ENSG00000254180',
 'ENSG00000281376',
 'ENSG00000183044',
 'ENSG00000165029',
 'ENSG00000154263',
 'ENSG00000144452',
 'ENSG00000179869',
 'ENSG00000107331',
 'ENSG00000167972',
 'ENSG00000198691',
 'ENSG00000154265',
 'ENSG00000154262',
 'ENSG00000064687',
 'ENSG00000141338',
 'ENSG00000154258',


In [26]:
len(ensg)

29058

In [27]:
# copy the index column values to a new column called gebe_symbola

In [28]:
adata.var['gene_symbols'] = adata.var_names
araw.var['gene_symbols'] = adata.var_names

In [29]:
#set ensembl ids as index column

In [30]:
adata.var_names = ensg

In [31]:
araw.var_names = ensg

In [32]:
#Load the approved genes file in the curation

In [33]:
approved_genes = pd.read_csv('/home/jovyan/CXG_DATASETS_PORTAL/gene_info/genes_approved.csv')

In [34]:
genedict = {key: 1 for key in list(approved_genes.feature_id)}

In [35]:
genedict

{'ERCC-00002': 1,
 'ERCC-00003': 1,
 'ERCC-00004': 1,
 'ERCC-00009': 1,
 'ERCC-00012': 1,
 'ERCC-00013': 1,
 'ERCC-00014': 1,
 'ERCC-00016': 1,
 'ERCC-00017': 1,
 'ERCC-00019': 1,
 'ERCC-00022': 1,
 'ERCC-00024': 1,
 'ERCC-00025': 1,
 'ERCC-00028': 1,
 'ERCC-00031': 1,
 'ERCC-00033': 1,
 'ERCC-00034': 1,
 'ERCC-00035': 1,
 'ERCC-00039': 1,
 'ERCC-00040': 1,
 'ERCC-00041': 1,
 'ERCC-00042': 1,
 'ERCC-00043': 1,
 'ERCC-00044': 1,
 'ERCC-00046': 1,
 'ERCC-00048': 1,
 'ERCC-00051': 1,
 'ERCC-00053': 1,
 'ERCC-00054': 1,
 'ERCC-00057': 1,
 'ERCC-00058': 1,
 'ERCC-00059': 1,
 'ERCC-00060': 1,
 'ERCC-00061': 1,
 'ERCC-00062': 1,
 'ERCC-00067': 1,
 'ERCC-00069': 1,
 'ERCC-00071': 1,
 'ERCC-00073': 1,
 'ERCC-00074': 1,
 'ERCC-00075': 1,
 'ERCC-00076': 1,
 'ERCC-00077': 1,
 'ERCC-00078': 1,
 'ERCC-00079': 1,
 'ERCC-00081': 1,
 'ERCC-00083': 1,
 'ERCC-00084': 1,
 'ERCC-00085': 1,
 'ERCC-00086': 1,
 'ERCC-00092': 1,
 'ERCC-00095': 1,
 'ERCC-00096': 1,
 'ERCC-00097': 1,
 'ERCC-00098': 1,
 'ERCC-000

In [36]:
# Filter out genes that don't appear in the approved annotation

In [37]:
var_to_keep_adata = [x for x in ensg if (x in genedict)]

In [38]:
adata = adata[:, var_to_keep_adata].copy()
araw = araw[:, var_to_keep_adata].copy()

In [39]:
adata.var

Unnamed: 0,gene_ids-0,feature_types-0,genome-0,n_cells-0,gene_ids-1,feature_types-1,genome-1,n_cells-1,gene_ids-2,feature_types-2,...,n_cells-5,gene_ids-6,feature_types-6,genome-6,n_cells-6,gene_ids-7,feature_types-7,genome-7,n_cells-7,gene_symbols
ENSG00000121410,ENSG00000121410,Gene Expression,GRCh38-3.0.0_premrna,105.0,ENSG00000121410,Gene Expression,GRCh38-3.0.0_premrna,119.0,ENSG00000121410,Gene Expression,...,101.0,ENSG00000121410,Gene Expression,GRCh38,159.0,ENSG00000121410,Gene Expression,GRCh38,160.0,A1BG
ENSG00000268895,ENSG00000268895,Gene Expression,GRCh38-3.0.0_premrna,162.0,ENSG00000268895,Gene Expression,GRCh38-3.0.0_premrna,211.0,ENSG00000268895,Gene Expression,...,152.0,ENSG00000268895,Gene Expression,GRCh38,223.0,ENSG00000268895,Gene Expression,GRCh38,217.0,A1BG-AS1
ENSG00000148584,ENSG00000148584,Gene Expression,GRCh38-3.0.0_premrna,12.0,ENSG00000148584,Gene Expression,GRCh38-3.0.0_premrna,11.0,ENSG00000148584,Gene Expression,...,9.0,ENSG00000148584,Gene Expression,GRCh38,12.0,ENSG00000148584,Gene Expression,GRCh38,15.0,A1CF
ENSG00000175899,ENSG00000175899,Gene Expression,GRCh38-3.0.0_premrna,1302.0,ENSG00000175899,Gene Expression,GRCh38-3.0.0_premrna,1356.0,ENSG00000175899,Gene Expression,...,1074.0,ENSG00000175899,Gene Expression,GRCh38,1429.0,ENSG00000175899,Gene Expression,GRCh38,1333.0,A2M
ENSG00000245105,ENSG00000245105,Gene Expression,GRCh38-3.0.0_premrna,20.0,ENSG00000245105,Gene Expression,GRCh38-3.0.0_premrna,18.0,ENSG00000245105,Gene Expression,...,10.0,ENSG00000245105,Gene Expression,GRCh38,36.0,ENSG00000245105,Gene Expression,GRCh38,25.0,A2M-AS1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ENSG00000070476,ENSG00000070476,Gene Expression,GRCh38-3.0.0_premrna,1994.0,ENSG00000070476,Gene Expression,GRCh38-3.0.0_premrna,2274.0,ENSG00000070476,Gene Expression,...,2410.0,ENSG00000070476,Gene Expression,GRCh38,2541.0,ENSG00000070476,Gene Expression,GRCh38,2641.0,ZXDC
ENSG00000203995,ENSG00000203995,Gene Expression,GRCh38-3.0.0_premrna,218.0,ENSG00000203995,Gene Expression,GRCh38-3.0.0_premrna,276.0,ENSG00000203995,Gene Expression,...,229.0,ENSG00000203995,Gene Expression,GRCh38,216.0,ENSG00000203995,Gene Expression,GRCh38,226.0,ZYG11A
ENSG00000162378,ENSG00000162378,Gene Expression,GRCh38-3.0.0_premrna,2320.0,ENSG00000162378,Gene Expression,GRCh38-3.0.0_premrna,2526.0,ENSG00000162378,Gene Expression,...,2920.0,ENSG00000162378,Gene Expression,GRCh38,2735.0,ENSG00000162378,Gene Expression,GRCh38,2810.0,ZYG11B
ENSG00000159840,ENSG00000159840,Gene Expression,GRCh38-3.0.0_premrna,1274.0,ENSG00000159840,Gene Expression,GRCh38-3.0.0_premrna,1402.0,ENSG00000159840,Gene Expression,...,1124.0,ENSG00000159840,Gene Expression,GRCh38,1795.0,ENSG00000159840,Gene Expression,GRCh38,1690.0,ZYX


In [40]:
araw.var

Unnamed: 0,gene_ids-0,feature_types-0,genome-0,n_cells-0,gene_ids-1,feature_types-1,genome-1,n_cells-1,gene_ids-2,feature_types-2,...,n_cells-5,gene_ids-6,feature_types-6,genome-6,n_cells-6,gene_ids-7,feature_types-7,genome-7,n_cells-7,gene_symbols
ENSG00000121410,ENSG00000121410,Gene Expression,GRCh38-3.0.0_premrna,105.0,ENSG00000121410,Gene Expression,GRCh38-3.0.0_premrna,119.0,ENSG00000121410,Gene Expression,...,101.0,ENSG00000121410,Gene Expression,GRCh38,159.0,ENSG00000121410,Gene Expression,GRCh38,160.0,A1BG
ENSG00000268895,ENSG00000268895,Gene Expression,GRCh38-3.0.0_premrna,162.0,ENSG00000268895,Gene Expression,GRCh38-3.0.0_premrna,211.0,ENSG00000268895,Gene Expression,...,152.0,ENSG00000268895,Gene Expression,GRCh38,223.0,ENSG00000268895,Gene Expression,GRCh38,217.0,A1BG-AS1
ENSG00000148584,ENSG00000148584,Gene Expression,GRCh38-3.0.0_premrna,12.0,ENSG00000148584,Gene Expression,GRCh38-3.0.0_premrna,11.0,ENSG00000148584,Gene Expression,...,9.0,ENSG00000148584,Gene Expression,GRCh38,12.0,ENSG00000148584,Gene Expression,GRCh38,15.0,A1CF
ENSG00000175899,ENSG00000175899,Gene Expression,GRCh38-3.0.0_premrna,1302.0,ENSG00000175899,Gene Expression,GRCh38-3.0.0_premrna,1356.0,ENSG00000175899,Gene Expression,...,1074.0,ENSG00000175899,Gene Expression,GRCh38,1429.0,ENSG00000175899,Gene Expression,GRCh38,1333.0,A2M
ENSG00000245105,ENSG00000245105,Gene Expression,GRCh38-3.0.0_premrna,20.0,ENSG00000245105,Gene Expression,GRCh38-3.0.0_premrna,18.0,ENSG00000245105,Gene Expression,...,10.0,ENSG00000245105,Gene Expression,GRCh38,36.0,ENSG00000245105,Gene Expression,GRCh38,25.0,A2M-AS1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ENSG00000070476,ENSG00000070476,Gene Expression,GRCh38-3.0.0_premrna,1994.0,ENSG00000070476,Gene Expression,GRCh38-3.0.0_premrna,2274.0,ENSG00000070476,Gene Expression,...,2410.0,ENSG00000070476,Gene Expression,GRCh38,2541.0,ENSG00000070476,Gene Expression,GRCh38,2641.0,ZXDC
ENSG00000203995,ENSG00000203995,Gene Expression,GRCh38-3.0.0_premrna,218.0,ENSG00000203995,Gene Expression,GRCh38-3.0.0_premrna,276.0,ENSG00000203995,Gene Expression,...,229.0,ENSG00000203995,Gene Expression,GRCh38,216.0,ENSG00000203995,Gene Expression,GRCh38,226.0,ZYG11A
ENSG00000162378,ENSG00000162378,Gene Expression,GRCh38-3.0.0_premrna,2320.0,ENSG00000162378,Gene Expression,GRCh38-3.0.0_premrna,2526.0,ENSG00000162378,Gene Expression,...,2920.0,ENSG00000162378,Gene Expression,GRCh38,2735.0,ENSG00000162378,Gene Expression,GRCh38,2810.0,ZYG11B
ENSG00000159840,ENSG00000159840,Gene Expression,GRCh38-3.0.0_premrna,1274.0,ENSG00000159840,Gene Expression,GRCh38-3.0.0_premrna,1402.0,ENSG00000159840,Gene Expression,...,1124.0,ENSG00000159840,Gene Expression,GRCh38,1795.0,ENSG00000159840,Gene Expression,GRCh38,1690.0,ZYX


#### feature is filtered

In [41]:
adata.var['feature_is_filtered'] = [False] * len(adata.var)

In [42]:
#View var

In [43]:
adata.var

Unnamed: 0,gene_ids-0,feature_types-0,genome-0,n_cells-0,gene_ids-1,feature_types-1,genome-1,n_cells-1,gene_ids-2,feature_types-2,...,gene_ids-6,feature_types-6,genome-6,n_cells-6,gene_ids-7,feature_types-7,genome-7,n_cells-7,gene_symbols,feature_is_filtered
ENSG00000121410,ENSG00000121410,Gene Expression,GRCh38-3.0.0_premrna,105.0,ENSG00000121410,Gene Expression,GRCh38-3.0.0_premrna,119.0,ENSG00000121410,Gene Expression,...,ENSG00000121410,Gene Expression,GRCh38,159.0,ENSG00000121410,Gene Expression,GRCh38,160.0,A1BG,False
ENSG00000268895,ENSG00000268895,Gene Expression,GRCh38-3.0.0_premrna,162.0,ENSG00000268895,Gene Expression,GRCh38-3.0.0_premrna,211.0,ENSG00000268895,Gene Expression,...,ENSG00000268895,Gene Expression,GRCh38,223.0,ENSG00000268895,Gene Expression,GRCh38,217.0,A1BG-AS1,False
ENSG00000148584,ENSG00000148584,Gene Expression,GRCh38-3.0.0_premrna,12.0,ENSG00000148584,Gene Expression,GRCh38-3.0.0_premrna,11.0,ENSG00000148584,Gene Expression,...,ENSG00000148584,Gene Expression,GRCh38,12.0,ENSG00000148584,Gene Expression,GRCh38,15.0,A1CF,False
ENSG00000175899,ENSG00000175899,Gene Expression,GRCh38-3.0.0_premrna,1302.0,ENSG00000175899,Gene Expression,GRCh38-3.0.0_premrna,1356.0,ENSG00000175899,Gene Expression,...,ENSG00000175899,Gene Expression,GRCh38,1429.0,ENSG00000175899,Gene Expression,GRCh38,1333.0,A2M,False
ENSG00000245105,ENSG00000245105,Gene Expression,GRCh38-3.0.0_premrna,20.0,ENSG00000245105,Gene Expression,GRCh38-3.0.0_premrna,18.0,ENSG00000245105,Gene Expression,...,ENSG00000245105,Gene Expression,GRCh38,36.0,ENSG00000245105,Gene Expression,GRCh38,25.0,A2M-AS1,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ENSG00000070476,ENSG00000070476,Gene Expression,GRCh38-3.0.0_premrna,1994.0,ENSG00000070476,Gene Expression,GRCh38-3.0.0_premrna,2274.0,ENSG00000070476,Gene Expression,...,ENSG00000070476,Gene Expression,GRCh38,2541.0,ENSG00000070476,Gene Expression,GRCh38,2641.0,ZXDC,False
ENSG00000203995,ENSG00000203995,Gene Expression,GRCh38-3.0.0_premrna,218.0,ENSG00000203995,Gene Expression,GRCh38-3.0.0_premrna,276.0,ENSG00000203995,Gene Expression,...,ENSG00000203995,Gene Expression,GRCh38,216.0,ENSG00000203995,Gene Expression,GRCh38,226.0,ZYG11A,False
ENSG00000162378,ENSG00000162378,Gene Expression,GRCh38-3.0.0_premrna,2320.0,ENSG00000162378,Gene Expression,GRCh38-3.0.0_premrna,2526.0,ENSG00000162378,Gene Expression,...,ENSG00000162378,Gene Expression,GRCh38,2735.0,ENSG00000162378,Gene Expression,GRCh38,2810.0,ZYG11B,False
ENSG00000159840,ENSG00000159840,Gene Expression,GRCh38-3.0.0_premrna,1274.0,ENSG00000159840,Gene Expression,GRCh38-3.0.0_premrna,1402.0,ENSG00000159840,Gene Expression,...,ENSG00000159840,Gene Expression,GRCh38,1795.0,ENSG00000159840,Gene Expression,GRCh38,1690.0,ZYX,False


In [44]:
araw.var

Unnamed: 0,gene_ids-0,feature_types-0,genome-0,n_cells-0,gene_ids-1,feature_types-1,genome-1,n_cells-1,gene_ids-2,feature_types-2,...,n_cells-5,gene_ids-6,feature_types-6,genome-6,n_cells-6,gene_ids-7,feature_types-7,genome-7,n_cells-7,gene_symbols
ENSG00000121410,ENSG00000121410,Gene Expression,GRCh38-3.0.0_premrna,105.0,ENSG00000121410,Gene Expression,GRCh38-3.0.0_premrna,119.0,ENSG00000121410,Gene Expression,...,101.0,ENSG00000121410,Gene Expression,GRCh38,159.0,ENSG00000121410,Gene Expression,GRCh38,160.0,A1BG
ENSG00000268895,ENSG00000268895,Gene Expression,GRCh38-3.0.0_premrna,162.0,ENSG00000268895,Gene Expression,GRCh38-3.0.0_premrna,211.0,ENSG00000268895,Gene Expression,...,152.0,ENSG00000268895,Gene Expression,GRCh38,223.0,ENSG00000268895,Gene Expression,GRCh38,217.0,A1BG-AS1
ENSG00000148584,ENSG00000148584,Gene Expression,GRCh38-3.0.0_premrna,12.0,ENSG00000148584,Gene Expression,GRCh38-3.0.0_premrna,11.0,ENSG00000148584,Gene Expression,...,9.0,ENSG00000148584,Gene Expression,GRCh38,12.0,ENSG00000148584,Gene Expression,GRCh38,15.0,A1CF
ENSG00000175899,ENSG00000175899,Gene Expression,GRCh38-3.0.0_premrna,1302.0,ENSG00000175899,Gene Expression,GRCh38-3.0.0_premrna,1356.0,ENSG00000175899,Gene Expression,...,1074.0,ENSG00000175899,Gene Expression,GRCh38,1429.0,ENSG00000175899,Gene Expression,GRCh38,1333.0,A2M
ENSG00000245105,ENSG00000245105,Gene Expression,GRCh38-3.0.0_premrna,20.0,ENSG00000245105,Gene Expression,GRCh38-3.0.0_premrna,18.0,ENSG00000245105,Gene Expression,...,10.0,ENSG00000245105,Gene Expression,GRCh38,36.0,ENSG00000245105,Gene Expression,GRCh38,25.0,A2M-AS1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ENSG00000070476,ENSG00000070476,Gene Expression,GRCh38-3.0.0_premrna,1994.0,ENSG00000070476,Gene Expression,GRCh38-3.0.0_premrna,2274.0,ENSG00000070476,Gene Expression,...,2410.0,ENSG00000070476,Gene Expression,GRCh38,2541.0,ENSG00000070476,Gene Expression,GRCh38,2641.0,ZXDC
ENSG00000203995,ENSG00000203995,Gene Expression,GRCh38-3.0.0_premrna,218.0,ENSG00000203995,Gene Expression,GRCh38-3.0.0_premrna,276.0,ENSG00000203995,Gene Expression,...,229.0,ENSG00000203995,Gene Expression,GRCh38,216.0,ENSG00000203995,Gene Expression,GRCh38,226.0,ZYG11A
ENSG00000162378,ENSG00000162378,Gene Expression,GRCh38-3.0.0_premrna,2320.0,ENSG00000162378,Gene Expression,GRCh38-3.0.0_premrna,2526.0,ENSG00000162378,Gene Expression,...,2920.0,ENSG00000162378,Gene Expression,GRCh38,2735.0,ENSG00000162378,Gene Expression,GRCh38,2810.0,ZYG11B
ENSG00000159840,ENSG00000159840,Gene Expression,GRCh38-3.0.0_premrna,1274.0,ENSG00000159840,Gene Expression,GRCh38-3.0.0_premrna,1402.0,ENSG00000159840,Gene Expression,...,1124.0,ENSG00000159840,Gene Expression,GRCh38,1795.0,ENSG00000159840,Gene Expression,GRCh38,1690.0,ZYX


## obs (Cell metadata)

In [45]:
#view obs

In [46]:
adata.obs

Unnamed: 0_level_0,n_genes,donor,tissue_block,age,sample,n_counts,dataset,technique,batch,barcode,S_score,G2M_score,phase,origin_M_F,annotation_prev_or_removed,final_annot_all_troph_corrected
barcode_sample,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
ACCTGAAAGGATGGCT-1_Pla_Camb10691970,2595.0,P13,unknown,8-9_PCW,Pla_Camb10691970,3433.483154,snRNA-seq,10X,4,ACCTGAAAGGATGGCT-1_Pla_Camb10691970,-0.188796,-0.100957,G1,F,0_none,SCT
CTACAGAAGAGGCTGT-1_WSSS_PLA8810751,3291.0,P13,PU8-9_B2,8-9_PCW,WSSS_PLA8810751,3979.873535,snRNA-seq,10X,3,CTACAGAAGAGGCTGT-1_WSSS_PLA8810751,-0.173719,-0.090667,G1,F,VCT_p,VCT
ATTTACCCATGGAACG-1_WSSS_PLA8810750,2387.0,P13,PU8-9_B2,8-9_PCW,WSSS_PLA8810750,3165.444824,snRNA-seq,10X,2,ATTTACCCATGGAACG-1_WSSS_PLA8810750,-0.078127,-0.109307,G1,F,SCT,SCT
ACCATTTGTGTCTTCC-1_Pla_Camb10691970,1862.0,P13,unknown,8-9_PCW,Pla_Camb10691970,2897.327393,snRNA-seq,10X,4,ACCATTTGTGTCTTCC-1_Pla_Camb10691970,-0.082188,-0.109189,G1,F,0_none,SCT
TCATTTGGTCCAGTTA-1_WSSS_PLA8810750,2518.0,P13,PU8-9_B2,8-9_PCW,WSSS_PLA8810750,3417.379150,snRNA-seq,10X,2,TCATTTGGTCCAGTTA-1_WSSS_PLA8810750,-0.116485,-0.087987,G1,F,iEVT,iEVT
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
GTGCTTCTCCCGTAAA-1_WSSS_PLA8810751,2391.0,P13,PU8-9_B2,8-9_PCW,WSSS_PLA8810751,3568.117920,snRNA-seq,10X,3,GTGCTTCTCCCGTAAA-1_WSSS_PLA8810751,-0.003675,0.007817,G2M,F,removed,SCT
CGATGCGCAATTGCCA-1_WSSS_PLA8810751,3525.0,P13,PU8-9_B2,8-9_PCW,WSSS_PLA8810751,4057.335693,snRNA-seq,10X,3,CGATGCGCAATTGCCA-1_WSSS_PLA8810751,-0.157740,-0.174682,G1,F,VCT,VCT
TTACAGCAGGAAACTG-1_Pla_Camb10714920_and_40110_Pla_Camb10687916,4764.0,P13,unknown,8-9_PCW,Pla_Camb10714920_and_40110_Pla_Camb10687916,3999.327637,snRNA-seq,10X,7,TTACAGCAGGAAACTG-1_Pla_Camb10714920_and_40110_...,-0.161988,-0.148416,G1,F,0_none,EVT_2
AGCATCACATATGAAG-1_WSSS_PLA8810750,2431.0,P13,PU8-9_B2,8-9_PCW,WSSS_PLA8810750,3604.964111,snRNA-seq,10X,2,AGCATCACATATGAAG-1_WSSS_PLA8810750,0.045677,0.480976,G2M,F,EVT_1,EVT_1


#### assay_ontology_term_id

In [47]:
adata.obs['assay_ontology_term_id'] = ['EFO:0030080']* len(adata.obs)

In [48]:
adata.obs

Unnamed: 0_level_0,n_genes,donor,tissue_block,age,sample,n_counts,dataset,technique,batch,barcode,S_score,G2M_score,phase,origin_M_F,annotation_prev_or_removed,final_annot_all_troph_corrected,assay_ontology_term_id
barcode_sample,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
ACCTGAAAGGATGGCT-1_Pla_Camb10691970,2595.0,P13,unknown,8-9_PCW,Pla_Camb10691970,3433.483154,snRNA-seq,10X,4,ACCTGAAAGGATGGCT-1_Pla_Camb10691970,-0.188796,-0.100957,G1,F,0_none,SCT,EFO:0030080
CTACAGAAGAGGCTGT-1_WSSS_PLA8810751,3291.0,P13,PU8-9_B2,8-9_PCW,WSSS_PLA8810751,3979.873535,snRNA-seq,10X,3,CTACAGAAGAGGCTGT-1_WSSS_PLA8810751,-0.173719,-0.090667,G1,F,VCT_p,VCT,EFO:0030080
ATTTACCCATGGAACG-1_WSSS_PLA8810750,2387.0,P13,PU8-9_B2,8-9_PCW,WSSS_PLA8810750,3165.444824,snRNA-seq,10X,2,ATTTACCCATGGAACG-1_WSSS_PLA8810750,-0.078127,-0.109307,G1,F,SCT,SCT,EFO:0030080
ACCATTTGTGTCTTCC-1_Pla_Camb10691970,1862.0,P13,unknown,8-9_PCW,Pla_Camb10691970,2897.327393,snRNA-seq,10X,4,ACCATTTGTGTCTTCC-1_Pla_Camb10691970,-0.082188,-0.109189,G1,F,0_none,SCT,EFO:0030080
TCATTTGGTCCAGTTA-1_WSSS_PLA8810750,2518.0,P13,PU8-9_B2,8-9_PCW,WSSS_PLA8810750,3417.379150,snRNA-seq,10X,2,TCATTTGGTCCAGTTA-1_WSSS_PLA8810750,-0.116485,-0.087987,G1,F,iEVT,iEVT,EFO:0030080
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
GTGCTTCTCCCGTAAA-1_WSSS_PLA8810751,2391.0,P13,PU8-9_B2,8-9_PCW,WSSS_PLA8810751,3568.117920,snRNA-seq,10X,3,GTGCTTCTCCCGTAAA-1_WSSS_PLA8810751,-0.003675,0.007817,G2M,F,removed,SCT,EFO:0030080
CGATGCGCAATTGCCA-1_WSSS_PLA8810751,3525.0,P13,PU8-9_B2,8-9_PCW,WSSS_PLA8810751,4057.335693,snRNA-seq,10X,3,CGATGCGCAATTGCCA-1_WSSS_PLA8810751,-0.157740,-0.174682,G1,F,VCT,VCT,EFO:0030080
TTACAGCAGGAAACTG-1_Pla_Camb10714920_and_40110_Pla_Camb10687916,4764.0,P13,unknown,8-9_PCW,Pla_Camb10714920_and_40110_Pla_Camb10687916,3999.327637,snRNA-seq,10X,7,TTACAGCAGGAAACTG-1_Pla_Camb10714920_and_40110_...,-0.161988,-0.148416,G1,F,0_none,EVT_2,EFO:0030080
AGCATCACATATGAAG-1_WSSS_PLA8810750,2431.0,P13,PU8-9_B2,8-9_PCW,WSSS_PLA8810750,3604.964111,snRNA-seq,10X,2,AGCATCACATATGAAG-1_WSSS_PLA8810750,0.045677,0.480976,G2M,F,EVT_1,EVT_1,EFO:0030080


#### cell_type_ontology_term_id

In [49]:
#get the column in adata.obs related. to cell type annotation

In [50]:
list(adata.obs['final_annot_all_troph_corrected'].unique())

['SCT',
 'VCT',
 'iEVT',
 'VCT_CCC',
 'EVT_1',
 'GC',
 'VCT_fusing',
 'VCT_p',
 'EVT_2',
 'eEVT']

In [51]:
cl_tropho = pd.read_csv('/home/jovyan/CXG_DATASETS_PORTAL/trophoblast/cl_tropho.csv')

In [52]:
mapping = dict(zip(cl_tropho['cell_type'], cl_tropho['CL ID']))

In [53]:
mapping

{'dNK1': 'CL:0002343',
 'dT_cells': 'CL:0000084',
 'T_cells': 'CL:0000084',
 'dNK2': 'CL:0002343',
 'ILC3': 'CL:0001078',
 'dNK3': 'CL:0002343',
 'dT_regs': 'CL:0000815',
 'dM1': 'CL:0000235',
 'HOFB': 'CL:3000001',
 'B_cells': 'CL:0000236',
 'dDC': 'CL:0000451',
 'dM2': 'CL:0000235',
 'dS2': 'CL:0000499',
 'M3': 'CL:0000235',
 'VCT': 'CL:2000060',
 'NK': 'CL:0000623',
 'Granulocytes': 'CL:0000094',
 'uSMC': 'CL:0002601',
 'Endo_F': 'CL:0009092',
 'DC': 'CL:0000451',
 'dEpi_secretory': 'CL:0000066',
 'MO': 'CL:0000235',
 'EVT_2': 'CL:0008036',
 'Endo_M': 'CL:0009095',
 'SCT': 'CL:0000525',
 'dS1': 'CL:0000499',
 'dS3': 'CL:0000499',
 'dEpi_lumenal': 'CL:0000066',
 'iEVT': 'CL:0008036',
 'Endo_L': 'CL:0002138',
 'PV MMP11': 'CL:0000003',
 'PVMMP11': 'CL:0000003',
 'PVSTEAP4': 'CL:0000003',
 'PV STEAP4': 'CL:0000003',
 'EVT_1': 'CL:0008036',
 'PVAOC3': 'CL:0000003',
 'PV AOC3': 'CL:0000003',
 'fF1': 'CL:2000042',
 'VCT_CCC': 'CL:2000060',
 'fF2': 'CL:2000042',
 'VCT_p': 'CL:2000060',
 'P

In [54]:
adata.obs['cell_type_ontology_term_id'] = adata.obs['final_annot_all_troph_corrected'].map(mapping)

In [55]:
adata.obs['cell_type_ontology_term_id'] = adata.obs['cell_type_ontology_term_id'].astype('category')

In [56]:
adata.obs

Unnamed: 0_level_0,n_genes,donor,tissue_block,age,sample,n_counts,dataset,technique,batch,barcode,S_score,G2M_score,phase,origin_M_F,annotation_prev_or_removed,final_annot_all_troph_corrected,assay_ontology_term_id,cell_type_ontology_term_id
barcode_sample,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
ACCTGAAAGGATGGCT-1_Pla_Camb10691970,2595.0,P13,unknown,8-9_PCW,Pla_Camb10691970,3433.483154,snRNA-seq,10X,4,ACCTGAAAGGATGGCT-1_Pla_Camb10691970,-0.188796,-0.100957,G1,F,0_none,SCT,EFO:0030080,CL:0000525
CTACAGAAGAGGCTGT-1_WSSS_PLA8810751,3291.0,P13,PU8-9_B2,8-9_PCW,WSSS_PLA8810751,3979.873535,snRNA-seq,10X,3,CTACAGAAGAGGCTGT-1_WSSS_PLA8810751,-0.173719,-0.090667,G1,F,VCT_p,VCT,EFO:0030080,CL:2000060
ATTTACCCATGGAACG-1_WSSS_PLA8810750,2387.0,P13,PU8-9_B2,8-9_PCW,WSSS_PLA8810750,3165.444824,snRNA-seq,10X,2,ATTTACCCATGGAACG-1_WSSS_PLA8810750,-0.078127,-0.109307,G1,F,SCT,SCT,EFO:0030080,CL:0000525
ACCATTTGTGTCTTCC-1_Pla_Camb10691970,1862.0,P13,unknown,8-9_PCW,Pla_Camb10691970,2897.327393,snRNA-seq,10X,4,ACCATTTGTGTCTTCC-1_Pla_Camb10691970,-0.082188,-0.109189,G1,F,0_none,SCT,EFO:0030080,CL:0000525
TCATTTGGTCCAGTTA-1_WSSS_PLA8810750,2518.0,P13,PU8-9_B2,8-9_PCW,WSSS_PLA8810750,3417.379150,snRNA-seq,10X,2,TCATTTGGTCCAGTTA-1_WSSS_PLA8810750,-0.116485,-0.087987,G1,F,iEVT,iEVT,EFO:0030080,CL:0008036
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
GTGCTTCTCCCGTAAA-1_WSSS_PLA8810751,2391.0,P13,PU8-9_B2,8-9_PCW,WSSS_PLA8810751,3568.117920,snRNA-seq,10X,3,GTGCTTCTCCCGTAAA-1_WSSS_PLA8810751,-0.003675,0.007817,G2M,F,removed,SCT,EFO:0030080,CL:0000525
CGATGCGCAATTGCCA-1_WSSS_PLA8810751,3525.0,P13,PU8-9_B2,8-9_PCW,WSSS_PLA8810751,4057.335693,snRNA-seq,10X,3,CGATGCGCAATTGCCA-1_WSSS_PLA8810751,-0.157740,-0.174682,G1,F,VCT,VCT,EFO:0030080,CL:2000060
TTACAGCAGGAAACTG-1_Pla_Camb10714920_and_40110_Pla_Camb10687916,4764.0,P13,unknown,8-9_PCW,Pla_Camb10714920_and_40110_Pla_Camb10687916,3999.327637,snRNA-seq,10X,7,TTACAGCAGGAAACTG-1_Pla_Camb10714920_and_40110_...,-0.161988,-0.148416,G1,F,0_none,EVT_2,EFO:0030080,CL:0008036
AGCATCACATATGAAG-1_WSSS_PLA8810750,2431.0,P13,PU8-9_B2,8-9_PCW,WSSS_PLA8810750,3604.964111,snRNA-seq,10X,2,AGCATCACATATGAAG-1_WSSS_PLA8810750,0.045677,0.480976,G2M,F,EVT_1,EVT_1,EFO:0030080,CL:0008036


## development_stage_ontology_term_id

In [57]:
list(adata.obs['age'].unique())

['8-9_PCW']

In [58]:
adata.obs['development_stage_ontology_term_id'] =['HsapDv:0000046'] * len(adata.obs)

In [59]:
adata.obs

Unnamed: 0_level_0,n_genes,donor,tissue_block,age,sample,n_counts,dataset,technique,batch,barcode,S_score,G2M_score,phase,origin_M_F,annotation_prev_or_removed,final_annot_all_troph_corrected,assay_ontology_term_id,cell_type_ontology_term_id,development_stage_ontology_term_id
barcode_sample,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
ACCTGAAAGGATGGCT-1_Pla_Camb10691970,2595.0,P13,unknown,8-9_PCW,Pla_Camb10691970,3433.483154,snRNA-seq,10X,4,ACCTGAAAGGATGGCT-1_Pla_Camb10691970,-0.188796,-0.100957,G1,F,0_none,SCT,EFO:0030080,CL:0000525,HsapDv:0000046
CTACAGAAGAGGCTGT-1_WSSS_PLA8810751,3291.0,P13,PU8-9_B2,8-9_PCW,WSSS_PLA8810751,3979.873535,snRNA-seq,10X,3,CTACAGAAGAGGCTGT-1_WSSS_PLA8810751,-0.173719,-0.090667,G1,F,VCT_p,VCT,EFO:0030080,CL:2000060,HsapDv:0000046
ATTTACCCATGGAACG-1_WSSS_PLA8810750,2387.0,P13,PU8-9_B2,8-9_PCW,WSSS_PLA8810750,3165.444824,snRNA-seq,10X,2,ATTTACCCATGGAACG-1_WSSS_PLA8810750,-0.078127,-0.109307,G1,F,SCT,SCT,EFO:0030080,CL:0000525,HsapDv:0000046
ACCATTTGTGTCTTCC-1_Pla_Camb10691970,1862.0,P13,unknown,8-9_PCW,Pla_Camb10691970,2897.327393,snRNA-seq,10X,4,ACCATTTGTGTCTTCC-1_Pla_Camb10691970,-0.082188,-0.109189,G1,F,0_none,SCT,EFO:0030080,CL:0000525,HsapDv:0000046
TCATTTGGTCCAGTTA-1_WSSS_PLA8810750,2518.0,P13,PU8-9_B2,8-9_PCW,WSSS_PLA8810750,3417.379150,snRNA-seq,10X,2,TCATTTGGTCCAGTTA-1_WSSS_PLA8810750,-0.116485,-0.087987,G1,F,iEVT,iEVT,EFO:0030080,CL:0008036,HsapDv:0000046
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
GTGCTTCTCCCGTAAA-1_WSSS_PLA8810751,2391.0,P13,PU8-9_B2,8-9_PCW,WSSS_PLA8810751,3568.117920,snRNA-seq,10X,3,GTGCTTCTCCCGTAAA-1_WSSS_PLA8810751,-0.003675,0.007817,G2M,F,removed,SCT,EFO:0030080,CL:0000525,HsapDv:0000046
CGATGCGCAATTGCCA-1_WSSS_PLA8810751,3525.0,P13,PU8-9_B2,8-9_PCW,WSSS_PLA8810751,4057.335693,snRNA-seq,10X,3,CGATGCGCAATTGCCA-1_WSSS_PLA8810751,-0.157740,-0.174682,G1,F,VCT,VCT,EFO:0030080,CL:2000060,HsapDv:0000046
TTACAGCAGGAAACTG-1_Pla_Camb10714920_and_40110_Pla_Camb10687916,4764.0,P13,unknown,8-9_PCW,Pla_Camb10714920_and_40110_Pla_Camb10687916,3999.327637,snRNA-seq,10X,7,TTACAGCAGGAAACTG-1_Pla_Camb10714920_and_40110_...,-0.161988,-0.148416,G1,F,0_none,EVT_2,EFO:0030080,CL:0008036,HsapDv:0000046
AGCATCACATATGAAG-1_WSSS_PLA8810750,2431.0,P13,PU8-9_B2,8-9_PCW,WSSS_PLA8810750,3604.964111,snRNA-seq,10X,2,AGCATCACATATGAAG-1_WSSS_PLA8810750,0.045677,0.480976,G2M,F,EVT_1,EVT_1,EFO:0030080,CL:0008036,HsapDv:0000046


#### disease_ontology_term_id

In [60]:
adata.obs['disease_ontology_term_id'] = ['PATO:0000461']* len(adata.obs)

In [61]:
adata.obs

Unnamed: 0_level_0,n_genes,donor,tissue_block,age,sample,n_counts,dataset,technique,batch,barcode,S_score,G2M_score,phase,origin_M_F,annotation_prev_or_removed,final_annot_all_troph_corrected,assay_ontology_term_id,cell_type_ontology_term_id,development_stage_ontology_term_id,disease_ontology_term_id
barcode_sample,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
ACCTGAAAGGATGGCT-1_Pla_Camb10691970,2595.0,P13,unknown,8-9_PCW,Pla_Camb10691970,3433.483154,snRNA-seq,10X,4,ACCTGAAAGGATGGCT-1_Pla_Camb10691970,-0.188796,-0.100957,G1,F,0_none,SCT,EFO:0030080,CL:0000525,HsapDv:0000046,PATO:0000461
CTACAGAAGAGGCTGT-1_WSSS_PLA8810751,3291.0,P13,PU8-9_B2,8-9_PCW,WSSS_PLA8810751,3979.873535,snRNA-seq,10X,3,CTACAGAAGAGGCTGT-1_WSSS_PLA8810751,-0.173719,-0.090667,G1,F,VCT_p,VCT,EFO:0030080,CL:2000060,HsapDv:0000046,PATO:0000461
ATTTACCCATGGAACG-1_WSSS_PLA8810750,2387.0,P13,PU8-9_B2,8-9_PCW,WSSS_PLA8810750,3165.444824,snRNA-seq,10X,2,ATTTACCCATGGAACG-1_WSSS_PLA8810750,-0.078127,-0.109307,G1,F,SCT,SCT,EFO:0030080,CL:0000525,HsapDv:0000046,PATO:0000461
ACCATTTGTGTCTTCC-1_Pla_Camb10691970,1862.0,P13,unknown,8-9_PCW,Pla_Camb10691970,2897.327393,snRNA-seq,10X,4,ACCATTTGTGTCTTCC-1_Pla_Camb10691970,-0.082188,-0.109189,G1,F,0_none,SCT,EFO:0030080,CL:0000525,HsapDv:0000046,PATO:0000461
TCATTTGGTCCAGTTA-1_WSSS_PLA8810750,2518.0,P13,PU8-9_B2,8-9_PCW,WSSS_PLA8810750,3417.379150,snRNA-seq,10X,2,TCATTTGGTCCAGTTA-1_WSSS_PLA8810750,-0.116485,-0.087987,G1,F,iEVT,iEVT,EFO:0030080,CL:0008036,HsapDv:0000046,PATO:0000461
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
GTGCTTCTCCCGTAAA-1_WSSS_PLA8810751,2391.0,P13,PU8-9_B2,8-9_PCW,WSSS_PLA8810751,3568.117920,snRNA-seq,10X,3,GTGCTTCTCCCGTAAA-1_WSSS_PLA8810751,-0.003675,0.007817,G2M,F,removed,SCT,EFO:0030080,CL:0000525,HsapDv:0000046,PATO:0000461
CGATGCGCAATTGCCA-1_WSSS_PLA8810751,3525.0,P13,PU8-9_B2,8-9_PCW,WSSS_PLA8810751,4057.335693,snRNA-seq,10X,3,CGATGCGCAATTGCCA-1_WSSS_PLA8810751,-0.157740,-0.174682,G1,F,VCT,VCT,EFO:0030080,CL:2000060,HsapDv:0000046,PATO:0000461
TTACAGCAGGAAACTG-1_Pla_Camb10714920_and_40110_Pla_Camb10687916,4764.0,P13,unknown,8-9_PCW,Pla_Camb10714920_and_40110_Pla_Camb10687916,3999.327637,snRNA-seq,10X,7,TTACAGCAGGAAACTG-1_Pla_Camb10714920_and_40110_...,-0.161988,-0.148416,G1,F,0_none,EVT_2,EFO:0030080,CL:0008036,HsapDv:0000046,PATO:0000461
AGCATCACATATGAAG-1_WSSS_PLA8810750,2431.0,P13,PU8-9_B2,8-9_PCW,WSSS_PLA8810750,3604.964111,snRNA-seq,10X,2,AGCATCACATATGAAG-1_WSSS_PLA8810750,0.045677,0.480976,G2M,F,EVT_1,EVT_1,EFO:0030080,CL:0008036,HsapDv:0000046,PATO:0000461


#### donor_id

In [62]:
adata.obs['donor_id'] = adata.obs['donor']

In [63]:
adata.obs

Unnamed: 0_level_0,n_genes,donor,tissue_block,age,sample,n_counts,dataset,technique,batch,barcode,...,G2M_score,phase,origin_M_F,annotation_prev_or_removed,final_annot_all_troph_corrected,assay_ontology_term_id,cell_type_ontology_term_id,development_stage_ontology_term_id,disease_ontology_term_id,donor_id
barcode_sample,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ACCTGAAAGGATGGCT-1_Pla_Camb10691970,2595.0,P13,unknown,8-9_PCW,Pla_Camb10691970,3433.483154,snRNA-seq,10X,4,ACCTGAAAGGATGGCT-1_Pla_Camb10691970,...,-0.100957,G1,F,0_none,SCT,EFO:0030080,CL:0000525,HsapDv:0000046,PATO:0000461,P13
CTACAGAAGAGGCTGT-1_WSSS_PLA8810751,3291.0,P13,PU8-9_B2,8-9_PCW,WSSS_PLA8810751,3979.873535,snRNA-seq,10X,3,CTACAGAAGAGGCTGT-1_WSSS_PLA8810751,...,-0.090667,G1,F,VCT_p,VCT,EFO:0030080,CL:2000060,HsapDv:0000046,PATO:0000461,P13
ATTTACCCATGGAACG-1_WSSS_PLA8810750,2387.0,P13,PU8-9_B2,8-9_PCW,WSSS_PLA8810750,3165.444824,snRNA-seq,10X,2,ATTTACCCATGGAACG-1_WSSS_PLA8810750,...,-0.109307,G1,F,SCT,SCT,EFO:0030080,CL:0000525,HsapDv:0000046,PATO:0000461,P13
ACCATTTGTGTCTTCC-1_Pla_Camb10691970,1862.0,P13,unknown,8-9_PCW,Pla_Camb10691970,2897.327393,snRNA-seq,10X,4,ACCATTTGTGTCTTCC-1_Pla_Camb10691970,...,-0.109189,G1,F,0_none,SCT,EFO:0030080,CL:0000525,HsapDv:0000046,PATO:0000461,P13
TCATTTGGTCCAGTTA-1_WSSS_PLA8810750,2518.0,P13,PU8-9_B2,8-9_PCW,WSSS_PLA8810750,3417.379150,snRNA-seq,10X,2,TCATTTGGTCCAGTTA-1_WSSS_PLA8810750,...,-0.087987,G1,F,iEVT,iEVT,EFO:0030080,CL:0008036,HsapDv:0000046,PATO:0000461,P13
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
GTGCTTCTCCCGTAAA-1_WSSS_PLA8810751,2391.0,P13,PU8-9_B2,8-9_PCW,WSSS_PLA8810751,3568.117920,snRNA-seq,10X,3,GTGCTTCTCCCGTAAA-1_WSSS_PLA8810751,...,0.007817,G2M,F,removed,SCT,EFO:0030080,CL:0000525,HsapDv:0000046,PATO:0000461,P13
CGATGCGCAATTGCCA-1_WSSS_PLA8810751,3525.0,P13,PU8-9_B2,8-9_PCW,WSSS_PLA8810751,4057.335693,snRNA-seq,10X,3,CGATGCGCAATTGCCA-1_WSSS_PLA8810751,...,-0.174682,G1,F,VCT,VCT,EFO:0030080,CL:2000060,HsapDv:0000046,PATO:0000461,P13
TTACAGCAGGAAACTG-1_Pla_Camb10714920_and_40110_Pla_Camb10687916,4764.0,P13,unknown,8-9_PCW,Pla_Camb10714920_and_40110_Pla_Camb10687916,3999.327637,snRNA-seq,10X,7,TTACAGCAGGAAACTG-1_Pla_Camb10714920_and_40110_...,...,-0.148416,G1,F,0_none,EVT_2,EFO:0030080,CL:0008036,HsapDv:0000046,PATO:0000461,P13
AGCATCACATATGAAG-1_WSSS_PLA8810750,2431.0,P13,PU8-9_B2,8-9_PCW,WSSS_PLA8810750,3604.964111,snRNA-seq,10X,2,AGCATCACATATGAAG-1_WSSS_PLA8810750,...,0.480976,G2M,F,EVT_1,EVT_1,EFO:0030080,CL:0008036,HsapDv:0000046,PATO:0000461,P13


#### is_primary_data

In [64]:
adata.obs['is_primary_data'] = ['True'] * len(adata.obs)

In [65]:
adata.obs['is_primary_data'] = adata.obs['is_primary_data'].astype('bool')

#### organism_ontology_term_id

In [66]:
adata.obs['organism_ontology_term_id'] = ['NCBITaxon:9606'] * len(adata.obs)

In [67]:
adata.obs['organism_ontology_term_id'] = pd.Categorical(adata.obs['organism_ontology_term_id'])

In [68]:
adata.obs['organism_ontology_term_id']

barcode_sample
ACCTGAAAGGATGGCT-1_Pla_Camb10691970                               NCBITaxon:9606
CTACAGAAGAGGCTGT-1_WSSS_PLA8810751                                NCBITaxon:9606
ATTTACCCATGGAACG-1_WSSS_PLA8810750                                NCBITaxon:9606
ACCATTTGTGTCTTCC-1_Pla_Camb10691970                               NCBITaxon:9606
TCATTTGGTCCAGTTA-1_WSSS_PLA8810750                                NCBITaxon:9606
                                                                       ...      
GTGCTTCTCCCGTAAA-1_WSSS_PLA8810751                                NCBITaxon:9606
CGATGCGCAATTGCCA-1_WSSS_PLA8810751                                NCBITaxon:9606
TTACAGCAGGAAACTG-1_Pla_Camb10714920_and_40110_Pla_Camb10687916    NCBITaxon:9606
AGCATCACATATGAAG-1_WSSS_PLA8810750                                NCBITaxon:9606
GTCGAATGTTAAGAAC-1_WSSS_PLA8764121                                NCBITaxon:9606
Name: organism_ontology_term_id, Length: 37675, dtype: category
Categories (1, object): ['NCBI

#### self_reported_ethnicity_ontology_term_id

In [69]:
adata.obs['self_reported_ethnicity_ontology_term_id'] = ['unknown'] * len(adata.obs)

In [70]:
adata.obs['self_reported_ethnicity_ontology_term_id'] = adata.obs['self_reported_ethnicity_ontology_term_id'].astype('category')

In [71]:
adata.obs['self_reported_ethnicity_ontology_term_id']

barcode_sample
ACCTGAAAGGATGGCT-1_Pla_Camb10691970                               unknown
CTACAGAAGAGGCTGT-1_WSSS_PLA8810751                                unknown
ATTTACCCATGGAACG-1_WSSS_PLA8810750                                unknown
ACCATTTGTGTCTTCC-1_Pla_Camb10691970                               unknown
TCATTTGGTCCAGTTA-1_WSSS_PLA8810750                                unknown
                                                                   ...   
GTGCTTCTCCCGTAAA-1_WSSS_PLA8810751                                unknown
CGATGCGCAATTGCCA-1_WSSS_PLA8810751                                unknown
TTACAGCAGGAAACTG-1_Pla_Camb10714920_and_40110_Pla_Camb10687916    unknown
AGCATCACATATGAAG-1_WSSS_PLA8810750                                unknown
GTCGAATGTTAAGAAC-1_WSSS_PLA8764121                                unknown
Name: self_reported_ethnicity_ontology_term_id, Length: 37675, dtype: category
Categories (1, object): ['unknown']

In [72]:
adata.obs.columns

Index(['n_genes', 'donor', 'tissue_block', 'age', 'sample', 'n_counts',
       'dataset', 'technique', 'batch', 'barcode', 'S_score', 'G2M_score',
       'phase', 'origin_M_F', 'annotation_prev_or_removed',
       'final_annot_all_troph_corrected', 'assay_ontology_term_id',
       'cell_type_ontology_term_id', 'development_stage_ontology_term_id',
       'disease_ontology_term_id', 'donor_id', 'is_primary_data',
       'organism_ontology_term_id',
       'self_reported_ethnicity_ontology_term_id'],
      dtype='object')

#### sex_ontology_term_id

In [73]:
adata.obs['sex_ontology_term_id'] = ['unknown'] * len(adata.obs)

In [74]:
adata.obs

Unnamed: 0_level_0,n_genes,donor,tissue_block,age,sample,n_counts,dataset,technique,batch,barcode,...,final_annot_all_troph_corrected,assay_ontology_term_id,cell_type_ontology_term_id,development_stage_ontology_term_id,disease_ontology_term_id,donor_id,is_primary_data,organism_ontology_term_id,self_reported_ethnicity_ontology_term_id,sex_ontology_term_id
barcode_sample,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ACCTGAAAGGATGGCT-1_Pla_Camb10691970,2595.0,P13,unknown,8-9_PCW,Pla_Camb10691970,3433.483154,snRNA-seq,10X,4,ACCTGAAAGGATGGCT-1_Pla_Camb10691970,...,SCT,EFO:0030080,CL:0000525,HsapDv:0000046,PATO:0000461,P13,True,NCBITaxon:9606,unknown,unknown
CTACAGAAGAGGCTGT-1_WSSS_PLA8810751,3291.0,P13,PU8-9_B2,8-9_PCW,WSSS_PLA8810751,3979.873535,snRNA-seq,10X,3,CTACAGAAGAGGCTGT-1_WSSS_PLA8810751,...,VCT,EFO:0030080,CL:2000060,HsapDv:0000046,PATO:0000461,P13,True,NCBITaxon:9606,unknown,unknown
ATTTACCCATGGAACG-1_WSSS_PLA8810750,2387.0,P13,PU8-9_B2,8-9_PCW,WSSS_PLA8810750,3165.444824,snRNA-seq,10X,2,ATTTACCCATGGAACG-1_WSSS_PLA8810750,...,SCT,EFO:0030080,CL:0000525,HsapDv:0000046,PATO:0000461,P13,True,NCBITaxon:9606,unknown,unknown
ACCATTTGTGTCTTCC-1_Pla_Camb10691970,1862.0,P13,unknown,8-9_PCW,Pla_Camb10691970,2897.327393,snRNA-seq,10X,4,ACCATTTGTGTCTTCC-1_Pla_Camb10691970,...,SCT,EFO:0030080,CL:0000525,HsapDv:0000046,PATO:0000461,P13,True,NCBITaxon:9606,unknown,unknown
TCATTTGGTCCAGTTA-1_WSSS_PLA8810750,2518.0,P13,PU8-9_B2,8-9_PCW,WSSS_PLA8810750,3417.379150,snRNA-seq,10X,2,TCATTTGGTCCAGTTA-1_WSSS_PLA8810750,...,iEVT,EFO:0030080,CL:0008036,HsapDv:0000046,PATO:0000461,P13,True,NCBITaxon:9606,unknown,unknown
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
GTGCTTCTCCCGTAAA-1_WSSS_PLA8810751,2391.0,P13,PU8-9_B2,8-9_PCW,WSSS_PLA8810751,3568.117920,snRNA-seq,10X,3,GTGCTTCTCCCGTAAA-1_WSSS_PLA8810751,...,SCT,EFO:0030080,CL:0000525,HsapDv:0000046,PATO:0000461,P13,True,NCBITaxon:9606,unknown,unknown
CGATGCGCAATTGCCA-1_WSSS_PLA8810751,3525.0,P13,PU8-9_B2,8-9_PCW,WSSS_PLA8810751,4057.335693,snRNA-seq,10X,3,CGATGCGCAATTGCCA-1_WSSS_PLA8810751,...,VCT,EFO:0030080,CL:2000060,HsapDv:0000046,PATO:0000461,P13,True,NCBITaxon:9606,unknown,unknown
TTACAGCAGGAAACTG-1_Pla_Camb10714920_and_40110_Pla_Camb10687916,4764.0,P13,unknown,8-9_PCW,Pla_Camb10714920_and_40110_Pla_Camb10687916,3999.327637,snRNA-seq,10X,7,TTACAGCAGGAAACTG-1_Pla_Camb10714920_and_40110_...,...,EVT_2,EFO:0030080,CL:0008036,HsapDv:0000046,PATO:0000461,P13,True,NCBITaxon:9606,unknown,unknown
AGCATCACATATGAAG-1_WSSS_PLA8810750,2431.0,P13,PU8-9_B2,8-9_PCW,WSSS_PLA8810750,3604.964111,snRNA-seq,10X,2,AGCATCACATATGAAG-1_WSSS_PLA8810750,...,EVT_1,EFO:0030080,CL:0008036,HsapDv:0000046,PATO:0000461,P13,True,NCBITaxon:9606,unknown,unknown


### suspension_type

In [75]:
list(adata.obs['dataset'].unique())

['snRNA-seq']

In [76]:
adata.obs['suspension_type'] = ['nucleus'] * len(adata.obs)

In [77]:
adata.obs

Unnamed: 0_level_0,n_genes,donor,tissue_block,age,sample,n_counts,dataset,technique,batch,barcode,...,assay_ontology_term_id,cell_type_ontology_term_id,development_stage_ontology_term_id,disease_ontology_term_id,donor_id,is_primary_data,organism_ontology_term_id,self_reported_ethnicity_ontology_term_id,sex_ontology_term_id,suspension_type
barcode_sample,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ACCTGAAAGGATGGCT-1_Pla_Camb10691970,2595.0,P13,unknown,8-9_PCW,Pla_Camb10691970,3433.483154,snRNA-seq,10X,4,ACCTGAAAGGATGGCT-1_Pla_Camb10691970,...,EFO:0030080,CL:0000525,HsapDv:0000046,PATO:0000461,P13,True,NCBITaxon:9606,unknown,unknown,nucleus
CTACAGAAGAGGCTGT-1_WSSS_PLA8810751,3291.0,P13,PU8-9_B2,8-9_PCW,WSSS_PLA8810751,3979.873535,snRNA-seq,10X,3,CTACAGAAGAGGCTGT-1_WSSS_PLA8810751,...,EFO:0030080,CL:2000060,HsapDv:0000046,PATO:0000461,P13,True,NCBITaxon:9606,unknown,unknown,nucleus
ATTTACCCATGGAACG-1_WSSS_PLA8810750,2387.0,P13,PU8-9_B2,8-9_PCW,WSSS_PLA8810750,3165.444824,snRNA-seq,10X,2,ATTTACCCATGGAACG-1_WSSS_PLA8810750,...,EFO:0030080,CL:0000525,HsapDv:0000046,PATO:0000461,P13,True,NCBITaxon:9606,unknown,unknown,nucleus
ACCATTTGTGTCTTCC-1_Pla_Camb10691970,1862.0,P13,unknown,8-9_PCW,Pla_Camb10691970,2897.327393,snRNA-seq,10X,4,ACCATTTGTGTCTTCC-1_Pla_Camb10691970,...,EFO:0030080,CL:0000525,HsapDv:0000046,PATO:0000461,P13,True,NCBITaxon:9606,unknown,unknown,nucleus
TCATTTGGTCCAGTTA-1_WSSS_PLA8810750,2518.0,P13,PU8-9_B2,8-9_PCW,WSSS_PLA8810750,3417.379150,snRNA-seq,10X,2,TCATTTGGTCCAGTTA-1_WSSS_PLA8810750,...,EFO:0030080,CL:0008036,HsapDv:0000046,PATO:0000461,P13,True,NCBITaxon:9606,unknown,unknown,nucleus
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
GTGCTTCTCCCGTAAA-1_WSSS_PLA8810751,2391.0,P13,PU8-9_B2,8-9_PCW,WSSS_PLA8810751,3568.117920,snRNA-seq,10X,3,GTGCTTCTCCCGTAAA-1_WSSS_PLA8810751,...,EFO:0030080,CL:0000525,HsapDv:0000046,PATO:0000461,P13,True,NCBITaxon:9606,unknown,unknown,nucleus
CGATGCGCAATTGCCA-1_WSSS_PLA8810751,3525.0,P13,PU8-9_B2,8-9_PCW,WSSS_PLA8810751,4057.335693,snRNA-seq,10X,3,CGATGCGCAATTGCCA-1_WSSS_PLA8810751,...,EFO:0030080,CL:2000060,HsapDv:0000046,PATO:0000461,P13,True,NCBITaxon:9606,unknown,unknown,nucleus
TTACAGCAGGAAACTG-1_Pla_Camb10714920_and_40110_Pla_Camb10687916,4764.0,P13,unknown,8-9_PCW,Pla_Camb10714920_and_40110_Pla_Camb10687916,3999.327637,snRNA-seq,10X,7,TTACAGCAGGAAACTG-1_Pla_Camb10714920_and_40110_...,...,EFO:0030080,CL:0008036,HsapDv:0000046,PATO:0000461,P13,True,NCBITaxon:9606,unknown,unknown,nucleus
AGCATCACATATGAAG-1_WSSS_PLA8810750,2431.0,P13,PU8-9_B2,8-9_PCW,WSSS_PLA8810750,3604.964111,snRNA-seq,10X,2,AGCATCACATATGAAG-1_WSSS_PLA8810750,...,EFO:0030080,CL:0008036,HsapDv:0000046,PATO:0000461,P13,True,NCBITaxon:9606,unknown,unknown,nucleus


#### tissue_ontology_term_id

In [78]:
adata.obs['tissue_ontology_term_id'] =['UBERON:0000453'] * len(adata.obs)

In [79]:
adata.obs.columns

Index(['n_genes', 'donor', 'tissue_block', 'age', 'sample', 'n_counts',
       'dataset', 'technique', 'batch', 'barcode', 'S_score', 'G2M_score',
       'phase', 'origin_M_F', 'annotation_prev_or_removed',
       'final_annot_all_troph_corrected', 'assay_ontology_term_id',
       'cell_type_ontology_term_id', 'development_stage_ontology_term_id',
       'disease_ontology_term_id', 'donor_id', 'is_primary_data',
       'organism_ontology_term_id', 'self_reported_ethnicity_ontology_term_id',
       'sex_ontology_term_id', 'suspension_type', 'tissue_ontology_term_id'],
      dtype='object')

In [80]:
adata.obs

Unnamed: 0_level_0,n_genes,donor,tissue_block,age,sample,n_counts,dataset,technique,batch,barcode,...,cell_type_ontology_term_id,development_stage_ontology_term_id,disease_ontology_term_id,donor_id,is_primary_data,organism_ontology_term_id,self_reported_ethnicity_ontology_term_id,sex_ontology_term_id,suspension_type,tissue_ontology_term_id
barcode_sample,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ACCTGAAAGGATGGCT-1_Pla_Camb10691970,2595.0,P13,unknown,8-9_PCW,Pla_Camb10691970,3433.483154,snRNA-seq,10X,4,ACCTGAAAGGATGGCT-1_Pla_Camb10691970,...,CL:0000525,HsapDv:0000046,PATO:0000461,P13,True,NCBITaxon:9606,unknown,unknown,nucleus,UBERON:0000453
CTACAGAAGAGGCTGT-1_WSSS_PLA8810751,3291.0,P13,PU8-9_B2,8-9_PCW,WSSS_PLA8810751,3979.873535,snRNA-seq,10X,3,CTACAGAAGAGGCTGT-1_WSSS_PLA8810751,...,CL:2000060,HsapDv:0000046,PATO:0000461,P13,True,NCBITaxon:9606,unknown,unknown,nucleus,UBERON:0000453
ATTTACCCATGGAACG-1_WSSS_PLA8810750,2387.0,P13,PU8-9_B2,8-9_PCW,WSSS_PLA8810750,3165.444824,snRNA-seq,10X,2,ATTTACCCATGGAACG-1_WSSS_PLA8810750,...,CL:0000525,HsapDv:0000046,PATO:0000461,P13,True,NCBITaxon:9606,unknown,unknown,nucleus,UBERON:0000453
ACCATTTGTGTCTTCC-1_Pla_Camb10691970,1862.0,P13,unknown,8-9_PCW,Pla_Camb10691970,2897.327393,snRNA-seq,10X,4,ACCATTTGTGTCTTCC-1_Pla_Camb10691970,...,CL:0000525,HsapDv:0000046,PATO:0000461,P13,True,NCBITaxon:9606,unknown,unknown,nucleus,UBERON:0000453
TCATTTGGTCCAGTTA-1_WSSS_PLA8810750,2518.0,P13,PU8-9_B2,8-9_PCW,WSSS_PLA8810750,3417.379150,snRNA-seq,10X,2,TCATTTGGTCCAGTTA-1_WSSS_PLA8810750,...,CL:0008036,HsapDv:0000046,PATO:0000461,P13,True,NCBITaxon:9606,unknown,unknown,nucleus,UBERON:0000453
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
GTGCTTCTCCCGTAAA-1_WSSS_PLA8810751,2391.0,P13,PU8-9_B2,8-9_PCW,WSSS_PLA8810751,3568.117920,snRNA-seq,10X,3,GTGCTTCTCCCGTAAA-1_WSSS_PLA8810751,...,CL:0000525,HsapDv:0000046,PATO:0000461,P13,True,NCBITaxon:9606,unknown,unknown,nucleus,UBERON:0000453
CGATGCGCAATTGCCA-1_WSSS_PLA8810751,3525.0,P13,PU8-9_B2,8-9_PCW,WSSS_PLA8810751,4057.335693,snRNA-seq,10X,3,CGATGCGCAATTGCCA-1_WSSS_PLA8810751,...,CL:2000060,HsapDv:0000046,PATO:0000461,P13,True,NCBITaxon:9606,unknown,unknown,nucleus,UBERON:0000453
TTACAGCAGGAAACTG-1_Pla_Camb10714920_and_40110_Pla_Camb10687916,4764.0,P13,unknown,8-9_PCW,Pla_Camb10714920_and_40110_Pla_Camb10687916,3999.327637,snRNA-seq,10X,7,TTACAGCAGGAAACTG-1_Pla_Camb10714920_and_40110_...,...,CL:0008036,HsapDv:0000046,PATO:0000461,P13,True,NCBITaxon:9606,unknown,unknown,nucleus,UBERON:0000453
AGCATCACATATGAAG-1_WSSS_PLA8810750,2431.0,P13,PU8-9_B2,8-9_PCW,WSSS_PLA8810750,3604.964111,snRNA-seq,10X,2,AGCATCACATATGAAG-1_WSSS_PLA8810750,...,CL:0008036,HsapDv:0000046,PATO:0000461,P13,True,NCBITaxon:9606,unknown,unknown,nucleus,UBERON:0000453


### obsm (Embeddings)

In [81]:
adata.obsm

AxisArrays with keys: X_scVI_n_latent_12_sample, X_scVI_n_latent_17_sample, X_scVI_n_latent_8_sample, X_umap, X_umap_scVI_n_latent_12_sample, X_umap_scVI_n_latent_17_sample, X_umap_scVI_n_latent_8_sample

#### uns (Dataset Metadata)

In [82]:
adata.uns

OverloadedDict, wrapping:
	{'age_colors': array(['#1f77b4'], dtype=object), 'annotation_prev_or_removed_colors': array(['#d3d3d3', '#d5e7f7', '#d895ea', '#9f6bac', '#bf4cea', '#b9a96b',
       '#f8f4a8', '#eacf68', '#dbc7de', '#69b4ce', '#eb5376', '#ecdbe5',
       '#e7a3c8', '#b53279', '#e96db3', '#c35338'], dtype=object), 'donor_colors': array(['#1f77b4'], dtype=object), 'final_annot_all_troph_colors': array(['#1f77b4', '#ff7f0e', '#279e68', '#d62728', '#aa40fc', '#8c564b',
       '#e377c2', '#b5bd61', '#17becf', '#aec7e8'], dtype=object), 'final_annot_all_troph_corrected_colors': array(['#1f77b4', '#ff7f0e', '#279e68', '#d62728', '#aa40fc', '#8c564b',
       '#e377c2', '#b5bd61', '#17becf', '#aec7e8'], dtype=object), 'final_annot_inv_troph_colors': array(['#1f77b4', '#ff7f0e', '#279e68', '#d62728', '#aa40fc', '#8c564b',
       '#e377c2'], dtype=object), 'louvain': {'params': {'random_state': 0, 'resolution': 0.2}}, 'louvain_scvi_n_latent_8_colors': array(['#1f77b4', '#ff7f0e', '#279

In [83]:
adata.uns['schema_version'] = '3.0.0'

In [84]:
adata.uns['title'] = 'donor_p13_trophoblasts'

In [85]:
adata.uns['default_embedding'] = 'X_umap'

### Final checks and adjustments

In [86]:
adata

AnnData object with n_obs × n_vars = 37675 × 28821
    obs: 'n_genes', 'donor', 'tissue_block', 'age', 'sample', 'n_counts', 'dataset', 'technique', 'batch', 'barcode', 'S_score', 'G2M_score', 'phase', 'origin_M_F', 'annotation_prev_or_removed', 'final_annot_all_troph_corrected', 'assay_ontology_term_id', 'cell_type_ontology_term_id', 'development_stage_ontology_term_id', 'disease_ontology_term_id', 'donor_id', 'is_primary_data', 'organism_ontology_term_id', 'self_reported_ethnicity_ontology_term_id', 'sex_ontology_term_id', 'suspension_type', 'tissue_ontology_term_id'
    var: 'gene_ids-0', 'feature_types-0', 'genome-0', 'n_cells-0', 'gene_ids-1', 'feature_types-1', 'genome-1', 'n_cells-1', 'gene_ids-2', 'feature_types-2', 'genome-2', 'n_cells-2', 'gene_ids-3', 'feature_types-3', 'genome-3', 'n_cells-3', 'gene_ids-4', 'feature_types-4', 'genome-4', 'n_cells-4', 'gene_ids-5', 'feature_types-5', 'genome-5', 'n_cells-5', 'gene_ids-6', 'feature_types-6', 'genome-6', 'n_cells-6', 'gene_ids

In [87]:
adata.obs.dtypes

n_genes                                      float32
donor                                       category
tissue_block                                category
age                                         category
sample                                      category
n_counts                                     float32
dataset                                     category
technique                                   category
batch                                       category
barcode                                     category
S_score                                      float32
G2M_score                                    float32
phase                                       category
origin_M_F                                  category
annotation_prev_or_removed                  category
final_annot_all_troph_corrected             category
assay_ontology_term_id                        object
cell_type_ontology_term_id                  category
development_stage_ontology_term_id            

In [88]:
adata.obs

Unnamed: 0_level_0,n_genes,donor,tissue_block,age,sample,n_counts,dataset,technique,batch,barcode,...,cell_type_ontology_term_id,development_stage_ontology_term_id,disease_ontology_term_id,donor_id,is_primary_data,organism_ontology_term_id,self_reported_ethnicity_ontology_term_id,sex_ontology_term_id,suspension_type,tissue_ontology_term_id
barcode_sample,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ACCTGAAAGGATGGCT-1_Pla_Camb10691970,2595.0,P13,unknown,8-9_PCW,Pla_Camb10691970,3433.483154,snRNA-seq,10X,4,ACCTGAAAGGATGGCT-1_Pla_Camb10691970,...,CL:0000525,HsapDv:0000046,PATO:0000461,P13,True,NCBITaxon:9606,unknown,unknown,nucleus,UBERON:0000453
CTACAGAAGAGGCTGT-1_WSSS_PLA8810751,3291.0,P13,PU8-9_B2,8-9_PCW,WSSS_PLA8810751,3979.873535,snRNA-seq,10X,3,CTACAGAAGAGGCTGT-1_WSSS_PLA8810751,...,CL:2000060,HsapDv:0000046,PATO:0000461,P13,True,NCBITaxon:9606,unknown,unknown,nucleus,UBERON:0000453
ATTTACCCATGGAACG-1_WSSS_PLA8810750,2387.0,P13,PU8-9_B2,8-9_PCW,WSSS_PLA8810750,3165.444824,snRNA-seq,10X,2,ATTTACCCATGGAACG-1_WSSS_PLA8810750,...,CL:0000525,HsapDv:0000046,PATO:0000461,P13,True,NCBITaxon:9606,unknown,unknown,nucleus,UBERON:0000453
ACCATTTGTGTCTTCC-1_Pla_Camb10691970,1862.0,P13,unknown,8-9_PCW,Pla_Camb10691970,2897.327393,snRNA-seq,10X,4,ACCATTTGTGTCTTCC-1_Pla_Camb10691970,...,CL:0000525,HsapDv:0000046,PATO:0000461,P13,True,NCBITaxon:9606,unknown,unknown,nucleus,UBERON:0000453
TCATTTGGTCCAGTTA-1_WSSS_PLA8810750,2518.0,P13,PU8-9_B2,8-9_PCW,WSSS_PLA8810750,3417.379150,snRNA-seq,10X,2,TCATTTGGTCCAGTTA-1_WSSS_PLA8810750,...,CL:0008036,HsapDv:0000046,PATO:0000461,P13,True,NCBITaxon:9606,unknown,unknown,nucleus,UBERON:0000453
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
GTGCTTCTCCCGTAAA-1_WSSS_PLA8810751,2391.0,P13,PU8-9_B2,8-9_PCW,WSSS_PLA8810751,3568.117920,snRNA-seq,10X,3,GTGCTTCTCCCGTAAA-1_WSSS_PLA8810751,...,CL:0000525,HsapDv:0000046,PATO:0000461,P13,True,NCBITaxon:9606,unknown,unknown,nucleus,UBERON:0000453
CGATGCGCAATTGCCA-1_WSSS_PLA8810751,3525.0,P13,PU8-9_B2,8-9_PCW,WSSS_PLA8810751,4057.335693,snRNA-seq,10X,3,CGATGCGCAATTGCCA-1_WSSS_PLA8810751,...,CL:2000060,HsapDv:0000046,PATO:0000461,P13,True,NCBITaxon:9606,unknown,unknown,nucleus,UBERON:0000453
TTACAGCAGGAAACTG-1_Pla_Camb10714920_and_40110_Pla_Camb10687916,4764.0,P13,unknown,8-9_PCW,Pla_Camb10714920_and_40110_Pla_Camb10687916,3999.327637,snRNA-seq,10X,7,TTACAGCAGGAAACTG-1_Pla_Camb10714920_and_40110_...,...,CL:0008036,HsapDv:0000046,PATO:0000461,P13,True,NCBITaxon:9606,unknown,unknown,nucleus,UBERON:0000453
AGCATCACATATGAAG-1_WSSS_PLA8810750,2431.0,P13,PU8-9_B2,8-9_PCW,WSSS_PLA8810750,3604.964111,snRNA-seq,10X,2,AGCATCACATATGAAG-1_WSSS_PLA8810750,...,CL:0008036,HsapDv:0000046,PATO:0000461,P13,True,NCBITaxon:9606,unknown,unknown,nucleus,UBERON:0000453


In [89]:
#check the format of expression matrix

In [90]:
adata.X

<37675x28821 sparse matrix of type '<class 'numpy.float32'>'
	with 100962746 stored elements in Compressed Sparse Column format>

In [91]:
# Convert expression matrix to sparse matrix format

In [92]:
adata.X = scipy.sparse.csr_matrix(adata.X)

In [93]:
adata.X

<37675x28821 sparse matrix of type '<class 'numpy.float32'>'
	with 100962746 stored elements in Compressed Sparse Row format>

In [94]:
adata.raw = araw

In [95]:
adata.write('/lustre/scratch127/cellgen/cellgeni/cxgportal_sets/trophoblast/final_objects/donor_p13_trophoblasts_final.h5ad', compression = 'gzip')