#  Curating trophoblast_stemcells dataset 

In [1]:
Article: 'Spatial multiomics map of trophoblast development in early pregnancy'

In [2]:
DOI : 'https://doi.org/10.1038/s41586-023-05869-0'

In [3]:
Data_Source : 'https://www.reproductivecellatlas.org/mfi.html'

### Mount farm

mount-farm

### Packages required for curation

#### Import all packages required for the curation

In [4]:
import numpy as np
import pandas as pd
import scanpy as sc
import scipy
from tqdm import tqdm
from scipy import sparse
from scipy.sparse import csr_matrix
import anndata as ad
import os
import subprocess
import math

### Curation Schema

#### X (Matrix Layers)

#### AnnData object

In [5]:
# Load the AnnData object

In [6]:
adata = sc.read_h5ad('/lustre/scratch127/cellgen/cellgeni/cxgportal_sets/trophoblast/trophoblast_stemcells.h5ad')

In [7]:
# View the AnnData object

In [8]:
adata

AnnData object with n_obs × n_vars = 9957 × 22523
    obs: 'n_genes', 'time_point', 'Treatment', 'sample', 'percent_mito', 'leiden', 'S_score', 'G2M_score', 'phase', 'cell_annotation'
    var: 'gene_ids-0', 'feature_types-0', 'n_cells-0', 'gene_ids-1', 'feature_types-1', 'n_cells-1', 'gene_ids-10', 'feature_types-10', 'n_cells-10', 'gene_ids-11', 'feature_types-11', 'n_cells-11', 'gene_ids-12', 'feature_types-12', 'n_cells-12', 'gene_ids-13', 'feature_types-13', 'n_cells-13', 'gene_ids-14', 'feature_types-14', 'n_cells-14', 'gene_ids-15', 'feature_types-15', 'n_cells-15', 'gene_ids-16', 'feature_types-16', 'n_cells-16', 'gene_ids-2', 'feature_types-2', 'n_cells-2', 'gene_ids-3', 'feature_types-3', 'n_cells-3', 'gene_ids-4', 'feature_types-4', 'n_cells-4', 'gene_ids-5', 'feature_types-5', 'n_cells-5', 'gene_ids-6', 'feature_types-6', 'n_cells-6', 'gene_ids-7', 'feature_types-7', 'n_cells-7', 'gene_ids-8', 'feature_types-8', 'n_cells-8', 'gene_ids-9', 'feature_types-9', 'n_cells-9', 'n_c

##### Raw Counts matrix

In [9]:
# Check whether adata has raw counts or normalized counts

In [10]:
print(adata.X)

  (0, 0)	0.19539925
  (0, 7)	0.19539925
  (0, 8)	0.35878754
  (0, 15)	0.19539925
  (0, 16)	1.5860327
  (0, 18)	0.19539925
  (0, 20)	0.19539925
  (0, 22)	0.19539925
  (0, 25)	0.49919152
  (0, 26)	0.6222875
  (0, 29)	0.35878754
  (0, 40)	0.35878754
  (0, 42)	0.6222875
  (0, 53)	0.19539925
  (0, 55)	0.19539925
  (0, 64)	0.49919152
  (0, 65)	0.35878754
  (0, 67)	0.6222875
  (0, 68)	1.3915675
  (0, 70)	0.19539925
  (0, 71)	0.19539925
  (0, 73)	1.982117
  (0, 77)	0.19539925
  (0, 78)	0.35878754
  (0, 79)	0.35878754
  :	:
  (9956, 22415)	0.43215233
  (9956, 22431)	0.43215233
  (9956, 22434)	0.2392412
  (9956, 22441)	0.2392412
  (9956, 22446)	0.2392412
  (9956, 22447)	0.2392412
  (9956, 22458)	0.2392412
  (9956, 22460)	0.8550213
  (9956, 22464)	0.8550213
  (9956, 22465)	1.5652746
  (9956, 22466)	0.2392412
  (9956, 22468)	0.2392412
  (9956, 22470)	0.8550213
  (9956, 22471)	0.2392412
  (9956, 22476)	0.2392412
  (9956, 22478)	0.2392412
  (9956, 22479)	0.2392412
  (9956, 22481)	0.2392412
  (9956, 

In [11]:
print(adata.raw.X)

  (0, 0)	1.0
  (0, 7)	1.0
  (0, 8)	2.0
  (0, 15)	1.0
  (0, 16)	18.0
  (0, 18)	1.0
  (0, 20)	1.0
  (0, 22)	1.0
  (0, 25)	3.0
  (0, 26)	4.0
  (0, 29)	2.0
  (0, 40)	2.0
  (0, 42)	4.0
  (0, 53)	1.0
  (0, 55)	1.0
  (0, 64)	3.0
  (0, 65)	2.0
  (0, 67)	4.0
  (0, 68)	14.0
  (0, 70)	1.0
  (0, 71)	1.0
  (0, 73)	29.0
  (0, 77)	1.0
  (0, 78)	2.0
  (0, 79)	2.0
  :	:
  (9956, 22415)	2.0
  (9956, 22431)	2.0
  (9956, 22434)	1.0
  (9956, 22441)	1.0
  (9956, 22446)	1.0
  (9956, 22447)	1.0
  (9956, 22458)	1.0
  (9956, 22460)	5.0
  (9956, 22464)	5.0
  (9956, 22465)	14.0
  (9956, 22466)	1.0
  (9956, 22468)	1.0
  (9956, 22470)	5.0
  (9956, 22471)	1.0
  (9956, 22476)	1.0
  (9956, 22478)	1.0
  (9956, 22479)	1.0
  (9956, 22481)	1.0
  (9956, 22486)	1.0
  (9956, 22505)	1.0
  (9956, 22510)	2.0
  (9956, 22512)	1.0
  (9956, 22514)	1.0
  (9956, 22520)	2.0
  (9956, 22521)	3.0


In [12]:
#since raw counts are present in adata.raw, copy the counts to araw

In [13]:
araw = adata.raw.to_adata()

In [14]:
araw

AnnData object with n_obs × n_vars = 9957 × 22523
    obs: 'n_genes', 'time_point', 'Treatment', 'sample', 'percent_mito', 'leiden', 'S_score', 'G2M_score', 'phase', 'cell_annotation'
    var: 'gene_ids-0', 'feature_types-0', 'n_cells-0', 'gene_ids-1', 'feature_types-1', 'n_cells-1', 'gene_ids-10', 'feature_types-10', 'n_cells-10', 'gene_ids-11', 'feature_types-11', 'n_cells-11', 'gene_ids-12', 'feature_types-12', 'n_cells-12', 'gene_ids-13', 'feature_types-13', 'n_cells-13', 'gene_ids-14', 'feature_types-14', 'n_cells-14', 'gene_ids-15', 'feature_types-15', 'n_cells-15', 'gene_ids-16', 'feature_types-16', 'n_cells-16', 'gene_ids-2', 'feature_types-2', 'n_cells-2', 'gene_ids-3', 'feature_types-3', 'n_cells-3', 'gene_ids-4', 'feature_types-4', 'n_cells-4', 'gene_ids-5', 'feature_types-5', 'n_cells-5', 'gene_ids-6', 'feature_types-6', 'n_cells-6', 'gene_ids-7', 'feature_types-7', 'n_cells-7', 'gene_ids-8', 'feature_types-8', 'n_cells-8', 'gene_ids-9', 'feature_types-9', 'n_cells-9', 'n_c

#### Variables (var)

In [15]:
# View var

In [16]:
adata.var

Unnamed: 0,gene_ids-0,feature_types-0,n_cells-0,gene_ids-1,feature_types-1,n_cells-1,gene_ids-10,feature_types-10,n_cells-10,gene_ids-11,...,n_cells-8,gene_ids-9,feature_types-9,n_cells-9,n_cells,highly_variable,highly_variable_rank,means,variances,variances_norm
A1BG,ENSG00000121410,Gene Expression,1121.0,ENSG00000121410,Gene Expression,1357.0,ENSG00000121410,Gene Expression,859.0,ENSG00000121410,...,1307.0,ENSG00000121410,Gene Expression,1507.0,19012,False,,0.283017,0.353803,0.932761
A1BG-AS1,ENSG00000268895,Gene Expression,421.0,ENSG00000268895,Gene Expression,391.0,ENSG00000268895,Gene Expression,294.0,ENSG00000268895,...,496.0,ENSG00000268895,Gene Expression,682.0,6707,False,,0.078538,0.083024,0.837489
A2M,,,,,,,,,,,...,,,,,26,False,,0.000301,0.000301,0.984182
A2M-AS1,ENSG00000245105,Gene Expression,17.0,ENSG00000245105,Gene Expression,13.0,ENSG00000245105,Gene Expression,3.0,,...,22.0,ENSG00000245105,Gene Expression,4.0,176,False,,0.002310,0.002305,0.928771
A2ML1,ENSG00000166535,Gene Expression,181.0,ENSG00000166535,Gene Expression,155.0,ENSG00000166535,Gene Expression,46.0,ENSG00000166535,...,216.0,ENSG00000166535,Gene Expression,105.0,1930,False,,0.021593,0.023539,0.928596
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZXDC,ENSG00000070476,Gene Expression,1258.0,ENSG00000070476,Gene Expression,1195.0,ENSG00000070476,Gene Expression,441.0,ENSG00000070476,...,798.0,ENSG00000070476,Gene Expression,917.0,13428,False,,0.134378,0.149478,0.860687
ZYG11A,ENSG00000203995,Gene Expression,354.0,ENSG00000203995,Gene Expression,329.0,ENSG00000203995,Gene Expression,97.0,ENSG00000203995,...,282.0,ENSG00000203995,Gene Expression,161.0,3510,False,,0.034950,0.036344,0.863785
ZYG11B,ENSG00000162378,Gene Expression,4675.0,ENSG00000162378,Gene Expression,4948.0,ENSG00000162378,Gene Expression,1894.0,ENSG00000162378,...,2271.0,ENSG00000162378,Gene Expression,2912.0,51500,False,,0.787888,1.091203,0.856090
ZYX,ENSG00000159840,Gene Expression,5614.0,ENSG00000159840,Gene Expression,4895.0,ENSG00000159840,Gene Expression,2093.0,ENSG00000159840,...,3774.0,ENSG00000159840,Gene Expression,2927.0,57676,True,2831.0,1.929798,5.217148,1.178365


In [17]:
araw.var

Unnamed: 0,gene_ids-0,feature_types-0,n_cells-0,gene_ids-1,feature_types-1,n_cells-1,gene_ids-10,feature_types-10,n_cells-10,gene_ids-11,...,gene_ids-7,feature_types-7,n_cells-7,gene_ids-8,feature_types-8,n_cells-8,gene_ids-9,feature_types-9,n_cells-9,n_cells
A1BG,ENSG00000121410,Gene Expression,1121.0,ENSG00000121410,Gene Expression,1357.0,ENSG00000121410,Gene Expression,859.0,ENSG00000121410,...,ENSG00000121410,Gene Expression,1044.0,ENSG00000121410,Gene Expression,1307.0,ENSG00000121410,Gene Expression,1507.0,19012
A1BG-AS1,ENSG00000268895,Gene Expression,421.0,ENSG00000268895,Gene Expression,391.0,ENSG00000268895,Gene Expression,294.0,ENSG00000268895,...,ENSG00000268895,Gene Expression,458.0,ENSG00000268895,Gene Expression,496.0,ENSG00000268895,Gene Expression,682.0,6707
A2M,,,,,,,,,,,...,ENSG00000175899,Gene Expression,4.0,,,,,,,26
A2M-AS1,ENSG00000245105,Gene Expression,17.0,ENSG00000245105,Gene Expression,13.0,ENSG00000245105,Gene Expression,3.0,,...,ENSG00000245105,Gene Expression,27.0,ENSG00000245105,Gene Expression,22.0,ENSG00000245105,Gene Expression,4.0,176
A2ML1,ENSG00000166535,Gene Expression,181.0,ENSG00000166535,Gene Expression,155.0,ENSG00000166535,Gene Expression,46.0,ENSG00000166535,...,ENSG00000166535,Gene Expression,162.0,ENSG00000166535,Gene Expression,216.0,ENSG00000166535,Gene Expression,105.0,1930
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZXDC,ENSG00000070476,Gene Expression,1258.0,ENSG00000070476,Gene Expression,1195.0,ENSG00000070476,Gene Expression,441.0,ENSG00000070476,...,ENSG00000070476,Gene Expression,992.0,ENSG00000070476,Gene Expression,798.0,ENSG00000070476,Gene Expression,917.0,13428
ZYG11A,ENSG00000203995,Gene Expression,354.0,ENSG00000203995,Gene Expression,329.0,ENSG00000203995,Gene Expression,97.0,ENSG00000203995,...,ENSG00000203995,Gene Expression,307.0,ENSG00000203995,Gene Expression,282.0,ENSG00000203995,Gene Expression,161.0,3510
ZYG11B,ENSG00000162378,Gene Expression,4675.0,ENSG00000162378,Gene Expression,4948.0,ENSG00000162378,Gene Expression,1894.0,ENSG00000162378,...,ENSG00000162378,Gene Expression,3753.0,ENSG00000162378,Gene Expression,2271.0,ENSG00000162378,Gene Expression,2912.0,51500
ZYX,ENSG00000159840,Gene Expression,5614.0,ENSG00000159840,Gene Expression,4895.0,ENSG00000159840,Gene Expression,2093.0,ENSG00000159840,...,ENSG00000159840,Gene Expression,4260.0,ENSG00000159840,Gene Expression,3774.0,ENSG00000159840,Gene Expression,2927.0,57676


### Ensembl IDs

In [18]:
#Ensembl ids are provided in multiple columns in adata.var

In [19]:
#Select columns from adata.var that have column name starting with gene_ids and assigns them to a variable gene_names

In [20]:
gene_names = adata.var.loc[:,adata.var.columns.str.startswith('gene_ids')]

In [21]:
gene_names

Unnamed: 0,gene_ids-0,gene_ids-1,gene_ids-10,gene_ids-11,gene_ids-12,gene_ids-13,gene_ids-14,gene_ids-15,gene_ids-16,gene_ids-2,gene_ids-3,gene_ids-4,gene_ids-5,gene_ids-6,gene_ids-7,gene_ids-8,gene_ids-9
A1BG,ENSG00000121410,ENSG00000121410,ENSG00000121410,ENSG00000121410,ENSG00000121410,ENSG00000121410,ENSG00000121410,ENSG00000121410,ENSG00000121410,ENSG00000121410,ENSG00000121410,ENSG00000121410,ENSG00000121410,ENSG00000121410,ENSG00000121410,ENSG00000121410,ENSG00000121410
A1BG-AS1,ENSG00000268895,ENSG00000268895,ENSG00000268895,ENSG00000268895,ENSG00000268895,ENSG00000268895,ENSG00000268895,ENSG00000268895,ENSG00000268895,ENSG00000268895,ENSG00000268895,ENSG00000268895,ENSG00000268895,ENSG00000268895,ENSG00000268895,ENSG00000268895,ENSG00000268895
A2M,,,,,,,,,,,ENSG00000175899,ENSG00000175899,ENSG00000175899,ENSG00000175899,ENSG00000175899,,
A2M-AS1,ENSG00000245105,ENSG00000245105,ENSG00000245105,,ENSG00000245105,ENSG00000245105,ENSG00000245105,,ENSG00000245105,ENSG00000245105,ENSG00000245105,ENSG00000245105,ENSG00000245105,ENSG00000245105,ENSG00000245105,ENSG00000245105,ENSG00000245105
A2ML1,ENSG00000166535,ENSG00000166535,ENSG00000166535,ENSG00000166535,ENSG00000166535,ENSG00000166535,ENSG00000166535,ENSG00000166535,ENSG00000166535,ENSG00000166535,ENSG00000166535,ENSG00000166535,ENSG00000166535,ENSG00000166535,ENSG00000166535,ENSG00000166535,ENSG00000166535
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZXDC,ENSG00000070476,ENSG00000070476,ENSG00000070476,ENSG00000070476,ENSG00000070476,ENSG00000070476,ENSG00000070476,ENSG00000070476,ENSG00000070476,ENSG00000070476,ENSG00000070476,ENSG00000070476,ENSG00000070476,ENSG00000070476,ENSG00000070476,ENSG00000070476,ENSG00000070476
ZYG11A,ENSG00000203995,ENSG00000203995,ENSG00000203995,ENSG00000203995,ENSG00000203995,ENSG00000203995,ENSG00000203995,ENSG00000203995,ENSG00000203995,ENSG00000203995,ENSG00000203995,ENSG00000203995,ENSG00000203995,ENSG00000203995,ENSG00000203995,ENSG00000203995,ENSG00000203995
ZYG11B,ENSG00000162378,ENSG00000162378,ENSG00000162378,ENSG00000162378,ENSG00000162378,ENSG00000162378,ENSG00000162378,ENSG00000162378,ENSG00000162378,ENSG00000162378,ENSG00000162378,ENSG00000162378,ENSG00000162378,ENSG00000162378,ENSG00000162378,ENSG00000162378,ENSG00000162378
ZYX,ENSG00000159840,ENSG00000159840,ENSG00000159840,ENSG00000159840,ENSG00000159840,ENSG00000159840,ENSG00000159840,ENSG00000159840,ENSG00000159840,ENSG00000159840,ENSG00000159840,ENSG00000159840,ENSG00000159840,ENSG00000159840,ENSG00000159840,ENSG00000159840,ENSG00000159840


In [22]:
len(gene_names)

22523

In [23]:
#Creates an empty list to store extracted gene names

In [24]:
ensg = []

for k in tqdm(range(0,len(adata.var))):
    gene_valcount = gene_names.iloc[k].value_counts()
    if sum(gene_names.iloc[k].value_counts().index.str.startswith('ENSG')) > 0:
        ensg.append(gene_valcount[gene_valcount.index.str.startswith('ENSG')].index[0])
    else:
        ensg.append('nan')

100%|██████████| 22523/22523 [00:26<00:00, 861.28it/s]


In [25]:
ensg

['ENSG00000121410',
 'ENSG00000268895',
 'ENSG00000175899',
 'ENSG00000245105',
 'ENSG00000166535',
 'ENSG00000256661',
 'ENSG00000128274',
 'ENSG00000094914',
 'ENSG00000081760',
 'ENSG00000114771',
 'ENSG00000197953',
 'ENSG00000242908',
 'ENSG00000188984',
 'ENSG00000109576',
 'ENSG00000158122',
 'ENSG00000103591',
 'ENSG00000115977',
 'ENSG00000087884',
 'ENSG00000127837',
 'ENSG00000129673',
 'ENSG00000131043',
 'ENSG00000205002',
 'ENSG00000090861',
 'ENSG00000124608',
 'ENSG00000266967',
 'ENSG00000157426',
 'ENSG00000149313',
 'ENSG00000008311',
 'ENSG00000215458',
 'ENSG00000275700',
 'ENSG00000181409',
 'ENSG00000281376',
 'ENSG00000183044',
 'ENSG00000165029',
 'ENSG00000154263',
 'ENSG00000144452',
 'ENSG00000179869',
 'ENSG00000107331',
 'ENSG00000167972',
 'ENSG00000198691',
 'ENSG00000154265',
 'ENSG00000154262',
 'ENSG00000064687',
 'ENSG00000141338',
 'ENSG00000154258',
 'ENSG00000231749',
 'ENSG00000085563',
 'ENSG00000135776',
 'ENSG00000073734',
 'ENSG00000005471',


In [26]:
len(ensg)

22523

In [27]:
# copy the index column values to a new column called gebe_symbola

In [28]:
adata.var['gene_symbols'] = adata.var_names
araw.var['gene_symbols'] = adata.var_names

In [29]:
#set ensembl ids as index column

In [30]:
adata.var_names = ensg

In [31]:
araw.var_names = ensg

In [32]:
#Load the approved genes file in the curation

In [33]:
approved_genes = pd.read_csv('/home/jovyan/CXG_DATASETS_PORTAL/gene_info/genes_approved.csv')

In [34]:
genedict = {key: 1 for key in list(approved_genes.feature_id)}

In [35]:
genedict

{'ERCC-00002': 1,
 'ERCC-00003': 1,
 'ERCC-00004': 1,
 'ERCC-00009': 1,
 'ERCC-00012': 1,
 'ERCC-00013': 1,
 'ERCC-00014': 1,
 'ERCC-00016': 1,
 'ERCC-00017': 1,
 'ERCC-00019': 1,
 'ERCC-00022': 1,
 'ERCC-00024': 1,
 'ERCC-00025': 1,
 'ERCC-00028': 1,
 'ERCC-00031': 1,
 'ERCC-00033': 1,
 'ERCC-00034': 1,
 'ERCC-00035': 1,
 'ERCC-00039': 1,
 'ERCC-00040': 1,
 'ERCC-00041': 1,
 'ERCC-00042': 1,
 'ERCC-00043': 1,
 'ERCC-00044': 1,
 'ERCC-00046': 1,
 'ERCC-00048': 1,
 'ERCC-00051': 1,
 'ERCC-00053': 1,
 'ERCC-00054': 1,
 'ERCC-00057': 1,
 'ERCC-00058': 1,
 'ERCC-00059': 1,
 'ERCC-00060': 1,
 'ERCC-00061': 1,
 'ERCC-00062': 1,
 'ERCC-00067': 1,
 'ERCC-00069': 1,
 'ERCC-00071': 1,
 'ERCC-00073': 1,
 'ERCC-00074': 1,
 'ERCC-00075': 1,
 'ERCC-00076': 1,
 'ERCC-00077': 1,
 'ERCC-00078': 1,
 'ERCC-00079': 1,
 'ERCC-00081': 1,
 'ERCC-00083': 1,
 'ERCC-00084': 1,
 'ERCC-00085': 1,
 'ERCC-00086': 1,
 'ERCC-00092': 1,
 'ERCC-00095': 1,
 'ERCC-00096': 1,
 'ERCC-00097': 1,
 'ERCC-00098': 1,
 'ERCC-000

In [36]:
# Filter out genes that don't appear in the approved annotation

In [37]:
var_to_keep_adata = [x for x in ensg if (x in genedict)]

In [38]:
adata = adata[:, var_to_keep_adata].copy()
araw = araw[:, var_to_keep_adata].copy()

In [39]:
adata.var

Unnamed: 0,gene_ids-0,feature_types-0,n_cells-0,gene_ids-1,feature_types-1,n_cells-1,gene_ids-10,feature_types-10,n_cells-10,gene_ids-11,...,gene_ids-9,feature_types-9,n_cells-9,n_cells,highly_variable,highly_variable_rank,means,variances,variances_norm,gene_symbols
ENSG00000121410,ENSG00000121410,Gene Expression,1121.0,ENSG00000121410,Gene Expression,1357.0,ENSG00000121410,Gene Expression,859.0,ENSG00000121410,...,ENSG00000121410,Gene Expression,1507.0,19012,False,,0.283017,0.353803,0.932761,A1BG
ENSG00000268895,ENSG00000268895,Gene Expression,421.0,ENSG00000268895,Gene Expression,391.0,ENSG00000268895,Gene Expression,294.0,ENSG00000268895,...,ENSG00000268895,Gene Expression,682.0,6707,False,,0.078538,0.083024,0.837489,A1BG-AS1
ENSG00000175899,,,,,,,,,,,...,,,,26,False,,0.000301,0.000301,0.984182,A2M
ENSG00000245105,ENSG00000245105,Gene Expression,17.0,ENSG00000245105,Gene Expression,13.0,ENSG00000245105,Gene Expression,3.0,,...,ENSG00000245105,Gene Expression,4.0,176,False,,0.002310,0.002305,0.928771,A2M-AS1
ENSG00000166535,ENSG00000166535,Gene Expression,181.0,ENSG00000166535,Gene Expression,155.0,ENSG00000166535,Gene Expression,46.0,ENSG00000166535,...,ENSG00000166535,Gene Expression,105.0,1930,False,,0.021593,0.023539,0.928596,A2ML1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ENSG00000070476,ENSG00000070476,Gene Expression,1258.0,ENSG00000070476,Gene Expression,1195.0,ENSG00000070476,Gene Expression,441.0,ENSG00000070476,...,ENSG00000070476,Gene Expression,917.0,13428,False,,0.134378,0.149478,0.860687,ZXDC
ENSG00000203995,ENSG00000203995,Gene Expression,354.0,ENSG00000203995,Gene Expression,329.0,ENSG00000203995,Gene Expression,97.0,ENSG00000203995,...,ENSG00000203995,Gene Expression,161.0,3510,False,,0.034950,0.036344,0.863785,ZYG11A
ENSG00000162378,ENSG00000162378,Gene Expression,4675.0,ENSG00000162378,Gene Expression,4948.0,ENSG00000162378,Gene Expression,1894.0,ENSG00000162378,...,ENSG00000162378,Gene Expression,2912.0,51500,False,,0.787888,1.091203,0.856090,ZYG11B
ENSG00000159840,ENSG00000159840,Gene Expression,5614.0,ENSG00000159840,Gene Expression,4895.0,ENSG00000159840,Gene Expression,2093.0,ENSG00000159840,...,ENSG00000159840,Gene Expression,2927.0,57676,True,2831.0,1.929798,5.217148,1.178365,ZYX


In [40]:
araw.var

Unnamed: 0,gene_ids-0,feature_types-0,n_cells-0,gene_ids-1,feature_types-1,n_cells-1,gene_ids-10,feature_types-10,n_cells-10,gene_ids-11,...,feature_types-7,n_cells-7,gene_ids-8,feature_types-8,n_cells-8,gene_ids-9,feature_types-9,n_cells-9,n_cells,gene_symbols
ENSG00000121410,ENSG00000121410,Gene Expression,1121.0,ENSG00000121410,Gene Expression,1357.0,ENSG00000121410,Gene Expression,859.0,ENSG00000121410,...,Gene Expression,1044.0,ENSG00000121410,Gene Expression,1307.0,ENSG00000121410,Gene Expression,1507.0,19012,A1BG
ENSG00000268895,ENSG00000268895,Gene Expression,421.0,ENSG00000268895,Gene Expression,391.0,ENSG00000268895,Gene Expression,294.0,ENSG00000268895,...,Gene Expression,458.0,ENSG00000268895,Gene Expression,496.0,ENSG00000268895,Gene Expression,682.0,6707,A1BG-AS1
ENSG00000175899,,,,,,,,,,,...,Gene Expression,4.0,,,,,,,26,A2M
ENSG00000245105,ENSG00000245105,Gene Expression,17.0,ENSG00000245105,Gene Expression,13.0,ENSG00000245105,Gene Expression,3.0,,...,Gene Expression,27.0,ENSG00000245105,Gene Expression,22.0,ENSG00000245105,Gene Expression,4.0,176,A2M-AS1
ENSG00000166535,ENSG00000166535,Gene Expression,181.0,ENSG00000166535,Gene Expression,155.0,ENSG00000166535,Gene Expression,46.0,ENSG00000166535,...,Gene Expression,162.0,ENSG00000166535,Gene Expression,216.0,ENSG00000166535,Gene Expression,105.0,1930,A2ML1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ENSG00000070476,ENSG00000070476,Gene Expression,1258.0,ENSG00000070476,Gene Expression,1195.0,ENSG00000070476,Gene Expression,441.0,ENSG00000070476,...,Gene Expression,992.0,ENSG00000070476,Gene Expression,798.0,ENSG00000070476,Gene Expression,917.0,13428,ZXDC
ENSG00000203995,ENSG00000203995,Gene Expression,354.0,ENSG00000203995,Gene Expression,329.0,ENSG00000203995,Gene Expression,97.0,ENSG00000203995,...,Gene Expression,307.0,ENSG00000203995,Gene Expression,282.0,ENSG00000203995,Gene Expression,161.0,3510,ZYG11A
ENSG00000162378,ENSG00000162378,Gene Expression,4675.0,ENSG00000162378,Gene Expression,4948.0,ENSG00000162378,Gene Expression,1894.0,ENSG00000162378,...,Gene Expression,3753.0,ENSG00000162378,Gene Expression,2271.0,ENSG00000162378,Gene Expression,2912.0,51500,ZYG11B
ENSG00000159840,ENSG00000159840,Gene Expression,5614.0,ENSG00000159840,Gene Expression,4895.0,ENSG00000159840,Gene Expression,2093.0,ENSG00000159840,...,Gene Expression,4260.0,ENSG00000159840,Gene Expression,3774.0,ENSG00000159840,Gene Expression,2927.0,57676,ZYX


#### feature is filtered

In [41]:
adata.var['feature_is_filtered'] = [False] * len(adata.var)

In [42]:
#View var

In [43]:
adata.var

Unnamed: 0,gene_ids-0,feature_types-0,n_cells-0,gene_ids-1,feature_types-1,n_cells-1,gene_ids-10,feature_types-10,n_cells-10,gene_ids-11,...,feature_types-9,n_cells-9,n_cells,highly_variable,highly_variable_rank,means,variances,variances_norm,gene_symbols,feature_is_filtered
ENSG00000121410,ENSG00000121410,Gene Expression,1121.0,ENSG00000121410,Gene Expression,1357.0,ENSG00000121410,Gene Expression,859.0,ENSG00000121410,...,Gene Expression,1507.0,19012,False,,0.283017,0.353803,0.932761,A1BG,False
ENSG00000268895,ENSG00000268895,Gene Expression,421.0,ENSG00000268895,Gene Expression,391.0,ENSG00000268895,Gene Expression,294.0,ENSG00000268895,...,Gene Expression,682.0,6707,False,,0.078538,0.083024,0.837489,A1BG-AS1,False
ENSG00000175899,,,,,,,,,,,...,,,26,False,,0.000301,0.000301,0.984182,A2M,False
ENSG00000245105,ENSG00000245105,Gene Expression,17.0,ENSG00000245105,Gene Expression,13.0,ENSG00000245105,Gene Expression,3.0,,...,Gene Expression,4.0,176,False,,0.002310,0.002305,0.928771,A2M-AS1,False
ENSG00000166535,ENSG00000166535,Gene Expression,181.0,ENSG00000166535,Gene Expression,155.0,ENSG00000166535,Gene Expression,46.0,ENSG00000166535,...,Gene Expression,105.0,1930,False,,0.021593,0.023539,0.928596,A2ML1,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ENSG00000070476,ENSG00000070476,Gene Expression,1258.0,ENSG00000070476,Gene Expression,1195.0,ENSG00000070476,Gene Expression,441.0,ENSG00000070476,...,Gene Expression,917.0,13428,False,,0.134378,0.149478,0.860687,ZXDC,False
ENSG00000203995,ENSG00000203995,Gene Expression,354.0,ENSG00000203995,Gene Expression,329.0,ENSG00000203995,Gene Expression,97.0,ENSG00000203995,...,Gene Expression,161.0,3510,False,,0.034950,0.036344,0.863785,ZYG11A,False
ENSG00000162378,ENSG00000162378,Gene Expression,4675.0,ENSG00000162378,Gene Expression,4948.0,ENSG00000162378,Gene Expression,1894.0,ENSG00000162378,...,Gene Expression,2912.0,51500,False,,0.787888,1.091203,0.856090,ZYG11B,False
ENSG00000159840,ENSG00000159840,Gene Expression,5614.0,ENSG00000159840,Gene Expression,4895.0,ENSG00000159840,Gene Expression,2093.0,ENSG00000159840,...,Gene Expression,2927.0,57676,True,2831.0,1.929798,5.217148,1.178365,ZYX,False


In [44]:
araw.var

Unnamed: 0,gene_ids-0,feature_types-0,n_cells-0,gene_ids-1,feature_types-1,n_cells-1,gene_ids-10,feature_types-10,n_cells-10,gene_ids-11,...,feature_types-7,n_cells-7,gene_ids-8,feature_types-8,n_cells-8,gene_ids-9,feature_types-9,n_cells-9,n_cells,gene_symbols
ENSG00000121410,ENSG00000121410,Gene Expression,1121.0,ENSG00000121410,Gene Expression,1357.0,ENSG00000121410,Gene Expression,859.0,ENSG00000121410,...,Gene Expression,1044.0,ENSG00000121410,Gene Expression,1307.0,ENSG00000121410,Gene Expression,1507.0,19012,A1BG
ENSG00000268895,ENSG00000268895,Gene Expression,421.0,ENSG00000268895,Gene Expression,391.0,ENSG00000268895,Gene Expression,294.0,ENSG00000268895,...,Gene Expression,458.0,ENSG00000268895,Gene Expression,496.0,ENSG00000268895,Gene Expression,682.0,6707,A1BG-AS1
ENSG00000175899,,,,,,,,,,,...,Gene Expression,4.0,,,,,,,26,A2M
ENSG00000245105,ENSG00000245105,Gene Expression,17.0,ENSG00000245105,Gene Expression,13.0,ENSG00000245105,Gene Expression,3.0,,...,Gene Expression,27.0,ENSG00000245105,Gene Expression,22.0,ENSG00000245105,Gene Expression,4.0,176,A2M-AS1
ENSG00000166535,ENSG00000166535,Gene Expression,181.0,ENSG00000166535,Gene Expression,155.0,ENSG00000166535,Gene Expression,46.0,ENSG00000166535,...,Gene Expression,162.0,ENSG00000166535,Gene Expression,216.0,ENSG00000166535,Gene Expression,105.0,1930,A2ML1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ENSG00000070476,ENSG00000070476,Gene Expression,1258.0,ENSG00000070476,Gene Expression,1195.0,ENSG00000070476,Gene Expression,441.0,ENSG00000070476,...,Gene Expression,992.0,ENSG00000070476,Gene Expression,798.0,ENSG00000070476,Gene Expression,917.0,13428,ZXDC
ENSG00000203995,ENSG00000203995,Gene Expression,354.0,ENSG00000203995,Gene Expression,329.0,ENSG00000203995,Gene Expression,97.0,ENSG00000203995,...,Gene Expression,307.0,ENSG00000203995,Gene Expression,282.0,ENSG00000203995,Gene Expression,161.0,3510,ZYG11A
ENSG00000162378,ENSG00000162378,Gene Expression,4675.0,ENSG00000162378,Gene Expression,4948.0,ENSG00000162378,Gene Expression,1894.0,ENSG00000162378,...,Gene Expression,3753.0,ENSG00000162378,Gene Expression,2271.0,ENSG00000162378,Gene Expression,2912.0,51500,ZYG11B
ENSG00000159840,ENSG00000159840,Gene Expression,5614.0,ENSG00000159840,Gene Expression,4895.0,ENSG00000159840,Gene Expression,2093.0,ENSG00000159840,...,Gene Expression,4260.0,ENSG00000159840,Gene Expression,3774.0,ENSG00000159840,Gene Expression,2927.0,57676,ZYX


## obs (Cell metadata)

In [45]:
#view obs

In [46]:
adata.obs

Unnamed: 0,n_genes,time_point,Treatment,sample,percent_mito,leiden,S_score,G2M_score,phase,cell_annotation
Pla_HDBR13146304_AAACCCAAGTCCGTCG,6669,D3,BSA,Pla_HDBR13146304,0.062408,13,-2.464736,-1.664824,G1,EVT-2
Pla_HDBR13146304_AAACCCAAGTGTAGTA,4446,D3,BSA,Pla_HDBR13146304,0.148037,5,0.044273,0.219792,G2M,EVT-1
Pla_HDBR13146304_AAACGAACAGCCATTA,2159,D3,BSA,Pla_HDBR13146304,0.011459,13,-0.171557,-0.370751,G1,EVT-2
Pla_HDBR13146304_AAACGAAGTCCGAAAG,4265,D3,BSA,Pla_HDBR13146304,0.149628,8,-1.302574,-1.237681,G1,VCT-fusing
Pla_HDBR13146304_AAACGCTAGACTCATC,4915,D3,BSA,Pla_HDBR13146304,0.166676,10,1.027542,-0.313344,S,VCT-fusing
...,...,...,...,...,...,...,...,...,...,...
Pla_HDBR13146313_TTTGGAGTCCATGCAA,5582,D6,BSA,Pla_HDBR13146313,0.012195,1,-2.395367,-3.029436,G1,EVT-2 early
Pla_HDBR13146313_TTTGGTTCACGGTAGA,2110,D6,BSA,Pla_HDBR13146313,0.063439,7,-0.323552,-0.467027,G1,EVT-2
Pla_HDBR13146313_TTTGGTTCAGAGATTA,4354,D6,BSA,Pla_HDBR13146313,0.013887,7,-1.468726,-2.187429,G1,EVT-2
Pla_HDBR13146313_TTTGTTGCACATTGTG,3782,D6,BSA,Pla_HDBR13146313,0.092980,1,-0.623037,-1.009229,G1,EVT-2 early


#### assay_ontology_term_id

In [47]:
adata.obs['assay_ontology_term_id'] = ['EFO:0030080']* len(adata.obs)

In [48]:
adata.obs

Unnamed: 0,n_genes,time_point,Treatment,sample,percent_mito,leiden,S_score,G2M_score,phase,cell_annotation,assay_ontology_term_id
Pla_HDBR13146304_AAACCCAAGTCCGTCG,6669,D3,BSA,Pla_HDBR13146304,0.062408,13,-2.464736,-1.664824,G1,EVT-2,EFO:0030080
Pla_HDBR13146304_AAACCCAAGTGTAGTA,4446,D3,BSA,Pla_HDBR13146304,0.148037,5,0.044273,0.219792,G2M,EVT-1,EFO:0030080
Pla_HDBR13146304_AAACGAACAGCCATTA,2159,D3,BSA,Pla_HDBR13146304,0.011459,13,-0.171557,-0.370751,G1,EVT-2,EFO:0030080
Pla_HDBR13146304_AAACGAAGTCCGAAAG,4265,D3,BSA,Pla_HDBR13146304,0.149628,8,-1.302574,-1.237681,G1,VCT-fusing,EFO:0030080
Pla_HDBR13146304_AAACGCTAGACTCATC,4915,D3,BSA,Pla_HDBR13146304,0.166676,10,1.027542,-0.313344,S,VCT-fusing,EFO:0030080
...,...,...,...,...,...,...,...,...,...,...,...
Pla_HDBR13146313_TTTGGAGTCCATGCAA,5582,D6,BSA,Pla_HDBR13146313,0.012195,1,-2.395367,-3.029436,G1,EVT-2 early,EFO:0030080
Pla_HDBR13146313_TTTGGTTCACGGTAGA,2110,D6,BSA,Pla_HDBR13146313,0.063439,7,-0.323552,-0.467027,G1,EVT-2,EFO:0030080
Pla_HDBR13146313_TTTGGTTCAGAGATTA,4354,D6,BSA,Pla_HDBR13146313,0.013887,7,-1.468726,-2.187429,G1,EVT-2,EFO:0030080
Pla_HDBR13146313_TTTGTTGCACATTGTG,3782,D6,BSA,Pla_HDBR13146313,0.092980,1,-0.623037,-1.009229,G1,EVT-2 early,EFO:0030080


In [49]:
list(adata.obs['sample'].unique())

['Pla_HDBR13146304', 'Pla_HDBR13146305', 'Pla_HDBR13146313']

#### cell_type_ontology_term_id

In [50]:
#get the column in adata.obs related. to cell type annotation

In [51]:
list(adata.obs['cell_annotation'].unique())

['EVT-2',
 'EVT-1',
 'VCT-fusing',
 'VCT-CCC',
 'iEVT',
 'EVT-2 early',
 'VCT-p-TSC',
 'VCT-TSC']

In [52]:
cl_tropho = pd.read_csv('/home/jovyan/CXG_DATASETS_PORTAL/trophoblast/cl_tropho.csv')

In [53]:
mapping = dict(zip(cl_tropho['cell_type'], cl_tropho['CL ID']))

In [54]:
mapping

{'dNK1': 'CL:0002343',
 'dT_cells': 'CL:0000084',
 'T_cells': 'CL:0000084',
 'dNK2': 'CL:0002343',
 'ILC3': 'CL:0001078',
 'dNK3': 'CL:0002343',
 'dT_regs': 'CL:0000815',
 'dM1': 'CL:0000235',
 'HOFB': 'CL:3000001',
 'B_cells': 'CL:0000236',
 'dDC': 'CL:0000451',
 'dM2': 'CL:0000235',
 'dS2': 'CL:0000499',
 'M3': 'CL:0000235',
 'VCT': 'CL:2000060',
 'NK': 'CL:0000623',
 'Granulocytes': 'CL:0000094',
 'uSMC': 'CL:0002601',
 'Endo_F': 'CL:0009092',
 'DC': 'CL:0000451',
 'dEpi_secretory': 'CL:0000066',
 'MO': 'CL:0000235',
 'EVT_2': 'CL:0008036',
 'Endo_M': 'CL:0009095',
 'SCT': 'CL:0000525',
 'dS1': 'CL:0000499',
 'dS3': 'CL:0000499',
 'dEpi_lumenal': 'CL:0000066',
 'iEVT': 'CL:0008036',
 'Endo_L': 'CL:0002138',
 'PV MMP11': 'CL:0000003',
 'PVMMP11': 'CL:0000003',
 'PVSTEAP4': 'CL:0000003',
 'PV STEAP4': 'CL:0000003',
 'EVT_1': 'CL:0008036',
 'PVAOC3': 'CL:0000003',
 'PV AOC3': 'CL:0000003',
 'fF1': 'CL:2000042',
 'VCT_CCC': 'CL:2000060',
 'fF2': 'CL:2000042',
 'VCT_p': 'CL:2000060',
 'P

In [55]:
adata.obs['cell_type_ontology_term_id'] = adata.obs['cell_annotation'].map(mapping)

In [56]:
adata.obs['cell_type_ontology_term_id'] = adata.obs['cell_type_ontology_term_id'].astype('category')

In [57]:
adata.obs

Unnamed: 0,n_genes,time_point,Treatment,sample,percent_mito,leiden,S_score,G2M_score,phase,cell_annotation,assay_ontology_term_id,cell_type_ontology_term_id
Pla_HDBR13146304_AAACCCAAGTCCGTCG,6669,D3,BSA,Pla_HDBR13146304,0.062408,13,-2.464736,-1.664824,G1,EVT-2,EFO:0030080,CL:0008036
Pla_HDBR13146304_AAACCCAAGTGTAGTA,4446,D3,BSA,Pla_HDBR13146304,0.148037,5,0.044273,0.219792,G2M,EVT-1,EFO:0030080,CL:0008036
Pla_HDBR13146304_AAACGAACAGCCATTA,2159,D3,BSA,Pla_HDBR13146304,0.011459,13,-0.171557,-0.370751,G1,EVT-2,EFO:0030080,CL:0008036
Pla_HDBR13146304_AAACGAAGTCCGAAAG,4265,D3,BSA,Pla_HDBR13146304,0.149628,8,-1.302574,-1.237681,G1,VCT-fusing,EFO:0030080,CL:2000060
Pla_HDBR13146304_AAACGCTAGACTCATC,4915,D3,BSA,Pla_HDBR13146304,0.166676,10,1.027542,-0.313344,S,VCT-fusing,EFO:0030080,CL:2000060
...,...,...,...,...,...,...,...,...,...,...,...,...
Pla_HDBR13146313_TTTGGAGTCCATGCAA,5582,D6,BSA,Pla_HDBR13146313,0.012195,1,-2.395367,-3.029436,G1,EVT-2 early,EFO:0030080,CL:0008036
Pla_HDBR13146313_TTTGGTTCACGGTAGA,2110,D6,BSA,Pla_HDBR13146313,0.063439,7,-0.323552,-0.467027,G1,EVT-2,EFO:0030080,CL:0008036
Pla_HDBR13146313_TTTGGTTCAGAGATTA,4354,D6,BSA,Pla_HDBR13146313,0.013887,7,-1.468726,-2.187429,G1,EVT-2,EFO:0030080,CL:0008036
Pla_HDBR13146313_TTTGTTGCACATTGTG,3782,D6,BSA,Pla_HDBR13146313,0.092980,1,-0.623037,-1.009229,G1,EVT-2 early,EFO:0030080,CL:0008036


## development_stage_ontology_term_id

In [58]:
adata.obs['development_stage_ontology_term_id'] = ['unknown']* len(adata.obs)

In [59]:
adata.obs

Unnamed: 0,n_genes,time_point,Treatment,sample,percent_mito,leiden,S_score,G2M_score,phase,cell_annotation,assay_ontology_term_id,cell_type_ontology_term_id,development_stage_ontology_term_id
Pla_HDBR13146304_AAACCCAAGTCCGTCG,6669,D3,BSA,Pla_HDBR13146304,0.062408,13,-2.464736,-1.664824,G1,EVT-2,EFO:0030080,CL:0008036,unknown
Pla_HDBR13146304_AAACCCAAGTGTAGTA,4446,D3,BSA,Pla_HDBR13146304,0.148037,5,0.044273,0.219792,G2M,EVT-1,EFO:0030080,CL:0008036,unknown
Pla_HDBR13146304_AAACGAACAGCCATTA,2159,D3,BSA,Pla_HDBR13146304,0.011459,13,-0.171557,-0.370751,G1,EVT-2,EFO:0030080,CL:0008036,unknown
Pla_HDBR13146304_AAACGAAGTCCGAAAG,4265,D3,BSA,Pla_HDBR13146304,0.149628,8,-1.302574,-1.237681,G1,VCT-fusing,EFO:0030080,CL:2000060,unknown
Pla_HDBR13146304_AAACGCTAGACTCATC,4915,D3,BSA,Pla_HDBR13146304,0.166676,10,1.027542,-0.313344,S,VCT-fusing,EFO:0030080,CL:2000060,unknown
...,...,...,...,...,...,...,...,...,...,...,...,...,...
Pla_HDBR13146313_TTTGGAGTCCATGCAA,5582,D6,BSA,Pla_HDBR13146313,0.012195,1,-2.395367,-3.029436,G1,EVT-2 early,EFO:0030080,CL:0008036,unknown
Pla_HDBR13146313_TTTGGTTCACGGTAGA,2110,D6,BSA,Pla_HDBR13146313,0.063439,7,-0.323552,-0.467027,G1,EVT-2,EFO:0030080,CL:0008036,unknown
Pla_HDBR13146313_TTTGGTTCAGAGATTA,4354,D6,BSA,Pla_HDBR13146313,0.013887,7,-1.468726,-2.187429,G1,EVT-2,EFO:0030080,CL:0008036,unknown
Pla_HDBR13146313_TTTGTTGCACATTGTG,3782,D6,BSA,Pla_HDBR13146313,0.092980,1,-0.623037,-1.009229,G1,EVT-2 early,EFO:0030080,CL:0008036,unknown


#### disease_ontology_term_id

In [60]:
adata.obs['disease_ontology_term_id'] = ['PATO:0000461']* len(adata.obs)

In [61]:
adata.obs

Unnamed: 0,n_genes,time_point,Treatment,sample,percent_mito,leiden,S_score,G2M_score,phase,cell_annotation,assay_ontology_term_id,cell_type_ontology_term_id,development_stage_ontology_term_id,disease_ontology_term_id
Pla_HDBR13146304_AAACCCAAGTCCGTCG,6669,D3,BSA,Pla_HDBR13146304,0.062408,13,-2.464736,-1.664824,G1,EVT-2,EFO:0030080,CL:0008036,unknown,PATO:0000461
Pla_HDBR13146304_AAACCCAAGTGTAGTA,4446,D3,BSA,Pla_HDBR13146304,0.148037,5,0.044273,0.219792,G2M,EVT-1,EFO:0030080,CL:0008036,unknown,PATO:0000461
Pla_HDBR13146304_AAACGAACAGCCATTA,2159,D3,BSA,Pla_HDBR13146304,0.011459,13,-0.171557,-0.370751,G1,EVT-2,EFO:0030080,CL:0008036,unknown,PATO:0000461
Pla_HDBR13146304_AAACGAAGTCCGAAAG,4265,D3,BSA,Pla_HDBR13146304,0.149628,8,-1.302574,-1.237681,G1,VCT-fusing,EFO:0030080,CL:2000060,unknown,PATO:0000461
Pla_HDBR13146304_AAACGCTAGACTCATC,4915,D3,BSA,Pla_HDBR13146304,0.166676,10,1.027542,-0.313344,S,VCT-fusing,EFO:0030080,CL:2000060,unknown,PATO:0000461
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Pla_HDBR13146313_TTTGGAGTCCATGCAA,5582,D6,BSA,Pla_HDBR13146313,0.012195,1,-2.395367,-3.029436,G1,EVT-2 early,EFO:0030080,CL:0008036,unknown,PATO:0000461
Pla_HDBR13146313_TTTGGTTCACGGTAGA,2110,D6,BSA,Pla_HDBR13146313,0.063439,7,-0.323552,-0.467027,G1,EVT-2,EFO:0030080,CL:0008036,unknown,PATO:0000461
Pla_HDBR13146313_TTTGGTTCAGAGATTA,4354,D6,BSA,Pla_HDBR13146313,0.013887,7,-1.468726,-2.187429,G1,EVT-2,EFO:0030080,CL:0008036,unknown,PATO:0000461
Pla_HDBR13146313_TTTGTTGCACATTGTG,3782,D6,BSA,Pla_HDBR13146313,0.092980,1,-0.623037,-1.009229,G1,EVT-2 early,EFO:0030080,CL:0008036,unknown,PATO:0000461


#### donor_id

In [62]:
adata.obs['donor_id'] = ['pooled'] * len(adata.obs)

In [63]:
adata.obs

Unnamed: 0,n_genes,time_point,Treatment,sample,percent_mito,leiden,S_score,G2M_score,phase,cell_annotation,assay_ontology_term_id,cell_type_ontology_term_id,development_stage_ontology_term_id,disease_ontology_term_id,donor_id
Pla_HDBR13146304_AAACCCAAGTCCGTCG,6669,D3,BSA,Pla_HDBR13146304,0.062408,13,-2.464736,-1.664824,G1,EVT-2,EFO:0030080,CL:0008036,unknown,PATO:0000461,pooled
Pla_HDBR13146304_AAACCCAAGTGTAGTA,4446,D3,BSA,Pla_HDBR13146304,0.148037,5,0.044273,0.219792,G2M,EVT-1,EFO:0030080,CL:0008036,unknown,PATO:0000461,pooled
Pla_HDBR13146304_AAACGAACAGCCATTA,2159,D3,BSA,Pla_HDBR13146304,0.011459,13,-0.171557,-0.370751,G1,EVT-2,EFO:0030080,CL:0008036,unknown,PATO:0000461,pooled
Pla_HDBR13146304_AAACGAAGTCCGAAAG,4265,D3,BSA,Pla_HDBR13146304,0.149628,8,-1.302574,-1.237681,G1,VCT-fusing,EFO:0030080,CL:2000060,unknown,PATO:0000461,pooled
Pla_HDBR13146304_AAACGCTAGACTCATC,4915,D3,BSA,Pla_HDBR13146304,0.166676,10,1.027542,-0.313344,S,VCT-fusing,EFO:0030080,CL:2000060,unknown,PATO:0000461,pooled
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Pla_HDBR13146313_TTTGGAGTCCATGCAA,5582,D6,BSA,Pla_HDBR13146313,0.012195,1,-2.395367,-3.029436,G1,EVT-2 early,EFO:0030080,CL:0008036,unknown,PATO:0000461,pooled
Pla_HDBR13146313_TTTGGTTCACGGTAGA,2110,D6,BSA,Pla_HDBR13146313,0.063439,7,-0.323552,-0.467027,G1,EVT-2,EFO:0030080,CL:0008036,unknown,PATO:0000461,pooled
Pla_HDBR13146313_TTTGGTTCAGAGATTA,4354,D6,BSA,Pla_HDBR13146313,0.013887,7,-1.468726,-2.187429,G1,EVT-2,EFO:0030080,CL:0008036,unknown,PATO:0000461,pooled
Pla_HDBR13146313_TTTGTTGCACATTGTG,3782,D6,BSA,Pla_HDBR13146313,0.092980,1,-0.623037,-1.009229,G1,EVT-2 early,EFO:0030080,CL:0008036,unknown,PATO:0000461,pooled


#### is_primary_data

In [64]:
adata.obs['is_primary_data'] = ['True'] * len(adata.obs)

In [65]:
adata.obs['is_primary_data'] = adata.obs['is_primary_data'].astype('bool')

#### organism_ontology_term_id

In [66]:
adata.obs['organism_ontology_term_id'] = ['NCBITaxon:9606'] * len(adata.obs)

In [67]:
adata.obs['organism_ontology_term_id'] = pd.Categorical(adata.obs['organism_ontology_term_id'])

In [68]:
adata.obs['organism_ontology_term_id']

Pla_HDBR13146304_AAACCCAAGTCCGTCG    NCBITaxon:9606
Pla_HDBR13146304_AAACCCAAGTGTAGTA    NCBITaxon:9606
Pla_HDBR13146304_AAACGAACAGCCATTA    NCBITaxon:9606
Pla_HDBR13146304_AAACGAAGTCCGAAAG    NCBITaxon:9606
Pla_HDBR13146304_AAACGCTAGACTCATC    NCBITaxon:9606
                                          ...      
Pla_HDBR13146313_TTTGGAGTCCATGCAA    NCBITaxon:9606
Pla_HDBR13146313_TTTGGTTCACGGTAGA    NCBITaxon:9606
Pla_HDBR13146313_TTTGGTTCAGAGATTA    NCBITaxon:9606
Pla_HDBR13146313_TTTGTTGCACATTGTG    NCBITaxon:9606
Pla_HDBR13146313_TTTGTTGTCTAGTTCT    NCBITaxon:9606
Name: organism_ontology_term_id, Length: 9957, dtype: category
Categories (1, object): ['NCBITaxon:9606']

#### self_reported_ethnicity_ontology_term_id

In [69]:
adata.obs['self_reported_ethnicity_ontology_term_id'] = ['unknown'] * len(adata.obs)

In [70]:
adata.obs['self_reported_ethnicity_ontology_term_id'] = adata.obs['self_reported_ethnicity_ontology_term_id'].astype('category')

In [71]:
adata.obs['self_reported_ethnicity_ontology_term_id']

Pla_HDBR13146304_AAACCCAAGTCCGTCG    unknown
Pla_HDBR13146304_AAACCCAAGTGTAGTA    unknown
Pla_HDBR13146304_AAACGAACAGCCATTA    unknown
Pla_HDBR13146304_AAACGAAGTCCGAAAG    unknown
Pla_HDBR13146304_AAACGCTAGACTCATC    unknown
                                      ...   
Pla_HDBR13146313_TTTGGAGTCCATGCAA    unknown
Pla_HDBR13146313_TTTGGTTCACGGTAGA    unknown
Pla_HDBR13146313_TTTGGTTCAGAGATTA    unknown
Pla_HDBR13146313_TTTGTTGCACATTGTG    unknown
Pla_HDBR13146313_TTTGTTGTCTAGTTCT    unknown
Name: self_reported_ethnicity_ontology_term_id, Length: 9957, dtype: category
Categories (1, object): ['unknown']

In [72]:
adata.obs.columns

Index(['n_genes', 'time_point', 'Treatment', 'sample', 'percent_mito',
       'leiden', 'S_score', 'G2M_score', 'phase', 'cell_annotation',
       'assay_ontology_term_id', 'cell_type_ontology_term_id',
       'development_stage_ontology_term_id', 'disease_ontology_term_id',
       'donor_id', 'is_primary_data', 'organism_ontology_term_id',
       'self_reported_ethnicity_ontology_term_id'],
      dtype='object')

#### sex_ontology_term_id

In [73]:
adata.obs['sex_ontology_term_id'] = ['unknown'] * len(adata.obs)

In [74]:
adata.obs

Unnamed: 0,n_genes,time_point,Treatment,sample,percent_mito,leiden,S_score,G2M_score,phase,cell_annotation,assay_ontology_term_id,cell_type_ontology_term_id,development_stage_ontology_term_id,disease_ontology_term_id,donor_id,is_primary_data,organism_ontology_term_id,self_reported_ethnicity_ontology_term_id,sex_ontology_term_id
Pla_HDBR13146304_AAACCCAAGTCCGTCG,6669,D3,BSA,Pla_HDBR13146304,0.062408,13,-2.464736,-1.664824,G1,EVT-2,EFO:0030080,CL:0008036,unknown,PATO:0000461,pooled,True,NCBITaxon:9606,unknown,unknown
Pla_HDBR13146304_AAACCCAAGTGTAGTA,4446,D3,BSA,Pla_HDBR13146304,0.148037,5,0.044273,0.219792,G2M,EVT-1,EFO:0030080,CL:0008036,unknown,PATO:0000461,pooled,True,NCBITaxon:9606,unknown,unknown
Pla_HDBR13146304_AAACGAACAGCCATTA,2159,D3,BSA,Pla_HDBR13146304,0.011459,13,-0.171557,-0.370751,G1,EVT-2,EFO:0030080,CL:0008036,unknown,PATO:0000461,pooled,True,NCBITaxon:9606,unknown,unknown
Pla_HDBR13146304_AAACGAAGTCCGAAAG,4265,D3,BSA,Pla_HDBR13146304,0.149628,8,-1.302574,-1.237681,G1,VCT-fusing,EFO:0030080,CL:2000060,unknown,PATO:0000461,pooled,True,NCBITaxon:9606,unknown,unknown
Pla_HDBR13146304_AAACGCTAGACTCATC,4915,D3,BSA,Pla_HDBR13146304,0.166676,10,1.027542,-0.313344,S,VCT-fusing,EFO:0030080,CL:2000060,unknown,PATO:0000461,pooled,True,NCBITaxon:9606,unknown,unknown
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Pla_HDBR13146313_TTTGGAGTCCATGCAA,5582,D6,BSA,Pla_HDBR13146313,0.012195,1,-2.395367,-3.029436,G1,EVT-2 early,EFO:0030080,CL:0008036,unknown,PATO:0000461,pooled,True,NCBITaxon:9606,unknown,unknown
Pla_HDBR13146313_TTTGGTTCACGGTAGA,2110,D6,BSA,Pla_HDBR13146313,0.063439,7,-0.323552,-0.467027,G1,EVT-2,EFO:0030080,CL:0008036,unknown,PATO:0000461,pooled,True,NCBITaxon:9606,unknown,unknown
Pla_HDBR13146313_TTTGGTTCAGAGATTA,4354,D6,BSA,Pla_HDBR13146313,0.013887,7,-1.468726,-2.187429,G1,EVT-2,EFO:0030080,CL:0008036,unknown,PATO:0000461,pooled,True,NCBITaxon:9606,unknown,unknown
Pla_HDBR13146313_TTTGTTGCACATTGTG,3782,D6,BSA,Pla_HDBR13146313,0.092980,1,-0.623037,-1.009229,G1,EVT-2 early,EFO:0030080,CL:0008036,unknown,PATO:0000461,pooled,True,NCBITaxon:9606,unknown,unknown


### suspension_type

In [75]:
suspension = pd.read_csv('/home/jovyan/CXG_DATASETS_PORTAL/trophoblast/suspension.csv')

In [76]:
mapping = dict(zip(suspension['sample'], suspension['suspension']))

In [77]:
adata.obs['suspension_type'] = adata.obs['sample'].map(mapping)

In [78]:
adata.obs

Unnamed: 0,n_genes,time_point,Treatment,sample,percent_mito,leiden,S_score,G2M_score,phase,cell_annotation,assay_ontology_term_id,cell_type_ontology_term_id,development_stage_ontology_term_id,disease_ontology_term_id,donor_id,is_primary_data,organism_ontology_term_id,self_reported_ethnicity_ontology_term_id,sex_ontology_term_id,suspension_type
Pla_HDBR13146304_AAACCCAAGTCCGTCG,6669,D3,BSA,Pla_HDBR13146304,0.062408,13,-2.464736,-1.664824,G1,EVT-2,EFO:0030080,CL:0008036,unknown,PATO:0000461,pooled,True,NCBITaxon:9606,unknown,unknown,cell
Pla_HDBR13146304_AAACCCAAGTGTAGTA,4446,D3,BSA,Pla_HDBR13146304,0.148037,5,0.044273,0.219792,G2M,EVT-1,EFO:0030080,CL:0008036,unknown,PATO:0000461,pooled,True,NCBITaxon:9606,unknown,unknown,cell
Pla_HDBR13146304_AAACGAACAGCCATTA,2159,D3,BSA,Pla_HDBR13146304,0.011459,13,-0.171557,-0.370751,G1,EVT-2,EFO:0030080,CL:0008036,unknown,PATO:0000461,pooled,True,NCBITaxon:9606,unknown,unknown,cell
Pla_HDBR13146304_AAACGAAGTCCGAAAG,4265,D3,BSA,Pla_HDBR13146304,0.149628,8,-1.302574,-1.237681,G1,VCT-fusing,EFO:0030080,CL:2000060,unknown,PATO:0000461,pooled,True,NCBITaxon:9606,unknown,unknown,cell
Pla_HDBR13146304_AAACGCTAGACTCATC,4915,D3,BSA,Pla_HDBR13146304,0.166676,10,1.027542,-0.313344,S,VCT-fusing,EFO:0030080,CL:2000060,unknown,PATO:0000461,pooled,True,NCBITaxon:9606,unknown,unknown,cell
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Pla_HDBR13146313_TTTGGAGTCCATGCAA,5582,D6,BSA,Pla_HDBR13146313,0.012195,1,-2.395367,-3.029436,G1,EVT-2 early,EFO:0030080,CL:0008036,unknown,PATO:0000461,pooled,True,NCBITaxon:9606,unknown,unknown,cell
Pla_HDBR13146313_TTTGGTTCACGGTAGA,2110,D6,BSA,Pla_HDBR13146313,0.063439,7,-0.323552,-0.467027,G1,EVT-2,EFO:0030080,CL:0008036,unknown,PATO:0000461,pooled,True,NCBITaxon:9606,unknown,unknown,cell
Pla_HDBR13146313_TTTGGTTCAGAGATTA,4354,D6,BSA,Pla_HDBR13146313,0.013887,7,-1.468726,-2.187429,G1,EVT-2,EFO:0030080,CL:0008036,unknown,PATO:0000461,pooled,True,NCBITaxon:9606,unknown,unknown,cell
Pla_HDBR13146313_TTTGTTGCACATTGTG,3782,D6,BSA,Pla_HDBR13146313,0.092980,1,-0.623037,-1.009229,G1,EVT-2 early,EFO:0030080,CL:0008036,unknown,PATO:0000461,pooled,True,NCBITaxon:9606,unknown,unknown,cell


#### tissue_ontology_term_id

In [79]:
adata.obs['tissue_ontology_term_id'] =['UBERON:0000088'] * len(adata.obs)

In [80]:
adata.obs.columns

Index(['n_genes', 'time_point', 'Treatment', 'sample', 'percent_mito',
       'leiden', 'S_score', 'G2M_score', 'phase', 'cell_annotation',
       'assay_ontology_term_id', 'cell_type_ontology_term_id',
       'development_stage_ontology_term_id', 'disease_ontology_term_id',
       'donor_id', 'is_primary_data', 'organism_ontology_term_id',
       'self_reported_ethnicity_ontology_term_id', 'sex_ontology_term_id',
       'suspension_type', 'tissue_ontology_term_id'],
      dtype='object')

In [81]:
adata.obs

Unnamed: 0,n_genes,time_point,Treatment,sample,percent_mito,leiden,S_score,G2M_score,phase,cell_annotation,...,cell_type_ontology_term_id,development_stage_ontology_term_id,disease_ontology_term_id,donor_id,is_primary_data,organism_ontology_term_id,self_reported_ethnicity_ontology_term_id,sex_ontology_term_id,suspension_type,tissue_ontology_term_id
Pla_HDBR13146304_AAACCCAAGTCCGTCG,6669,D3,BSA,Pla_HDBR13146304,0.062408,13,-2.464736,-1.664824,G1,EVT-2,...,CL:0008036,unknown,PATO:0000461,pooled,True,NCBITaxon:9606,unknown,unknown,cell,UBERON:0000088
Pla_HDBR13146304_AAACCCAAGTGTAGTA,4446,D3,BSA,Pla_HDBR13146304,0.148037,5,0.044273,0.219792,G2M,EVT-1,...,CL:0008036,unknown,PATO:0000461,pooled,True,NCBITaxon:9606,unknown,unknown,cell,UBERON:0000088
Pla_HDBR13146304_AAACGAACAGCCATTA,2159,D3,BSA,Pla_HDBR13146304,0.011459,13,-0.171557,-0.370751,G1,EVT-2,...,CL:0008036,unknown,PATO:0000461,pooled,True,NCBITaxon:9606,unknown,unknown,cell,UBERON:0000088
Pla_HDBR13146304_AAACGAAGTCCGAAAG,4265,D3,BSA,Pla_HDBR13146304,0.149628,8,-1.302574,-1.237681,G1,VCT-fusing,...,CL:2000060,unknown,PATO:0000461,pooled,True,NCBITaxon:9606,unknown,unknown,cell,UBERON:0000088
Pla_HDBR13146304_AAACGCTAGACTCATC,4915,D3,BSA,Pla_HDBR13146304,0.166676,10,1.027542,-0.313344,S,VCT-fusing,...,CL:2000060,unknown,PATO:0000461,pooled,True,NCBITaxon:9606,unknown,unknown,cell,UBERON:0000088
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Pla_HDBR13146313_TTTGGAGTCCATGCAA,5582,D6,BSA,Pla_HDBR13146313,0.012195,1,-2.395367,-3.029436,G1,EVT-2 early,...,CL:0008036,unknown,PATO:0000461,pooled,True,NCBITaxon:9606,unknown,unknown,cell,UBERON:0000088
Pla_HDBR13146313_TTTGGTTCACGGTAGA,2110,D6,BSA,Pla_HDBR13146313,0.063439,7,-0.323552,-0.467027,G1,EVT-2,...,CL:0008036,unknown,PATO:0000461,pooled,True,NCBITaxon:9606,unknown,unknown,cell,UBERON:0000088
Pla_HDBR13146313_TTTGGTTCAGAGATTA,4354,D6,BSA,Pla_HDBR13146313,0.013887,7,-1.468726,-2.187429,G1,EVT-2,...,CL:0008036,unknown,PATO:0000461,pooled,True,NCBITaxon:9606,unknown,unknown,cell,UBERON:0000088
Pla_HDBR13146313_TTTGTTGCACATTGTG,3782,D6,BSA,Pla_HDBR13146313,0.092980,1,-0.623037,-1.009229,G1,EVT-2 early,...,CL:0008036,unknown,PATO:0000461,pooled,True,NCBITaxon:9606,unknown,unknown,cell,UBERON:0000088


### obsm (Embeddings)

In [82]:
adata.obsm

AxisArrays with keys: X_pca, X_umap

#### uns (Dataset Metadata)

In [83]:
adata.uns

OverloadedDict, wrapping:
	OrderedDict()
With overloaded keys:
	['neighbors'].

In [84]:
adata.uns['schema_version'] = '3.0.0'

In [85]:
adata.uns['title'] = 'Trophoblast_stemcells'

In [86]:
adata.uns['default_embedding'] = 'X_umap'

### Final checks and adjustments

In [87]:
adata

AnnData object with n_obs × n_vars = 9957 × 22385
    obs: 'n_genes', 'time_point', 'Treatment', 'sample', 'percent_mito', 'leiden', 'S_score', 'G2M_score', 'phase', 'cell_annotation', 'assay_ontology_term_id', 'cell_type_ontology_term_id', 'development_stage_ontology_term_id', 'disease_ontology_term_id', 'donor_id', 'is_primary_data', 'organism_ontology_term_id', 'self_reported_ethnicity_ontology_term_id', 'sex_ontology_term_id', 'suspension_type', 'tissue_ontology_term_id'
    var: 'gene_ids-0', 'feature_types-0', 'n_cells-0', 'gene_ids-1', 'feature_types-1', 'n_cells-1', 'gene_ids-10', 'feature_types-10', 'n_cells-10', 'gene_ids-11', 'feature_types-11', 'n_cells-11', 'gene_ids-12', 'feature_types-12', 'n_cells-12', 'gene_ids-13', 'feature_types-13', 'n_cells-13', 'gene_ids-14', 'feature_types-14', 'n_cells-14', 'gene_ids-15', 'feature_types-15', 'n_cells-15', 'gene_ids-16', 'feature_types-16', 'n_cells-16', 'gene_ids-2', 'feature_types-2', 'n_cells-2', 'gene_ids-3', 'feature_types-3

In [88]:
adata.obs.dtypes

n_genes                                        int64
time_point                                  category
Treatment                                   category
sample                                      category
percent_mito                                 float32
leiden                                      category
S_score                                      float64
G2M_score                                    float64
phase                                       category
cell_annotation                             category
assay_ontology_term_id                        object
cell_type_ontology_term_id                  category
development_stage_ontology_term_id            object
disease_ontology_term_id                      object
donor_id                                      object
is_primary_data                                 bool
organism_ontology_term_id                   category
self_reported_ethnicity_ontology_term_id    category
sex_ontology_term_id                          

In [89]:
adata.obs

Unnamed: 0,n_genes,time_point,Treatment,sample,percent_mito,leiden,S_score,G2M_score,phase,cell_annotation,...,cell_type_ontology_term_id,development_stage_ontology_term_id,disease_ontology_term_id,donor_id,is_primary_data,organism_ontology_term_id,self_reported_ethnicity_ontology_term_id,sex_ontology_term_id,suspension_type,tissue_ontology_term_id
Pla_HDBR13146304_AAACCCAAGTCCGTCG,6669,D3,BSA,Pla_HDBR13146304,0.062408,13,-2.464736,-1.664824,G1,EVT-2,...,CL:0008036,unknown,PATO:0000461,pooled,True,NCBITaxon:9606,unknown,unknown,cell,UBERON:0000088
Pla_HDBR13146304_AAACCCAAGTGTAGTA,4446,D3,BSA,Pla_HDBR13146304,0.148037,5,0.044273,0.219792,G2M,EVT-1,...,CL:0008036,unknown,PATO:0000461,pooled,True,NCBITaxon:9606,unknown,unknown,cell,UBERON:0000088
Pla_HDBR13146304_AAACGAACAGCCATTA,2159,D3,BSA,Pla_HDBR13146304,0.011459,13,-0.171557,-0.370751,G1,EVT-2,...,CL:0008036,unknown,PATO:0000461,pooled,True,NCBITaxon:9606,unknown,unknown,cell,UBERON:0000088
Pla_HDBR13146304_AAACGAAGTCCGAAAG,4265,D3,BSA,Pla_HDBR13146304,0.149628,8,-1.302574,-1.237681,G1,VCT-fusing,...,CL:2000060,unknown,PATO:0000461,pooled,True,NCBITaxon:9606,unknown,unknown,cell,UBERON:0000088
Pla_HDBR13146304_AAACGCTAGACTCATC,4915,D3,BSA,Pla_HDBR13146304,0.166676,10,1.027542,-0.313344,S,VCT-fusing,...,CL:2000060,unknown,PATO:0000461,pooled,True,NCBITaxon:9606,unknown,unknown,cell,UBERON:0000088
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Pla_HDBR13146313_TTTGGAGTCCATGCAA,5582,D6,BSA,Pla_HDBR13146313,0.012195,1,-2.395367,-3.029436,G1,EVT-2 early,...,CL:0008036,unknown,PATO:0000461,pooled,True,NCBITaxon:9606,unknown,unknown,cell,UBERON:0000088
Pla_HDBR13146313_TTTGGTTCACGGTAGA,2110,D6,BSA,Pla_HDBR13146313,0.063439,7,-0.323552,-0.467027,G1,EVT-2,...,CL:0008036,unknown,PATO:0000461,pooled,True,NCBITaxon:9606,unknown,unknown,cell,UBERON:0000088
Pla_HDBR13146313_TTTGGTTCAGAGATTA,4354,D6,BSA,Pla_HDBR13146313,0.013887,7,-1.468726,-2.187429,G1,EVT-2,...,CL:0008036,unknown,PATO:0000461,pooled,True,NCBITaxon:9606,unknown,unknown,cell,UBERON:0000088
Pla_HDBR13146313_TTTGTTGCACATTGTG,3782,D6,BSA,Pla_HDBR13146313,0.092980,1,-0.623037,-1.009229,G1,EVT-2 early,...,CL:0008036,unknown,PATO:0000461,pooled,True,NCBITaxon:9606,unknown,unknown,cell,UBERON:0000088


In [90]:
#check the format of expression matrix

In [91]:
adata.X

<9957x22385 sparse matrix of type '<class 'numpy.float32'>'
	with 45054336 stored elements in Compressed Sparse Row format>

In [92]:
# Convert expression matrix to sparse matrix format

In [93]:
adata.X = scipy.sparse.csr_matrix(adata.X)

In [94]:
adata.X

<9957x22385 sparse matrix of type '<class 'numpy.float32'>'
	with 45054336 stored elements in Compressed Sparse Row format>

In [95]:
adata.raw = araw

In [96]:
adata.write('/lustre/scratch127/cellgen/cellgeni/cxgportal_sets/trophoblast/final_objects/trophoblast_stem_cells_final.h5ad', compression = 'gzip')