### **Curating placenta_Infection_lognorm_20231129.h5ad**

Article:  Early infection response of the first trimester human placenta at single-cell scale

DOI: https://doi.org/10.1101/2023.01.02.522155

Data Source : https://www.reproductivecellatlas.org/placenta-infection.html

##### **Mount farm**

mount-farm

##### **Packages required for curation**

In [1]:
#Import all packages required for curation

In [2]:
import numpy as np
import pandas as pd
import scanpy as sc
import scipy
from tqdm import tqdm
from scipy import sparse
from scipy.sparse import csr_matrix
import anndata as ad
import os
import subprocess
import math
import re
from collections import Counter

### **Curation Schema**

##### **X (Matrix Layers)**

##### **AnnData object**

In [3]:
# Load the AnnData object

In [4]:
adata = sc.read_h5ad('/lustre/scratch127/cellgen/cellgeni/cxgportal_sets/Placenta_Infection/Data/placentaInfection_lognorm_20231129.h5ad')

In [5]:
# View the AnnData object

In [6]:
adata

AnnData object with n_obs × n_vars = 158978 × 36601
    obs: 'n_counts', 'n_genes', 'percent_mito', 'cell_type', 'hpi', 'stage', 'phase', 'donor_id', 'MFgenotype', 'infection', 'stage_perInfection', 'Tg_infected', 'Dev_Stage'
    var: 'gene_ids', 'feature_types', 'mean-0-0', 'std-0-0', 'mean-1-0', 'std-1-0', 'mean-2-0', 'std-2-0', 'highly_variable-0', 'highly_variable_rank-0', 'means-0', 'variances-0', 'variances_norm-0', 'mean-0-1', 'std-0-1', 'mean-1-1', 'std-1-1', 'mean-2-1', 'std-2-1', 'highly_variable-1', 'highly_variable_rank-1', 'means-1', 'variances-1', 'variances_norm-1'
    obsm: 'X_umap'
    layers: 'raw_counts'

##### **X - expression matrix**

In [7]:
# View the expression matrix of the anndata object.

In [8]:
adata.X

<158978x36601 sparse matrix of type '<class 'numpy.float32'>'
	with 721777550 stored elements in Compressed Sparse Row format>

In [9]:
# Print the matrix to check whether they are normalized counts or raw counts. if the matrix has floating numbers,they are normalized counts.if they are integers, they are raw counts.

In [10]:
print(adata.X)

  (0, 35707)	1.734162
  (0, 18483)	0.9379451
  (0, 28368)	0.9379451
  (0, 8844)	0.9379451
  (0, 35078)	1.4132899
  (0, 35089)	0.9379451
  (0, 33404)	1.4132899
  (0, 22895)	0.9379451
  (0, 30549)	1.734162
  (0, 21986)	1.9767033
  (0, 4131)	1.734162
  (0, 2902)	1.4132899
  (0, 16296)	0.9379451
  (0, 22820)	0.9379451
  (0, 24866)	0.9379451
  (0, 1461)	2.5980725
  (0, 12705)	0.9379451
  (0, 34422)	0.9379451
  (0, 25102)	0.9379451
  (0, 6480)	0.9379451
  (0, 18365)	0.9379451
  (0, 11088)	0.9379451
  (0, 32213)	1.4132899
  (0, 25739)	0.9379451
  (0, 27478)	0.9379451
  :	:
  (158977, 12830)	0.5599803
  (158977, 9172)	0.5599803
  (158977, 23888)	0.5599803
  (158977, 25056)	0.5599803
  (158977, 34932)	1.7054439
  (158977, 4131)	1.7054439
  (158977, 30549)	0.5599803
  (158977, 34130)	0.916801
  (158977, 33404)	0.5599803
  (158977, 16321)	0.5599803
  (158977, 16229)	0.5599803
  (158977, 11503)	0.5599803
  (158977, 35089)	0.916801
  (158977, 19118)	0.916801
  (158977, 10880)	0.916801
  (158977, 32

##### **Raw counts matrix**

In [11]:
# If X has normalized counts, check for the raw counts matrix.

In [12]:
# check whether raw counts are present in adata.raw

In [13]:
adata.raw

In [14]:
adata.layers

Layers with keys: raw_counts

In [15]:
print(adata.layers['raw_counts'])

  (0, 35707)	3.0
  (0, 18483)	1.0
  (0, 28368)	1.0
  (0, 8844)	1.0
  (0, 35078)	2.0
  (0, 35089)	1.0
  (0, 33404)	2.0
  (0, 22895)	1.0
  (0, 30549)	3.0
  (0, 21986)	4.0
  (0, 4131)	3.0
  (0, 2902)	2.0
  (0, 16296)	1.0
  (0, 22820)	1.0
  (0, 24866)	1.0
  (0, 1461)	8.0
  (0, 12705)	1.0
  (0, 34422)	1.0
  (0, 25102)	1.0
  (0, 6480)	1.0
  (0, 18365)	1.0
  (0, 11088)	1.0
  (0, 32213)	2.0
  (0, 25739)	1.0
  (0, 27478)	1.0
  :	:
  (158977, 12830)	1.0
  (158977, 9172)	1.0
  (158977, 23888)	1.0
  (158977, 25056)	1.0
  (158977, 34932)	6.0
  (158977, 4131)	6.0
  (158977, 30549)	1.0
  (158977, 34130)	2.0
  (158977, 33404)	1.0
  (158977, 16321)	1.0
  (158977, 16229)	1.0
  (158977, 11503)	1.0
  (158977, 35089)	2.0
  (158977, 19118)	2.0
  (158977, 10880)	2.0
  (158977, 32149)	4.0
  (158977, 27471)	1.0
  (158977, 19270)	1.0
  (158977, 25842)	1.0
  (158977, 25481)	1.0
  (158977, 28368)	1.0
  (158977, 7686)	2.0
  (158977, 35707)	3.0
  (158977, 30846)	1.0
  (158977, 31961)	12.0


In [16]:
# Check whether adata and araw has same dimensions.

In [17]:
araw = ad.AnnData(X=adata.layers['raw_counts'].copy(), obs=adata.obs.copy(), var=adata.var.copy())

In [18]:
araw 

AnnData object with n_obs × n_vars = 158978 × 36601
    obs: 'n_counts', 'n_genes', 'percent_mito', 'cell_type', 'hpi', 'stage', 'phase', 'donor_id', 'MFgenotype', 'infection', 'stage_perInfection', 'Tg_infected', 'Dev_Stage'
    var: 'gene_ids', 'feature_types', 'mean-0-0', 'std-0-0', 'mean-1-0', 'std-1-0', 'mean-2-0', 'std-2-0', 'highly_variable-0', 'highly_variable_rank-0', 'means-0', 'variances-0', 'variances_norm-0', 'mean-0-1', 'std-0-1', 'mean-1-1', 'std-1-1', 'mean-2-1', 'std-2-1', 'highly_variable-1', 'highly_variable_rank-1', 'means-1', 'variances-1', 'variances_norm-1'

In [19]:
araw.X

<158978x36601 sparse matrix of type '<class 'numpy.float32'>'
	with 721777550 stored elements in Compressed Sparse Row format>

In [20]:
print(araw.X)

  (0, 35707)	3.0
  (0, 18483)	1.0
  (0, 28368)	1.0
  (0, 8844)	1.0
  (0, 35078)	2.0
  (0, 35089)	1.0
  (0, 33404)	2.0
  (0, 22895)	1.0
  (0, 30549)	3.0
  (0, 21986)	4.0
  (0, 4131)	3.0
  (0, 2902)	2.0
  (0, 16296)	1.0
  (0, 22820)	1.0
  (0, 24866)	1.0
  (0, 1461)	8.0
  (0, 12705)	1.0
  (0, 34422)	1.0
  (0, 25102)	1.0
  (0, 6480)	1.0
  (0, 18365)	1.0
  (0, 11088)	1.0
  (0, 32213)	2.0
  (0, 25739)	1.0
  (0, 27478)	1.0
  :	:
  (158977, 12830)	1.0
  (158977, 9172)	1.0
  (158977, 23888)	1.0
  (158977, 25056)	1.0
  (158977, 34932)	6.0
  (158977, 4131)	6.0
  (158977, 30549)	1.0
  (158977, 34130)	2.0
  (158977, 33404)	1.0
  (158977, 16321)	1.0
  (158977, 16229)	1.0
  (158977, 11503)	1.0
  (158977, 35089)	2.0
  (158977, 19118)	2.0
  (158977, 10880)	2.0
  (158977, 32149)	4.0
  (158977, 27471)	1.0
  (158977, 19270)	1.0
  (158977, 25842)	1.0
  (158977, 25481)	1.0
  (158977, 28368)	1.0
  (158977, 7686)	2.0
  (158977, 35707)	3.0
  (158977, 30846)	1.0
  (158977, 31961)	12.0


In [21]:
del adata.layers['raw_counts']

In [22]:
adata.layers

Layers with keys: 

##### **Variables(var)**

In [23]:
# View the var of anndata and raw object

In [24]:
adata.var

Unnamed: 0,gene_ids,feature_types,mean-0-0,std-0-0,mean-1-0,std-1-0,mean-2-0,std-2-0,highly_variable-0,highly_variable_rank-0,...,std-0-1,mean-1-1,std-1-1,mean-2-1,std-2-1,highly_variable-1,highly_variable_rank-1,means-1,variances-1,variances_norm-1
DPP6,ENSG00000130226,Gene Expression,0.017129,0.116157,0.024975,0.183925,0.023525,0.157431,True,805.0,...,0.153355,0.038353,0.251360,0.022551,0.149444,True,441.0,0.101883,1.780006,6.958053
LINC02665,ENSG00000223581,Gene Expression,0.000085,0.006993,0.000016,0.003524,0.000057,0.006526,False,,...,0.003701,0.000000,1.000000,0.000015,0.002162,False,,0.000041,0.000057,1.331620
ITCH-IT1,ENSG00000231795,Gene Expression,0.000000,1.000000,0.000000,1.000000,0.000000,1.000000,False,,...,1.000000,0.000000,1.000000,0.000000,1.000000,False,,0.000000,0.000000,0.000000
AC100782.1,ENSG00000254238,Gene Expression,0.000448,0.016993,0.000180,0.012661,0.000457,0.019265,False,,...,0.021768,0.000169,0.014344,0.000831,0.024771,False,,0.001571,0.001601,0.764653
COG4,ENSG00000103051,Gene Expression,0.092065,0.231948,0.070127,0.228696,0.083868,0.254461,False,,...,0.245278,0.064695,0.230533,0.100610,0.278123,False,,0.318025,0.613838,0.634368
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
PPP1R1A,ENSG00000135447,Gene Expression,0.001417,0.029373,0.001095,0.033551,0.002680,0.055134,False,,...,0.032064,0.000509,0.023663,0.002276,0.047242,False,,0.004201,0.008173,1.277199
AC005692.2,ENSG00000261629,Gene Expression,0.000000,1.000000,0.000003,0.000799,0.000011,0.002319,False,,...,1.000000,0.000000,1.000000,0.000000,1.000000,False,,0.000000,0.000000,0.000000
LINC00458,ENSG00000234787,Gene Expression,0.000246,0.013797,0.000227,0.017777,0.000179,0.011465,False,,...,0.021793,0.000336,0.024277,0.000209,0.014916,True,1731.0,0.001067,0.004306,3.004970
AL357054.2,ENSG00000271727,Gene Expression,0.008520,0.074654,0.005196,0.062597,0.007954,0.079646,False,,...,0.070607,0.006109,0.075940,0.007365,0.075187,False,,0.023708,0.043419,0.879939


In [25]:
adata.var['name'] = adata.var.index

In [26]:
adata.var

Unnamed: 0,gene_ids,feature_types,mean-0-0,std-0-0,mean-1-0,std-1-0,mean-2-0,std-2-0,highly_variable-0,highly_variable_rank-0,...,mean-1-1,std-1-1,mean-2-1,std-2-1,highly_variable-1,highly_variable_rank-1,means-1,variances-1,variances_norm-1,name
DPP6,ENSG00000130226,Gene Expression,0.017129,0.116157,0.024975,0.183925,0.023525,0.157431,True,805.0,...,0.038353,0.251360,0.022551,0.149444,True,441.0,0.101883,1.780006,6.958053,DPP6
LINC02665,ENSG00000223581,Gene Expression,0.000085,0.006993,0.000016,0.003524,0.000057,0.006526,False,,...,0.000000,1.000000,0.000015,0.002162,False,,0.000041,0.000057,1.331620,LINC02665
ITCH-IT1,ENSG00000231795,Gene Expression,0.000000,1.000000,0.000000,1.000000,0.000000,1.000000,False,,...,0.000000,1.000000,0.000000,1.000000,False,,0.000000,0.000000,0.000000,ITCH-IT1
AC100782.1,ENSG00000254238,Gene Expression,0.000448,0.016993,0.000180,0.012661,0.000457,0.019265,False,,...,0.000169,0.014344,0.000831,0.024771,False,,0.001571,0.001601,0.764653,AC100782.1
COG4,ENSG00000103051,Gene Expression,0.092065,0.231948,0.070127,0.228696,0.083868,0.254461,False,,...,0.064695,0.230533,0.100610,0.278123,False,,0.318025,0.613838,0.634368,COG4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
PPP1R1A,ENSG00000135447,Gene Expression,0.001417,0.029373,0.001095,0.033551,0.002680,0.055134,False,,...,0.000509,0.023663,0.002276,0.047242,False,,0.004201,0.008173,1.277199,PPP1R1A
AC005692.2,ENSG00000261629,Gene Expression,0.000000,1.000000,0.000003,0.000799,0.000011,0.002319,False,,...,0.000000,1.000000,0.000000,1.000000,False,,0.000000,0.000000,0.000000,AC005692.2
LINC00458,ENSG00000234787,Gene Expression,0.000246,0.013797,0.000227,0.017777,0.000179,0.011465,False,,...,0.000336,0.024277,0.000209,0.014916,True,1731.0,0.001067,0.004306,3.004970,LINC00458
AL357054.2,ENSG00000271727,Gene Expression,0.008520,0.074654,0.005196,0.062597,0.007954,0.079646,False,,...,0.006109,0.075940,0.007365,0.075187,False,,0.023708,0.043419,0.879939,AL357054.2


In [27]:
araw.var

Unnamed: 0,gene_ids,feature_types,mean-0-0,std-0-0,mean-1-0,std-1-0,mean-2-0,std-2-0,highly_variable-0,highly_variable_rank-0,...,std-0-1,mean-1-1,std-1-1,mean-2-1,std-2-1,highly_variable-1,highly_variable_rank-1,means-1,variances-1,variances_norm-1
DPP6,ENSG00000130226,Gene Expression,0.017129,0.116157,0.024975,0.183925,0.023525,0.157431,True,805.0,...,0.153355,0.038353,0.251360,0.022551,0.149444,True,441.0,0.101883,1.780006,6.958053
LINC02665,ENSG00000223581,Gene Expression,0.000085,0.006993,0.000016,0.003524,0.000057,0.006526,False,,...,0.003701,0.000000,1.000000,0.000015,0.002162,False,,0.000041,0.000057,1.331620
ITCH-IT1,ENSG00000231795,Gene Expression,0.000000,1.000000,0.000000,1.000000,0.000000,1.000000,False,,...,1.000000,0.000000,1.000000,0.000000,1.000000,False,,0.000000,0.000000,0.000000
AC100782.1,ENSG00000254238,Gene Expression,0.000448,0.016993,0.000180,0.012661,0.000457,0.019265,False,,...,0.021768,0.000169,0.014344,0.000831,0.024771,False,,0.001571,0.001601,0.764653
COG4,ENSG00000103051,Gene Expression,0.092065,0.231948,0.070127,0.228696,0.083868,0.254461,False,,...,0.245278,0.064695,0.230533,0.100610,0.278123,False,,0.318025,0.613838,0.634368
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
PPP1R1A,ENSG00000135447,Gene Expression,0.001417,0.029373,0.001095,0.033551,0.002680,0.055134,False,,...,0.032064,0.000509,0.023663,0.002276,0.047242,False,,0.004201,0.008173,1.277199
AC005692.2,ENSG00000261629,Gene Expression,0.000000,1.000000,0.000003,0.000799,0.000011,0.002319,False,,...,1.000000,0.000000,1.000000,0.000000,1.000000,False,,0.000000,0.000000,0.000000
LINC00458,ENSG00000234787,Gene Expression,0.000246,0.013797,0.000227,0.017777,0.000179,0.011465,False,,...,0.021793,0.000336,0.024277,0.000209,0.014916,True,1731.0,0.001067,0.004306,3.004970
AL357054.2,ENSG00000271727,Gene Expression,0.008520,0.074654,0.005196,0.062597,0.007954,0.079646,False,,...,0.070607,0.006109,0.075940,0.007365,0.075187,False,,0.023708,0.043419,0.879939


In [28]:
araw.var['name'] = araw.var.index

In [29]:
adata.var.index = adata.var['gene_ids']

In [30]:
araw.var.index = araw.var['gene_ids']

In [31]:
adata.var

Unnamed: 0_level_0,gene_ids,feature_types,mean-0-0,std-0-0,mean-1-0,std-1-0,mean-2-0,std-2-0,highly_variable-0,highly_variable_rank-0,...,mean-1-1,std-1-1,mean-2-1,std-2-1,highly_variable-1,highly_variable_rank-1,means-1,variances-1,variances_norm-1,name
gene_ids,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ENSG00000130226,ENSG00000130226,Gene Expression,0.017129,0.116157,0.024975,0.183925,0.023525,0.157431,True,805.0,...,0.038353,0.251360,0.022551,0.149444,True,441.0,0.101883,1.780006,6.958053,DPP6
ENSG00000223581,ENSG00000223581,Gene Expression,0.000085,0.006993,0.000016,0.003524,0.000057,0.006526,False,,...,0.000000,1.000000,0.000015,0.002162,False,,0.000041,0.000057,1.331620,LINC02665
ENSG00000231795,ENSG00000231795,Gene Expression,0.000000,1.000000,0.000000,1.000000,0.000000,1.000000,False,,...,0.000000,1.000000,0.000000,1.000000,False,,0.000000,0.000000,0.000000,ITCH-IT1
ENSG00000254238,ENSG00000254238,Gene Expression,0.000448,0.016993,0.000180,0.012661,0.000457,0.019265,False,,...,0.000169,0.014344,0.000831,0.024771,False,,0.001571,0.001601,0.764653,AC100782.1
ENSG00000103051,ENSG00000103051,Gene Expression,0.092065,0.231948,0.070127,0.228696,0.083868,0.254461,False,,...,0.064695,0.230533,0.100610,0.278123,False,,0.318025,0.613838,0.634368,COG4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ENSG00000135447,ENSG00000135447,Gene Expression,0.001417,0.029373,0.001095,0.033551,0.002680,0.055134,False,,...,0.000509,0.023663,0.002276,0.047242,False,,0.004201,0.008173,1.277199,PPP1R1A
ENSG00000261629,ENSG00000261629,Gene Expression,0.000000,1.000000,0.000003,0.000799,0.000011,0.002319,False,,...,0.000000,1.000000,0.000000,1.000000,False,,0.000000,0.000000,0.000000,AC005692.2
ENSG00000234787,ENSG00000234787,Gene Expression,0.000246,0.013797,0.000227,0.017777,0.000179,0.011465,False,,...,0.000336,0.024277,0.000209,0.014916,True,1731.0,0.001067,0.004306,3.004970,LINC00458
ENSG00000271727,ENSG00000271727,Gene Expression,0.008520,0.074654,0.005196,0.062597,0.007954,0.079646,False,,...,0.006109,0.075940,0.007365,0.075187,False,,0.023708,0.043419,0.879939,AL357054.2


In [32]:
araw.var

Unnamed: 0_level_0,gene_ids,feature_types,mean-0-0,std-0-0,mean-1-0,std-1-0,mean-2-0,std-2-0,highly_variable-0,highly_variable_rank-0,...,mean-1-1,std-1-1,mean-2-1,std-2-1,highly_variable-1,highly_variable_rank-1,means-1,variances-1,variances_norm-1,name
gene_ids,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ENSG00000130226,ENSG00000130226,Gene Expression,0.017129,0.116157,0.024975,0.183925,0.023525,0.157431,True,805.0,...,0.038353,0.251360,0.022551,0.149444,True,441.0,0.101883,1.780006,6.958053,DPP6
ENSG00000223581,ENSG00000223581,Gene Expression,0.000085,0.006993,0.000016,0.003524,0.000057,0.006526,False,,...,0.000000,1.000000,0.000015,0.002162,False,,0.000041,0.000057,1.331620,LINC02665
ENSG00000231795,ENSG00000231795,Gene Expression,0.000000,1.000000,0.000000,1.000000,0.000000,1.000000,False,,...,0.000000,1.000000,0.000000,1.000000,False,,0.000000,0.000000,0.000000,ITCH-IT1
ENSG00000254238,ENSG00000254238,Gene Expression,0.000448,0.016993,0.000180,0.012661,0.000457,0.019265,False,,...,0.000169,0.014344,0.000831,0.024771,False,,0.001571,0.001601,0.764653,AC100782.1
ENSG00000103051,ENSG00000103051,Gene Expression,0.092065,0.231948,0.070127,0.228696,0.083868,0.254461,False,,...,0.064695,0.230533,0.100610,0.278123,False,,0.318025,0.613838,0.634368,COG4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ENSG00000135447,ENSG00000135447,Gene Expression,0.001417,0.029373,0.001095,0.033551,0.002680,0.055134,False,,...,0.000509,0.023663,0.002276,0.047242,False,,0.004201,0.008173,1.277199,PPP1R1A
ENSG00000261629,ENSG00000261629,Gene Expression,0.000000,1.000000,0.000003,0.000799,0.000011,0.002319,False,,...,0.000000,1.000000,0.000000,1.000000,False,,0.000000,0.000000,0.000000,AC005692.2
ENSG00000234787,ENSG00000234787,Gene Expression,0.000246,0.013797,0.000227,0.017777,0.000179,0.011465,False,,...,0.000336,0.024277,0.000209,0.014916,True,1731.0,0.001067,0.004306,3.004970,LINC00458
ENSG00000271727,ENSG00000271727,Gene Expression,0.008520,0.074654,0.005196,0.062597,0.007954,0.079646,False,,...,0.006109,0.075940,0.007365,0.075187,False,,0.023708,0.043419,0.879939,AL357054.2


In [33]:
# Load the approved genes file.

In [34]:
approved_genes = pd.read_csv('/lustre/scratch127/cellgen/cellgeni/cxgportal_sets/Endometrium_reference_integrated_atlas/genes_approved.csv')

In [35]:
#Create a dictionary from the approved genes file 

In [36]:
genedict = {key: 1 for key in list(approved_genes.feature_id)}

In [37]:
genedict

{'ERCC-00002': 1,
 'ERCC-00003': 1,
 'ERCC-00004': 1,
 'ERCC-00009': 1,
 'ERCC-00012': 1,
 'ERCC-00013': 1,
 'ERCC-00014': 1,
 'ERCC-00016': 1,
 'ERCC-00017': 1,
 'ERCC-00019': 1,
 'ERCC-00022': 1,
 'ERCC-00024': 1,
 'ERCC-00025': 1,
 'ERCC-00028': 1,
 'ERCC-00031': 1,
 'ERCC-00033': 1,
 'ERCC-00034': 1,
 'ERCC-00035': 1,
 'ERCC-00039': 1,
 'ERCC-00040': 1,
 'ERCC-00041': 1,
 'ERCC-00042': 1,
 'ERCC-00043': 1,
 'ERCC-00044': 1,
 'ERCC-00046': 1,
 'ERCC-00048': 1,
 'ERCC-00051': 1,
 'ERCC-00053': 1,
 'ERCC-00054': 1,
 'ERCC-00057': 1,
 'ERCC-00058': 1,
 'ERCC-00059': 1,
 'ERCC-00060': 1,
 'ERCC-00061': 1,
 'ERCC-00062': 1,
 'ERCC-00067': 1,
 'ERCC-00069': 1,
 'ERCC-00071': 1,
 'ERCC-00073': 1,
 'ERCC-00074': 1,
 'ERCC-00075': 1,
 'ERCC-00076': 1,
 'ERCC-00077': 1,
 'ERCC-00078': 1,
 'ERCC-00079': 1,
 'ERCC-00081': 1,
 'ERCC-00083': 1,
 'ERCC-00084': 1,
 'ERCC-00085': 1,
 'ERCC-00086': 1,
 'ERCC-00092': 1,
 'ERCC-00095': 1,
 'ERCC-00096': 1,
 'ERCC-00097': 1,
 'ERCC-00098': 1,
 'ERCC-000

In [38]:
len(genedict)

116184

In [39]:
#Filter out the genes which are not in the approved genes file.

In [40]:
var_to_keep_adata = [x for x in adata.var_names if (x in genedict)]
var_to_keep_araw = [x for x in araw.var_names if (x in genedict)]

In [41]:
len(var_to_keep_adata)

36503

In [42]:
len(var_to_keep_araw)

36503

In [43]:
adata.var

Unnamed: 0_level_0,gene_ids,feature_types,mean-0-0,std-0-0,mean-1-0,std-1-0,mean-2-0,std-2-0,highly_variable-0,highly_variable_rank-0,...,mean-1-1,std-1-1,mean-2-1,std-2-1,highly_variable-1,highly_variable_rank-1,means-1,variances-1,variances_norm-1,name
gene_ids,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ENSG00000130226,ENSG00000130226,Gene Expression,0.017129,0.116157,0.024975,0.183925,0.023525,0.157431,True,805.0,...,0.038353,0.251360,0.022551,0.149444,True,441.0,0.101883,1.780006,6.958053,DPP6
ENSG00000223581,ENSG00000223581,Gene Expression,0.000085,0.006993,0.000016,0.003524,0.000057,0.006526,False,,...,0.000000,1.000000,0.000015,0.002162,False,,0.000041,0.000057,1.331620,LINC02665
ENSG00000231795,ENSG00000231795,Gene Expression,0.000000,1.000000,0.000000,1.000000,0.000000,1.000000,False,,...,0.000000,1.000000,0.000000,1.000000,False,,0.000000,0.000000,0.000000,ITCH-IT1
ENSG00000254238,ENSG00000254238,Gene Expression,0.000448,0.016993,0.000180,0.012661,0.000457,0.019265,False,,...,0.000169,0.014344,0.000831,0.024771,False,,0.001571,0.001601,0.764653,AC100782.1
ENSG00000103051,ENSG00000103051,Gene Expression,0.092065,0.231948,0.070127,0.228696,0.083868,0.254461,False,,...,0.064695,0.230533,0.100610,0.278123,False,,0.318025,0.613838,0.634368,COG4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ENSG00000135447,ENSG00000135447,Gene Expression,0.001417,0.029373,0.001095,0.033551,0.002680,0.055134,False,,...,0.000509,0.023663,0.002276,0.047242,False,,0.004201,0.008173,1.277199,PPP1R1A
ENSG00000261629,ENSG00000261629,Gene Expression,0.000000,1.000000,0.000003,0.000799,0.000011,0.002319,False,,...,0.000000,1.000000,0.000000,1.000000,False,,0.000000,0.000000,0.000000,AC005692.2
ENSG00000234787,ENSG00000234787,Gene Expression,0.000246,0.013797,0.000227,0.017777,0.000179,0.011465,False,,...,0.000336,0.024277,0.000209,0.014916,True,1731.0,0.001067,0.004306,3.004970,LINC00458
ENSG00000271727,ENSG00000271727,Gene Expression,0.008520,0.074654,0.005196,0.062597,0.007954,0.079646,False,,...,0.006109,0.075940,0.007365,0.075187,False,,0.023708,0.043419,0.879939,AL357054.2


In [44]:
araw.var

Unnamed: 0_level_0,gene_ids,feature_types,mean-0-0,std-0-0,mean-1-0,std-1-0,mean-2-0,std-2-0,highly_variable-0,highly_variable_rank-0,...,mean-1-1,std-1-1,mean-2-1,std-2-1,highly_variable-1,highly_variable_rank-1,means-1,variances-1,variances_norm-1,name
gene_ids,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ENSG00000130226,ENSG00000130226,Gene Expression,0.017129,0.116157,0.024975,0.183925,0.023525,0.157431,True,805.0,...,0.038353,0.251360,0.022551,0.149444,True,441.0,0.101883,1.780006,6.958053,DPP6
ENSG00000223581,ENSG00000223581,Gene Expression,0.000085,0.006993,0.000016,0.003524,0.000057,0.006526,False,,...,0.000000,1.000000,0.000015,0.002162,False,,0.000041,0.000057,1.331620,LINC02665
ENSG00000231795,ENSG00000231795,Gene Expression,0.000000,1.000000,0.000000,1.000000,0.000000,1.000000,False,,...,0.000000,1.000000,0.000000,1.000000,False,,0.000000,0.000000,0.000000,ITCH-IT1
ENSG00000254238,ENSG00000254238,Gene Expression,0.000448,0.016993,0.000180,0.012661,0.000457,0.019265,False,,...,0.000169,0.014344,0.000831,0.024771,False,,0.001571,0.001601,0.764653,AC100782.1
ENSG00000103051,ENSG00000103051,Gene Expression,0.092065,0.231948,0.070127,0.228696,0.083868,0.254461,False,,...,0.064695,0.230533,0.100610,0.278123,False,,0.318025,0.613838,0.634368,COG4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ENSG00000135447,ENSG00000135447,Gene Expression,0.001417,0.029373,0.001095,0.033551,0.002680,0.055134,False,,...,0.000509,0.023663,0.002276,0.047242,False,,0.004201,0.008173,1.277199,PPP1R1A
ENSG00000261629,ENSG00000261629,Gene Expression,0.000000,1.000000,0.000003,0.000799,0.000011,0.002319,False,,...,0.000000,1.000000,0.000000,1.000000,False,,0.000000,0.000000,0.000000,AC005692.2
ENSG00000234787,ENSG00000234787,Gene Expression,0.000246,0.013797,0.000227,0.017777,0.000179,0.011465,False,,...,0.000336,0.024277,0.000209,0.014916,True,1731.0,0.001067,0.004306,3.004970,LINC00458
ENSG00000271727,ENSG00000271727,Gene Expression,0.008520,0.074654,0.005196,0.062597,0.007954,0.079646,False,,...,0.006109,0.075940,0.007365,0.075187,False,,0.023708,0.043419,0.879939,AL357054.2


In [45]:
# Modify the anndata object by filtering out the filtered genes.

In [46]:
adata = adata[:, adata.var.index.isin(var_to_keep_adata)]
araw = araw[:, araw.var.index.isin(var_to_keep_araw)]

In [47]:
adata.var

Unnamed: 0_level_0,gene_ids,feature_types,mean-0-0,std-0-0,mean-1-0,std-1-0,mean-2-0,std-2-0,highly_variable-0,highly_variable_rank-0,...,mean-1-1,std-1-1,mean-2-1,std-2-1,highly_variable-1,highly_variable_rank-1,means-1,variances-1,variances_norm-1,name
gene_ids,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ENSG00000130226,ENSG00000130226,Gene Expression,0.017129,0.116157,0.024975,0.183925,0.023525,0.157431,True,805.0,...,0.038353,0.251360,0.022551,0.149444,True,441.0,0.101883,1.780006,6.958053,DPP6
ENSG00000223581,ENSG00000223581,Gene Expression,0.000085,0.006993,0.000016,0.003524,0.000057,0.006526,False,,...,0.000000,1.000000,0.000015,0.002162,False,,0.000041,0.000057,1.331620,LINC02665
ENSG00000231795,ENSG00000231795,Gene Expression,0.000000,1.000000,0.000000,1.000000,0.000000,1.000000,False,,...,0.000000,1.000000,0.000000,1.000000,False,,0.000000,0.000000,0.000000,ITCH-IT1
ENSG00000254238,ENSG00000254238,Gene Expression,0.000448,0.016993,0.000180,0.012661,0.000457,0.019265,False,,...,0.000169,0.014344,0.000831,0.024771,False,,0.001571,0.001601,0.764653,AC100782.1
ENSG00000103051,ENSG00000103051,Gene Expression,0.092065,0.231948,0.070127,0.228696,0.083868,0.254461,False,,...,0.064695,0.230533,0.100610,0.278123,False,,0.318025,0.613838,0.634368,COG4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ENSG00000135447,ENSG00000135447,Gene Expression,0.001417,0.029373,0.001095,0.033551,0.002680,0.055134,False,,...,0.000509,0.023663,0.002276,0.047242,False,,0.004201,0.008173,1.277199,PPP1R1A
ENSG00000261629,ENSG00000261629,Gene Expression,0.000000,1.000000,0.000003,0.000799,0.000011,0.002319,False,,...,0.000000,1.000000,0.000000,1.000000,False,,0.000000,0.000000,0.000000,AC005692.2
ENSG00000234787,ENSG00000234787,Gene Expression,0.000246,0.013797,0.000227,0.017777,0.000179,0.011465,False,,...,0.000336,0.024277,0.000209,0.014916,True,1731.0,0.001067,0.004306,3.004970,LINC00458
ENSG00000271727,ENSG00000271727,Gene Expression,0.008520,0.074654,0.005196,0.062597,0.007954,0.079646,False,,...,0.006109,0.075940,0.007365,0.075187,False,,0.023708,0.043419,0.879939,AL357054.2


In [48]:
# View var

In [49]:
adata.var

Unnamed: 0_level_0,gene_ids,feature_types,mean-0-0,std-0-0,mean-1-0,std-1-0,mean-2-0,std-2-0,highly_variable-0,highly_variable_rank-0,...,mean-1-1,std-1-1,mean-2-1,std-2-1,highly_variable-1,highly_variable_rank-1,means-1,variances-1,variances_norm-1,name
gene_ids,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ENSG00000130226,ENSG00000130226,Gene Expression,0.017129,0.116157,0.024975,0.183925,0.023525,0.157431,True,805.0,...,0.038353,0.251360,0.022551,0.149444,True,441.0,0.101883,1.780006,6.958053,DPP6
ENSG00000223581,ENSG00000223581,Gene Expression,0.000085,0.006993,0.000016,0.003524,0.000057,0.006526,False,,...,0.000000,1.000000,0.000015,0.002162,False,,0.000041,0.000057,1.331620,LINC02665
ENSG00000231795,ENSG00000231795,Gene Expression,0.000000,1.000000,0.000000,1.000000,0.000000,1.000000,False,,...,0.000000,1.000000,0.000000,1.000000,False,,0.000000,0.000000,0.000000,ITCH-IT1
ENSG00000254238,ENSG00000254238,Gene Expression,0.000448,0.016993,0.000180,0.012661,0.000457,0.019265,False,,...,0.000169,0.014344,0.000831,0.024771,False,,0.001571,0.001601,0.764653,AC100782.1
ENSG00000103051,ENSG00000103051,Gene Expression,0.092065,0.231948,0.070127,0.228696,0.083868,0.254461,False,,...,0.064695,0.230533,0.100610,0.278123,False,,0.318025,0.613838,0.634368,COG4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ENSG00000135447,ENSG00000135447,Gene Expression,0.001417,0.029373,0.001095,0.033551,0.002680,0.055134,False,,...,0.000509,0.023663,0.002276,0.047242,False,,0.004201,0.008173,1.277199,PPP1R1A
ENSG00000261629,ENSG00000261629,Gene Expression,0.000000,1.000000,0.000003,0.000799,0.000011,0.002319,False,,...,0.000000,1.000000,0.000000,1.000000,False,,0.000000,0.000000,0.000000,AC005692.2
ENSG00000234787,ENSG00000234787,Gene Expression,0.000246,0.013797,0.000227,0.017777,0.000179,0.011465,False,,...,0.000336,0.024277,0.000209,0.014916,True,1731.0,0.001067,0.004306,3.004970,LINC00458
ENSG00000271727,ENSG00000271727,Gene Expression,0.008520,0.074654,0.005196,0.062597,0.007954,0.079646,False,,...,0.006109,0.075940,0.007365,0.075187,False,,0.023708,0.043419,0.879939,AL357054.2


In [50]:
araw.var

Unnamed: 0_level_0,gene_ids,feature_types,mean-0-0,std-0-0,mean-1-0,std-1-0,mean-2-0,std-2-0,highly_variable-0,highly_variable_rank-0,...,mean-1-1,std-1-1,mean-2-1,std-2-1,highly_variable-1,highly_variable_rank-1,means-1,variances-1,variances_norm-1,name
gene_ids,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ENSG00000130226,ENSG00000130226,Gene Expression,0.017129,0.116157,0.024975,0.183925,0.023525,0.157431,True,805.0,...,0.038353,0.251360,0.022551,0.149444,True,441.0,0.101883,1.780006,6.958053,DPP6
ENSG00000223581,ENSG00000223581,Gene Expression,0.000085,0.006993,0.000016,0.003524,0.000057,0.006526,False,,...,0.000000,1.000000,0.000015,0.002162,False,,0.000041,0.000057,1.331620,LINC02665
ENSG00000231795,ENSG00000231795,Gene Expression,0.000000,1.000000,0.000000,1.000000,0.000000,1.000000,False,,...,0.000000,1.000000,0.000000,1.000000,False,,0.000000,0.000000,0.000000,ITCH-IT1
ENSG00000254238,ENSG00000254238,Gene Expression,0.000448,0.016993,0.000180,0.012661,0.000457,0.019265,False,,...,0.000169,0.014344,0.000831,0.024771,False,,0.001571,0.001601,0.764653,AC100782.1
ENSG00000103051,ENSG00000103051,Gene Expression,0.092065,0.231948,0.070127,0.228696,0.083868,0.254461,False,,...,0.064695,0.230533,0.100610,0.278123,False,,0.318025,0.613838,0.634368,COG4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ENSG00000135447,ENSG00000135447,Gene Expression,0.001417,0.029373,0.001095,0.033551,0.002680,0.055134,False,,...,0.000509,0.023663,0.002276,0.047242,False,,0.004201,0.008173,1.277199,PPP1R1A
ENSG00000261629,ENSG00000261629,Gene Expression,0.000000,1.000000,0.000003,0.000799,0.000011,0.002319,False,,...,0.000000,1.000000,0.000000,1.000000,False,,0.000000,0.000000,0.000000,AC005692.2
ENSG00000234787,ENSG00000234787,Gene Expression,0.000246,0.013797,0.000227,0.017777,0.000179,0.011465,False,,...,0.000336,0.024277,0.000209,0.014916,True,1731.0,0.001067,0.004306,3.004970,LINC00458
ENSG00000271727,ENSG00000271727,Gene Expression,0.008520,0.074654,0.005196,0.062597,0.007954,0.079646,False,,...,0.006109,0.075940,0.007365,0.075187,False,,0.023708,0.043419,0.879939,AL357054.2


feature is filtered

In [51]:
adata.var['feature_is_filtered'] = [False] * len(adata.var)

  adata.var['feature_is_filtered'] = [False] * len(adata.var)


In [52]:
adata.var

Unnamed: 0_level_0,gene_ids,feature_types,mean-0-0,std-0-0,mean-1-0,std-1-0,mean-2-0,std-2-0,highly_variable-0,highly_variable_rank-0,...,std-1-1,mean-2-1,std-2-1,highly_variable-1,highly_variable_rank-1,means-1,variances-1,variances_norm-1,name,feature_is_filtered
gene_ids,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ENSG00000130226,ENSG00000130226,Gene Expression,0.017129,0.116157,0.024975,0.183925,0.023525,0.157431,True,805.0,...,0.251360,0.022551,0.149444,True,441.0,0.101883,1.780006,6.958053,DPP6,False
ENSG00000223581,ENSG00000223581,Gene Expression,0.000085,0.006993,0.000016,0.003524,0.000057,0.006526,False,,...,1.000000,0.000015,0.002162,False,,0.000041,0.000057,1.331620,LINC02665,False
ENSG00000231795,ENSG00000231795,Gene Expression,0.000000,1.000000,0.000000,1.000000,0.000000,1.000000,False,,...,1.000000,0.000000,1.000000,False,,0.000000,0.000000,0.000000,ITCH-IT1,False
ENSG00000254238,ENSG00000254238,Gene Expression,0.000448,0.016993,0.000180,0.012661,0.000457,0.019265,False,,...,0.014344,0.000831,0.024771,False,,0.001571,0.001601,0.764653,AC100782.1,False
ENSG00000103051,ENSG00000103051,Gene Expression,0.092065,0.231948,0.070127,0.228696,0.083868,0.254461,False,,...,0.230533,0.100610,0.278123,False,,0.318025,0.613838,0.634368,COG4,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ENSG00000135447,ENSG00000135447,Gene Expression,0.001417,0.029373,0.001095,0.033551,0.002680,0.055134,False,,...,0.023663,0.002276,0.047242,False,,0.004201,0.008173,1.277199,PPP1R1A,False
ENSG00000261629,ENSG00000261629,Gene Expression,0.000000,1.000000,0.000003,0.000799,0.000011,0.002319,False,,...,1.000000,0.000000,1.000000,False,,0.000000,0.000000,0.000000,AC005692.2,False
ENSG00000234787,ENSG00000234787,Gene Expression,0.000246,0.013797,0.000227,0.017777,0.000179,0.011465,False,,...,0.024277,0.000209,0.014916,True,1731.0,0.001067,0.004306,3.004970,LINC00458,False
ENSG00000271727,ENSG00000271727,Gene Expression,0.008520,0.074654,0.005196,0.062597,0.007954,0.079646,False,,...,0.075940,0.007365,0.075187,False,,0.023708,0.043419,0.879939,AL357054.2,False


In [53]:
araw.var

Unnamed: 0_level_0,gene_ids,feature_types,mean-0-0,std-0-0,mean-1-0,std-1-0,mean-2-0,std-2-0,highly_variable-0,highly_variable_rank-0,...,mean-1-1,std-1-1,mean-2-1,std-2-1,highly_variable-1,highly_variable_rank-1,means-1,variances-1,variances_norm-1,name
gene_ids,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ENSG00000130226,ENSG00000130226,Gene Expression,0.017129,0.116157,0.024975,0.183925,0.023525,0.157431,True,805.0,...,0.038353,0.251360,0.022551,0.149444,True,441.0,0.101883,1.780006,6.958053,DPP6
ENSG00000223581,ENSG00000223581,Gene Expression,0.000085,0.006993,0.000016,0.003524,0.000057,0.006526,False,,...,0.000000,1.000000,0.000015,0.002162,False,,0.000041,0.000057,1.331620,LINC02665
ENSG00000231795,ENSG00000231795,Gene Expression,0.000000,1.000000,0.000000,1.000000,0.000000,1.000000,False,,...,0.000000,1.000000,0.000000,1.000000,False,,0.000000,0.000000,0.000000,ITCH-IT1
ENSG00000254238,ENSG00000254238,Gene Expression,0.000448,0.016993,0.000180,0.012661,0.000457,0.019265,False,,...,0.000169,0.014344,0.000831,0.024771,False,,0.001571,0.001601,0.764653,AC100782.1
ENSG00000103051,ENSG00000103051,Gene Expression,0.092065,0.231948,0.070127,0.228696,0.083868,0.254461,False,,...,0.064695,0.230533,0.100610,0.278123,False,,0.318025,0.613838,0.634368,COG4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ENSG00000135447,ENSG00000135447,Gene Expression,0.001417,0.029373,0.001095,0.033551,0.002680,0.055134,False,,...,0.000509,0.023663,0.002276,0.047242,False,,0.004201,0.008173,1.277199,PPP1R1A
ENSG00000261629,ENSG00000261629,Gene Expression,0.000000,1.000000,0.000003,0.000799,0.000011,0.002319,False,,...,0.000000,1.000000,0.000000,1.000000,False,,0.000000,0.000000,0.000000,AC005692.2
ENSG00000234787,ENSG00000234787,Gene Expression,0.000246,0.013797,0.000227,0.017777,0.000179,0.011465,False,,...,0.000336,0.024277,0.000209,0.014916,True,1731.0,0.001067,0.004306,3.004970,LINC00458
ENSG00000271727,ENSG00000271727,Gene Expression,0.008520,0.074654,0.005196,0.062597,0.007954,0.079646,False,,...,0.006109,0.075940,0.007365,0.075187,False,,0.023708,0.043419,0.879939,AL357054.2


In [54]:
del adata.var['gene_ids']

In [55]:
del araw.var['gene_ids']

#### **obs (Cell metadata)**

In [56]:
#view obs

In [57]:
adata.obs

Unnamed: 0,n_counts,n_genes,percent_mito,cell_type,hpi,stage,phase,donor_id,MFgenotype,infection,stage_perInfection,Tg_infected,Dev_Stage
Pla_HDBR13007974_AAACCCAAGCGTTGTT,6432.0,1904,0.019590,F,24h,UI_24h,G1,scDonor_Tg2,Maternal,UI,UI_Tg_24h,False,12pcw
Pla_HDBR13007974_AAACCCAAGTAGTCAA,49221.0,5525,0.045489,VCT_fusing,24h,UI_24h,G1,scDonor_Tg1,Fetal,UI,UI_Tg_24h,False,CS22
Pla_HDBR13007974_AAACCCACAATGAACA,9243.0,3032,0.045332,HBC,24h,UI_24h,G1,scDonor_Tg2,Maternal,UI,UI_Tg_24h,False,12pcw
Pla_HDBR13007974_AAACCCACAGAGAGGG,7753.0,2803,0.031214,HBC,24h,UI_24h,G2M,scDonor_Tg2,Maternal,UI,UI_Tg_24h,False,12pcw
Pla_HDBR13007974_AAACCCACAGTAGAAT,14361.0,3982,0.043799,HBC,24h,UI_24h,G1,scDonor_Tg1,Fetal,UI,UI_Tg_24h,False,CS22
...,...,...,...,...,...,...,...,...,...,...,...,...,...
Pla_HDBR13661576_TTTGTTGAGAGTACCG,50722.0,7538,0.071074,VCT,48h,Lm_48h,G1,Hrv236,Fetal,Lm,Lm_48h,,14pcw
Pla_HDBR13661576_TTTGTTGAGCATATGA,16358.0,4066,0.086808,HBC,48h,Lm_48h,S,Hrv236,Fetal,Lm,Lm_48h,,14pcw
Pla_HDBR13661576_TTTGTTGCATCCCGTT,12800.0,4219,0.018906,PV,48h,Lm_48h,S,Hrv236,Fetal,Lm,Lm_48h,,14pcw
Pla_HDBR13661576_TTTGTTGTCAGCTGAT,21082.0,5860,0.049094,Endo_f,48h,Lm_48h,S,Hrv236,Fetal,Lm,Lm_48h,,14pcw


In [58]:
# view the column names in obs

In [59]:
adata.obs.columns

Index(['n_counts', 'n_genes', 'percent_mito', 'cell_type', 'hpi', 'stage',
       'phase', 'donor_id', 'MFgenotype', 'infection', 'stage_perInfection',
       'Tg_infected', 'Dev_Stage'],
      dtype='object')

#### **assay_ontology_term_id**

In [60]:
adata.obs['assay_ontology_term_id'] = ['EFO:0009922']* len(adata.obs)

In [61]:
adata.obs['assay_ontology_term_id'] = adata.obs['assay_ontology_term_id'].astype('category')

In [62]:
# view adata.obs

In [63]:
adata.obs

Unnamed: 0,n_counts,n_genes,percent_mito,cell_type,hpi,stage,phase,donor_id,MFgenotype,infection,stage_perInfection,Tg_infected,Dev_Stage,assay_ontology_term_id
Pla_HDBR13007974_AAACCCAAGCGTTGTT,6432.0,1904,0.019590,F,24h,UI_24h,G1,scDonor_Tg2,Maternal,UI,UI_Tg_24h,False,12pcw,EFO:0009922
Pla_HDBR13007974_AAACCCAAGTAGTCAA,49221.0,5525,0.045489,VCT_fusing,24h,UI_24h,G1,scDonor_Tg1,Fetal,UI,UI_Tg_24h,False,CS22,EFO:0009922
Pla_HDBR13007974_AAACCCACAATGAACA,9243.0,3032,0.045332,HBC,24h,UI_24h,G1,scDonor_Tg2,Maternal,UI,UI_Tg_24h,False,12pcw,EFO:0009922
Pla_HDBR13007974_AAACCCACAGAGAGGG,7753.0,2803,0.031214,HBC,24h,UI_24h,G2M,scDonor_Tg2,Maternal,UI,UI_Tg_24h,False,12pcw,EFO:0009922
Pla_HDBR13007974_AAACCCACAGTAGAAT,14361.0,3982,0.043799,HBC,24h,UI_24h,G1,scDonor_Tg1,Fetal,UI,UI_Tg_24h,False,CS22,EFO:0009922
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Pla_HDBR13661576_TTTGTTGAGAGTACCG,50722.0,7538,0.071074,VCT,48h,Lm_48h,G1,Hrv236,Fetal,Lm,Lm_48h,,14pcw,EFO:0009922
Pla_HDBR13661576_TTTGTTGAGCATATGA,16358.0,4066,0.086808,HBC,48h,Lm_48h,S,Hrv236,Fetal,Lm,Lm_48h,,14pcw,EFO:0009922
Pla_HDBR13661576_TTTGTTGCATCCCGTT,12800.0,4219,0.018906,PV,48h,Lm_48h,S,Hrv236,Fetal,Lm,Lm_48h,,14pcw,EFO:0009922
Pla_HDBR13661576_TTTGTTGTCAGCTGAT,21082.0,5860,0.049094,Endo_f,48h,Lm_48h,S,Hrv236,Fetal,Lm,Lm_48h,,14pcw,EFO:0009922


#### **cell_type_ontology_term_id**

In [64]:
#identify the column in adata.obs related. to cell type annotation

In [65]:
adata.obs.columns

Index(['n_counts', 'n_genes', 'percent_mito', 'cell_type', 'hpi', 'stage',
       'phase', 'donor_id', 'MFgenotype', 'infection', 'stage_perInfection',
       'Tg_infected', 'Dev_Stage', 'assay_ontology_term_id'],
      dtype='object')

In [66]:
list(adata.obs['cell_type'].unique())

['F',
 'VCT_fusing',
 'HBC',
 'Endo_f',
 'VCT',
 'EVT_2',
 'EVT_1',
 'VCT_CCC',
 'PAMM1',
 'VCT_p',
 'PV',
 'F_p',
 'HBC_p',
 'iEVT',
 'F_sm']

In [67]:
# create a dictionary of cell type and ontology term

In [68]:
mapping= {'F':'CL:0000057',
'VCT_fusing':'CL:2000060',
'HBC':'CL:3000001',
'Endo_f':'CL:0009092',
'VCT':'CL:2000060',
'EVT_2':'CL:0008036',
'EVT_1':'CL:0008036',
'VCT_CCC':'CL:2000060',
'PAMM1':'CL:0000235',
'VCT_p':'CL:2000060',
'PV':'CL:4033054',
'F_p':'CL:0000057',
'HBC_p':'CL:3000001',
'iEVT':'CL:0008036',
'F_sm':'CL:0000057'}

In [69]:
# add the cell_type_ontology_term_id column

In [70]:
adata.obs['cell_type_ontology_term_id'] = adata.obs['cell_type'].map(mapping)

In [71]:
# change datatype of the column

In [72]:
adata.obs['cell_type_ontology_term_id'] = adata.obs['cell_type_ontology_term_id'].astype('category')

In [73]:
# view adata.obs

In [74]:
adata.obs

Unnamed: 0,n_counts,n_genes,percent_mito,cell_type,hpi,stage,phase,donor_id,MFgenotype,infection,stage_perInfection,Tg_infected,Dev_Stage,assay_ontology_term_id,cell_type_ontology_term_id
Pla_HDBR13007974_AAACCCAAGCGTTGTT,6432.0,1904,0.019590,F,24h,UI_24h,G1,scDonor_Tg2,Maternal,UI,UI_Tg_24h,False,12pcw,EFO:0009922,CL:0000057
Pla_HDBR13007974_AAACCCAAGTAGTCAA,49221.0,5525,0.045489,VCT_fusing,24h,UI_24h,G1,scDonor_Tg1,Fetal,UI,UI_Tg_24h,False,CS22,EFO:0009922,CL:2000060
Pla_HDBR13007974_AAACCCACAATGAACA,9243.0,3032,0.045332,HBC,24h,UI_24h,G1,scDonor_Tg2,Maternal,UI,UI_Tg_24h,False,12pcw,EFO:0009922,CL:3000001
Pla_HDBR13007974_AAACCCACAGAGAGGG,7753.0,2803,0.031214,HBC,24h,UI_24h,G2M,scDonor_Tg2,Maternal,UI,UI_Tg_24h,False,12pcw,EFO:0009922,CL:3000001
Pla_HDBR13007974_AAACCCACAGTAGAAT,14361.0,3982,0.043799,HBC,24h,UI_24h,G1,scDonor_Tg1,Fetal,UI,UI_Tg_24h,False,CS22,EFO:0009922,CL:3000001
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Pla_HDBR13661576_TTTGTTGAGAGTACCG,50722.0,7538,0.071074,VCT,48h,Lm_48h,G1,Hrv236,Fetal,Lm,Lm_48h,,14pcw,EFO:0009922,CL:2000060
Pla_HDBR13661576_TTTGTTGAGCATATGA,16358.0,4066,0.086808,HBC,48h,Lm_48h,S,Hrv236,Fetal,Lm,Lm_48h,,14pcw,EFO:0009922,CL:3000001
Pla_HDBR13661576_TTTGTTGCATCCCGTT,12800.0,4219,0.018906,PV,48h,Lm_48h,S,Hrv236,Fetal,Lm,Lm_48h,,14pcw,EFO:0009922,CL:4033054
Pla_HDBR13661576_TTTGTTGTCAGCTGAT,21082.0,5860,0.049094,Endo_f,48h,Lm_48h,S,Hrv236,Fetal,Lm,Lm_48h,,14pcw,EFO:0009922,CL:0009092


#### **donor_id**

In [75]:
#identify the column in adata.obs which provides donor information

In [76]:
adata.obs.columns

Index(['n_counts', 'n_genes', 'percent_mito', 'cell_type', 'hpi', 'stage',
       'phase', 'donor_id', 'MFgenotype', 'infection', 'stage_perInfection',
       'Tg_infected', 'Dev_Stage', 'assay_ontology_term_id',
       'cell_type_ontology_term_id'],
      dtype='object')

In [77]:
# add the donor_id column

In [78]:
adata.obs['donor_id'] = adata.obs['donor_id']

In [79]:
adata.obs['donor_id'] = np.where(adata.obs['MFgenotype'] == 'Fetal', adata.obs['donor_id'].astype(str) + '_fetus', adata.obs['donor_id'])

In [80]:
adata.obs['donor_id'] = np.where(adata.obs['MFgenotype'] == 'Maternal', adata.obs['donor_id'].astype(str) + '_mother', adata.obs['donor_id'])

In [81]:
#adata.obs['donor_id'] = ['unknown'] * len(adata.obs['names'])

In [82]:
# change datatype of the column

In [83]:
adata.obs['donor_id'] = adata.obs['donor_id'].astype('category')

In [84]:
# view unique values of donor_id column

In [85]:
list(adata.obs['donor_id'].unique())

['scDonor_Tg2_mother',
 'scDonor_Tg1_fetus',
 'scDonor_Tg1_mother',
 'scDonor_Tg2_fetus',
 'scDonor_Tg4_fetus',
 'scDonor_Tg3_fetus',
 'scDonor_Tg3_mother',
 'scDonor_Tg4_mother',
 'Hrv124_fetus',
 'Hrv124_mother',
 'Hrv168_fetus',
 'Hrv168_mother',
 'Hrv232_fetus',
 'Hrv236_mother',
 'Hrv236_fetus',
 'Hrv136_fetus',
 'Hrv135_fetus',
 'Hrv135_mother',
 'Hrv136_mother',
 'Hrv232_mother']

In [86]:
#view obs

In [87]:
adata.obs

Unnamed: 0,n_counts,n_genes,percent_mito,cell_type,hpi,stage,phase,donor_id,MFgenotype,infection,stage_perInfection,Tg_infected,Dev_Stage,assay_ontology_term_id,cell_type_ontology_term_id
Pla_HDBR13007974_AAACCCAAGCGTTGTT,6432.0,1904,0.019590,F,24h,UI_24h,G1,scDonor_Tg2_mother,Maternal,UI,UI_Tg_24h,False,12pcw,EFO:0009922,CL:0000057
Pla_HDBR13007974_AAACCCAAGTAGTCAA,49221.0,5525,0.045489,VCT_fusing,24h,UI_24h,G1,scDonor_Tg1_fetus,Fetal,UI,UI_Tg_24h,False,CS22,EFO:0009922,CL:2000060
Pla_HDBR13007974_AAACCCACAATGAACA,9243.0,3032,0.045332,HBC,24h,UI_24h,G1,scDonor_Tg2_mother,Maternal,UI,UI_Tg_24h,False,12pcw,EFO:0009922,CL:3000001
Pla_HDBR13007974_AAACCCACAGAGAGGG,7753.0,2803,0.031214,HBC,24h,UI_24h,G2M,scDonor_Tg2_mother,Maternal,UI,UI_Tg_24h,False,12pcw,EFO:0009922,CL:3000001
Pla_HDBR13007974_AAACCCACAGTAGAAT,14361.0,3982,0.043799,HBC,24h,UI_24h,G1,scDonor_Tg1_fetus,Fetal,UI,UI_Tg_24h,False,CS22,EFO:0009922,CL:3000001
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Pla_HDBR13661576_TTTGTTGAGAGTACCG,50722.0,7538,0.071074,VCT,48h,Lm_48h,G1,Hrv236_fetus,Fetal,Lm,Lm_48h,,14pcw,EFO:0009922,CL:2000060
Pla_HDBR13661576_TTTGTTGAGCATATGA,16358.0,4066,0.086808,HBC,48h,Lm_48h,S,Hrv236_fetus,Fetal,Lm,Lm_48h,,14pcw,EFO:0009922,CL:3000001
Pla_HDBR13661576_TTTGTTGCATCCCGTT,12800.0,4219,0.018906,PV,48h,Lm_48h,S,Hrv236_fetus,Fetal,Lm,Lm_48h,,14pcw,EFO:0009922,CL:4033054
Pla_HDBR13661576_TTTGTTGTCAGCTGAT,21082.0,5860,0.049094,Endo_f,48h,Lm_48h,S,Hrv236_fetus,Fetal,Lm,Lm_48h,,14pcw,EFO:0009922,CL:0009092


#### **development_stage_ontology_term_id**

In [88]:
list(adata.obs['Dev_Stage'].unique())

['12pcw', 'CS22', '13pcw', 'CS23', '11pcw', 'CS18', '14pcw', '9pcw']

In [89]:
mapping = {'12pcw' :'HsapDv:0000019', 'CS22' : 'HsapDv:0000029', '13pcw' : 'HsapDv:0000020', 'CS23' : 'HsapDv:0000030', '11pcw' : 'HsapDv:0000018', 'CS18' : 'HsapDv:0000025', '14pcw' : 'HsapDv:0000021', '9pcw' : 'HsapDv:0000016'}

In [90]:
adata.obs['development_stage_ontology_term_id'] = adata.obs['Dev_Stage'].map(mapping)

In [91]:
mask = adata.obs['donor_id'].str.contains('_mother')

In [92]:
mask = np.char.endswith(adata.obs['donor_id'].values.astype(str), '_mother')

In [93]:
adata.obs['development_stage_ontology_term_id'] = np.where(mask, 'unknown', adata.obs['development_stage_ontology_term_id'])

In [94]:
list(adata.obs['development_stage_ontology_term_id'].unique())

['unknown',
 'HsapDv:0000029',
 'HsapDv:0000019',
 'HsapDv:0000020',
 'HsapDv:0000030',
 'HsapDv:0000018',
 'HsapDv:0000025',
 'HsapDv:0000021',
 'HsapDv:0000016']

In [95]:
adata.obs['development_stage_ontology_term_id'] = adata.obs['development_stage_ontology_term_id'].astype('category')

In [96]:
adata.obs

Unnamed: 0,n_counts,n_genes,percent_mito,cell_type,hpi,stage,phase,donor_id,MFgenotype,infection,stage_perInfection,Tg_infected,Dev_Stage,assay_ontology_term_id,cell_type_ontology_term_id,development_stage_ontology_term_id
Pla_HDBR13007974_AAACCCAAGCGTTGTT,6432.0,1904,0.019590,F,24h,UI_24h,G1,scDonor_Tg2_mother,Maternal,UI,UI_Tg_24h,False,12pcw,EFO:0009922,CL:0000057,unknown
Pla_HDBR13007974_AAACCCAAGTAGTCAA,49221.0,5525,0.045489,VCT_fusing,24h,UI_24h,G1,scDonor_Tg1_fetus,Fetal,UI,UI_Tg_24h,False,CS22,EFO:0009922,CL:2000060,HsapDv:0000029
Pla_HDBR13007974_AAACCCACAATGAACA,9243.0,3032,0.045332,HBC,24h,UI_24h,G1,scDonor_Tg2_mother,Maternal,UI,UI_Tg_24h,False,12pcw,EFO:0009922,CL:3000001,unknown
Pla_HDBR13007974_AAACCCACAGAGAGGG,7753.0,2803,0.031214,HBC,24h,UI_24h,G2M,scDonor_Tg2_mother,Maternal,UI,UI_Tg_24h,False,12pcw,EFO:0009922,CL:3000001,unknown
Pla_HDBR13007974_AAACCCACAGTAGAAT,14361.0,3982,0.043799,HBC,24h,UI_24h,G1,scDonor_Tg1_fetus,Fetal,UI,UI_Tg_24h,False,CS22,EFO:0009922,CL:3000001,HsapDv:0000029
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Pla_HDBR13661576_TTTGTTGAGAGTACCG,50722.0,7538,0.071074,VCT,48h,Lm_48h,G1,Hrv236_fetus,Fetal,Lm,Lm_48h,,14pcw,EFO:0009922,CL:2000060,HsapDv:0000021
Pla_HDBR13661576_TTTGTTGAGCATATGA,16358.0,4066,0.086808,HBC,48h,Lm_48h,S,Hrv236_fetus,Fetal,Lm,Lm_48h,,14pcw,EFO:0009922,CL:3000001,HsapDv:0000021
Pla_HDBR13661576_TTTGTTGCATCCCGTT,12800.0,4219,0.018906,PV,48h,Lm_48h,S,Hrv236_fetus,Fetal,Lm,Lm_48h,,14pcw,EFO:0009922,CL:4033054,HsapDv:0000021
Pla_HDBR13661576_TTTGTTGTCAGCTGAT,21082.0,5860,0.049094,Endo_f,48h,Lm_48h,S,Hrv236_fetus,Fetal,Lm,Lm_48h,,14pcw,EFO:0009922,CL:0009092,HsapDv:0000021


#### **disease_ontology_term_id**

In [97]:
list(adata.obs['infection'].unique())

['UI', 'Tg', 'Pf', 'Lm']

In [98]:
mapping = { 'UI' : 'PATO:0000461', 'Tg':'MONDO:0005989', 'Pf' :'MONDO:0001943', 'Lm':'MONDO:0005828'}

In [99]:
# Assign normal since all are healthy patients

In [100]:
# add the disease_ontology_term_id column

In [101]:
adata.obs['disease_ontology_term_id']= adata.obs['infection']. map(mapping)

In [102]:
# change datatype of the column

In [103]:
adata.obs['disease_ontology_term_id'] = adata.obs['disease_ontology_term_id'].astype('category')

In [104]:
# view obs

In [105]:
adata.obs

Unnamed: 0,n_counts,n_genes,percent_mito,cell_type,hpi,stage,phase,donor_id,MFgenotype,infection,stage_perInfection,Tg_infected,Dev_Stage,assay_ontology_term_id,cell_type_ontology_term_id,development_stage_ontology_term_id,disease_ontology_term_id
Pla_HDBR13007974_AAACCCAAGCGTTGTT,6432.0,1904,0.019590,F,24h,UI_24h,G1,scDonor_Tg2_mother,Maternal,UI,UI_Tg_24h,False,12pcw,EFO:0009922,CL:0000057,unknown,PATO:0000461
Pla_HDBR13007974_AAACCCAAGTAGTCAA,49221.0,5525,0.045489,VCT_fusing,24h,UI_24h,G1,scDonor_Tg1_fetus,Fetal,UI,UI_Tg_24h,False,CS22,EFO:0009922,CL:2000060,HsapDv:0000029,PATO:0000461
Pla_HDBR13007974_AAACCCACAATGAACA,9243.0,3032,0.045332,HBC,24h,UI_24h,G1,scDonor_Tg2_mother,Maternal,UI,UI_Tg_24h,False,12pcw,EFO:0009922,CL:3000001,unknown,PATO:0000461
Pla_HDBR13007974_AAACCCACAGAGAGGG,7753.0,2803,0.031214,HBC,24h,UI_24h,G2M,scDonor_Tg2_mother,Maternal,UI,UI_Tg_24h,False,12pcw,EFO:0009922,CL:3000001,unknown,PATO:0000461
Pla_HDBR13007974_AAACCCACAGTAGAAT,14361.0,3982,0.043799,HBC,24h,UI_24h,G1,scDonor_Tg1_fetus,Fetal,UI,UI_Tg_24h,False,CS22,EFO:0009922,CL:3000001,HsapDv:0000029,PATO:0000461
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Pla_HDBR13661576_TTTGTTGAGAGTACCG,50722.0,7538,0.071074,VCT,48h,Lm_48h,G1,Hrv236_fetus,Fetal,Lm,Lm_48h,,14pcw,EFO:0009922,CL:2000060,HsapDv:0000021,MONDO:0005828
Pla_HDBR13661576_TTTGTTGAGCATATGA,16358.0,4066,0.086808,HBC,48h,Lm_48h,S,Hrv236_fetus,Fetal,Lm,Lm_48h,,14pcw,EFO:0009922,CL:3000001,HsapDv:0000021,MONDO:0005828
Pla_HDBR13661576_TTTGTTGCATCCCGTT,12800.0,4219,0.018906,PV,48h,Lm_48h,S,Hrv236_fetus,Fetal,Lm,Lm_48h,,14pcw,EFO:0009922,CL:4033054,HsapDv:0000021,MONDO:0005828
Pla_HDBR13661576_TTTGTTGTCAGCTGAT,21082.0,5860,0.049094,Endo_f,48h,Lm_48h,S,Hrv236_fetus,Fetal,Lm,Lm_48h,,14pcw,EFO:0009922,CL:0009092,HsapDv:0000021,MONDO:0005828


#### **is_primary_data**

In [106]:
adata.obs['is_primary_data'] = [True] * len(adata.obs)

In [107]:
list(adata.obs['is_primary_data'].unique())

[True]

In [108]:
adata.obs

Unnamed: 0,n_counts,n_genes,percent_mito,cell_type,hpi,stage,phase,donor_id,MFgenotype,infection,stage_perInfection,Tg_infected,Dev_Stage,assay_ontology_term_id,cell_type_ontology_term_id,development_stage_ontology_term_id,disease_ontology_term_id,is_primary_data
Pla_HDBR13007974_AAACCCAAGCGTTGTT,6432.0,1904,0.019590,F,24h,UI_24h,G1,scDonor_Tg2_mother,Maternal,UI,UI_Tg_24h,False,12pcw,EFO:0009922,CL:0000057,unknown,PATO:0000461,True
Pla_HDBR13007974_AAACCCAAGTAGTCAA,49221.0,5525,0.045489,VCT_fusing,24h,UI_24h,G1,scDonor_Tg1_fetus,Fetal,UI,UI_Tg_24h,False,CS22,EFO:0009922,CL:2000060,HsapDv:0000029,PATO:0000461,True
Pla_HDBR13007974_AAACCCACAATGAACA,9243.0,3032,0.045332,HBC,24h,UI_24h,G1,scDonor_Tg2_mother,Maternal,UI,UI_Tg_24h,False,12pcw,EFO:0009922,CL:3000001,unknown,PATO:0000461,True
Pla_HDBR13007974_AAACCCACAGAGAGGG,7753.0,2803,0.031214,HBC,24h,UI_24h,G2M,scDonor_Tg2_mother,Maternal,UI,UI_Tg_24h,False,12pcw,EFO:0009922,CL:3000001,unknown,PATO:0000461,True
Pla_HDBR13007974_AAACCCACAGTAGAAT,14361.0,3982,0.043799,HBC,24h,UI_24h,G1,scDonor_Tg1_fetus,Fetal,UI,UI_Tg_24h,False,CS22,EFO:0009922,CL:3000001,HsapDv:0000029,PATO:0000461,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Pla_HDBR13661576_TTTGTTGAGAGTACCG,50722.0,7538,0.071074,VCT,48h,Lm_48h,G1,Hrv236_fetus,Fetal,Lm,Lm_48h,,14pcw,EFO:0009922,CL:2000060,HsapDv:0000021,MONDO:0005828,True
Pla_HDBR13661576_TTTGTTGAGCATATGA,16358.0,4066,0.086808,HBC,48h,Lm_48h,S,Hrv236_fetus,Fetal,Lm,Lm_48h,,14pcw,EFO:0009922,CL:3000001,HsapDv:0000021,MONDO:0005828,True
Pla_HDBR13661576_TTTGTTGCATCCCGTT,12800.0,4219,0.018906,PV,48h,Lm_48h,S,Hrv236_fetus,Fetal,Lm,Lm_48h,,14pcw,EFO:0009922,CL:4033054,HsapDv:0000021,MONDO:0005828,True
Pla_HDBR13661576_TTTGTTGTCAGCTGAT,21082.0,5860,0.049094,Endo_f,48h,Lm_48h,S,Hrv236_fetus,Fetal,Lm,Lm_48h,,14pcw,EFO:0009922,CL:0009092,HsapDv:0000021,MONDO:0005828,True


In [109]:
#change data type of column

In [110]:
adata.obs['is_primary_data'] = adata.obs['is_primary_data'].astype('bool')

In [111]:
# view obs

In [112]:
adata.obs

Unnamed: 0,n_counts,n_genes,percent_mito,cell_type,hpi,stage,phase,donor_id,MFgenotype,infection,stage_perInfection,Tg_infected,Dev_Stage,assay_ontology_term_id,cell_type_ontology_term_id,development_stage_ontology_term_id,disease_ontology_term_id,is_primary_data
Pla_HDBR13007974_AAACCCAAGCGTTGTT,6432.0,1904,0.019590,F,24h,UI_24h,G1,scDonor_Tg2_mother,Maternal,UI,UI_Tg_24h,False,12pcw,EFO:0009922,CL:0000057,unknown,PATO:0000461,True
Pla_HDBR13007974_AAACCCAAGTAGTCAA,49221.0,5525,0.045489,VCT_fusing,24h,UI_24h,G1,scDonor_Tg1_fetus,Fetal,UI,UI_Tg_24h,False,CS22,EFO:0009922,CL:2000060,HsapDv:0000029,PATO:0000461,True
Pla_HDBR13007974_AAACCCACAATGAACA,9243.0,3032,0.045332,HBC,24h,UI_24h,G1,scDonor_Tg2_mother,Maternal,UI,UI_Tg_24h,False,12pcw,EFO:0009922,CL:3000001,unknown,PATO:0000461,True
Pla_HDBR13007974_AAACCCACAGAGAGGG,7753.0,2803,0.031214,HBC,24h,UI_24h,G2M,scDonor_Tg2_mother,Maternal,UI,UI_Tg_24h,False,12pcw,EFO:0009922,CL:3000001,unknown,PATO:0000461,True
Pla_HDBR13007974_AAACCCACAGTAGAAT,14361.0,3982,0.043799,HBC,24h,UI_24h,G1,scDonor_Tg1_fetus,Fetal,UI,UI_Tg_24h,False,CS22,EFO:0009922,CL:3000001,HsapDv:0000029,PATO:0000461,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Pla_HDBR13661576_TTTGTTGAGAGTACCG,50722.0,7538,0.071074,VCT,48h,Lm_48h,G1,Hrv236_fetus,Fetal,Lm,Lm_48h,,14pcw,EFO:0009922,CL:2000060,HsapDv:0000021,MONDO:0005828,True
Pla_HDBR13661576_TTTGTTGAGCATATGA,16358.0,4066,0.086808,HBC,48h,Lm_48h,S,Hrv236_fetus,Fetal,Lm,Lm_48h,,14pcw,EFO:0009922,CL:3000001,HsapDv:0000021,MONDO:0005828,True
Pla_HDBR13661576_TTTGTTGCATCCCGTT,12800.0,4219,0.018906,PV,48h,Lm_48h,S,Hrv236_fetus,Fetal,Lm,Lm_48h,,14pcw,EFO:0009922,CL:4033054,HsapDv:0000021,MONDO:0005828,True
Pla_HDBR13661576_TTTGTTGTCAGCTGAT,21082.0,5860,0.049094,Endo_f,48h,Lm_48h,S,Hrv236_fetus,Fetal,Lm,Lm_48h,,14pcw,EFO:0009922,CL:0009092,HsapDv:0000021,MONDO:0005828,True


#### **organism_ontology_term_id**

In [113]:
# assign organism id 

In [114]:
adata.obs['organism_ontology_term_id'] = ['NCBITaxon:9606'] * len(adata.obs)

In [115]:
#change data type of column

In [116]:
adata.obs['organism_ontology_term_id'] = adata.obs['organism_ontology_term_id'].astype('category')

In [117]:
# view obs

In [118]:
adata.obs

Unnamed: 0,n_counts,n_genes,percent_mito,cell_type,hpi,stage,phase,donor_id,MFgenotype,infection,stage_perInfection,Tg_infected,Dev_Stage,assay_ontology_term_id,cell_type_ontology_term_id,development_stage_ontology_term_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id
Pla_HDBR13007974_AAACCCAAGCGTTGTT,6432.0,1904,0.019590,F,24h,UI_24h,G1,scDonor_Tg2_mother,Maternal,UI,UI_Tg_24h,False,12pcw,EFO:0009922,CL:0000057,unknown,PATO:0000461,True,NCBITaxon:9606
Pla_HDBR13007974_AAACCCAAGTAGTCAA,49221.0,5525,0.045489,VCT_fusing,24h,UI_24h,G1,scDonor_Tg1_fetus,Fetal,UI,UI_Tg_24h,False,CS22,EFO:0009922,CL:2000060,HsapDv:0000029,PATO:0000461,True,NCBITaxon:9606
Pla_HDBR13007974_AAACCCACAATGAACA,9243.0,3032,0.045332,HBC,24h,UI_24h,G1,scDonor_Tg2_mother,Maternal,UI,UI_Tg_24h,False,12pcw,EFO:0009922,CL:3000001,unknown,PATO:0000461,True,NCBITaxon:9606
Pla_HDBR13007974_AAACCCACAGAGAGGG,7753.0,2803,0.031214,HBC,24h,UI_24h,G2M,scDonor_Tg2_mother,Maternal,UI,UI_Tg_24h,False,12pcw,EFO:0009922,CL:3000001,unknown,PATO:0000461,True,NCBITaxon:9606
Pla_HDBR13007974_AAACCCACAGTAGAAT,14361.0,3982,0.043799,HBC,24h,UI_24h,G1,scDonor_Tg1_fetus,Fetal,UI,UI_Tg_24h,False,CS22,EFO:0009922,CL:3000001,HsapDv:0000029,PATO:0000461,True,NCBITaxon:9606
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Pla_HDBR13661576_TTTGTTGAGAGTACCG,50722.0,7538,0.071074,VCT,48h,Lm_48h,G1,Hrv236_fetus,Fetal,Lm,Lm_48h,,14pcw,EFO:0009922,CL:2000060,HsapDv:0000021,MONDO:0005828,True,NCBITaxon:9606
Pla_HDBR13661576_TTTGTTGAGCATATGA,16358.0,4066,0.086808,HBC,48h,Lm_48h,S,Hrv236_fetus,Fetal,Lm,Lm_48h,,14pcw,EFO:0009922,CL:3000001,HsapDv:0000021,MONDO:0005828,True,NCBITaxon:9606
Pla_HDBR13661576_TTTGTTGCATCCCGTT,12800.0,4219,0.018906,PV,48h,Lm_48h,S,Hrv236_fetus,Fetal,Lm,Lm_48h,,14pcw,EFO:0009922,CL:4033054,HsapDv:0000021,MONDO:0005828,True,NCBITaxon:9606
Pla_HDBR13661576_TTTGTTGTCAGCTGAT,21082.0,5860,0.049094,Endo_f,48h,Lm_48h,S,Hrv236_fetus,Fetal,Lm,Lm_48h,,14pcw,EFO:0009922,CL:0009092,HsapDv:0000021,MONDO:0005828,True,NCBITaxon:9606


#### **self_reported_ethnicity_ontology_term_id**

In [119]:
adata.obs['self_reported_ethnicity_ontology_term_id'] = ['unknown'] * len(adata.obs)

In [120]:
# change data type

In [121]:
adata.obs['self_reported_ethnicity_ontology_term_id'] = adata.obs['self_reported_ethnicity_ontology_term_id'].astype('category')

In [122]:
# view obs

In [123]:
adata.obs

Unnamed: 0,n_counts,n_genes,percent_mito,cell_type,hpi,stage,phase,donor_id,MFgenotype,infection,stage_perInfection,Tg_infected,Dev_Stage,assay_ontology_term_id,cell_type_ontology_term_id,development_stage_ontology_term_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id,self_reported_ethnicity_ontology_term_id
Pla_HDBR13007974_AAACCCAAGCGTTGTT,6432.0,1904,0.019590,F,24h,UI_24h,G1,scDonor_Tg2_mother,Maternal,UI,UI_Tg_24h,False,12pcw,EFO:0009922,CL:0000057,unknown,PATO:0000461,True,NCBITaxon:9606,unknown
Pla_HDBR13007974_AAACCCAAGTAGTCAA,49221.0,5525,0.045489,VCT_fusing,24h,UI_24h,G1,scDonor_Tg1_fetus,Fetal,UI,UI_Tg_24h,False,CS22,EFO:0009922,CL:2000060,HsapDv:0000029,PATO:0000461,True,NCBITaxon:9606,unknown
Pla_HDBR13007974_AAACCCACAATGAACA,9243.0,3032,0.045332,HBC,24h,UI_24h,G1,scDonor_Tg2_mother,Maternal,UI,UI_Tg_24h,False,12pcw,EFO:0009922,CL:3000001,unknown,PATO:0000461,True,NCBITaxon:9606,unknown
Pla_HDBR13007974_AAACCCACAGAGAGGG,7753.0,2803,0.031214,HBC,24h,UI_24h,G2M,scDonor_Tg2_mother,Maternal,UI,UI_Tg_24h,False,12pcw,EFO:0009922,CL:3000001,unknown,PATO:0000461,True,NCBITaxon:9606,unknown
Pla_HDBR13007974_AAACCCACAGTAGAAT,14361.0,3982,0.043799,HBC,24h,UI_24h,G1,scDonor_Tg1_fetus,Fetal,UI,UI_Tg_24h,False,CS22,EFO:0009922,CL:3000001,HsapDv:0000029,PATO:0000461,True,NCBITaxon:9606,unknown
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Pla_HDBR13661576_TTTGTTGAGAGTACCG,50722.0,7538,0.071074,VCT,48h,Lm_48h,G1,Hrv236_fetus,Fetal,Lm,Lm_48h,,14pcw,EFO:0009922,CL:2000060,HsapDv:0000021,MONDO:0005828,True,NCBITaxon:9606,unknown
Pla_HDBR13661576_TTTGTTGAGCATATGA,16358.0,4066,0.086808,HBC,48h,Lm_48h,S,Hrv236_fetus,Fetal,Lm,Lm_48h,,14pcw,EFO:0009922,CL:3000001,HsapDv:0000021,MONDO:0005828,True,NCBITaxon:9606,unknown
Pla_HDBR13661576_TTTGTTGCATCCCGTT,12800.0,4219,0.018906,PV,48h,Lm_48h,S,Hrv236_fetus,Fetal,Lm,Lm_48h,,14pcw,EFO:0009922,CL:4033054,HsapDv:0000021,MONDO:0005828,True,NCBITaxon:9606,unknown
Pla_HDBR13661576_TTTGTTGTCAGCTGAT,21082.0,5860,0.049094,Endo_f,48h,Lm_48h,S,Hrv236_fetus,Fetal,Lm,Lm_48h,,14pcw,EFO:0009922,CL:0009092,HsapDv:0000021,MONDO:0005828,True,NCBITaxon:9606,unknown


#### **sex_ontology_term_id**

In [124]:
# identify the column in adata.obs which corresponds to sex

In [125]:
adata.obs.columns

Index(['n_counts', 'n_genes', 'percent_mito', 'cell_type', 'hpi', 'stage',
       'phase', 'donor_id', 'MFgenotype', 'infection', 'stage_perInfection',
       'Tg_infected', 'Dev_Stage', 'assay_ontology_term_id',
       'cell_type_ontology_term_id', 'development_stage_ontology_term_id',
       'disease_ontology_term_id', 'is_primary_data',
       'organism_ontology_term_id',
       'self_reported_ethnicity_ontology_term_id'],
      dtype='object')

In [126]:
# list the unique values 

In [127]:
list(adata.obs['MFgenotype'].unique())

['Maternal', 'Fetal']

In [128]:
# create a dictionary of sex and sex ontology term id

In [129]:
mapping= {'Maternal': 'PATO:0000383', 'Fetal': 'unknown'}

In [130]:
# add sex_ontology_term_id column

In [131]:
adata.obs['sex_ontology_term_id'] = adata.obs['MFgenotype'].map(mapping)

In [132]:
# change data type

In [133]:
adata.obs['sex_ontology_term_id'] = adata.obs['sex_ontology_term_id'].astype('category')

In [134]:
adata.obs

Unnamed: 0,n_counts,n_genes,percent_mito,cell_type,hpi,stage,phase,donor_id,MFgenotype,infection,...,Tg_infected,Dev_Stage,assay_ontology_term_id,cell_type_ontology_term_id,development_stage_ontology_term_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id,self_reported_ethnicity_ontology_term_id,sex_ontology_term_id
Pla_HDBR13007974_AAACCCAAGCGTTGTT,6432.0,1904,0.019590,F,24h,UI_24h,G1,scDonor_Tg2_mother,Maternal,UI,...,False,12pcw,EFO:0009922,CL:0000057,unknown,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000383
Pla_HDBR13007974_AAACCCAAGTAGTCAA,49221.0,5525,0.045489,VCT_fusing,24h,UI_24h,G1,scDonor_Tg1_fetus,Fetal,UI,...,False,CS22,EFO:0009922,CL:2000060,HsapDv:0000029,PATO:0000461,True,NCBITaxon:9606,unknown,unknown
Pla_HDBR13007974_AAACCCACAATGAACA,9243.0,3032,0.045332,HBC,24h,UI_24h,G1,scDonor_Tg2_mother,Maternal,UI,...,False,12pcw,EFO:0009922,CL:3000001,unknown,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000383
Pla_HDBR13007974_AAACCCACAGAGAGGG,7753.0,2803,0.031214,HBC,24h,UI_24h,G2M,scDonor_Tg2_mother,Maternal,UI,...,False,12pcw,EFO:0009922,CL:3000001,unknown,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000383
Pla_HDBR13007974_AAACCCACAGTAGAAT,14361.0,3982,0.043799,HBC,24h,UI_24h,G1,scDonor_Tg1_fetus,Fetal,UI,...,False,CS22,EFO:0009922,CL:3000001,HsapDv:0000029,PATO:0000461,True,NCBITaxon:9606,unknown,unknown
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Pla_HDBR13661576_TTTGTTGAGAGTACCG,50722.0,7538,0.071074,VCT,48h,Lm_48h,G1,Hrv236_fetus,Fetal,Lm,...,,14pcw,EFO:0009922,CL:2000060,HsapDv:0000021,MONDO:0005828,True,NCBITaxon:9606,unknown,unknown
Pla_HDBR13661576_TTTGTTGAGCATATGA,16358.0,4066,0.086808,HBC,48h,Lm_48h,S,Hrv236_fetus,Fetal,Lm,...,,14pcw,EFO:0009922,CL:3000001,HsapDv:0000021,MONDO:0005828,True,NCBITaxon:9606,unknown,unknown
Pla_HDBR13661576_TTTGTTGCATCCCGTT,12800.0,4219,0.018906,PV,48h,Lm_48h,S,Hrv236_fetus,Fetal,Lm,...,,14pcw,EFO:0009922,CL:4033054,HsapDv:0000021,MONDO:0005828,True,NCBITaxon:9606,unknown,unknown
Pla_HDBR13661576_TTTGTTGTCAGCTGAT,21082.0,5860,0.049094,Endo_f,48h,Lm_48h,S,Hrv236_fetus,Fetal,Lm,...,,14pcw,EFO:0009922,CL:0009092,HsapDv:0000021,MONDO:0005828,True,NCBITaxon:9606,unknown,unknown


#### **suspension_type**

In [135]:
list(adata.obs['donor_id'].unique())

['scDonor_Tg2_mother',
 'scDonor_Tg1_fetus',
 'scDonor_Tg1_mother',
 'scDonor_Tg2_fetus',
 'scDonor_Tg4_fetus',
 'scDonor_Tg3_fetus',
 'scDonor_Tg3_mother',
 'scDonor_Tg4_mother',
 'Hrv124_fetus',
 'Hrv124_mother',
 'Hrv168_fetus',
 'Hrv168_mother',
 'Hrv232_fetus',
 'Hrv236_mother',
 'Hrv236_fetus',
 'Hrv136_fetus',
 'Hrv135_fetus',
 'Hrv135_mother',
 'Hrv136_mother',
 'Hrv232_mother']

In [136]:
adata.obs['suspension_type'] = ['cell'] * len(adata.obs)

In [137]:
# change data type of column

In [138]:
adata.obs['suspension_type'] = adata.obs['suspension_type'].astype('category')

In [139]:
# view obs

In [140]:
adata.obs

Unnamed: 0,n_counts,n_genes,percent_mito,cell_type,hpi,stage,phase,donor_id,MFgenotype,infection,...,Dev_Stage,assay_ontology_term_id,cell_type_ontology_term_id,development_stage_ontology_term_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id,self_reported_ethnicity_ontology_term_id,sex_ontology_term_id,suspension_type
Pla_HDBR13007974_AAACCCAAGCGTTGTT,6432.0,1904,0.019590,F,24h,UI_24h,G1,scDonor_Tg2_mother,Maternal,UI,...,12pcw,EFO:0009922,CL:0000057,unknown,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000383,cell
Pla_HDBR13007974_AAACCCAAGTAGTCAA,49221.0,5525,0.045489,VCT_fusing,24h,UI_24h,G1,scDonor_Tg1_fetus,Fetal,UI,...,CS22,EFO:0009922,CL:2000060,HsapDv:0000029,PATO:0000461,True,NCBITaxon:9606,unknown,unknown,cell
Pla_HDBR13007974_AAACCCACAATGAACA,9243.0,3032,0.045332,HBC,24h,UI_24h,G1,scDonor_Tg2_mother,Maternal,UI,...,12pcw,EFO:0009922,CL:3000001,unknown,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000383,cell
Pla_HDBR13007974_AAACCCACAGAGAGGG,7753.0,2803,0.031214,HBC,24h,UI_24h,G2M,scDonor_Tg2_mother,Maternal,UI,...,12pcw,EFO:0009922,CL:3000001,unknown,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000383,cell
Pla_HDBR13007974_AAACCCACAGTAGAAT,14361.0,3982,0.043799,HBC,24h,UI_24h,G1,scDonor_Tg1_fetus,Fetal,UI,...,CS22,EFO:0009922,CL:3000001,HsapDv:0000029,PATO:0000461,True,NCBITaxon:9606,unknown,unknown,cell
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Pla_HDBR13661576_TTTGTTGAGAGTACCG,50722.0,7538,0.071074,VCT,48h,Lm_48h,G1,Hrv236_fetus,Fetal,Lm,...,14pcw,EFO:0009922,CL:2000060,HsapDv:0000021,MONDO:0005828,True,NCBITaxon:9606,unknown,unknown,cell
Pla_HDBR13661576_TTTGTTGAGCATATGA,16358.0,4066,0.086808,HBC,48h,Lm_48h,S,Hrv236_fetus,Fetal,Lm,...,14pcw,EFO:0009922,CL:3000001,HsapDv:0000021,MONDO:0005828,True,NCBITaxon:9606,unknown,unknown,cell
Pla_HDBR13661576_TTTGTTGCATCCCGTT,12800.0,4219,0.018906,PV,48h,Lm_48h,S,Hrv236_fetus,Fetal,Lm,...,14pcw,EFO:0009922,CL:4033054,HsapDv:0000021,MONDO:0005828,True,NCBITaxon:9606,unknown,unknown,cell
Pla_HDBR13661576_TTTGTTGTCAGCTGAT,21082.0,5860,0.049094,Endo_f,48h,Lm_48h,S,Hrv236_fetus,Fetal,Lm,...,14pcw,EFO:0009922,CL:0009092,HsapDv:0000021,MONDO:0005828,True,NCBITaxon:9606,unknown,unknown,cell


#### **tissue_type**

In [141]:
adata.obs['tissue_type'] = ['tissue'] * len(adata.obs)

In [142]:
adata.obs['tissue_type'] = adata.obs['tissue_type'].astype('category')

#### **tissue_ontology_term_id**

In [143]:
# identify the column in adata.obs which corresponds to tissue

In [144]:
adata.obs.columns

Index(['n_counts', 'n_genes', 'percent_mito', 'cell_type', 'hpi', 'stage',
       'phase', 'donor_id', 'MFgenotype', 'infection', 'stage_perInfection',
       'Tg_infected', 'Dev_Stage', 'assay_ontology_term_id',
       'cell_type_ontology_term_id', 'development_stage_ontology_term_id',
       'disease_ontology_term_id', 'is_primary_data',
       'organism_ontology_term_id', 'self_reported_ethnicity_ontology_term_id',
       'sex_ontology_term_id', 'suspension_type', 'tissue_type'],
      dtype='object')

In [145]:
# add 'tissue_ontology_term_id' column

In [146]:
adata.obs['tissue_ontology_term_id'] = ['UBERON:0001987'] * len(adata.obs)

In [147]:
# change data type of column

In [148]:
adata.obs['tissue_ontology_term_id'] = adata.obs['tissue_ontology_term_id'].astype('category')

In [149]:
#list the unique values in 'tissue_ontology_term_id' column

In [150]:
list(adata.obs['tissue_ontology_term_id'].unique())

['UBERON:0001987']

In [151]:
# view obs

In [152]:
adata.obs

Unnamed: 0,n_counts,n_genes,percent_mito,cell_type,hpi,stage,phase,donor_id,MFgenotype,infection,...,cell_type_ontology_term_id,development_stage_ontology_term_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id,self_reported_ethnicity_ontology_term_id,sex_ontology_term_id,suspension_type,tissue_type,tissue_ontology_term_id
Pla_HDBR13007974_AAACCCAAGCGTTGTT,6432.0,1904,0.019590,F,24h,UI_24h,G1,scDonor_Tg2_mother,Maternal,UI,...,CL:0000057,unknown,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000383,cell,tissue,UBERON:0001987
Pla_HDBR13007974_AAACCCAAGTAGTCAA,49221.0,5525,0.045489,VCT_fusing,24h,UI_24h,G1,scDonor_Tg1_fetus,Fetal,UI,...,CL:2000060,HsapDv:0000029,PATO:0000461,True,NCBITaxon:9606,unknown,unknown,cell,tissue,UBERON:0001987
Pla_HDBR13007974_AAACCCACAATGAACA,9243.0,3032,0.045332,HBC,24h,UI_24h,G1,scDonor_Tg2_mother,Maternal,UI,...,CL:3000001,unknown,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000383,cell,tissue,UBERON:0001987
Pla_HDBR13007974_AAACCCACAGAGAGGG,7753.0,2803,0.031214,HBC,24h,UI_24h,G2M,scDonor_Tg2_mother,Maternal,UI,...,CL:3000001,unknown,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000383,cell,tissue,UBERON:0001987
Pla_HDBR13007974_AAACCCACAGTAGAAT,14361.0,3982,0.043799,HBC,24h,UI_24h,G1,scDonor_Tg1_fetus,Fetal,UI,...,CL:3000001,HsapDv:0000029,PATO:0000461,True,NCBITaxon:9606,unknown,unknown,cell,tissue,UBERON:0001987
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Pla_HDBR13661576_TTTGTTGAGAGTACCG,50722.0,7538,0.071074,VCT,48h,Lm_48h,G1,Hrv236_fetus,Fetal,Lm,...,CL:2000060,HsapDv:0000021,MONDO:0005828,True,NCBITaxon:9606,unknown,unknown,cell,tissue,UBERON:0001987
Pla_HDBR13661576_TTTGTTGAGCATATGA,16358.0,4066,0.086808,HBC,48h,Lm_48h,S,Hrv236_fetus,Fetal,Lm,...,CL:3000001,HsapDv:0000021,MONDO:0005828,True,NCBITaxon:9606,unknown,unknown,cell,tissue,UBERON:0001987
Pla_HDBR13661576_TTTGTTGCATCCCGTT,12800.0,4219,0.018906,PV,48h,Lm_48h,S,Hrv236_fetus,Fetal,Lm,...,CL:4033054,HsapDv:0000021,MONDO:0005828,True,NCBITaxon:9606,unknown,unknown,cell,tissue,UBERON:0001987
Pla_HDBR13661576_TTTGTTGTCAGCTGAT,21082.0,5860,0.049094,Endo_f,48h,Lm_48h,S,Hrv236_fetus,Fetal,Lm,...,CL:0009092,HsapDv:0000021,MONDO:0005828,True,NCBITaxon:9606,unknown,unknown,cell,tissue,UBERON:0001987


#### **obsm (Embeddings)**

In [153]:
# view obsm

In [154]:
# check whether all columns are prefixed with X

In [155]:
adata.obsm

AxisArrays with keys: X_umap

#### **uns (Dataset Metadata)**

In [156]:
# View

In [157]:
adata.uns

OverloadedDict, wrapping:
	OrderedDict()
With overloaded keys:
	['neighbors'].

In [158]:
adata.uns.keys

<bound method OverloadedDict.keys of OverloadedDict, wrapping:
	OrderedDict()
With overloaded keys:
	['neighbors'].>

In [159]:
# Give a title for the dataset

In [160]:
adata.uns['title'] = 'Placenta Infection'

In [161]:
# Set the default embedding

In [162]:
adata.uns['default_embedding'] = 'X_umap'

### **Final check**

In [163]:
# view anndata object

In [164]:
adata

AnnData object with n_obs × n_vars = 158978 × 36503
    obs: 'n_counts', 'n_genes', 'percent_mito', 'cell_type', 'hpi', 'stage', 'phase', 'donor_id', 'MFgenotype', 'infection', 'stage_perInfection', 'Tg_infected', 'Dev_Stage', 'assay_ontology_term_id', 'cell_type_ontology_term_id', 'development_stage_ontology_term_id', 'disease_ontology_term_id', 'is_primary_data', 'organism_ontology_term_id', 'self_reported_ethnicity_ontology_term_id', 'sex_ontology_term_id', 'suspension_type', 'tissue_type', 'tissue_ontology_term_id'
    var: 'feature_types', 'mean-0-0', 'std-0-0', 'mean-1-0', 'std-1-0', 'mean-2-0', 'std-2-0', 'highly_variable-0', 'highly_variable_rank-0', 'means-0', 'variances-0', 'variances_norm-0', 'mean-0-1', 'std-0-1', 'mean-1-1', 'std-1-1', 'mean-2-1', 'std-2-1', 'highly_variable-1', 'highly_variable_rank-1', 'means-1', 'variances-1', 'variances_norm-1', 'name', 'feature_is_filtered'
    uns: 'title', 'default_embedding'
    obsm: 'X_umap'

In [165]:
# view obs and var data types

In [166]:
adata.obs.dtypes

n_counts                                     float32
n_genes                                        int64
percent_mito                                 float32
cell_type                                   category
hpi                                         category
stage                                       category
phase                                       category
donor_id                                    category
MFgenotype                                  category
infection                                   category
stage_perInfection                          category
Tg_infected                                 category
Dev_Stage                                   category
assay_ontology_term_id                      category
cell_type_ontology_term_id                  category
development_stage_ontology_term_id          category
disease_ontology_term_id                    category
is_primary_data                                 bool
organism_ontology_term_id                   ca

In [167]:
dty = pd.DataFrame(adata.var.dtypes, columns = ['dtype'])
for c in dty[dty['dtype'] == 'float64'].index.values:
    adata.var[c] = adata.var[c].astype('float32')
    print(f"changed {c} from float64 to float32")
for c in dty[dty['dtype'] == 'int64'].index.values:
    adata.var[c] = adata.var[c].astype('int32') 
    print(f"changed {c} from int64 to int32")

changed mean-0-0 from float64 to float32
changed std-0-0 from float64 to float32
changed mean-1-0 from float64 to float32
changed std-1-0 from float64 to float32
changed mean-2-0 from float64 to float32
changed std-2-0 from float64 to float32
changed means-0 from float64 to float32
changed variances-0 from float64 to float32
changed variances_norm-0 from float64 to float32
changed mean-0-1 from float64 to float32
changed std-0-1 from float64 to float32
changed mean-1-1 from float64 to float32
changed std-1-1 from float64 to float32
changed mean-2-1 from float64 to float32
changed std-2-1 from float64 to float32
changed means-1 from float64 to float32
changed variances-1 from float64 to float32
changed variances_norm-1 from float64 to float32


In [168]:
dty = pd.DataFrame(adata.obs.dtypes, columns = ['dtype'])
for c in dty[dty['dtype'] == 'float64'].index.values:
    adata.obs[c] = adata.obs[c].astype('float32')
    print(f"changed {c} from float64 to float32")
for c in dty[dty['dtype'] == 'int64'].index.values:
    adata.obs[c] = adata.obs[c].astype('int32') 
    print(f"changed {c} from int64 to int32")
for c in dty[dty['dtype'] == 'object'].index.values:
    adata.obs[c] = adata.obs[c].astype('category') 
    print(f"changed {c} from object to category")

changed n_genes from int64 to int32


In [169]:
# view obs

In [170]:
adata.obs

Unnamed: 0,n_counts,n_genes,percent_mito,cell_type,hpi,stage,phase,donor_id,MFgenotype,infection,...,cell_type_ontology_term_id,development_stage_ontology_term_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id,self_reported_ethnicity_ontology_term_id,sex_ontology_term_id,suspension_type,tissue_type,tissue_ontology_term_id
Pla_HDBR13007974_AAACCCAAGCGTTGTT,6432.0,1904,0.019590,F,24h,UI_24h,G1,scDonor_Tg2_mother,Maternal,UI,...,CL:0000057,unknown,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000383,cell,tissue,UBERON:0001987
Pla_HDBR13007974_AAACCCAAGTAGTCAA,49221.0,5525,0.045489,VCT_fusing,24h,UI_24h,G1,scDonor_Tg1_fetus,Fetal,UI,...,CL:2000060,HsapDv:0000029,PATO:0000461,True,NCBITaxon:9606,unknown,unknown,cell,tissue,UBERON:0001987
Pla_HDBR13007974_AAACCCACAATGAACA,9243.0,3032,0.045332,HBC,24h,UI_24h,G1,scDonor_Tg2_mother,Maternal,UI,...,CL:3000001,unknown,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000383,cell,tissue,UBERON:0001987
Pla_HDBR13007974_AAACCCACAGAGAGGG,7753.0,2803,0.031214,HBC,24h,UI_24h,G2M,scDonor_Tg2_mother,Maternal,UI,...,CL:3000001,unknown,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000383,cell,tissue,UBERON:0001987
Pla_HDBR13007974_AAACCCACAGTAGAAT,14361.0,3982,0.043799,HBC,24h,UI_24h,G1,scDonor_Tg1_fetus,Fetal,UI,...,CL:3000001,HsapDv:0000029,PATO:0000461,True,NCBITaxon:9606,unknown,unknown,cell,tissue,UBERON:0001987
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Pla_HDBR13661576_TTTGTTGAGAGTACCG,50722.0,7538,0.071074,VCT,48h,Lm_48h,G1,Hrv236_fetus,Fetal,Lm,...,CL:2000060,HsapDv:0000021,MONDO:0005828,True,NCBITaxon:9606,unknown,unknown,cell,tissue,UBERON:0001987
Pla_HDBR13661576_TTTGTTGAGCATATGA,16358.0,4066,0.086808,HBC,48h,Lm_48h,S,Hrv236_fetus,Fetal,Lm,...,CL:3000001,HsapDv:0000021,MONDO:0005828,True,NCBITaxon:9606,unknown,unknown,cell,tissue,UBERON:0001987
Pla_HDBR13661576_TTTGTTGCATCCCGTT,12800.0,4219,0.018906,PV,48h,Lm_48h,S,Hrv236_fetus,Fetal,Lm,...,CL:4033054,HsapDv:0000021,MONDO:0005828,True,NCBITaxon:9606,unknown,unknown,cell,tissue,UBERON:0001987
Pla_HDBR13661576_TTTGTTGTCAGCTGAT,21082.0,5860,0.049094,Endo_f,48h,Lm_48h,S,Hrv236_fetus,Fetal,Lm,...,CL:0009092,HsapDv:0000021,MONDO:0005828,True,NCBITaxon:9606,unknown,unknown,cell,tissue,UBERON:0001987


In [171]:
adata.obs.columns

Index(['n_counts', 'n_genes', 'percent_mito', 'cell_type', 'hpi', 'stage',
       'phase', 'donor_id', 'MFgenotype', 'infection', 'stage_perInfection',
       'Tg_infected', 'Dev_Stage', 'assay_ontology_term_id',
       'cell_type_ontology_term_id', 'development_stage_ontology_term_id',
       'disease_ontology_term_id', 'is_primary_data',
       'organism_ontology_term_id', 'self_reported_ethnicity_ontology_term_id',
       'sex_ontology_term_id', 'suspension_type', 'tissue_type',
       'tissue_ontology_term_id'],
      dtype='object')

In [172]:
# delete unwanted columns in obs

In [173]:
del adata.obs['Dev_Stage']
#del adata.var['gene_ids']
#del araw.var['gene_ids']

In [174]:
# view obs

In [175]:
adata.obs

Unnamed: 0,n_counts,n_genes,percent_mito,cell_type,hpi,stage,phase,donor_id,MFgenotype,infection,...,cell_type_ontology_term_id,development_stage_ontology_term_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id,self_reported_ethnicity_ontology_term_id,sex_ontology_term_id,suspension_type,tissue_type,tissue_ontology_term_id
Pla_HDBR13007974_AAACCCAAGCGTTGTT,6432.0,1904,0.019590,F,24h,UI_24h,G1,scDonor_Tg2_mother,Maternal,UI,...,CL:0000057,unknown,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000383,cell,tissue,UBERON:0001987
Pla_HDBR13007974_AAACCCAAGTAGTCAA,49221.0,5525,0.045489,VCT_fusing,24h,UI_24h,G1,scDonor_Tg1_fetus,Fetal,UI,...,CL:2000060,HsapDv:0000029,PATO:0000461,True,NCBITaxon:9606,unknown,unknown,cell,tissue,UBERON:0001987
Pla_HDBR13007974_AAACCCACAATGAACA,9243.0,3032,0.045332,HBC,24h,UI_24h,G1,scDonor_Tg2_mother,Maternal,UI,...,CL:3000001,unknown,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000383,cell,tissue,UBERON:0001987
Pla_HDBR13007974_AAACCCACAGAGAGGG,7753.0,2803,0.031214,HBC,24h,UI_24h,G2M,scDonor_Tg2_mother,Maternal,UI,...,CL:3000001,unknown,PATO:0000461,True,NCBITaxon:9606,unknown,PATO:0000383,cell,tissue,UBERON:0001987
Pla_HDBR13007974_AAACCCACAGTAGAAT,14361.0,3982,0.043799,HBC,24h,UI_24h,G1,scDonor_Tg1_fetus,Fetal,UI,...,CL:3000001,HsapDv:0000029,PATO:0000461,True,NCBITaxon:9606,unknown,unknown,cell,tissue,UBERON:0001987
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Pla_HDBR13661576_TTTGTTGAGAGTACCG,50722.0,7538,0.071074,VCT,48h,Lm_48h,G1,Hrv236_fetus,Fetal,Lm,...,CL:2000060,HsapDv:0000021,MONDO:0005828,True,NCBITaxon:9606,unknown,unknown,cell,tissue,UBERON:0001987
Pla_HDBR13661576_TTTGTTGAGCATATGA,16358.0,4066,0.086808,HBC,48h,Lm_48h,S,Hrv236_fetus,Fetal,Lm,...,CL:3000001,HsapDv:0000021,MONDO:0005828,True,NCBITaxon:9606,unknown,unknown,cell,tissue,UBERON:0001987
Pla_HDBR13661576_TTTGTTGCATCCCGTT,12800.0,4219,0.018906,PV,48h,Lm_48h,S,Hrv236_fetus,Fetal,Lm,...,CL:4033054,HsapDv:0000021,MONDO:0005828,True,NCBITaxon:9606,unknown,unknown,cell,tissue,UBERON:0001987
Pla_HDBR13661576_TTTGTTGTCAGCTGAT,21082.0,5860,0.049094,Endo_f,48h,Lm_48h,S,Hrv236_fetus,Fetal,Lm,...,CL:0009092,HsapDv:0000021,MONDO:0005828,True,NCBITaxon:9606,unknown,unknown,cell,tissue,UBERON:0001987


In [176]:
adata.obs['celltype_annotation'] = adata.obs['cell_type']

In [177]:
del adata.obs['cell_type']

In [178]:
adata.obs.columns

Index(['n_counts', 'n_genes', 'percent_mito', 'hpi', 'stage', 'phase',
       'donor_id', 'MFgenotype', 'infection', 'stage_perInfection',
       'Tg_infected', 'assay_ontology_term_id', 'cell_type_ontology_term_id',
       'development_stage_ontology_term_id', 'disease_ontology_term_id',
       'is_primary_data', 'organism_ontology_term_id',
       'self_reported_ethnicity_ontology_term_id', 'sex_ontology_term_id',
       'suspension_type', 'tissue_type', 'tissue_ontology_term_id',
       'celltype_annotation'],
      dtype='object')

In [179]:
# view var

In [180]:
adata.var

Unnamed: 0_level_0,feature_types,mean-0-0,std-0-0,mean-1-0,std-1-0,mean-2-0,std-2-0,highly_variable-0,highly_variable_rank-0,means-0,...,std-1-1,mean-2-1,std-2-1,highly_variable-1,highly_variable_rank-1,means-1,variances-1,variances_norm-1,name,feature_is_filtered
gene_ids,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ENSG00000130226,Gene Expression,0.017129,0.116157,0.024975,0.183925,0.023525,0.157431,True,805.0,0.079892,...,0.251360,0.022551,0.149444,True,441.0,0.101883,1.780006,6.958053,DPP6,False
ENSG00000223581,Gene Expression,0.000085,0.006993,0.000016,0.003524,0.000057,0.006526,False,,0.000120,...,1.000000,0.000015,0.002162,False,,0.000041,0.000057,1.331620,LINC02665,False
ENSG00000231795,Gene Expression,0.000000,1.000000,0.000000,1.000000,0.000000,1.000000,False,,0.000000,...,1.000000,0.000000,1.000000,False,,0.000000,0.000000,0.000000,ITCH-IT1,False
ENSG00000254238,Gene Expression,0.000448,0.016993,0.000180,0.012661,0.000457,0.019265,False,,0.000893,...,0.014344,0.000831,0.024771,False,,0.001571,0.001601,0.764653,AC100782.1,False
ENSG00000103051,Gene Expression,0.092065,0.231948,0.070127,0.228696,0.083868,0.254461,False,,0.328198,...,0.230533,0.100610,0.278123,False,,0.318025,0.613838,0.634368,COG4,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ENSG00000135447,Gene Expression,0.001417,0.029373,0.001095,0.033551,0.002680,0.055134,False,,0.003968,...,0.023663,0.002276,0.047242,False,,0.004201,0.008173,1.277199,PPP1R1A,False
ENSG00000261629,Gene Expression,0.000000,1.000000,0.000003,0.000799,0.000011,0.002319,False,,0.000023,...,1.000000,0.000000,1.000000,False,,0.000000,0.000000,0.000000,AC005692.2,False
ENSG00000234787,Gene Expression,0.000246,0.013797,0.000227,0.017777,0.000179,0.011465,False,,0.000603,...,0.024277,0.000209,0.014916,True,1731.0,0.001067,0.004306,3.004970,LINC00458,False
ENSG00000271727,Gene Expression,0.008520,0.074654,0.005196,0.062597,0.007954,0.079646,False,,0.026489,...,0.075940,0.007365,0.075187,False,,0.023708,0.043419,0.879939,AL357054.2,False


In [181]:
araw.var

Unnamed: 0_level_0,feature_types,mean-0-0,std-0-0,mean-1-0,std-1-0,mean-2-0,std-2-0,highly_variable-0,highly_variable_rank-0,means-0,...,mean-1-1,std-1-1,mean-2-1,std-2-1,highly_variable-1,highly_variable_rank-1,means-1,variances-1,variances_norm-1,name
gene_ids,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ENSG00000130226,Gene Expression,0.017129,0.116157,0.024975,0.183925,0.023525,0.157431,True,805.0,0.079892,...,0.038353,0.251360,0.022551,0.149444,True,441.0,0.101883,1.780006,6.958053,DPP6
ENSG00000223581,Gene Expression,0.000085,0.006993,0.000016,0.003524,0.000057,0.006526,False,,0.000120,...,0.000000,1.000000,0.000015,0.002162,False,,0.000041,0.000057,1.331620,LINC02665
ENSG00000231795,Gene Expression,0.000000,1.000000,0.000000,1.000000,0.000000,1.000000,False,,0.000000,...,0.000000,1.000000,0.000000,1.000000,False,,0.000000,0.000000,0.000000,ITCH-IT1
ENSG00000254238,Gene Expression,0.000448,0.016993,0.000180,0.012661,0.000457,0.019265,False,,0.000893,...,0.000169,0.014344,0.000831,0.024771,False,,0.001571,0.001601,0.764653,AC100782.1
ENSG00000103051,Gene Expression,0.092065,0.231948,0.070127,0.228696,0.083868,0.254461,False,,0.328198,...,0.064695,0.230533,0.100610,0.278123,False,,0.318025,0.613838,0.634368,COG4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ENSG00000135447,Gene Expression,0.001417,0.029373,0.001095,0.033551,0.002680,0.055134,False,,0.003968,...,0.000509,0.023663,0.002276,0.047242,False,,0.004201,0.008173,1.277199,PPP1R1A
ENSG00000261629,Gene Expression,0.000000,1.000000,0.000003,0.000799,0.000011,0.002319,False,,0.000023,...,0.000000,1.000000,0.000000,1.000000,False,,0.000000,0.000000,0.000000,AC005692.2
ENSG00000234787,Gene Expression,0.000246,0.013797,0.000227,0.017777,0.000179,0.011465,False,,0.000603,...,0.000336,0.024277,0.000209,0.014916,True,1731.0,0.001067,0.004306,3.004970,LINC00458
ENSG00000271727,Gene Expression,0.008520,0.074654,0.005196,0.062597,0.007954,0.079646,False,,0.026489,...,0.006109,0.075940,0.007365,0.075187,False,,0.023708,0.043419,0.879939,AL357054.2


In [182]:
#view uns

In [183]:
adata.uns

OverloadedDict, wrapping:
	OrderedDict([('title', 'Placenta Infection'), ('default_embedding', 'X_umap')])
With overloaded keys:
	['neighbors'].

In [184]:
list(adata.uns.keys())

['title', 'default_embedding']

In [185]:
adata.obs.columns

Index(['n_counts', 'n_genes', 'percent_mito', 'hpi', 'stage', 'phase',
       'donor_id', 'MFgenotype', 'infection', 'stage_perInfection',
       'Tg_infected', 'assay_ontology_term_id', 'cell_type_ontology_term_id',
       'development_stage_ontology_term_id', 'disease_ontology_term_id',
       'is_primary_data', 'organism_ontology_term_id',
       'self_reported_ethnicity_ontology_term_id', 'sex_ontology_term_id',
       'suspension_type', 'tissue_type', 'tissue_ontology_term_id',
       'celltype_annotation'],
      dtype='object')

In [186]:
# Remove unwanted columns in uns

In [187]:
#check the format of expression matrix

In [188]:
adata.X

<158978x36503 sparse matrix of type '<class 'numpy.float32'>'
	with 721430113 stored elements in Compressed Sparse Row format>

In [189]:
araw.X

<158978x36503 sparse matrix of type '<class 'numpy.float32'>'
	with 721430113 stored elements in Compressed Sparse Row format>

In [190]:
#Copy raw counts to adata.raw

In [191]:
adata.raw = araw

In [192]:
adata.write('/lustre/scratch127/cellgen/cellgeni/cxgportal_sets/Placenta_Infection/Final_objects/placenta_infection_lognorm.h5ad', compression = 'gzip')