### **Curating bcc_face_cheek1.h5ad**

Article:  Multi-scale spatial mapping of cell populations across anatomical sites in healthy human skin and basal cell carcinoma

DOI: https://doi.org/10.1101/2023.08.08.551504

Data Source : https://spatial-skin-atlas.cellgeni.sanger.ac.uk

##### **Mount farm**

mount-farm

##### **Packages required for curation**

In [1]:
#Import all packages required for curation

In [2]:
import numpy as np
import pandas as pd
import scanpy as sc
import scipy
from tqdm import tqdm
from scipy import sparse
from scipy.sparse import csr_matrix
import anndata as ad
import os
import subprocess
import math

### **Curation Schema**

##### **X (Matrix Layers)**

##### **AnnData object**

In [3]:
# Load the AnnData object

In [4]:
adata = sc.read_h5ad('/lustre/scratch127/cellgen/cellgeni/cakirb/website_objectfix/spatial-skin-atlas/vis_fixed2/fat/WSSKNKCLsp12140270.h5ad')

In [5]:
# View the AnnData object

In [6]:
adata

AnnData object with n_obs × n_vars = 813 × 18502
    obs: 'in_tissue', 'array_row', 'array_col', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_rb', 'pct_counts_rb', 'barcode', 'c2l_Th', 'c2l_NK', 'c2l_APOD+ fibroblasts', 'c2l_CD8+ T RM', 'c2l_T reg', 'c2l_Macro1_2', 'c2l_DC1', 'c2l_SFRP2+ fibroblasts', 'c2l_TAGLN+ pericytes', 'c2l_POSTN+ fibroblasts', 'c2l_RGS5+ pericytes', 'c2l_VEC', 'c2l_Tc', 'c2l_ILC_NK', 'c2l_BC', 'c2l_Monocytes', 'c2l_MastC', 'c2l_Melanocytes', 'c2l_DC2', 'c2l_LEC', 'c2l_PlasmaC', 'c2l_PTGDS+ fibroblasts', 'c2l_MigDC', 'c2l_Neuronal_SchwannC', 'c2l_SMC', 'c2l_Skeletal muscle cells', 'leiden', 'c2l_Suprabasal keratinocytes', 'c2l_Basal keratinocytes', 'c2l_Chondrocytes', 'c2l_IL8+ DC1'
    uns: 'leiden', 'log1p', 'neighbors', 'pca', 'spatial', 'umap'
    obsm: 'X_spatial'

##### **X- expression matrix**

In [7]:
# View the expression matrix of the anndata object

In [8]:
adata.X

array([[-0.14158162, -0.10626014, -0.26868093, ..., -0.09139787,
        -0.35211322, -0.13622642],
       [-0.14158162, -0.10626014,  9.55642   , ..., -0.09139787,
        -0.35211322, -0.13622642],
       [-0.14158162, -0.10626014, -0.26868093, ..., -0.09139787,
        -0.35211322, -0.13622642],
       ...,
       [-0.14158162, -0.10626014,  2.9825585 , ..., -0.09139787,
        -0.35211322, -0.13622642],
       [-0.14158162, -0.10626014, -0.26868093, ..., -0.09139787,
         1.1955801 , -0.13622642],
       [-0.14158162, -0.10626014,  1.7795514 , ..., -0.09139787,
        -0.35211322, -0.13622642]], dtype=float32)

In [9]:
#data type of adata.X

In [10]:
type(adata.X)

numpy.ndarray

In [11]:
#convert to csr_matrix

In [12]:
adata.X = csr_matrix(adata.X)

In [13]:
adata.X

<813x18502 sparse matrix of type '<class 'numpy.float32'>'
	with 15042126 stored elements in Compressed Sparse Row format>

In [14]:
# Print the matrix to check whether it holds raw or normalized counts: integer-valued entries indicate raw counts, while non-integer floating-point entries indicate normalized (or, when negative values appear as they do here, scaled) data.

In [15]:
print(adata.X)

  (0, 0)	-0.14158162
  (0, 1)	-0.106260136
  (0, 2)	-0.26868093
  (0, 3)	-0.35578898
  (0, 4)	-0.11136355
  (0, 5)	-0.33017373
  (0, 6)	-0.18426721
  (0, 7)	4.358582
  (0, 8)	-0.78549427
  (0, 9)	-0.4563895
  (0, 10)	-0.34002554
  (0, 11)	-0.13974263
  (0, 12)	-0.84755695
  (0, 13)	-0.78929675
  (0, 14)	-0.55167663
  (0, 15)	-0.09099433
  (0, 16)	-0.57618064
  (0, 17)	-0.47136736
  (0, 18)	-0.08862121
  (0, 19)	-0.32893533
  (0, 20)	3.3227012
  (0, 21)	0.22411619
  (0, 22)	-0.5615778
  (0, 23)	-0.36980015
  (0, 24)	-0.8187957
  :	:
  (812, 18477)	-0.13934317
  (812, 18478)	-0.54288554
  (812, 18479)	0.7242452
  (812, 18480)	-0.8066544
  (812, 18481)	-1.2227138
  (812, 18482)	0.22858495
  (812, 18483)	-0.22060402
  (812, 18484)	-0.53886014
  (812, 18485)	-0.4129639
  (812, 18486)	-0.27598333
  (812, 18487)	-0.9820389
  (812, 18488)	-0.14966804
  (812, 18489)	-1.0449147
  (812, 18490)	-0.6746705
  (812, 18491)	-0.17006423
  (812, 18492)	-0.696981
  (812, 18493)	-0.15189923
  (812, 18494)

In [16]:
print(adata.raw.X)

  (0, 23)	1.0
  (0, 43)	1.0
  (0, 44)	1.0
  (0, 53)	3.0
  (0, 57)	1.0
  (0, 58)	4.0
  (0, 59)	1.0
  (0, 60)	1.0
  (0, 73)	2.0
  (0, 78)	3.0
  (0, 86)	2.0
  (0, 97)	1.0
  (0, 106)	1.0
  (0, 115)	1.0
  (0, 126)	1.0
  (0, 134)	3.0
  (0, 138)	1.0
  (0, 145)	1.0
  (0, 168)	1.0
  (0, 170)	2.0
  (0, 176)	1.0
  (0, 184)	3.0
  (0, 208)	2.0
  (0, 219)	3.0
  (0, 237)	3.0
  :	:
  (812, 36410)	1.0
  (812, 36411)	1.0
  (812, 36416)	2.0
  (812, 36429)	1.0
  (812, 36433)	3.0
  (812, 36445)	1.0
  (812, 36450)	15.0
  (812, 36470)	5.0
  (812, 36492)	1.0
  (812, 36494)	2.0
  (812, 36513)	1.0
  (812, 36519)	1.0
  (812, 36559)	21.0
  (812, 36560)	8.0
  (812, 36561)	59.0
  (812, 36562)	62.0
  (812, 36564)	38.0
  (812, 36565)	69.0
  (812, 36566)	19.0
  (812, 36567)	1.0
  (812, 36568)	23.0
  (812, 36569)	2.0
  (812, 36571)	21.0
  (812, 36575)	1.0
  (812, 36584)	2.0


##### **Raw counts matrix**

In [17]:
# If X has normalized counts, check for the raw counts matrix.

In [18]:
# Raw counts can be provided in a separate object (see the commented-out read_10x_h5 call below); here they are taken from adata.raw instead.

In [19]:
#araw= sc.read_10x_h5('/lustre/scratch127/cellgen/cellgeni/cxgportal_sets/spatial-skin/Raw/all/WSSKNKCLsp12140270/filtered_feature_bc_matrix.h5')

In [20]:
# view raw object

In [21]:
#araw

In [22]:
# view raw matrix

In [23]:
araw = adata.raw.to_adata()

In [24]:
araw.X

<813x36601 sparse matrix of type '<class 'numpy.float32'>'
	with 3370712 stored elements in Compressed Sparse Row format>

In [25]:
print(araw.X)

  (0, 23)	1.0
  (0, 43)	1.0
  (0, 44)	1.0
  (0, 53)	3.0
  (0, 57)	1.0
  (0, 58)	4.0
  (0, 59)	1.0
  (0, 60)	1.0
  (0, 73)	2.0
  (0, 78)	3.0
  (0, 86)	2.0
  (0, 97)	1.0
  (0, 106)	1.0
  (0, 115)	1.0
  (0, 126)	1.0
  (0, 134)	3.0
  (0, 138)	1.0
  (0, 145)	1.0
  (0, 168)	1.0
  (0, 170)	2.0
  (0, 176)	1.0
  (0, 184)	3.0
  (0, 208)	2.0
  (0, 219)	3.0
  (0, 237)	3.0
  :	:
  (812, 36410)	1.0
  (812, 36411)	1.0
  (812, 36416)	2.0
  (812, 36429)	1.0
  (812, 36433)	3.0
  (812, 36445)	1.0
  (812, 36450)	15.0
  (812, 36470)	5.0
  (812, 36492)	1.0
  (812, 36494)	2.0
  (812, 36513)	1.0
  (812, 36519)	1.0
  (812, 36559)	21.0
  (812, 36560)	8.0
  (812, 36561)	59.0
  (812, 36562)	62.0
  (812, 36564)	38.0
  (812, 36565)	69.0
  (812, 36566)	19.0
  (812, 36567)	1.0
  (812, 36568)	23.0
  (812, 36569)	2.0
  (812, 36571)	21.0
  (812, 36575)	1.0
  (812, 36584)	2.0


In [26]:
# If the raw object combines several datasets, extract the raw counts for this dataset; here araw already has the same 813 observations as adata, so no extraction is needed.

##### **Variables(var)**

In [27]:
#View the var of anndata and raw object

In [28]:
adata.var

AL627309.1
AL627309.5
LINC01409
LINC01128
LINC00115
...
AC240274.1
AC004556.3
AC136616.1
AC007325.4
AC007325.2


In [29]:
araw.var

Unnamed: 0,gene_ids,feature_types,genome
MIR1302-2HG,ENSG00000243485,Gene Expression,GRCh38
FAM138A,ENSG00000237613,Gene Expression,GRCh38
OR4F5,ENSG00000186092,Gene Expression,GRCh38
AL627309.1,ENSG00000238009,Gene Expression,GRCh38
AL627309.3,ENSG00000239945,Gene Expression,GRCh38
...,...,...,...
AC141272.1,ENSG00000277836,Gene Expression,GRCh38
AC023491.2,ENSG00000278633,Gene Expression,GRCh38
AC007325.1,ENSG00000276017,Gene Expression,GRCh38
AC007325.4,ENSG00000278817,Gene Expression,GRCh38


In [30]:
#Ensembl IDs

In [31]:
gene_info = pd.read_table('/lustre/scratch127/cellgen/cellgeni/shibla/ref_files/2020A.gene_names.tsv')

In [32]:
gene_info

Unnamed: 0,ensembl_ids,gene,version
0,ENSG00000243485,MIR1302-2HG,2020A
1,ENSG00000237613,FAM138A,2020A
2,ENSG00000186092,OR4F5,2020A
3,ENSG00000238009,AL627309.1,2020A
4,ENSG00000239945,AL627309.3,2020A
...,...,...,...
36596,ENSG00000277836,AC141272.1,2020A
36597,ENSG00000278633,AC023491.2,2020A
36598,ENSG00000276017,AC007325.1,2020A
36599,ENSG00000278817,AC007325.4,2020A


In [33]:
#create a dictionary with gene symbols and ensembl ids from the gene information file

In [34]:
# Lookup table: gene symbol -> Ensembl ID, built from the reference annotation table.
gene_info_genesym_to_ensembl = gene_info.set_index('gene')['ensembl_ids'].to_dict()

In [35]:
gene_info_genesym_to_ensembl

{'MIR1302-2HG': 'ENSG00000243485',
 'FAM138A': 'ENSG00000237613',
 'OR4F5': 'ENSG00000186092',
 'AL627309.1': 'ENSG00000238009',
 'AL627309.3': 'ENSG00000239945',
 'AL627309.2': 'ENSG00000239906',
 'AL627309.5': 'ENSG00000241860',
 'AL627309.4': 'ENSG00000241599',
 'AP006222.2': 'ENSG00000286448',
 'AL732372.1': 'ENSG00000236601',
 'OR4F29': 'ENSG00000284733',
 'AC114498.1': 'ENSG00000235146',
 'OR4F16': 'ENSG00000284662',
 'AL669831.2': 'ENSG00000229905',
 'LINC01409': 'ENSG00000237491',
 'FAM87B': 'ENSG00000177757',
 'LINC01128': 'ENSG00000228794',
 'LINC00115': 'ENSG00000225880',
 'FAM41C': 'ENSG00000230368',
 'AL645608.6': 'ENSG00000272438',
 'AL645608.2': 'ENSG00000230699',
 'AL645608.4': 'ENSG00000241180',
 'LINC02593': 'ENSG00000223764',
 'SAMD11': 'ENSG00000187634',
 'NOC2L': 'ENSG00000188976',
 'KLHL17': 'ENSG00000187961',
 'PLEKHN1': 'ENSG00000187583',
 'PERM1': 'ENSG00000187642',
 'AL645608.7': 'ENSG00000272512',
 'HES4': 'ENSG00000188290',
 'ISG15': 'ENSG00000187608',
 'AL6

In [36]:
#Store ensembl ids in a new column in adata.var by matching gene symbols and ensembl ids from the gene information file

In [37]:
adata.var['gene_symbols'] = adata.var_names

In [38]:
araw.var['gene_symbols'] = araw.var_names

In [39]:
araw.var_names = araw.var['gene_ids']

In [40]:
adata.var['ensembl_id'] = adata.var['gene_symbols'].map(gene_info_genesym_to_ensembl)

In [41]:
adata.var

Unnamed: 0,gene_symbols,ensembl_id
AL627309.1,AL627309.1,ENSG00000238009
AL627309.5,AL627309.5,ENSG00000241860
LINC01409,LINC01409,ENSG00000237491
LINC01128,LINC01128,ENSG00000228794
LINC00115,LINC00115,ENSG00000225880
...,...,...
AC240274.1,AC240274.1,ENSG00000271254
AC004556.3,AC004556.3,ENSG00000276345
AC136616.1,AC136616.1,ENSG00000273554
AC007325.4,AC007325.4,ENSG00000278817


In [42]:
adata.var['ensembl_id'].isna().sum()

18

In [43]:
# Re-index the genes by Ensembl ID. The 18 genes counted as unmapped in the
# preceding isna() check carry a missing value into var_names at this point.
adata.var_names = adata.var['ensembl_id']

In [44]:
adata.var

Unnamed: 0_level_0,gene_symbols,ensembl_id
ensembl_id,Unnamed: 1_level_1,Unnamed: 2_level_1
ENSG00000238009,AL627309.1,ENSG00000238009
ENSG00000241860,AL627309.5,ENSG00000241860
ENSG00000237491,LINC01409,ENSG00000237491
ENSG00000228794,LINC01128,ENSG00000228794
ENSG00000225880,LINC00115,ENSG00000225880
...,...,...
ENSG00000271254,AC240274.1,ENSG00000271254
ENSG00000276345,AC004556.3,ENSG00000276345
ENSG00000273554,AC136616.1,ENSG00000273554
ENSG00000278817,AC007325.4,ENSG00000278817


In [45]:
del adata.var['gene_symbols']
del adata.var['ensembl_id']

In [46]:
del araw.var['gene_ids']
del araw.var['feature_types']
del araw.var['genome']
del araw.var['gene_symbols']

In [47]:
# Index.drop_duplicates() returns a shorter Index, so assigning it back to
# adata.var.index cannot remove genes: the lengths no longer match and X is
# never touched. Subset the AnnData instead, dropping genes without an
# Ensembl ID and keeping the first occurrence of any repeated ID, so that
# X and var stay aligned.
keep_var = adata.var_names.notna() & ~adata.var_names.duplicated()
adata = adata[:, keep_var].copy()

In [48]:
adata.var

ENSG00000238009
ENSG00000241860
ENSG00000237491
ENSG00000228794
ENSG00000225880
...
ENSG00000271254
ENSG00000276345
ENSG00000273554
ENSG00000278817
ENSG00000277196


In [49]:
araw.var

ENSG00000243485
ENSG00000237613
ENSG00000186092
ENSG00000238009
ENSG00000239945
...
ENSG00000277836
ENSG00000278633
ENSG00000276017
ENSG00000278817
ENSG00000277196


In [50]:
# load the approved genes file

In [51]:
approved_genes = pd.read_csv('/home/jovyan/CXG_DATASETS_PORTAL/gene_info/genes_approved.csv')

In [52]:
# Create a lookup dictionary keyed by the feature_id column of the approved genes file.

In [53]:
# Membership lookup: every approved feature_id maps to the placeholder value 1.
genedict = dict.fromkeys(approved_genes.feature_id, 1)

In [54]:
genedict

{'ERCC-00002': 1,
 'ERCC-00003': 1,
 'ERCC-00004': 1,
 'ERCC-00009': 1,
 'ERCC-00012': 1,
 'ERCC-00013': 1,
 'ERCC-00014': 1,
 'ERCC-00016': 1,
 'ERCC-00017': 1,
 'ERCC-00019': 1,
 'ERCC-00022': 1,
 'ERCC-00024': 1,
 'ERCC-00025': 1,
 'ERCC-00028': 1,
 'ERCC-00031': 1,
 'ERCC-00033': 1,
 'ERCC-00034': 1,
 'ERCC-00035': 1,
 'ERCC-00039': 1,
 'ERCC-00040': 1,
 'ERCC-00041': 1,
 'ERCC-00042': 1,
 'ERCC-00043': 1,
 'ERCC-00044': 1,
 'ERCC-00046': 1,
 'ERCC-00048': 1,
 'ERCC-00051': 1,
 'ERCC-00053': 1,
 'ERCC-00054': 1,
 'ERCC-00057': 1,
 'ERCC-00058': 1,
 'ERCC-00059': 1,
 'ERCC-00060': 1,
 'ERCC-00061': 1,
 'ERCC-00062': 1,
 'ERCC-00067': 1,
 'ERCC-00069': 1,
 'ERCC-00071': 1,
 'ERCC-00073': 1,
 'ERCC-00074': 1,
 'ERCC-00075': 1,
 'ERCC-00076': 1,
 'ERCC-00077': 1,
 'ERCC-00078': 1,
 'ERCC-00079': 1,
 'ERCC-00081': 1,
 'ERCC-00083': 1,
 'ERCC-00084': 1,
 'ERCC-00085': 1,
 'ERCC-00086': 1,
 'ERCC-00092': 1,
 'ERCC-00095': 1,
 'ERCC-00096': 1,
 'ERCC-00097': 1,
 'ERCC-00098': 1,
 'ERCC-000

In [55]:
len(genedict)

116184

In [56]:
# Filter out the genes which are not in the approved genes file

In [57]:
# Keep, in their existing order, only the gene IDs present in the approved-gene lookup.
var_to_keep_adata = list(filter(genedict.__contains__, adata.var_names))
var_to_keep_araw = list(filter(genedict.__contains__, araw.var_names))

In [58]:
len(var_to_keep_adata)

18455

In [59]:
len(var_to_keep_araw)

36503

In [60]:
# Subset both AnnData objects to the approved genes selected above; no gene_symbols column is added at this step.

In [61]:
adata = adata[:, var_to_keep_adata].copy()
araw = araw[:, var_to_keep_araw].copy()

In [62]:
#  View the var

In [63]:
adata.var

ENSG00000238009
ENSG00000241860
ENSG00000237491
ENSG00000228794
ENSG00000225880
...
ENSG00000271254
ENSG00000276345
ENSG00000273554
ENSG00000278817
ENSG00000277196


In [64]:
araw.var

ENSG00000243485
ENSG00000237613
ENSG00000186092
ENSG00000238009
ENSG00000239945
...
ENSG00000277836
ENSG00000278633
ENSG00000276017
ENSG00000278817
ENSG00000277196


#### **feature_is_filtered**

In [65]:
def _pad_csr_columns(matrix, shape):
	"""Return ``matrix`` in CSR form, widened to ``shape`` with empty trailing columns."""
	# Coerce first: reusing data/indices/indptr is only valid for CSR input;
	# CSC input would be misread and dense input has no such attributes.
	as_csr = sparse.csr_matrix(matrix)
	return sparse.csr_matrix((as_csr.data, as_csr.indices, as_csr.indptr), shape=shape)


def add_zero():
	"""Pad the global ``adata`` with empty gene columns so it matches ``araw``.

	Reads the module-level ``adata`` and ``araw`` and rebinds ``adata``.

	When ``araw`` has more genes than ``adata``:
	  * genes found in ``araw`` but not in ``adata`` are appended as all-zero
	    columns to ``X`` and to every layer;
	  * ``var['feature_is_filtered']`` is ``True`` for the appended genes and
	    ``False`` for the genes that were already present;
	  * the result is reordered to the gene order of ``araw`` and the original
	    ``adata.var`` columns are merged back in.
	Only ``X``, ``layers``, ``obs``, ``uns`` and ``obsm`` are carried over to
	the rebuilt object.

	Otherwise ``adata`` is kept as is and ``feature_is_filtered`` is set to
	``False`` for every gene.

	Raises:
		ValueError: if ``araw`` has more genes than ``adata`` but ``adata``
			contains genes that are absent from ``araw``.
	"""
	global adata
	global araw
	if araw.shape[1] > adata.shape[1]:
		raw_genes = araw.var.index.to_list()
		kept_genes = adata.var.index.to_list()
		# Set lookup keeps the membership test linear instead of rescanning a
		# freshly built list for every raw gene.
		kept_lookup = set(kept_genes)
		unknown_genes = kept_lookup.difference(raw_genes)
		if unknown_genes:
			# Padding only works when adata's genes are a subset of araw's.
			raise ValueError(
				f"{len(unknown_genes)} gene(s) in adata are missing from araw, "
				f"e.g. {sorted(map(str, unknown_genes))[:5]}"
			)
		genes_add = [gene for gene in raw_genes if gene not in kept_lookup]
		new_matrix = _pad_csr_columns(adata.X, araw.shape)
		# Column order of the padded matrix: existing genes first, then the
		# appended (empty) ones.
		all_genes = kept_genes + genes_add
		new_var = pd.DataFrame(index=all_genes)
		new_var = pd.merge(new_var, araw.var, left_index=True, right_index=True, how='left')
		new_var['feature_is_filtered'] = False
		new_var.loc[genes_add, 'feature_is_filtered'] = True
		new_adata = ad.AnnData(X=new_matrix, obs=adata.obs, var=new_var, uns=adata.uns, obsm=adata.obsm)
		for layer in adata.layers:
			new_adata.layers[layer] = _pad_csr_columns(adata.layers[layer], araw.shape)
		# Reorder to araw's gene order; copy so .var is assigned on a real
		# object rather than on a view.
		new_adata = new_adata[:, raw_genes].copy()
		new_adata.var = new_adata.var.merge(adata.var, left_index=True, right_index=True, how='left')
		adata = new_adata
	else:
		adata.var['feature_is_filtered'] = False

In [66]:
add_zero()

In [67]:
# view var

In [68]:
adata.var

Unnamed: 0,feature_is_filtered
ENSG00000243485,True
ENSG00000237613,True
ENSG00000186092,True
ENSG00000238009,False
ENSG00000239945,True
...,...
ENSG00000277836,True
ENSG00000278633,True
ENSG00000276017,True
ENSG00000278817,False


In [69]:
list(adata.var['feature_is_filtered'].unique())

[True, False]

In [70]:
True_count = (adata.var['feature_is_filtered']== True).sum()

In [71]:
True_count

18048

In [72]:
araw.var

ENSG00000243485
ENSG00000237613
ENSG00000186092
ENSG00000238009
ENSG00000239945
...
ENSG00000277836
ENSG00000278633
ENSG00000276017
ENSG00000278817
ENSG00000277196


#### **Observations(obs) (Cell metadata)**

In [73]:
#view obs

In [74]:
adata.obs

Unnamed: 0_level_0,in_tissue,array_row,array_col,n_genes_by_counts,total_counts,total_counts_mt,pct_counts_mt,total_counts_rb,pct_counts_rb,barcode,...,c2l_PTGDS+ fibroblasts,c2l_MigDC,c2l_Neuronal_SchwannC,c2l_SMC,c2l_Skeletal muscle cells,leiden,c2l_Suprabasal keratinocytes,c2l_Basal keratinocytes,c2l_Chondrocytes,c2l_IL8+ DC1
barcode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AAACGAGACGGTTGAT-1,1,35,79,2901,6221,112,1.800354,632,10.159139,AAACGAGACGGTTGAT-1,...,0.196646,0.003914,0.144389,0.070759,0.017138,0,1.082395,0.327031,0.034997,0.042624
AAACTGCTGGCTCCAA-1,1,45,67,1254,1980,111,5.606061,214,10.808082,AAACTGCTGGCTCCAA-1,...,0.035559,0.002264,0.128930,0.091078,0.081168,0,1.175316,0.281979,0.054190,0.003451
AAAGGGATGTAGCAAG-1,1,24,62,5032,17380,442,2.543153,1617,9.303798,AAAGGGATGTAGCAAG-1,...,0.012436,0.079175,0.013865,0.000971,4.855034,4,12.310648,3.720640,0.004490,0.001909
AAAGTTGACTCCCGTA-1,1,42,96,2113,4404,251,5.699364,662,15.031789,AAAGTTGACTCCCGTA-1,...,0.177809,0.060277,0.217748,0.019028,0.707736,7,4.451590,1.649659,0.134956,0.092296
AAATACCTATAAGCAT-1,1,47,69,888,1353,93,6.873614,176,13.008129,AAATACCTATAAGCAT-1,...,0.049895,0.011001,0.038003,0.056791,0.102328,3,1.994533,1.576291,0.012748,0.013764
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTGGGCGGCGGTTGCC-1,1,30,76,6681,18322,310,1.691955,2217,12.100207,TTGGGCGGCGGTTGCC-1,...,0.002728,0.033210,0.004577,0.000079,0.000047,1,0.209446,5.144510,0.003048,0.001604
TTGTAAGGCCAGTTGG-1,1,27,81,4018,8698,100,1.149690,1029,11.830306,TTGTAAGGCCAGTTGG-1,...,0.138064,0.003570,0.062600,0.034816,0.002648,2,0.655483,1.187766,0.007860,0.027963
TTGTAATCCGTACTCG-1,1,35,55,3670,12288,130,1.057943,901,7.332357,TTGTAATCCGTACTCG-1,...,0.158557,0.003885,0.052626,0.032617,0.030343,0,0.693258,0.187635,0.017904,0.057686
TTGTTCAGTGTGCTAC-1,1,24,64,5885,23806,705,2.961438,2600,10.921617,TTGTTCAGTGTGCTAC-1,...,0.017451,0.111877,0.012859,0.000322,4.910613,4,22.481792,4.310461,0.003245,0.016599


In [75]:
adata.obs.columns

Index(['in_tissue', 'array_row', 'array_col', 'n_genes_by_counts',
       'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_rb',
       'pct_counts_rb', 'barcode', 'c2l_Th', 'c2l_NK', 'c2l_APOD+ fibroblasts',
       'c2l_CD8+ T RM', 'c2l_T reg', 'c2l_Macro1_2', 'c2l_DC1',
       'c2l_SFRP2+ fibroblasts', 'c2l_TAGLN+ pericytes',
       'c2l_POSTN+ fibroblasts', 'c2l_RGS5+ pericytes', 'c2l_VEC', 'c2l_Tc',
       'c2l_ILC_NK', 'c2l_BC', 'c2l_Monocytes', 'c2l_MastC', 'c2l_Melanocytes',
       'c2l_DC2', 'c2l_LEC', 'c2l_PlasmaC', 'c2l_PTGDS+ fibroblasts',
       'c2l_MigDC', 'c2l_Neuronal_SchwannC', 'c2l_SMC',
       'c2l_Skeletal muscle cells', 'leiden', 'c2l_Suprabasal keratinocytes',
       'c2l_Basal keratinocytes', 'c2l_Chondrocytes', 'c2l_IL8+ DC1'],
      dtype='object')

#### **assay_ontology_term_id**

In [76]:
# add the assay_ontology_term_id column

In [77]:
# Every observation in this object gets the same assay term; a scalar is broadcast to all rows.
adata.obs['assay_ontology_term_id'] = 'EFO:0010961'

In [78]:
# change datatype of the column

In [79]:
adata.obs['assay_ontology_term_id'] = adata.obs['assay_ontology_term_id'].astype('category')

In [80]:
# view adata.obs

In [81]:
adata.obs

Unnamed: 0_level_0,in_tissue,array_row,array_col,n_genes_by_counts,total_counts,total_counts_mt,pct_counts_mt,total_counts_rb,pct_counts_rb,barcode,...,c2l_MigDC,c2l_Neuronal_SchwannC,c2l_SMC,c2l_Skeletal muscle cells,leiden,c2l_Suprabasal keratinocytes,c2l_Basal keratinocytes,c2l_Chondrocytes,c2l_IL8+ DC1,assay_ontology_term_id
barcode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AAACGAGACGGTTGAT-1,1,35,79,2901,6221,112,1.800354,632,10.159139,AAACGAGACGGTTGAT-1,...,0.003914,0.144389,0.070759,0.017138,0,1.082395,0.327031,0.034997,0.042624,EFO:0010961
AAACTGCTGGCTCCAA-1,1,45,67,1254,1980,111,5.606061,214,10.808082,AAACTGCTGGCTCCAA-1,...,0.002264,0.128930,0.091078,0.081168,0,1.175316,0.281979,0.054190,0.003451,EFO:0010961
AAAGGGATGTAGCAAG-1,1,24,62,5032,17380,442,2.543153,1617,9.303798,AAAGGGATGTAGCAAG-1,...,0.079175,0.013865,0.000971,4.855034,4,12.310648,3.720640,0.004490,0.001909,EFO:0010961
AAAGTTGACTCCCGTA-1,1,42,96,2113,4404,251,5.699364,662,15.031789,AAAGTTGACTCCCGTA-1,...,0.060277,0.217748,0.019028,0.707736,7,4.451590,1.649659,0.134956,0.092296,EFO:0010961
AAATACCTATAAGCAT-1,1,47,69,888,1353,93,6.873614,176,13.008129,AAATACCTATAAGCAT-1,...,0.011001,0.038003,0.056791,0.102328,3,1.994533,1.576291,0.012748,0.013764,EFO:0010961
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTGGGCGGCGGTTGCC-1,1,30,76,6681,18322,310,1.691955,2217,12.100207,TTGGGCGGCGGTTGCC-1,...,0.033210,0.004577,0.000079,0.000047,1,0.209446,5.144510,0.003048,0.001604,EFO:0010961
TTGTAAGGCCAGTTGG-1,1,27,81,4018,8698,100,1.149690,1029,11.830306,TTGTAAGGCCAGTTGG-1,...,0.003570,0.062600,0.034816,0.002648,2,0.655483,1.187766,0.007860,0.027963,EFO:0010961
TTGTAATCCGTACTCG-1,1,35,55,3670,12288,130,1.057943,901,7.332357,TTGTAATCCGTACTCG-1,...,0.003885,0.052626,0.032617,0.030343,0,0.693258,0.187635,0.017904,0.057686,EFO:0010961
TTGTTCAGTGTGCTAC-1,1,24,64,5885,23806,705,2.961438,2600,10.921617,TTGTTCAGTGTGCTAC-1,...,0.111877,0.012859,0.000322,4.910613,4,22.481792,4.310461,0.003245,0.016599,EFO:0010961


#### **cell_type_ontology_term_id**

In [82]:
# Get the columns in adata.obs related to cell type annotation

In [83]:
adata.obs.columns

Index(['in_tissue', 'array_row', 'array_col', 'n_genes_by_counts',
       'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_rb',
       'pct_counts_rb', 'barcode', 'c2l_Th', 'c2l_NK', 'c2l_APOD+ fibroblasts',
       'c2l_CD8+ T RM', 'c2l_T reg', 'c2l_Macro1_2', 'c2l_DC1',
       'c2l_SFRP2+ fibroblasts', 'c2l_TAGLN+ pericytes',
       'c2l_POSTN+ fibroblasts', 'c2l_RGS5+ pericytes', 'c2l_VEC', 'c2l_Tc',
       'c2l_ILC_NK', 'c2l_BC', 'c2l_Monocytes', 'c2l_MastC', 'c2l_Melanocytes',
       'c2l_DC2', 'c2l_LEC', 'c2l_PlasmaC', 'c2l_PTGDS+ fibroblasts',
       'c2l_MigDC', 'c2l_Neuronal_SchwannC', 'c2l_SMC',
       'c2l_Skeletal muscle cells', 'leiden', 'c2l_Suprabasal keratinocytes',
       'c2l_Basal keratinocytes', 'c2l_Chondrocytes', 'c2l_IL8+ DC1',
       'assay_ontology_term_id'],
      dtype='object')

In [84]:
adata.obsm

AxisArrays with keys: X_spatial

In [85]:
# Names of the obs columns whose label starts with the 'c2l' prefix, in column order.
c2l_columns = adata.obs.columns[adata.obs.columns.str.startswith('c2l')].tolist()

In [86]:
c2l_columns

['c2l_Th',
 'c2l_NK',
 'c2l_APOD+ fibroblasts',
 'c2l_CD8+ T RM',
 'c2l_T reg',
 'c2l_Macro1_2',
 'c2l_DC1',
 'c2l_SFRP2+ fibroblasts',
 'c2l_TAGLN+ pericytes',
 'c2l_POSTN+ fibroblasts',
 'c2l_RGS5+ pericytes',
 'c2l_VEC',
 'c2l_Tc',
 'c2l_ILC_NK',
 'c2l_BC',
 'c2l_Monocytes',
 'c2l_MastC',
 'c2l_Melanocytes',
 'c2l_DC2',
 'c2l_LEC',
 'c2l_PlasmaC',
 'c2l_PTGDS+ fibroblasts',
 'c2l_MigDC',
 'c2l_Neuronal_SchwannC',
 'c2l_SMC',
 'c2l_Skeletal muscle cells',
 'c2l_Suprabasal keratinocytes',
 'c2l_Basal keratinocytes',
 'c2l_Chondrocytes',
 'c2l_IL8+ DC1']

In [87]:
adata.obs['max_c2l_column'] = adata.obs[c2l_columns].idxmax(axis=1)

In [88]:
adata.obs['max_c2l_column_value'] = adata.obs[c2l_columns].max(axis=1)

In [89]:
adata.obs

Unnamed: 0_level_0,in_tissue,array_row,array_col,n_genes_by_counts,total_counts,total_counts_mt,pct_counts_mt,total_counts_rb,pct_counts_rb,barcode,...,c2l_SMC,c2l_Skeletal muscle cells,leiden,c2l_Suprabasal keratinocytes,c2l_Basal keratinocytes,c2l_Chondrocytes,c2l_IL8+ DC1,assay_ontology_term_id,max_c2l_column,max_c2l_column_value
barcode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AAACGAGACGGTTGAT-1,1,35,79,2901,6221,112,1.800354,632,10.159139,AAACGAGACGGTTGAT-1,...,0.070759,0.017138,0,1.082395,0.327031,0.034997,0.042624,EFO:0010961,c2l_SFRP2+ fibroblasts,6.016631
AAACTGCTGGCTCCAA-1,1,45,67,1254,1980,111,5.606061,214,10.808082,AAACTGCTGGCTCCAA-1,...,0.091078,0.081168,0,1.175316,0.281979,0.054190,0.003451,EFO:0010961,c2l_VEC,2.119842
AAAGGGATGTAGCAAG-1,1,24,62,5032,17380,442,2.543153,1617,9.303798,AAAGGGATGTAGCAAG-1,...,0.000971,4.855034,4,12.310648,3.720640,0.004490,0.001909,EFO:0010961,c2l_Suprabasal keratinocytes,12.310648
AAAGTTGACTCCCGTA-1,1,42,96,2113,4404,251,5.699364,662,15.031789,AAAGTTGACTCCCGTA-1,...,0.019028,0.707736,7,4.451590,1.649659,0.134956,0.092296,EFO:0010961,c2l_Suprabasal keratinocytes,4.451590
AAATACCTATAAGCAT-1,1,47,69,888,1353,93,6.873614,176,13.008129,AAATACCTATAAGCAT-1,...,0.056791,0.102328,3,1.994533,1.576291,0.012748,0.013764,EFO:0010961,c2l_Suprabasal keratinocytes,1.994533
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTGGGCGGCGGTTGCC-1,1,30,76,6681,18322,310,1.691955,2217,12.100207,TTGGGCGGCGGTTGCC-1,...,0.000079,0.000047,1,0.209446,5.144510,0.003048,0.001604,EFO:0010961,c2l_Basal keratinocytes,5.144510
TTGTAAGGCCAGTTGG-1,1,27,81,4018,8698,100,1.149690,1029,11.830306,TTGTAAGGCCAGTTGG-1,...,0.034816,0.002648,2,0.655483,1.187766,0.007860,0.027963,EFO:0010961,c2l_POSTN+ fibroblasts,3.287701
TTGTAATCCGTACTCG-1,1,35,55,3670,12288,130,1.057943,901,7.332357,TTGTAATCCGTACTCG-1,...,0.032617,0.030343,0,0.693258,0.187635,0.017904,0.057686,EFO:0010961,c2l_SFRP2+ fibroblasts,6.602987
TTGTTCAGTGTGCTAC-1,1,24,64,5885,23806,705,2.961438,2600,10.921617,TTGTTCAGTGTGCTAC-1,...,0.000322,4.910613,4,22.481792,4.310461,0.003245,0.016599,EFO:0010961,c2l_Suprabasal keratinocytes,22.481792


In [90]:
# Cell Ontology term assigned to each 'c2l_*' abundance column of adata.obs.
# Several fine-grained labels share one term (e.g. the four fibroblast
# subtypes, the two pericyte subtypes, DC1 and IL8+ DC1).
mapping = {
    'c2l_Th': 'CL:0000912',
    'c2l_NK': 'CL:0000623',
    'c2l_APOD+ fibroblasts': 'CL:0000057',
    'c2l_CD8+ T RM': 'CL:0001203',
    'c2l_T reg': 'CL:0000815',
    'c2l_Macro1_2': 'CL:0000235',
    'c2l_DC1': 'CL:0000990',
    'c2l_SFRP2+ fibroblasts': 'CL:0000057',
    'c2l_TAGLN+ pericytes': 'CL:0000669',
    'c2l_POSTN+ fibroblasts': 'CL:0000057',
    'c2l_RGS5+ pericytes': 'CL:0000669',
    'c2l_VEC': 'CL:0002139',
    'c2l_Tc': 'CL:0000910',
    'c2l_ILC_NK': 'CL:0001065',  # original curator note: "not NK"
    # NOTE(review): confirm whether 'BC' denotes B cells or basal cells and
    # that this term matches the intended meaning.
    'c2l_BC': 'CL:0000646',
    'c2l_Monocytes': 'CL:0000576',
    'c2l_MastC': 'CL:0000097',
    'c2l_Melanocytes': 'CL:1000458',
    # NOTE(review): verify this term against the 'DC2' label before release.
    'c2l_DC2': 'CL:0000784',
    'c2l_LEC': 'CL:0002138',
    'c2l_PlasmaC': 'CL:0000786',
    'c2l_PTGDS+ fibroblasts': 'CL:0000057',
    'c2l_MigDC': 'CL:0000451',  # original curator note: "not mig"
    'c2l_Neuronal_SchwannC': 'CL:0002573',
    'c2l_SMC': 'CL:0000192',
    'c2l_Skeletal muscle cells': 'CL:0000188',
    'c2l_Suprabasal keratinocytes': 'CL:4033013',
    'c2l_Basal keratinocytes': 'CL:0002187',
    'c2l_Chondrocytes': 'CL:0000138',
    'c2l_IL8+ DC1': 'CL:0000990',
}

In [91]:
# create a dictionary of cell type and ontology term

In [92]:
# add the cell_type_ontology_term_id column

In [93]:
adata.obs['cell_type_ontology_term_id'] = adata.obs['max_c2l_column'].map(mapping)

In [94]:
# change datatype of the column

In [95]:
adata.obs['cell_type_ontology_term_id'] = adata.obs['cell_type_ontology_term_id'].astype('category')

In [96]:
adata.obs

Unnamed: 0_level_0,in_tissue,array_row,array_col,n_genes_by_counts,total_counts,total_counts_mt,pct_counts_mt,total_counts_rb,pct_counts_rb,barcode,...,c2l_Skeletal muscle cells,leiden,c2l_Suprabasal keratinocytes,c2l_Basal keratinocytes,c2l_Chondrocytes,c2l_IL8+ DC1,assay_ontology_term_id,max_c2l_column,max_c2l_column_value,cell_type_ontology_term_id
barcode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AAACGAGACGGTTGAT-1,1,35,79,2901,6221,112,1.800354,632,10.159139,AAACGAGACGGTTGAT-1,...,0.017138,0,1.082395,0.327031,0.034997,0.042624,EFO:0010961,c2l_SFRP2+ fibroblasts,6.016631,CL:0000057
AAACTGCTGGCTCCAA-1,1,45,67,1254,1980,111,5.606061,214,10.808082,AAACTGCTGGCTCCAA-1,...,0.081168,0,1.175316,0.281979,0.054190,0.003451,EFO:0010961,c2l_VEC,2.119842,CL:0002139
AAAGGGATGTAGCAAG-1,1,24,62,5032,17380,442,2.543153,1617,9.303798,AAAGGGATGTAGCAAG-1,...,4.855034,4,12.310648,3.720640,0.004490,0.001909,EFO:0010961,c2l_Suprabasal keratinocytes,12.310648,CL:4033013
AAAGTTGACTCCCGTA-1,1,42,96,2113,4404,251,5.699364,662,15.031789,AAAGTTGACTCCCGTA-1,...,0.707736,7,4.451590,1.649659,0.134956,0.092296,EFO:0010961,c2l_Suprabasal keratinocytes,4.451590,CL:4033013
AAATACCTATAAGCAT-1,1,47,69,888,1353,93,6.873614,176,13.008129,AAATACCTATAAGCAT-1,...,0.102328,3,1.994533,1.576291,0.012748,0.013764,EFO:0010961,c2l_Suprabasal keratinocytes,1.994533,CL:4033013
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTGGGCGGCGGTTGCC-1,1,30,76,6681,18322,310,1.691955,2217,12.100207,TTGGGCGGCGGTTGCC-1,...,0.000047,1,0.209446,5.144510,0.003048,0.001604,EFO:0010961,c2l_Basal keratinocytes,5.144510,CL:0002187
TTGTAAGGCCAGTTGG-1,1,27,81,4018,8698,100,1.149690,1029,11.830306,TTGTAAGGCCAGTTGG-1,...,0.002648,2,0.655483,1.187766,0.007860,0.027963,EFO:0010961,c2l_POSTN+ fibroblasts,3.287701,CL:0000057
TTGTAATCCGTACTCG-1,1,35,55,3670,12288,130,1.057943,901,7.332357,TTGTAATCCGTACTCG-1,...,0.030343,0,0.693258,0.187635,0.017904,0.057686,EFO:0010961,c2l_SFRP2+ fibroblasts,6.602987,CL:0000057
TTGTTCAGTGTGCTAC-1,1,24,64,5885,23806,705,2.961438,2600,10.921617,TTGTTCAGTGTGCTAC-1,...,4.910613,4,22.481792,4.310461,0.003245,0.016599,EFO:0010961,c2l_Suprabasal keratinocytes,22.481792,CL:4033013


In [97]:
list(adata.obs['cell_type_ontology_term_id'].unique())

['CL:0000057',
 'CL:0002139',
 'CL:4033013',
 'CL:0000786',
 'CL:0002187',
 'CL:0000669',
 'CL:0000623',
 'CL:0000815',
 'CL:0002573',
 'CL:0002138']

#### **donor_id**

In [98]:
#identify the column in adata.obs which provides donor information

In [99]:
adata.obs.columns

Index(['in_tissue', 'array_row', 'array_col', 'n_genes_by_counts',
       'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_rb',
       'pct_counts_rb', 'barcode', 'c2l_Th', 'c2l_NK', 'c2l_APOD+ fibroblasts',
       'c2l_CD8+ T RM', 'c2l_T reg', 'c2l_Macro1_2', 'c2l_DC1',
       'c2l_SFRP2+ fibroblasts', 'c2l_TAGLN+ pericytes',
       'c2l_POSTN+ fibroblasts', 'c2l_RGS5+ pericytes', 'c2l_VEC', 'c2l_Tc',
       'c2l_ILC_NK', 'c2l_BC', 'c2l_Monocytes', 'c2l_MastC', 'c2l_Melanocytes',
       'c2l_DC2', 'c2l_LEC', 'c2l_PlasmaC', 'c2l_PTGDS+ fibroblasts',
       'c2l_MigDC', 'c2l_Neuronal_SchwannC', 'c2l_SMC',
       'c2l_Skeletal muscle cells', 'leiden', 'c2l_Suprabasal keratinocytes',
       'c2l_Basal keratinocytes', 'c2l_Chondrocytes', 'c2l_IL8+ DC1',
       'assay_ontology_term_id', 'max_c2l_column', 'max_c2l_column_value',
       'cell_type_ontology_term_id'],
      dtype='object')

In [100]:
# add the donor_id column

In [101]:
# One identifier for every observation in this object; a scalar is broadcast to all rows.
# NOTE(review): this value matches the sample name of the input .h5ad file;
# confirm it identifies the donor rather than the sample.
adata.obs['donor_id'] = 'WSSKNKCLsp12140270'

In [102]:
# change datatype of the column

In [103]:
adata.obs['donor_id'] = adata.obs['donor_id'].astype('category')

In [104]:
# view unique values of donor_id column

In [105]:
list(adata.obs['donor_id'].unique())

['WSSKNKCLsp12140270']

In [106]:
#view obs

In [107]:
adata.obs

Unnamed: 0_level_0,in_tissue,array_row,array_col,n_genes_by_counts,total_counts,total_counts_mt,pct_counts_mt,total_counts_rb,pct_counts_rb,barcode,...,leiden,c2l_Suprabasal keratinocytes,c2l_Basal keratinocytes,c2l_Chondrocytes,c2l_IL8+ DC1,assay_ontology_term_id,max_c2l_column,max_c2l_column_value,cell_type_ontology_term_id,donor_id
barcode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AAACGAGACGGTTGAT-1,1,35,79,2901,6221,112,1.800354,632,10.159139,AAACGAGACGGTTGAT-1,...,0,1.082395,0.327031,0.034997,0.042624,EFO:0010961,c2l_SFRP2+ fibroblasts,6.016631,CL:0000057,WSSKNKCLsp12140270
AAACTGCTGGCTCCAA-1,1,45,67,1254,1980,111,5.606061,214,10.808082,AAACTGCTGGCTCCAA-1,...,0,1.175316,0.281979,0.054190,0.003451,EFO:0010961,c2l_VEC,2.119842,CL:0002139,WSSKNKCLsp12140270
AAAGGGATGTAGCAAG-1,1,24,62,5032,17380,442,2.543153,1617,9.303798,AAAGGGATGTAGCAAG-1,...,4,12.310648,3.720640,0.004490,0.001909,EFO:0010961,c2l_Suprabasal keratinocytes,12.310648,CL:4033013,WSSKNKCLsp12140270
AAAGTTGACTCCCGTA-1,1,42,96,2113,4404,251,5.699364,662,15.031789,AAAGTTGACTCCCGTA-1,...,7,4.451590,1.649659,0.134956,0.092296,EFO:0010961,c2l_Suprabasal keratinocytes,4.451590,CL:4033013,WSSKNKCLsp12140270
AAATACCTATAAGCAT-1,1,47,69,888,1353,93,6.873614,176,13.008129,AAATACCTATAAGCAT-1,...,3,1.994533,1.576291,0.012748,0.013764,EFO:0010961,c2l_Suprabasal keratinocytes,1.994533,CL:4033013,WSSKNKCLsp12140270
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTGGGCGGCGGTTGCC-1,1,30,76,6681,18322,310,1.691955,2217,12.100207,TTGGGCGGCGGTTGCC-1,...,1,0.209446,5.144510,0.003048,0.001604,EFO:0010961,c2l_Basal keratinocytes,5.144510,CL:0002187,WSSKNKCLsp12140270
TTGTAAGGCCAGTTGG-1,1,27,81,4018,8698,100,1.149690,1029,11.830306,TTGTAAGGCCAGTTGG-1,...,2,0.655483,1.187766,0.007860,0.027963,EFO:0010961,c2l_POSTN+ fibroblasts,3.287701,CL:0000057,WSSKNKCLsp12140270
TTGTAATCCGTACTCG-1,1,35,55,3670,12288,130,1.057943,901,7.332357,TTGTAATCCGTACTCG-1,...,0,0.693258,0.187635,0.017904,0.057686,EFO:0010961,c2l_SFRP2+ fibroblasts,6.602987,CL:0000057,WSSKNKCLsp12140270
TTGTTCAGTGTGCTAC-1,1,24,64,5885,23806,705,2.961438,2600,10.921617,TTGTTCAGTGTGCTAC-1,...,4,22.481792,4.310461,0.003245,0.016599,EFO:0010961,c2l_Suprabasal keratinocytes,22.481792,CL:4033013,WSSKNKCLsp12140270


In [108]:
adata.obs.columns

Index(['in_tissue', 'array_row', 'array_col', 'n_genes_by_counts',
       'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_rb',
       'pct_counts_rb', 'barcode', 'c2l_Th', 'c2l_NK', 'c2l_APOD+ fibroblasts',
       'c2l_CD8+ T RM', 'c2l_T reg', 'c2l_Macro1_2', 'c2l_DC1',
       'c2l_SFRP2+ fibroblasts', 'c2l_TAGLN+ pericytes',
       'c2l_POSTN+ fibroblasts', 'c2l_RGS5+ pericytes', 'c2l_VEC', 'c2l_Tc',
       'c2l_ILC_NK', 'c2l_BC', 'c2l_Monocytes', 'c2l_MastC', 'c2l_Melanocytes',
       'c2l_DC2', 'c2l_LEC', 'c2l_PlasmaC', 'c2l_PTGDS+ fibroblasts',
       'c2l_MigDC', 'c2l_Neuronal_SchwannC', 'c2l_SMC',
       'c2l_Skeletal muscle cells', 'leiden', 'c2l_Suprabasal keratinocytes',
       'c2l_Basal keratinocytes', 'c2l_Chondrocytes', 'c2l_IL8+ DC1',
       'assay_ontology_term_id', 'max_c2l_column', 'max_c2l_column_value',
       'cell_type_ontology_term_id', 'donor_id'],
      dtype='object')

#### **development_stage_ontology_term_id**

In [109]:
# adata.obs has no age column; the development stage is looked up per donor in the supplementary info table

In [110]:
# add the development_stage_ontology_term_id column

In [111]:
# Load the supplementary per-donor metadata table; the following cells use its
# 'donor' and 'development_stage_ontology_term_id' columns.
# NOTE(review): absolute /home/jovyan path — adjust when running in another environment.
suppl_info = pd.read_csv('/home/jovyan/CXG_DATASETS_PORTAL/Spatial_skin_atlas/spatial/suppl_info_spatialskin.csv')

In [112]:
# donor -> development stage term lookup built from the supplementary table
# (if a donor appears more than once, the last row wins).
mapping = {
    donor: stage_term
    for donor, stage_term in zip(
        suppl_info['donor'], suppl_info['development_stage_ontology_term_id']
    )
}

In [113]:
# Look up each spot's development stage through its donor_id.
# Series.map leaves NaN for any donor_id that has no entry in `mapping` (or whose
# entry is empty in the table), so fail here instead of carrying missing values
# into the categorical column created in a later cell.
adata.obs['development_stage_ontology_term_id'] = adata.obs['donor_id'].map(mapping)
if adata.obs['development_stage_ontology_term_id'].isna().any():
    raise ValueError(
        "No development_stage_ontology_term_id found in suppl_info for one or more donor_id values"
    )

In [114]:
# change datatype of the column

In [115]:
# Convert the mapped development-stage terms to a pandas categorical column.
adata.obs['development_stage_ontology_term_id'] = adata.obs['development_stage_ontology_term_id'].astype('category')

In [116]:
# view unique values of development_stage_ontology_term_id column

In [117]:
list(adata.obs['development_stage_ontology_term_id'].unique())

['HsapDv:0000133']

In [118]:
# view adata.obs

In [119]:
adata.obs

Unnamed: 0_level_0,in_tissue,array_row,array_col,n_genes_by_counts,total_counts,total_counts_mt,pct_counts_mt,total_counts_rb,pct_counts_rb,barcode,...,c2l_Suprabasal keratinocytes,c2l_Basal keratinocytes,c2l_Chondrocytes,c2l_IL8+ DC1,assay_ontology_term_id,max_c2l_column,max_c2l_column_value,cell_type_ontology_term_id,donor_id,development_stage_ontology_term_id
barcode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AAACGAGACGGTTGAT-1,1,35,79,2901,6221,112,1.800354,632,10.159139,AAACGAGACGGTTGAT-1,...,1.082395,0.327031,0.034997,0.042624,EFO:0010961,c2l_SFRP2+ fibroblasts,6.016631,CL:0000057,WSSKNKCLsp12140270,HsapDv:0000133
AAACTGCTGGCTCCAA-1,1,45,67,1254,1980,111,5.606061,214,10.808082,AAACTGCTGGCTCCAA-1,...,1.175316,0.281979,0.054190,0.003451,EFO:0010961,c2l_VEC,2.119842,CL:0002139,WSSKNKCLsp12140270,HsapDv:0000133
AAAGGGATGTAGCAAG-1,1,24,62,5032,17380,442,2.543153,1617,9.303798,AAAGGGATGTAGCAAG-1,...,12.310648,3.720640,0.004490,0.001909,EFO:0010961,c2l_Suprabasal keratinocytes,12.310648,CL:4033013,WSSKNKCLsp12140270,HsapDv:0000133
AAAGTTGACTCCCGTA-1,1,42,96,2113,4404,251,5.699364,662,15.031789,AAAGTTGACTCCCGTA-1,...,4.451590,1.649659,0.134956,0.092296,EFO:0010961,c2l_Suprabasal keratinocytes,4.451590,CL:4033013,WSSKNKCLsp12140270,HsapDv:0000133
AAATACCTATAAGCAT-1,1,47,69,888,1353,93,6.873614,176,13.008129,AAATACCTATAAGCAT-1,...,1.994533,1.576291,0.012748,0.013764,EFO:0010961,c2l_Suprabasal keratinocytes,1.994533,CL:4033013,WSSKNKCLsp12140270,HsapDv:0000133
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTGGGCGGCGGTTGCC-1,1,30,76,6681,18322,310,1.691955,2217,12.100207,TTGGGCGGCGGTTGCC-1,...,0.209446,5.144510,0.003048,0.001604,EFO:0010961,c2l_Basal keratinocytes,5.144510,CL:0002187,WSSKNKCLsp12140270,HsapDv:0000133
TTGTAAGGCCAGTTGG-1,1,27,81,4018,8698,100,1.149690,1029,11.830306,TTGTAAGGCCAGTTGG-1,...,0.655483,1.187766,0.007860,0.027963,EFO:0010961,c2l_POSTN+ fibroblasts,3.287701,CL:0000057,WSSKNKCLsp12140270,HsapDv:0000133
TTGTAATCCGTACTCG-1,1,35,55,3670,12288,130,1.057943,901,7.332357,TTGTAATCCGTACTCG-1,...,0.693258,0.187635,0.017904,0.057686,EFO:0010961,c2l_SFRP2+ fibroblasts,6.602987,CL:0000057,WSSKNKCLsp12140270,HsapDv:0000133
TTGTTCAGTGTGCTAC-1,1,24,64,5885,23806,705,2.961438,2600,10.921617,TTGTTCAGTGTGCTAC-1,...,22.481792,4.310461,0.003245,0.016599,EFO:0010961,c2l_Suprabasal keratinocytes,22.481792,CL:4033013,WSSKNKCLsp12140270,HsapDv:0000133


#### **disease_ontology_term_id**

In [120]:
# Do not assume 'normal': this is a BCC sample, so the disease term is looked up per donor in the supplementary info table

In [121]:
# add the disease_ontology_term_id column

In [122]:
# Re-reads the same supplementary table already loaded for
# development_stage_ontology_term_id; here its 'disease_ontology_term_id' column is used.
# NOTE(review): redundant when the notebook is run top to bottom.
suppl_info = pd.read_csv('/home/jovyan/CXG_DATASETS_PORTAL/Spatial_skin_atlas/spatial/suppl_info_spatialskin.csv')

In [123]:
# donor -> disease term lookup built from the supplementary table
# (if a donor appears more than once, the last row wins).
mapping = {
    donor: disease_term
    for donor, disease_term in zip(
        suppl_info['donor'], suppl_info['disease_ontology_term_id']
    )
}

In [124]:
# Look up each spot's disease term through its donor_id.
# Series.map leaves NaN for any donor_id that has no entry in `mapping` (or whose
# entry is empty in the table), so fail here instead of carrying missing values
# into the categorical column created in a later cell.
adata.obs['disease_ontology_term_id'] = adata.obs['donor_id'].map(mapping)
if adata.obs['disease_ontology_term_id'].isna().any():
    raise ValueError(
        "No disease_ontology_term_id found in suppl_info for one or more donor_id values"
    )

In [125]:
#change data type of column

In [126]:
# Convert the mapped disease terms to a pandas categorical column.
adata.obs['disease_ontology_term_id'] = adata.obs['disease_ontology_term_id'].astype('category')

In [127]:
# view obs

In [128]:
adata.obs

Unnamed: 0_level_0,in_tissue,array_row,array_col,n_genes_by_counts,total_counts,total_counts_mt,pct_counts_mt,total_counts_rb,pct_counts_rb,barcode,...,c2l_Basal keratinocytes,c2l_Chondrocytes,c2l_IL8+ DC1,assay_ontology_term_id,max_c2l_column,max_c2l_column_value,cell_type_ontology_term_id,donor_id,development_stage_ontology_term_id,disease_ontology_term_id
barcode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AAACGAGACGGTTGAT-1,1,35,79,2901,6221,112,1.800354,632,10.159139,AAACGAGACGGTTGAT-1,...,0.327031,0.034997,0.042624,EFO:0010961,c2l_SFRP2+ fibroblasts,6.016631,CL:0000057,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804
AAACTGCTGGCTCCAA-1,1,45,67,1254,1980,111,5.606061,214,10.808082,AAACTGCTGGCTCCAA-1,...,0.281979,0.054190,0.003451,EFO:0010961,c2l_VEC,2.119842,CL:0002139,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804
AAAGGGATGTAGCAAG-1,1,24,62,5032,17380,442,2.543153,1617,9.303798,AAAGGGATGTAGCAAG-1,...,3.720640,0.004490,0.001909,EFO:0010961,c2l_Suprabasal keratinocytes,12.310648,CL:4033013,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804
AAAGTTGACTCCCGTA-1,1,42,96,2113,4404,251,5.699364,662,15.031789,AAAGTTGACTCCCGTA-1,...,1.649659,0.134956,0.092296,EFO:0010961,c2l_Suprabasal keratinocytes,4.451590,CL:4033013,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804
AAATACCTATAAGCAT-1,1,47,69,888,1353,93,6.873614,176,13.008129,AAATACCTATAAGCAT-1,...,1.576291,0.012748,0.013764,EFO:0010961,c2l_Suprabasal keratinocytes,1.994533,CL:4033013,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTGGGCGGCGGTTGCC-1,1,30,76,6681,18322,310,1.691955,2217,12.100207,TTGGGCGGCGGTTGCC-1,...,5.144510,0.003048,0.001604,EFO:0010961,c2l_Basal keratinocytes,5.144510,CL:0002187,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804
TTGTAAGGCCAGTTGG-1,1,27,81,4018,8698,100,1.149690,1029,11.830306,TTGTAAGGCCAGTTGG-1,...,1.187766,0.007860,0.027963,EFO:0010961,c2l_POSTN+ fibroblasts,3.287701,CL:0000057,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804
TTGTAATCCGTACTCG-1,1,35,55,3670,12288,130,1.057943,901,7.332357,TTGTAATCCGTACTCG-1,...,0.187635,0.017904,0.057686,EFO:0010961,c2l_SFRP2+ fibroblasts,6.602987,CL:0000057,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804
TTGTTCAGTGTGCTAC-1,1,24,64,5885,23806,705,2.961438,2600,10.921617,TTGTTCAGTGTGCTAC-1,...,4.310461,0.003245,0.016599,EFO:0010961,c2l_Suprabasal keratinocytes,22.481792,CL:4033013,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804


#### **is_primary_data**

In [129]:
# Flag every spot as primary data by broadcasting a single boolean over the obs table.
adata.obs['is_primary_data'] = True

In [130]:
adata.obs

Unnamed: 0_level_0,in_tissue,array_row,array_col,n_genes_by_counts,total_counts,total_counts_mt,pct_counts_mt,total_counts_rb,pct_counts_rb,barcode,...,c2l_Chondrocytes,c2l_IL8+ DC1,assay_ontology_term_id,max_c2l_column,max_c2l_column_value,cell_type_ontology_term_id,donor_id,development_stage_ontology_term_id,disease_ontology_term_id,is_primary_data
barcode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AAACGAGACGGTTGAT-1,1,35,79,2901,6221,112,1.800354,632,10.159139,AAACGAGACGGTTGAT-1,...,0.034997,0.042624,EFO:0010961,c2l_SFRP2+ fibroblasts,6.016631,CL:0000057,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True
AAACTGCTGGCTCCAA-1,1,45,67,1254,1980,111,5.606061,214,10.808082,AAACTGCTGGCTCCAA-1,...,0.054190,0.003451,EFO:0010961,c2l_VEC,2.119842,CL:0002139,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True
AAAGGGATGTAGCAAG-1,1,24,62,5032,17380,442,2.543153,1617,9.303798,AAAGGGATGTAGCAAG-1,...,0.004490,0.001909,EFO:0010961,c2l_Suprabasal keratinocytes,12.310648,CL:4033013,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True
AAAGTTGACTCCCGTA-1,1,42,96,2113,4404,251,5.699364,662,15.031789,AAAGTTGACTCCCGTA-1,...,0.134956,0.092296,EFO:0010961,c2l_Suprabasal keratinocytes,4.451590,CL:4033013,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True
AAATACCTATAAGCAT-1,1,47,69,888,1353,93,6.873614,176,13.008129,AAATACCTATAAGCAT-1,...,0.012748,0.013764,EFO:0010961,c2l_Suprabasal keratinocytes,1.994533,CL:4033013,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTGGGCGGCGGTTGCC-1,1,30,76,6681,18322,310,1.691955,2217,12.100207,TTGGGCGGCGGTTGCC-1,...,0.003048,0.001604,EFO:0010961,c2l_Basal keratinocytes,5.144510,CL:0002187,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True
TTGTAAGGCCAGTTGG-1,1,27,81,4018,8698,100,1.149690,1029,11.830306,TTGTAAGGCCAGTTGG-1,...,0.007860,0.027963,EFO:0010961,c2l_POSTN+ fibroblasts,3.287701,CL:0000057,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True
TTGTAATCCGTACTCG-1,1,35,55,3670,12288,130,1.057943,901,7.332357,TTGTAATCCGTACTCG-1,...,0.017904,0.057686,EFO:0010961,c2l_SFRP2+ fibroblasts,6.602987,CL:0000057,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True
TTGTTCAGTGTGCTAC-1,1,24,64,5885,23806,705,2.961438,2600,10.921617,TTGTTCAGTGTGCTAC-1,...,0.003245,0.016599,EFO:0010961,c2l_Suprabasal keratinocytes,22.481792,CL:4033013,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True


In [131]:
#change data type of column

In [132]:
# The column was filled with Python True values, so this cast only pins the dtype to bool.
primary_flag = adata.obs['is_primary_data']
adata.obs['is_primary_data'] = primary_flag.astype(bool)

#### **organism_ontology_term_id**

In [133]:
# assign organism id 

In [134]:
# Same organism term for every spot: broadcast the scalar across the obs table.
adata.obs['organism_ontology_term_id'] = 'NCBITaxon:9606'

In [135]:
#change data type of column

In [136]:
# Store the organism term as a pandas categorical column (single category).
adata.obs['organism_ontology_term_id'] = adata.obs['organism_ontology_term_id'].astype('category')

In [137]:
# view obs

In [138]:
adata.obs

Unnamed: 0_level_0,in_tissue,array_row,array_col,n_genes_by_counts,total_counts,total_counts_mt,pct_counts_mt,total_counts_rb,pct_counts_rb,barcode,...,c2l_IL8+ DC1,assay_ontology_term_id,max_c2l_column,max_c2l_column_value,cell_type_ontology_term_id,donor_id,development_stage_ontology_term_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id
barcode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AAACGAGACGGTTGAT-1,1,35,79,2901,6221,112,1.800354,632,10.159139,AAACGAGACGGTTGAT-1,...,0.042624,EFO:0010961,c2l_SFRP2+ fibroblasts,6.016631,CL:0000057,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True,NCBITaxon:9606
AAACTGCTGGCTCCAA-1,1,45,67,1254,1980,111,5.606061,214,10.808082,AAACTGCTGGCTCCAA-1,...,0.003451,EFO:0010961,c2l_VEC,2.119842,CL:0002139,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True,NCBITaxon:9606
AAAGGGATGTAGCAAG-1,1,24,62,5032,17380,442,2.543153,1617,9.303798,AAAGGGATGTAGCAAG-1,...,0.001909,EFO:0010961,c2l_Suprabasal keratinocytes,12.310648,CL:4033013,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True,NCBITaxon:9606
AAAGTTGACTCCCGTA-1,1,42,96,2113,4404,251,5.699364,662,15.031789,AAAGTTGACTCCCGTA-1,...,0.092296,EFO:0010961,c2l_Suprabasal keratinocytes,4.451590,CL:4033013,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True,NCBITaxon:9606
AAATACCTATAAGCAT-1,1,47,69,888,1353,93,6.873614,176,13.008129,AAATACCTATAAGCAT-1,...,0.013764,EFO:0010961,c2l_Suprabasal keratinocytes,1.994533,CL:4033013,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True,NCBITaxon:9606
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTGGGCGGCGGTTGCC-1,1,30,76,6681,18322,310,1.691955,2217,12.100207,TTGGGCGGCGGTTGCC-1,...,0.001604,EFO:0010961,c2l_Basal keratinocytes,5.144510,CL:0002187,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True,NCBITaxon:9606
TTGTAAGGCCAGTTGG-1,1,27,81,4018,8698,100,1.149690,1029,11.830306,TTGTAAGGCCAGTTGG-1,...,0.027963,EFO:0010961,c2l_POSTN+ fibroblasts,3.287701,CL:0000057,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True,NCBITaxon:9606
TTGTAATCCGTACTCG-1,1,35,55,3670,12288,130,1.057943,901,7.332357,TTGTAATCCGTACTCG-1,...,0.057686,EFO:0010961,c2l_SFRP2+ fibroblasts,6.602987,CL:0000057,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True,NCBITaxon:9606
TTGTTCAGTGTGCTAC-1,1,24,64,5885,23806,705,2.961438,2600,10.921617,TTGTTCAGTGTGCTAC-1,...,0.016599,EFO:0010961,c2l_Suprabasal keratinocytes,22.481792,CL:4033013,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True,NCBITaxon:9606


#### **self_reported_ethnicity_ontology_term_id**

In [139]:
# Ethnicity is recorded as 'unknown' for every spot: broadcast the scalar over the obs table.
adata.obs['self_reported_ethnicity_ontology_term_id'] = 'unknown'

In [140]:
# change data type

In [141]:
# Store the ethnicity value as a pandas categorical column (single category: 'unknown').
adata.obs['self_reported_ethnicity_ontology_term_id'] = adata.obs['self_reported_ethnicity_ontology_term_id'].astype('category')

In [142]:
# view obs

In [143]:
adata.obs

Unnamed: 0_level_0,in_tissue,array_row,array_col,n_genes_by_counts,total_counts,total_counts_mt,pct_counts_mt,total_counts_rb,pct_counts_rb,barcode,...,assay_ontology_term_id,max_c2l_column,max_c2l_column_value,cell_type_ontology_term_id,donor_id,development_stage_ontology_term_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id,self_reported_ethnicity_ontology_term_id
barcode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AAACGAGACGGTTGAT-1,1,35,79,2901,6221,112,1.800354,632,10.159139,AAACGAGACGGTTGAT-1,...,EFO:0010961,c2l_SFRP2+ fibroblasts,6.016631,CL:0000057,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True,NCBITaxon:9606,unknown
AAACTGCTGGCTCCAA-1,1,45,67,1254,1980,111,5.606061,214,10.808082,AAACTGCTGGCTCCAA-1,...,EFO:0010961,c2l_VEC,2.119842,CL:0002139,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True,NCBITaxon:9606,unknown
AAAGGGATGTAGCAAG-1,1,24,62,5032,17380,442,2.543153,1617,9.303798,AAAGGGATGTAGCAAG-1,...,EFO:0010961,c2l_Suprabasal keratinocytes,12.310648,CL:4033013,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True,NCBITaxon:9606,unknown
AAAGTTGACTCCCGTA-1,1,42,96,2113,4404,251,5.699364,662,15.031789,AAAGTTGACTCCCGTA-1,...,EFO:0010961,c2l_Suprabasal keratinocytes,4.451590,CL:4033013,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True,NCBITaxon:9606,unknown
AAATACCTATAAGCAT-1,1,47,69,888,1353,93,6.873614,176,13.008129,AAATACCTATAAGCAT-1,...,EFO:0010961,c2l_Suprabasal keratinocytes,1.994533,CL:4033013,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True,NCBITaxon:9606,unknown
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTGGGCGGCGGTTGCC-1,1,30,76,6681,18322,310,1.691955,2217,12.100207,TTGGGCGGCGGTTGCC-1,...,EFO:0010961,c2l_Basal keratinocytes,5.144510,CL:0002187,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True,NCBITaxon:9606,unknown
TTGTAAGGCCAGTTGG-1,1,27,81,4018,8698,100,1.149690,1029,11.830306,TTGTAAGGCCAGTTGG-1,...,EFO:0010961,c2l_POSTN+ fibroblasts,3.287701,CL:0000057,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True,NCBITaxon:9606,unknown
TTGTAATCCGTACTCG-1,1,35,55,3670,12288,130,1.057943,901,7.332357,TTGTAATCCGTACTCG-1,...,EFO:0010961,c2l_SFRP2+ fibroblasts,6.602987,CL:0000057,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True,NCBITaxon:9606,unknown
TTGTTCAGTGTGCTAC-1,1,24,64,5885,23806,705,2.961438,2600,10.921617,TTGTTCAGTGTGCTAC-1,...,EFO:0010961,c2l_Suprabasal keratinocytes,22.481792,CL:4033013,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True,NCBITaxon:9606,unknown


In [144]:
list(adata.obs['self_reported_ethnicity_ontology_term_id'].unique())

['unknown']

#### **sex_ontology_term_id**

In [145]:
# Re-reads the same supplementary table loaded in earlier sections; here its
# 'sex_ontology_term_id' column is used.
# NOTE(review): redundant when the notebook is run top to bottom.
suppl_info = pd.read_csv('/home/jovyan/CXG_DATASETS_PORTAL/Spatial_skin_atlas/spatial/suppl_info_spatialskin.csv')

In [146]:
# donor -> sex term lookup built from the supplementary table
# (if a donor appears more than once, the last row wins).
mapping = {
    donor: sex_term
    for donor, sex_term in zip(
        suppl_info['donor'], suppl_info['sex_ontology_term_id']
    )
}

In [147]:
# Look up each spot's sex term through its donor_id.
# Series.map leaves NaN for any donor_id that has no entry in `mapping` (or whose
# entry is empty in the table), so fail here instead of carrying missing values
# into the categorical column created in a later cell.
adata.obs['sex_ontology_term_id'] = adata.obs['donor_id'].map(mapping)
if adata.obs['sex_ontology_term_id'].isna().any():
    raise ValueError(
        "No sex_ontology_term_id found in suppl_info for one or more donor_id values"
    )

In [148]:
# change data type

In [149]:
# Convert the mapped sex terms to a pandas categorical column.
adata.obs['sex_ontology_term_id'] = adata.obs['sex_ontology_term_id'].astype('category')

In [150]:
adata.obs

Unnamed: 0_level_0,in_tissue,array_row,array_col,n_genes_by_counts,total_counts,total_counts_mt,pct_counts_mt,total_counts_rb,pct_counts_rb,barcode,...,max_c2l_column,max_c2l_column_value,cell_type_ontology_term_id,donor_id,development_stage_ontology_term_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id,self_reported_ethnicity_ontology_term_id,sex_ontology_term_id
barcode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AAACGAGACGGTTGAT-1,1,35,79,2901,6221,112,1.800354,632,10.159139,AAACGAGACGGTTGAT-1,...,c2l_SFRP2+ fibroblasts,6.016631,CL:0000057,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True,NCBITaxon:9606,unknown,PATO:0000384
AAACTGCTGGCTCCAA-1,1,45,67,1254,1980,111,5.606061,214,10.808082,AAACTGCTGGCTCCAA-1,...,c2l_VEC,2.119842,CL:0002139,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True,NCBITaxon:9606,unknown,PATO:0000384
AAAGGGATGTAGCAAG-1,1,24,62,5032,17380,442,2.543153,1617,9.303798,AAAGGGATGTAGCAAG-1,...,c2l_Suprabasal keratinocytes,12.310648,CL:4033013,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True,NCBITaxon:9606,unknown,PATO:0000384
AAAGTTGACTCCCGTA-1,1,42,96,2113,4404,251,5.699364,662,15.031789,AAAGTTGACTCCCGTA-1,...,c2l_Suprabasal keratinocytes,4.451590,CL:4033013,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True,NCBITaxon:9606,unknown,PATO:0000384
AAATACCTATAAGCAT-1,1,47,69,888,1353,93,6.873614,176,13.008129,AAATACCTATAAGCAT-1,...,c2l_Suprabasal keratinocytes,1.994533,CL:4033013,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True,NCBITaxon:9606,unknown,PATO:0000384
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTGGGCGGCGGTTGCC-1,1,30,76,6681,18322,310,1.691955,2217,12.100207,TTGGGCGGCGGTTGCC-1,...,c2l_Basal keratinocytes,5.144510,CL:0002187,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True,NCBITaxon:9606,unknown,PATO:0000384
TTGTAAGGCCAGTTGG-1,1,27,81,4018,8698,100,1.149690,1029,11.830306,TTGTAAGGCCAGTTGG-1,...,c2l_POSTN+ fibroblasts,3.287701,CL:0000057,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True,NCBITaxon:9606,unknown,PATO:0000384
TTGTAATCCGTACTCG-1,1,35,55,3670,12288,130,1.057943,901,7.332357,TTGTAATCCGTACTCG-1,...,c2l_SFRP2+ fibroblasts,6.602987,CL:0000057,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True,NCBITaxon:9606,unknown,PATO:0000384
TTGTTCAGTGTGCTAC-1,1,24,64,5885,23806,705,2.961438,2600,10.921617,TTGTTCAGTGTGCTAC-1,...,c2l_Suprabasal keratinocytes,22.481792,CL:4033013,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True,NCBITaxon:9606,unknown,PATO:0000384


#### **suspension_type**

In [151]:
# Visium is a spatial assay with no cell or nucleus suspension, so suspension_type is 'na'

In [152]:
# Same value for every spot: broadcast the scalar 'na' across the obs table.
adata.obs['suspension_type'] = 'na'

In [153]:
# change data type

In [154]:
# Store suspension_type as a pandas categorical column (single category: 'na').
adata.obs['suspension_type'] = adata.obs['suspension_type'].astype('category')

In [155]:
# view obs

In [156]:
adata.obs

Unnamed: 0_level_0,in_tissue,array_row,array_col,n_genes_by_counts,total_counts,total_counts_mt,pct_counts_mt,total_counts_rb,pct_counts_rb,barcode,...,max_c2l_column_value,cell_type_ontology_term_id,donor_id,development_stage_ontology_term_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id,self_reported_ethnicity_ontology_term_id,sex_ontology_term_id,suspension_type
barcode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AAACGAGACGGTTGAT-1,1,35,79,2901,6221,112,1.800354,632,10.159139,AAACGAGACGGTTGAT-1,...,6.016631,CL:0000057,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True,NCBITaxon:9606,unknown,PATO:0000384,na
AAACTGCTGGCTCCAA-1,1,45,67,1254,1980,111,5.606061,214,10.808082,AAACTGCTGGCTCCAA-1,...,2.119842,CL:0002139,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True,NCBITaxon:9606,unknown,PATO:0000384,na
AAAGGGATGTAGCAAG-1,1,24,62,5032,17380,442,2.543153,1617,9.303798,AAAGGGATGTAGCAAG-1,...,12.310648,CL:4033013,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True,NCBITaxon:9606,unknown,PATO:0000384,na
AAAGTTGACTCCCGTA-1,1,42,96,2113,4404,251,5.699364,662,15.031789,AAAGTTGACTCCCGTA-1,...,4.451590,CL:4033013,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True,NCBITaxon:9606,unknown,PATO:0000384,na
AAATACCTATAAGCAT-1,1,47,69,888,1353,93,6.873614,176,13.008129,AAATACCTATAAGCAT-1,...,1.994533,CL:4033013,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True,NCBITaxon:9606,unknown,PATO:0000384,na
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTGGGCGGCGGTTGCC-1,1,30,76,6681,18322,310,1.691955,2217,12.100207,TTGGGCGGCGGTTGCC-1,...,5.144510,CL:0002187,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True,NCBITaxon:9606,unknown,PATO:0000384,na
TTGTAAGGCCAGTTGG-1,1,27,81,4018,8698,100,1.149690,1029,11.830306,TTGTAAGGCCAGTTGG-1,...,3.287701,CL:0000057,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True,NCBITaxon:9606,unknown,PATO:0000384,na
TTGTAATCCGTACTCG-1,1,35,55,3670,12288,130,1.057943,901,7.332357,TTGTAATCCGTACTCG-1,...,6.602987,CL:0000057,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True,NCBITaxon:9606,unknown,PATO:0000384,na
TTGTTCAGTGTGCTAC-1,1,24,64,5885,23806,705,2.961438,2600,10.921617,TTGTTCAGTGTGCTAC-1,...,22.481792,CL:4033013,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True,NCBITaxon:9606,unknown,PATO:0000384,na


#### **tissue_type**

In [157]:
# All spots come from a tissue section: broadcast the scalar 'tissue' across the obs table.
adata.obs['tissue_type'] = 'tissue'

In [158]:
# Store tissue_type as a pandas categorical column (single category: 'tissue').
adata.obs['tissue_type'] = adata.obs['tissue_type'].astype('category')

#### **tissue_ontology_term_id**

In [159]:
# Re-reads the same supplementary table loaded in earlier sections; here its
# 'tissue_ontology_term_id' column is used.
# NOTE(review): redundant when the notebook is run top to bottom.
suppl_info = pd.read_csv('/home/jovyan/CXG_DATASETS_PORTAL/Spatial_skin_atlas/spatial/suppl_info_spatialskin.csv')

In [160]:
# donor -> tissue term lookup built from the supplementary table
# (if a donor appears more than once, the last row wins).
mapping = {
    donor: tissue_term
    for donor, tissue_term in zip(
        suppl_info['donor'], suppl_info['tissue_ontology_term_id']
    )
}

In [161]:
# Look up each spot's tissue term through its donor_id.
# Series.map leaves NaN for any donor_id that has no entry in `mapping` (or whose
# entry is empty in the table), so fail here instead of carrying missing values
# into the categorical column created in the next cell.
adata.obs['tissue_ontology_term_id'] = adata.obs['donor_id'].map(mapping)
if adata.obs['tissue_ontology_term_id'].isna().any():
    raise ValueError(
        "No tissue_ontology_term_id found in suppl_info for one or more donor_id values"
    )

In [162]:
# Convert the mapped tissue terms to a pandas categorical column.
adata.obs['tissue_ontology_term_id'] = adata.obs['tissue_ontology_term_id'].astype('category')

In [163]:
list(adata.obs['tissue_ontology_term_id'].unique())

['UBERON:0008803']

In [164]:
# view obs

In [165]:
adata.obs

Unnamed: 0_level_0,in_tissue,array_row,array_col,n_genes_by_counts,total_counts,total_counts_mt,pct_counts_mt,total_counts_rb,pct_counts_rb,barcode,...,donor_id,development_stage_ontology_term_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id,self_reported_ethnicity_ontology_term_id,sex_ontology_term_id,suspension_type,tissue_type,tissue_ontology_term_id
barcode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AAACGAGACGGTTGAT-1,1,35,79,2901,6221,112,1.800354,632,10.159139,AAACGAGACGGTTGAT-1,...,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True,NCBITaxon:9606,unknown,PATO:0000384,na,tissue,UBERON:0008803
AAACTGCTGGCTCCAA-1,1,45,67,1254,1980,111,5.606061,214,10.808082,AAACTGCTGGCTCCAA-1,...,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True,NCBITaxon:9606,unknown,PATO:0000384,na,tissue,UBERON:0008803
AAAGGGATGTAGCAAG-1,1,24,62,5032,17380,442,2.543153,1617,9.303798,AAAGGGATGTAGCAAG-1,...,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True,NCBITaxon:9606,unknown,PATO:0000384,na,tissue,UBERON:0008803
AAAGTTGACTCCCGTA-1,1,42,96,2113,4404,251,5.699364,662,15.031789,AAAGTTGACTCCCGTA-1,...,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True,NCBITaxon:9606,unknown,PATO:0000384,na,tissue,UBERON:0008803
AAATACCTATAAGCAT-1,1,47,69,888,1353,93,6.873614,176,13.008129,AAATACCTATAAGCAT-1,...,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True,NCBITaxon:9606,unknown,PATO:0000384,na,tissue,UBERON:0008803
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTGGGCGGCGGTTGCC-1,1,30,76,6681,18322,310,1.691955,2217,12.100207,TTGGGCGGCGGTTGCC-1,...,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True,NCBITaxon:9606,unknown,PATO:0000384,na,tissue,UBERON:0008803
TTGTAAGGCCAGTTGG-1,1,27,81,4018,8698,100,1.149690,1029,11.830306,TTGTAAGGCCAGTTGG-1,...,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True,NCBITaxon:9606,unknown,PATO:0000384,na,tissue,UBERON:0008803
TTGTAATCCGTACTCG-1,1,35,55,3670,12288,130,1.057943,901,7.332357,TTGTAATCCGTACTCG-1,...,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True,NCBITaxon:9606,unknown,PATO:0000384,na,tissue,UBERON:0008803
TTGTTCAGTGTGCTAC-1,1,24,64,5885,23806,705,2.961438,2600,10.921617,TTGTTCAGTGTGCTAC-1,...,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True,NCBITaxon:9606,unknown,PATO:0000384,na,tissue,UBERON:0008803


In [166]:
adata.obs.columns

Index(['in_tissue', 'array_row', 'array_col', 'n_genes_by_counts',
       'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_rb',
       'pct_counts_rb', 'barcode', 'c2l_Th', 'c2l_NK', 'c2l_APOD+ fibroblasts',
       'c2l_CD8+ T RM', 'c2l_T reg', 'c2l_Macro1_2', 'c2l_DC1',
       'c2l_SFRP2+ fibroblasts', 'c2l_TAGLN+ pericytes',
       'c2l_POSTN+ fibroblasts', 'c2l_RGS5+ pericytes', 'c2l_VEC', 'c2l_Tc',
       'c2l_ILC_NK', 'c2l_BC', 'c2l_Monocytes', 'c2l_MastC', 'c2l_Melanocytes',
       'c2l_DC2', 'c2l_LEC', 'c2l_PlasmaC', 'c2l_PTGDS+ fibroblasts',
       'c2l_MigDC', 'c2l_Neuronal_SchwannC', 'c2l_SMC',
       'c2l_Skeletal muscle cells', 'leiden', 'c2l_Suprabasal keratinocytes',
       'c2l_Basal keratinocytes', 'c2l_Chondrocytes', 'c2l_IL8+ DC1',
       'assay_ontology_term_id', 'max_c2l_column', 'max_c2l_column_value',
       'cell_type_ontology_term_id', 'donor_id',
       'development_stage_ontology_term_id', 'disease_ontology_term_id',
       'is_primary_data', 

In [167]:
# Remove columns that should not ship in the curated obs table.
# - 'barcode' mirrors the obs index in the rows displayed above.
# - 'max_c2l_column' / 'max_c2l_column_value' are presumably the intermediates used
#   to derive cell_type_ontology_term_id — TODO confirm.
# NOTE(review): newer CELLxGENE schema versions expect array_row/array_col in obs for
# Visium data — confirm against the target schema version before dropping them.
# errors='ignore' keeps this cell re-runnable: the previous `del` statements raised
# KeyError when the cell was executed a second time.
adata.obs = adata.obs.drop(
    columns=['barcode', 'max_c2l_column', 'max_c2l_column_value', 'array_row', 'array_col'],
    errors='ignore',
)

#### **obsm (Embeddings)**

In [168]:
adata.obsm

AxisArrays with keys: X_spatial

In [169]:
adata.obsm.keys()

KeysView(AxisArrays with keys: X_spatial)

#### **uns (Dataset Metadata)**

In [170]:
adata.uns

OverloadedDict, wrapping:
	{'leiden': {'params': {'n_iterations': -1, 'random_state': 0, 'resolution': 1}}, 'log1p': {}, 'neighbors': {'connectivities_key': 'connectivities', 'distances_key': 'distances', 'params': {'method': 'umap', 'metric': 'euclidean', 'n_neighbors': 15, 'random_state': 0}}, 'pca': {'params': {'use_highly_variable': False, 'zero_center': True}, 'variance': array([583.29565 , 193.31863 , 136.79582 ,  75.48066 ,  58.14236 ,
        56.418648,  45.823303,  41.865646,  40.302658,  39.336243,
        38.66031 ,  38.22584 ,  37.676617,  37.498497,  37.135212,
        37.077515,  36.7297  ,  36.5181  ,  36.448246,  36.17613 ,
        36.11147 ,  36.019226,  35.966793,  35.810246,  35.72352 ,
        35.587204,  35.44174 ,  35.35613 ,  35.260666,  35.213608,
        35.158707,  35.08975 ,  35.036385,  34.943695,  34.7785  ,
        34.76162 ,  34.716442,  34.6357  ,  34.553642,  34.445824,
        34.419483,  34.34197 ,  34.282284,  34.21541 ,  34.162838,
        34.131287

In [171]:
# Store a free-text caption describing the H&E-stained section under uns['image_caption'].
adata.uns['image_caption'] = 'Shown here is an image of 10 μm thick slice of the skin from the cheek stained with H&E'

In [172]:
# Store the dataset title under uns['title'].
adata.uns['title'] = 'Visium spatial - bcc_face_cheek1'

In [173]:
# Point uns['default_embedding'] at 'X_spatial', the only key present in adata.obsm.
adata.uns['default_embedding'] = 'X_spatial'

In [174]:
# Confirm that image_caption, title and default_embedding are now present in uns.
adata.uns.keys()

dict_keys(['leiden', 'log1p', 'neighbors', 'pca', 'spatial', 'umap', 'image_caption', 'title', 'default_embedding'])

### **Final checks and adjustments**

In [175]:
# Print the AnnData summary for a final overview of the object.
adata

AnnData object with n_obs × n_vars = 813 × 36503
    obs: 'in_tissue', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_rb', 'pct_counts_rb', 'c2l_Th', 'c2l_NK', 'c2l_APOD+ fibroblasts', 'c2l_CD8+ T RM', 'c2l_T reg', 'c2l_Macro1_2', 'c2l_DC1', 'c2l_SFRP2+ fibroblasts', 'c2l_TAGLN+ pericytes', 'c2l_POSTN+ fibroblasts', 'c2l_RGS5+ pericytes', 'c2l_VEC', 'c2l_Tc', 'c2l_ILC_NK', 'c2l_BC', 'c2l_Monocytes', 'c2l_MastC', 'c2l_Melanocytes', 'c2l_DC2', 'c2l_LEC', 'c2l_PlasmaC', 'c2l_PTGDS+ fibroblasts', 'c2l_MigDC', 'c2l_Neuronal_SchwannC', 'c2l_SMC', 'c2l_Skeletal muscle cells', 'leiden', 'c2l_Suprabasal keratinocytes', 'c2l_Basal keratinocytes', 'c2l_Chondrocytes', 'c2l_IL8+ DC1', 'assay_ontology_term_id', 'cell_type_ontology_term_id', 'donor_id', 'development_stage_ontology_term_id', 'disease_ontology_term_id', 'is_primary_data', 'organism_ontology_term_id', 'self_reported_ethnicity_ontology_term_id', 'sex_ontology_term_id', 'suspension_type', 'tissue_ty

In [176]:
# Check the obs column dtypes ahead of the dtype conversions in the following cells.
adata.obs.dtypes

in_tissue                                      int64
n_genes_by_counts                              int32
total_counts                                   int32
total_counts_mt                                int32
pct_counts_mt                                float32
total_counts_rb                                int32
pct_counts_rb                                float32
c2l_Th                                       float64
c2l_NK                                       float64
c2l_APOD+ fibroblasts                        float64
c2l_CD8+ T RM                                float64
c2l_T reg                                    float64
c2l_Macro1_2                                 float64
c2l_DC1                                      float64
c2l_SFRP2+ fibroblasts                       float64
c2l_TAGLN+ pericytes                         float64
c2l_POSTN+ fibroblasts                       float64
c2l_RGS5+ pericytes                          float64
c2l_VEC                                      f

In [177]:
# Downcast 64-bit numeric columns of adata.var to their 32-bit counterparts and
# log each conversion. The dtypes are captured once up front, so the column
# selection reflects the state of adata.var before anything is converted.
var_dtypes = adata.var.dtypes
for wide, narrow in (('float64', 'float32'), ('int64', 'int32')):
    for col in var_dtypes[var_dtypes == wide].index:
        adata.var[col] = adata.var[col].astype(narrow)
        print(f"changed {col} from {wide} to {narrow}")

In [178]:
# Normalise the dtypes of adata.obs and log each conversion:
#   float64 -> float32, int64 -> int32, object -> category.
# The dtypes are captured once up front, so the column selection reflects the
# state of adata.obs before anything is converted.
obs_dtypes = adata.obs.dtypes
obs_conversions = (
    ('float64', 'float32'),
    ('int64', 'int32'),
    ('object', 'category'),
)
for source_dtype, target_dtype in obs_conversions:
    for col in obs_dtypes[obs_dtypes == source_dtype].index:
        adata.obs[col] = adata.obs[col].astype(target_dtype)
        print(f"changed {col} from {source_dtype} to {target_dtype}")

changed c2l_Th from float64 to float32
changed c2l_NK from float64 to float32
changed c2l_APOD+ fibroblasts from float64 to float32
changed c2l_CD8+ T RM from float64 to float32
changed c2l_T reg from float64 to float32
changed c2l_Macro1_2 from float64 to float32
changed c2l_DC1 from float64 to float32
changed c2l_SFRP2+ fibroblasts from float64 to float32
changed c2l_TAGLN+ pericytes from float64 to float32
changed c2l_POSTN+ fibroblasts from float64 to float32
changed c2l_RGS5+ pericytes from float64 to float32
changed c2l_VEC from float64 to float32
changed c2l_Tc from float64 to float32
changed c2l_ILC_NK from float64 to float32
changed c2l_BC from float64 to float32
changed c2l_Monocytes from float64 to float32
changed c2l_MastC from float64 to float32
changed c2l_Melanocytes from float64 to float32
changed c2l_DC2 from float64 to float32
changed c2l_LEC from float64 to float32
changed c2l_PlasmaC from float64 to float32
changed c2l_PTGDS+ fibroblasts from float64 to float32
chan

In [179]:
# Preview obs after the dtype conversion.
adata.obs

Unnamed: 0_level_0,in_tissue,n_genes_by_counts,total_counts,total_counts_mt,pct_counts_mt,total_counts_rb,pct_counts_rb,c2l_Th,c2l_NK,c2l_APOD+ fibroblasts,...,donor_id,development_stage_ontology_term_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id,self_reported_ethnicity_ontology_term_id,sex_ontology_term_id,suspension_type,tissue_type,tissue_ontology_term_id
barcode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AAACGAGACGGTTGAT-1,1,2901,6221,112,1.800354,632,10.159139,0.031733,0.040995,0.067614,...,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True,NCBITaxon:9606,unknown,PATO:0000384,na,tissue,UBERON:0008803
AAACTGCTGGCTCCAA-1,1,1254,1980,111,5.606061,214,10.808082,0.013743,0.093198,0.426036,...,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True,NCBITaxon:9606,unknown,PATO:0000384,na,tissue,UBERON:0008803
AAAGGGATGTAGCAAG-1,1,5032,17380,442,2.543153,1617,9.303798,0.008600,0.003021,0.001483,...,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True,NCBITaxon:9606,unknown,PATO:0000384,na,tissue,UBERON:0008803
AAAGTTGACTCCCGTA-1,1,2113,4404,251,5.699364,662,15.031789,0.068089,0.372738,0.367517,...,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True,NCBITaxon:9606,unknown,PATO:0000384,na,tissue,UBERON:0008803
AAATACCTATAAGCAT-1,1,888,1353,93,6.873614,176,13.008129,0.019409,0.157548,0.267281,...,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True,NCBITaxon:9606,unknown,PATO:0000384,na,tissue,UBERON:0008803
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTGGGCGGCGGTTGCC-1,1,6681,18322,310,1.691955,2217,12.100207,0.001034,0.008258,0.001664,...,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True,NCBITaxon:9606,unknown,PATO:0000384,na,tissue,UBERON:0008803
TTGTAAGGCCAGTTGG-1,1,4018,8698,100,1.149690,1029,11.830306,0.020429,0.051064,0.129155,...,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True,NCBITaxon:9606,unknown,PATO:0000384,na,tissue,UBERON:0008803
TTGTAATCCGTACTCG-1,1,3670,12288,130,1.057943,901,7.332357,0.026250,0.153597,0.103507,...,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True,NCBITaxon:9606,unknown,PATO:0000384,na,tissue,UBERON:0008803
TTGTTCAGTGTGCTAC-1,1,5885,23806,705,2.961438,2600,10.921617,0.023592,0.007000,0.001506,...,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True,NCBITaxon:9606,unknown,PATO:0000384,na,tissue,UBERON:0008803


In [180]:
# List the obs column names that remain in the curated object.
adata.obs.columns

Index(['in_tissue', 'n_genes_by_counts', 'total_counts', 'total_counts_mt',
       'pct_counts_mt', 'total_counts_rb', 'pct_counts_rb', 'c2l_Th', 'c2l_NK',
       'c2l_APOD+ fibroblasts', 'c2l_CD8+ T RM', 'c2l_T reg', 'c2l_Macro1_2',
       'c2l_DC1', 'c2l_SFRP2+ fibroblasts', 'c2l_TAGLN+ pericytes',
       'c2l_POSTN+ fibroblasts', 'c2l_RGS5+ pericytes', 'c2l_VEC', 'c2l_Tc',
       'c2l_ILC_NK', 'c2l_BC', 'c2l_Monocytes', 'c2l_MastC', 'c2l_Melanocytes',
       'c2l_DC2', 'c2l_LEC', 'c2l_PlasmaC', 'c2l_PTGDS+ fibroblasts',
       'c2l_MigDC', 'c2l_Neuronal_SchwannC', 'c2l_SMC',
       'c2l_Skeletal muscle cells', 'leiden', 'c2l_Suprabasal keratinocytes',
       'c2l_Basal keratinocytes', 'c2l_Chondrocytes', 'c2l_IL8+ DC1',
       'assay_ontology_term_id', 'cell_type_ontology_term_id', 'donor_id',
       'development_stage_ontology_term_id', 'disease_ontology_term_id',
       'is_primary_data', 'organism_ontology_term_id',
       'self_reported_ethnicity_ontology_term_id', 'sex_ontology

In [181]:
# Preview var (gene-level metadata indexed by Ensembl gene ID).
adata.var

Unnamed: 0,feature_is_filtered
ENSG00000243485,True
ENSG00000237613,True
ENSG00000186092,True
ENSG00000238009,False
ENSG00000239945,True
...,...
ENSG00000277836,True
ENSG00000278633,True
ENSG00000276017,True
ENSG00000278817,False


In [182]:
# Preview obs once more.
# NOTE(review): same view as In [179]; this cell looks redundant and could be removed.
adata.obs

Unnamed: 0_level_0,in_tissue,n_genes_by_counts,total_counts,total_counts_mt,pct_counts_mt,total_counts_rb,pct_counts_rb,c2l_Th,c2l_NK,c2l_APOD+ fibroblasts,...,donor_id,development_stage_ontology_term_id,disease_ontology_term_id,is_primary_data,organism_ontology_term_id,self_reported_ethnicity_ontology_term_id,sex_ontology_term_id,suspension_type,tissue_type,tissue_ontology_term_id
barcode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AAACGAGACGGTTGAT-1,1,2901,6221,112,1.800354,632,10.159139,0.031733,0.040995,0.067614,...,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True,NCBITaxon:9606,unknown,PATO:0000384,na,tissue,UBERON:0008803
AAACTGCTGGCTCCAA-1,1,1254,1980,111,5.606061,214,10.808082,0.013743,0.093198,0.426036,...,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True,NCBITaxon:9606,unknown,PATO:0000384,na,tissue,UBERON:0008803
AAAGGGATGTAGCAAG-1,1,5032,17380,442,2.543153,1617,9.303798,0.008600,0.003021,0.001483,...,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True,NCBITaxon:9606,unknown,PATO:0000384,na,tissue,UBERON:0008803
AAAGTTGACTCCCGTA-1,1,2113,4404,251,5.699364,662,15.031789,0.068089,0.372738,0.367517,...,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True,NCBITaxon:9606,unknown,PATO:0000384,na,tissue,UBERON:0008803
AAATACCTATAAGCAT-1,1,888,1353,93,6.873614,176,13.008129,0.019409,0.157548,0.267281,...,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True,NCBITaxon:9606,unknown,PATO:0000384,na,tissue,UBERON:0008803
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTGGGCGGCGGTTGCC-1,1,6681,18322,310,1.691955,2217,12.100207,0.001034,0.008258,0.001664,...,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True,NCBITaxon:9606,unknown,PATO:0000384,na,tissue,UBERON:0008803
TTGTAAGGCCAGTTGG-1,1,4018,8698,100,1.149690,1029,11.830306,0.020429,0.051064,0.129155,...,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True,NCBITaxon:9606,unknown,PATO:0000384,na,tissue,UBERON:0008803
TTGTAATCCGTACTCG-1,1,3670,12288,130,1.057943,901,7.332357,0.026250,0.153597,0.103507,...,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True,NCBITaxon:9606,unknown,PATO:0000384,na,tissue,UBERON:0008803
TTGTTCAGTGTGCTAC-1,1,5885,23806,705,2.961438,2600,10.921617,0.023592,0.007000,0.001506,...,WSSKNKCLsp12140270,HsapDv:0000133,MONDO:0020804,True,NCBITaxon:9606,unknown,PATO:0000384,na,tissue,UBERON:0008803


In [183]:
# List the obs column names once more.
# NOTE(review): same view as In [180]; this cell looks redundant and could be removed.
adata.obs.columns

Index(['in_tissue', 'n_genes_by_counts', 'total_counts', 'total_counts_mt',
       'pct_counts_mt', 'total_counts_rb', 'pct_counts_rb', 'c2l_Th', 'c2l_NK',
       'c2l_APOD+ fibroblasts', 'c2l_CD8+ T RM', 'c2l_T reg', 'c2l_Macro1_2',
       'c2l_DC1', 'c2l_SFRP2+ fibroblasts', 'c2l_TAGLN+ pericytes',
       'c2l_POSTN+ fibroblasts', 'c2l_RGS5+ pericytes', 'c2l_VEC', 'c2l_Tc',
       'c2l_ILC_NK', 'c2l_BC', 'c2l_Monocytes', 'c2l_MastC', 'c2l_Melanocytes',
       'c2l_DC2', 'c2l_LEC', 'c2l_PlasmaC', 'c2l_PTGDS+ fibroblasts',
       'c2l_MigDC', 'c2l_Neuronal_SchwannC', 'c2l_SMC',
       'c2l_Skeletal muscle cells', 'leiden', 'c2l_Suprabasal keratinocytes',
       'c2l_Basal keratinocytes', 'c2l_Chondrocytes', 'c2l_IL8+ DC1',
       'assay_ontology_term_id', 'cell_type_ontology_term_id', 'donor_id',
       'development_stage_ontology_term_id', 'disease_ontology_term_id',
       'is_primary_data', 'organism_ontology_term_id',
       'self_reported_ethnicity_ontology_term_id', 'sex_ontology

In [184]:
# Check the format of the expression matrices (adata.X and araw.X)

In [185]:
# The repr of adata.X reports its shape, dtype, storage format and number of stored elements.
adata.X

<813x36503 sparse matrix of type '<class 'numpy.float32'>'
	with 15003915 stored elements in Compressed Sparse Row format>

In [186]:
# Same check for araw.X, the matrix of the object that is assigned to adata.raw below.
araw.X

<813x36503 sparse matrix of type '<class 'numpy.float32'>'
	with 3368666 stored elements in Compressed Sparse Row format>

In [187]:
# Copy the raw counts to adata.raw

In [188]:
# Attach araw as adata.raw; both matrices are 813 x 36503 according to the reprs above.
# NOTE(review): matching shapes do not prove matching order — consider asserting that
# araw.obs_names / araw.var_names equal those of adata before this assignment.
adata.raw = araw

In [189]:
# Write the curated object to the Final_objects folder

In [190]:
# Persist the curated AnnData object as a gzip-compressed .h5ad file.
# NOTE(review): absolute cluster path — consider moving it to a configuration cell.
final_h5ad_path = '/lustre/scratch127/cellgen/cellgeni/cxgportal_sets/spatial-skin/Final_objects/bcc_face_cheek1.h5ad'
adata.write(final_h5ad_path, compression='gzip')