# Import packages and data 

In [1]:
# Standard library
import warnings

# Third-party
import numpy as np
import pandas as pd
import scanpy as sc
import seaborn as sns
import scipy.stats
import anndata
import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib.axes._axes import _log as matplotlib_axes_logger
from scipy import sparse

# Newer numba releases expose the warning class under `numba.core.errors`;
# older releases only have `numba.errors`. Try the new location first so this
# cell imports cleanly on either.
try:
    from numba.core.errors import NumbaPerformanceWarning
except ImportError:
    from numba.errors import NumbaPerformanceWarning

# Only let ERROR-level messages through from matplotlib's axes logger.
matplotlib_axes_logger.setLevel('ERROR')
# silence NumbaPerformanceWarning
warnings.filterwarnings("ignore", category=NumbaPerformanceWarning)

  from pandas.core.index import RangeIndex


In [2]:
# Configure scanpy logging and figure defaults for the rest of the notebook.
sc.settings.verbosity = 1  # verbosity: errors (0), warnings (1), info (2), hints (3)
# Set up the plot config for viewing the annotation clearly.
# dpi applies to inline figures, dpi_save to figures written to disk.
sc.settings.set_figure_params(dpi=120, dpi_save=1000)
# Print the versions of the main packages in use (shown in the cell output).
sc.logging.print_versions()

scanpy==1.4.4 anndata==0.7.1 umap==0.3.10 numpy==1.17.1 scipy==1.4.1 pandas==1.0.5 scikit-learn==0.22.2.post1 statsmodels==0.11.1 python-igraph==0.8.0 louvain==0.6.1


# Import the fetal liver lymphoid compartment

In [3]:
# Load the fetal liver dataset from its .h5ad file into `adata`.
# NOTE(review): absolute, user-specific path — consider a configurable data directory.
adata = sc.read('/Users/b8058304/Documents/PhD_work/Coding/liver/data/fetal_liver_alladata_Copy1.h5ad')

In [4]:
# Display a summary of the loaded liver object (dimensions and annotation fields).
adata

AnnData object with n_obs × n_vars = 113063 × 27080 
    obs: 'nGene', 'nUMI', 'orig.ident', 'percent.mito', 'fetal.ids', 'sort.ids', 'tissue', 'lanes', 'stages', 'sample.type', 'gender', 'AnnatomicalPart', 'doublets', 'cell.labels', 'combined.labels', 'batch', 'sample', 'n_counts', 'cell.labels_progen'
    obsm: 'X_fdg', 'X_tsne', 'X_umap'

In [5]:
# Count cells per annotation label: `len` of each `cell.labels` group in `adata.obs`.
# NOTE(review): `.size()` is the usual idiom, but it may treat empty categorical
# groups differently from `.apply(len)` — confirm before switching.
cell_numbers = adata.obs.groupby(["cell.labels"]).apply(len)
cell_numbers

cell.labels
B cell                            1079
DC1                                336
DC2                               3954
DC precursor                       330
Early Erythroid                  11985
Early lymphoid_T lymphocyte        767
Endothelial cell                  3348
Fibroblast                        1713
HSC_MPP                           3439
Hepatocyte                        2479
ILC precursor                     1726
Kupffer Cell                     24841
Late Erythroid                    3180
MEMP                              1342
Mast cell                         1308
Megakaryocyte                     3983
Mid Erythroid                    27000
Mono-Mac                          6590
Monocyte                          2586
Monocyte precursor                 350
NK                                6706
Neutrophil-myeloid progenitor      658
Pre pro B cell                     234
VCAM1+ EI macrophage               161
pDC precursor                      253
pre-B cell   

In [6]:
# Store a sparse (CSR) snapshot of the full liver matrix in `adata.raw`, then
# put the original matrix object back on `adata.X`.
# `sparse` is already imported from scipy in the notebook's first cell.
array_vals = adata.X                  # keep a handle to the original matrix
adata.X = sparse.csr_matrix(adata.X)  # temporarily swap in a CSR version
adata.raw = adata                     # `.raw` is set while X holds the CSR matrix
adata.X = array_vals                  # restore the original matrix on `.X`

In [7]:
# Keep only cells whose `cell.labels` value is one of the four B-cell labels
# below; `.copy()` makes the subset an independent object rather than a view.
adata = adata[
    adata.obs['cell.labels'].isin(
        [
            'pre-B cell',
            'pro-B cell',
            'B cell',
            'Pre pro B cell',
        ]
    )
].copy()

In [8]:
# Tag every remaining cell with its dataset of origin
# (presumably used to tell the datasets apart later — verify downstream).
adata.obs["dataset"] = "liver"

In [9]:
# Cells per (sample, label) pair for the filtered liver data: `len` of each
# `fetal.ids` x `cell.labels` group in `adata.obs`.
cell_numbers = (
    adata.obs
    .groupby(["fetal.ids", "cell.labels"])
    .apply(len)
)
# Write the counts table to CSV for the manuscript pipelines.
pd.DataFrame(cell_numbers).to_csv(
    "/Users/b8058304/Documents/PhD_work/Coding/manuscript_figs_mk2/resources_for_pipelines/liver_lymphoid_nrs_by_sample_20210115.csv"
)
cell_numbers

fetal.ids           cell.labels   
F16_male_8+1PCW     B cell              3
                    Pre pro B cell      2
                    pre-B cell          6
                    pro-B cell          3
F17_male_9+1PCW     B cell              3
                    pre-B cell          5
                    pro-B cell          6
F21_male_16+2PCW    B cell            117
                    Pre pro B cell     22
                    pre-B cell        118
                    pro-B cell        109
F22_female_9+5PCW   B cell             33
                    Pre pro B cell     14
                    pre-B cell         52
                    pro-B cell         83
F23_male_11+3PCW    B cell             38
                    Pre pro B cell     24
                    pre-B cell         84
                    pro-B cell         91
F29_female_17+0PCW  B cell            152
                    Pre pro B cell     12
                    pre-B cell         87
                    pro-B cell         91

In [10]:
# Bind the filtered liver object to its own name; `adata` is reassigned when the
# next dataset is loaded. This is a reference to the same object, not a copy.
liver = adata

# Import the FBM lymphoid compartment

In [11]:
# Load the fetal bone marrow (FBM) dataset from its .h5ad file, reusing the name `adata`.
# NOTE(review): absolute, user-specific path — consider a configurable data directory.
adata = sc.read('/Users/b8058304/Documents/PhD_work/Coding/bm_plus_19pcw/data/bm_plus_19pcw_raw_dr_20201007.h5ad')

In [12]:
# Display a summary of the loaded FBM object (dimensions and annotation fields).
adata

AnnData object with n_obs × n_vars = 103228 × 33712 
    obs: 'cell.labels', 'doublets', 'fetal.ids', 'gender', 'is_doublet', 'is_doublet_poptrim', 'is_doublet_wolock', 'lanes', 'nGene', 'nUMI', 'orig.ident', 'percent.mito', 'processing.type', 'scrublet_cluster_score', 'scrublet_score', 'sequencing.type', 'sort.ids', 'april_cell.labels', 'cell.labels_20200708', 'cell.labels_20200713', 'cell.labels_20200718', 'nk_meta', 'mito.threshold'
    var: 'gene_ids-1', 'feature_types-1'
    obsm: 'X_orig_pca', 'X_pca', 'X_umap'

In [13]:
# Count cells per annotation label in the full FBM data:
# `len` of each `cell.labels` group in `adata.obs`.
cell_numbers = adata.obs.groupby(["cell.labels"]).apply(len)
cell_numbers

cell.labels
CD4 T cell             327
CD8 T cell             171
CD14 monocyte         8763
CD56 bright NK         449
CMP                    425
                      ... 
schwann cells            9
sinusoidal EC          550
stromal macrophage    1464
tDC                    193
tip EC                 362
Length: 64, dtype: int64

In [14]:
# Store a sparse (CSR) snapshot of the full FBM matrix in `adata.raw`, then
# put the original matrix object back on `adata.X`.
# `sparse` is already imported from scipy in the notebook's first cell.
array_vals = adata.X                  # keep a handle to the original matrix
adata.X = sparse.csr_matrix(adata.X)  # temporarily swap in a CSR version
adata.raw = adata                     # `.raw` is set while X holds the CSR matrix
adata.X = array_vals                  # restore the original matrix on `.X`

In [15]:
# Keep only cells whose `cell.labels` value is one of the five B-cell labels
# below; `.copy()` makes the subset an independent object rather than a view.
adata = adata[
    adata.obs['cell.labels'].isin(
        [
            "pre pro B progenitor",
            "pro B progenitor",
            "pre B progenitor",
            "immature B cell",
            "naive B cell",
        ]
    )
].copy()

In [16]:
# Re-count cells per label after filtering, to check which labels were retained.
cell_numbers = adata.obs.groupby(["cell.labels"]).apply(len)
cell_numbers

cell.labels
immature B cell          1988
naive B cell             1411
pre B progenitor        14229
pre pro B progenitor     5427
pro B progenitor         5528
dtype: int64

In [17]:
# Tag every remaining cell with its dataset of origin
# (presumably used to tell the datasets apart later — verify downstream).
adata.obs["dataset"] = "FBM"

In [18]:
# Cells per (sample, label) pair for the filtered FBM data: `len` of each
# `fetal.ids` x `cell.labels` group in `adata.obs`.
cell_numbers = (
    adata.obs
    .groupby(["fetal.ids", "cell.labels"])
    .apply(len)
)
# Write the counts table to CSV for the manuscript pipelines.
pd.DataFrame(cell_numbers).to_csv(
    "/Users/b8058304/Documents/PhD_work/Coding/manuscript_figs_mk2/resources_for_pipelines/fbm_lymphoid_nrs_by_sample_20210115.csv"
)
cell_numbers

fetal.ids           cell.labels         
F21_male_16+2PCW    immature B cell            5
                    naive B cell              34
                    pre B progenitor         597
                    pre pro B progenitor     383
                    pro B progenitor          66
F29_female_17+0PCW  immature B cell           26
                    naive B cell             194
                    pre B progenitor        2481
                    pre pro B progenitor     717
                    pro B progenitor         114
F30_male_14+3PCW    immature B cell           14
                    naive B cell              55
                    pre B progenitor        1251
                    pre pro B progenitor     944
                    pro B progenitor         110
F38_male_12PCW      immature B cell           22
                    naive B cell               9
                    pre B progenitor         328
                    pre pro B progenitor     147
                    pro B pr

In [19]:
# Bind the filtered FBM object to its own name; `adata` is reassigned when the
# next dataset is loaded. This is a reference to the same object, not a copy.
fbm = adata

# Import the ABM lymphoid compartment

In [20]:
# Load the adult bone marrow (ABM) dataset from its .h5ad file, reusing the name `adata`.
# NOTE(review): absolute, user-specific path — consider a configurable data directory.
adata = sc.read('/Users/b8058304/Documents/PhD_work/Coding/adult_bm/data/abm_raw_dr_20200717.h5ad')

In [21]:
# Display a summary of the loaded ABM object (dimensions and annotation fields).
adata

AnnData object with n_obs × n_vars = 142026 × 33694 
    obs: 'sample', 'lanes', 'sex', 'age', 'ethnicity', 'mad_prd', 'auto_prd', 'cell.labels', 'cell.labels2', 'leiden', 'percent.mito', 'nGene', 'nUMI', 'cell.labels_july', 'cell.labels_20200717'
    var: 'gene_ids'
    obsm: 'X_orig_pca', 'X_pca', 'X_umap'

In [22]:
# Count cells per annotation label in the full ABM data:
# `len` of each `cell.labels` group in `adata.obs`.
cell_numbers = adata.obs.groupby(["cell.labels"]).apply(len)
cell_numbers

cell.labels
CD14 monocyte             3670
CD16 monocyte             1938
CD56 bright NK            1228
CLP                        882
CMP                        288
DC1                        135
DC2                        481
DC3                        550
DC precursor               462
HSC                        497
LMPP                        80
MEMP                       785
MK                         577
MOP                       1440
MPP                        365
Treg                      6327
early MK                   136
early erythroid           5441
erythroid macrophage        77
immature B cell           2728
late erythroid            1150
mature CD8 T cell        15725
mature NK                 6074
memory B cell             4106
memory CD4 T cell        22197
mid erythroid             2192
monocyte-DC                515
myelocyte                 6675
myeloid DC progenitor      110
naive B cell             19265
naive CD4 T cell          5873
naive CD8 T cell          8

In [23]:
# Store a sparse (CSR) snapshot of the full ABM matrix in `adata.raw`, then
# put the original matrix object back on `adata.X`.
# `sparse` is already imported from scipy in the notebook's first cell.
array_vals = adata.X                  # keep a handle to the original matrix
adata.X = sparse.csr_matrix(adata.X)  # temporarily swap in a CSR version
adata.raw = adata                     # `.raw` is set while X holds the CSR matrix
adata.X = array_vals                  # restore the original matrix on `.X`

In [24]:
# Keep only cells whose `cell.labels` value is one of the seven B-cell / plasma
# cell labels below; `.copy()` makes the subset an independent object rather
# than a view.
adata = adata[
    adata.obs['cell.labels'].isin(
        [
            "pro B progenitor",
            "pre B cell",
            "immature B cell",
            "naive B cell",
            "transitional B cell",
            "memory B cell",
            "plasma cell",
        ]
    )
].copy()

In [25]:
# Re-count cells per label after filtering, to check which labels were retained.
cell_numbers = adata.obs.groupby(["cell.labels"]).apply(len)
cell_numbers

cell.labels
immature B cell         2728
memory B cell           4106
naive B cell           19265
plasma cell             2074
pre B cell               971
pro B progenitor        1390
transitional B cell     2151
dtype: int64

In [26]:
# Tag every remaining cell with its dataset of origin
# (presumably used to tell the datasets apart later — verify downstream).
adata.obs["dataset"] = "ABM"

In [27]:
# Cells per (sample, label) pair for the filtered ABM data: `len` of each
# `sample` x `cell.labels` group in `adata.obs`.
cell_numbers = (
    adata.obs
    .groupby(["sample", "cell.labels"])
    .apply(len)
)
# Write the counts table to CSV for the manuscript pipelines.
pd.DataFrame(cell_numbers).to_csv(
    "/Users/b8058304/Documents/PhD_work/Coding/manuscript_figs_mk2/resources_for_pipelines/abm_lymphoid_nrs_by_sample_20210115.csv"
)
cell_numbers

sample     cell.labels        
MantonBM1  immature B cell        1314
           memory B cell           783
           naive B cell           4175
           plasma cell             447
           pre B cell              512
           pro B progenitor        468
           transitional B cell     738
MantonBM2  immature B cell         853
           memory B cell          1295
           naive B cell           8273
           plasma cell             522
           pre B cell              213
           pro B progenitor        347
           transitional B cell     843
MantonBM5  immature B cell         213
           memory B cell          1186
           naive B cell           4214
           plasma cell             665
           pre B cell              132
           pro B progenitor        262
           transitional B cell     366
MantonBM6  immature B cell         348
           memory B cell           842
           naive B cell           2603
           plasma cell           

In [28]:
# Bind the filtered ABM object to its own name.
# This is a reference to the same object, not a copy.
abm = adata