# Import packages and configure settings

In [1]:
import numpy as np
import pandas as pd
import scanpy as sc
import seaborn as sns
import scipy.stats
import anndata
import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib.axes._axes import _log as matplotlib_axes_logger
from scipy import sparse
matplotlib_axes_logger.setLevel('ERROR')
import warnings
warnings.filterwarnings('ignore')

  from pandas.core.index import RangeIndex


In [2]:
# Raise pandas' display limits so the large summary tables printed later in
# the notebook are shown in full instead of being truncated.
for option_name, option_value in (
    ('display.max_rows', 500),
    ('display.max_columns', 500),
    ('display.width', 1000),
):
    pd.set_option(option_name, option_value)

In [3]:
# Session-wide scanpy configuration; run once before any plotting or logging.
# Logging level 1 = warnings (scale: 0 errors, 1 warnings, 2 info, 3 hints).
sc.settings.verbosity = 1  # verbosity: errors (0), warnings (1), info (2), hints (3)
# Set up the plot config for viewing the annotation clearly:
# `dpi` is passed as 120 and `dpi_save` as 1000 (high resolution for saved figures).
sc.settings.set_figure_params(dpi=120, dpi_save=1000)
# Print the versions of scanpy and its key dependencies so the environment
# used for this run is recorded in the notebook output.
sc.logging.print_versions()

scanpy==1.4.4 anndata==0.7.1 umap==0.3.10 numpy==1.17.1 scipy==1.4.1 pandas==1.0.5 scikit-learn==0.22.2.post1 statsmodels==0.11.1 python-igraph==0.8.0 louvain==0.6.1


# Load FBM transformed dataset

In [4]:
# Load the AnnData object (.h5ad) that the rest of the notebook summarises and annotates.
# NOTE(review): this is an absolute path on one user's machine, so the notebook
# will not run elsewhere without editing it; consider a configurable data directory.
# NOTE(review): the final cell writes back to this exact same file, so re-running
# the notebook reads the already-modified object rather than the original input.
adata = sc.read("/Users/b8058304/Documents/PhD_work/Coding/manuscript_figs_mk2/data/fig1b_fbm_scaled_gex_updated_dr_20210104.h5ad")

In [5]:
# Number of cells per broad cell-type label.
# `.size()` is pandas' built-in per-group row count; it replaces `.apply(len)`,
# which invokes a Python callable once per group and is slower.
# `observed=True` makes the categorical behaviour explicit: only labels that
# actually occur in the data are counted, so no zero-count rows are introduced.
cell_numbers = adata.obs.groupby(["broad_fig1_cell.labels"], observed=True).size()
cell_numbers

broad_fig1_cell.labels
HSC_MPP          3795
erythroid       26407
MK               2624
B_lineage       28583
DC               2459
eo/baso/mast     1108
neutrophil      10486
monocyte        20038
T_NK             1349
stroma           6379
dtype: int64

In [6]:
# Number of cells per (donor, broad cell-type label) combination.
# `.size()` is pandas' built-in per-group row count; it replaces `.apply(len)`,
# which invokes a Python callable once per group and is slower.
# `observed=True` counts only donor/label combinations that actually occur, so
# the exported table contains no zero-count rows for categorical columns.
cell_numbers = adata.obs.groupby(
    ["fetal.ids", "broad_fig1_cell.labels"], observed=True
).size()

# Export the counts for the fig. 1d barplot pipeline. Wrapping the unnamed Series
# in a DataFrame keeps the CSV layout unchanged (two index columns plus a "0" column).
# NOTE(review): absolute, machine-specific output path.
pd.DataFrame(cell_numbers).to_csv("/Users/b8058304/Documents/PhD_work/Coding/manuscript_figs_mk2/resources_for_pipelines/fig1d_barplot_nrs_20210104.csv")
cell_numbers

fetal.ids           broad_fig1_cell.labels
F21_male_16+2PCW    HSC_MPP                     214
                    erythroid                   569
                    MK                          228
                    B_lineage                  1085
                    DC                          118
                    eo/baso/mast                 48
                    neutrophil                   94
                    monocyte                    557
                    T_NK                         50
                    stroma                      179
F29_female_17+0PCW  HSC_MPP                     458
                    erythroid                  1113
                    MK                          271
                    B_lineage                  3532
                    DC                          484
                    eo/baso/mast                110
                    neutrophil                  354
                    monocyte                   1453
                    T

In [7]:
# Number of cells per (donor, refined cell label) combination.
# `.size()` is pandas' built-in per-group row count; it replaces `.apply(len)`,
# which invokes a Python callable once per group and is slower.
# `observed=True` counts only donor/label combinations that actually occur, so
# the exported table contains no zero-count rows for categorical columns.
cell_numbers = adata.obs.groupby(
    ["fetal.ids", "cell.labels"], observed=True
).size()

# Export the refined counts for the fig. 1d barplot pipeline. Wrapping the unnamed
# Series in a DataFrame keeps the CSV layout unchanged (two index columns plus a "0" column).
# NOTE(review): absolute, machine-specific output path.
pd.DataFrame(cell_numbers).to_csv("/Users/b8058304/Documents/PhD_work/Coding/manuscript_figs_mk2/resources_for_pipelines/fig1d_barplot_nrs_refined_20210104.csv")
cell_numbers

fetal.ids         cell.labels       
F21_male_16+2PCW  CD4 T cell             12
                  CD8 T cell             13
                  CD14 monocyte         217
                  CD56 bright NK          6
                  CMP                    19
                                       ... 
SB19PCW           promyelocyte          472
                  schwann cells           8
                  sinusoidal EC          42
                  stromal macrophage     35
                  tip EC                104
Length: 529, dtype: int64

In [8]:
# List the distinct donor identifiers present in the cell metadata, in order of
# first appearance; used to check which ids need a stage assignment.
observed_fetal_ids = adata.obs["fetal.ids"].unique()
observed_fetal_ids.tolist()

['F21_male_16+2PCW',
 'F29_female_17+0PCW',
 'F30_male_14+3PCW',
 'F38_male_12PCW',
 'F41_female_16PCW',
 'F45_female_13+6PCW',
 'F50_female_15PCW',
 'F51_female_15PCW',
 'SB19PCW']

In [9]:
# Assign a developmental stage to every cell from its donor id (`fetal.ids`).
#
# `fetal_ids` and `stage_ids` are parallel lists: entry i of `stage_ids` is the
# stage of donor i in `fetal_ids`. Donors are listed in order of increasing age.
fetal_ids = [
    'F38_male_12PCW', 'F45_female_13+6PCW',
    'F30_male_14+3PCW', 'F50_female_15PCW', 'F51_female_15PCW',
    'F41_female_16PCW', 'F21_male_16+2PCW', 'F29_female_17+0PCW',
    'SB19PCW',
]

stage_ids = [
    "Stage 1", "Stage 1",
    "Stage 2", "Stage 2", "Stage 2",
    "Stage 3", "Stage 3", "Stage 3",
    "Stage 4",
]

# Guard against the two lists drifting out of step when donors are added or removed.
if len(fetal_ids) != len(stage_ids):
    raise ValueError(
        f"fetal_ids has {len(fetal_ids)} entries but stage_ids has {len(stage_ids)}"
    )
stage_by_fetal_id = dict(zip(fetal_ids, stage_ids))

# Use an explicit lookup instead of `Series.replace(list, list)`: `replace` leaves
# any id missing from the list untouched, so an unexpected donor id would silently
# end up as its own "stage" value. `.map` yields NaN for such ids, which is checked
# below. Casting to str first gives a plain string column whether or not
# `fetal.ids` is stored as a categorical.
stage_labels = adata.obs["fetal.ids"].astype(str).map(stage_by_fetal_id)

unmapped_ids = sorted(set(adata.obs.loc[stage_labels.isna(), "fetal.ids"].astype(str)))
if unmapped_ids:
    raise ValueError(f"No stage defined for fetal.ids: {unmapped_ids}")

adata.obs["stage"] = stage_labels

In [10]:
# Export the full per-cell metadata table (adata.obs) as CSV for the fig. 1d
# barplot pipeline.
# NOTE(review): absolute, machine-specific output path.
obs_table = pd.DataFrame(adata.obs)
obs_table.to_csv("/Users/b8058304/Documents/PhD_work/Coding/manuscript_figs_mk2/resources_for_pipelines/fig1d_barplot_meta_20210104.csv")

In [11]:
# Persist the AnnData object, including the newly added `stage` column in adata.obs.
# NOTE(review): this is the same file the notebook loads at the top, so the input
# is overwritten in place; write to a new filename if the original must be kept.
adata.write("/Users/b8058304/Documents/PhD_work/Coding/manuscript_figs_mk2/data/fig1b_fbm_scaled_gex_updated_dr_20210104.h5ad")

... storing 'stage' as categorical
