# Import packages and data 

In [1]:
# Numerical, single-cell analysis and plotting libraries used by this notebook.
import numpy as np
import pandas as pd
import scanpy as sc
import seaborn as sns
import scipy.stats
import anndata
import matplotlib.pyplot as plt
import matplotlib as mpl
# Private logger of matplotlib's axes module; imported so its level can be raised below.
from matplotlib.axes._axes import _log as matplotlib_axes_logger
from scipy import sparse
# Only let ERROR-level (and above) messages through from matplotlib's axes logger.
matplotlib_axes_logger.setLevel('ERROR')
import warnings
# Suppress every Python warning for the rest of the session.
# NOTE(review): this also hides deprecation / dtype warnings that may matter — confirm this is intended.
warnings.filterwarnings('ignore')

In [2]:
# Raise pandas' display limits so wide / long tables are rendered without truncation.
for option_name, option_value in (
    ('display.max_rows', 500),
    ('display.max_columns', 500),
    ('display.width', 1000),
):
    pd.set_option(option_name, option_value)

In [3]:
# Scanpy log level: with 1, only errors and warnings are printed.
sc.settings.verbosity = 1  # verbosity: errors (0), warnings (1), info (2), hints (3)
# Set up the plot config for viewing the annotation clearly.
# `dpi` is the resolution of figures rendered in the notebook, `dpi_save` that of figures written to disk.
sc.settings.set_figure_params(dpi=120, dpi_save=1000)
# Record the versions of scanpy and its main dependencies alongside the results.
sc.logging.print_versions()

scanpy==1.4.4 anndata==0.7.1 umap==0.3.10 numpy==1.18.1 scipy==1.4.1 pandas==0.23.4 scikit-learn==0.22.2.post1 statsmodels==0.11.1 python-igraph==0.8.0 louvain==0.6.1


# Load FBM transformed dataset

In [4]:
# Read the FBM AnnData object (.h5ad) that the rest of the notebook works on.
fbm_h5ad_path = "/Users/b8058304/Documents/PhD_work/Coding/manuscript_figs/data/fig1c_fbm_scaled_gex_updated_dr_20200428.h5ad"
adata = sc.read(fbm_h5ad_path)

In [5]:
# Number of cells (rows of adata.obs) in each broad cell-type label.
obs_by_broad_label = adata.obs.groupby(["broad_fig1_cell.labels"])
cell_numbers = obs_by_broad_label.apply(len)
cell_numbers

broad_fig1_cell.labels
HSC_MPP          7984
erythroid       26591
MK               2701
B_lineage       28613
DC               2460
eo/baso/mast     1112
neutrophil      10292
monocyte        16733
T_NK             1440
stroma           6726
dtype: int64

In [6]:
# Number of cells for every (fetal sample, broad cell-type label) pair,
# written to CSV (presumably the input for the fig. 1d bar plot, judging by the file name).
barplot_counts_csv = "/Users/b8058304/Documents/PhD_work/Coding/manuscript_figs/resources_for_pipelines/fig1d_barplot_nrs_20200528.csv"
grouping_columns = ["fetal.ids", "broad_fig1_cell.labels"]
cell_numbers = adata.obs.groupby(grouping_columns).apply(len)
# An unnamed Series becomes a one-column frame whose column is labelled 0.
cell_numbers.to_frame().to_csv(barplot_counts_csv)
cell_numbers

fetal.ids           broad_fig1_cell.labels
F21_male_16+2PCW    HSC_MPP                     348
                    erythroid                   569
                    MK                          228
                    B_lineage                  1085
                    DC                          118
                    eo/baso/mast                 48
                    neutrophil                   64
                    monocyte                    453
                    T_NK                         60
                    stroma                      179
F29_female_17+0PCW  HSC_MPP                     793
                    erythroid                  1113
                    MK                          271
                    B_lineage                  3532
                    DC                          484
                    eo/baso/mast                110
                    neutrophil                  297
                    monocyte                   1175
                    T

In [6]:
# List the distinct fetal sample IDs present in the per-cell metadata.
fetal_id_column = adata.obs["fetal.ids"]
fetal_id_column.unique().tolist()

['F21_male_16+2PCW',
 'F29_female_17+0PCW',
 'F30_male_14+3PCW',
 'F38_male_12PCW',
 'F41_female_16PCW',
 'F45_female_13+6PCW',
 'F50_female_15PCW',
 'F51_female_15PCW',
 'SB19PCW']

In [7]:
# Stage label for every fetal sample, listed in order of the PCW encoded in the ID.
# Keeping each ID next to its stage in a single mapping prevents the two
# parallel lists from drifting out of alignment when a sample is added or removed.
stage_by_fetal_id = {
    'F38_male_12PCW': "Stage 1",
    'F45_female_13+6PCW': "Stage 1",
    'F30_male_14+3PCW': "Stage 2",
    'F50_female_15PCW': "Stage 2",
    'F51_female_15PCW': "Stage 2",
    'F41_female_16PCW': "Stage 3",
    'F21_male_16+2PCW': "Stage 3",
    'F29_female_17+0PCW': "Stage 3",
    'SB19PCW': "Stage 4",
}

# Retained (same order and content as before) for any later cell that uses these names.
fetal_ids = list(stage_by_fetal_id)
stage_ids = list(stage_by_fetal_id.values())

# .map yields NaN for any ID missing from the mapping, whereas .replace would
# silently copy the raw sample ID into the "stage" column.
stage_labels = adata.obs["fetal.ids"].map(stage_by_fetal_id)

unmapped_fetal_ids = adata.obs.loc[stage_labels.isna().values, "fetal.ids"].unique().tolist()
if unmapped_fetal_ids:
    raise ValueError(
        "No stage defined for fetal.ids: {}".format(unmapped_fetal_ids)
    )

adata.obs["stage"] = stage_labels

In [8]:
# Load 19PCW lane info
combined_h5ad_path = '/Users/b8058304/Documents/PhD_work/Coding/bm_plus_19pcw/data/bm_plus-19pcw_combined_20200218.h5ad'
tot = sc.read(combined_h5ad_path)
# Take the per-cell "lanes" column from the combined object and attach it to adata;
# the assignment is aligned on the obs index, so rows are matched by index label.
tot_lanes_meta = tot.obs["lanes"]
adata.obs["lanes"] = tot_lanes_meta

In [9]:
# Number of cells (rows of adata.obs) per sequencing lane.
obs_by_lane = adata.obs.groupby(["lanes"])
cell_numbers = obs_by_lane.apply(len)
cell_numbers

lanes
FCAImmP7179367       1962
FCAImmP7179368       1190
FCAImmP7277558       1463
FCAImmP7277559       3072
FCAImmP7277566       4035
FCAImmP7277567       2291
FCAImmP7292031       3638
FCAImmP7292035       4097
FCAImmP7528281       2033
FCAImmP7528282       1096
FCAImmP7555853       2014
FCAImmP7555854       2169
FCAImmP7555855       1505
FCAImmP7555862       2232
FCAImmP7579220       1693
FCAImmP7579221       1469
FCAImmP7579232       2061
FCAImmP7579233       1396
FCAImmP7803016       4736
FCAImmP7803017       4785
FCAImmP7803018       3125
FCAImmP7803019       3256
FCAImmP7803028       6379
FCAImmP7803029       5852
FCAImmP7803030       5953
FCAImmP7803031       3649
FCAImmP7803032       3484
FCAImmP7803033       3696
WSSS_F_BON8710636    2977
WSSS_F_BON8710637    2451
WSSS_F_BON8710638    3202
WSSS_F_BON8710639    2660
WSSS_F_BON8710640    3129
WSSS_F_BON8710641     894
WSSS_F_BON8710642    3008
WSSS_F_BON8710643    2000
dtype: int64

In [11]:
# Export the full per-cell metadata table (including the new "stage" and "lanes" columns) to CSV.
barplot_meta_csv = "/Users/b8058304/Documents/PhD_work/Coding/manuscript_figs/resources_for_pipelines/fig1d_barplot_meta_20200513.csv"
# adata.obs is already a DataFrame, so it can be written directly.
adata.obs.to_csv(barplot_meta_csv)

In [12]:
# Save the annotated object. NOTE: this is the same path the object was read from
# at the top of the notebook, so the input file is overwritten in place.
fbm_h5ad_out_path = "/Users/b8058304/Documents/PhD_work/Coding/manuscript_figs/data/fig1c_fbm_scaled_gex_updated_dr_20200428.h5ad"
adata.write(fbm_h5ad_out_path)

... storing 'stage' as categorical
