In [None]:
# The conda env used in this notebook is a customised "crc-atlas" env: it contains the scanpy_helper_submodule from the LUCAS project
import os

import decoupler as dc
import numpy as np
import pandas as pd
import sc_atlas_helpers as ah
import scanpy as sc
from matplotlib.pyplot import rc_context
from scanpy_helper_submodule import scanpy_helpers as sh

# Notebook-wide scanpy figure defaults: 400 dpi, white background, no axes frame
sc.settings.set_figure_params(dpi=400, facecolor="white", frameon=False)

# Cap the number of threads (CPUs) that thread-pool backed libraries may use
from threadpoolctl import threadpool_limits

cpus = 4
threadpool_limits(limits=cpus)

## 1. Neutrophil cluster annotation - Arnold lab: Mouse data

In [None]:
# Read the neutrophil AnnData object from disk
# (this H5AD file was processed and provided by Kristina)
neutrophil_h5ad_path = "/data/projects/2022/CRCA/data/own_datasets/arnold_lab_mouse/tumor_bm_blood_neutrophils.h5ad"
adata = sc.read_h5ad(neutrophil_h5ad_path)

### 1.1 Sanity check

In [None]:
# Check the metadata (obs, var, etc.)
# A bare expression on the last line of a cell renders the object's summary as cell output.
adata

In [None]:
# UMAP overview of the full object, one panel per annotation column
overview_keys = ["seurat_clusters", "phenotype", "tissue"]
sc.pl.umap(
    adata,
    color=overview_keys,
    layer="log1p_norm",
    # colour-scale limits: lower bound 0, upper bound at the 99th percentile
    vmin=0,
    vmax="p99",
    sort_order=False,
    add_outline=True,
    frameon=False,
    # disabled options: legend_loc="on data", save="_all_neutro.png"
)

### 1.2 Feature engineering
Create per-tissue subsets (blood and bone marrow) to be used for the healthy vs. tumor differential expression (DE) analysis.

In [None]:
# 1. Create the Bone Marrow groups ("BM_phenotype"): healthy_BM and tumor_BM

In [None]:
# Keep only the observations whose tissue label is "BM", as an independent copy
bm_mask = adata.obs["tissue"] == "BM"
adata_bm = adata[bm_mask].copy()

In [None]:
# Count observations per (tissue, phenotype) combination in the BM subset
count_columns = ["tissue", "phenotype"]
adata_bm.obs[count_columns].value_counts()

In [None]:
# UMAP of the BM subset, one panel per annotation column
bm_keys = ["phenotype", "tissue"]
sc.pl.umap(
    adata_bm,
    color=bm_keys,
    layer="log1p_norm",
    # colour-scale limits: lower bound 0, upper bound at the 99th percentile
    vmin=0,
    vmax="p99",
    sort_order=False,
    add_outline=True,
    frameon=False,
    # disabled options: legend_loc="on data", save="_all_neutro.png"
)