In [None]:
from aliases import *
import scanpy as sc
import scvi

In [None]:
# Root directory holding every 10x Genomics run used in this notebook.
# `Path.expanduser()` resolves the leading "~" to the current user's home
# directory (the previous `os.path.expand` call does not exist).
root_path = Path("~/data/CompBio/SenNet-UPMC").expanduser()

# Every run stores its filtered count matrix under this file name.
MATRIX_FILENAME = "filtered_feature_bc_matrix.h5"

# Sample ID -> run directory, always RELATIVE to `root_path`.
# Never start an entry with "/": pathlib treats such a segment as absolute and
# silently discards `root_path` when joining.
sample_dirs = {
    # Radiation experiments
    "control_E170": "Radiation/E170-0Gy_SenNet_051523_CR710",
    "rad_E170": "Radiation/E170-30Gy_SenNet_051523_CR710",
    "control_E185": "Radiation/E185-0Gy_SenNet_051523_CR710",
    "rad_E185": "Radiation/E185-30Gy_SenNet_051523_CR710",
    "control_E187": "Radiation/E187-0Gy_SenNet_051523_CR710",
    "rad_E187": "Radiation/E187-30Gy_SenNet_051523_CR710",
    "control_E196": "Radiation/E196_0Gy_LAF5556A15/outs",
    "rad_E196": "Radiation/E196_30Gy_LAF5556A16/outs",
    # LTC-113
    "control_LTC113": "LTC-113/LTC-113_Control_042123_SenNet_CR710",
    "bleo_LTC113": "LTC-113/LTC-113_Bleo15_042123_SenNet_CR710",
    "dmso_LTC113": "LTC-113/LTC-113_DMSO_042123_SenNet_CR710",
    "doxo_LTC113": "LTC-113/LTC-113_Doxo0_1_042123_SenNet_CR710",
    # LTC-117
    "control_LTC117": "LTC-117/LTC-117_Control_042123_SenNet_CR710",
    "bleo_LTC117": "LTC-117/LTC-117_Bleo15_042123_SenNet_CR710",
    "dmso_LTC117": "LTC-117/LTC-117_DMSO_042123_SenNet_CR710",
    "doxo_LTC117": "LTC-117/LTC-117_Doxo0_1_042123_SenNet_CR710",
    # LTC-120
    "control_LTC120": "LTC-120/LTC-120_Control-POS_LAF5556A9/outs",
    "bleo_LTC120": "LTC-120/LTC-120_Bleo_15_LAF5556A10/outs",
    # LTC-124
    "control_LTC124": "LTC-124/LTC-124_Control_LAF5556A11/outs",
    "bleo_LTC124": "LTC-124/LTC-124_Bleo_15_LAF5556A12/outs",
    "dmso_LTC124": "LTC-124/LTC-124_DMSO_LAF5556A13/outs",
    "doxo_LTC124": "LTC-124/LTC-124_Doxo_LAF5556A14/outs",
}

# Sample ID -> full path of the filtered feature-barcode matrix.
# The key is "<treatment>_<sample>"; later cells split it on "_" to derive
# the treatment label, so keep that naming scheme when adding samples.
data = {
    sample_id: root_path / run_dir / MATRIX_FILENAME
    for sample_id, run_dir in sample_dirs.items()
}

## Load adatas and integrate

In [None]:
def _load_sample(batch_id, h5_path):
    """Read one 10x HDF5 matrix and tag every cell with its batch ID.

    Parameters
    ----------
    batch_id
        Sample key from `data`; written to `obs['batch_ID']`.
    h5_path
        Path to the 10x `.h5` matrix file.

    Returns
    -------
    The loaded object with unique variable names and a `batch_ID` column.
    """
    sample = sc.read_10x_h5(h5_path)
    sample.var_names_make_unique()
    sample.obs['batch_ID'] = batch_id  # important for integration
    return sample


# One object per sample, in the insertion order of `data`.
adatas = [_load_sample(batch_id, h5_path) for batch_id, h5_path in data.items()]

In [None]:
# Stack all samples along the cell axis. `join="inner"` is the default of
# `anndata.concat` and is spelled out here only for readability: genes not
# present in every sample are dropped.
adata = anndata.concat(adatas, join="inner")

# Barcodes can repeat across samples; make the cell index unique.
adata.obs_names_make_unique()

In [None]:
# Keep the full gene dimension reachable through `adata.raw` before the
# subsetting below removes most genes from `adata` itself.
adata.raw = adata

# Restrict `adata` to the 2000 most variable genes. Because `subset=True`
# drops the other genes in place, all genes have to be added back manually
# after integration.
sc.pp.highly_variable_genes(adata, n_top_genes=2000, flavor="seurat_v3", subset=True)

## Run scVI

Following instructions from https://docs.scvi-tools.org/en/stable/tutorials/notebooks/scrna/harmonization.html

In [None]:
# Register the AnnData with scVI, using the per-sample `batch_ID` column as
# the batch covariate.
scvi.model.SCVI.setup_anndata(adata, batch_key="batch_ID")

# Build the (untrained) scVI model with the architecture used in this analysis.
model = scvi.model.SCVI(
    adata,
    n_hidden=1000,
    n_latent=30,
    n_layers=2,
    gene_likelihood="zinb",
)

In [None]:
# Fit the scVI model built in the previous cell, requesting GPU training.
# NOTE(review): `use_gpu` looks version-dependent — newer scvi-tools releases
# appear to replace it with `accelerator=` / `devices=`. Confirm against the
# installed scvi-tools version before re-running.
model.train(use_gpu=True)

In [None]:
# Key under which the scVI embedding is stored in `adata.obsm`.
SCVI_LATENT_KEY = "X_scVI"

# Fetch the latent representation from the trained model and attach it to
# `adata` so downstream steps can reference it by key.
latent = model.get_latent_representation()
adata.obsm[SCVI_LATENT_KEY] = latent

In [None]:
# Build the neighbor graph on the scVI latent space (not on the expression
# matrix), then cluster and embed.
sc.pp.neighbors(adata, use_rep=SCVI_LATENT_KEY)
# Leiden clustering with scanpy's default parameters.
sc.tl.leiden(adata)
# UMAP embedding with scanpy's default parameters.
sc.tl.umap(adata)

In [None]:
# Two-level label: batches whose ID contains "control" or "dmso" count as
# 'Control'; every other batch is 'Treatment'.
is_control = adata.obs['batch_ID'].str.contains('control|dmso')
adata.obs['condition'] = np.where(is_control, 'Control', 'Treatment')

In [None]:
# Display label for each treatment prefix of `batch_ID` ("<prefix>_<sample>").
prefix_to_label = {
    'control': 'Control',
    'doxo': 'Doxo',
    'bleo': 'Bleo',
    'dmso': 'DMSO',
    'rad': 'Rad',
}

# Take the text before the first "_" and translate it; a prefix missing from
# the table is passed through unchanged.
prefixes = adata.obs['batch_ID'].str.split('_').str[0]
cond = np.asarray([prefix_to_label.get(p, p) for p in prefixes], dtype=object)

# Store as a categorical with a fixed category order.
adata.obs['cond'] = pd.Categorical(cond, categories=['Control', 'Bleo', 'DMSO', 'Doxo', 'Rad'])

In [None]:
# Re-run Leiden clustering, storing labels under `adata.obs["leiden"]`.
# NOTE(review): an earlier cell already calls `sc.tl.leiden(adata)` with no
# arguments; `resolution=1` and `key_added="leiden"` presumably equal the
# scanpy defaults, which would make this call redundant — confirm.
sc.tl.leiden(adata, resolution=1, key_added="leiden")

In [None]:
## Add back all genes