# Fetch results and re-compute embedding

In [None]:
import scvi
import scanpy as sc

import pandas as pd
import numpy as np

import matplotlib as mpl
import matplotlib.pyplot as plt

import sys
import os

## Root directory

In [None]:
# Work from the project root so the relative paths used below ('bin/', 'plotting_global.R', 'data/...') resolve
os.chdir('/research/peer/fdeckert/FD20200109SPLENO')

## Custom modules

In [None]:
# Make modules in bin/ (relative to the working directory) importable
sys.path.append('bin/')
# NOTE(review): the star import hides which adata_qc names this notebook relies on — consider importing them explicitly
from adata_qc import *

## Set up rpy2

In [None]:
# Select the R installation for rpy2; set here, ahead of the rpy2 import in the following cell
os.environ['R_HOME'] = '/nobackup/peer/fdeckert/miniconda3/envs/r.4.4.1-FD20200109SPLENO/lib/R'

In [None]:
import rpy2
# Load the rpy2 IPython extension; the %%R cells further down depend on it
%load_ext rpy2.ipython

## Figures 

In [None]:
# Scanpy figure defaults for this notebook: figsize (5, 5), 1200 dpi for saved figures, font size 12, no frame, white background
sc.set_figure_params(figsize=(5, 5), dpi_save=1200, fontsize=12, frameon=False, facecolor='white')
# Set the white figure background in matplotlib's own rcParams as well
mpl.rcParams['figure.facecolor'] = 'white'

In [None]:
# Read the colour definitions from the shared R plotting script
import rpy2.robjects as robjects

# R's source() returns a list; its first element (index 0) is what gets unpacked below
color_load = robjects.r.source('plotting_global.R')
palettes = color_load[0]

# Rebuild as nested dicts: outer name -> {inner name -> first element of that entry}
color = {}
for i in range(len(palettes)):
    palette = palettes[i]
    palette_name = palettes.names[i]
    color[palette_name] = {key: palette.rx2(key)[0] for key in palette.names}

In [None]:
# Set color function 
def set_color(adata, categories):
    """Order categorical annotations and attach their colours for plotting.

    For every name in *categories* that is a column of ``adata.obs``, the
    column is converted to a categorical, unused categories are dropped, the
    remaining categories are reordered to follow the key order of
    ``color[name]``, and the matching colours are stored in
    ``adata.uns[name + '_colors']``. Names that are not columns of
    ``adata.obs`` are skipped.

    Parameters
    ----------
    adata
        Object exposing a pandas ``obs`` data frame and a dict-like ``uns``.
        It is modified in place.
    categories
        Iterable of annotation names. Every name that is processed must be a
        key of the module-level ``color`` mapping.

    Raises
    ------
    KeyError
        If a processed name has no entry in ``color``.
    ValueError
        If the column holds values for which ``color[name]`` has no colour.
        Raised before the column is touched.
    """
    # Build the lookup once instead of materialising a list per membership test
    obs_columns = set(adata.obs.columns)

    for category in categories:

        if category not in obs_columns:
            continue

        palette = color[category]
        values = pd.Series(adata.obs[category], dtype='category').cat.remove_unused_categories()

        # After dropping unused categories, the categories are exactly the
        # non-missing values present in the column
        observed = set(values.cat.categories)
        keys = [key for key in palette if key in observed]

        # reorder_categories needs the same set of categories; report the
        # offending values explicitly rather than relying on its generic error
        missing = observed.difference(keys)
        if missing:
            raise ValueError(
                f"adata.obs[{category!r}] contains values with no colour defined: "
                f"{sorted(map(str, missing))}"
            )

        adata.obs[category] = values.cat.reorder_categories(keys)
        adata.uns[category + '_colors'] = np.array([palette[key] for key in keys], dtype=object)

## Helper functions

In [None]:
def model_history(model):
    """Plot the training and validation curves stored in ``model.history``.

    Draws a 1 x 2 figure: the left panel shows the reconstruction loss, the
    right panel the ELBO. Each panel has one 'train' and one 'validation'
    line. Every ``model.history`` entry is indexed a second time with its own
    key to obtain the plotted values.
    """
    # (panel title, history key of the train curve, history key of the validation curve)
    panels = (
        ('Reconstruction Loss', 'reconstruction_loss_train', 'reconstruction_loss_validation'),
        ('ELBO', 'elbo_train', 'elbo_validation'),
    )

    fig, axes = plt.subplots(1, 2, figsize=(10, 5))

    for ax, (title, train_key, validation_key) in zip(axes, panels):
        ax.plot(model.history[train_key][train_key], label='train')
        ax.plot(model.history[validation_key][validation_key], label='validation')
        ax.set_title(title)
        ax.set_xlabel('Epoch')
        ax.set_ylabel('Loss')
        ax.legend()

# All data  

In [None]:
# Load the full data set; later cells read obs['facility'], obs['celltype_low'], obsm['X_umap'] and obsm['latent'] from it
adata = sc.read_h5ad('data/scRNAseq/object/pp.h5ad')

In [None]:
# Apply every colour set loaded from R; set_color skips names that are not columns of adata.obs
set_color(adata, color.keys())

In [None]:
# UMAP of all cells, one panel per listed annotation
sc.pl.umap(
    adata,
    color=['celltype_low', 'S_score', 'G2M_score', 'facility'],
    frameon=False,
    ncols=6,
    wspace=0.1,
    size=30,
    legend_loc='on data',
    use_raw=False,
)

In [None]:
# Overwrites the file loaded above, so the category order and '*_colors' entries written by set_color are persisted
adata.write_h5ad('data/scRNAseq/object/pp.h5ad')

## Store results

In [None]:
# Variables handed to the following %%R cell via its -i flags; the names must match that cell.
# Transposed so that columns are cells and rows are genes, matching the colnames/rownames set in R.
# NOTE(review): .todense() assumes adata.X is a sparse matrix and materialises it fully in memory — confirm this fits
cnt = adata.X.T.todense()
cell_id = adata.obs_names
gene_id = adata.var_names
meta = adata.obs
# Existing embeddings, exported unchanged
umap = adata.obsm['X_umap']
latent = adata.obsm['latent']

In [None]:
%%R -i cnt -i cell_id -i gene_id -i meta -i umap -i latent

# Label the count matrix: columns are cells, rows are genes
colnames(cnt) <- cell_id
rownames(cnt) <- gene_id

# Build the Seurat object from the counts, with the cell metadata attached
so <- Seurat::CreateSeuratObject(cnt, meta.data=meta, assay="RNA")

# Attach the UMAP coordinates as reduction "umap" (columns UMAP_1, UMAP_2, ...; rows named by cell)
colnames(umap) <- paste0("UMAP_", 1:ncol(umap))
rownames(umap) <- cell_id
so[["umap"]] <- Seurat::CreateDimReducObject(embeddings=as.matrix(umap), key="UMAP_", assay="RNA")

# Attach the latent representation as reduction "latent" in the same way
colnames(latent) <- paste0("LATENT_", 1:ncol(latent))
rownames(latent) <- cell_id
so[["latent"]] <- Seurat::CreateDimReducObject(embeddings=as.matrix(latent), key="LATENT_", assay="RNA")

# Write the Seurat object next to the h5ad file of the full data set
saveRDS(so, 'data/scRNAseq/object/pp.rds')

# BSF

In [None]:
# True: the next cell loads the saved BSF object; False: it recomputes the embedding and overwrites the saved file
cache_bsf = True

In [None]:
if cache_bsf:

    # Reuse the BSF object written by an earlier run of the branch below
    adata_0 = sc.read_h5ad('data/scRNAseq/object/pp_0.h5ad')

else:

    # Keep only the cells whose facility is BSF
    adata_0 = adata[adata.obs['facility'] == "BSF"].copy()

    # Neighbour graph on the stored latent representation
    sc.pp.neighbors(adata_0, use_rep='latent', n_neighbors=10)

    # PAGA graph between the celltype_low groups
    sc.tl.paga(adata_0, groups='celltype_low')

    # Per-group mean of the existing UMAP coordinates, used as PAGA node positions
    pos = pd.DataFrame(adata_0.obsm["X_umap"], index=adata_0.obs_names)
    pos["group"] = adata_0.obs[adata_0.uns["paga"]["groups"]]
    pos = pos.groupby("group", observed=True).mean()

    # Existing UMAP as backdrop
    ax = sc.pl.umap(adata_0, show=False)

    # PAGA drawn on top of the UMAP at the group means; this fixes the PAGA positions used below
    sc.pl.paga(
        adata_0,
        color="celltype_low",
        threshold=0,
        node_size_scale=1,
        edge_width_scale=0.7,
        pos=pos.values,
        random_state=0,
        ax=ax,
    )

    # New UMAP initialised from the PAGA positions
    sc.tl.umap(adata_0, init_pos='paga', min_dist=1.00, spread=0.50)

    # Persist the result; this is the file the cached branch loads
    adata_0.write_h5ad('data/scRNAseq/object/pp_0.h5ad')

In [None]:
# UMAP of the BSF cells, one panel per listed annotation
sc.pl.umap(
    adata_0,
    color=['celltype_low', 'S_score', 'G2M_score', 'facility'],
    frameon=False,
    ncols=6,
    wspace=0.1,
    size=30,
    legend_loc='on data',
    use_raw=False,
)

## Store results

In [None]:
# Variables handed to the following %%R cell via its -i flags; the names must match that cell.
# Transposed so that columns are cells and rows are genes, matching the colnames/rownames set in R.
# NOTE(review): .todense() assumes adata_0.X is a sparse matrix and materialises it fully in memory — confirm this fits
cnt = adata_0.X.T.todense()
cell_id = adata_0.obs_names
gene_id = adata_0.var_names
meta = adata_0.obs
# Embeddings of the BSF subset, exported unchanged
umap = adata_0.obsm['X_umap']
latent = adata_0.obsm['latent']

In [None]:
%%R -i cnt -i cell_id -i gene_id -i meta -i umap -i latent

# Label the count matrix: columns are cells, rows are genes
colnames(cnt) <- cell_id
rownames(cnt) <- gene_id

# Build the Seurat object from the counts, with the cell metadata attached
so <- Seurat::CreateSeuratObject(cnt, meta.data=meta, assay="RNA")

# Attach the UMAP coordinates as reduction "umap" (columns UMAP_1, UMAP_2, ...; rows named by cell)
colnames(umap) <- paste0("UMAP_", 1:ncol(umap))
rownames(umap) <- cell_id
so[["umap"]] <- Seurat::CreateDimReducObject(embeddings=as.matrix(umap), key="UMAP_", assay="RNA")

# Attach the latent representation as reduction "latent" in the same way
colnames(latent) <- paste0("LATENT_", 1:ncol(latent))
rownames(latent) <- cell_id
so[["latent"]] <- Seurat::CreateDimReducObject(embeddings=as.matrix(latent), key="LATENT_", assay="RNA")

# Write the Seurat object next to the h5ad file of the BSF subset
saveRDS(so, 'data/scRNAseq/object/pp_0.rds')

# VBC

In [None]:
# True: the next cell loads the saved VBC object; False: it recomputes the embedding and overwrites the saved file
cache_vbc = True

In [None]:
if cache_vbc:

    # Reuse the VBC object written by an earlier run of the branch below
    adata_1 = sc.read_h5ad('data/scRNAseq/object/pp_1.h5ad')

else:

    # Keep only the cells whose facility is VBC
    adata_1 = adata[adata.obs['facility'] == "VBC"].copy()

    # Neighbour graph on the stored latent representation
    sc.pp.neighbors(adata_1, use_rep='latent', n_neighbors=15)

    # PAGA graph between the celltype_low groups
    sc.tl.paga(adata_1, groups='celltype_low')

    # Per-group mean of the existing UMAP coordinates, used as PAGA node positions
    pos = pd.DataFrame(adata_1.obsm["X_umap"], index=adata_1.obs_names)
    pos["group"] = adata_1.obs[adata_1.uns["paga"]["groups"]]
    pos = pos.groupby("group", observed=True).mean()

    # Existing UMAP as backdrop
    ax = sc.pl.umap(adata_1, show=False)

    # PAGA drawn on top of the UMAP at the group means; this fixes the PAGA positions used below
    sc.pl.paga(
        adata_1,
        color="celltype_low",
        threshold=0,
        node_size_scale=1,
        edge_width_scale=0.7,
        pos=pos.values,
        random_state=0,
        ax=ax,
    )

    # New UMAP initialised from the PAGA positions
    sc.tl.umap(adata_1, init_pos='paga', min_dist=1.00, spread=0.50)

    # Check the new embedding, including the Spic and Adgre1 panels
    sc.pl.umap(
        adata_1,
        color=['celltype_low', 'Spic', 'S_score', 'G2M_score', 'facility', 'Adgre1'],
        frameon=False,
        ncols=6,
        wspace=0.1,
        size=30,
        legend_loc='on data',
        use_raw=False,
    )

    # Persist the result; this is the file the cached branch loads
    adata_1.write_h5ad('data/scRNAseq/object/pp_1.h5ad')

In [None]:
# UMAP of the VBC cells, one panel per listed annotation
sc.pl.umap(
    adata_1,
    color=['celltype_low', 'S_score', 'G2M_score', 'facility'],
    frameon=False,
    ncols=6,
    wspace=0.1,
    size=30,
    legend_loc='on data',
    use_raw=False,
)

## Store results

In [None]:
# Variables handed to the following %%R cell via its -i flags; the names must match that cell.
# Transposed so that columns are cells and rows are genes, matching the colnames/rownames set in R.
# NOTE(review): .todense() assumes adata_1.X is a sparse matrix and materialises it fully in memory — confirm this fits
cnt = adata_1.X.T.todense()
cell_id = adata_1.obs_names
gene_id = adata_1.var_names
meta = adata_1.obs
# Embeddings of the VBC subset, exported unchanged
umap = adata_1.obsm['X_umap']
latent = adata_1.obsm['latent']

In [None]:
%%R -i cnt -i cell_id -i gene_id -i meta -i umap -i latent

# Label the count matrix: columns are cells, rows are genes
colnames(cnt) <- cell_id
rownames(cnt) <- gene_id

# Build the Seurat object from the counts, with the cell metadata attached
so <- Seurat::CreateSeuratObject(cnt, meta.data=meta, assay="RNA")

# Attach the UMAP coordinates as reduction "umap" (columns UMAP_1, UMAP_2, ...; rows named by cell)
colnames(umap) <- paste0("UMAP_", 1:ncol(umap))
rownames(umap) <- cell_id
so[["umap"]] <- Seurat::CreateDimReducObject(embeddings=as.matrix(umap), key="UMAP_", assay="RNA")

# Attach the latent representation as reduction "latent" in the same way
colnames(latent) <- paste0("LATENT_", 1:ncol(latent))
rownames(latent) <- cell_id
so[["latent"]] <- Seurat::CreateDimReducObject(embeddings=as.matrix(latent), key="LATENT_", assay="RNA")

# Write the Seurat object next to the h5ad file of the VBC subset
saveRDS(so, 'data/scRNAseq/object/pp_1.rds')