# Integration

**[Scanorama (Hie et al., 2019)](https://www.nature.com/articles/s41587-019-0113-3)**  
[GitHub](https://github.com/brianhie/scanorama)  
[Scanpy tutorial: integrating data with Scanorama](https://scanpy-tutorials.readthedocs.io/en/latest/spatial/integration-scanorama.html)  
[Scanpy external API reference: `scanorama_integrate`](https://scanpy.readthedocs.io/en/stable/generated/scanpy.external.pp.scanorama_integrate.html)

A [fix](https://github.com/theislab/single-cell-tutorial/issues/60) for running the [scran pooling normalization](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-016-0947-7) function `computeSumFactors` in the current Python environment.

In [None]:
import scanpy as sc
import scanorama

import numpy as np
import pandas as pd

import os

In [None]:
# Working directory: relative paths used below (e.g. 'plotting_global.R',
# 'data/object/so_sct.h5ad') are resolved against this project root.
os.chdir('/research/peer/fdeckert/FD20200109SPLENO')

In [None]:
# rpy2: point R_HOME at the R installation inside the conda environment.
# This is set before rpy2 is imported in the next cell (presumably so that
# rpy2 binds to this R installation — confirm if the environment changes).
os.environ['R_HOME'] = '/home/fdeckert/bin/miniconda3/envs/p.3.8.12-FD20200109SPLENO/lib/R'

In [None]:
# Plotting
import rpy2.robjects as robjects

# Source the shared R plotting script and keep element 0 of the result
# (assumed to be a named list of named colour vectors — TODO confirm in plotting_global.R).
color_load = robjects.r.source('plotting_global.R')
palettes = color_load[0]

# Mirror the R structure in Python as {palette name: {level: colour}}
color = {}
for idx in range(len(palettes)):
    palette = palettes[idx]
    color[palettes.names[idx]] = {level: palette.rx2(level)[0] for level in palette.names}

sc.set_figure_params(figsize=(5, 5))

# Scanorama (SCTransform + 2000 HVG)

In [None]:
# Load the AnnData object used for integration from disk
# (file name suggests SCTransform-processed data — TODO confirm upstream).
adata = sc.read_h5ad('data/object/so_sct.h5ad')

In [None]:
# Restrict the object to the gene list stored in .uns['hvg_int_2000']
# (presumably the 2000 HVGs selected for integration — TODO confirm upstream).
hvg_2000 = list(adata.uns['hvg_int_2000'])

# Slicing an AnnData returns a view. The cells below write to .obs, .uns and
# .obsm, which would otherwise trigger an implicit copy (with a warning) on
# first write, so materialise the subset explicitly here.
adata = adata[:, hvg_2000].copy()

In [None]:
def set_color(categories):
    """Apply the colour palettes in the global ``color`` to the global ``adata``.

    For every name in ``categories`` that is also a column of ``adata.obs``,
    the column is converted to a categorical, its categories are reordered to
    follow the key order of ``color[name]`` (restricted to the values that
    actually occur in the column), and the matching colours are stored in
    ``adata.uns[name + '_colors']`` in that same order.

    Parameters
    ----------
    categories : iterable of str
        Candidate column names; names that are not columns of ``adata.obs``
        are skipped.

    Raises
    ------
    KeyError
        If a selected column has no entry in ``color``.
    ValueError
        Raised by ``reorder_categories`` when the column contains a value
        that has no colour in ``color[name]``.
    """
    # Build the lookup once instead of rebuilding a list for every name.
    obs_columns = set(adata.obs.columns)

    for category in categories:
        if category not in obs_columns:
            continue

        adata.obs[category] = adata.obs[category].astype('category')

        palette = color[category]
        # Set of observed values: O(1) membership per palette key instead of
        # scanning a list of every cell's value for every key.
        present = set(adata.obs[category].unique())
        keys = [key for key in palette if key in present]

        adata.obs[category] = adata.obs[category].cat.reorder_categories(keys)
        adata.uns[category + '_colors'] = np.array([palette[key] for key in keys], dtype=object)
        
# Apply a palette to every annotation that has an entry in `color`
set_color(list(color))

In [None]:
# One independent AnnData copy per treatment group, in order of first appearance
adata_sub = [
    adata[adata.obs['treatment'] == treatment_group].copy()
    for treatment_group in adata.obs['treatment'].unique()
]

# Dimred

In [None]:
# Run Scanorama (the code below reads the result from .obsm['X_scanorama'] of each object)
scanorama.integrate_scanpy(adata_sub, dimred=150, knn=50, verbose=True)

# Concatenate scanorama output; rows follow the per-treatment order of adata_sub
X_scanorama = np.concatenate([ad.obsm['X_scanorama'] for ad in adata_sub])
obs_names = np.concatenate([ad.obs_names for ad in adata_sub])

# adata_sub is grouped by treatment, so its cell order need not match adata.
# The previous bare `all(obs_names==adata.obs_names)` was evaluated and then
# discarded, so a mismatch went unnoticed. Realign by cell name instead;
# .loc raises KeyError if a cell of adata is missing from the embedding.
X_scanorama = pd.DataFrame(X_scanorama, index=obs_names).loc[adata.obs_names].to_numpy()

# Add X_scanorama integration to adata
adata.obsm["X_scanorama"] = X_scanorama

In [None]:
# Neighbourhood graph on the Scanorama embedding (100 neighbours), then Leiden clustering and UMAP
sc.pp.neighbors(adata, use_rep='X_scanorama', n_neighbors=100)
sc.tl.leiden(adata, resolution=1)
sc.tl.umap(adata)

# Plot clusters and metadata on the UMAP, three panels per row
sc.pl.umap(
    adata,
    color=[
        'leiden', 'tissue', 'treatment',
        'label_fine_haemosphere', 'sample_rep', 'cc_phase_class',
        'doublet_score_log2', 'pHb_RNA', 'pRb_RNA',
    ],
    ncols=3,
    wspace=0.5,
)

In [None]:
# Neighbourhood graph on the Scanorama embedding (50 neighbours), then Leiden clustering and UMAP
sc.pp.neighbors(adata, use_rep='X_scanorama', n_neighbors=50)
sc.tl.leiden(adata, resolution=1)
sc.tl.umap(adata)

# Plot clusters and metadata on the UMAP, three panels per row
sc.pl.umap(
    adata,
    color=[
        'leiden', 'tissue', 'treatment',
        'label_fine_haemosphere', 'sample_rep', 'cc_phase_class',
        'doublet_score_log2', 'pHb_RNA', 'pRb_RNA',
    ],
    ncols=3,
    wspace=0.5,
)

In [None]:
# Neighbourhood graph on the Scanorama embedding (30 neighbours), then Leiden clustering and UMAP
sc.pp.neighbors(adata, use_rep='X_scanorama', n_neighbors=30)
sc.tl.leiden(adata, resolution=1)
sc.tl.umap(adata)

# Plot clusters and metadata on the UMAP, three panels per row
sc.pl.umap(
    adata,
    color=[
        'leiden', 'tissue', 'treatment',
        'label_fine_haemosphere', 'sample_rep', 'cc_phase_class',
        'doublet_score_log2', 'pHb_RNA', 'pRb_RNA',
    ],
    ncols=3,
    wspace=0.5,
)

In [None]:
# Run Scanorama (the code below reads the result from .obsm['X_scanorama'] of each object)
scanorama.integrate_scanpy(adata_sub, dimred=100, knn=50, verbose=True)

# Concatenate scanorama output; rows follow the per-treatment order of adata_sub
X_scanorama = np.concatenate([ad.obsm['X_scanorama'] for ad in adata_sub])
obs_names = np.concatenate([ad.obs_names for ad in adata_sub])

# adata_sub is grouped by treatment, so its cell order need not match adata.
# The previous bare `all(obs_names==adata.obs_names)` was evaluated and then
# discarded, so a mismatch went unnoticed. Realign by cell name instead;
# .loc raises KeyError if a cell of adata is missing from the embedding.
X_scanorama = pd.DataFrame(X_scanorama, index=obs_names).loc[adata.obs_names].to_numpy()

# Add X_scanorama integration to adata
adata.obsm["X_scanorama"] = X_scanorama

In [None]:
# Neighbourhood graph on the Scanorama embedding (100 neighbours), then Leiden clustering and UMAP
sc.pp.neighbors(adata, use_rep='X_scanorama', n_neighbors=100)
sc.tl.leiden(adata, resolution=1)
sc.tl.umap(adata)

# Plot clusters and metadata on the UMAP, three panels per row
sc.pl.umap(
    adata,
    color=[
        'leiden', 'tissue', 'treatment',
        'label_fine_haemosphere', 'sample_rep', 'cc_phase_class',
        'doublet_score_log2', 'pHb_RNA', 'pRb_RNA',
    ],
    ncols=3,
    wspace=0.5,
)

In [None]:
# Neighbourhood graph on the Scanorama embedding (50 neighbours), then Leiden clustering and UMAP
sc.pp.neighbors(adata, use_rep='X_scanorama', n_neighbors=50)
sc.tl.leiden(adata, resolution=1)
sc.tl.umap(adata)

# Plot clusters and metadata on the UMAP, three panels per row
sc.pl.umap(
    adata,
    color=[
        'leiden', 'tissue', 'treatment',
        'label_fine_haemosphere', 'sample_rep', 'cc_phase_class',
        'doublet_score_log2', 'pHb_RNA', 'pRb_RNA',
    ],
    ncols=3,
    wspace=0.5,
)

In [None]:
# Neighbourhood graph on the Scanorama embedding (30 neighbours), then Leiden clustering and UMAP
sc.pp.neighbors(adata, use_rep='X_scanorama', n_neighbors=30)
sc.tl.leiden(adata, resolution=1)
sc.tl.umap(adata)

# Plot clusters and metadata on the UMAP, three panels per row
sc.pl.umap(
    adata,
    color=[
        'leiden', 'tissue', 'treatment',
        'label_fine_haemosphere', 'sample_rep', 'cc_phase_class',
        'doublet_score_log2', 'pHb_RNA', 'pRb_RNA',
    ],
    ncols=3,
    wspace=0.5,
)

In [None]:
# Run Scanorama (the code below reads the result from .obsm['X_scanorama'] of each object)
scanorama.integrate_scanpy(adata_sub, dimred=50, knn=50, verbose=True)

# Concatenate scanorama output; rows follow the per-treatment order of adata_sub
X_scanorama = np.concatenate([ad.obsm['X_scanorama'] for ad in adata_sub])
obs_names = np.concatenate([ad.obs_names for ad in adata_sub])

# adata_sub is grouped by treatment, so its cell order need not match adata.
# The previous bare `all(obs_names==adata.obs_names)` was evaluated and then
# discarded, so a mismatch went unnoticed. Realign by cell name instead;
# .loc raises KeyError if a cell of adata is missing from the embedding.
X_scanorama = pd.DataFrame(X_scanorama, index=obs_names).loc[adata.obs_names].to_numpy()

# Add X_scanorama integration to adata
adata.obsm["X_scanorama"] = X_scanorama

In [None]:
# Neighbourhood graph on the Scanorama embedding (100 neighbours), then Leiden clustering and UMAP
sc.pp.neighbors(adata, use_rep='X_scanorama', n_neighbors=100)
sc.tl.leiden(adata, resolution=1)
sc.tl.umap(adata)

# Plot clusters and metadata on the UMAP, three panels per row
sc.pl.umap(
    adata,
    color=[
        'leiden', 'tissue', 'treatment',
        'label_fine_haemosphere', 'sample_rep', 'cc_phase_class',
        'doublet_score_log2', 'pHb_RNA', 'pRb_RNA',
    ],
    ncols=3,
    wspace=0.5,
)

In [None]:
# Neighbourhood graph on the Scanorama embedding (50 neighbours), then Leiden clustering and UMAP
sc.pp.neighbors(adata, use_rep='X_scanorama', n_neighbors=50)
sc.tl.leiden(adata, resolution=1)
sc.tl.umap(adata)

# Plot clusters and metadata on the UMAP, three panels per row
sc.pl.umap(
    adata,
    color=[
        'leiden', 'tissue', 'treatment',
        'label_fine_haemosphere', 'sample_rep', 'cc_phase_class',
        'doublet_score_log2', 'pHb_RNA', 'pRb_RNA',
    ],
    ncols=3,
    wspace=0.5,
)

In [None]:
# Neighbourhood graph on the Scanorama embedding (30 neighbours), then Leiden clustering and UMAP
sc.pp.neighbors(adata, use_rep='X_scanorama', n_neighbors=30)
sc.tl.leiden(adata, resolution=1)
sc.tl.umap(adata)

# Plot clusters and metadata on the UMAP, three panels per row
sc.pl.umap(
    adata,
    color=[
        'leiden', 'tissue', 'treatment',
        'label_fine_haemosphere', 'sample_rep', 'cc_phase_class',
        'doublet_score_log2', 'pHb_RNA', 'pRb_RNA',
    ],
    ncols=3,
    wspace=0.5,
)

In [None]:
# Run Scanorama (the code below reads the result from .obsm['X_scanorama'] of each object)
scanorama.integrate_scanpy(adata_sub, dimred=30, knn=50, verbose=True)

# Concatenate scanorama output; rows follow the per-treatment order of adata_sub
X_scanorama = np.concatenate([ad.obsm['X_scanorama'] for ad in adata_sub])
obs_names = np.concatenate([ad.obs_names for ad in adata_sub])

# adata_sub is grouped by treatment, so its cell order need not match adata.
# The previous bare `all(obs_names==adata.obs_names)` was evaluated and then
# discarded, so a mismatch went unnoticed. Realign by cell name instead;
# .loc raises KeyError if a cell of adata is missing from the embedding.
X_scanorama = pd.DataFrame(X_scanorama, index=obs_names).loc[adata.obs_names].to_numpy()

# Add X_scanorama integration to adata
adata.obsm["X_scanorama"] = X_scanorama

In [None]:
# Neighbourhood graph on the Scanorama embedding (100 neighbours), then Leiden clustering and UMAP
sc.pp.neighbors(adata, use_rep='X_scanorama', n_neighbors=100)
sc.tl.leiden(adata, resolution=1)
sc.tl.umap(adata)

# Plot clusters and metadata on the UMAP, three panels per row
sc.pl.umap(
    adata,
    color=[
        'leiden', 'tissue', 'treatment',
        'label_fine_haemosphere', 'sample_rep', 'cc_phase_class',
        'doublet_score_log2', 'pHb_RNA', 'pRb_RNA',
    ],
    ncols=3,
    wspace=0.5,
)

In [None]:
# Neighbourhood graph on the Scanorama embedding (50 neighbours), then Leiden clustering and UMAP
sc.pp.neighbors(adata, use_rep='X_scanorama', n_neighbors=50)
sc.tl.leiden(adata, resolution=1)
sc.tl.umap(adata)

# Plot clusters and metadata on the UMAP, three panels per row
sc.pl.umap(
    adata,
    color=[
        'leiden', 'tissue', 'treatment',
        'label_fine_haemosphere', 'sample_rep', 'cc_phase_class',
        'doublet_score_log2', 'pHb_RNA', 'pRb_RNA',
    ],
    ncols=3,
    wspace=0.5,
)

In [None]:
# Neighbourhood graph on the Scanorama embedding (30 neighbours), then Leiden clustering and UMAP
sc.pp.neighbors(adata, use_rep='X_scanorama', n_neighbors=30)
sc.tl.leiden(adata, resolution=1)
sc.tl.umap(adata)

# Plot clusters and metadata on the UMAP, three panels per row
sc.pl.umap(
    adata,
    color=[
        'leiden', 'tissue', 'treatment',
        'label_fine_haemosphere', 'sample_rep', 'cc_phase_class',
        'doublet_score_log2', 'pHb_RNA', 'pRb_RNA',
    ],
    ncols=3,
    wspace=0.5,
)

# knn

In [None]:
# Run Scanorama (the code below reads the result from .obsm['X_scanorama'] of each object)
scanorama.integrate_scanpy(adata_sub, dimred=100, knn=20, verbose=True)

# Concatenate scanorama output; rows follow the per-treatment order of adata_sub
X_scanorama = np.concatenate([ad.obsm['X_scanorama'] for ad in adata_sub])
obs_names = np.concatenate([ad.obs_names for ad in adata_sub])

# adata_sub is grouped by treatment, so its cell order need not match adata.
# The previous bare `all(obs_names==adata.obs_names)` was evaluated and then
# discarded, so a mismatch went unnoticed. Realign by cell name instead;
# .loc raises KeyError if a cell of adata is missing from the embedding.
X_scanorama = pd.DataFrame(X_scanorama, index=obs_names).loc[adata.obs_names].to_numpy()

# Add X_scanorama integration to adata
adata.obsm["X_scanorama"] = X_scanorama

In [None]:
# Neighbourhood graph on the Scanorama embedding (100 neighbours), then Leiden clustering and UMAP
sc.pp.neighbors(adata, use_rep='X_scanorama', n_neighbors=100)
sc.tl.leiden(adata, resolution=1)
sc.tl.umap(adata)

# Plot clusters and metadata on the UMAP, three panels per row
sc.pl.umap(
    adata,
    color=[
        'leiden', 'tissue', 'treatment',
        'label_fine_haemosphere', 'sample_rep', 'cc_phase_class',
        'doublet_score_log2', 'pHb_RNA', 'pRb_RNA',
    ],
    ncols=3,
    wspace=0.5,
)

In [None]:
# Neighbourhood graph on the Scanorama embedding (50 neighbours), then Leiden clustering and UMAP
sc.pp.neighbors(adata, use_rep='X_scanorama', n_neighbors=50)
sc.tl.leiden(adata, resolution=1)
sc.tl.umap(adata)

# Plot clusters and metadata on the UMAP, three panels per row
sc.pl.umap(
    adata,
    color=[
        'leiden', 'tissue', 'treatment',
        'label_fine_haemosphere', 'sample_rep', 'cc_phase_class',
        'doublet_score_log2', 'pHb_RNA', 'pRb_RNA',
    ],
    ncols=3,
    wspace=0.5,
)

In [None]:
# Neighbourhood graph on the Scanorama embedding (30 neighbours), then Leiden clustering and UMAP
sc.pp.neighbors(adata, use_rep='X_scanorama', n_neighbors=30)
sc.tl.leiden(adata, resolution=1)
sc.tl.umap(adata)

# Plot clusters and metadata on the UMAP, three panels per row
sc.pl.umap(
    adata,
    color=[
        'leiden', 'tissue', 'treatment',
        'label_fine_haemosphere', 'sample_rep', 'cc_phase_class',
        'doublet_score_log2', 'pHb_RNA', 'pRb_RNA',
    ],
    ncols=3,
    wspace=0.5,
)

In [None]:
# Run Scanorama (the code below reads the result from .obsm['X_scanorama'] of each object)
scanorama.integrate_scanpy(adata_sub, dimred=100, knn=50, verbose=True)

# Concatenate scanorama output; rows follow the per-treatment order of adata_sub
X_scanorama = np.concatenate([ad.obsm['X_scanorama'] for ad in adata_sub])
obs_names = np.concatenate([ad.obs_names for ad in adata_sub])

# adata_sub is grouped by treatment, so its cell order need not match adata.
# The previous bare `all(obs_names==adata.obs_names)` was evaluated and then
# discarded, so a mismatch went unnoticed. Realign by cell name instead;
# .loc raises KeyError if a cell of adata is missing from the embedding.
X_scanorama = pd.DataFrame(X_scanorama, index=obs_names).loc[adata.obs_names].to_numpy()

# Add X_scanorama integration to adata
adata.obsm["X_scanorama"] = X_scanorama

In [None]:
# Neighbourhood graph on the Scanorama embedding (100 neighbours), then Leiden clustering and UMAP
sc.pp.neighbors(adata, use_rep='X_scanorama', n_neighbors=100)
sc.tl.leiden(adata, resolution=1)
sc.tl.umap(adata)

# Plot clusters and metadata on the UMAP, three panels per row
sc.pl.umap(
    adata,
    color=[
        'leiden', 'tissue', 'treatment',
        'label_fine_haemosphere', 'sample_rep', 'cc_phase_class',
        'doublet_score_log2', 'pHb_RNA', 'pRb_RNA',
    ],
    ncols=3,
    wspace=0.5,
)

In [None]:
# Neighbourhood graph on the Scanorama embedding (50 neighbours), then Leiden clustering and UMAP
sc.pp.neighbors(adata, use_rep='X_scanorama', n_neighbors=50)
sc.tl.leiden(adata, resolution=1)
sc.tl.umap(adata)

# Plot clusters and metadata on the UMAP, three panels per row
sc.pl.umap(
    adata,
    color=[
        'leiden', 'tissue', 'treatment',
        'label_fine_haemosphere', 'sample_rep', 'cc_phase_class',
        'doublet_score_log2', 'pHb_RNA', 'pRb_RNA',
    ],
    ncols=3,
    wspace=0.5,
)

In [None]:
# Neighbourhood graph on the Scanorama embedding (30 neighbours), then Leiden clustering and UMAP
sc.pp.neighbors(adata, use_rep='X_scanorama', n_neighbors=30)
sc.tl.leiden(adata, resolution=1)
sc.tl.umap(adata)

# Plot clusters and metadata on the UMAP, three panels per row
sc.pl.umap(
    adata,
    color=[
        'leiden', 'tissue', 'treatment',
        'label_fine_haemosphere', 'sample_rep', 'cc_phase_class',
        'doublet_score_log2', 'pHb_RNA', 'pRb_RNA',
    ],
    ncols=3,
    wspace=0.5,
)

In [None]:
# Run Scanorama (the code below reads the result from .obsm['X_scanorama'] of each object)
scanorama.integrate_scanpy(adata_sub, dimred=100, knn=100, verbose=True)

# Concatenate scanorama output; rows follow the per-treatment order of adata_sub
X_scanorama = np.concatenate([ad.obsm['X_scanorama'] for ad in adata_sub])
obs_names = np.concatenate([ad.obs_names for ad in adata_sub])

# adata_sub is grouped by treatment, so its cell order need not match adata.
# The previous bare `all(obs_names==adata.obs_names)` was evaluated and then
# discarded, so a mismatch went unnoticed. Realign by cell name instead;
# .loc raises KeyError if a cell of adata is missing from the embedding.
X_scanorama = pd.DataFrame(X_scanorama, index=obs_names).loc[adata.obs_names].to_numpy()

# Add X_scanorama integration to adata
adata.obsm["X_scanorama"] = X_scanorama

In [None]:
# Neighbourhood graph on the Scanorama embedding (100 neighbours), then Leiden clustering and UMAP
sc.pp.neighbors(adata, use_rep='X_scanorama', n_neighbors=100)
sc.tl.leiden(adata, resolution=1)
sc.tl.umap(adata)

# Plot clusters and metadata on the UMAP, three panels per row
sc.pl.umap(
    adata,
    color=[
        'leiden', 'tissue', 'treatment',
        'label_fine_haemosphere', 'sample_rep', 'cc_phase_class',
        'doublet_score_log2', 'pHb_RNA', 'pRb_RNA',
    ],
    ncols=3,
    wspace=0.5,
)

In [None]:
# Neighbourhood graph on the Scanorama embedding (50 neighbours), then Leiden clustering and UMAP
sc.pp.neighbors(adata, use_rep='X_scanorama', n_neighbors=50)
sc.tl.leiden(adata, resolution=1)
sc.tl.umap(adata)

# Plot clusters and metadata on the UMAP, three panels per row
sc.pl.umap(
    adata,
    color=[
        'leiden', 'tissue', 'treatment',
        'label_fine_haemosphere', 'sample_rep', 'cc_phase_class',
        'doublet_score_log2', 'pHb_RNA', 'pRb_RNA',
    ],
    ncols=3,
    wspace=0.5,
)

In [None]:
# Neighbourhood graph on the Scanorama embedding (30 neighbours), then Leiden clustering and UMAP
sc.pp.neighbors(adata, use_rep='X_scanorama', n_neighbors=30)
sc.tl.leiden(adata, resolution=1)
sc.tl.umap(adata)

# Plot clusters and metadata on the UMAP, three panels per row
sc.pl.umap(
    adata,
    color=[
        'leiden', 'tissue', 'treatment',
        'label_fine_haemosphere', 'sample_rep', 'cc_phase_class',
        'doublet_score_log2', 'pHb_RNA', 'pRb_RNA',
    ],
    ncols=3,
    wspace=0.5,
)