# PAGA and sub-clustering

In [1]:
import scanpy as sc
import scanorama
import pandas as pd

import numpy as np

import os

In [2]:
# Scanpy settings
sc.settings.verbosity = 0
sc.settings.vector_friendly = False
# set_figure_params() has its own `vector_friendly` argument (default True) and
# applies it on every call, so the flag has to be passed here as well;
# otherwise the assignment above is undone by this call.
sc.set_figure_params(figsize=(5, 5), vector_friendly=False)

In [3]:
# Switch to the project directory; the data/ and result/ paths used in this
# notebook are relative to it.
project_dir = '/research/peer/fdeckert/FD20200109SPLENO'
os.chdir(project_dir)

# Import data

In [4]:
# Input files, relative to the working directory
h5ad_path = 'data/object/qc.h5ad'
meta_path = 'data/object/components/meta.csv'

# AnnData object plus the external per-cell metadata table
# (the first CSV column becomes the table index)
adata = sc.read_h5ad(h5ad_path)
obs = pd.read_csv(meta_path, index_col=0)

In [5]:
# Keep only the cells that have an entry in the metadata table ...
has_metadata = adata.obs.index.isin(obs.index.tolist())
adata = adata[has_metadata]

# ... and bring the metadata rows into the same order as the remaining cells
obs = obs.reindex(adata.obs_names)

In [6]:
# Replace the per-cell annotation of the AnnData object with the external metadata table
adata.obs = obs

In [7]:
# Keep an independent snapshot of the unprocessed data. A plain assignment would
# only alias `adata`, so the in-place gene filtering, normalisation, log
# transform and scaling applied to `adata` afterwards would also change the
# object the per-treatment subsets are later taken from, and those subsets
# would then be normalised a second time.
adata_tmp = adata.copy()

# PAGA

In [None]:
# Drop genes that are detected in fewer than three cells
sc.pp.filter_genes(adata, min_cells=3)

# Total-count normalisation, log1p transform and scaling, applied in this order
for preprocess in (sc.pp.normalize_total, sc.pp.log1p, sc.pp.scale):
    preprocess(adata)

# PCA followed by a neighbourhood graph that uses every computed component
# (no clustering is run here; the existing cluster labels are reused later)
n_components = 100
sc.tl.pca(adata, n_comps=n_components)
sc.pp.neighbors(adata, n_neighbors=10, n_pcs=n_components)

In [None]:
# PAGA graph over the groups stored in the `seurat_clusters` annotation
# NOTE(review): presumably this column has to be categorical for PAGA — confirm,
# since the annotation was loaded from a CSV file.
cluster_key = 'seurat_clusters'
sc.tl.paga(adata, groups=cluster_key)
sc.pl.paga(adata, color=[cluster_key], plot=True)

In [None]:
# Graph layout initialised from the PAGA positions
sc.tl.draw_graph(adata, init_pos='paga')

# One panel per annotation, five panels per row
panel_keys = [
    'seurat_clusters',
    'label_fine_haemosphere',
    'treatment',
    'pRb_RNA',
    'cc_phase_class',
]
sc.pl.draw_graph(adata, color=panel_keys, wspace=0.5, ncols=5)

In [None]:
# Write the PAGA connectivity matrix to CSV, with rows and columns labelled by
# the categories of the `seurat_clusters` annotation
paga_result = adata.uns['paga']
connectivities = paga_result['connectivities'].todense()
groups = adata.obs['seurat_clusters'].cat.categories
connectivity_table = pd.DataFrame(connectivities, index=groups, columns=groups)
connectivity_table.to_csv('result/paga/connectivities.csv', index_label=0)

# PAGA (CpG)

In [None]:
# Select the cells whose treatment is 'CpG' and copy them into their own object
is_cpg = adata_tmp.obs['treatment'] == 'CpG'
adata_cpg = adata_tmp[is_cpg].copy()

# Drop genes that are detected in fewer than three cells of this subset
sc.pp.filter_genes(adata_cpg, min_cells=3)

# Total-count normalisation, log1p transform and scaling, applied in this order
for preprocess in (sc.pp.normalize_total, sc.pp.log1p, sc.pp.scale):
    preprocess(adata_cpg)

# PCA followed by a neighbourhood graph that uses every computed component
# (no clustering is run here; the existing cluster labels are reused later)
n_components = 100
sc.tl.pca(adata_cpg, n_comps=n_components)
sc.pp.neighbors(adata_cpg, n_neighbors=10, n_pcs=n_components)

In [None]:
# PAGA graph for the CpG subset, grouped by the `seurat_clusters` annotation
# NOTE(review): presumably this column has to be categorical for PAGA — confirm,
# since the annotation was loaded from a CSV file.
cluster_key = 'seurat_clusters'
sc.tl.paga(adata_cpg, groups=cluster_key)
sc.pl.paga(adata_cpg, color=[cluster_key], plot=True)

In [None]:
# Graph layout for the CpG subset, initialised from the PAGA positions
sc.tl.draw_graph(adata_cpg, init_pos='paga')

# One panel per annotation, five panels per row
panel_keys = [
    'seurat_clusters',
    'label_fine_haemosphere',
    'treatment',
    'pRb_RNA',
    'cc_phase_class',
]
sc.pl.draw_graph(adata_cpg, color=panel_keys, wspace=0.5, ncols=5)

In [None]:
# Write the CpG PAGA connectivity matrix to CSV, with rows and columns labelled
# by the categories of the `seurat_clusters` annotation
paga_result = adata_cpg.uns['paga']
connectivities = paga_result['connectivities'].todense()
groups = adata_cpg.obs['seurat_clusters'].cat.categories
connectivity_table = pd.DataFrame(connectivities, index=groups, columns=groups)
connectivity_table.to_csv('result/paga/connectivities_cpg.csv', index_label=0)

# PAGA (NaCl)

In [None]:
# Select the cells whose treatment is 'NaCl' and copy them into their own object
is_nacl = adata_tmp.obs['treatment'] == 'NaCl'
adata_nacl = adata_tmp[is_nacl].copy()

# Drop genes that are detected in fewer than three cells of this subset
sc.pp.filter_genes(adata_nacl, min_cells=3)

# Total-count normalisation, log1p transform and scaling, applied in this order
for preprocess in (sc.pp.normalize_total, sc.pp.log1p, sc.pp.scale):
    preprocess(adata_nacl)

# PCA followed by a neighbourhood graph that uses every computed component
# (no clustering is run here; the existing cluster labels are reused later)
n_components = 100
sc.tl.pca(adata_nacl, n_comps=n_components)
sc.pp.neighbors(adata_nacl, n_neighbors=10, n_pcs=n_components)

In [None]:
# PAGA graph for the NaCl subset, grouped by the `seurat_clusters` annotation
# NOTE(review): presumably this column has to be categorical for PAGA — confirm,
# since the annotation was loaded from a CSV file.
cluster_key = 'seurat_clusters'
sc.tl.paga(adata_nacl, groups=cluster_key)
sc.pl.paga(adata_nacl, color=[cluster_key], plot=True)

In [None]:
# Graph layout for the NaCl subset, initialised from the PAGA positions
sc.tl.draw_graph(adata_nacl, init_pos='paga')

# One panel per annotation, five panels per row
panel_keys = [
    'seurat_clusters',
    'label_fine_haemosphere',
    'treatment',
    'pRb_RNA',
    'cc_phase_class',
]
sc.pl.draw_graph(adata_nacl, color=panel_keys, wspace=0.5, ncols=5)

In [None]:
# Write the NaCl PAGA connectivity matrix to CSV, with rows and columns labelled
# by the categories of the `seurat_clusters` annotation
paga_result = adata_nacl.uns['paga']
connectivities = paga_result['connectivities'].todense()
groups = adata_nacl.obs['seurat_clusters'].cat.categories
connectivity_table = pd.DataFrame(connectivities, index=groups, columns=groups)
connectivity_table.to_csv('result/paga/connectivities_nacl.csv', index_label=0)