# R workshop: <font color=blue> Generating KEGG Pathway view </font>

#### MLBI@DKU

### __0. Install required R packages (skip if they are already installed)__

In [None]:
# Bootstrap Bioconductor: install the BiocManager installer from CRAN if it is
# missing, then make sure the Bioconductor core for this R version is present.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
# NOTE: "biocLite" is not an installable package; biocLite() was the legacy
# installer function that BiocManager replaces. Calling install() without
# package names sets up the Bioconductor core instead. update/ask are switched
# off so the cell neither upgrades unrelated packages nor waits for a prompt.
BiocManager::install(update = FALSE, ask = FALSE)

In [None]:
# Bioconductor packages loaded with library() in section 1.
BiocManager::install(c(
  "org.Mm.eg.db",
  "org.Hs.eg.db",
  "biomaRt",
  "gageData",
  "gage",
  "pathview"
))

# CRAN packages. devtools is required by the devtools::install_github() call
# that installs KEGGPathviewGen4SCODA, and dplyr is loaded in section 1;
# neither was installed by the original cell.
install.packages(c(
  "filesstrings",
  "anndata",
  "dplyr",
  "devtools"
))

In [43]:
## Install the Python 'anndata' module with pip (skip if it is already installed).
## NOTE(review): this uses whichever `pip` is first on the PATH -- confirm it
## belongs to the Python environment that reticulate will load.
system("pip install anndata")

In [5]:
# Install the KEGGPathviewGen4SCODA package from its GitHub repository.
devtools::install_github(repo = "combio-dku/KEGGPathviewGen4SCODA")

Downloading GitHub repo combio-dku/KEGGPathviewGen4SCODA@HEAD






Skipping 5 packages not available: org.Mm.eg.db, org.Hs.eg.db, gageData, gage, pathview



[36m──[39m [36mR CMD build[39m [36m───────────────────────────────────────────────────────────────────────────────────────────────────────────────[39m
[32m✔[39m  [90mchecking for file ‘/tmp/RtmpgTOdCu/remotes1c5a052d0ad68a/combio-dku-KEGGPathviewGen4SCODA-7a6296e/DESCRIPTION’[39m[36m[39m
[90m─[39m[90m  [39m[90mpreparing ‘KEGGPathviewGen4SCODA’:[39m[36m[39m
[32m✔[39m  [90mchecking DESCRIPTION meta-information[39m[36m[39m
[90m─[39m[90m  [39m[90mchecking for LF line-endings in source and make files and shell scripts[39m[36m[39m
[90m─[39m[90m  [39m[90mchecking for empty or unneeded directories[39m[36m[39m
   Omitted ‘LazyData’ from DESCRIPTION
[90m─[39m[90m  [39m[90mbuilding ‘KEGGPathviewGen4SCODA_0.0.2.tar.gz’[39m[36m[39m
   


### __1. Load libraries and data__

In [2]:
suppressPackageStartupMessages(library(stringr))
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(filesstrings))
suppressPackageStartupMessages(library(pathview))
suppressPackageStartupMessages(library(gage))
suppressPackageStartupMessages(library(gageData))
suppressPackageStartupMessages(library(org.Hs.eg.db))
suppressPackageStartupMessages(library(org.Mm.eg.db))
suppressPackageStartupMessages(library(reticulate))
suppressPackageStartupMessages(library(anndata))
suppressPackageStartupMessages(library(KEGGPathviewGen4SCODA))

### __2. Load SCODA result__

In [3]:
# Directory to look in for the workshop files (the current working directory).
data.dir <- "./"

# Collect the file names found there and display them.
flst <- list.files(path = data.dir)
flst

In [4]:
# Unpack the SCODA result archive into the current working directory.
file <- "scoda_workshop_example_dataset_GSE161529_33K_results.tar.gz"

# Fail early with a readable message when the archive is not where we expect it.
if (!file.exists(file)) {
  stop("Archive not found in the working directory: ", file, call. = FALSE)
}

# untar() returns the extraction status invisibly (0 on success), so a failed
# extraction would otherwise go unnoticed until the next cell.
untar_status <- untar(file)
if (!identical(as.integer(untar_status), 0L)) {
  stop("untar() failed for ", file, " (status ", untar_status, ")",
       call. = FALSE)
}

In [4]:
### Read the SCODA result (.h5ad) from the extracted folder into an AnnData object
dir <- "scoda_workshop_example_dataset_GSE161529_33K/"
file_h5ad <- paste0(dir, "scoda_workshop_example_dataset_GSE161529_33K.h5ad")

adata_t <- read_h5ad(filename = file_h5ad)

# Display the object summary (dimensions and the obs/var/uns/obsm/obsp keys).
adata_t

AnnData object with n_obs × n_vars = 33785 × 22621
    obs: 'Patient', 'Description', 'Source', 'Condition', 'Menopause', 'Parity', 'Gender', 'geo_no', 'sid', 'subtype', 'subtype_detail', 'tissue', 'condition', 'sample', 'sample_rev', 'sample_ext', 'celltype_major', 'celltype_minor', 'celltype_subset', 'cnv_ref_ind', 'ploidy_score', 'ploidy_dec', 'condition_for_deg', 'sample_ext_for_deg', 'celltype_for_deg', 'celltype_for_cci', 'tumor_origin_ind'
    var: 'variable_genes', 'chr', 'spot_no'
    uns: 'CCI', 'CCI_sample', 'Celltype_marker_DB', 'DEG', 'DEG_grouping_vars', 'DEG_stat', 'DEG_vs_ref', 'DEG_vs_ref_stat', 'GSA_down', 'GSA_up', 'GSA_vs_ref_down', 'GSA_vs_ref_up', 'GSEA', 'GSEA_vs_ref', 'Pathways_DB', 'analysis_parameters', 'cnv', 'cnv_neighbors_info', 'inferploidy_summary', 'log', 'lut_sample_to_cond', 'usr_param'
    obsm: 'HiCAT_result', 'X_cnv', 'X_cnv_pca', 'X_pca', 'inferploidy_results'
    obsp: 'cnv_neighbor_graph_connectivity', 'cnv_neighbor_graph_distance'

### __3. Get mapping to KEGG pathway__

In [8]:
# Pull the species and the pathway database out of the unstructured
# annotations (`uns`) stored in the SCODA result.
species <- adata_t$uns[["usr_param"]][["species"]]
pathways.used <- adata_t$uns[["Pathways_DB"]]

# Build the table that maps the pathways used by SCODA to KEGG pathways.
# NOTE(review): min_overlap presumably is the minimum gene-set overlap required
# to accept a match -- confirm against the package documentation.
df_pathways_map <- get_pathways_map(
  pathways.used,
  species,
  min_overlap = 0.85
)

Converting Pathways DB .. done.        


In [72]:
# Preview the first six rows of the pathway mapping table.
head(df_pathways_map, n = 6L)

Unnamed: 0_level_0,pw_id,pw_name,pw_id_name,pw_name_used
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>
hsa02010 ABC transporters,hsa02010,ABC transporters,hsa02010 ABC transporters,ABC transporters
hsa04933 AGE-RAGE signaling pathway in diabetic complications,hsa04933,AGE-RAGE signaling pathway in diabetic complications,hsa04933 AGE-RAGE signaling pathway in diabetic complications,AGE-RAGE signaling pathway in diabetic complications
hsa04152 AMPK signaling pathway,hsa04152,AMPK signaling pathway,hsa04152 AMPK signaling pathway,AMPK signaling pathway
hsa05221 Acute myeloid leukemia,hsa05221,Acute myeloid leukemia,hsa05221 Acute myeloid leukemia,Acute myeloid leukemia
hsa04520 Adherens junction,hsa04520,Adherens junction,hsa04520 Adherens junction,Adherens junction
hsa04920 Adipocytokine signaling pathway,hsa04920,Adipocytokine signaling pathway,hsa04920 Adipocytokine signaling pathway,Adipocytokine signaling pathway


### __4. Generate KEGG pathview__

In [11]:
# DEG results and the "GSA_up" results stored in the SCODA output.
lst.deg.all <- adata_t$uns[["DEG"]]
lst.gsa.all <- adata_t$uns[["GSA_up"]]

# Derive fold changes from the DEG results, passing a p-value cutoff of 1e-4.
lst.fcs.all <- get_all_fold_changes(
  lst.deg.all,
  species,
  pval.cutoff = 1e-4
)

Getting fold changes .. 
     Aneuploid Epithelial cell: ER+_vs_others(1857), HER2+_vs_others(1584), TNBC_vs_others(3388)
                        B cell: HER2+_vs_others(6), TNBC_vs_others(9)
       Diploid Epithelial cell: ER+_vs_others(229), HER2+_vs_others(109), Normal_vs_others(2564), TNBC_vs_others(170)
              Endothelial cell: ER+_vs_others(12), HER2+_vs_others(14), Normal_vs_others(1656), TNBC_vs_others(45)
               Epithelial cell: Diploid_vs_others(1546), ER+_vs_others(1131), HER2+_vs_others(1089), Normal_vs_others(1995), TNBC_vs_others(1813)
                    Fibroblast: ER+_vs_others(100), HER2+_vs_others(36), Normal_vs_others(2907), TNBC_vs_others(273)
                           ILC: ER+_vs_others(12), HER2+_vs_others(5), Normal_vs_others(164), TNBC_vs_others(5)
                    Macrophage: ER+_vs_others(73), HER2+_vs_others(46), Normal_vs_others(1242), TNBC_vs_others(193)
                     Mast cell: ER+_vs_others(19)
                   Plasma cell: ER

In [12]:
# Generate KEGG pathviews for a single cell type, passing a GSA p-value
# cutoff of 1e-4; the function's return value is kept in dir_saved.
target_cell <- "Epithelial cell"
dir_saved <- save_kegg_pathviews(
  target_cell,
  lst.gsa.all,
  lst.fcs.all,
  df_pathways_map,
  species,
  gsa.p.val.cutoff = 1e-4
)

               Epithelial cell: 5/5 - 12/12 - Huntington's disease                is                     


In [73]:
# Display the value returned by save_kegg_pathviews()
# (presumably the directory the pathview images were written to -- confirm).
dir_saved

In [None]:
# List the entry names of the fold-change list; these are the values that can
# be passed as target_cell.
names(lst.fcs.all)

In [None]:
# Generate KEGG pathviews for every entry in lst.fcs.all, passing a GSA
# p-value cutoff of 0.01.
#
# The original loop overwrote dir_saved on each pass, so only the result for
# the last cell type survived. Every result is now also kept in lst.dir.saved,
# a list named by cell type. dir_saved still holds the most recent result, as
# before.
lst.dir.saved <- list()
for (target_cell in names(lst.fcs.all)) {
  dir_saved <- save_kegg_pathviews(
    target_cell, lst.gsa.all, lst.fcs.all, df_pathways_map, species,
    gsa.p.val.cutoff = 0.01
  )
  # Assign through list() so an entry is kept even if the call returned NULL
  # (`[[<-` with NULL would silently drop the element).
  lst.dir.saved[target_cell] <- list(dir_saved)
}