# R workshop: <font color=blue> Generating KEGG Pathway view </font>

#### MLBI@DKU
Seokhyun Yoon, Jan. 09, 2025, syoon@dku.edu

### __0. Install required R packages (skip if they are already installed)__

In [None]:
# Install BiocManager from CRAN when it is not available yet.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
# "biocLite" was the name of the legacy Bioconductor installer script, not an
# installable package, so requesting it only produced a "not available"
# warning. Calling install() without package names performs the intended
# Bioconductor setup step.
BiocManager::install()

In [None]:
# Packages installed through BiocManager, requested in a single call.
BiocManager::install(c(
  "org.Mm.eg.db",
  "org.Hs.eg.db",
  "biomaRt",
  "gageData",
  "gage",
  "pathview"
))

# Packages installed with install.packages().
install.packages(c("filesstrings", "anndata"))

In [3]:
## Install the Python anndata module with pip (skip if it is already installed)
pip_status <- system("pip install anndata")
# system() returns the command's exit status; surface a failure instead of
# letting it pass unnoticed.
if (pip_status != 0) {
  warning("'pip install anndata' exited with status ", pip_status,
          call. = FALSE)
}

In [None]:
# devtools is not installed by any of the earlier cells, so install it on
# demand before using it to fetch the package from GitHub.
if (!requireNamespace("devtools", quietly = TRUE)) {
  install.packages("devtools")
}
devtools::install_github("combio-dku/KEGGPathviewGen4SCODA")

### __1. Load libraries__

In [5]:
suppressPackageStartupMessages(library(stringr))
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(filesstrings))
suppressPackageStartupMessages(library(pathview))
suppressPackageStartupMessages(library(gage))
suppressPackageStartupMessages(library(gageData))
suppressPackageStartupMessages(library(org.Hs.eg.db))
suppressPackageStartupMessages(library(org.Mm.eg.db))
suppressPackageStartupMessages(library(reticulate))
suppressPackageStartupMessages(library(anndata))
suppressPackageStartupMessages(library(KEGGPathviewGen4SCODA))

### __2. Load SCODA result__

In [6]:
# Directory that holds the workshop data files.
data.dir <- "./"

# Collect the file names found in that directory and display them.
(flst <- list.files(path = data.dir))

In [7]:
# Compressed archive to unpack; untar() extracts into the current working
# directory by default.
file <- 'example_human_brca_12k_results.h5ad.tar.gz'

# Fail early with a clear message when the archive is missing.
if (!file.exists(file)) {
  stop("Archive not found: ", file, " (working directory: ", getwd(), ")",
       call. = FALSE)
}

# untar() reports failures of an external tar program only through its
# return code, so check it explicitly.
untar_status <- untar(file)
if (!identical(as.integer(untar_status), 0L)) {
  stop("untar() failed for ", file, " with status ", untar_status,
       call. = FALSE)
}

In [8]:
### Load the SCODA result from the .h5ad file and display the loaded object
file_h5ad <- "example_human_brca_12k_results.h5ad"

adata_t <- anndata::read_h5ad(file_h5ad)
adata_t

AnnData object with n_obs × n_vars = 12000 × 19438
    obs: 'Patient', 'Percent_mito', 'nCount_RNA', 'nFeature_RNA', 'Celltype_Major', 'Celltype_Minor', 'Celltype_Subset', 'subtype', 'gene_module', 'Calls', 'normal_cell_call', 'CNA_value', 'sample', 'condition', 'sample_rev', 'sample_ext', 'celltype_major', 'celltype_minor', 'celltype_subset', 'cnv_ref_ind', 'ploidy_score', 'ploidy_dec', 'condition_for_deg', 'sample_ext_for_deg', 'celltype_for_deg', 'celltype_for_cci', 'tumor_origin_ind'
    var: 'gene_ids', 'variable_genes', 'chr', 'spot_no'
    uns: 'CCI', 'CCI_sample', 'Celltype_marker_DB', 'DEG', 'DEG_grouping_vars', 'DEG_stat', 'GSA_down', 'GSA_up', 'GSEA', 'Pathways_DB', 'analysis_parameters', 'cnv', 'cnv_neighbors_info', 'inferploidy_summary', 'log', 'lut_sample_to_cond', 'usr_param'
    obsm: 'HiCAT_result', 'X_cnv', 'X_cnv_pca', 'X_pca', 'inferploidy_results'
    obsp: 'cnv_neighbor_graph_connectivity', 'cnv_neighbor_graph_distance'

### __3. Get mapping to KEGG pathway__

In [9]:
# Read the species and the pathway database stored in the `uns` slot of the
# loaded result.
species <- adata_t$uns[["usr_param"]][["species"]]
pathways.used <- adata_t$uns[["Pathways_DB"]]

# Build the pathway mapping table.
# NOTE(review): min_overlap presumably is the minimum overlap required to
# match a pathway to a KEGG pathway -- confirm against the package docs.
df_pathways_map <- get_pathways_map(pathways.used, species, min_overlap = 0.85)

Converting Pathways DB .. done.        


In [10]:
# Preview the first six rows of the pathway mapping table.
head(df_pathways_map, n = 6L)

Unnamed: 0_level_0,pw_id,pw_name,pw_id_name,pw_name_used
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>
hsa02010 ABC transporters,hsa02010,ABC transporters,hsa02010 ABC transporters,ABC transporters
hsa04933 AGE-RAGE signaling pathway in diabetic complications,hsa04933,AGE-RAGE signaling pathway in diabetic complications,hsa04933 AGE-RAGE signaling pathway in diabetic complications,AGE-RAGE signaling pathway in diabetic complications
hsa04152 AMPK signaling pathway,hsa04152,AMPK signaling pathway,hsa04152 AMPK signaling pathway,AMPK signaling pathway
hsa05221 Acute myeloid leukemia,hsa05221,Acute myeloid leukemia,hsa05221 Acute myeloid leukemia,Acute myeloid leukemia
hsa04520 Adherens junction,hsa04520,Adherens junction,hsa04520 Adherens junction,Adherens junction
hsa04920 Adipocytokine signaling pathway,hsa04920,Adipocytokine signaling pathway,hsa04920 Adipocytokine signaling pathway,Adipocytokine signaling pathway


### __4. Generate KEGG pathview__

In [11]:
# Pull the DEG results and the "GSA_up" results out of the `uns` slot.
lst.deg.all <- adata_t$uns[["DEG"]]
lst.gsa.all <- adata_t$uns[["GSA_up"]]

# Derive fold changes from the DEG results, passing a p-value cutoff of 1e-4.
lst.fcs.all <- get_all_fold_changes(lst.deg.all, species, pval.cutoff = 1e-4)

Getting fold changes .. 
     Aneuploid Epithelial cell: ER+_vs_others(3211), HER2+_vs_others(2159), TNBC_vs_others(2664)
                        B cell: ER+_vs_others(55), HER2+_vs_others(16), TNBC_vs_others(171)
       Diploid Epithelial cell: HER2+_vs_others(72), TNBC_vs_others(157)
              Endothelial cell: ER+_vs_others(173), HER2+_vs_others(35)
               Epithelial cell: Diploid_vs_others(871), ER+_vs_others(1363), HER2+_vs_others(943), TNBC_vs_others(1563)
                    Fibroblast: ER+_vs_others(338), HER2+_vs_others(93), TNBC_vs_others(270)
                           ILC: ER+_vs_others(95), HER2+_vs_others(28)
                    Macrophage: ER+_vs_others(443), HER2+_vs_others(101), TNBC_vs_others(262)
                   Plasma cell: ER+_vs_others(107), HER2+_vs_others(37), TNBC_vs_others(132)
                   T cell CD4+: ER+_vs_others(316), HER2+_vs_others(151), TNBC_vs_others(508)
                   T cell CD8+: ER+_vs_others(235), HER2+_vs_others(135), TN

In [13]:
# Generate KEGG pathviews for one selected cell type and keep the value
# returned by save_kegg_pathviews().
target_cell <- "Aneuploid Epithelial cell"
dir_saved <- save_kegg_pathviews(
  target_cell, lst.gsa.all, lst.fcs.all, df_pathways_map, species,
  gsa.p.val.cutoff = 1e-4
)

     Aneuploid Epithelial cell: 3/3 - 7/7 - Huntington's disease 


In [None]:
# Display the value returned by save_kegg_pathviews()
# (presumably the output directory -- confirm against the package docs).
dir_saved

### __5. Generate KEGG pathview for all cell types__

In [None]:
# Display the element names of the fold-change list; these are the values the
# loop over names(lst.fcs.all) uses as target_cell.
names(lst.fcs.all)

In [None]:
# Generate KEGG pathviews for every entry of lst.fcs.all, passing a GSA
# p-value cutoff of 0.01.
#
# The original loop overwrote dir_saved on each iteration, so only the last
# return value survived. Each return value is now also kept in dirs_saved
# (a list named by cell type); target_cell and dir_saved are still assigned
# exactly as before.
cell_types <- names(lst.fcs.all)
dirs_saved <- setNames(vector("list", length(cell_types)), cell_types)

for (i in seq_along(cell_types)) {
  target_cell <- cell_types[[i]]
  dir_saved <- save_kegg_pathviews(target_cell, lst.gsa.all, lst.fcs.all,
                                   df_pathways_map, species,
                                   gsa.p.val.cutoff = 0.01)
  # Assign via list() so that a NULL return value is stored rather than
  # deleting the list element.
  dirs_saved[i] <- list(dir_saved)
}