## Map compound target genes to Gene Ontology Pathways

In [1]:
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(topGO))


groupGOTerms: 	GOBPTerm, GOMFTerm, GOCCTerm environments built.



In [2]:
# Read the compound annotation table (one row per compound/target pair)
cpd_file <- file.path("data", "split_moas_targets_cpds.csv")
cpd_df <- readr::read_csv(file = cpd_file, show_col_types = FALSE)

# Report the number of distinct targets, then the table dimensions
print(dplyr::n_distinct(cpd_df$target_unique))
print(dim(cpd_df))

# Preview the first three rows
head(cpd_df, n = 3)

[1] 744
[1] 3178    9


pert_iname,moa,train,test,marked,target_unique,clinical_phase,disease_area,indication
<chr>,<chr>,<lgl>,<lgl>,<lgl>,<chr>,<chr>,<chr>,<chr>
ketoprofen,cyclooxygenase inhibitor,True,False,True,PTGS1,Launched,rheumatology,rheumatoid arthritis|osteoarthritis
ketoprofen,cyclooxygenase inhibitor,True,False,True,PTGS2,Launched,rheumatology,rheumatoid arthritis|osteoarthritis
ketoprofen,cyclooxygenase inhibitor,True,False,True,SLC5A8,Launched,rheumatology,rheumatoid arthritis|osteoarthritis


In [3]:
# For each GO ontology (BP, CC, MF), build a long-format table with one row
# per (GO term, target gene) annotation. The result is a list of tibbles keyed
# by ontology, each with columns: go_term, gene, go_ontology.
go_ontologies <- c("BP", "CC", "MF")

# The feasible gene set is the same for every ontology, so de-duplicate it
# once up front; missing targets cannot match any gene symbol, so drop them
target_genes <- unique(cpd_df$target_unique)
target_genes <- target_genes[!is.na(target_genes)]

go_mapping_df <- list()
for (go_ont in go_ontologies) {
    # Identify the total GO term -> gene symbol map for this ontology
    geneMap <- topGO::annFUN.org(
        whichOnto = go_ont,
        feasibleGenes = NULL,
        mapping = "org.Hs.eg.db",
        ID = "symbol"
    )

    # Restrict the map to our targets, then invert it so that each target
    # gene points at the GO terms it is annotated with
    target_pathways <- topGO::inverseList(
        topGO::annFUN.GO2genes(
            whichOnto = go_ont,
            feasibleGenes = target_genes,
            GO2genes = geneMap
        )
    )

    # Flatten the gene -> GO terms list in one vectorized step instead of
    # rbind-ing one small tibble per gene. Row order is unchanged: genes in
    # list order, GO terms in their per-gene order. as.character() keeps both
    # columns present (with zero rows) if no target maps to this ontology.
    go_mapping_df[[go_ont]] <- dplyr::tibble(
        go_term = as.character(unlist(target_pathways, use.names = FALSE)),
        gene = as.character(
            rep(names(target_pathways), lengths(target_pathways))
        ),
        go_ontology = go_ont
    )
}

Loading required package: org.Hs.eg.db





In [4]:
# Stack the per-ontology tables into one long-format table
full_go_mapping_df <- dplyr::as_tibble(do.call(rbind, go_mapping_df))

# Number of distinct GO terms across all ontologies
print(dplyr::n_distinct(full_go_mapping_df$go_term))

# Number of distinct genes with at least one GO annotation
print(dplyr::n_distinct(full_go_mapping_df$gene))

# GO term count per gene, most heavily annotated genes first
sort(table(full_go_mapping_df$gene), decreasing = TRUE)

[1] 5822
[1] 732



    AKT1     TP53      SRC      TNF     ABL1     MTOR    PSEN1     EGFR 
     179      170      166      160      143      142      138      137 
    BCL2     DRD2     CDK5     JAK2    STAT3     RELA    PPARG     PKD2 
     135      123      115      115      109      108      105      103 
     SYK    ANXA1     IL1B      IL6      KIT    GSK3B     MDM2   PDGFRB 
      99       96       96       96       94       93       93       93 
   GPER1   PRKAA1    HDAC1    HDAC6     RARA    ROCK1    KCNQ1    PTGS2 
      89       88       85       85       84       84       83       80 
  HSPA1A    PPARD       AR    ITGB3   MAPK14    ROCK2      SMO    PARP1 
      79       79       78       78       78       78       78       77 
   PRKCD     ESR1      INS    CALM1     CDK1    ITGAV    PPARA    ABCA1 
      77       76       76       75       75       75       75       74 
   HSPA5   PDGFRA     PTK2     CCR2     INSR    HDAC2    HDAC4 HSP90AA1 
      74       74       74       73       73      

In [5]:
# Inspect the combined GO mapping: (rows, columns), then the leading rows
print(c(nrow(full_go_mapping_df), ncol(full_go_mapping_df)))
head(full_go_mapping_df, n = 6L)

[1] 23072     3


go_term,gene,go_ontology
<chr>,<chr>,<chr>
GO:0002790,ABCA1,BP
GO:0006497,ABCA1,BP
GO:0006869,ABCA1,BP
GO:0006911,ABCA1,BP
GO:0007040,ABCA1,BP
GO:0007186,ABCA1,BP


In [6]:
# Attach GO annotations to the compound table. Each compound/target row is
# repeated once per GO term matched on the target gene; as a left join, rows
# whose target has no match are kept with missing GO columns.
cpd_go_df <- dplyr::left_join(
    cpd_df,
    full_go_mapping_df,
    by = c("target_unique" = "gene")
)

# Save the merged table to disk
output_file <- file.path("data", "split_moas_targets_pathways_cpds.csv")
readr::write_csv(cpd_go_df, output_file)

# Report the merged dimensions and preview the first three rows
print(dim(cpd_go_df))
head(cpd_go_df, n = 3)

[1] 83825    11


pert_iname,moa,train,test,marked,target_unique,clinical_phase,disease_area,indication,go_term,go_ontology
<chr>,<chr>,<lgl>,<lgl>,<lgl>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
ketoprofen,cyclooxygenase inhibitor,True,False,True,PTGS1,Launched,rheumatology,rheumatoid arthritis|osteoarthritis,GO:0001516,BP
ketoprofen,cyclooxygenase inhibitor,True,False,True,PTGS1,Launched,rheumatology,rheumatoid arthritis|osteoarthritis,GO:0006954,BP
ketoprofen,cyclooxygenase inhibitor,True,False,True,PTGS1,Launched,rheumatology,rheumatoid arthritis|osteoarthritis,GO:0006979,BP
