In [1]:
# Silence all warnings for the rest of the session (a negative `warn` value
# makes R ignore every warning).
options(warn=-1)

In [2]:
library(tidyverse)
library(motifmatchr)
library(Matrix)
library(TFBSTools)
library(SummarizedExperiment)
library(BSgenome.Hsapiens.UCSC.hg38)
library(BiocParallel)
library(JASPAR2018)
library(purrr)
# Register a multicore BiocParallel backend with 8 workers as the default
# back-end for parallel evaluation.
register(MulticoreParam(8))
# Fix the random seed so that any stochastic step is reproducible.
set.seed(2019)

── [1mAttaching packages[22m ─────────────────────────────────────── tidyverse 1.2.1 ──
[32m✔[39m [34mggplot2[39m 3.2.1     [32m✔[39m [34mpurrr  [39m 0.3.2
[32m✔[39m [34mtibble [39m 2.1.3     [32m✔[39m [34mdplyr  [39m 0.8.3
[32m✔[39m [34mtidyr  [39m 0.8.3     [32m✔[39m [34mstringr[39m 1.4.0
[32m✔[39m [34mreadr  [39m 1.3.1     [32m✔[39m [34mforcats[39m 0.4.0
── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()

Registered S3 method overwritten by 'R.oo':
  method        from       
  throw.default R.methodsS3

Attaching package: ‘Matrix’

The following object is masked from ‘package:tidyr’:

    expand


Attaching package: ‘TFBSTools’

The following object is masked from ‘package:Matrix’:

    Matrix

Loading required package: GenomicRanges
Loading required packa

In [3]:
# Switch to the project directory; the input files read later in this
# notebook are given as relative paths and are resolved against it.
setwd("~/shank3")

In [45]:
# Fetch the JASPAR 2018 motif matrices one species at a time (human, then
# mouse) and pool the two sets into a single collection for matching.
jaspar_motifs_hs <- getMatrixSet(
    x = JASPAR2018,
    opts = list(species = "Homo sapiens")
)
jaspar_motifs_ms <- getMatrixSet(
    x = JASPAR2018,
    opts = list(species = "Mus musculus")
)
# Human motifs first, mouse motifs second
jaspar_motifs <- c(jaspar_motifs_hs, jaspar_motifs_ms)

In [236]:
# Lookup table keyed on motif_id that carries the motif metadata (name, TF
# symbol, description, species) so it can be joined onto the match results.
#
# Every tag is collapsed to a single string ("" when the tag is absent), so
# each column is a plain character vector rather than a list column, and
# multi-valued tags are kept in full ("a; b") instead of being cut down to
# their first element by a scalar ifelse().
# One row is produced per distinct motif ID.
motif_ids <- as.character(unique(names(jaspar_motifs)))

# Return tag `tag` of `motif` as one string; "" if the tag is missing.
collapse_tag <- function(motif, tag) {
    value <- tags(motif)[[tag]]
    if (is.null(value)) {
        return("")
    }
    paste0(value, collapse = "; ")
}

motif_lookup <- data.frame(
    motif_id = motif_ids,
    motif_nm = vapply(
        motif_ids,
        function(m) name(jaspar_motifs[[m]]),
        character(1),
        USE.NAMES = FALSE
    ),
    tf_symbol = vapply(
        motif_ids,
        function(m) collapse_tag(jaspar_motifs[[m]], "symbol"),
        character(1),
        USE.NAMES = FALSE
    ),
    description = vapply(
        motif_ids,
        function(m) collapse_tag(jaspar_motifs[[m]], "description"),
        character(1),
        USE.NAMES = FALSE
    ),
    species = vapply(
        motif_ids,
        function(m) collapse_tag(jaspar_motifs[[m]], "species"),
        character(1),
        USE.NAMES = FALSE
    ),
    stringsAsFactors = FALSE
)

In [266]:
# Read the SHANK3 gene-region table and the table of all annotated SHANK3
# features. Both files are expected to have a header row.
# NOTE(review): the file name "shank3_up2K" suggests the gene plus 2 kb
# upstream -- confirm against how the file was generated.
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable
# variables.
shank_gene <- read.table("shank3_up2K.txt", header = TRUE, stringsAsFactors = FALSE)
shank_all <- read.table("shank3_allfeatures.txt", header = TRUE, stringsAsFactors = FALSE)

# Convert both tables to GRanges; columns that are not genomic coordinates
# are kept as metadata columns.
shank_gene <- makeGRangesFromDataFrame(shank_gene, keep.extra.columns = TRUE)
shank_all <- makeGRangesFromDataFrame(shank_all, keep.extra.columns = TRUE)

In [85]:
# Scan the SHANK3 gene region (hg38 sequence) for JASPAR 2018 motif hits and
# return the position of every match.
shank.match.motif.pos <- matchMotifs(
    pwms = jaspar_motifs,
    subject = shank_gene,
    genome = BSgenome.Hsapiens.UCSC.hg38,
    out = "positions"
)

In [96]:
# Flatten the per-motif match positions into one table and turn it back into
# a GRanges object, keeping the match score and the motif ID (group_name) as
# metadata columns.
# TRUE is spelled out because T is an ordinary, reassignable variable.
shank.match.motif.ranges <- makeGRangesFromDataFrame(
    shank.match.motif.pos %>%
        as.data.frame() %>%
        select(seqnames, start, end, strand, score, group_name),
    keep.extra.columns = TRUE
)

In [100]:
# Pair every motif hit with the annotated SHANK3 features it overlaps by at
# least 10 bp.
# NOTE(review): with minoverlap = 10, hits from motifs shorter than 10 bp can
# never qualify -- confirm this filtering is intended.
feature.overlap <- GenomicRanges::findOverlaps(
    query = shank.match.motif.ranges,
    subject = shank_all,
    minoverlap = 10
)

# One row per (motif hit, feature) pair: the hit's coordinates and metadata,
# followed by the metadata columns of the overlapped feature.
shank.match.motif.df <- cbind(
    as.data.frame(shank.match.motif.ranges[queryHits(feature.overlap)]),
    as.data.frame(mcols(shank_all[subjectHits(feature.overlap)]))
)

In [254]:
# Attach the motif metadata (species, symbols, ...) to every hit; the hit
# table stores the motif ID in `group_name`, the lookup table in `motif_id`.
shank.match.motif.df <- shank.match.motif.df %>%
    left_join(motif_lookup, by = c(group_name = "motif_id"))

In [326]:
# Logical motif-match matrix extracted from the match result
feature_motif_matches <- match_feature_motif %>% motifMatches()
# Row position(s) of the range(s) whose gene_name is SHANK3
row.idx <- (rowRanges(match_feature_motif)$gene_name == "SHANK3") %>% which()

In [330]:
# Print the row index (or indices) of the range(s) annotated as SHANK3.
which(rowRanges(match_feature_motif)$gene_name == "SHANK3") 

In [333]:
# Inspect the range(s) annotated as SHANK3. The looked-up index `row.idx` is
# used instead of a hard-coded row number, which silently points at a
# different gene as soon as the feature table changes.
rowRanges(match_feature_motif)[row.idx, ]

GRanges object with 1 range and 2 metadata columns:
      seqnames            ranges strand |           gene_id   gene_name
         <Rle>         <IRanges>  <Rle> |       <character> <character>
  [1]    chr22 50672415-50733298      + | ENSG00000251322.8      SHANK3
  -------
  seqinfo: 24 sequences from an unspecified genome; no seqlengths

In [335]:
# Names of the motifs with a match on the target gene row(s). The selected
# rows are reduced column-wise, so this stays correct when row.idx holds more
# than one row (which() on a multi-row selection would return linear
# indices rather than column positions).
target_hits <- Matrix::colSums(feature_motif_matches[row.idx, , drop = FALSE]) > 0
regulators <- colnames(feature_motif_matches)[target_hits]
# Convert motif names to TF gene symbols. motif_lookup is built in this
# notebook with the columns motif_id / motif_nm / tf_symbol / description /
# species; it has no `motif` or `gene_name` column, so the lookup goes
# through motif_nm and tf_symbol.
# NOTE(review): assumes the match-matrix columns are motif names rather than
# motif IDs -- confirm against how match_feature_motif was built.
regulators <- motif_lookup %>%
    filter(motif_nm %in% regulators) %>%
    pull(tf_symbol) %>%
    unlist(use.names = FALSE) %>%
    unique()
# Ranges of the features whose gene is one of the binding regulators
regulators.granges <- feature_granges[feature_granges$gene_name %in% regulators, ]

In [339]:
# Show the motif-match calls for the SHANK3 row(s), laid out with one column
# per motif. `row.idx` replaces a hard-coded row number so the display
# follows the gene even if the feature table changes.
feature_motif_matches[row.idx, ] %>% t()


THRB,THAP11,TFAP4(var.2),TFAP2E,TGIF2LY,TGIF2LX,TBX3,TBX18,TCF21(var.2),TBX6,⋯,KLF4,KLF16,LHX2,KLF9,MAFG,MAFF,DMRT3,FOXG1,HSF1,BACH2(var.2)
True,True,False,True,True,True,False,False,True,False,⋯,True,True,True,True,False,False,False,False,True,False


In [470]:
# Extract the motif-match matrix from the shank3_motif_ix match result.
# NOTE(review): presumably a features x motifs logical matrix -- it is
# indexed by row and passed to which() further down; confirm.
shank3_motif_mx <- motifMatches(shank3_motif_ix)

In [477]:
# Column names of the match matrix; used below to translate matched columns
# into motif names.
all_motif_names <- colnames(shank3_motif_mx)

In [482]:
# Motifs matched in the first row of the match matrix.
# NOTE(review): row 1 is treated as the promoter region -- confirm the row
# order of the ranges that were matched.
shank3_promoter_motifs <- shank3_motif_mx[1,] %>%
    which() %>%
    all_motif_names[.]
# Metadata rows for those motifs
shank3_promoter_motifs.df <- filter(
    JASPAR2020_CORE_META,
    Name %in% shank3_promoter_motifs
)

In [496]:
# Motifs that match at least one UTR feature.
# All UTR rows are selected at once and reduced column-wise, instead of
# growing a vector with c() inside a loop. The resulting set of names is the
# same; names now come out in matrix column order.
utr_rows <- which(shank_all$feature == "UTR")
shank3_utr_motifs <- all_motif_names[
    Matrix::colSums(shank3_motif_mx[utr_rows, , drop = FALSE]) > 0
]
# Metadata rows for those motifs
shank3_utr_motifs.df <- JASPAR2020_CORE_META %>% filter(Name %in% shank3_utr_motifs)

In [500]:
# Motifs that match at least one intron feature.
# All intron rows are selected at once and reduced column-wise, instead of
# growing a vector with c() inside a loop. The resulting set of names is the
# same; names now come out in matrix column order.
intron_rows <- which(shank_all$feature == "intron")
shank3_intron_motifs <- all_motif_names[
    Matrix::colSums(shank3_motif_mx[intron_rows, , drop = FALSE]) > 0
]
# Metadata rows for those motifs
shank3_intron_motifs.df <- JASPAR2020_CORE_META %>% filter(Name %in% shank3_intron_motifs)

In [502]:
# Motifs that match at least one exon feature.
# All exon rows are selected at once and reduced column-wise, instead of
# growing a vector with c() inside a loop. The resulting set of names is the
# same; names now come out in matrix column order.
exon_rows <- which(shank_all$feature == "exon")
shank3_exon_motifs <- all_motif_names[
    Matrix::colSums(shank3_motif_mx[exon_rows, , drop = FALSE]) > 0
]
# Metadata rows for those motifs
shank3_exon_motifs.df <- JASPAR2020_CORE_META %>% filter(Name %in% shank3_exon_motifs)

In [504]:
# Bundle the per-feature motif tables into one named list
shank3.motifs.df <- list(
    promoter = shank3_promoter_motifs.df,
    utr = shank3_utr_motifs.df,
    intron = shank3_intron_motifs.df,
    exon = shank3_exon_motifs.df
)

In [506]:
# Export the list of per-feature motif tables to a single Excel file.
# The call is namespace-qualified because the WriteXLS package is not
# attached by the library() calls at the top of this notebook.
WriteXLS::WriteXLS(shank3.motifs.df, ExcelFileName = "SHANK3_matching_JASPAR_MOTIFS_by_feature.xls")

In [None]:
Bioc