In [1]:
# Loading libraries
library(Gviz)
library(GenomicRanges)
library(rtracklayer)
library(GenomicAlignments)
library(TxDb.Hsapiens.UCSC.hg38.knownGene)
library(org.Hs.eg.db)
library(BiocManager)
library(dplyr)
library(tidyverse)
library(ggplot2)
library(Biostrings)
library(KEGGREST)
library(AnnotationDbi)
library(biomaRt)
library(GenomicFeatures)
library(BSgenome)
library(plotgardener)

Loading required package: S4Vectors

Loading required package: stats4

Loading required package: BiocGenerics


Attaching package: ‘BiocGenerics’


The following objects are masked from ‘package:stats’:

    IQR, mad, sd, var, xtabs


The following objects are masked from ‘package:base’:

    anyDuplicated, aperm, append, as.data.frame, basename, cbind,
    colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
    get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
    match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
    Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
    table, tapply, union, unique, unsplit, which.max, which.min



Attaching package: ‘S4Vectors’


The following objects are masked from ‘package:base’:

    expand.grid, I, unname


Loading required package: IRanges

Loading required package: GenomicRanges

Loading required package: GenomeInfoDb

Loading required package: grid

Loading required package: SummarizedExperiment

In [2]:
# Define the region of interest 
# These three values are reused by the later cells for GTF filtering, BAM
# coverage and track construction. NOTE: `start` and `end` share their names
# with the start()/end() accessors called further down; calls such as
# start(gtf_data) still resolve to the functions because R skips non-function
# objects when looking up a function name.
chrom <- "chr7"
start <- 18086949
end <- 19002416

In [3]:
# Read the annotation file
gtf_file <- "gencode.v44.basic.annotation.gtf"
gtf_data <- import(gtf_file)

# Keep the records on `chrom` that lie entirely inside [start, end]:
# a record is retained only if it both begins at/after `start` and
# finishes at/before `end`.
subset_gtf_data <- gtf_data[
  seqnames(gtf_data) == chrom &
    start(gtf_data) >= start &
    end(gtf_data) <= end
]

# Narrow the selection further to the records annotated with the gene of interest
gene_of_interest <- "HDAC9"
subset_gtf_data <- subset_gtf_data[subset_gtf_data$gene_name == gene_of_interest]

In [4]:
# Ideogram track -----
# Chromosome ideogram for `chrom` on the hg38 assembly, with an empty title.
gen <- "hg38"
itrack <- IdeogramTrack(genome = gen, chromosome = chrom, name = '')

# Display settings: hide the band identifiers and draw frame, lines and
# labels in black.
displayPars(itrack) <- list(
  showBandId = FALSE,
  col.frame = 'black',
  col = 'black',
  cex = 1.2,
  col.line = 'black',
  showTitle = TRUE,
  fontcolor.group = 'black'
)

In [5]:
# Transcript-level track labelled with transcript IDs
# Only the "transcript" records of the HDAC9 subset are kept; their
# transcript_id is copied into a `feature` column, which is the column the
# track is told to use for its labels (transcriptAnnotation = 'feature').
filtered_gtf_data <- subset_gtf_data[subset_gtf_data$type == "transcript"]
mcols(filtered_gtf_data)$feature <- filtered_gtf_data$transcript_id

# NOTE(review): `fontcolour` (British spelling) is passed here while every
# other setting in this file uses `fontcolor` -- confirm it is honoured.
# NOTE(review): fontsize = 300 together with cex = 10 below looks very large;
# confirm this is intended.
gene_track <- GeneRegionTrack(
  filtered_gtf_data,
  name = "",
  chromosome = chrom,
  start = start,
  end = end,
  transcriptAnnotation = 'feature',
  showId = TRUE,
  just.group = "above",
  fontcolour = 'black',
  fontsize = 300
)

# Draw text, borders and connecting lines in black
displayPars(gene_track) <- list(
  fontcolor = 'black',
  col = 'black',
  cex = 10,
  col.line = 'black',
  fontcolor.group = 'black'
)

In [6]:
# For adding the exon information, keep only the exon records of the HDAC9
# subset, restricted to the transcripts retained in `filtered_gtf_data`.
subset_gtf_transcripts_exons <- subset_gtf_data[subset_gtf_data$type %in% c("exon"), ]
subset_gtf_transcripts_exons <- subset_gtf_transcripts_exons[subset_gtf_transcripts_exons$transcript_id %in% filtered_gtf_data$transcript_id,]
exon_track <- AnnotationTrack(subset_gtf_transcripts_exons,
                              name = "Exons",
                              chromosome = chrom,
                              start = start, end = end ,
                              fill = "darkgreen",    # Color for exons
                              col = "black",         # Border color for exons
                              direction = NA)

# The ideogram track (`gen`, `itrack` and its display parameters) is built in
# the "Ideogram track" cell earlier in this notebook. The verbatim copy that
# used to follow here only rebuilt the same objects a second time, so it has
# been removed.

In [7]:
# For bam coverage plot: locate the BAM index files and load the sample sheet
# Define the path to the BAM folder and read the sample information file
bam_folder <- "."
sample_info_file <- as.data.frame(read_csv('meta_with_batchinfo.csv'))

# List all BAM index files in the folder.
# `pattern` is a regular expression, not a shell glob: the dots are escaped
# and the match is anchored at the end, so only names ending in ".bam.bai"
# are returned.
bam_files <- list.files(path = bam_folder, pattern = "\\.bam\\.bai$", full.names = FALSE)

# Extract sample IDs from the file names (the part before the first "_").
# vapply() always returns a character vector -- even when no file matched --
# so the data frame below keeps a well-defined `sample_id` column.
bam_sample_ids <- vapply(strsplit(bam_files, "_"), `[`, character(1), 1)

# Pair every sample ID with the file it was derived from
bam_df <- data.frame(sample_id = bam_sample_ids, bam_file = bam_files, stringsAsFactors = FALSE)


[1m[22mNew names:
[36m•[39m `` -> `...1`
[1mRows: [22m[34m448[39m [1mColumns: [22m[34m99[39m
[36m──[39m [1mColumn specification[22m [36m────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m ","
[31mchr[39m  (45): Project_ID, Run, batch, download_path, Experiment, LibraryName, L...
[32mdbl[39m  (31): ...1, spots, bases, spots_with_mates, avgLength, size_MB, InsertS...
[33mlgl[39m  (21): AssemblyName, Study_Pubmed_id, g1k_pop_code, source, g1k_analysis...
[34mdttm[39m  (2): ReleaseDate, LoadDate

[36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
[36mℹ[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.


In [8]:
# Collapse the detailed Tumor_Normal labels into two groups ("Tumor" and
# "AdjNormal"); any label not listed becomes NA. Only the run accession and
# the new grouping column are kept.
sample_info_file <- sample_info_file %>%
  mutate(
    Tumor_Normal1 = case_when(
      Tumor_Normal %in% c("TumorTissue", "TumorTissue.2018", "TumorTissue.rep") ~ "Tumor",
      Tumor_Normal %in% c("PairedNormalTissue", "PairedNormalTissue.rep") ~ "AdjNormal"
    )
  ) %>%
  dplyr::select(all_of(c('Run', 'Tumor_Normal1')))

# Attach the group to every BAM index file by matching sample_id against Run
# (merge() keeps only the IDs present on both sides)
merged_data <- merge(bam_df, sample_info_file, by.x = "sample_id", by.y = "Run")

# One single-column data frame of file names per group
tumor_bam_files <- dplyr::select(
  filter(merged_data, Tumor_Normal1 == "Tumor"),
  bam_file
)

normal_bam_files <- dplyr::select(
  filter(merged_data, Tumor_Normal1 == "AdjNormal"),
  bam_file
)

In [9]:
# BAM paths for the TNBC (tumor) and normal samples.
# The file lists hold the index names ("<name>.bam.bai"); dropping the
# trailing ".bai" gives the BAM file itself. The dot is escaped so that only
# a literal ".bai" suffix is removed (an unescaped "." matches any character).
tnbc_bam <- sub("\\.bai$", "", tumor_bam_files$bam_file)
nor_bam <- sub("\\.bai$", "", normal_bam_files$bam_file)

In [10]:
# Compute the per-base mean read coverage of a region across several BAM files.
#
# Arguments:
#   bam_files  - character vector of BAM file paths (at least one).
#   chromosome - sequence name, e.g. "chr7"; must be a name present in the
#                coverage of every BAM file.
#   start, end - first and last position of the region (both inclusive).
#
# Returns a GRanges with one width-1 range per position in [start, end] and a
# `score` column holding the coverage averaged over all files.
#
# Stops with an error when `bam_files` is empty or when start > end; without
# that check an empty input would quietly produce a track of NA scores.
get_mean_coverage_as_granges <- function(bam_files, chromosome, start, end) {
  if (length(bam_files) == 0L) {
    stop("`bam_files` must contain at least one BAM path", call. = FALSE)
  }
  if (start > end) {
    stop("`start` must not be greater than `end`", call. = FALSE)
  }

  # The query region is the same for every file, so build it once
  region <- GRanges(chromosome, IRanges(start, end))
  coverage_list <- lapply(bam_files, function(bam_file) {
    bam_data <- readGAlignments(bam_file, param = ScanBamParam(which = region))
    coverage(bam_data)[[chromosome]]
  })

  # Sum coverages and then divide by the number of BAM files to get the mean
  mean_coverage <- Reduce("+", coverage_list) / length(bam_files)

  # The region was queried inclusively, so report every base up to and
  # including `end` (previously the last base was dropped).
  positions <- start:end
  GRanges(seqnames = chromosome,
          ranges = IRanges(start = positions, width = 1),
          score = as.numeric(mean_coverage[positions]))
}

# Mean coverage over the region of interest, one result per sample group
tnbc_coverage_mean <- get_mean_coverage_as_granges(
  tnbc_bam,
  chromosome = chrom,
  start = start,
  end = end
)
nor_coverage_mean <- get_mean_coverage_as_granges(
  nor_bam,
  chromosome = chrom,
  start = start,
  end = end
)

In [11]:
# Histogram tracks of the mean coverage. Both tracks share the same y-range
# (0-40) so they can be compared directly; only the data, title and colour
# differ.

# TNBC: tomato bars
tnbc_track <- DataTrack(
  range = tnbc_coverage_mean,
  genome = "hg38",
  type = "hist",
  chromosome = chrom,
  start = (start),
  end = end,
  name = "TNBC\nmean\ncoverage",
  col.histogram = 'tomato2',
  fill.histogram = 'tomato2',
  ylim = c(0, 40)
)

# Normal: steel-blue bars
nor_track <- DataTrack(
  range = nor_coverage_mean,
  genome = "hg38",
  type = "hist",
  chromosome = chrom,
  start = (start),
  end = end,
  name = "Normal\nmean\ncoverage",
  col.histogram = 'steelblue1',
  fill.histogram = 'steelblue1',
  ylim = c(0, 40)
)

In [12]:
# Genome axis with both strand-direction indicators enabled, drawn in black
axis_track <- GenomeAxisTrack(name = 'Axis Track', add53 = TRUE, add35 = TRUE)
displayPars(axis_track) <- list(
  fontcolor = 'black',
  col = 'black',
  cex = 1,
  col.line = 'black',
  fontcolor.group = 'black'
)

# Promoter region tracks (p1; p2; p3), one fill colour each.
# NOTE(review): all three annotations run from their own start up to `end`
# (the end of the whole region of interest), so they overlap each other --
# confirm that this is the intended extent of each promoter.
p1 <- AnnotationTrack(
  start = 18086949, end = end, chromosome = chrom,
  name = "", fill = "pink", col = NULL,
  fontcolor.group = 'black', fontcolor.item = 'black'
)
p2 <- AnnotationTrack(
  start = 18495745, end = end, chromosome = chrom,
  name = "", fill = "orange", col = NULL,
  fontcolor.group = 'black', fontcolor.item = 'black'
)
p3 <- AnnotationTrack(
  start = 18509273, end = end, chromosome = chrom,
  name = "", fill = "lightblue", col = NULL,
  fontcolor.group = 'black', fontcolor.item = 'black'
)

In [13]:
# For adding the H3K4me3 track for Tumor
# Read the bigWig signal for the region of interest. The region comes from
# `chrom`/`start`/`end` (the same values that were previously repeated here
# as literals) so it cannot drift from the rest of the notebook.
bw.file_tum <- 'HCC1937_t2_SRX1998157.bw'
bw_tumor <- readBigwig(file = bw.file_tum,
                       chrom = chrom,
                       chromstart = start,
                       chromend = end)

In [14]:
# Convert the tumor bigWig table into a GRanges carrying the signal as `score`
data_tum <- bw_tumor
gr_data_tumor <- GRanges(
  seqnames = data_tum[["seqnames"]],
  ranges = IRanges(start = data_tum[["start"]], end = data_tum[["end"]]),
  score = data_tum[["score"]]
)

## Signal track for the tumor sample: type "h" in tomato, y-axis fixed to
## 0-1.5
track_tumor <- DataTrack(
  range = gr_data_tumor,
  genome = "hg38",
  name = "HCC1937\nH3K4me3",
  type = "h",
  chromosome = chrom,
  ylim = c(0, 1.5),
  col = "tomato2"
)

In [15]:
# For adding the H3K4me3 track for Normal
# Read the bigWig signal for the region of interest. The region comes from
# `chrom`/`start`/`end` (the same values that were previously repeated here
# as literals) so it cannot drift from the rest of the notebook.
bw.file_nor <- 'MCF10A_t1.bw'
bw_normal <- readBigwig(file = bw.file_nor,
                        chrom = chrom,
                        chromstart = start,
                        chromend = end)

In [16]:
# Convert the normal bigWig table into a GRanges carrying the signal as `score`
data_normal <- bw_normal

gr_data_normal <- GRanges(
  seqnames = data_normal[["seqnames"]],
  ranges = IRanges(start = data_normal[["start"]], end = data_normal[["end"]]),
  score = data_normal[["score"]]
)

## Signal track for the normal sample: type "h" in steel blue, y-axis fixed
## to 0-1.5
track_nor <- DataTrack(
  range = gr_data_normal,
  genome = "hg38",
  name = "MCF10A\nH3K4me3",
  type = "h",
  chromosome = chrom,
  ylim = c(0, 1.5),
  col = "steelblue1"
)

In [17]:
## Chunk for adding the H3K27ac enhancer ChIP-seq data plot:------------------------->>

# For Tumor
# NOTE: this overwrites `bw.file_tum` and `bw_tumor` from the H3K4me3 cells,
# so the cell that builds the enhancer GRanges must be run after this one.
# The region comes from `chrom`/`start`/`end` (the same values that were
# previously repeated here as literals).
bw.file_tum <- 'HCC1937.H3K27ac_SRX1998148.bw'
bw_tumor <- readBigwig(file = bw.file_tum,
                       chrom = chrom,
                       chromstart = start,
                       chromend = end)

In [18]:
# Convert the tumor H3K27ac bigWig table into a GRanges with the signal as
# `score` (reuses the `data_tum` / `gr_data_tumor` names from the H3K4me3 cell)
data_tum <- bw_tumor
gr_data_tumor <- GRanges(
  seqnames = data_tum[["seqnames"]],
  ranges = IRanges(start = data_tum[["start"]], end = data_tum[["end"]]),
  score = data_tum[["score"]]
)

## Enhancer-mark signal track for the tumor sample: type "h" in tomato,
## y-axis fixed to 0-1.5
track_tumor_enhancer <- DataTrack(
  range = gr_data_tumor,
  genome = "hg38",
  name = "HCC1937\nH3K27ac",
  type = "h",
  chromosome = chrom,
  ylim = c(0, 1.5),
  col = "tomato2"
)

In [19]:
# Add H3K27ac track for Normal
# NOTE: this overwrites `bw.file_nor` and `bw_normal` from the H3K4me3 cells,
# so the cell that builds the enhancer GRanges must be run after this one.
# The region comes from `chrom`/`start`/`end` (the same values that were
# previously repeated here as literals).
bw.file_nor <- 'MCF10A.H3K27ac_SRX1997949.bw'
bw_normal <- readBigwig(file = bw.file_nor,
                        chrom = chrom,
                        chromstart = start,
                        chromend = end)

In [20]:
# Convert the normal H3K27ac bigWig table into a GRanges with the signal as
# `score` (reuses the `data_normal` / `gr_data_normal` names from the H3K4me3
# cell)
data_normal <- bw_normal
gr_data_normal <- GRanges(
  seqnames = data_normal[["seqnames"]],
  ranges = IRanges(start = data_normal[["start"]], end = data_normal[["end"]]),
  score = data_normal[["score"]]
)

## Enhancer-mark signal track for the normal sample: type "h" in steel blue,
## y-axis fixed to 0-1.5
track_nor_enhancer <- DataTrack(
  range = gr_data_normal,
  genome = "hg38",
  name = "MCF10A\nH3K27ac",
  type = "h",
  chromosome = chrom,
  ylim = c(0, 1.5),
  col = "steelblue1"
)

In [21]:
# Highlight track for HDAC9: wraps the promoter, coverage and ChIP-seq tracks
# and overlays three 8000-bp orange bands (alpha 0.2) drawn on top of the
# data (inBackground = FALSE).
ht <- HighlightTrack(
  trackList = list(
    p1, p2, p3,
    tnbc_track, nor_track,
    track_tumor, track_nor,
    track_tumor_enhancer, track_nor_enhancer
  ),
  start = c(18082249, 18491845, 18506023),
  width = c(8000, 8000, 8000),
  chromosome = chrom,
  col = "orange",
  fill = "orange",
  inBackground = FALSE,
  alpha = 0.2
)

In [None]:
# Plot all tracks into an 8 x 8 inch PDF
pdf('HDAC9_trackplot.pdf',width = 8,height = 8)
# The device is closed in `finally`, so a failure inside plotTracks() no
# longer leaves the PDF device open (and the file locked / half-written) in
# the running session.
# `sizes` has one entry per drawn track: the axis track followed by the nine
# tracks wrapped in `ht`.
invisible(tryCatch(
  plotTracks(list(axis_track, ht),
             from = 18086949, to = 18656949, chromosome = chrom,
             sizes = c(4,1,1,1,6,6,6,6,6,6),
             background.panel = "white",
             background.title = "grey39", col.axis = "white",
             extend.left = 0.025,
             cex.axis = 0.7, cex.title = 1,showTitle = TRUE),
  finally = dev.off()
))