# Creating the environment
This conda environment is required to run this notebook with trackViewer.


$ conda create -n genome-view -c conda-forge -c bioconda jupyter r-irkernel bioconductor-trackviewer

Note: trackViewer citation: https://www.nature.com/articles/s41592-019-0430-y#Sec2

# Example adapted from the trackViewer package vignette
Link (Lolliplot section): https://www.bioconductor.org/packages/release/bioc/vignettes/trackViewer/inst/doc/trackViewer.html#Lolliplot

In [1]:
library(repr)
library(trackViewer)
library(tidyr)
library(dplyr)

“package ‘repr’ was built under R version 4.2.2”
“package ‘trackViewer’ was built under R version 4.2.2”
Loading required package: GenomicRanges

“package ‘GenomicRanges’ was built under R version 4.2.2”
Loading required package: stats4

Loading required package: BiocGenerics

“package ‘BiocGenerics’ was built under R version 4.2.1”

Attaching package: ‘BiocGenerics’


The following objects are masked from ‘package:stats’:

    IQR, mad, sd, var, xtabs


The following objects are masked from ‘package:base’:

    anyDuplicated, aperm, append, as.data.frame, basename, cbind,
    colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
    get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
    match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
    Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
    table, tapply, union, unique, unsplit, which.max, which.min


Loading required package: S4Vectors

“package ‘S4Vectors’ was built under R version 4

In [6]:
# Input tables: parsed GENCODE v39 exon annotations (TSV) and the formatted
# per-gene variant list (CSV).
gencode_file <- "/data5/bx_reference/hg38/annotations/gene_annotations/GENCODE39/gencode.v39.basic.parsed.exons.tsv"
variant_file <- "/data5/deepro/ukbiobank/papers/bmi_project/4_characterization/protective/protective_noicd/data/variably_expressive/gene_variants_formatted.csv"

In [72]:
# Load both input tables and keep only the columns used further down.
# read.delim() is read.csv() with a tab separator.
gencode_df <- read.delim(gencode_file)[
  , c("Chrom", "Start", "End", "Feature", "gene_name")
]
variant_df <- read.csv(variant_file)[
  , c("Sample", "SYMBOL", "Mut_type", "variant_id", "obesity_type", "obesity_type_color")
]

In [73]:
# Split variant_id on "_" into its four parts. The new columns are character;
# Location is cast to integer later, per gene.
variant_df <- variant_df %>%
  separate(col = variant_id, into = c("Chrom", "Location", "Ref", "Alt"), sep = "_")

# TTN


In [207]:
# TTN variants, with positions converted to integer and rows ordered by
# position.
gene_df <- variant_df[variant_df$SYMBOL == "TTN", ]
gene_df$Location <- as.integer(gene_df$Location)
gene_df <- gene_df[order(gene_df$Location), ]

# De-duplicated annotation rows for the same gene.
gencode_gene_df <- distinct(gencode_df[gencode_df$gene_name == "TTN", ])

# Plot only the first 11 variants of the position-sorted table.
gene_df_selected <- gene_df[1:11, ]

# Test whether any selected variant lies strictly inside one annotation
# interval.
#
# Args:
#   df: One annotation row with "Start" and "End" entries. When called through
#     apply(<data.frame>, 1, ...) the row arrives as a named character vector,
#     so both bounds are converted to numeric before comparing.
#   locations: Variant positions to test. Defaults to the positions in the
#     global `gene_df_selected`, which is what the one-argument call used with
#     apply() relies on.
#
# Returns:
#   TRUE if at least one position satisfies Start < position < End (both
#   bounds exclusive), otherwise FALSE.
is_present <- function(df, locations = gene_df_selected$Location) {
  start <- as.numeric(df[["Start"]])
  end <- as.numeric(df[["End"]])
  # Vectorised form of the scan-and-break loop; na.rm keeps a missing position
  # from aborting the check.
  any(locations > start & locations < end, na.rm = TRUE)
}

# Keep only the TTN annotation intervals that contain a selected variant.
# apply() passes each row to is_present() as a named character vector.
gencode_gene_df_selected <- gencode_gene_df[apply(gencode_gene_df, 1, is_present), ]
# NOTE(review): End - Start leaves out the end base if the table uses 1-based
# inclusive coordinates — confirm the convention of the parsed GENCODE file.
gencode_gene_df_selected$Width <- gencode_gene_df_selected$End - gencode_gene_df_selected$Start

SNP <- gene_df_selected$Location

# Take the sequence name from the variants instead of hard-coding it, so this
# cell stays correct when it is copied for a gene on another chromosome.
seq_name <- unique(gene_df_selected$Chrom)
stopifnot(length(seq_name) == 1L, !is.na(seq_name))

# One width-1 range per variant, named by its position.
sample.gr <- GRanges(seq_name, IRanges(SNP, width = 1, names = SNP))

# Annotation blocks drawn under the lollipops. fill/height are given to
# GRanges() as metadata columns, as in the trackViewer vignette, rather than
# to IRanges(). Colours are recycled across blocks; all blocks share one height.
features <- GRanges(
  seq_name,
  IRanges(gencode_gene_df_selected$Start, width = gencode_gene_df_selected$Width),
  fill = rep(c("#FF8833", "#51C6E6", "#DFA32D"),
             length.out = nrow(gencode_gene_df_selected)),
  height = rep(0.025, nrow(gencode_gene_df_selected))
)

# Per-variant styling: colour by obesity type, fully opaque.
sample.gr$color <- gene_df_selected$obesity_type_color
# sample.gr$border <- sample(c("gray30", "gray30"), length(SNP), replace=TRUE)
sample.gr$alpha <- 1 # sample(100:255, length(SNP), replace = TRUE)/255
# Index labels on nodes (not working):
# sample.gr$node.label <- as.character(1:length(sample.gr))
# sample.gr$node.label.col <-
#   ifelse(sample.gr$alpha>0.5 | sample.gr$color==1, "white", "black")
# sample.gr$node.label.cex <- 1

# The score comes from the Sample column
# (presumably a per-variant sample count — TODO confirm).
sample.gr$score <- gene_df_selected$Sample
# Custom x ticks (unused):
# xaxis <- c(1, 200, 400, 701, 1000, 1200, 1402)

# Legend as a list so several attributes can be set; see ?grid::gpar.
legend <- list(labels = c("risk", "protection", "variably_expressive"),
               col = c("#ab162a", "#1e61a5", "#ba6598"),
               fill = c("#ab162a", "#1e61a5", "#ba6598"))

# Inline notebook figure size; the PDF size is set by pdf() below.
options(repr.plot.width = 12, repr.plot.height = 10)

pdf(file = "../data/variably_expressive/lolliplots/TTN.pdf", # output file
    width = 8,  # inches
    height = 6) # inches
# Close the device even if plotting fails, so later plots are not silently
# redirected into this file.
invisible(tryCatch(
  lolliplot(sample.gr, features, legend = legend),
  finally = dev.off()
))

# RYR3

In [225]:
# RYR3 variants, with positions converted to integer and rows ordered by
# position.
gene_df <- variant_df[variant_df$SYMBOL == "RYR3", ]
gene_df$Location <- as.integer(gene_df$Location)
gene_df <- gene_df[order(gene_df$Location), ]

# De-duplicated annotation rows for the same gene.
gencode_gene_df <- distinct(gencode_df[gencode_df$gene_name == "RYR3", ])

# Plot only rows 16 to 32 of the position-sorted table.
gene_df_selected <- gene_df[16:32, ]

# Test whether any selected variant lies strictly inside one annotation
# interval.
#
# Args:
#   df: One annotation row with "Start" and "End" entries. When called through
#     apply(<data.frame>, 1, ...) the row arrives as a named character vector,
#     so both bounds are converted to numeric before comparing.
#   locations: Variant positions to test. Defaults to the positions in the
#     global `gene_df_selected`, which is what the one-argument call used with
#     apply() relies on.
#
# Returns:
#   TRUE if at least one position satisfies Start < position < End (both
#   bounds exclusive), otherwise FALSE.
is_present <- function(df, locations = gene_df_selected$Location) {
  start <- as.numeric(df[["Start"]])
  end <- as.numeric(df[["End"]])
  # Vectorised form of the scan-and-break loop; na.rm keeps a missing position
  # from aborting the check.
  any(locations > start & locations < end, na.rm = TRUE)
}

# Keep only the RYR3 annotation intervals that contain a selected variant.
# apply() passes each row to is_present() as a named character vector.
gencode_gene_df_selected <- gencode_gene_df[apply(gencode_gene_df, 1, is_present), ]
# NOTE(review): End - Start leaves out the end base if the table uses 1-based
# inclusive coordinates — confirm the convention of the parsed GENCODE file.
gencode_gene_df_selected$Width <- gencode_gene_df_selected$End - gencode_gene_df_selected$Start

SNP <- gene_df_selected$Location

# Take the sequence name from the variants themselves. The previous hard-coded
# "chr2" was carried over from the TTN cell, but the RYR3 variants are on chr15.
seq_name <- unique(gene_df_selected$Chrom)
stopifnot(length(seq_name) == 1L, !is.na(seq_name))

# One width-1 range per variant, named by its position.
sample.gr <- GRanges(seq_name, IRanges(SNP, width = 1, names = SNP))

# Annotation blocks drawn under the lollipops. fill/height are given to
# GRanges() as metadata columns, as in the trackViewer vignette, rather than
# to IRanges(). Colours are recycled across blocks; all blocks share one height.
features <- GRanges(
  seq_name,
  IRanges(gencode_gene_df_selected$Start, width = gencode_gene_df_selected$Width),
  fill = rep(c("#FF8833", "#51C6E6", "#DFA32D"),
             length.out = nrow(gencode_gene_df_selected)),
  height = rep(0.025, nrow(gencode_gene_df_selected))
)

# Per-variant styling: colour by obesity type, fully opaque.
sample.gr$color <- gene_df_selected$obesity_type_color
# sample.gr$border <- sample(c("gray30", "gray30"), length(SNP), replace=TRUE)
sample.gr$alpha <- 1 # sample(100:255, length(SNP), replace = TRUE)/255
# Index labels on nodes (not working):
# sample.gr$node.label <- as.character(1:length(sample.gr))
# sample.gr$node.label.col <-
#   ifelse(sample.gr$alpha>0.5 | sample.gr$color==1, "white", "black")
# sample.gr$node.label.cex <- 1

# The score comes from the Sample column
# (presumably a per-variant sample count — TODO confirm).
sample.gr$score <- gene_df_selected$Sample
# Custom x ticks (unused):
# xaxis <- c(1, 200, 400, 701, 1000, 1200, 1402)

# Legend as a list so several attributes can be set; see ?grid::gpar.
legend <- list(labels = c("risk", "protection", "variably_expressive"),
               col = c("#ab162a", "#1e61a5", "#ba6598"),
               fill = c("#ab162a", "#1e61a5", "#ba6598"))

# Inline notebook figure size; the PDF size is set by pdf() below.
options(repr.plot.width = 12, repr.plot.height = 10)

pdf(file = "../data/variably_expressive/lolliplots/RYR3.pdf", # output file
    width = 8,  # inches
    height = 6) # inches
# Close the device even if plotting fails, so later plots are not silently
# redirected into this file.
invisible(tryCatch(
  lolliplot(sample.gr, features, legend = legend),
  finally = dev.off()
))

In [221]:
# Display rows 13 to 32 of gene_df to inspect the variants around the plotted
# range.
gene_df[13:32,]

Unnamed: 0_level_0,Sample,SYMBOL,Mut_type,Chrom,Location,Ref,Alt,obesity_type,obesity_type_color
Unnamed: 0_level_1,<int>,<chr>,<chr>,<chr>,<int>,<chr>,<chr>,<chr>,<chr>
62,2,RYR3,missense,chr15,33662195,G,A,risk,#ab162a
63,1,RYR3,missense,chr15,33662280,G,A,protection,#1e61a5
64,7,RYR3,missense,chr15,33707052,A,C,risk,#ab162a
65,4,RYR3,missense,chr15,33722793,G,A,risk,#ab162a
66,3,RYR3,missense,chr15,33724099,G,A,variably_expressive,#ba6598
67,2,RYR3,missense,chr15,33738457,C,T,risk,#ab162a
68,1,RYR3,missense,chr15,33738516,G,A,risk,#ab162a
69,1,RYR3,missense,chr15,33738577,C,T,risk,#ab162a
70,8,RYR3,missense,chr15,33739987,C,G,variably_expressive,#ba6598
71,1,RYR3,missense,chr15,33742434,C,T,risk,#ab162a
