Skip to content

Commit

Permalink
Addressing the "no visible binding for global variable" warning
Browse files Browse the repository at this point in the history
- Changes to ensure that the warning is stopped
- Improved other issues, including variable names, provided data types, made internal functions

#2 #28 #27
  • Loading branch information
biodavidjm committed Sep 17, 2018
1 parent 0f7c7f6 commit 6683b63
Show file tree
Hide file tree
Showing 11 changed files with 170 additions and 45 deletions.
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: artMS
Type: Package
Title: Analytical R tools for Mass Spectrometry
Version: 0.99.5
Version: 0.99.6
Date: 2018-09-13
Author: David Jimenez-Morales, John Von Dollen, Alexandre Rosa Campos
Maintainer: David Jimenez-Morales <biodavidjm@gmail.com>
Expand Down Expand Up @@ -53,6 +53,7 @@ Imports:
org.Pf.plasmo.db,
org.Pt.eg.db,
org.Rn.eg.db,
org.Sc.sgd.db,
org.Ss.eg.db,
org.Xl.eg.db,
PerformanceAnalytics,
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ import(org.Mmu.eg.db)
import(org.Pf.plasmo.db)
import(org.Pt.eg.db)
import(org.Rn.eg.db)
import(org.Sc.sgd.db)
import(org.Ss.eg.db)
import(org.Xl.eg.db)
import(pheatmap)
Expand Down Expand Up @@ -105,6 +106,7 @@ importFrom(stats,order.dendrogram)
importFrom(stats,phyper)
importFrom(tidyr,unnest)
importFrom(utils,combn)
importFrom(utils,globalVariables)
importFrom(utils,head)
importFrom(utils,read.delim)
importFrom(utils,setTxtProgressBar)
Expand Down
10 changes: 5 additions & 5 deletions R/MSstats_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -193,9 +193,9 @@ artms_mergeMaxQDataWithKeys <- function(data, keys, by=c('RawFile')){
#' @return (data.frame) with both evidence and keys files merged by raw.files
#' @keywords internal, merge, evidence, keys
#' @examples \donttest{
#' evidenceKeys <- artms_mergeEvidenceKeysByFiles(
#' evidence_file = "FLU-THP1-H1N1-AB-evidence.txt",
#' keys_file = "FLU-THP1-H1N1-AB-keys.txt")
#' evidenceKeys <- artms_mergeEvidenceKeysByFiles(
#' evidence_file = "FLU-THP1-H1N1-AB-evidence.txt",
#' keys_file = "FLU-THP1-H1N1-AB-keys.txt")
#' }
#' @export
artms_mergeEvidenceKeysByFiles <- function(evidence_file, keys_file) {
Expand Down Expand Up @@ -400,8 +400,8 @@ artms_resultsWide <- function(evidence_file, output_file){
.artms_significantHits <- function(mss_results, labels='*', LFC=c(-2,2), FDR=0.05){
## get subset based on labels
selected_results = mss_results[grep(labels,mss_results$Label), ]
cat(sprintf('>> AVAILABLE LABELS FOR HEATMAP:\n %s\n',paste(unique(mss_results$Label), collapse=', ')))
cat(sprintf('>> SELECTED LABELS FOR HEATMAP:\n %s\n',paste(unique(selected_results$Label), collapse=', ')))
# cat(sprintf('>> AVAILABLE LABELS FOR HEATMAP:\n %s\n',paste(unique(mss_results$Label), collapse=', ')))
# cat(sprintf('>> SELECTED LABELS FOR HEATMAP:\n %s\n',paste(unique(selected_results$Label), collapse=', ')))
significant_proteins = selected_results[(!is.na(selected_results$log2FC) & selected_results$adj.pvalue <= FDR & (selected_results$log2FC >= LFC[2] | selected_results$log2FC <= LFC[1])) , 'Protein']
significant_results = selected_results[selected_results$Protein %in% significant_proteins, ]
return(significant_results)
Expand Down
23 changes: 12 additions & 11 deletions R/analysisQuantifications.R
Original file line number Diff line number Diff line change
Expand Up @@ -293,8 +293,8 @@ artms_analysisQuantifications <- function(log2fc_file,
abundancesName <- paste0("plot.",abundancesName)
abundancesName <- paste0(output_dir,"/",abundancesName)
pdf(abundancesName)
.artms_plotAbundanceBoxplots(dfmq)
.artms_plotNumberProteinsAbundance(dfmq)
.artms_plotAbundanceBoxplots(data = dfmq)
.artms_plotNumberProteinsAbundance(data = dfmq)
garbage <- dev.off()

# Reproducibility plots based on normalized abundance
Expand Down Expand Up @@ -336,7 +336,7 @@ artms_analysisQuantifications <- function(log2fc_file,
# TECHNICAL REPLICAS: if there are technical replicas means that we will find
# two values for the same protein in the same bioreplica, therefore we need to
# aggregate first just in case:
abundance <- aggregate(Abundance~Prey+Bait+Bioreplica, data = abundance, FUN = mean)
abundance <- aggregate(Abundance~Prey+Bait+BioReplicate, data = abundance, FUN = mean)

# Let's aggregate to get the sum of the abundance, we will use it later.
abundance_dcsum <- data.table::dcast(abundance, Prey~Bait, value.var = 'Abundance', fun.aggregate = sum, fill = 0 )
Expand Down Expand Up @@ -1221,10 +1221,10 @@ artms_generatePhSiteExtended <- function(df, pathogen, specie, ptmType){
# Take the abundance values for all the proteins
abu2imp <- .artms_loadModelqcBasic(dfmq)
# Aggregate the technical replica by choosing the maximum value
abu2imp2 <- aggregate(Abundance~Protein+Condition+Bioreplica, data = abu2imp, FUN = mean)
abu2imp2 <- aggregate(Abundance~Protein+Condition+BioReplicate, data = abu2imp, FUN = mean)

# Check things that will be imputed
# dfdc.ni <- data.table::dcast(data=abu2imp2, Protein~Bioreplica, value.var = "Abundance")
# dfdc.ni <- data.table::dcast(data=abu2imp2, Protein~BioReplicate, value.var = "Abundance")

# Two possible options here.
# 1. Select based on the bottom x%
Expand All @@ -1247,13 +1247,13 @@ artms_generatePhSiteExtended <- function(df, pathogen, specie, ptmType){
numbers2sample <- seq(from=theMin, to=theMax, by=.00001)

# dcast on abundance and fill with random numbers between the minimum and q05
suppressWarnings(dfdc.im <- data.table::dcast(data=abu2imp2, Protein~Bioreplica, value.var = "Abundance", fill = sample(numbers2sample, replace = F) ) )
suppressWarnings(dfdc.im <- data.table::dcast(data=abu2imp2, Protein~BioReplicate, value.var = "Abundance", fill = sample(numbers2sample, replace = F) ) )

# Needs to aggregate on biological replicas
# 1. Melt on biological replicas
dfdc.melt <- reshape2::melt(dfdc.im, id.vars = c('Protein'), value.name = 'Abundance', variable.name = 'Bioreplica')
dfdc.melt <- reshape2::melt(dfdc.im, id.vars = c('Protein'), value.name = 'Abundance', variable.name = 'BioReplicate')
# 2. Get the condition
dfdc.melt$Condition <- gsub("(.*)(-)(.*)","\\1",dfdc.melt$Bioreplica)
dfdc.melt$Condition <- gsub("(.*)(-)(.*)","\\1",dfdc.melt$BioReplicate)
# 3. Dcast and Aggregate on the condition, taking the mean
dfdc.final <- data.table::dcast(data=dfdc.melt, Protein~Condition, value.var = 'Abundance', fun.aggregate = mean)
# 4. Filter by proteins to impute
Expand Down Expand Up @@ -1326,9 +1326,10 @@ artms_generatePhSiteExtended <- function(df, pathogen, specie, ptmType){

#------------------------------------------------------------------------------
# @title Load the basic ModelQC file
#
# @param data (data.frame) of the ModelQC file
# @return (data.frame) of the modelqc file with the columns Protein, Abundance,
# Condition, Bioreplica
# Condition, BioReplicate
# @keywords internal, loading
.artms_loadModelqcBasic <- function(data){
if( length(grep(";",data$PROTEIN))>0 ) data <- data[-grep(";",data$PROTEIN),] # NOTE!!! We lose a lot of entries this way.
Expand All @@ -1351,12 +1352,12 @@ artms_generatePhSiteExtended <- function(df, pathogen, specie, ptmType){
stop("Abort mission\n!")
}
if("SUBJECT_ORIGINAL" %in% colnames(data)){
names(data)[grep("SUBJECT_ORIGINAL", names(data))] <- 'Bioreplica'
names(data)[grep("SUBJECT_ORIGINAL", names(data))] <- 'BioReplicate'
}else{
cat("ERROR: you should check the abundance file because something is seriously wrong!\n")
stop("Abort mission\n!")
}
data <- subset(data, select = c(Protein, Abundance, Condition, Bioreplica))
data <- subset(data, select = c(Protein, Abundance, Condition, BioReplicate))
return(data)
}

Expand Down
18 changes: 16 additions & 2 deletions R/main.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#' @import org.Pf.plasmo.db
#' @import org.Pt.eg.db
#' @import org.Rn.eg.db
#' @import org.Sc.sgd.db
#' @import org.Ss.eg.db
#' @import org.Xl.eg.db
#' @importFrom PerformanceAnalytics chart.Correlation
Expand All @@ -47,10 +48,23 @@
#' @importFrom stats aggregate as.dendrogram cor dist fisher.test hclust kmeans median order.dendrogram phyper as.dist complete.cases
#' @import stringr
#' @importFrom tidyr unnest
#' @importFrom utils combn read.delim write.table setTxtProgressBar txtProgressBar head
#' @importFrom utils combn read.delim write.table setTxtProgressBar txtProgressBar head globalVariables
#' @import VennDiagram
#' @import yaml

utils::globalVariables(c("Modifications", "Protein", "Abundance", "Condition",
"BioReplicate", "Comparison", "Intensity", "log2FC", "Label", "RawFile",
"iLog2FC", "pvalue", "adj.pvalue", "iPvalue", "Prey", "ReproBioreplicaCount",
"ReproConditionCount", "category", "Specie", "AbMean", "Gene", "imputedDFext",
"pathogen.ids", "Contaminant", "TR", "MODIFICATION", "Proteins", "ComplexName",
"PTMone", "PTMsite", "output_dir", "isPtm", "log2fc_file",
"artms_data_corum_mito_database",
"SUBJECT_ORIGINAL", "ABUNDANCE", "IsotopeLabelType",
"GROUP_ORIGINAL", "SYMBOL", "GENENAME", "PROTEIN", "FEATURE",
"pearson", "..count..", "tr1", "tr2", "sample_name", "value", "prot_names",
"cluster", "ymax", "ymin", "uniprot_ac", "uniprot_id", "res_index", "ptm_site"
))

# ------------------------------------------------------------------------------
#' @title Relative quantification using MSstats
#'
Expand Down Expand Up @@ -212,7 +226,7 @@ artms_quantification <- function(yaml_config_file){
.artms_writeExtras(results$ComparisonResult, config)
}

cat(">> ANALYSIS COMPLETE! HAVE A NICE DAY :)\n")
cat("\nANALYSIS COMPLETE! HAVE A NICE DAY :)\n")
}


87 changes: 79 additions & 8 deletions R/plots.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,10 @@
#' @return A ggplot2 correlation plot
#' @keywords plot, correlation
.artms_plotCorrelationDistribution <- function(MatrixCorrelations){
cor.data <- MatrixCorrelations[upper.tri(MatrixCorrelations, diag = FALSE)] # we're only interested in one of the off-diagonals, otherwise there'd be duplicates
cor.data <- as.data.frame(cor.data) # that's how ggplot likes it
# we're only interested in one of the off-diagonals,
# otherwise there'd be duplicates
cor.data <- MatrixCorrelations[upper.tri(MatrixCorrelations, diag = FALSE)]
cor.data <- as.data.frame(cor.data)
colnames(cor.data) <- "pearson"

g <- ggplot(data = cor.data, mapping = aes(x = pearson))
Expand Down Expand Up @@ -65,6 +67,76 @@ artms_dataPlots <- function(input_file, output_file){
garbarge <- dev.off()
}

# ------------------------------------------------------------------------------
#' @title Heatmap of significant values
#'
#' @description heatmap plot to represent proteins with significant changes
#' @param mss_F (data.frame) with the significant values (log2fc, pvalues)
#' @param out_file (char) Name for the output
#' @param labelOrder (vector) Vector with the particular order for the IDs
#' (default, `NULL` no order)
#' @param names (char) Type of ID used. Default is `Protein` (uniprot entry id).
#' Soon will be possible to use 'Gene' name ids.
#' @param cluster_cols (logical) Select whether to cluster the columns.
#' Options: `T` or `F`. Default `T`.
#' @param display (char) Value used to genarate the heatmaps. Options:
#' - `log2FC` (default)
#' - `adj.pvalue`
#' - `pvalue`
#' @return A heatmap of significant values
#' @keywords significant, heatmap
.artms_plotHeat <- function(mss_F, out_file, labelOrder=NULL, names='Protein', cluster_cols=F, display='log2FC'){
heat_data = data.frame(mss_F, names=names)

## create matrix from log2FC or p-value as user defined
if(display=='log2FC'){
# Issues with extreme_val later if we have Inf/-Inf values.
if( sum(is.infinite(heat_data$log2FC)) > 0 ){
idx <- is.infinite(heat_data$log2FC)
heat_data$log2FC[ idx ] <- NA
}
heat_data_w = dcast(names ~ Label, data=heat_data, value.var='log2FC')
}else if(display=='adj.pvalue'){
heat_data$adj.pvalue = -log10(heat_data$adj.pvalue+10^-16)
heat_data_w = dcast(names ~ Label, data=heat_data, value.var='adj.pvalue')
}else if(display=='pvalue'){
heat_data$pvalue = -log10(heat_data$pvalue+10^-16)
heat_data_w = dcast(names ~ Label, data=heat_data, value.var='pvalue')
}

## try
#gene_names = uniprot_to_gene_replace(uniprot_ac=heat_data_w$Protein)
rownames(heat_data_w) = heat_data_w$names
heat_data_w = heat_data_w[,-1]
heat_data_w[is.na(heat_data_w)]=0
max_val = ceiling(max(heat_data_w))
min_val = floor(min(heat_data_w))
extreme_val = max(max_val, abs(min_val))
if(extreme_val %% 2 != 0) extreme_val=extreme_val+1
bin_size=2
signed_bins = (extreme_val/bin_size)
colors_neg = rev(colorRampPalette(brewer.pal("Blues",n=extreme_val/bin_size))(signed_bins))
colors_pos = colorRampPalette(brewer.pal("Reds",n=extreme_val/bin_size))(signed_bins)
colors_tot = c(colors_neg, colors_pos)

if(is.null(labelOrder)){
pheatmap(heat_data_w, scale="none", cellheight=10,
cellwidth=10, filename =out_file, color=colors_tot,
breaks=seq(from=-extreme_val, to=extreme_val, by=bin_size),
cluster_cols=cluster_cols, fontfamily="mono")
cat("--- Heatmap is out\n")
}else{
heat_data_w <- heat_data_w[,labelOrder]
pheatmap(heat_data_w, scale="none", cellheight=10, cellwidth=10,
filename=out_file, color=colors_tot,
breaks=seq(from=-extreme_val, to=extreme_val, by=bin_size),
cluster_cols=cluster_cols, fontfamily="mono")
cat("--- Heatmap is out\n")
}

return(heat_data_w)
}

# ------------------------------------------------------------------------------
#' @title Outputs a heatmap of the MSStats results created using the log2fold
#' changes
Expand Down Expand Up @@ -317,12 +389,11 @@ artms_plotHeatmapQuant <- function(input_file,
#' @param data (data.frame) modelqc
#' @return (pdf) Barplots with the number of proteins per br / condition
#' @keywords internal, plots, abundance, counts
#' .artms_plotNumberProteinsAbundance()
artms_plotNumberProteinsAbundance <- function(data) {
.artms_plotNumberProteinsAbundance <- function(data) {
x <- data[c('PROTEIN','SUBJECT_ORIGINAL')]
y <- unique(x)
names(y)[grep('SUBJECT_ORIGINAL', names(y))] <- 'BioReplicates'
z <- ggplot2::ggplot(y, aes(x = BioReplicates, fill = BioReplicates))
names(y)[grep('SUBJECT_ORIGINAL', names(y))] <- 'BioReplicate'
z <- ggplot2::ggplot(y, aes(x = BioReplicate, fill = BioReplicate))
z <- z + geom_bar(stat = "count")
z <- z + theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5))
z <- z + geom_text(stat='count', aes(label=..count..), vjust=-0.5, size = 2.7)
Expand All @@ -331,8 +402,8 @@ artms_plotNumberProteinsAbundance <- function(data) {

a <- data[c('PROTEIN','GROUP_ORIGINAL')]
b <- unique(a)
names(b)[grep('GROUP_ORIGINAL', names(b))] <- 'Conditions'
c <- ggplot2::ggplot(b, aes(x = Conditions, fill = Conditions))
names(b)[grep('GROUP_ORIGINAL', names(b))] <- 'Condition'
c <- ggplot2::ggplot(b, aes(x = Condition, fill = Condition))
c <- c + geom_bar(stat = "count")
c <- c + theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5))
c <- c + geom_text(stat='count', aes(label=..count..), vjust=-0.5, size = 2.7)
Expand Down
9 changes: 5 additions & 4 deletions R/runMSstats.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#' - MSstats `designSampleSize` power experiment
#' @keywords internal, run, MSstats, contrast, intensity, plots, QC
.artms_runMSstats <- function(dmss, contrasts, config){
cat(">> MSstats STARTS RUNNING:\n")
# plot the data BEFORE normalization
if(grepl('before', config$msstats$profilePlots)){
mssquant = dataProcess(raw = dmss,
Expand Down Expand Up @@ -51,7 +52,7 @@
MBimpute = config$msstats$MBimpute,
featureSubset=config$msstats$feature_subset)
}else{
cat(sprintf('\n>> NORMALIZATION\t%s\n',config$msstats$normalization_method))
cat(sprintf('\n--- NORMALIZATION METHOD: %s\n',config$msstats$normalization_method))
#mssquant = dataProcess(dmss, normalization=config$msstats$normalization_method , fillIncompleteRows = F, betweenRunInterferenceScore = F)
mssquant = dataProcess(raw = dmss,
normalization=config$msstats$normalization_method,
Expand Down Expand Up @@ -81,17 +82,17 @@

cat(sprintf('\tFITTING CONTRASTS:\t%s\n',paste(rownames(contrasts),collapse=',')))
write.table(mssquant$ProcessedData, file=gsub('.txt','-mss-normalized.txt',config$files$output), eol="\n", sep="\t", quote=F, row.names=F, col.names=T)
results = groupComparison(data = mssquant, contrast.matrix = contrasts)
results <- groupComparison(data = mssquant, contrast.matrix = contrasts)
write.table(results$ComparisonResult, file=config$files$output, eol="\n", sep="\t", quote=F, row.names=F, col.names=T)
write.table(results$ModelQC, file=gsub(".txt","_ModelQC.txt",config$files$output), eol="\n", sep="\t", quote=F, row.names=F, col.names=T)
cat(sprintf(">> WRITTEN\t%s\n",config$files$output))
cat(">> MSstats IS DONE!\n")

#(1) Minimal number of biological replicates per condition
cat(">> CALCULATING SAMPLE SIZE FOR FUTURE EXPERIMENTS\n" )
results.ss1 <- designSampleSize(data=results$fittedmodel,numSample=TRUE,desiredFC=c(1.25,2),FDR=0.05,power=0.95)
results.ss2 <- designSampleSize(data=results$fittedmodel,numSample=TRUE,desiredFC=c(1.25,2),FDR=0.05,power=0.9)
results.ss3 <- designSampleSize(data=results$fittedmodel,numSample=TRUE,desiredFC=c(1.25,2),FDR=0.05,power=0.8)
results.ss = rbind( results.ss1, results.ss2, results.ss3)
results.ss <- rbind( results.ss1, results.ss2, results.ss3)
write.table(results.ss, file=gsub(".txt","_sampleSize.txt",config$files$output), eol="\n", sep="\t", quote=F, row.names=F, col.names=T)

#(2) Power calculation
Expand Down
12 changes: 5 additions & 7 deletions R/writeExtras.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,24 +41,22 @@
sign_labels <- unique(sign_hits$Label)
cat(sprintf("\tSELECTED HITS FOR PLOTS WITH LFC BETWEEN %s AND %s AT %s FDR:\t%s\n",lfc_lower, lfc_upper, config$output_extras$FDR, nrow(sign_hits)/length(sign_labels)))

cat(paste("output_file: ", config$files$output, "\n"))

## REPRESENTING RESULTS AS HEATMAP
if(config$output_extras$heatmap){
## plot heat map for all contrasts
cat(">> PLOTTING HEATMAP FOR ALL CONTRASTS\n")
cat(">> PLOTTING HEATMAP FOR SIGNIFICANT CHANGES\n")
heat_labels <- .artms_prettyPrintHeatmapLabels(uniprot_acs=sign_hits$Protein,uniprot_ids=sign_hits$name, gene_names=sign_hits$Gene.names)
heat_data_w <- plotHeat(mss_F = sign_hits, out_file = gsub('.txt','-sign.pdf',config$files$output), names=heat_labels, cluster_cols=config$output_extras$heatmap_cluster_cols, display = config$output_extras$heatmap_display)
heat_data_w <- .artms_plotHeat(mss_F = sign_hits, out_file = gsub('.txt','-sign.pdf',config$files$output), names=heat_labels, cluster_cols=config$output_extras$heatmap_cluster_cols, display = config$output_extras$heatmap_display)
}

if(config$output_extras$volcano){
cat(">> PLOTTING VOLCANO PLOT\n")
cat(">> PLOTTING VOLCANO PLOT\n")
file_name <- gsub('.txt','-volcano.pdf',config$files$output)
artms_volcanoPlot(results_ann[grep(selected_labels,results_ann$Label),], lfc_upper, lfc_lower, FDR=config$output_extras$FDR, file_name=file_name)
}
}

#

# ------------------------------------------------------------------------------
#' @title Annotate the files based on the Uniprot accession id
#' @description Annotate the files based on the Uniprot accession id
Expand All @@ -81,7 +79,7 @@
Uniprot = NULL
for(org in species_split){
cat(sprintf("\tLOADING %s\n",org))
tmp <- read.delim(sprintf("%s/uniprot_protein_descriptions_%s.txt",uniprot_dir,org), stringsAsFactors=F, quote="")
tmp <- read.delim(sprintf("%s/uniprot_protein_descriptions_%s.txt", uniprot_dir,org), stringsAsFactors=F, quote="")
if(is.null(Uniprot)){
Uniprot = as.data.frame(tmp)
}else{
Expand Down
6 changes: 3 additions & 3 deletions man/artms_mergeEvidenceKeysByFiles.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 6683b63

Please sign in to comment.