From 7d62553f110a978007c3ef05fcec5fc9e0b45f62 Mon Sep 17 00:00:00 2001 From: Marcelo Ponce Date: Sun, 9 Feb 2020 15:50:09 +0000 Subject: [PATCH] version 1.0 --- DESCRIPTION | 19 +++ MD5 | 10 ++ NAMESPACE | 4 + R/bioC_logs.R | 98 +++++++++++++ README.md | 59 ++++++++ build/vignette.rds | Bin 0 -> 206 bytes inst/doc/bioC.logs.R | 6 + inst/doc/bioC.logs.Rmd | 22 +++ inst/doc/bioC.logs.html | 308 ++++++++++++++++++++++++++++++++++++++++ man/bioC_downloads.Rd | 27 ++++ vignettes/bioC.logs.Rmd | 22 +++ 11 files changed, 575 insertions(+) create mode 100644 DESCRIPTION create mode 100644 MD5 create mode 100644 NAMESPACE create mode 100644 R/bioC_logs.R create mode 100644 README.md create mode 100644 build/vignette.rds create mode 100644 inst/doc/bioC.logs.R create mode 100644 inst/doc/bioC.logs.Rmd create mode 100644 inst/doc/bioC.logs.html create mode 100644 man/bioC_downloads.Rd create mode 100644 vignettes/bioC.logs.Rmd diff --git a/DESCRIPTION b/DESCRIPTION new file mode 100644 index 0000000..b516dbb --- /dev/null +++ b/DESCRIPTION @@ -0,0 +1,19 @@ +Package: bioC.logs +Type: Package +Title: BioConductor Package Downloads Stats +Version: 1.0 +Date: 2020-01-30 +Author: Marcelo Ponce [aut, cre] +Maintainer: Marcelo Ponce +Description: Download stats reports from the BioConductor.org stats website. 
+Imports: +Suggests: knitr, devtools, roxygen2, testthat +License: GPL (>= 2) +URL: https://github.com/mponce0/bioC.logs +BugReports: https://github.com/mponce0/bioC.logs/issues +RoxygenNote: 6.1.99.9001 +VignetteBuilder: knitr +NeedsCompilation: no +Packaged: 2020-01-29 17:35:25 UTC; marcelo +Repository: CRAN +Date/Publication: 2020-02-09 16:50:09 UTC diff --git a/MD5 b/MD5 new file mode 100644 index 0000000..0015ebb --- /dev/null +++ b/MD5 @@ -0,0 +1,10 @@ +db052d72f4bcfc140af23549ba03869a *DESCRIPTION +04745393c053f4bdc0ee97deb4e3c010 *NAMESPACE +88120b57df9509f28d27ed96b6537da3 *R/bioC_logs.R +5bddfb0fc14039966bc23db9a3e1b016 *README.md +c24c7ae9e7bbb60722f5c5fdd6d7841c *build/vignette.rds +3c9e2d06e8560272e30c98b0c827478b *inst/doc/bioC.logs.R +5110dbd41904072d7d9d356009f9f177 *inst/doc/bioC.logs.Rmd +d3eef214a95bea5353ed7eda390e1b9c *inst/doc/bioC.logs.html +706eb66f1ed2a9b8ea80254cd78e985c *man/bioC_downloads.Rd +5110dbd41904072d7d9d356009f9f177 *vignettes/bioC.logs.Rmd diff --git a/NAMESPACE b/NAMESPACE new file mode 100644 index 0000000..5846712 --- /dev/null +++ b/NAMESPACE @@ -0,0 +1,4 @@ +# Generated by roxygen2: do not edit by hand + +export(bioC_downloads) +importFrom(utils,read.table) diff --git a/R/bioC_logs.R b/R/bioC_logs.R new file mode 100644 index 0000000..9b11626 --- /dev/null +++ b/R/bioC_logs.R @@ -0,0 +1,98 @@ +bioC_downloads <- function(pckg=NULL, format="bioC", verbose=TRUE) { +#' function to download logs from bioConductor stats +#' @param pckg list of packages names +#' @param format two options: "bioC" (default) will report the downloads as reported by bioconductor, ie. "Year Month Nb_of_distinct_IPs Nb_of_downloads"; or, "CRAN" will report as CRAN logs does, ie. "Date Nb_of_downloads package_Name" +#' @param verbose boolean flag indicating whether to print information about the processes... 
+#' +#' @return a list containing a dataframe per package entered with columns as indicated by the format argument +#' +#' @importFrom utils read.table +#' @export +#' +#' @examples +#' bioC_downloads(c("ABarray","a4Classif")) +#' bioC_downloads("edgeR",verbose=FALSE) +#' edgeR.logs <- bioC_downloads("edgeR",format="CRAN") +#' + + ## function for error handling + errorHandling.Msg <- function(condition,pck) { + message("A problem was detected when trying to retrieve the data for the package: ",pck) + if (grepl("404 Not Found",condition)) { + message("It is possible that you misspeled the name of this package! Please check!") + } else { + message("It is possible that your internet connection is down! Please check!") + } + message(condition,'\n') + + # update problems counter + pkg.env$problems <- pkg.env$problems + 1 + } + + + # Define bioConductor URL and file ending + bioC.url <- "http://bioconductor.org/packages/stats/bioc/" + ending <- "_stats.tab" + + # initialize container for results + pckgs.stats <-c() + + # check valid argument + if (is.null(pckg) || !is.character(pckg)) { + warning("Must specify a valid package name!") + return(NULL) + } + + # Counter for detection or problems, defined within the pckg environ to avoid global variables, ie. <<- + pkg.env <- new.env() + pkg.env$problems <- 0 + + # process package list + for (i in seq_along(pckg)) { + pck <- pckg[i] + + pckgFile <- paste0(pck,'/',pck,ending) + pckg.URL <- paste0(bioC.url,pckgFile) + + # Attempt to protect against bad internet conenction or misspelled package name + tryCatch( + { + pckg.data <- read.table(pckg.URL, header=TRUE) + + if (format=="CRAN") { + if (i==1 && verbose) message("Data will be returned as 'date downloads packageName'") + + # clean the entrie 'all' totals per year... 
+ clean.data <- pckg.data[!as.character(pckg.data$Month)=="all",] + new.df <- data.frame(date=as.Date(paste0("28",clean.data$Month,clean.data$Year), "%d%b%Y"), + downloads=as.numeric(clean.data$Nb_of_downloads), package=pck) + pckg.data <- new.df + } + + pckgs.stats[[i]] <- pckg.data + }, + + # warning + warning = function(cond) { + errorHandling.Msg(cond,pck) + }, + # error + error = function(e){ + errorHandling.Msg(e,pck) + } + ) + } + + # final report + if (verbose) { + message(paste(i-pkg.env$problems)," packages processed from a total of ",i," requests!") + if ((i-pkg.env$problems) != 0) message("Data was retrieved from ",bioC.url," using the *",format,"* format.") + + # problems report + if (pkg.env$problems != 0) + message(pkg.env$problems," problems detected, the associated entry will be set to NULL") + } + + # return results + return(pckgs.stats) +} diff --git a/README.md b/README.md new file mode 100644 index 0000000..e87e9b4 --- /dev/null +++ b/README.md @@ -0,0 +1,59 @@ +# bioC.logs + +## Introduction +This package allows you to download the statistics of BioConductor packages' +downloads as reported by http://bioconductor.org/packages/stats/. + + +## Usage +The main function of this package is called ```bioC_downloads```. +The function accepts several arguments: `packages names`, `format` and `verbose`. + +argument | Description +-----------|--------------- +`packages names` | is the name(s) of the package(s) you want to download the stats, for multiple package it should be a list of the packages names +`format` | accepts two options: `"bioC"` (default) will report the downloads as reported by bioconductor, ie. *"Year Month Nb_of_distinct_IPs Nb_of_downloads"*; or, `"CRAN"` will report as CRAN logs does, ie. 
*"Date Nb_of_downloads package_Name"* +`verbose` | is a boolean flag indicating whether to print information about the processes +--------------------------- + +* The function will return a list containing a dataframe per package entered, with columns as indicated by the `format` argument. +Notice that when the `format` is set to "CRAN", the date will be formatted as day-month-year. Because BioConductor reports only totals per month, the "day" in this case will be set to **28** for every month. + +* The function will also attempt to report when a package name has been misspelled or the server is simply not reachable. +If you receive warning messages, please check for either of these situations. + + + +## Installation + +To use the "bioC.logs" package, you first need to install it. + +The stable version can be downloaded from the CRAN repository: +``` +install.packages("bioC.logs") +``` + +To obtain the development version, install it from the GitHub repository as follows: +``` +# need devtools for installing from the github repo +install.packages("devtools") + +# install bioC.logs +devtools::install_github("mponce0/bioC.logs") + +# load bioC.logs +library(bioC.logs) +``` + + +## Examples +You will need an active internet connection, as bioC.logs will download the +reports from the BioConductor website on demand. 
+ +``` +bioC_downloads(c("ABarray","a4Classif")) + +bioC_downloads("edgeR",verbose=FALSE) + +edgeR.logs <- bioC_downloads("edgeR",format="CRAN") +``` diff --git a/build/vignette.rds b/build/vignette.rds new file mode 100644 index 0000000000000000000000000000000000000000..da369dceaf735737c18891c7bf289492ca19856a GIT binary patch literal 206 zcmV;<05Sg`iwFP!000001B>8dU|?WkU}j}zU}6R`nT3G_8xRWsF(U&D11FH?P0Gx7 z*2~FHFV+jnO+k|sgi9&}BqnDkrl+DQ;6v!nD9O!1ljTM-8|V^YuoOfI5U?;IDP_$| z%uOvu)6edbT9BHT0uug*T|eW0G%Xz7sg>pVMKC=mYS^G^SllvmQepOnWR}1=%mFTL zP$pv#x*IUeM{z%AQGU4|%n!VvaBlz-|A7GAqscjm#poX8N=Yn9)JrP@>Ib + %\VignetteIndexEntry{bioC.logs Package} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} + %\usepackage[UTF-8]{inputenc} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +```{r child = "../README.md"} +``` + diff --git a/inst/doc/bioC.logs.html b/inst/doc/bioC.logs.html new file mode 100644 index 0000000..392da00 --- /dev/null +++ b/inst/doc/bioC.logs.html @@ -0,0 +1,308 @@ + + + + + + + + + + + + + + + + + +bioC.logs: Stats logs from BioConductor packages downloads + + + + + + + + + + + + + + + + + + +

bioC.logs: Stats logs from BioConductor packages downloads

+

Marcelo Ponce

+

2020-01-29

+ + + +
+

bioC.logs

+
+

Introduction

+

This package allows you to download the statistics of BioConductor packages’ downloads as reported by http://bioconductor.org/packages/stats/.

+
+
+

Usage

+

The main function of this package is called bioC_downloads. The function accepts three arguments: the package names (pckg), format and verbose.

+ ++++ + + + + + + + + + + + + + + + + + + + + +
argument | Description
packages names | is the name(s) of the package(s) whose download stats you want; for multiple packages it should be a character vector of the package names, e.g. c("ABarray","a4Classif")
format | accepts two options: "bioC" (default) will report the downloads as reported by bioconductor, i.e. “Year Month Nb_of_distinct_IPs Nb_of_downloads”; or, "CRAN" will report as CRAN logs does, i.e. “Date Nb_of_downloads package_Name”
verbose | is a boolean flag indicating whether to print information about the processes
+
+
    +
  • The function will return a list containing a dataframe per package entered, with columns as indicated by the format argument. Notice that when the format is set to “CRAN”, the date will be formatted as day-month-year. Because BioConductor reports only totals per month, the “day” in this case will be set to 28 for every month.

  • +
  • The function will also attempt to report when a package name has been misspelled or the server is simply not reachable. If you receive warning messages, please check for either of these situations.

  • +
+
+
+

Installation

+

To use the “bioC.logs” package, you first need to install it.

+

The stable version can be downloaded from the CRAN repository:

+
install.packages("bioC.logs")
+

To obtain the development version, install it from the GitHub repository as follows:

+
# need devtools for installing from the github repo
+install.packages("devtools")
+
+# install bioC.logs
+devtools::install_github("mponce0/bioC.logs")
+
+# load bioC.logs
+library(bioC.logs)
+
+
+

Examples

+

You will need an active internet connection, as bioC.logs will download the reports from the BioConductor website on demand.

+
bioC_downloads(c("ABarray","a4Classif"))
+
+bioC_downloads("edgeR",verbose=FALSE)
+
+edgeR.logs <- bioC_downloads("edgeR",format="CRAN")
+
+
+ + + + + + + + + + + diff --git a/man/bioC_downloads.Rd b/man/bioC_downloads.Rd new file mode 100644 index 0000000..f7630cd --- /dev/null +++ b/man/bioC_downloads.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/bioC_logs.R +\name{bioC_downloads} +\alias{bioC_downloads} +\title{function to download logs from bioConductor stats} +\usage{ +bioC_downloads(pckg = NULL, format = "bioC", verbose = TRUE) +} +\arguments{ +\item{pckg}{list of packages names} + +\item{format}{two options: "bioC" (default) will report the downloads as reported by bioconductor, ie. "Year Month Nb_of_distinct_IPs Nb_of_downloads"; or, "CRAN" will report as CRAN logs does, ie. "Date Nb_of_downloads package_Name"} + +\item{verbose}{boolean flag indicating whether to print information about the processes...} +} +\value{ +a list containing a dataframe per package entered with columns as indicated by the format argument +} +\description{ +function to download logs from bioConductor stats +} +\examples{ +bioC_downloads(c("ABarray","a4Classif")) +bioC_downloads("edgeR",verbose=FALSE) +edgeR.logs <- bioC_downloads("edgeR",format="CRAN") + +} diff --git a/vignettes/bioC.logs.Rmd b/vignettes/bioC.logs.Rmd new file mode 100644 index 0000000..3a6120e --- /dev/null +++ b/vignettes/bioC.logs.Rmd @@ -0,0 +1,22 @@ +--- +title: "bioC.logs: Stats logs from BioConductor packages downloads" +author: "Marcelo Ponce" +date: "`r Sys.Date()`" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{bioC.logs Package} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} + %\usepackage[UTF-8]{inputenc} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +```{r child = "../README.md"} +``` +