From 98cbd286d8b5883acbbafe8c08d64d2404d0b6a6 Mon Sep 17 00:00:00 2001 From: Marcelo Ponce Date: Thu, 13 Feb 2020 14:30:07 +0000 Subject: [PATCH] version 1.1 --- DESCRIPTION | 8 +- MD5 | 25 +++-- NAMESPACE | 1 + NEWS | 2 + R/auxs-utils.R | 226 ++++++++++++++++++++++++++++++++++++++++ R/bioC_logs.R | 169 ++++++++++++++++++++++++++---- README.md | 47 ++++++++- inst/doc/bioC.logs.html | 64 ++++++++++-- man/Xyear.from.now.Rd | 12 +++ man/bioC_downloads.Rd | 26 ++++- man/checkDate.Rd | 15 +++ man/checkDates.Rd | 20 ++++ man/checkValidDates.Rd | 20 ++++ man/daysInMonth.Rd | 18 ++++ man/last.day.month.Rd | 18 ++++ man/lst.year.Rd | 12 +++ man/origin.of.times.Rd | 12 +++ man/today.Rd | 12 +++ man/year.from.now.Rd | 12 +++ man/year.to.date.Rd | 12 +++ 20 files changed, 682 insertions(+), 49 deletions(-) create mode 100644 NEWS create mode 100644 R/auxs-utils.R create mode 100644 man/Xyear.from.now.Rd create mode 100644 man/checkDate.Rd create mode 100644 man/checkDates.Rd create mode 100644 man/checkValidDates.Rd create mode 100644 man/daysInMonth.Rd create mode 100644 man/last.day.month.Rd create mode 100644 man/lst.year.Rd create mode 100644 man/origin.of.times.Rd create mode 100644 man/today.Rd create mode 100644 man/year.from.now.Rd create mode 100644 man/year.to.date.Rd diff --git a/DESCRIPTION b/DESCRIPTION index b516dbb..5dbffb9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: bioC.logs Type: Package Title: BioConductor Package Downloads Stats -Version: 1.0 -Date: 2020-01-30 +Version: 1.1 +Date: 2020-02-10 Author: Marcelo Ponce [aut, cre] Maintainer: Marcelo Ponce Description: Download stats reports from the BioConductor.org stats website. 
@@ -14,6 +14,6 @@ BugReports: https://github.com/mponce0/bioC.logs/issues RoxygenNote: 6.1.99.9001 VignetteBuilder: knitr NeedsCompilation: no -Packaged: 2020-01-29 17:35:25 UTC; marcelo +Packaged: 2020-02-12 17:03:11 UTC; marcelo Repository: CRAN -Date/Publication: 2020-02-09 16:50:09 UTC +Date/Publication: 2020-02-13 15:30:07 UTC diff --git a/MD5 b/MD5 index 0015ebb..d94dbfb 100644 --- a/MD5 +++ b/MD5 @@ -1,10 +1,23 @@ -db052d72f4bcfc140af23549ba03869a *DESCRIPTION -04745393c053f4bdc0ee97deb4e3c010 *NAMESPACE -88120b57df9509f28d27ed96b6537da3 *R/bioC_logs.R -5bddfb0fc14039966bc23db9a3e1b016 *README.md +cec4d32abf0e3d070a0b76b14394256f *DESCRIPTION +742ae3d1e47c80c1faf14b35923ae165 *NAMESPACE +6d593c08f881f2ef265bdab68ab34f03 *NEWS +99a5693c331121b6f5ba8e4139d53dc0 *R/auxs-utils.R +9ea96fcb093cadae59642a10cc8baafd *R/bioC_logs.R +857fe7fc8e1ff3ee0ed826625d077bcb *README.md c24c7ae9e7bbb60722f5c5fdd6d7841c *build/vignette.rds 3c9e2d06e8560272e30c98b0c827478b *inst/doc/bioC.logs.R 5110dbd41904072d7d9d356009f9f177 *inst/doc/bioC.logs.Rmd -d3eef214a95bea5353ed7eda390e1b9c *inst/doc/bioC.logs.html -706eb66f1ed2a9b8ea80254cd78e985c *man/bioC_downloads.Rd +83e74db85b382b6011b9e5e70c8f35f8 *inst/doc/bioC.logs.html +c2542c3889883ceeb60365696d671b92 *man/Xyear.from.now.Rd +d939c259b47a3f438d210cb5127527dd *man/bioC_downloads.Rd +58b7a6c109ebcfdf9d2578aa21bed243 *man/checkDate.Rd +72e1011f8b3c82df6458a83bdb8b034a *man/checkDates.Rd +8146f5833d2382c033c3c26239e521d6 *man/checkValidDates.Rd +206ebc429371876226b03db58715041a *man/daysInMonth.Rd +b9ed70eb152c62618e6b96e65f73a867 *man/last.day.month.Rd +0fe5347f1b041d058ec053cfbc9d7180 *man/lst.year.Rd +87c66280907da3c404fb9a9f095a7ef6 *man/origin.of.times.Rd +ae5ca6247aff0015022d53fdb964bd00 *man/today.Rd +0f9e38ce24a066f6e689e4cf66c06b87 *man/year.from.now.Rd +c1b77474fcefb739d9d48ae274b09d2a *man/year.to.date.Rd 5110dbd41904072d7d9d356009f9f177 *vignettes/bioC.logs.Rmd diff --git a/NAMESPACE b/NAMESPACE index 5846712..3ef82c2 
100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,4 +1,5 @@ # Generated by roxygen2: do not edit by hand export(bioC_downloads) +importFrom(stats,na.omit) importFrom(utils,read.table) diff --git a/NEWS b/NEWS new file mode 100644 index 0000000..137e238 --- /dev/null +++ b/NEWS @@ -0,0 +1,2 @@ +February 2020: added options for indicating range of dates (to/from & when); adaptive last day of the month; more checks from CRAN format (remove future dates) _ ver 1.1 +February 2020: First release of "bioC.logs" package _ ver 1.0 diff --git a/R/auxs-utils.R b/R/auxs-utils.R new file mode 100644 index 0000000..c16b658 --- /dev/null +++ b/R/auxs-utils.R @@ -0,0 +1,226 @@ +# auxs-utils.R +# -- M.Ponce + + +################################################################################################# +## Auxiliary Utilities file for the bioC.logs package +# +# Generic auxiliary functions +# +################################################################################################# + + +### date's aux fns +year.to.date <- function(){ +#' function that returns the range dates for the period year-to-date +#' @keywords internal + cur.date <- Sys.Date() + + cur.year <- substr(cur.date,1,4) + return(list(paste(cur.year,"01","01",sep='-'),as.character(cur.date))) +} + +lst.year <- function() { +#' function that returns the range dates for the last year +#' @keywords internal + cur.date <- Sys.Date() + + last.year <- as.integer(substr(cur.date,1,4))-1 + return(list(paste(last.year,"01","01",sep='-'),paste(last.year+1,"01","01",sep='-'))) + +} + +year.from.now <- function() { +#' function that returns the date from one year ago +#' @keywords internal + cur.date <- Sys.Date() + cur.year <- substr(cur.date,1,4) + lst.year <- as.integer(cur.year) - 1 + t0 <- paste(lst.year,substr(cur.date,5,10),sep="") + + return(list(t0,cur.date)) +} + +today <- function() { +#' function that returns the current date +#' @keywords internal + t1 <- Sys.Date() + # Will substract 1 to do not consider 
today, but yesterday + # this matches the definition from cran_downloads too + t1 <- t1 - 1 + message("Ending date was not specifed, will assume today: ",t1) + return(format(t1,format="%d-%m-%Y")) +} + +origin.of.times <- function() { +#' function that provides a "beginning of times" default date +#' @keywords internal + t0 <- as.Date("1980-01-01",format="%Y-%m-%d") + message("Initial date was not specified, will assume:", t0) + return(format(t0,format="%d-%m-%Y")) +} + + + +Xyear.from.now <- function() { +#' function that returns the date from one year ago +#' @keywords internal + cur.date <- Sys.Date()-1 + cur.year <- substr(cur.date,1,4) + lst.year <- as.integer(cur.year) - 1 + t0 <- paste(lst.year,substr(cur.date,5,10),sep="") + + message("Starting date was not specified, will assume a year from now: ",t0) + return(t0) +} + +checkDates <- function(t0,t1) { +#' function to check dates, ie that t0 as.Date(t1)) { + # flip dates, t0 will be set to the older date + ttemp <- t0 + t0 <- t1 + t1 <- ttemp + } else if (as.Date(t0) == as.Date(t1)) { + # dates should be different + stop(t0," and ", t1," should be different!") + } + + return(list(t0,t1)) +} + +checkDate <- function(date.candidate) { +#' function to check date format for "MM-YYYY" +#' @param date.candidate date candidate +#' +#' +#' @keywords internal + + # bisecting month and year... 
+ MM <- as.integer(substr(date.candidate,1,2)) + YYYY <- as.integer(substr(date.candidate,4,7)) + + if ( !is.numeric(MM) | ((MM<1) | (MM>12)) ) + stop(paste("Problem detected with date: ",date.candidate,"\n","Dates should be specified in 'MM-YYYY' format, with MM from '01' to '12'")) + + if (!is.numeric(YYYY)) + stop(paste("Problem detected with date: ",date.candidate,"\n","Dates should be specified in 'MM-YYYY' format, with MM from '01' to '12'")) +} + + +checkValidDates <- function(t0,t1) { +#' function to check dates, ie that t0 date.range[[1]]) & (pckg.dates < date.range[[2]])),]) +# } else { +# message("Unrecognized option for argument 'when', valid ones are: ", paste(when.opts,collapse=" ")) +# } } - pckgs.stats[[i]] <- pckg.data + + # reformat data into CRAN format + if (format=="CRAN") { + if (ind.pckg==1 && verbose) message("Data will be returned as 'date counts packageName'") + + # clean the entrie 'all' totals per year... + clean.data <- pckg.data[!as.character(pckg.data$Month)=="all",] + # set the date to the last day of the month + day.assign <- sapply(paste(clean.data$Month,clean.data$Year,sep='-'),last.day.month) + pckg.dates <- paste(clean.data$Year,clean.data$Month,day.assign,sep='-') + + #new.df <- data.frame( date=as.Date(paste0(day.assign,clean.data$Month,clean.data$Year), "%d%b%Y"), + new.df <- data.frame( date=as.Date(pckg.dates, '%Y-%b-%d'), + #.#.#new.df <- data.frame( date=as.Date(paste0(last.day.month(clean.data$Month,clean.data$Year),clean.data$Month,clean.data$Year), "%d%b%Y"), + # in cranlogs 'Nb_of_downloads' are referred as 'counts' + counts=as.numeric(clean.data$Nb_of_downloads), + package=pck ) + + # in cranlogs, also the dates are ordered... 
+ new.df <- new.df[order(as.Date(new.df$date,format='%Y-%b-%d')),] + pckg.data <- new.df + } + + pckgs.stats[[ind.pckg]] <- pckg.data }, # warning @@ -85,8 +207,11 @@ bioC_downloads <- function(pckg=NULL, format="bioC", verbose=TRUE) { # final report if (verbose) { - message(paste(i-pkg.env$problems)," packages processed from a total of ",i," requests!") - if ((i-pkg.env$problems) != 0) message("Data was retrieved from ",bioC.url," using the *",format,"* format.") + message(paste(ind.pckg-pkg.env$problems)," packages processed from a total of ",ind.pckg," requests!") + if ((ind.pckg-pkg.env$problems) != 0) { + message("Data was retrieved from ",bioC.url," using the *",format,"* format.") + if (verbose) bioC_disclaimer() + } # problems report if (pkg.env$problems != 0) @@ -96,3 +221,7 @@ bioC_downloads <- function(pckg=NULL, format="bioC", verbose=TRUE) { # return results return(pckgs.stats) } + + +##### //////////////////////////////////////////////////////////////////////////// ##### + diff --git a/README.md b/README.md index e87e9b4..7d7f128 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,8 @@ # bioC.logs +[![CRAN_Status_Badge](http://www.r-pkg.org/badges/version-last-release/bioC.logs)](https://cran.r-project.org/package=bioC.logs) +[![Downloads](https://cranlogs.r-pkg.org/badges/bioC.logs)](https://cran.r-project.org/package=bioC.logs) + ## Introduction This package allows you to download the statistics of BioConductor packages' downloads as reported by http://bioconductor.org/packages/stats/. @@ -13,22 +16,37 @@ argument | Description -----------|--------------- `packages names` | is the name(s) of the package(s) you want to download the stats, for multiple package it should be a list of the packages names `format` | accepts two options: `"bioC"` (default) will report the downloads as reported by bioconductor, ie. *"Year Month Nb_of_distinct_IPs Nb_of_downloads"*; or, `"CRAN"` will report as CRAN logs does, ie. 
*"Date Nb_of_downloads package_Name"* +`from/to` | optional arguments to indicate range of dates to recover the data within -- can NOT be used in combination with `when` +`when` | optional argument to specify the range of dates to recover the data within -- can NOT be used in combination with `from/to`; possible options are "`ytd`","`year-to-date`","`year-from-now`","`last-year`" `verbose` | is a boolean flag indicating whether to print information about the processes --------------------------- -* The function will return a list containing a dataframe per package entered with columns as indicated by the `format` argument -Notice that when the `format` is set to "CRAN", the date will be formatted to days-month-year. Because BioConductor reports only totals per month the "day" in this case will be set to **28** for every month. + +## Features + +* The function will return a list containing a dataframe per package entered with columns as indicated by the `format` argument. +Notice that when the `format` is set to "CRAN", the date will be formatted to days-month-year. Because BioConductor reports only totals per month the "day" in this case will be set to the last day of the corresponding month/year. * The function will also attempt to report when a package names has been misspelled or just the server is not reachable. If you are receiving warning messages please check either of these situations. +### Observations +* The BioConductor website reports total downloads per month. +* The original data also includes an 'all' entry, representing the total downloads per year. +* All the months are reported, regardless of whether this is the current year and data is still not available. +* You may notice this when requesting the data using the default format from BioConductor, ie. format="bioC". + +* When using `format="CRAN"`: + - the `all` entry is removed + - the last month to be reported is the previous to the current one, ie. 
months in the future with no data or the current one (with incomplete data) will not be reported + - the entries will be ordered chronologically from the oldest to the newest records ## Installation For using the "bioC.logs" package, first you will need to install it. -Thes table version can be downloaded from the CRAN repository: +The stable version can be downloaded from the CRAN repository: ``` install.packages("bioC.logs") ``` @@ -40,7 +58,11 @@ install.packages("devtools") # install bioC.logs devtools::install_github("mponce0/bioC.logs") +``` +For using the package, either the stable or development version, just load it +using the library function: +``` # load bioC.logs library(bioC.logs) ``` @@ -51,9 +73,28 @@ You will need an active internet connection, as bioC.logs will download the reports from the BioConductor website on demand. ``` +# it is possible to download multiple packages, the data will be returned in a list with one entry per package bioC_downloads(c("ABarray","a4Classif")) +# the 'verbose' option allows you to turn off information reported by the function bioC_downloads("edgeR",verbose=FALSE) +# setting format="CRAN", will structure the data "a-la-CRAN" edgeR.logs <- bioC_downloads("edgeR",format="CRAN") +# data is still returned in a list +str(edgeR.logs) +# access data from package +edgeR.logs[[1]] + +# get the data in a particular range of dates using 'when' +edgeR.logs <- bioC_downloads("edgeR", when='last-year', format='bioC') + +# get the data in a particular range of dates using 'to/from' +# not specifying 'from' will assume since the very first record +edgeR.logs <- bioC_downloads("edgeR", to='03-2015', format='bioC') + +# not specifying 'to' will assume until the current day +edgeR.logs <- bioC_downloads("edgeR", from='03-2015', format='bioC') + +edgeR.logs <- bioC_downloads("edgeR", from='03-2015',to='05-2016', format='bioC') ``` diff --git a/inst/doc/bioC.logs.html b/inst/doc/bioC.logs.html index 392da00..5e8dccc 100644 --- 
a/inst/doc/bioC.logs.html +++ b/inst/doc/bioC.logs.html @@ -13,7 +13,7 @@ - + bioC.logs: Stats logs from BioConductor packages downloads @@ -218,12 +218,13 @@

bioC.logs: Stats logs from BioConductor packages downloads

Marcelo Ponce

-

2020-01-29

+

2020-02-12

bioC.logs

+

CRAN_Status_Badge Downloads

Introduction

This package allows you to download the statistics of BioConductor packages’ downloads as reported by http://bioconductor.org/packages/stats/.

@@ -252,40 +253,81 @@

Usage

accepts two options: "bioC" (default) will report the downloads as reported by bioconductor, ie. “Year Month Nb_of_distinct_IPs Nb_of_downloads”; or, "CRAN" will report as CRAN logs does, ie. “Date Nb_of_downloads package_Name” -verbose -is a boolean flag indicating whether to print information about the processes +from/to +optional arguments to indicate range of dates to recover the data within – can NOT be used in combination with when + + +when +optional argument to specify the range of dates to recover the data within – can NOT be used in combination with from/to; possible options are “ytd”, “year-to-date”, “year-from-now”, “last-year” + + +verbose +is a boolean flag indicating whether to print information about the processes
+
+
+

Features

    -
  • The function will return a list containing a dataframe per package entered with columns as indicated by the format argument Notice that when the format is set to “CRAN”, the date will be formatted to days-month-year. Because BioConductor reports only totals per month the “day” in this case will be set to 28 for every month.

  • +
  • The function will return a list containing a dataframe per package entered with columns as indicated by the format argument. Notice that when the format is set to “CRAN”, the date will be formatted to days-month-year. Because BioConductor reports only totals per month the “day” in this case will be set to the last day of the corresponding month/year.

  • The function will also attempt to report when a package names has been misspelled or just the server is not reachable. If you are receiving warning messages please check either of these situations.

+
+

Observations

+
    +
  • The BioConductor website reports total downloads per month.

  • +
  • The original data also includes an ‘all’ entry, representing the total downloads per year.

  • +
  • All the months are reported, regardless of whether this is the current year and data is still not available.

  • +
  • You may notice this when requesting the data using the default format from BioConductor, ie. format=“bioC”.

  • +
  • When using format="CRAN":

    +
      +
    • the all entry is removed
    • +
    • the last month to be reported is the previous to the current one, ie. months in the future with no data or the current one (with incomplete data) will not be reported
    • +
    • the entries will be ordered chronologically from the oldest to the newest records
    • +
  • +
+

Installation

For using the “bioC.logs” package, first you will need to install it.

-

Thes table version can be downloaded from the CRAN repository:

+

The stable version can be downloaded from the CRAN repository:

install.packages("bioC.logs")

To obtain the development version you can get it from the github repository, i.e.

# need devtools for installing from the github repo
 install.packages("devtools")
 
 # install bioC.logs
-devtools::install_github("mponce0/bioC.logs")
-
-# load bioC.logs
+devtools::install_github("mponce0/bioC.logs")
+

For using the package, either the stable or development version, just load it using the library function:

+
# load bioC.logs
 library(bioC.logs)

Examples

You will need an active internet connection, as bioC.logs will download the reports from the BioConductor website on demand.

-
bioC_downloads(c("ABarray","a4Classif"))
+
# it is possible to download multiple packages, the data will be returned in a list with one entry per package
+bioC_downloads(c("ABarray","a4Classif"))
 
+# the 'verbose' option allows you to turn off information reported by the function
 bioC_downloads("edgeR",verbose=FALSE)
 
-edgeR.logs <- bioC_downloads("edgeR",format="CRAN")
+# setting format="CRAN", will structure the data "a-la-CRAN" +edgeR.logs <- bioC_downloads("edgeR",format="CRAN") +# data is still returned in a list +str(edgeR.logs) +# access data from package +edgeR.logs[[1]] + +# get the data in a particular range of dates using 'when' +edgeR.logs <- bioC_downloads("edgeR", when='last-year', format='bioC') + +# get the data in a particular range of dates using 'to/from' +# not specifying 'from' will assume since the very first record +edgeR.logs <- bioC_downloads("edgeR", to='03-2015', format='bioC') + +# not specifying 'to' will assume until the current day +edgeR.logs <- bioC_downloads("edgeR", from='03-2015', format='bioC') + +edgeR.logs <- bioC_downloads("edgeR", from='03-2015',to='05-2016', format='bioC')
diff --git a/man/Xyear.from.now.Rd b/man/Xyear.from.now.Rd new file mode 100644 index 0000000..f53d431 --- /dev/null +++ b/man/Xyear.from.now.Rd @@ -0,0 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/auxs-utils.R +\name{Xyear.from.now} +\alias{Xyear.from.now} +\title{function that returns the date from one year ago} +\usage{ +Xyear.from.now() +} +\description{ +function that returns the date from one year ago +} +\keyword{internal} diff --git a/man/bioC_downloads.Rd b/man/bioC_downloads.Rd index f7630cd..1ce715a 100644 --- a/man/bioC_downloads.Rd +++ b/man/bioC_downloads.Rd @@ -4,12 +4,25 @@ \alias{bioC_downloads} \title{function to download logs from bioConductor stats} \usage{ -bioC_downloads(pckg = NULL, format = "bioC", verbose = TRUE) +bioC_downloads( + pckg = NULL, + format = "bioC", + from = NULL, + to = NULL, + when = NULL, + verbose = TRUE +) } \arguments{ \item{pckg}{list of packages names} -\item{format}{two options: "bioC" (default) will report the downloads as reported by bioconductor, ie. "Year Month Nb_of_distinct_IPs Nb_of_downloads"; or, "CRAN" will report as CRAN logs does, ie. "Date Nb_of_downloads package_Name"} +\item{format}{two options: "bioC" (default) will report the downloads as reported by bioconductor, ie. "Year Month Nb_of_distinct_IPs Nb_of_downloads"; or, "CRAN" will report as CRAN logs does, ie. "Date counts package_Name" (in cranlogs 'Nb_of_downloads' are referred as 'counts')} + +\item{from}{date in "MM-YYYY" format, specifying the initial date to be considered (optional argument)} + +\item{to}{date in "MM-YYYY" format, specifying the final date to be considered (optional argument)} + +\item{when}{optional argument, to specify pre-defined range dates; ie. 
'ytd', 'year-to-date', 'last-year'} \item{verbose}{boolean flag indicating whether to print information about the processes...} } @@ -21,7 +34,10 @@ function to download logs from bioConductor stats } \examples{ bioC_downloads(c("ABarray","a4Classif")) -bioC_downloads("edgeR",verbose=FALSE) -edgeR.logs <- bioC_downloads("edgeR",format="CRAN") - +bioC_downloads("edgeR", verbose=FALSE) +edgeR.logs <- bioC_downloads("edgeR", format="CRAN") +edgeR.logs <- bioC_downloads("edgeR", when='last-year', format='bioC') +edgeR.logs <- bioC_downloads("edgeR", to='03-2015', format='bioC') +edgeR.logs <- bioC_downloads("edgeR", from='03-2015', format='bioC') +edgeR.logs <- bioC_downloads("edgeR", from='03-2015',to='05-2016', format='bioC') } diff --git a/man/checkDate.Rd b/man/checkDate.Rd new file mode 100644 index 0000000..09d7323 --- /dev/null +++ b/man/checkDate.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/auxs-utils.R +\name{checkDate} +\alias{checkDate} +\title{function to check date format for "MM-YYYY"} +\usage{ +checkDate(date.candidate) +} +\arguments{ +\item{date.candidate}{date candidate} +} +\description{ +function to check date format for "MM-YYYY" +} +\keyword{internal} diff --git a/man/checkDates.Rd b/man/checkDates.Rd new file mode 100644 index 0000000..a38e51c --- /dev/null +++ b/man/checkDates.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/auxs-utils.R +\name{checkDates} +\alias{checkDates} +\title{function to check dates, ie that t0