Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/all digits (closes #29) #30

Merged
merged 6 commits into from
Nov 5, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions ChangeLog
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
2016-11-05 Dirk Eddelbuettel <edd@debian.org>

* src/anytime.cpp (sformats[]): Add fractional seconds to four more
datetimes; (isLengthEightAndAllDigits): Another refinement;
(setDebug): add simple debug routine

2016-11-04 Dirk Eddelbuettel <edd@debian.org>

* src/anytime.cpp (convertToTime): Add logic to cope with the extended
'yyyymmdd hhmm[ss[.fff]]' format which needs treatment before parsing
* tests/allFormats.R: Added tests

2016-10-30 Dirk Eddelbuettel <edd@debian.org>

* R/anytime.R (testOutput): New simple (unexported) string output function
Expand Down
4 changes: 4 additions & 0 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,7 @@ testOutput_impl <- function(fmt, s) {
.Call('anytime_testOutput_impl', PACKAGE = 'anytime', fmt, s)
}

# R-level wrapper: hands 'mode' to the compiled entry point
# 'anytime_setDebug' in package 'anytime' via .Call() and returns
# whatever that call returns.
setDebug <- function(mode) {
.Call('anytime_setDebug', PACKAGE = 'anytime', mode)
}

12 changes: 10 additions & 2 deletions inst/NEWS.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,19 @@
added to parse input as coordinated universal time; the
functionality is also available in \code{anytime()} and
\code{anydate()} via a new argument \code{asUTC} (PR \ghpr{22})
\item New (date)time format for RFC822-alike dates.
\item New (date)time format for RFC822-alike dates, and more of the
existing datetime formats now support fractional seconds
\item Extended functionality to support not only \sQuote{YYYYMMDD}
(without a separator, and not covered by Boost) but also that format
followed by \sQuote{HHMM}, \sQuote{HHMMSS} and \sQuote{HHMMSS.ffffff}
\item Documentation and tests have been expanded; typos corrected
\item New (unexported) helper functions \code{setTZ}, \code{testOutput}
\item New (unexported) helper functions \code{setTZ},
\code{testOutput}, \code{setDebug}
\item The \code{testFormat} (and \code{testOutput}) functions cannot
be called under RStudio (PR \ghpr{27} fixing issue \ghit{25})
\item Two functions are now protected from being called from within
RStudio to avoid interactions which we suspect stem from concurrent
use of two different (compiled) Boost versions
}
}

Expand Down
11 changes: 11 additions & 0 deletions src/RcppExports.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,3 +63,14 @@ BEGIN_RCPP
return rcpp_result_gen;
END_RCPP
}
// setDebug
// Declaration of the C++ routine that the wrapper below calls.
bool setDebug(const bool mode);
// SEXP-in / SEXP-out entry point for setDebug(): reads modeSEXP as a
// 'const bool', calls setDebug() with it and returns the result after
// passing it through Rcpp::wrap().
RcppExport SEXP anytime_setDebug(SEXP modeSEXP) {
// NOTE(review): BEGIN_RCPP / END_RCPP are macros defined outside this file;
// presumably they bracket the body with error handling -- confirm in Rcpp.
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< const bool >::type mode(modeSEXP);
rcpp_result_gen = Rcpp::wrap(setDebug(mode));
return rcpp_result_gen;
END_RCPP
}
84 changes: 72 additions & 12 deletions src/anytime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@

namespace bt = boost::posix_time;

// File-level diagnostic switch, changed through setDebug(). While it is true
// the parsing code in this file prints intermediate values to Rcpp::Rcout.
static bool debug = false;

const std::string sformats[] = {
"%Y-%m-%d %H:%M:%S%f",
"%Y/%m/%d %H:%M:%S%f",
Expand All @@ -44,10 +46,10 @@ const std::string sformats[] = {
"%b-%d-%Y %H:%M:%S%f",
"%d.%b.%Y %H:%M:%S%f",

"%d%b%Y %H%M%S",
"%d%b%Y %H:%M:%S",
"%d-%b-%Y %H%M%S",
"%d-%b-%Y %H:%M:%S",
"%d%b%Y %H%M%S%f",
"%d%b%Y %H:%M:%S%f",
"%d-%b-%Y %H%M%S%f",
"%d-%b-%Y %H:%M:%S%f",

"%Y-%B-%d %H:%M:%S%f",
"%Y/%B/%d %H:%M:%S%f",
Expand Down Expand Up @@ -180,6 +182,39 @@ double stringToTime(const std::string s, const bool asUTC=false) {
return ptToDouble(pt);
}

// Helper to peel off the first two tokens, if any, of a string. Used to
//   i) split 'yyyymmdd hhmmss[.fff]' into its date and time parts, and
//  ii) split possible fractional seconds off the time part.
//
// in     string to tokenise; it is only read, never modified
// split  the single delimiter character
// tok1   receives the first token, or "" if 'in' contains no token
// tok2   receives the second token, or "" if there is none
//
// Tokens are delimited the way strtok() would delimit them: leading
// delimiters are skipped and a run of consecutive delimiters counts as one
// separator. Anything following the second token is ignored.
//
// This is done with std::string searches instead of std::strtok() because
// strtok() requires a NUL-terminated delimiter *string* (the address of a
// lone char is not one), writes NUL bytes into the buffer it scans, and
// keeps hidden static state between calls.
void stringSplitter(/*const*/ std::string & in, const char split,
                    std::string & tok1, std::string& tok2) {

    const std::string::size_type none = std::string::npos;

    // Build the results in locals so the outcome stays correct even when a
    // caller passes the same object as 'in' and as one of the outputs.
    std::string first, second;

    const std::string::size_type beg1 = in.find_first_not_of(split);
    if (beg1 != none) {
        const std::string::size_type end1 = in.find(split, beg1);
        first = (end1 == none) ? in.substr(beg1) : in.substr(beg1, end1 - beg1);

        if (end1 != none) {
            // skip the whole run of delimiters before looking for token two
            const std::string::size_type beg2 = in.find_first_not_of(split, end1);
            if (beg2 != none) {
                const std::string::size_type end2 = in.find(split, beg2);
                second = (end2 == none) ? in.substr(beg2) : in.substr(beg2, end2 - beg2);
            }
        }
    }

    tok1 = first;
    tok2 = second;
    if (debug) Rcpp::Rcout << "In: " << in << " out: " << tok1 << " and " << tok2 << std::endl;
}

// Predicate for the separator-free 'YYYYMMDD' form: true only when the string
// has exactly eight characters and every one of them is a digit '0'..'9'.
// Plain std::string searching is used on purpose -- a regular expression would
// need either C++11 or an additional library with header / linking
// requirements (incl boost regex).
bool isLengthEightAndAllDigits(const std::string& s) {
    if (s.size() != 8) {
        return false;
    }
    // no character outside the digit set may occur anywhere in the string
    return s.find_first_not_of("0123456789") == std::string::npos;
}

template <class T, int RTYPE>
Rcpp::NumericVector convertToTime(const Rcpp::Vector<RTYPE>& sxpvec,
const std::string& tz = "UTC",
Expand All @@ -199,21 +234,40 @@ Rcpp::NumericVector convertToTime(const Rcpp::Vector<RTYPE>& sxpvec,
// but with templating to T this is straightforward enough
T val = sxpvec[i];
std::string s = boost::lexical_cast<std::string>(val);

if (s == "NA") {
pv[i] = NA_REAL;

} else {
if (debug) Rcpp::Rcout << "before tests: " << s << std::endl;
// Boost Date_Time gets the 'YYYYMMDD' format wrong, even
// when given as an explicit argument. So we need to test here.
// While we're at it, may as well test for obviously wrong data.
int l = s.size();
if (l < 8) { // too short
Rcpp::stop("Inadmissable input: %s", s);
} else if (l == 8) { // turn YYYYMMDD into YYYY/MM/DD
s = s.substr(0, 4) + "/" + s.substr(4, 2) + "/" + s.substr(6,2);
std::string one = "", two = "", three = "", inp = s;
stringSplitter(inp, ' ', one, two);
if (isLengthEightAndAllDigits(one)) {
one = one.substr(0, 4) + "-" + one.substr(4, 2) + "-" + one.substr(6,2);

inp = two;

// The 'YYYYMMDD' format can of course be followed by either
// 'HHMMSS' or 'HHMM' or 'HHMMSS.fffffff' so we cover these cases
stringSplitter(inp, '.', two, three);
if (two.size() == 6) {
two = two.substr(0, 2) + ":" + two.substr(2, 2) + ":" + two.substr(4,2);
} else if (two.size() == 4) {
two = two.substr(0, 2) + ":" + two.substr(2, 2);
}
s = one + " " + two;
if (three != "") {
s = s + "." + three;
}
if (debug) Rcpp::Rcout << "s: " << s
<< " one: " << one
<< " two: " << two << " "
<< " three: " << three << std::endl;
}

if (debug) Rcpp::Rcout << "before parse: " << s << std::endl;

// Given the string, convert to a POSIXct using an interim double
// of fractional seconds since the epoch
pv[i] = stringToTime(s, asUTC);
Expand Down Expand Up @@ -323,3 +377,9 @@ std::string testOutput_impl(const std::string fmt,
os << pt;
return os.str();
}

// Switch the file-level debug flag on or off.
//
// mode   value to store in the flag
// Returns the value of the flag after it has been set.
// [[Rcpp::export]]
bool setDebug(const bool mode) {
    return (debug = mode);
}
3 changes: 3 additions & 0 deletions tests/allFormats.R
Original file line number Diff line number Diff line change
Expand Up @@ -52,3 +52,6 @@ anytime(c("2016-September-01", "2016September01", "September/01/2016", "Septembe

cat("\n")
anytime(c(NA, NaN, Inf, as.numeric(as.POSIXct("2016-09-01 10:11:12"))))

cat("\n")
anytime(c("20160911", "20160911 1011", "20160911 101112", "20160911 101112.345678"))