diff --git a/DESCRIPTION b/DESCRIPTION index d942675..c583924 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: RcppMsgPack Type: Package Title: 'MsgPack' C++ Header Files and Interface Functions for R -Version: 0.2.2 -Date: 2018-05-06 +Version: 0.2.3 +Date: 2018-06-21 Author: Travers Ching and Dirk Eddelbuettel; the authors and contributors of MsgPack Maintainer: Dirk Eddelbuettel Description: 'MsgPack' header files are provided for use by R packages, along diff --git a/NAMESPACE b/NAMESPACE index f3b5ae7..3dc671a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,4 +3,5 @@ importFrom("Rcpp", "evalCpp") export("msgpack_format", "msgpack_map", "msgpack_pack", "msgpack_simplify", "msgpack_unpack", "msgpackFormat", "msgpackMap", "msgpackPack", "msgpackSimplify", "msgpackUnpack", "msgpack_timestamp_encode", "msgpackTimestampEncode", "msgpack_timestamp_decode", "msgpackTimestampDecode", + "msgpack_write", "msgpackWrite", "msgpack_read", "msgpackRead", "arrayEx", "enumEx") diff --git a/R/functions.r b/R/functions.r index 74c503b..3f28e22 100644 --- a/R/functions.r +++ b/R/functions.r @@ -201,4 +201,99 @@ msgpack_timestamp_decode <- function(x, posix=T, tz="UTC") { } #' @rdname msgpack_timestamp_decode -msgpackTimestampDecode <- msgpack_timestamp_decode \ No newline at end of file +msgpackTimestampDecode <- msgpack_timestamp_decode + +#' 'MsgPack' write +#' @description A helper function to serialize an object and write it to a file, or a connection. +#' @param ... Serializable R objects. +#' @param msg Message to write to file. If not NULL and a raw vector, write it instead of the R objects. Default: NULL. +#' @param file A connection, or a string describing the file or pipe to write to, depending on the mode. +#' @param mode One of "auto", "file", "gzip" or "pipe". If "auto", detects based on the file string (any space == pipe, ".gz" == gzip, file otherwise). Ignored if file is a connection. +#' @examples +#' tmp <- tempfile(fileext=".gz") +#' msgpack_write(1:10, file=tmp) +#' x <- msgpack_read(tmp, simplify=TRUE) +msgpack_write <- function(..., msg=NULL, file, mode="auto") { + if(is.null(msg)) { + msg <- msgpack_pack(...) + } + stopifnot("raw" %in% class(msg)) + if("connection" %in% class(file)) { + con <- file + } else if(mode=="auto") { + if(grepl("\\s", file)) { + con <- pipe(file, open="wb") + } else if(grepl("\\.gz$", file)) { + con <- gzfile(file, open="wb") + } else { + con <- file(file, open="wb") + } + } else if(mode=="gzip") { + con <- gzfile(file, open="wb") + } else if(mode=="pipe") { + con <- pipe(file, open="wb") + } else { + con <- file(file, open="wb") + } + writeBin(msg, con=con, useBytes=T) + close(con) + invisible(NULL) +} + +#' @rdname msgpack_write +msgpackWrite <- msgpack_write + +#' 'MsgPack' read +#' @description A helper function to de-serialize an object read from a file or a connection. +#' @param file A connection, or a string describing the file or pipe to write to, depending on the mode. +#' @param simplify Passed to msgpack_unpack. Default: FALSE. +#' @param mode One of "auto", "file", "gzip" or "pipe". If "auto", detects based on the file string (any space == pipe, ".gz" == gzip, file otherwise). Ignored if file is a connection. +#' @param nbytes If reading from a pipe or gzip, how many bytes to read at a time. Default: 16777216 +#' @examples +#' tmp <- tempfile(fileext=".gz") +#' msgpack_write(1:10, file=tmp) +#' x <- msgpack_read(tmp, simplify=TRUE) +msgpack_read <- function(file, simplify=F, mode="auto", nbytes=16777216) { + is_file <- F + if("connection" %in% class(file)) { + con <- file + } else if(mode=="auto") { + if(grepl("\\s", file)) { + con <- pipe(file, open="rb") + } else if(grepl("\\.gz$", file)) { + con <- gzfile(file, open="rb") + } else { + is_file <- T + con <- file(file, open="rb") + file_size <- file.info(file)$size + } + } else if(mode=="gzip") { + con <- gzfile(file, open="rb") + } else if(mode=="pipe") { + con <- pipe(file, open="rb") + } else { + is_file <- T + con <- file(file, open="rb") + file_size <- file.info(file)$size + } + + if(is_file) { + bin <- readBin(con = con, what="raw", n=file_size) + } else { + bin_list <- list() + i <- 1 + bin_list[[i]] <- readBin(con = con, what="raw", n=nbytes) + while(length(bin_list[[i]]) != 0) { + i <- i + 1 + bin_list[[i]] <- readBin(con = con, what="raw", n=nbytes) + } + bin <- do.call(c, bin_list) + } + close(con) + msgpack_unpack(bin, simplify=simplify) +} + +#' @rdname msgpack_read +msgpackRead <- msgpack_read + + diff --git a/man/msgpack_read.Rd b/man/msgpack_read.Rd new file mode 100644 index 0000000..26b5713 --- /dev/null +++ b/man/msgpack_read.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/functions.r +\name{msgpack_read} +\alias{msgpack_read} +\alias{msgpackRead} +\title{'MsgPack' read} +\usage{ +msgpack_read(file, simplify = F, mode = "auto", nbytes = 16777216) + +msgpackRead(file, simplify = F, mode = "auto", nbytes = 16777216) +} +\arguments{ +\item{file}{A connection, or a string describing the file or pipe to write to, depending on the mode.} + +\item{simplify}{Passed to msgpack_unpack. Default: FALSE.} + +\item{mode}{One of "auto", "file", "gzip" or "pipe". If "auto", detects based on the file string (any space == pipe, ".gz" == gzip, file otherwise). Ignored if file is a connection.} + +\item{nbytes}{If reading from a pipe or gzip, how many bytes to read at a time. Default: 16777216} +} +\description{ +A helper function to de-serialize an object read from a file or a connection. +} +\examples{ +tmp <- tempfile(fileext=".gz") +msgpack_write(1:10, file=tmp) +x <- msgpack_read(tmp, simplify=TRUE) +} diff --git a/man/msgpack_write.Rd b/man/msgpack_write.Rd new file mode 100644 index 0000000..d3a37f5 --- /dev/null +++ b/man/msgpack_write.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/functions.r +\name{msgpack_write} +\alias{msgpack_write} +\alias{msgpackWrite} +\title{'MsgPack' write} +\usage{ +msgpack_write(..., msg = NULL, file, mode = "auto") + +msgpackWrite(..., msg = NULL, file, mode = "auto") +} +\arguments{ +\item{...}{Serializable R objects.} + +\item{msg}{Message to write to file. If not NULL and a raw vector, write it instead of the R objects. Default: NULL.} + +\item{file}{A connection, or a string describing the file or pipe to write to, depending on the mode.} + +\item{mode}{One of "auto", "file", "gzip" or "pipe". If "auto", detects based on the file string (any space == pipe, ".gz" == gzip, file otherwise). Ignored if file is a connection.} +} +\description{ +A helper function to serialize an object and write it to a file, or a connection. +} +\examples{ +tmp <- tempfile(fileext=".gz") +msgpack_write(1:10, file=tmp) +x <- msgpack_read(tmp, simplify=TRUE) +} diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 5c7684f..f703a7a 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -49,13 +49,13 @@ BEGIN_RCPP END_RCPP } // c_timestamp_encode -RawVector c_timestamp_encode(double seconds, uint32_t nanoseconds); +RawVector c_timestamp_encode(double seconds, u_int32_t nanoseconds); RcppExport SEXP _RcppMsgPack_c_timestamp_encode(SEXP secondsSEXP, SEXP nanosecondsSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< double >::type seconds(secondsSEXP); - Rcpp::traits::input_parameter< uint32_t >::type nanoseconds(nanosecondsSEXP); + Rcpp::traits::input_parameter< u_int32_t >::type nanoseconds(nanosecondsSEXP); rcpp_result_gen = Rcpp::wrap(c_timestamp_encode(seconds, nanoseconds)); return rcpp_result_gen; END_RCPP diff --git a/src/rcppmsgpack_c_functions.cpp b/src/rcppmsgpack_c_functions.cpp index 4a816ec..de54f28 100644 --- a/src/rcppmsgpack_c_functions.cpp +++ b/src/rcppmsgpack_c_functions.cpp @@ -1,430 +1,456 @@ -#define MSGPACK_USE_BOOST - -#include "anyvector.h" -#include "msgpack.hpp" -#include -#include -#include -#include -#include -#include - -using namespace Rcpp; - -bool temp_bool; -double temp_double; -int temp_int; -std::vector temp_int_vec; -std::vector temp_double_vec; -std::string temp_string; -std::vector temp_unsigned_char; - -const double R_INT_MAX = 2147483647; -const double R_INT_MIN = -2147483648; - -SEXP c_unpack(std::vector char_message); -AnyVector unpackVector(const std::vector &obj_vector, bool const &simplify); -SEXP unpackVisitor(const msgpack::object &obj, bool const &simplify); -RawVector c_pack(SEXP root_obj); -void addToPack(const SEXP &obj, msgpack::packer& pkr); -void packElement(const AnyVector &vec, const LogicalVector & navec, const int &j, msgpack::packer& pkr); - -void packElement(const AnyVector & vec, const LogicalVector & navec, const int & j, msgpack::packer& pkr) { - switch(getType(vec)) { - case LGLSXP: - if(navec[j]) { - pkr.pack_nil(); - } else { - temp_bool = boost::get(vec)[j]; - pkr.pack(temp_bool); - } - break; - case INTSXP: - temp_int = boost::get(vec)[j]; - pkr.pack(temp_int); - break; - case REALSXP: - temp_double = boost::get(vec)[j]; - pkr.pack(temp_double); - break; - case STRSXP: - if(navec[j]) { - pkr.pack_nil(); - } else { - temp_string = boost::get(vec)[j]; - pkr.pack(temp_string); - } - break; - case VECSXP: - addToPack(boost::get(vec)[j], pkr); - } -} - -void addToPack(const SEXP &obj, msgpack::packer& pkr) { - if(Rf_isVectorList(obj)) { - List temp_list = List(obj); - // Map - if(temp_list.hasAttribute("class") && (as< std::vector >(temp_list.attr("class")))[0] == "map") { - // std::cout << "map:" << std::endl; - AnyVector key = sexpToAnyVector(temp_list[0]); - AnyVector value = sexpToAnyVector(temp_list[1]); - LogicalVector nakey = is_na(key); - LogicalVector navalue = is_na(value); - int len = size(key); - pkr.pack_map(len); - for(int j=0; j(keys[j])); - } - addToPack(temp_list[j], pkr); - } - // Array - } else { - List temp_list = List(obj); - pkr.pack_array(temp_list.size()); - for(int j=0; j rvvu = as< std::vector >(obj); - std::vector rvv = std::vector(rvvu.begin(), rvvu.end()); - int8_t ext_type = static_cast (as >(rv.attr("EXT"))[0]); - size_t ext_l = rv.size(); - pkr.pack_ext(ext_l, ext_type); - pkr.pack_ext_body(rvv.data(), ext_l); - // BIN - } else { - pkr.pack(as< std::vector >(obj)); - } - // NIL - } else if(TYPEOF(obj) == NILSXP) { - pkr.pack_nil(); - } else { - AnyVector vec = sexpToAnyVector(obj); - int vec_size = size(vec); - LogicalVector navec = is_na(vec); - // Map again - if(hasAttribute(vec, "names")) { - CharacterVector key = attr(vec, "names"); - LogicalVector nakey = is_na(key); - pkr.pack_map(vec_size); - for(int j=0; j < vec_size; j++) { - if(nakey[j]) { - pkr.pack_nil(); - } else { - pkr.pack(as(key[j])); - } - packElement(vec, navec, j, pkr); - } - // Array and atomic types - } else { - if(vec_size != 1) { - pkr.pack_array(vec_size); - } - for(int j=0; j < vec_size; j++) { - packElement(vec, navec, j, pkr); - } - } - } -} - -// [[Rcpp::export]] -RawVector c_pack(SEXP root_obj) { - std::stringstream buffer; - msgpack::packer pk(&buffer); - if(Rf_isVectorList(root_obj)) { - List root_list = List(root_obj); - if(root_list.hasAttribute("class") && (as< std::vector >(root_list.attr("class")))[0] == "msgpack_set") { - for(int i=0; i &obj_vector, bool const &simplify) { - bool list_type = false; - bool numeric_type = false; - bool integer_type = false; - bool logical_type = false; - bool character_type = false; - bool null_type = false; - int sum_types = 0; - if(simplify) { - for(unsigned int j=0; j= R_INT_MIN) { - integer_type = true; - } else { - numeric_type = true; - } - break; - case msgpack::type::FLOAT32: - case msgpack::type::FLOAT64: - numeric_type = true; - break; - case msgpack::type::STR: - character_type = true; - break; - default: //bin, ext, array, map - list_type = true; - break; - } - sum_types = (numeric_type || integer_type) + logical_type + character_type + list_type; - if(list_type) break; - if((numeric_type || integer_type) && null_type) break; - if(sum_types > 1) break; - } - if(sum_types == 1) { - if(numeric_type && !null_type) { - NumericVector v = NumericVector(obj_vector.size()); - for(unsigned int j=0; j temp_vector; - obj.convert(temp_vector); - return anyVectorToSexp(unpackVector(temp_vector, simplify)); - } else if(obj.type == msgpack::type::MAP) { - int msize = obj.via.map.size; - std::vector key_vector(msize); - std::vector value_vector(msize); - msgpack::object_kv* p = obj.via.map.ptr; - msgpack::object_kv* const pend = obj.via.map.ptr + obj.via.map.size; - int i = 0; - for (; p < pend; ++p) { - key_vector[i] = p->key.as(); - value_vector[i] = p->val.as(); - i++; - } - AnyVector keys = unpackVector(key_vector, simplify); - AnyVector values = unpackVector(value_vector, simplify); - if(simplify && getType(keys) == STRSXP) { - setAttr(values, "names", boost::get(keys)); - // map[1].attr("names") = CharacterVector(map[0]); - return anyVectorToSexp(values); - } else { - List map = List(2); - map[0] = anyVectorToSexp(keys); - map[1] = anyVectorToSexp(values); - map.attr("class") = CharacterVector::create("map", "data.frame"); - map.attr("row.names") = seq_len(msize); - map.names() = CharacterVector::create("key", "value"); - return map; - } - } else if(obj.type == msgpack::type::EXT) { - int8_t vtype = obj.via.ext.type(); - uint32_t vsize = obj.via.ext.size; - const char* vdata = obj.via.ext.data(); - RawVector rv = RawVector(vdata, vdata + vsize); - rv.attr("EXT") = IntegerVector::create(vtype); - return rv; - } else { - switch (obj.type) { - case msgpack::type::NIL: - // std::cout << "nil" << std::endl; - return R_NilValue; - case msgpack::type::BOOLEAN: - // std::cout << "boolean" << std::endl; - obj.convert(temp_bool); - return wrap(temp_bool); - case msgpack::type::POSITIVE_INTEGER: - case msgpack::type::NEGATIVE_INTEGER: - // std::cout << "integer" << std::endl; - obj.convert(temp_double); - if(temp_double <= R_INT_MAX && temp_double >= R_INT_MIN) { - obj.convert(temp_int); - return wrap(temp_int); - } else { - return wrap(temp_double); - } - case msgpack::type::FLOAT32: - case msgpack::type::FLOAT64: - // std::cout << "float" << std::endl; - obj.convert(temp_double); - return wrap(temp_double); - case msgpack::type::STR: - // std::cout << "string" << std::endl; - obj.convert(temp_string); - return wrap(temp_string); - case msgpack::type::BIN: - obj.convert(temp_unsigned_char); - return RawVector(temp_unsigned_char.begin(), temp_unsigned_char.end()); - default: - break; - } - } - return LogicalVector::create(); //should never reach -} - - -// [[Rcpp::export]] -SEXP c_unpack(std::vector char_message, bool simplify) { - // std::vector char_message = as< std::vector >(raw_message); // cast from RawVector - std::string message(char_message.begin(), char_message.end()); - // msgpack::object_handle oh = msgpack::unpack(message.data(), message.size()); - //msgpack::object_handle oh; - std::size_t off = 0; - std::size_t len = message.size(); - // const char* dat = ss.str().data(); - List L = List(0); - while(off != len) { - msgpack::object_handle oh; - msgpack::unpack(oh, message.data(), len, off); - msgpack::object obj = oh.get(); - L.push_back(unpackVisitor(obj, simplify)); - } - if(L.size() != 1) { - L.attr("class") = "msgpack_set"; - return L; - } - return L[0]; - // msgpack::type::variant v = oh.get().as(); - // List L = List(1); - // SEXP ret = boost::apply_visitor(unpack_visitor(), v); - // return ret; -} - -// 0xd6 | -1 | seconds in 32-bit unsigned int -// 0xd7 | -1 | nanoseconds in 30-bit unsigned int | seconds in 34-bit unsigned int -// 0xc7 | 12 | -1 | nanoseconds in 32-bit unsigned int | seconds in 64-bit signed int -//Bit operations: https://stackoverflow.com/questions/47981/how-do-you-set-clear-and-toggle-a-single-bit -// [[Rcpp::export]] -RawVector c_timestamp_encode(double seconds, uint32_t nanoseconds) { - int64_t secint = round(seconds); - RawVector rv; - if((nanoseconds == 0) & (seconds <= 4294967295) & (seconds >= 0)) { //2^32-1 - std::vector msg(4); - for(int i=0; i<32; i++) { - if((secint >> i) & 1) msg[(31-i)/8] |= 1 << (i % 8); - } - rv = RawVector(msg.begin(), msg.end()); - } else if((seconds <= 17179869183) & (seconds >= 0)) { //2^34-1 - std::vector msg(8); - for(int i=0; i<34; i++) { - if((secint >> i) & 1) msg[(63-i)/8] |= 1 << (i % 8); - } - for(int i=0; i<30; i++) { - if((nanoseconds >> i) & 1) msg[(29-i)/8] |= 1 << ((i+2) % 8); - } - rv = RawVector(msg.begin(), msg.end()); - } else { - std::vector msg(12); - for(int i=0; i<64; i++) { - if((secint >> i) & 1) msg[(95-i)/8] |= 1 << (i % 8); - } - for(int i=0; i<32; i++) { - if((nanoseconds >> i) & 1) msg[(31-i)/8] |= 1 << (i % 8); - } - rv = RawVector(msg.begin(), msg.end()); - } - rv.attr("EXT") = IntegerVector::create(-1); - return rv; -} - -// [[Rcpp::export]] -List c_timestamp_decode(std::vector v) { - int64_t seconds; - int nanoseconds; - if(v.size() == 4) { - seconds = (v[0] << 24) | (v[1] << 16) | (v[2] << 8) | v[3]; - nanoseconds = 0; - } else if(v.size() == 8) { - nanoseconds = (v[0] << 22) | (v[1] << 14) | (v[2] << 6) | (v[3] & 252); - seconds = ((static_cast(v[3]) & 3) << 32) | - (static_cast(v[4]) << 24) | - (static_cast(v[5]) << 16) | - (static_cast(v[6]) << 8) | - static_cast(v[7]); - } else { - nanoseconds = (v[0] << 24) | (v[1] << 16) | (v[2] << 8) | v[3]; - seconds = (static_cast(v[4]) << 56) | - (static_cast(v[5]) << 48) | - (static_cast(v[6]) << 40) | - (static_cast(v[7]) << 32) | - (static_cast(v[8]) << 24) | - (static_cast(v[9]) << 16) | - (static_cast(v[10]) << 8) | - static_cast(v[11]); - } - List l = List(2); - l[0] = static_cast(seconds); - l[1] = nanoseconds; - l.attr("names") = CharacterVector::create("seconds", "nanoseconds"); - return l; -} +#define MSGPACK_USE_BOOST + +#include "anyvector.h" +#include "msgpack.hpp" +#include +#include +#include +#include +#include +#include + +using namespace Rcpp; + +const double R_INT_MAX = 2147483647; +const double R_INT_MIN = -2147483648; + +SEXP c_unpack(std::vector char_message); +AnyVector unpackVector(const std::vector &obj_vector, bool const simplify); +SEXP unpackVisitor(const msgpack::object &obj, bool const simplify); +RawVector c_pack(SEXP root_obj); +template void addToPack(const SEXP &obj, msgpack::packer& pkr); +template void packElement(const AnyVector &vec, const LogicalVector & navec, const int j, msgpack::packer& pkr); + +template void packElement(const AnyVector & vec, const LogicalVector & navec, const int j, msgpack::packer& pkr) { + bool temp_bool; + double temp_double; + int temp_int; + std::string temp_string; + + switch(getType(vec)) { + case LGLSXP: + if(navec[j]) { + pkr.pack_nil(); + } else { + temp_bool = boost::get(vec)[j]; + pkr.pack(temp_bool); + } + break; + case INTSXP: + temp_int = boost::get(vec)[j]; + pkr.pack(temp_int); + // pkr.pack( as< std::vector >(boost::get(vec))[j] ); + break; + case REALSXP: + temp_double = boost::get(vec)[j]; + pkr.pack(temp_double); + break; + case STRSXP: + if(navec[j]) { + pkr.pack_nil(); + } else { + temp_string = boost::get(vec)[j]; + pkr.pack(temp_string); + } + break; + case VECSXP: + addToPack(boost::get(vec)[j], pkr); + } +} + +template void addToPack(const SEXP &obj, msgpack::packer& pkr) { + if(Rf_isVectorList(obj)) { + List temp_list = List(obj); + // Map + if(temp_list.hasAttribute("class") && (as< std::vector >(temp_list.attr("class")))[0] == "map") { + // std::cout << "map:" << std::endl; + AnyVector key = sexpToAnyVector(temp_list[0]); + AnyVector value = sexpToAnyVector(temp_list[1]); + LogicalVector nakey = is_na(key); + LogicalVector navalue = is_na(value); + int len = size(key); + pkr.pack_map(len); + for(int j=0; j(keys[j])); + } + addToPack(temp_list[j], pkr); + } + // Array + } else { + List temp_list = List(obj); + pkr.pack_array(temp_list.size()); + for(int j=0; j rvvu = as< std::vector >(obj); + std::vector rvv = std::vector(rvvu.begin(), rvvu.end()); + int8_t ext_type = static_cast (as >(rv.attr("EXT"))[0]); + size_t ext_l = rv.size(); + pkr.pack_ext(ext_l, ext_type); + pkr.pack_ext_body(rvv.data(), ext_l); + // BIN + } else { + pkr.pack(as< std::vector >(obj)); + } + // NIL + } else if(TYPEOF(obj) == NILSXP) { + pkr.pack_nil(); + } else { + AnyVector vec = sexpToAnyVector(obj); + int vec_size = size(vec); + LogicalVector navec = is_na(vec); + // Map again + if(hasAttribute(vec, "names")) { + CharacterVector key = attr(vec, "names"); + LogicalVector nakey = is_na(key); + pkr.pack_map(vec_size); + for(int j=0; j < vec_size; j++) { + if(nakey[j]) { + pkr.pack_nil(); + } else { + pkr.pack(as(key[j])); + } + packElement(vec, navec, j, pkr); + } + // Array and atomic types + } else { + if(vec_size != 1) { + pkr.pack_array(vec_size); + } + for(int j=0; j < vec_size; j++) { + packElement(vec, navec, j, pkr); + } + } + } +} + +// [[Rcpp::export]] +RawVector c_pack(SEXP root_obj) { + //std::stringstream buffer; + // msgpack::packer pk(&buffer); + msgpack::sbuffer sbuf; + msgpack::packer pk(&sbuf); + if(Rf_isVectorList(root_obj)) { + List root_list = List(root_obj); + if(root_list.hasAttribute("class") && (as< std::vector >(root_list.attr("class")))[0] == "msgpack_set") { + for(int i=0; i &obj_vector, bool const simplify) { + + // reusable objects for converting from msgpack::object + bool temp_bool; + double temp_double; + int temp_int; + std::string temp_string; + // std::vector temp_unsigned_char; + + // variables for determining vector type during unpacking + bool list_type = false; + bool numeric_type = false; + bool integer_type = false; + bool logical_type = false; + bool character_type = false; + bool null_type = false; + int sum_types = 0; + + if(simplify) { + for(int j=0; j= R_INT_MIN) { + integer_type = true; + } else { + numeric_type = true; + } + break; + case msgpack::type::FLOAT32: + case msgpack::type::FLOAT64: + numeric_type = true; + break; + case msgpack::type::STR: + character_type = true; + break; + default: //bin, ext, array, map + list_type = true; + break; + } + sum_types = (numeric_type || integer_type) + logical_type + character_type + list_type; + if(list_type) break; + if((numeric_type || integer_type) && null_type) break; + if(sum_types > 1) break; + } + if(sum_types == 1) { + if(numeric_type && !null_type) { + NumericVector v = NumericVector(obj_vector.size()); + for(int j=0; j temp_vector; + obj.convert(temp_vector); + return anyVectorToSexp(unpackVector(temp_vector, simplify)); + } else if(obj.type == msgpack::type::MAP) { + int msize = obj.via.map.size; + std::vector key_vector(msize); + std::vector value_vector(msize); + msgpack::object_kv* p = obj.via.map.ptr; + msgpack::object_kv* const pend = obj.via.map.ptr + obj.via.map.size; + int i = 0; + for (; p < pend; ++p) { + key_vector[i] = p->key.as(); + value_vector[i] = p->val.as(); + i++; + } + AnyVector keys = unpackVector(key_vector, simplify); + AnyVector values = unpackVector(value_vector, simplify); + if(simplify && getType(keys) == STRSXP) { + setAttr(values, "names", boost::get(keys)); + // map[1].attr("names") = CharacterVector(map[0]); + return anyVectorToSexp(values); + } else { + List map = List(2); + map[0] = anyVectorToSexp(keys); + map[1] = anyVectorToSexp(values); + map.attr("class") = CharacterVector::create("map", "data.frame"); + map.attr("row.names") = seq_len(msize); + map.names() = CharacterVector::create("key", "value"); + return map; + } + } else if(obj.type == msgpack::type::EXT) { + int8_t vtype = obj.via.ext.type(); + uint32_t vsize = obj.via.ext.size; + const char* vdata = obj.via.ext.data(); + RawVector rv = RawVector(vdata, vdata + vsize); + rv.attr("EXT") = IntegerVector::create(vtype); + return rv; + } else { + + // reusable objects for converting from msgpack::object + bool temp_bool; + double temp_double; + int temp_int; + std::string temp_string; + std::vector temp_unsigned_char; + + switch (obj.type) { + case msgpack::type::NIL: + // std::cout << "nil" << std::endl; + return R_NilValue; + case msgpack::type::BOOLEAN: + // std::cout << "boolean" << std::endl; + obj.convert(temp_bool); + return wrap(temp_bool); + case msgpack::type::POSITIVE_INTEGER: + case msgpack::type::NEGATIVE_INTEGER: + // std::cout << "integer" << std::endl; + obj.convert(temp_double); + if(temp_double <= R_INT_MAX && temp_double >= R_INT_MIN) { + obj.convert(temp_int); + return wrap(temp_int); + } else { + return wrap(temp_double); + } + case msgpack::type::FLOAT32: + case msgpack::type::FLOAT64: + // std::cout << "float" << std::endl; + obj.convert(temp_double); + return wrap(temp_double); + case msgpack::type::STR: + // std::cout << "string" << std::endl; + obj.convert(temp_string); + return wrap(temp_string); + case msgpack::type::BIN: + obj.convert(temp_unsigned_char); + return RawVector(temp_unsigned_char.begin(), temp_unsigned_char.end()); + default: + break; + } + } + return LogicalVector::create(); //should never reach +} + + +// [[Rcpp::export]] +SEXP c_unpack(std::vector char_message, bool simplify) { + // std::vector char_message = as< std::vector >(raw_message); // cast from RawVector + // std::string message(char_message.begin(), char_message.end()); + // msgpack::object_handle oh = msgpack::unpack(message.data(), message.size()); + //msgpack::object_handle oh; + + std::size_t off = 0; + std::size_t len = char_message.size(); + char* mdata = reinterpret_cast(char_message.data()); + // const char* dat = ss.str().data(); + std::vector L(0); + while(off != len) { + msgpack::object_handle oh; + msgpack::unpack(oh, mdata, len, off); + msgpack::object obj = oh.get(); + L.push_back(unpackVisitor(obj, simplify)); + } + if(L.size() != 1) { + List LL = List(L.size()); + for(uint i=0; i(); + // List L = List(1); + // SEXP ret = boost::apply_visitor(unpack_visitor(), v); + // return ret; +} + +// 0xd6 | -1 | seconds in 32-bit unsigned int +// 0xd7 | -1 | nanoseconds in 30-bit unsigned int | seconds in 34-bit unsigned int +// 0xc7 | 12 | -1 | nanoseconds in 32-bit unsigned int | seconds in 64-bit signed int +//Bit operations: https://stackoverflow.com/questions/47981/how-do-you-set-clear-and-toggle-a-single-bit +// [[Rcpp::export]] +RawVector c_timestamp_encode(double seconds, u_int32_t nanoseconds) { + int64_t secint = round(seconds); + RawVector rv; + if(nanoseconds == 0 & seconds <= 4294967295 & seconds >= 0) { //2^32-1 + std::vector msg(4); + for(int i=0; i<32; i++) { + if((secint >> i) & 1) msg[(31-i)/8] |= 1 << (i % 8); + } + rv = RawVector(msg.begin(), msg.end()); + } else if(seconds <= 17179869183 & seconds >= 0) { //2^34-1 + std::vector msg(8); + for(int i=0; i<34; i++) { + if((secint >> i) & 1) msg[(63-i)/8] |= 1 << (i % 8); + } + for(int i=0; i<30; i++) { + if((nanoseconds >> i) & 1) msg[(29-i)/8] |= 1 << ((i+2) % 8); + } + rv = RawVector(msg.begin(), msg.end()); + } else { + std::vector msg(12); + for(int i=0; i<64; i++) { + if((secint >> i) & 1) msg[(95-i)/8] |= 1 << (i % 8); + } + for(int i=0; i<32; i++) { + if((nanoseconds >> i) & 1) msg[(31-i)/8] |= 1 << (i % 8); + } + rv = RawVector(msg.begin(), msg.end()); + } + rv.attr("EXT") = IntegerVector::create(-1); + return rv; +} + +// [[Rcpp::export]] +List c_timestamp_decode(std::vector v) { + int64_t seconds; + int nanoseconds; + if(v.size() == 4) { + seconds = (v[0] << 24) | (v[1] << 16) | (v[2] << 8) | v[3]; + nanoseconds = 0; + } else if(v.size() == 8) { + nanoseconds = (v[0] << 22) | (v[1] << 14) | (v[2] << 6) | (v[3] & 252); + seconds = ((static_cast(v[3]) & 3) << 32) | + (static_cast(v[4]) << 24) | + (static_cast(v[5]) << 16) | + (static_cast(v[6]) << 8) | + static_cast(v[7]); + } else { + nanoseconds = (v[0] << 24) | (v[1] << 16) | (v[2] << 8) | v[3]; + seconds = (static_cast(v[4]) << 56) | + (static_cast(v[5]) << 48) | + (static_cast(v[6]) << 40) | + (static_cast(v[7]) << 32) | + (static_cast(v[8]) << 24) | + (static_cast(v[9]) << 16) | + (static_cast(v[10]) << 8) | + static_cast(v[11]); + } + List l = List(2); + l[0] = static_cast(seconds); + l[1] = nanoseconds; + l.attr("names") = CharacterVector::create("seconds", "nanoseconds"); + return l; +} diff --git a/tests/tests.r b/tests/tests.r index 7ddd13f..267e05a 100644 --- a/tests/tests.r +++ b/tests/tests.r @@ -1,161 +1,182 @@ -# Tests for testing out the functionality of the package, to make sure it isn't broken - -# some references - -# //https://github.com/msgpack/msgpack-c/blob/401460b7d99e51adc06194ceb458934b359d2139/include/msgpack/v1/adaptor/boost/msgpack_variant.hpp -# //https://stackoverflow.com/questions/44725299/messagepack-c-how-to-iterate-through-an-unknown-data-structure -# //https://stackoverflow.com/questions/37665361/how-to-determine-the-class-of-object-stored-in-sexp-in-rcpp -# //https://stackoverflow.com/questions/12954852/booststatic-visitor-with-multiple-arguments -# //https://stackoverflow.com/questions/25172419/how-can-i-get-the-sexptype-of-an-sexp-value -# //https://github.com/wch/r-source/blob/48536f1756a88830076023db9566fbb2c1dbb29b/src/include/Rinternals.h#L1178-L1214 -# //https://stackoverflow.com/questions/16131462/how-to-use-boost-library-in-c-with-rcpp - - -library(RcppMsgPack) - -# stopifnot <- function(...) cat(..., "\n") - -# Test atomic types - -# integer -xpk <- msgpack_pack(1L) -stopifnot(identical(msgpack_unpack(xpk), 1L)) - -# double -xpk <- msgpack_pack(1.54) -stopifnot(identical(msgpack_unpack(xpk), 1.54)) - -# string -xpk <- msgpack_pack("sdfsdf") -stopifnot(identical(msgpack_unpack(xpk), "sdfsdf")) - -# raw -xpk <- msgpack_pack(as.raw(c(0x28, 0x4F))) -stopifnot(identical(msgpack_unpack(xpk), as.raw(c(0x28, 0x4F)))) - -# boolean -xpk <- msgpack_pack(T) -stopifnot(msgpack_unpack(xpk)) - -# nil -xpk <- msgpack_pack(NULL) -stopifnot(is.null(msgpack_unpack(xpk))) - -# ext -x <- as.raw(c(0x28, 0x4F)) -attr(x, "EXT") <- 1L -xpk <- msgpack_pack(x) -stopifnot(identical(msgpack_unpack(xpk), x)) - - -if (Sys.info()[['sysname']] != "Windows") { - ## unicode or something characters - note this doesn't always work if you copy/paste into a terminal because of how terminals encode text, but if you source this file it works - x <- list('图书,通常在狭义上的理解是带有文字和图像的纸张的集合。书通常由墨水、纸张、羊皮纸或者其他材料固定在书脊上组成。组成书的一张纸称为一张,一张的一面称为一页。但随着科学技术的发展,狭义图书的概念也在扩展,制作书的材料也在改变,如电子格式的电子书。从广义理解的图书,则是一切传播讯息的媒介。书也指文学作品或者其中的一部分。在图书馆信息学中,书被称为专著,以区别于杂志、学术期刊、报纸等连载期刊。所有的书面作品(包括图书)的主体是文学。在小说和一些类型(如传记)中,书可能还要分成卷。对书特别喜爱的人被称为爱书者或藏书家,更随意的称呼是书虫或者书呆子。买书的地方叫书店,图书馆则是可以借阅书籍的地方。2010年,谷歌公司估计,从印刷术发明至今,大概出版了一亿三千万本不同书名的书籍。[1]') - xpk <- msgpack_pack(x) - xu <- msgpack_unpack(xpk) - stopifnot(identical(x, xu)) -} - -# Complex nested object with lists and map -x <- as.raw(c(0x28, 0x4F)) -attr(x, "EXT") <- 1L -xmap <- msgpack_map(key=letters[1:10], value=1:10) -xmap$value[[3]] <- list(NULL) -xmap$value[[4]] <- as.list(1:10) -xmap$value[[4]][[3]] <- xmap -xmap$value[[5]] <- x -y <- 1:10 -names(y) <- letters[1:10] -x <- list(1:10, y, "a", list(3,raw(4)), xmap) -x <- msgpack_format(x) -xpk <- msgpack_pack(x) -xu <- msgpack_unpack(xpk) - -xs <- msgpack_simplify(x) -xus <- msgpack_simplify(xu) - -stopifnot(identical(xs, xus)) - -# named list can be used directly as input - should come out to a map, simplify to get a named vector -x <- list(a=1L, b=2L) -xpk <- msgpack_pack(x) -stopifnot(identical(msgpack_simplify(msgpack_unpack(xpk)),c(a=1L, b=2L))) - -# multiple objects -xpk <- msgpack_pack(1,2,3,5,"a", msgpack_format(1:10)) -xu <- msgpack_unpack(xpk) -stopifnot(identical(msgpack_simplify(xu[[6]]), 1:10)) - -# speed test -if (requireNamespace("microbenchmark", quietly=TRUE)) { - x <- as.list(1:1e6) - print(microbenchmark::microbenchmark(xpk <- msgpack_pack(x), times=10)) # 500 ms - print(microbenchmark::microbenchmark(xu <- msgpack_unpack(xpk), times=10)) # 150 ms - stopifnot(identical(xu, x)) - - ## vector input - x <- 1:1e7 - print(microbenchmark::microbenchmark(xpk2 <- msgpack_pack(x), times=10)) # 50 ms - print(microbenchmark::microbenchmark(xu <- msgpack_unpack(xpk2, simplify=T), times=10)) # 50 ms - stopifnot(identical(xu, x)) -} - -# packed list and vector should be identical -# stopifnot(identical(msgpack_simplify(xpk), xpk2)) - -# vector with NAs -x <- c(1:3,NA,5) -xpk <- msgpack_pack(x) -stopifnot(identical(msgpack_simplify(msgpack_unpack(xpk)),x)) -stopifnot(identical(msgpack_unpack(xpk, simplify=T),x)) - -# named vector is serialized to map -x <- c(1:4); names(x) <- c("z",letters[1:3]) -xpk <- msgpack_pack(x) -stopifnot(identical(msgpack_simplify(msgpack_unpack(xpk)),x)) -stopifnot(identical(msgpack_unpack(xpk, simplify=T),x)) - -# array length zero -x <- list() -xpk <- msgpack_pack(x) -stopifnot(identical(msgpack_unpack(xpk),x)) - -# map length zero -x <- msgpack_map(key=list(), value=list()) -xpk <- msgpack_pack(x) -stopifnot(identical(msgpack_unpack(xpk),x)) - -# special numeric values -x <- c(NA_real_, NaN, -NaN, Inf, -Inf, .Machine$double.xmax, .Machine$double.xmin, -0., 0.) -xpk <- msgpack_pack(x) -xu <- msgpack_unpack(xpk, simplify=T) -stopifnot(identical(x, xu, num.eq=F, single.NA=F)) - -x <- c(.Machine$integer.max, NA_integer_) -xpk <- msgpack_pack(x) -xu <- msgpack_unpack(xpk, simplify=T) -stopifnot(identical(x, xu)) - -#timestamps -mt <- Sys.time() -attr(mt, "tzone") <- "UTC" -mp <- msgpack_pack(msgpack_timestamp_encode(mt)) -mtu <- msgpack_timestamp_decode(msgpack_unpack(mp)) -stopifnot(all.equal(mt, mtu)) # less stringent than identical and all we can guarantee here - -secs <- round(as.numeric(mt)) -mp <- msgpack_pack(msgpack_timestamp_encode(seconds=secs, nanoseconds=0)) -mtu <- msgpack_timestamp_decode(msgpack_unpack(mp), posix=F) -stopifnot(identical(secs, mtu$seconds)) - -secs <- -2^50 -nanoseconds <- 999999999L -mp <- msgpack_pack(msgpack_timestamp_encode(seconds=secs, nanoseconds=nanoseconds)) -mtu <- msgpack_timestamp_decode(msgpack_unpack(mp), posix=F) -stopifnot(identical(secs, mtu$seconds)) -stopifnot(identical(nanoseconds, mtu$nanoseconds)) - -# memory profiling using profvis -# profvis({x <- msgpack_pack(1:1e7)}, torture=0) -# profvis({x <- msgpack_unpack(x, simplify=T)}, torture=0) - +# Tests for testing out the functionality of the package, to make sure it isn't broken + +# some references + +# //https://github.com/msgpack/msgpack-c/blob/401460b7d99e51adc06194ceb458934b359d2139/include/msgpack/v1/adaptor/boost/msgpack_variant.hpp +# //https://stackoverflow.com/questions/44725299/messagepack-c-how-to-iterate-through-an-unknown-data-structure +# //https://stackoverflow.com/questions/37665361/how-to-determine-the-class-of-object-stored-in-sexp-in-rcpp +# //https://stackoverflow.com/questions/12954852/booststatic-visitor-with-multiple-arguments +# //https://stackoverflow.com/questions/25172419/how-can-i-get-the-sexptype-of-an-sexp-value +# //https://github.com/wch/r-source/blob/48536f1756a88830076023db9566fbb2c1dbb29b/src/include/Rinternals.h#L1178-L1214 +# //https://stackoverflow.com/questions/16131462/how-to-use-boost-library-in-c-with-rcpp + + +library(RcppMsgPack) + +# stopifnot <- function(...) cat(..., "\n") + +# Test atomic types + +# integer +xpk <- msgpack_pack(1L) +stopifnot(identical(msgpack_unpack(xpk), 1L)) + +# double +xpk <- msgpack_pack(1.54) +stopifnot(identical(msgpack_unpack(xpk), 1.54)) + +# string +xpk <- msgpack_pack("sdfsdf") +stopifnot(identical(msgpack_unpack(xpk), "sdfsdf")) + +# raw +xpk <- msgpack_pack(as.raw(c(0x28, 0x4F))) +stopifnot(identical(msgpack_unpack(xpk), as.raw(c(0x28, 0x4F)))) + +# boolean +xpk <- msgpack_pack(T) +stopifnot(msgpack_unpack(xpk)) + +# nil +xpk <- msgpack_pack(NULL) +stopifnot(is.null(msgpack_unpack(xpk))) + +# ext +x <- as.raw(c(0x28, 0x4F)) +attr(x, "EXT") <- 1L +xpk <- msgpack_pack(x) +stopifnot(identical(msgpack_unpack(xpk), x)) + + +if (Sys.info()[['sysname']] != "Windows") { + ## unicode or something characters - note this doesn't always work if you copy/paste into a terminal because of how terminals encode text, but if you source this file it works + x <- list('图书,通常在狭义上的理解是带有文字和图像的纸张的集合。书通常由墨水、纸张、羊皮纸或者其他材料固定在书脊上组成。组成书的一张纸称为一张,一张的一面称为一页。但随着科学技术的发展,狭义图书的概念也在扩展,制作书的材料也在改变,如电子格式的电子书。从广义理解的图书,则是一切传播讯息的媒介。书也指文学作品或者其中的一部分。在图书馆信息学中,书被称为专著,以区别于杂志、学术期刊、报纸等连载期刊。所有的书面作品(包括图书)的主体是文学。在小说和一些类型(如传记)中,书可能还要分成卷。对书特别喜爱的人被称为爱书者或藏书家,更随意的称呼是书虫或者书呆子。买书的地方叫书店,图书馆则是可以借阅书籍的地方。2010年,谷歌公司估计,从印刷术发明至今,大概出版了一亿三千万本不同书名的书籍。[1]') + xpk <- msgpack_pack(x) + xu <- msgpack_unpack(xpk) + stopifnot(identical(x, xu)) +} + +# Complex nested object with lists and map +x <- as.raw(c(0x28, 0x4F)) +attr(x, "EXT") <- 1L +xmap <- msgpack_map(key=letters[1:10], value=1:10) +xmap$value[[3]] <- list(NULL) +xmap$value[[4]] <- as.list(1:10) +xmap$value[[4]][[3]] <- xmap +xmap$value[[5]] <- x +y <- 1:10 +names(y) <- letters[1:10] +x <- list(1:10, y, "a", list(3,raw(4)), xmap) +x <- msgpack_format(x) +xpk <- msgpack_pack(x) +xu <- msgpack_unpack(xpk) + +xs <- msgpack_simplify(x) +xus <- msgpack_simplify(xu) + +stopifnot(identical(xs, xus)) + +# named list can be used directly as input - should come out to a map, simplify to get a named vector +x <- list(a=1L, b=2L) +xpk <- msgpack_pack(x) +stopifnot(identical(msgpack_simplify(msgpack_unpack(xpk)),c(a=1L, b=2L))) + +# multiple objects +xpk <- msgpack_pack(1,2,3,5,"a", msgpack_format(1:10)) +xu <- msgpack_unpack(xpk) +stopifnot(identical(msgpack_simplify(xu[[6]]), 1:10)) + +# speed test +if (requireNamespace("microbenchmark", quietly=TRUE)) { + x <- as.list(1:1e6) + print(microbenchmark::microbenchmark(xpk <- msgpack_pack(x), times=10)) # 500 ms + print(microbenchmark::microbenchmark(xu <- msgpack_unpack(xpk), times=10)) # 150 ms + stopifnot(identical(xu, x)) + + ## vector input + x <- 1:1e7 + print(microbenchmark::microbenchmark(xpk2 <- msgpack_pack(x), times=10)) # 50 ms + print(microbenchmark::microbenchmark(xu <- msgpack_unpack(xpk2, simplify=T), times=10)) # 50 ms + stopifnot(identical(xu, x)) +} + +# packed list and vector should be identical +# stopifnot(identical(msgpack_simplify(xpk), xpk2)) + +# vector with NAs +x <- c(1:3,NA,5) +xpk <- msgpack_pack(x) +stopifnot(identical(msgpack_simplify(msgpack_unpack(xpk)),x)) +stopifnot(identical(msgpack_unpack(xpk, simplify=T),x)) + +# named vector is serialized to map +x <- c(1:4); names(x) <- c("z",letters[1:3]) +xpk <- msgpack_pack(x) +stopifnot(identical(msgpack_simplify(msgpack_unpack(xpk)),x)) +stopifnot(identical(msgpack_unpack(xpk, simplify=T),x)) + +# array length zero +x <- list() +xpk <- msgpack_pack(x) +stopifnot(identical(msgpack_unpack(xpk),x)) + +# map length zero +x <- msgpack_map(key=list(), value=list()) +xpk <- msgpack_pack(x) +stopifnot(identical(msgpack_unpack(xpk),x)) + +# special numeric values +x <- c(NA_real_, NaN, -NaN, Inf, -Inf, .Machine$double.xmax, .Machine$double.xmin, -0., 0.) +xpk <- msgpack_pack(x) +xu <- msgpack_unpack(xpk, simplify=T) +stopifnot(identical(x, xu, num.eq=F, single.NA=F)) + +x <- c(.Machine$integer.max, NA_integer_) +xpk <- msgpack_pack(x) +xu <- msgpack_unpack(xpk, simplify=T) +stopifnot(identical(x, xu)) + +#timestamps +mt <- Sys.time() +attr(mt, "tzone") <- "UTC" +mp <- msgpack_pack(msgpack_timestamp_encode(mt)) +mtu <- msgpack_timestamp_decode(msgpack_unpack(mp)) +stopifnot(all.equal(mt, mtu)) # less stringent than identical and all we can guarantee here + +secs <- round(as.numeric(mt)) +mp <- msgpack_pack(msgpack_timestamp_encode(seconds=secs, nanoseconds=0)) +mtu <- msgpack_timestamp_decode(msgpack_unpack(mp), posix=F) +stopifnot(identical(secs, mtu$seconds)) + +secs <- -2^50 +nanoseconds <- 999999999L +mp <- msgpack_pack(msgpack_timestamp_encode(seconds=secs, nanoseconds=nanoseconds)) +mtu <- msgpack_timestamp_decode(msgpack_unpack(mp), posix=F) +stopifnot(identical(secs, mtu$seconds)) +stopifnot(identical(nanoseconds, mtu$nanoseconds)) + +# memory profiling using profvis +# profvis({x <- msgpack_pack(1:1e7)}, torture=0) +# profvis({x <- msgpack_unpack(x, simplify=T)}, torture=0) + +##### msgpack_write/read +# pipe -- depends on OS +if(F) { + wcon <- "zstd --format=zstd -f -3 -T4 -o /tmp/temp.mp.zstd" + rcon <- "zstd --format=zstd -d -c -T4 /tmp/temp.mp.zstd" + msgpack_write(1:1e7, file=wcon) + xu <- msgpack_read(file=rcon, simplify=T) + stopifnot(identical(1:1e7, xu)) +} + +# gzfile +tmp <- tempfile(fileext = ".mp.gz") +msgpack_write(1:1e7, file=tmp) +xu <- msgpack_read(file=tmp, simplify=T) +stopifnot(identical(1:1e7, xu)) + +# file +tmp <- tempfile(fileext = ".mp") +msgpack_write(1:1e7, file=tmp) +xu <- msgpack_read(file=tmp, simplify=T) +stopifnot(identical(1:1e7, xu))