Skip to content

Commit

Permalink
'read_json_file()' now supports '.gz' compressed files
Browse files Browse the repository at this point in the history
  • Loading branch information
coolbutuseless committed Jan 31, 2024
1 parent fa191d8 commit 366f648
Show file tree
Hide file tree
Showing 6 changed files with 71 additions and 2 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
@@ -1,7 +1,7 @@
Package: yyjsonr
Type: Package
Title: Fast JSON Parser and Generator
Version: 0.1.18.9000
Version: 0.1.18.9001
Authors@R: c(
person("Mike", "Cheng", role = c("aut", "cre", 'cph'),
email = "mikefc@coolbutuseless.com"),
Expand Down
3 changes: 3 additions & 0 deletions NEWS.md
@@ -1,4 +1,7 @@

# yyjsonr 0.1.18.9001 2024-02-01

* Read JSON from '.gz' files in `read_json_file()`

# yyjsonr 0.1.18.9000 2024-01-25

Expand Down
59 changes: 58 additions & 1 deletion src/R-yyjson-parse.c
Expand Up @@ -9,6 +9,7 @@
#include <stdlib.h>
#include <unistd.h>

#include "zlib.h"
#include "yyjson.h"
#include "R-yyjson-parse.h"

Expand Down Expand Up @@ -1938,15 +1939,71 @@ SEXP parse_from_raw_(SEXP raw_, SEXP parse_opts_) {
return parse_json_from_str(str, (size_t)length(raw_), &opt);
}


//===========================================================================
// Parse from file given as a filename - ending in ".gz"
//
// filename_    character vector; element 0 is the path of the gzip file
// parse_opts_  parse options, converted with create_parse_options()
//
// Returns the result of parse_json_from_str() on the uncompressed contents.
//
// Raises an R error if the file cannot be opened, if the length trailer
// cannot be read, if the buffer cannot be allocated, or if the amount of
// data actually decompressed disagrees with the length in the trailer.
//===========================================================================
SEXP parse_from_gzfile_(SEXP filename_, SEXP parse_opts_) {

  const char *filename = (const char *)CHAR( STRING_ELT(filename_, 0) );
  filename = R_ExpandFileName(filename);
  parse_options opt = create_parse_options(parse_opts_);

  //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  // Read tail end of .gz file to get length.
  // The gzip trailer stores the uncompressed size modulo 2^32 as a 4-byte
  // little-endian unsigned integer, so files with > 4GB of uncompressed
  // data cannot be sized this way (detected below after decompression).
  //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  FILE *fp = fopen(filename, "rb");
  if (fp == NULL) {
    error("couldn't open file: %s", filename);
  }

  unsigned char tail[4];
  int tail_ok = (fseek(fp, -4L, SEEK_END) == 0) &&
                (fread(tail, 1, sizeof tail, fp) == sizeof tail);
  fclose(fp); // close before any error() call, which does not return
  if (!tail_ok) {
    error("Couldn't read uncompressed length from gz file: %s", filename);
  }

  // Decode byte-by-byte so the result is independent of host endianness
  uint32_t uncompressed_len = (uint32_t)tail[0]         |
                              ((uint32_t)tail[1] <<  8) |
                              ((uint32_t)tail[2] << 16) |
                              ((uint32_t)tail[3] << 24);

  //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  // Allocate a buffer to hold the uncompressed file (+1 for a trailing NUL).
  // Note: this approach will change if/when yyjson implements streaming
  //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  size_t buf_size = (size_t)uncompressed_len + 1;
  char *buf = (buf_size == 0) ? NULL : malloc(buf_size); // 0 => size_t wrapped
  if (buf == NULL) {
    error("Couldn't allocate buffer for reading json.gz file: %s", filename);
  }

  //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  // Uncompress file to buffer.
  // gzread() reports its byte count as an 'int', so read in bounded chunks
  // and accumulate the number of bytes actually produced.
  //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  gzFile gzfp = gzopen(filename, "rb");
  if (gzfp == NULL) {
    free(buf);
    error("couldn't open file: %s", filename);
  }

  const size_t max_chunk = (size_t)1 << 30;
  size_t total = 0;
  int nread = 0;
  while (total < (size_t)uncompressed_len) {
    size_t remaining = (size_t)uncompressed_len - total;
    unsigned chunk = (unsigned)(remaining < max_chunk ? remaining : max_chunk);
    nread = gzread(gzfp, (void *)(buf + total), chunk);
    if (nread <= 0) {
      break; // 0 = end of data, < 0 = decompression error
    }
    total += (size_t)nread;
  }

  // Probe for one more byte: if any data remains, the trailer length was
  // wrong (e.g. > 4GB of data) and parsing would silently truncate.
  char extra;
  int has_extra = (nread >= 0) && (total == (size_t)uncompressed_len) &&
                  (gzread(gzfp, (void *)&extra, 1) > 0);
  gzclose(gzfp);

  if (nread < 0 || total != (size_t)uncompressed_len || has_extra) {
    free(buf);
    error("Uncompressed size does not match gz length field in file: %s", filename);
  }
  buf[total] = '\0';

  //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  // Parse buffer as string
  // NOTE(review): if parse_json_from_str() raises an R error, 'buf' is not
  // freed - confirm, and consider an R-managed allocation if so.
  //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  SEXP res_ = PROTECT(parse_json_from_str(buf, total, &opt));
  free(buf);

  UNPROTECT(1);
  return res_;
}

//===========================================================================
// Parse from file given as a filename
//
// filename_    character vector; element 0 is the path of the file
// parse_opts_  parse options, converted with create_parse_options()
//
// A filename ending in ".gz" is handed to parse_from_gzfile_(); any other
// filename is passed to parse_json_from_file().
//===========================================================================
SEXP parse_from_file_(SEXP filename_, SEXP parse_opts_) {

  const char *filename = (const char *)CHAR( STRING_ELT(filename_, 0) );
  filename = R_ExpandFileName(filename);

  // Only inspect the suffix when the name is long enough to have one;
  // otherwise 'filename + len - 3' would point before the string.
  size_t len = strlen(filename);
  if (len >= 3 && strcmp(filename + len - 3, ".gz") == 0) {
    return parse_from_gzfile_(filename_, parse_opts_);
  }

  parse_options opt = create_parse_options(parse_opts_);
  return parse_json_from_file(filename, &opt);
}

Expand Down
1 change: 1 addition & 0 deletions tests/testthat/examples/mtcars.json
@@ -0,0 +1 @@
[{"mpg":21.0,"cyl":6.0,"disp":160.0,"hp":110.0,"drat":3.9,"wt":2.62,"qsec":16.46,"vs":0.0,"am":1.0,"gear":4.0,"carb":4.0},{"mpg":21.0,"cyl":6.0,"disp":160.0,"hp":110.0,"drat":3.9,"wt":2.875,"qsec":17.02,"vs":0.0,"am":1.0,"gear":4.0,"carb":4.0},{"mpg":22.8,"cyl":4.0,"disp":108.0,"hp":93.0,"drat":3.85,"wt":2.32,"qsec":18.61,"vs":1.0,"am":1.0,"gear":4.0,"carb":1.0},{"mpg":21.4,"cyl":6.0,"disp":258.0,"hp":110.0,"drat":3.08,"wt":3.215,"qsec":19.44,"vs":1.0,"am":0.0,"gear":3.0,"carb":1.0},{"mpg":18.7,"cyl":8.0,"disp":360.0,"hp":175.0,"drat":3.15,"wt":3.44,"qsec":17.02,"vs":0.0,"am":0.0,"gear":3.0,"carb":2.0},{"mpg":18.1,"cyl":6.0,"disp":225.0,"hp":105.0,"drat":2.76,"wt":3.46,"qsec":20.22,"vs":1.0,"am":0.0,"gear":3.0,"carb":1.0},{"mpg":14.3,"cyl":8.0,"disp":360.0,"hp":245.0,"drat":3.21,"wt":3.57,"qsec":15.84,"vs":0.0,"am":0.0,"gear":3.0,"carb":4.0},{"mpg":24.4,"cyl":4.0,"disp":146.7,"hp":62.0,"drat":3.69,"wt":3.19,"qsec":20.0,"vs":1.0,"am":0.0,"gear":4.0,"carb":2.0},{"mpg":22.8,"cyl":4.0,"disp":140.8,"hp":95.0,"drat":3.92,"wt":3.15,"qsec":22.9,"vs":1.0,"am":0.0,"gear":4.0,"carb":2.0},{"mpg":19.2,"cyl":6.0,"disp":167.6,"hp":123.0,"drat":3.92,"wt":3.44,"qsec":18.3,"vs":1.0,"am":0.0,"gear":4.0,"carb":4.0},{"mpg":17.8,"cyl":6.0,"disp":167.6,"hp":123.0,"drat":3.92,"wt":3.44,"qsec":18.9,"vs":1.0,"am":0.0,"gear":4.0,"carb":4.0},{"mpg":16.4,"cyl":8.0,"disp":275.8,"hp":180.0,"drat":3.07,"wt":4.07,"qsec":17.4,"vs":0.0,"am":0.0,"gear":3.0,"carb":3.0},{"mpg":17.3,"cyl":8.0,"disp":275.8,"hp":180.0,"drat":3.07,"wt":3.73,"qsec":17.6,"vs":0.0,"am":0.0,"gear":3.0,"carb":3.0},{"mpg":15.2,"cyl":8.0,"disp":275.8,"hp":180.0,"drat":3.07,"wt":3.78,"qsec":18.0,"vs":0.0,"am":0.0,"gear":3.0,"carb":3.0},{"mpg":10.4,"cyl":8.0,"disp":472.0,"hp":205.0,"drat":2.93,"wt":5.25,"qsec":17.98,"vs":0.0,"am":0.0,"gear":3.0,"carb":4.0},{"mpg":10.4,"cyl":8.0,"disp":460.0,"hp":215.0,"drat":3.0,"wt":5.424,"qsec":17.82,"vs":0.0,"am":0.0,"gear":3.0,"carb":4.0},{"mpg":14.7,"cyl":8.0,"disp":440.0,"hp":230.0,"drat":3.23
,"wt":5.345,"qsec":17.42,"vs":0.0,"am":0.0,"gear":3.0,"carb":4.0},{"mpg":32.4,"cyl":4.0,"disp":78.7,"hp":66.0,"drat":4.08,"wt":2.2,"qsec":19.47,"vs":1.0,"am":1.0,"gear":4.0,"carb":1.0},{"mpg":30.4,"cyl":4.0,"disp":75.7,"hp":52.0,"drat":4.93,"wt":1.615,"qsec":18.52,"vs":1.0,"am":1.0,"gear":4.0,"carb":2.0},{"mpg":33.9,"cyl":4.0,"disp":71.1,"hp":65.0,"drat":4.22,"wt":1.835,"qsec":19.9,"vs":1.0,"am":1.0,"gear":4.0,"carb":1.0},{"mpg":21.5,"cyl":4.0,"disp":120.1,"hp":97.0,"drat":3.7,"wt":2.465,"qsec":20.01,"vs":1.0,"am":0.0,"gear":3.0,"carb":1.0},{"mpg":15.5,"cyl":8.0,"disp":318.0,"hp":150.0,"drat":2.76,"wt":3.52,"qsec":16.87,"vs":0.0,"am":0.0,"gear":3.0,"carb":2.0},{"mpg":15.2,"cyl":8.0,"disp":304.0,"hp":150.0,"drat":3.15,"wt":3.435,"qsec":17.3,"vs":0.0,"am":0.0,"gear":3.0,"carb":2.0},{"mpg":13.3,"cyl":8.0,"disp":350.0,"hp":245.0,"drat":3.73,"wt":3.84,"qsec":15.41,"vs":0.0,"am":0.0,"gear":3.0,"carb":4.0},{"mpg":19.2,"cyl":8.0,"disp":400.0,"hp":175.0,"drat":3.08,"wt":3.845,"qsec":17.05,"vs":0.0,"am":0.0,"gear":3.0,"carb":2.0},{"mpg":27.3,"cyl":4.0,"disp":79.0,"hp":66.0,"drat":4.08,"wt":1.935,"qsec":18.9,"vs":1.0,"am":1.0,"gear":4.0,"carb":1.0},{"mpg":26.0,"cyl":4.0,"disp":120.3,"hp":91.0,"drat":4.43,"wt":2.14,"qsec":16.7,"vs":0.0,"am":1.0,"gear":5.0,"carb":2.0},{"mpg":30.4,"cyl":4.0,"disp":95.1,"hp":113.0,"drat":3.77,"wt":1.513,"qsec":16.9,"vs":1.0,"am":1.0,"gear":5.0,"carb":2.0},{"mpg":15.8,"cyl":8.0,"disp":351.0,"hp":264.0,"drat":4.22,"wt":3.17,"qsec":14.5,"vs":0.0,"am":1.0,"gear":5.0,"carb":4.0},{"mpg":19.7,"cyl":6.0,"disp":145.0,"hp":175.0,"drat":3.62,"wt":2.77,"qsec":15.5,"vs":0.0,"am":1.0,"gear":5.0,"carb":6.0},{"mpg":15.0,"cyl":8.0,"disp":301.0,"hp":335.0,"drat":3.54,"wt":3.57,"qsec":14.6,"vs":0.0,"am":1.0,"gear":5.0,"carb":8.0},{"mpg":21.4,"cyl":4.0,"disp":121.0,"hp":109.0,"drat":4.11,"wt":2.78,"qsec":18.6,"vs":1.0,"am":1.0,"gear":4.0,"carb":2.0}]
Binary file added tests/testthat/examples/mtcars.json.gz
Binary file not shown.
8 changes: 8 additions & 0 deletions tests/testthat/test-read-json-file-gz.R
@@ -0,0 +1,8 @@

# The gzip-compressed fixture must parse to exactly the same R object as the
# plain-text fixture it was created from.
test_that("reading from gz compressed files works", {

  plain_path <- testthat::test_path("examples/mtcars.json")
  gz_path    <- testthat::test_path("examples/mtcars.json.gz")

  from_plain <- read_json_file(plain_path)
  from_gz    <- read_json_file(gz_path)

  expect_identical(from_plain, from_gz)
})

0 comments on commit 366f648

Please sign in to comment.