/
fst_compress.cpp
137 lines (98 loc) · 3.39 KB
/
fst_compress.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
/*
fst - R package for ultra fast storage and retrieval of datasets
Copyright (C) 2017-present, Mark AJ Klik
This file is part of the fst R package.
The fst R package is free software: you can redistribute it and/or modify it
under the terms of the GNU Affero General Public License version 3 as
published by the Free Software Foundation.
The fst R package is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License
for more details.
You should have received a copy of the GNU Affero General Public License along
with the fst R package. If not, see <http://www.gnu.org/licenses/>.
You can contact the author at:
- fst R package source repository : https://github.com/fstpackage/fst
*/
#include <memory>
#include <Rcpp.h>
#include <interface/fstcompressor.h>
#include <interface/fsthash.h>
#include <fst_type_factory.h>
#include <fst_error.h>
// Hash the bytes of a raw vector and return the result as an R integer.
//
// rawVec : vector whose payload is read through RAW() and whose length is
//          taken from Rf_length().
// seed   : R NULL selects the HashBlob overload without a seed; otherwise the
//          first element read through INTEGER() is reinterpreted as an
//          unsigned int and passed to HashBlob as the seed.
//
// Returns a length-one integer vector that holds the bit pattern of the
// unsigned hash value produced by FstHasher::HashBlob.
SEXP fsthasher(SEXP rawVec, SEXP seed)
{
  FstHasher hasher;

  SEXP res = PROTECT(Rf_allocVector(INTSXP, 1));

  // The hash is unsigned, so it is written through an unsigned view of the
  // integer payload to keep all bits intact.
  unsigned int* hashP = reinterpret_cast<unsigned int*>(INTEGER(res));

  unsigned char* blob = static_cast<unsigned char*>(RAW(rawVec));

  if (Rf_isNull(seed))
  {
    // no seed supplied: use the seedless overload
    *hashP = hasher.HashBlob(blob, Rf_length(rawVec));
  }
  else
  {
    unsigned int seedValue = *reinterpret_cast<unsigned int*>(INTEGER(seed));
    *hashP = hasher.HashBlob(blob, Rf_length(rawVec), seedValue);
  }

  // single exit: the result is handed back to R, which takes over protection
  UNPROTECT(1);
  return res;
}
// Compress the contents of a raw vector.
//
// rawVec      : vector whose payload (RAW) and length (Rf_xlength) are passed
//               to FstCompressor::CompressBlob.
// compressor  : character vector; its first element must match "LZ4" or "ZSTD".
// compression : first element read through INTEGER() is passed to the
//               FstCompressor constructor.
// hash        : logical vector with at least one element; its first element is
//               passed as the third argument of CompressBlob.
//
// Returns the R vector owned by the resulting blob container, or the value of
// fst_error() when the algorithm is unknown or compression throws. A
// non-logical or empty `hash` raises an R error through Rf_error().
SEXP fstcomp(SEXP rawVec, SEXP compressor, SEXP compression, SEXP hash)
{
  // Rf_error() leaves this function with a longjmp, which does not run C++
  // destructors. All argument validation and R accessor calls are therefore
  // done before any C++ object that owns memory is constructed.
  if (!Rf_isLogical(hash) || Rf_xlength(hash) < 1)
  {
    Rf_error("Please specify true of false for parameter hash.");
  }

  COMPRESSION_ALGORITHM algo;

  if (Rf_NonNullStringMatch(STRING_ELT(compressor, 0), Rf_mkChar("LZ4")))
  {
    algo = COMPRESSION_ALGORITHM::ALGORITHM_LZ4;
  }
  else if (Rf_NonNullStringMatch(STRING_ELT(compressor, 0), Rf_mkChar("ZSTD")))
  {
    algo = COMPRESSION_ALGORITHM::ALGORITHM_ZSTD;
  }
  else
  {
    return fst_error("Unknown compression algorithm selected");
  }

  // extract the remaining scalar arguments and the input buffer from R
  int compressionLevel = *INTEGER(compression);
  int hashFlag = *LOGICAL(hash);
  unsigned long long vecLength = Rf_xlength(rawVec);
  unsigned char* data = static_cast<unsigned char*>(RAW(rawVec));

  // from here on only C++ objects are created; they are released by RAII on
  // every return path below
  std::unique_ptr<TypeFactory> typeFactoryP(new TypeFactory());

  FstCompressor fstcompressor(algo, compressionLevel, static_cast<ITypeFactory*>(typeFactoryP.get()));

  std::unique_ptr<IBlobContainer> blobContainerP;

  try
  {
    blobContainerP = std::unique_ptr<IBlobContainer>(fstcompressor.CompressBlob(data, vecLength, hashFlag));
  }
  catch (const std::runtime_error& e)
  {
    // report library errors to the caller as an fst_error value
    return fst_error(e.what());
  }
  catch ( ... )
  {
    return fst_error("Unexpected error detected while compressing data.");
  }

  // the container is downcast to reach its R vector accessor
  SEXP resVec = static_cast<BlobContainer*>(blobContainerP.get())->RVector();

  return resVec;
}
// Decompress a raw vector that holds compressed data.
//
// rawVec : vector whose payload (RAW) and length (Rf_xlength) are passed to
//          FstCompressor::DecompressBlob.
//
// Returns the R vector owned by the resulting blob container, or the value of
// fst_error() when decompression throws.
SEXP fstdecomp(SEXP rawVec)
{
  std::unique_ptr<ITypeFactory> factory(new TypeFactory());

  FstCompressor decompressor(factory.get());

  unsigned long long blobSize = Rf_xlength(rawVec);
  unsigned char* blobBytes = static_cast<unsigned char*>(RAW(rawVec));

  std::unique_ptr<BlobContainer> decompressed;

  try
  {
    // take ownership of the container returned by the decompressor
    decompressed.reset(static_cast<BlobContainer*>(decompressor.DecompressBlob(blobBytes, blobSize)));
  }
  catch (const std::runtime_error& err)
  {
    // report library errors to the caller as an fst_error value
    return fst_error(err.what());
  }
  catch ( ... )
  {
    return fst_error("Error detected while decompressing data.");
  }

  return decompressed->RVector();
}