Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add functionality for MR metadata reading from SAV #313

Open
wants to merge 35 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 34 commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
b96798d
Add functionality for MR metadata reading from SAV
slobodan-ilic Apr 24, 2024
850f0df
Try fixing build
slobodan-ilic May 5, 2024
bae8721
Fix issues with null-termination of mr string
slobodan-ilic Jun 3, 2024
55af2f2
Refactor of mr parsing
slobodan-ilic Jun 4, 2024
e471605
Try fixing fuzzifier
slobodan-ilic Jun 4, 2024
789511a
wip
slobodan-ilic Jun 4, 2024
622301c
fixup! Try fixing fuzzifier
slobodan-ilic Jun 4, 2024
8b453bd
fixup! wip
slobodan-ilic Jun 4, 2024
0a83ade
fixup! fixup! wip
slobodan-ilic Jun 4, 2024
26e96c7
Fix error found by fuzzifier
slobodan-ilic Jun 12, 2024
481a7d1
Fix another malloc issue found with fuzzer
slobodan-ilic Jun 13, 2024
ec778f5
Another malloc fix
slobodan-ilic Jun 13, 2024
d30f048
try fix oom found with fuzzer
slobodan-ilic Jun 13, 2024
7211183
free memory
slobodan-ilic Jun 14, 2024
7cc0ef8
Fail early on bad MR string
slobodan-ilic Jun 14, 2024
41a7ac4
Fix Win build
slobodan-ilic Jun 14, 2024
04c3d33
Test fuzzer with freeing memory
slobodan-ilic Jun 14, 2024
a8f252a
Try debug fuzzer on CI (amend)
slobodan-ilic Jun 14, 2024
a1a69bc
Test Fuzzer hatchet style
slobodan-ilic Jun 15, 2024
1b0b133
Fix accidental delete
slobodan-ilic Jun 15, 2024
1c78b3c
Un-hatchet after successful fuzz run
slobodan-ilic Jun 15, 2024
fac517b
Un-hatchet pt2
slobodan-ilic Jun 15, 2024
0a76076
Un-hatchet pt3
slobodan-ilic Jun 15, 2024
213a76a
Fix actual logic
slobodan-ilic Jun 15, 2024
68b2ecb
Rewrite parsing logic with Ragel
slobodan-ilic Jun 20, 2024
12fa4b2
try fixing appveyor build
slobodan-ilic Jun 20, 2024
0a11d5c
Try fix build pt2
slobodan-ilic Jun 20, 2024
8975ade
Try fix build pt3
slobodan-ilic Jun 20, 2024
1c92bd2
Fix attempt pt 4
slobodan-ilic Jun 20, 2024
db6164e
Try fix build pt5
slobodan-ilic Jun 20, 2024
07e323f
Fix build pt6
slobodan-ilic Jun 20, 2024
b0a99ef
Fix functionality
slobodan-ilic Jun 21, 2024
0fbca90
try fix build
slobodan-ilic Jun 21, 2024
fc836e7
Try fix build
slobodan-ilic Jun 21, 2024
6f500cb
Change parser to full-ragel
slobodan-ilic Jun 24, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ libreadstat_la_SOURCES = \
src/spss/readstat_sav_compress.c \
src/spss/readstat_sav_parse.c \
src/spss/readstat_sav_parse_timestamp.c \
src/spss/readstat_sav_parse_mr_name.c \
src/spss/readstat_sav_read.c \
src/spss/readstat_sav_write.c \
src/spss/readstat_spss.c \
Expand Down Expand Up @@ -103,6 +104,7 @@ noinst_HEADERS = \
src/spss/readstat_sav_compress.h \
src/spss/readstat_sav_parse.h \
src/spss/readstat_sav_parse_timestamp.h \
src/spss/readstat_sav_parse_mr_name.h \
src/spss/readstat_spss.h \
src/spss/readstat_spss_parse.h \
src/spss/readstat_zsav_compress.h \
Expand Down
2 changes: 2 additions & 0 deletions VS17/ReadStat.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,7 @@
<ClCompile Include="..\src\spss\readstat_sav_compress.c" />
<ClCompile Include="..\src\spss\readstat_sav_parse.c" />
<ClCompile Include="..\src\spss\readstat_sav_parse_timestamp.c" />
<ClCompile Include="..\src\spss\readstat_sav_parse_mr_name.c" />
<ClCompile Include="..\src\spss\readstat_sav_read.c" />
<ClCompile Include="..\src\spss\readstat_sav_write.c" />
<ClCompile Include="..\src\spss\readstat_spss.c" />
Expand Down Expand Up @@ -251,6 +252,7 @@
<ClInclude Include="..\src\spss\readstat_sav_compress.h" />
<ClInclude Include="..\src\spss\readstat_sav_parse.h" />
<ClInclude Include="..\src\spss\readstat_sav_parse_timestamp.h" />
<ClInclude Include="..\src\spss\readstat_sav_parse_mr_name.h" />
<ClInclude Include="..\src\spss\readstat_spss.h" />
<ClInclude Include="..\src\spss\readstat_spss_parse.h" />
<ClInclude Include="..\src\spss\readstat_zsav_compress.h" />
Expand Down
6 changes: 6 additions & 0 deletions VS17/ReadStat.vcxproj.filters
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,9 @@
<ClCompile Include="..\src\spss\readstat_sav_parse_timestamp.c">
<Filter>Source Files\spss</Filter>
</ClCompile>
<ClCompile Include="..\src\spss\readstat_sav_parse_mr_name.c">
<Filter>Source Files\spss</Filter>
</ClCompile>
<ClCompile Include="..\src\spss\readstat_sav_read.c">
<Filter>Source Files\spss</Filter>
</ClCompile>
Expand Down Expand Up @@ -218,6 +221,9 @@
<ClInclude Include="..\src\spss\readstat_sav_parse_timestamp.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="..\src\spss\readstat_sav_parse_mr_name.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="..\src\spss\readstat_spss.h">
<Filter>Header Files</Filter>
</ClInclude>
Expand Down
17 changes: 16 additions & 1 deletion src/readstat.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,11 +104,22 @@ typedef enum readstat_error_e {
READSTAT_ERROR_TOO_FEW_COLUMNS,
READSTAT_ERROR_TOO_MANY_COLUMNS,
READSTAT_ERROR_NAME_IS_ZERO_LENGTH,
READSTAT_ERROR_BAD_TIMESTAMP_VALUE
READSTAT_ERROR_BAD_TIMESTAMP_VALUE,
READSTAT_ERROR_BAD_MR_STRING
} readstat_error_t;

const char *readstat_error_message(readstat_error_t error_code);

/*
 * Description of one multiple-response (MR) set.
 *
 * When an array of these is held by a sav_ctx_t, sav_ctx_free() calls
 * free() on name, label, every subvariables[i] for i < num_subvars, and on
 * the subvariables array itself, so those pointers must be heap-allocated
 * (or NULL).
 */
typedef struct mr_set_s {
/* Set type code. NOTE(review): presumably the type character taken from
 * the SAV MR record -- confirm against the parser. */
char type;
/* Set name; freed by sav_ctx_free(). */
char *name;
/* Set label; freed by sav_ctx_free(). */
char *label;
/* NOTE(review): presumably non-zero for a multiple-dichotomy set -- confirm. */
int is_dichotomy;
/* NOTE(review): presumably the value that counts as "selected" for a
 * dichotomy set -- confirm against the parser. */
int counted_value;
/* Array of num_subvars strings; each element and the array itself are
 * freed by sav_ctx_free(). */
char **subvariables;
/* Number of entries in subvariables. */
int num_subvars;
} mr_set_t;

typedef struct readstat_metadata_s {
int64_t row_count;
int64_t var_count;
Expand All @@ -121,6 +132,8 @@ typedef struct readstat_metadata_s {
const char *file_label;
const char *file_encoding;
unsigned int is64bit:1;
size_t multiple_response_sets_length;
mr_set_t *mr_sets;
} readstat_metadata_t;

/* If the row count is unknown (e.g. it's an XPORT or POR file, or an SAV
Expand All @@ -138,6 +151,8 @@ readstat_endian_t readstat_get_endianness(readstat_metadata_t *metadata);
const char *readstat_get_table_name(readstat_metadata_t *metadata);
const char *readstat_get_file_label(readstat_metadata_t *metadata);
const char *readstat_get_file_encoding(readstat_metadata_t *metadata);
/* Returns the metadata's mr_sets pointer (may be NULL). */
const mr_set_t *readstat_get_mr_sets(readstat_metadata_t *metadata);
/* Returns the metadata's multiple_response_sets_length field.
 * NOTE(review): presumably the number of entries in the array returned by
 * readstat_get_mr_sets() -- confirm against the reader. */
size_t readstat_get_multiple_response_sets_length(readstat_metadata_t *metadata);

typedef struct readstat_value_s {
union {
Expand Down
8 changes: 8 additions & 0 deletions src/readstat_metadata.c
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,11 @@ const char *readstat_get_file_encoding(readstat_metadata_t *metadata) {
const char *readstat_get_table_name(readstat_metadata_t *metadata) {
return metadata->table_name;
}

/* Report the multiple_response_sets_length value recorded in the metadata. */
size_t readstat_get_multiple_response_sets_length(readstat_metadata_t *metadata) {
    const size_t set_count = metadata->multiple_response_sets_length;
    return set_count;
}

const mr_set_t *readstat_get_mr_sets(readstat_metadata_t *metadata) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this function should be called readstat_get_multiple_response_sets

return metadata->mr_sets;
}
21 changes: 21 additions & 0 deletions src/spss/readstat_sav.c
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ sav_ctx_t *sav_ctx_init(sav_file_header_record_t *header, readstat_io_t *io) {
return NULL;
}

ctx->mr_sets = NULL;

ctx->io = io;

return ctx;
Expand Down Expand Up @@ -89,6 +91,25 @@ void sav_ctx_free(sav_ctx_t *ctx) {
if (ctx->variable_display_values) {
free(ctx->variable_display_values);
}
/* Release every multiple-response set owned by the context: each set's
 * name, label, individual subvariable strings and the subvariable array,
 * then the set array itself. */
if (ctx->mr_sets) {
    for (size_t i = 0; i < ctx->multiple_response_sets_length; i++) {
        mr_set_t *set = &ctx->mr_sets[i];

        /* free(NULL) is a no-op, so no NULL guards are needed here. */
        free(set->name);
        free(set->label);

        if (set->subvariables) {
            /* num_subvars is a signed int. Comparing it directly against a
             * size_t index would convert a negative count into a huge
             * unsigned bound and free far past the end of the array, so
             * clamp it to zero first. */
            size_t subvar_count = set->num_subvars > 0 ? (size_t)set->num_subvars : 0;
            for (size_t j = 0; j < subvar_count; j++) {
                free(set->subvariables[j]);
            }
            free(set->subvariables);
        }
    }
    free(ctx->mr_sets);
}
free(ctx);
}

5 changes: 5 additions & 0 deletions src/spss/readstat_sav.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
//

#include "readstat_spss.h"
#include "../readstat.h"

#pragma pack(push, 1)

Expand Down Expand Up @@ -100,6 +101,9 @@ typedef struct sav_ctx_s {
uint64_t lowest_double;
uint64_t highest_double;

size_t multiple_response_sets_length;
mr_set_t *mr_sets;

double bias;
int format_version;

Expand All @@ -117,6 +121,7 @@ typedef struct sav_ctx_s {

#define SAV_RECORD_SUBTYPE_INTEGER_INFO 3
#define SAV_RECORD_SUBTYPE_FP_INFO 4
#define SAV_RECORD_SUBTYPE_MULTIPLE_RESPONSE_SETS 7
#define SAV_RECORD_SUBTYPE_PRODUCT_INFO 10
#define SAV_RECORD_SUBTYPE_VAR_DISPLAY 11
#define SAV_RECORD_SUBTYPE_LONG_VAR_NAME 13
Expand Down
Loading
Loading