Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Literal matching unknown error #303

Closed
danlark1 opened this issue Feb 27, 2021 · 7 comments
Closed

Literal matching unknown error #303

danlark1 opened this issue Feb 27, 2021 · 7 comments

Comments

@danlark1
Copy link

danlark1 commented Feb 27, 2021

Reproducible example

#undef NDEBUG
#include <hs.h>
#include <cassert>

/*
Explanation: Failed to scan with hyperscan: HS_UNKNOWN_ERROR
Version: 5.4.0
*/

// Match callback handed to hs_scan() by the reproducer below.
//
// It deliberately does nothing with the match: every argument (pattern id,
// start offset, end offset, flags, user context) is ignored and the function
// always returns 0. Per the Hyperscan match_event_handler contract, a zero
// return asks the scanner to keep going.
int on_match(unsigned int /* id */, unsigned long long /* from */,
             unsigned long long /* to */, unsigned int /* flags */,
             void * /* context */) {
    const int keep_scanning = 0;
    return keep_scanning;
}

// Minimal reproducer for the issue: compile five pure literals with
// hs_compile_lit_multi() in block mode, scan a 43-byte haystack, and show that
// hs_scan() returns HS_UNKNOWN_ERROR where HS_SUCCESS is expected.
int main() {
    hs_database_t *db = nullptr;
    hs_compile_error_t *compile_err = nullptr;
    // Haystack. It contains embedded NUL bytes, so its length is carried
    // separately in `size` rather than derived with strlen().
    const char* data = "\x2a\xed\xe2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00";
    const size_t size = 43;
    // Literal patterns. These also contain NUL bytes; the length of each one
    // is supplied explicitly through `lens` below.
    const char *expr[] = {
        "\xed\xe2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x73\x6f\x6d\x65\x20\x6e\x6f\x6e\x20\x65\x78\x69\x73\x74\x69\x6e\x67\x20\x73\x74\x72\x69\x6e\x67",
        "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
        "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
        "\x2a\xed\xe2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
        "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x73\x6f\x6d\x65\x20\x6e\x6f\x6e\x20\x65\x78\x69\x73\x74\x69\x6e\x67\x20\x73\x74\x72\x69\x6e\x67",
    };
    // Parallel arrays: flags[i], ids[i] and lens[i] all describe expr[i].
    unsigned flags[] = {HS_FLAG_SINGLEMATCH, HS_FLAG_SINGLEMATCH, HS_FLAG_SINGLEMATCH, HS_FLAG_SOM_LEFTMOST, 0};
    unsigned ids[] = {0, 1, 2, 3, 4};
    size_t lens[] = {46, 12, 10, 41, 45};
    // The element count is derived from `ids`, so all four arrays must keep
    // the same number of entries.
    hs_error_t err = hs_compile_lit_multi(expr, flags, ids, lens, sizeof(ids)/sizeof(*ids), HS_MODE_BLOCK, nullptr, &db, &compile_err);

    // Compilation itself is expected to succeed; the failure under
    // investigation happens at scan time.
    assert(HS_SUCCESS == err);
    assert(db != nullptr);

    hs_scratch_t *scratch = nullptr;
    err = hs_alloc_scratch(db, &scratch);
    assert(HS_SUCCESS == err);
    assert(scratch != nullptr);

    // No user context is passed to the callback (last argument is nullptr).
    err = hs_scan(db, data, size, 0, scratch, on_match, nullptr);
    // The next two asserts are mutually exclusive on purpose: the first
    // records the observed (buggy) result, the second the desired one. With
    // the bug present the second assert aborts the program, so the cleanup
    // below is not reached.
    assert(HS_UNKNOWN_ERROR == err); // Got
    assert(HS_SUCCESS == err); // Expected

    hs_free_database(db);
    err = hs_free_scratch(scratch);
    assert(HS_SUCCESS == err);
}
@danlark1
Copy link
Author

danlark1 commented Feb 27, 2021

In case you need inputs without zero bytes, here are two more:

// Explanation: Failed to scan with hyperscan: HS_UNKNOWN_ERROR
// Alternative input set containing no NUL bytes. It reuses the variable names
// of the reproducer's main() (data, size, expr, ids, lens, flags) and is
// presumably meant to be substituted for them there.
// Haystack (its length is given by `size`):
const char* data = "\xf0\x66\x01\x76\x01\x01\x01\x76\x01\x01\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x76\x0d";
const size_t size = 49;
// Patterns (literal byte strings; ids[i], lens[i] and flags[i] describe expr[i]):
const char *expr[] = {
    "\x66\x01\x76\x01\x01\x01\x76\x01\x01\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff",
    "\x01\x01\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x73\x6f\x6d\x65\x20\x6e\x6f\x6e\x20\x65\x78\x69\x73\x74\x69\x6e\x67\x20\x73\x74\x72\x69\x6e\x67",
    "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x73\x6f\x6d\x65\x20\x6e\x6f\x6e\x20\x65\x78\x69\x73\x74\x69\x6e\x67\x20\x73\x74\x72\x69\x6e\x67",
    "\xff\xff\xff",
};
unsigned ids[] = {0, 1, 2, 3};
size_t lens[] = {44, 45, 40, 3};
unsigned flags[] = {HS_FLAG_CASELESS, 0, HS_FLAG_SINGLEMATCH, HS_FLAG_CASELESS};
// Explanation: Failed to scan with hyperscan: HS_UNKNOWN_ERROR
// Second alternative input set without NUL bytes; same variable names as the
// reproducer's main(), presumably to be substituted for them there.
// Haystack (its length is given by `size`):
const char* data = "\x47\x47\x2c\x47\x29\x1d\x47\x47\x01\x01\x01\x01\x01\x01\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20";
const size_t size = 49;
// Patterns (literal byte strings; ids[i], lens[i] and flags[i] describe expr[i]):
const char *expr[] = {
    "\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x73\x6f\x6d\x65\x20\x6e\x6f\x6e\x20\x65\x78\x69\x73\x74\x69\x6e\x67\x20\x73\x74\x72\x69\x6e\x67",
    "\x29\x1d\x47\x47\x01\x01\x01\x01\x73\x6f\x6d\x65\x20\x6e\x6f\x6e\x20\x65\x78\x69\x73\x74\x69\x6e\x67\x20\x73\x74\x72\x69\x6e\x67",
    "\x1d\x47\x47\x01\x01\x01\x01\x01\x01\x20\x20\x20\x20\x20",
    "\x01\x01\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20",
    "\x47\x2c\x47\x29\x1d\x47\x47\x01\x01\x01\x01\x01\x01\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20",
};
unsigned ids[] = {0, 1, 2, 3, 4};
size_t lens[] = {46, 32, 14, 28, 47};
unsigned flags[] = {HS_FLAG_SINGLEMATCH, HS_FLAG_CASELESS|HS_FLAG_SINGLEMATCH, HS_FLAG_CASELESS|HS_FLAG_SINGLEMATCH, HS_FLAG_SINGLEMATCH, HS_FLAG_CASELESS|HS_FLAG_SOM_LEFTMOST};

@danlark1
Copy link
Author

danlark1 commented Mar 1, 2021

The problem is in the switch statement in roseRunProgram_l

For ROSE_INSTR_TRIGGER_SUFFIX

// Input set that, according to the surrounding issue text, hits the
// ROSE_INSTR_TRIGGER_SUFFIX case. Same variable names as the reproducer's
// main(), presumably to be substituted for them there.
// Haystack (contains NUL bytes; its length is given by `size`):
const char* data = "\xcb\xcb\xcb\xcb\x00\x00\x00\x00\x00\x00\x00\x00\xcb\xcb\xcb\xcb\xcb\xcb\xcb\xcb\xcb\xcb\xcb\xcb\xbf\xff\xff\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0";
const size_t size = 35;
// Patterns (literal byte strings; ids[i], lens[i] and flags[i] describe expr[i]):
const char *expr[] = {
    "\x61",
    "\xbf\xff\xff\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0",
};
unsigned ids[] = {0, 1};
size_t lens[] = {1, 38};
unsigned flags[] = {HS_FLAG_SOM_LEFTMOST, 0};

For ROSE_INSTR_CATCH_UP_MPV

// Input set that, according to the surrounding issue text, hits the
// ROSE_INSTR_CATCH_UP_MPV case. Same variable names as the reproducer's
// main(), presumably to be substituted for them there.
// Haystack (its length is given by `size`):
const char* data = "\x0a\x41\x61\x61\xec\xf2\x41\x41";
const size_t size = 8;
// Patterns (literal byte strings, two of which contain a NUL byte;
// ids[i], lens[i] and flags[i] describe expr[i]):
const char *expr[] = {
    "\x61",
    "\x19\x24\x03\x00\x41",
    "\x41\x00\x28\x41\x41\x41\x41\x41\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79",
};
unsigned ids[] = {0, 1, 2};
size_t lens[] = {1, 5, 48};
unsigned flags[] = {HS_FLAG_SINGLEMATCH, HS_FLAG_CASELESS, HS_FLAG_CASELESS|HS_FLAG_SINGLEMATCH};

@Nor7th
Copy link

Nor7th commented Mar 9, 2021

We will try to provide a fix in the next release.

@danlark1
Copy link
Author

danlark1 commented Jun 19, 2021

One more thing: hs_close_stream does not deallocate the stream's memory if it returns HS_UNKNOWN_ERROR

/**
 * Close a stream and release its memory, optionally running the end-of-data
 * reporting step first.
 *
 * @param id       Stream to close. A NULL stream is rejected with HS_INVALID.
 * @param scratch  Scratch space; only examined when @p onEvent is non-NULL.
 * @param onEvent  Match callback. When NULL, no reporting is done and the
 *                 stream is simply freed.
 * @param context  Opaque pointer forwarded together with @p onEvent.
 *
 * @return HS_SUCCESS on a clean close;
 *         HS_INVALID if @p id is NULL or the scratch is missing or fails
 *         validation against the stream's database;
 *         HS_SCRATCH_IN_USE if the scratch could not be marked in use;
 *         HS_UNKNOWN_ERROR if an internal matching error was flagged on the
 *         scratch during reporting.
 *
 * Ownership: the stream is freed on HS_SUCCESS and on HS_UNKNOWN_ERROR. It
 * is left untouched on HS_INVALID and HS_SCRATCH_IN_USE, since nothing has
 * been done to it yet and the caller can retry with a usable scratch.
 */
HS_PUBLIC_API
hs_error_t HS_CDECL hs_close_stream(hs_stream_t *id, hs_scratch_t *scratch,
                                    match_event_handler onEvent,
                                    void *context) {
    if (!id) {
        return HS_INVALID;
    }

    hs_error_t rv = HS_SUCCESS;

    if (onEvent) {
        if (!scratch || !validScratch(id->rose, scratch)) {
            return HS_INVALID;
        }
        if (unlikely(markScratchInUse(scratch))) {
            return HS_SCRATCH_IN_USE;
        }
        report_eod_matches(id, scratch, onEvent, context);
        if (unlikely(internal_matching_error(scratch))) {
            /* Remember the failure but fall through to the common cleanup:
             * returning here directly would leak the stream. */
            rv = HS_UNKNOWN_ERROR;
        }
        unmarkScratchInUse(scratch);
    }

    /* Reached for both the success and the internal-error outcome. */
    hs_stream_free(id);

    return rv;
}

That is, there is no way to close the stream without a memory leak

@hongyang7
Copy link
Contributor

hongyang7 commented Jun 1, 2022

Please refer to the latest develop branch for the fix to the literal matching unknown error.
We'll look into the memory deallocation issue then.

markos referenced this issue in VectorCamp/vectorscan Aug 29, 2022
@hongyang7
Copy link
Contributor

The remaining memory deallocation issue will be fixed in the develop branch soon.

fatchanghao pushed a commit that referenced this issue Oct 27, 2022
@hongyang7
Copy link
Contributor

Please refer to the latest develop branch.
Commit id: 85f68b8

fatchanghao pushed a commit that referenced this issue Feb 15, 2023
fatchanghao pushed a commit that referenced this issue Feb 15, 2023
markos referenced this issue in VectorCamp/vectorscan Sep 5, 2023
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

3 participants