Skip to content

Commit

Permalink
Path prefix search (draios#660)
Browse files Browse the repository at this point in the history
* Add ability to test a path against many prefixes

Add a data structure that allows testing a path (e.g. /var/log/messages)
against a set of path prefixes (/usr, /bin, /var/log, ...).

Thinking ahead to use in libsinsp, the search structure uses pointer +
length pairs instead of strings, meaning that it does not copy any data,
only refers to it. This way the structure doesn't copy filtercheck
values.

In order to share the idea of a pair of pointer + length between this
structure and the unordered_set used by "in (...)" set membership tests,
move the hashing and equality function from filterchecks to a standalone
header filter_value.h and use it for the unordered_map.

The paths are held in a tree-like structure. At each level, an unordered
map has the path components for that level and a sub-tree of paths for
that root.

The tree can change if a new search path is a prefix of any of the
current paths (the sub-tree for the longer path is replaced by the
prefix). If an existing path is already a prefix of the new search path,
no change is made, as the new path is already covered by that prefix.

Matching involves splitting off the first directory component and
testing it against the values at that level. If a match is found, it
recursively calls match for the subtree.

* Add pmatch operator, using prefix_search struct.

Move the definitions of filter_value_member_t and its hash and equality
functions out of filterchecks.h and into filter_value.h, renaming the
type to filter_value_t along the way.

Add an operator CO_PMATCH/"pmatch" which takes a set of values like
CO_IN does, but requires that the left hand side of the comparison is a
PT_CHARBUF. When filter values are added, they are added to the new
path_prefix_search object m_val_storages_paths.

In sinsp_filter_check::flt_compare, when the operator is CO_PMATCH, test
the value against m_val_storages_paths.

As a result, you can run sysdig using a command line like:

sudo ./userspace/sysdig/sysdig "evt.type=open and fd.directory
pmatch (/var, /usr)"

and see all the file opens for files below either /var or /usr.
  • Loading branch information
mstemm authored and Damian Myerscough committed Mar 3, 2017
1 parent fe066b5 commit f887c2f
Show file tree
Hide file tree
Showing 8 changed files with 392 additions and 55 deletions.
1 change: 1 addition & 0 deletions userspace/libsinsp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ add_library(sinsp STATIC
"${JSONCPP_LIB_SRC}"
logger.cpp
parsers.cpp
prefix_search.cpp
protodecoder.cpp
threadinfo.cpp
sinsp.cpp
Expand Down
55 changes: 41 additions & 14 deletions userspace/libsinsp/filter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1024,7 +1024,7 @@ void sinsp_filter_check::add_filter_value(const char* str, uint32_t len, uint16_

// XXX/mstemm this doesn't work if someone called
// add_filter_value more than once for a given index.
filter_value_member_t item(filter_value_p(i), len);
filter_value_t item(filter_value_p(i), len);
m_val_storages_members.insert(item);

if(len < m_val_storages_min_size)
Expand All @@ -1036,8 +1036,13 @@ void sinsp_filter_check::add_filter_value(const char* str, uint32_t len, uint16_
{
m_val_storages_max_size = len;
}
}

// If the operator is CO_PMATCH, also add the value to the paths set.
if (m_cmpop == CO_PMATCH)
{
m_val_storages_paths.add_search_path(item);
}
}

void sinsp_filter_check::parse_filter_value(const char* str, uint32_t len, uint8_t *storage, uint32_t storage_len)
{
Expand Down Expand Up @@ -1066,21 +1071,33 @@ const filtercheck_field_info* sinsp_filter_check::get_field_info()

bool sinsp_filter_check::flt_compare(cmpop op, ppm_param_type type, void* operand1, uint32_t op1_len, uint32_t op2_len)
{
if (op == CO_IN)
if (op == CO_IN || op == CO_PMATCH)
{
// For raw strings, the length may not be set. So we do a strlen to find it.
if(type == PT_CHARBUF && op1_len == 0)
{
op1_len = strlen((char *) operand1);
}

filter_value_member_t item((uint8_t *) operand1, op1_len);
if(op1_len >= m_val_storages_min_size &&
op1_len <= m_val_storages_max_size &&
m_val_storages_members.find(item) != m_val_storages_members.end())
filter_value_t item((uint8_t *) operand1, op1_len);

if (op == CO_IN)
{
return true;
if(op1_len >= m_val_storages_min_size &&
op1_len <= m_val_storages_max_size &&
m_val_storages_members.find(item) != m_val_storages_members.end())
{
return true;
}
}
else
{
if (m_val_storages_paths.match(item))
{
return true;
}
}

return false;
}
else
Expand Down Expand Up @@ -1355,7 +1372,7 @@ char sinsp_filter_compiler::next()
}
}

vector<char> sinsp_filter_compiler::next_operand(bool expecting_first_operand, bool in_clause)
vector<char> sinsp_filter_compiler::next_operand(bool expecting_first_operand, bool in_or_pmatch_clause)
{
vector<char> res;
bool is_quoted = false;
Expand Down Expand Up @@ -1406,7 +1423,7 @@ vector<char> sinsp_filter_compiler::next_operand(bool expecting_first_operand, b
}
else
{
is_end_of_word = (!is_quoted && (isblank(curchar) || is_bracket(curchar) || (in_clause && curchar == ','))) ||
is_end_of_word = (!is_quoted && (isblank(curchar) || is_bracket(curchar) || (in_or_pmatch_clause && curchar == ','))) ||
(is_quoted && escape_state != PES_SLASH && (curchar == '"' || curchar == '\''));
}

Expand All @@ -1423,7 +1440,7 @@ vector<char> sinsp_filter_compiler::next_operand(bool expecting_first_operand, b
//
ASSERT(m_scanpos >= start);

if(curchar == '(' || curchar == ')' || (in_clause && curchar == ','))
if(curchar == '(' || curchar == ')' || (in_or_pmatch_clause && curchar == ','))
{
m_scanpos--;
}
Expand Down Expand Up @@ -1604,6 +1621,11 @@ cmpop sinsp_filter_compiler::next_comparison_operator()
m_scanpos += 2;
return CO_IN;
}
else if(compare_no_consume("pmatch"))
{
m_scanpos += 6;
return CO_PMATCH;
}
else if(compare_no_consume("exists"))
{
m_scanpos += 6;
Expand Down Expand Up @@ -1644,7 +1666,7 @@ void sinsp_filter_compiler::parse_check()

chk->parse_field_name((char *)&operand1[0], true);

if(co == CO_IN)
if(co == CO_IN || co == CO_PMATCH)
{
//
// Skip spaces
Expand All @@ -1656,7 +1678,7 @@ void sinsp_filter_compiler::parse_check()

if(m_fltstr[m_scanpos] != '(')
{
throw sinsp_exception("expected '(' after 'in' operand");
throw sinsp_exception("expected '(' after 'in/pmatch' operand");
}

//
Expand Down Expand Up @@ -1695,11 +1717,16 @@ void sinsp_filter_compiler::parse_check()
}
else
{
throw sinsp_exception("expected either ')' or ',' after a value inside the 'in' clause");
throw sinsp_exception("expected either ')' or ',' after a value inside the 'in/pmatch' clause");
}
}
m_filter->add_check(chk);
}
else if (co == CO_PMATCH)
{
// the pmatch operator can only work on charbufs
throw sinsp_exception("pmatch requires all charbuf arguments");
}
else
{
//
Expand Down
3 changes: 2 additions & 1 deletion userspace/libsinsp/filter.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ enum cmpop {
CO_EXISTS = 9,
CO_ICONTAINS = 10,
CO_STARTSWITH = 11,
CO_GLOB = 12
CO_GLOB = 12,
CO_PMATCH = 13
};

enum boolop
Expand Down
60 changes: 60 additions & 0 deletions userspace/libsinsp/filter_value.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
/*
Copyright (C) 2013-2016 Draios inc.
This file is part of sysdig.
sysdig is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License version 2 as
published by the Free Software Foundation.
sysdig is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with sysdig. If not, see <http://www.gnu.org/licenses/>.
*/

#pragma once

#include <stdint.h>
#include <string.h>

#include <functional>
#include <utility>

// Used for CO_IN/CO_PMATCH filterchecks using PT_CHARBUFs to allow
// for quick multi-value comparisons. Should also work for any
// filtercheck with a buffer and length.
//
// A filter_value_t is a (pointer, length) pair that only refers to
// the bytes it describes; neither functor below copies or frees them.
//
// When building against libstdc++, the hash uses the built in but
// not standard std::_Hash_impl::hash function, which uses murmurhash2
// and is quite fast. Otherwise, it uses
// http://www.cse.yorku.ca/~oz/hash.html.

using filter_value_t = std::pair<uint8_t *, uint32_t>;

// Hashes the val.second bytes starting at val.first.
struct g_hash_membuf
{
	size_t operator()(filter_value_t val) const
	{
		// Key off the standard library rather than the compiler:
		// clang also defines __GNUC__, but std::_Hash_impl only
		// exists in libstdc++ (and not in e.g. libc++).
#ifdef __GLIBCXX__
		return std::_Hash_impl::hash(val.first, val.second);
#else
		size_t hash = 5381;
		for(uint32_t i = 0; i < val.second; i++)
		{
			hash = ((hash << 5) + hash) + val.first[i]; /* hash * 33 + c */
		}
		return hash;
#endif
	}
};

// Two values are equal when they have the same length and the same
// bytes; the pointers themselves are not compared.
struct g_equal_to_membuf
{
	bool operator()(filter_value_t a, filter_value_t b) const
	{
		if(a.second != b.second)
		{
			return false;
		}

		// Skip memcmp for empty values: it must not be handed
		// null pointers, even with a length of zero.
		return (a.second == 0 ||
			memcmp(a.first, b.first, a.second) == 0);
	}
};

44 changes: 5 additions & 39 deletions userspace/libsinsp/filterchecks.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ along with sysdig. If not, see <http://www.gnu.org/licenses/>.
#pragma once
#include <unordered_set>
#include <json/json.h>
#include "filter_value.h"
#include "prefix_search.h"
#include "k8s.h"
#include "mesos.h"

Expand All @@ -43,44 +45,6 @@ class operand_info
string m_description;
};

// Used for CO_IN filterchecks using PT_CHARBUFs to allow for quick
// multi-value comparisons. Should also work for any filtercheck with
// a buffer and length. When compiling with gnu compilers, use the
// built in but not standard _hash_impl::hash function, which uses
// murmurhash2 and is quite fast. Otherwise, uses
// http://www.cse.yorku.ca/~oz/hash.html.

// Used by m_val_storages_members
typedef pair<uint8_t *, uint32_t> filter_value_member_t;

struct g_hash_membuf
{
size_t operator()(filter_value_member_t val) const
{
#ifdef __GNUC__
return std::_Hash_impl::hash(val.first, val.second);
#else
size_t hash = 5381;
for(uint8_t *p = val.first; p-val.first < val.second; p++)
{
int c = *p;

hash = ((hash << 5) + hash) + c; /* hash * 33 + c */
}
return hash;
#endif
}
};

struct g_equal_to_membuf
{
bool operator()(filter_value_member_t a, filter_value_member_t b) const
{
return (a.second == b.second &&
memcmp(a.first, b.first, a.second) == 0);
}
};

///////////////////////////////////////////////////////////////////////////////
// The filter check interface
// NOTE: in order to add a new type of filter check, you need to add a class for
Expand Down Expand Up @@ -189,10 +153,12 @@ class sinsp_filter_check
inline uint8_t* filter_value_p(uint16_t i = 0) { return &m_val_storages[i][0]; }
inline vector<uint8_t> filter_value(uint16_t i = 0) { return m_val_storages[i]; }

unordered_set<filter_value_member_t,
unordered_set<filter_value_t,
g_hash_membuf,
g_equal_to_membuf> m_val_storages_members;

path_prefix_search m_val_storages_paths;

uint32_t m_val_storages_min_size;
uint32_t m_val_storages_max_size;

Expand Down
6 changes: 5 additions & 1 deletion userspace/libsinsp/lua_parser_api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,10 @@ cmpop string_to_cmpop(const char* str)
{
return CO_IN;
}
else if(strcmp(str, "pmatch") == 0)
{
return CO_PMATCH;
}
else if(strcmp(str, "exists") == 0)
{
return CO_EXISTS;
Expand Down Expand Up @@ -216,7 +220,7 @@ int lua_parser_cbacks::rel_expr(lua_State *ls)
// "exists" is the only unary comparison op
if(strcmp(cmpop, "exists"))
{
if (strcmp(cmpop, "in") == 0)
if (strcmp(cmpop, "in") == 0 || strcmp(cmpop, "pmatch") == 0)
{
if (!lua_istable(ls, 3))
{
Expand Down
Loading

0 comments on commit f887c2f

Please sign in to comment.