From f65f63c4d86a48be042a3ad242fffe5fe8347ff0 Mon Sep 17 00:00:00 2001 From: David Malcolm Date: Sat, 18 Nov 2023 20:35:59 -0500 Subject: [PATCH] analyzer: new warning: -Wanalyzer-undefined-behavior-strtok [PR107573] This patch: - adds support to the analyzer for tracking API-private state or which we don't have a decl (such as strtok's internal state), - uses it to implement a new -Wanalyzer-undefined-behavior-strtok which warns when strtok (NULL, delim) is called as the first call to strtok after main. gcc/analyzer/ChangeLog: PR analyzer/107573 * analyzer.h (register_known_functions): Add region_model_manager param. * analyzer.opt (Wanalyzer-undefined-behavior-strtok): New. * call-summary.cc (call_summary_replay::convert_region_from_summary_1): Handle RK_PRIVATE. * engine.cc (impl_run_checkers): Pass model manager to register_known_functions. * kf.cc (class undefined_function_behavior): New. (class kf_strtok): New. (register_known_functions): Add region_model_manager param. Use it to register "strtok". * region-model-manager.cc (region_model_manager::get_or_create_conjured_svalue): Add "idx" param. * region-model-manager.h (region_model_manager::get_or_create_conjured_svalue): Add "idx" param. (region_model_manager::get_root_region): New accessor. * region-model.cc (region_model::scan_for_null_terminator): Handle "expr" being null. (region_model::get_representative_path_var_1): Handle RK_PRIVATE. * region-model.h (region_model::called_from_main_p): Make public. * region.cc (region::get_memory_space): Handle RK_PRIVATE. (region::can_have_initial_svalue_p): Handle MEMSPACE_PRIVATE. (private_region::dump_to_pp): New. * region.h (MEMSPACE_PRIVATE): New. (RK_PRIVATE): New. (class private_region): New. (is_a_helper ::test): New. * store.cc (store::replay_call_summary_cluster): Handle RK_PRIVATE. * svalue.h (struct conjured_svalue::key_t): Add "idx" param to ctor and "m_idx" field. (class conjured_svalue::conjured_svalue): Likewise. gcc/ChangeLog: PR analyzer/107573 * doc/invoke.texi: Add -Wanalyzer-undefined-behavior-strtok. gcc/testsuite/ChangeLog: PR analyzer/107573 * c-c++-common/analyzer/strtok-1.c: New test. * c-c++-common/analyzer/strtok-2.c: New test. * c-c++-common/analyzer/strtok-3.c: New test. * c-c++-common/analyzer/strtok-4.c: New test. * c-c++-common/analyzer/strtok-cppreference.c: New test. Signed-off-by: David Malcolm --- gcc/analyzer/analyzer.h | 3 +- gcc/analyzer/analyzer.opt | 4 + gcc/analyzer/call-summary.cc | 1 + gcc/analyzer/engine.cc | 3 +- gcc/analyzer/kf.cc | 320 +++++++++++++++++- gcc/analyzer/region-model-manager.cc | 10 +- gcc/analyzer/region-model-manager.h | 4 +- gcc/analyzer/region-model.cc | 7 +- gcc/analyzer/region-model.h | 3 +- gcc/analyzer/region.cc | 14 + gcc/analyzer/region.h | 41 ++- gcc/analyzer/store.cc | 1 + gcc/analyzer/svalue.h | 13 +- gcc/doc/invoke.texi | 14 + .../c-c++-common/analyzer/strtok-1.c | 62 ++++ .../c-c++-common/analyzer/strtok-2.c | 18 + .../c-c++-common/analyzer/strtok-3.c | 26 ++ .../c-c++-common/analyzer/strtok-4.c | 42 +++ .../analyzer/strtok-cppreference.c | 50 +++ 19 files changed, 619 insertions(+), 17 deletions(-) create mode 100644 gcc/testsuite/c-c++-common/analyzer/strtok-1.c create mode 100644 gcc/testsuite/c-c++-common/analyzer/strtok-2.c create mode 100644 gcc/testsuite/c-c++-common/analyzer/strtok-3.c create mode 100644 gcc/testsuite/c-c++-common/analyzer/strtok-4.c create mode 100644 gcc/testsuite/c-c++-common/analyzer/strtok-cppreference.c diff --git a/gcc/analyzer/analyzer.h b/gcc/analyzer/analyzer.h index 777293ff4b9df..f08572bb633ef 100644 --- a/gcc/analyzer/analyzer.h +++ b/gcc/analyzer/analyzer.h @@ -326,7 +326,8 @@ class pure_known_function_with_default_return : public known_function void impl_call_pre (const call_details &cd) const override; }; -extern void register_known_functions (known_function_manager &mgr); +extern void register_known_functions (known_function_manager &kfm, + region_model_manager &rmm); extern void register_known_analyzer_functions (known_function_manager &kfm); extern void register_known_fd_functions (known_function_manager &kfm); extern void register_known_file_functions (known_function_manager &kfm); diff --git a/gcc/analyzer/analyzer.opt b/gcc/analyzer/analyzer.opt index fae2649389a87..a3c30caf2abe5 100644 --- a/gcc/analyzer/analyzer.opt +++ b/gcc/analyzer/analyzer.opt @@ -222,6 +222,10 @@ Wanalyzer-tainted-size Common Var(warn_analyzer_tainted_size) Init(1) Warning Warn about code paths in which an unsanitized value is used as a size. +Wanalyzer-undefined-behavior-strtok +Common Var(warn_analyzer_undefined_behavior_strtok) Init(1) Warning +Warn about code paths in in which a call is made to strtok with undefined behavior. + Wanalyzer-use-after-free Common Var(warn_analyzer_use_after_free) Init(1) Warning Warn about code paths in which a freed value is used. diff --git a/gcc/analyzer/call-summary.cc b/gcc/analyzer/call-summary.cc index a18a1b1b40a7a..ecb6fb13c9ece 100644 --- a/gcc/analyzer/call-summary.cc +++ b/gcc/analyzer/call-summary.cc @@ -585,6 +585,7 @@ call_summary_replay::convert_region_from_summary_1 (const region *summary_reg) case RK_STRING: case RK_ERRNO: case RK_UNKNOWN: + case RK_PRIVATE: /* We can reuse these regions directly. */ return summary_reg; diff --git a/gcc/analyzer/engine.cc b/gcc/analyzer/engine.cc index 041fe6a1d4006..b4e855fcf24a4 100644 --- a/gcc/analyzer/engine.cc +++ b/gcc/analyzer/engine.cc @@ -6174,7 +6174,8 @@ impl_run_checkers (logger *logger) auto_delete_vec checkers; make_checkers (checkers, logger); - register_known_functions (*eng.get_known_function_manager ()); + register_known_functions (*eng.get_known_function_manager (), + *eng.get_model_manager ()); plugin_analyzer_init_impl data (&checkers, eng.get_known_function_manager (), diff --git a/gcc/analyzer/kf.cc b/gcc/analyzer/kf.cc index 92959891fe440..5d8e04d97263b 100644 --- a/gcc/analyzer/kf.cc +++ b/gcc/analyzer/kf.cc @@ -40,6 +40,40 @@ along with GCC; see the file COPYING3. If not see namespace ana { +/* Abstract subclass for describing undefined behavior of an API. */ + +class undefined_function_behavior + : public pending_diagnostic_subclass +{ +public: + undefined_function_behavior (const call_details &cd) + : m_call_stmt (cd.get_call_stmt ()), + m_callee_fndecl (cd.get_fndecl_for_call ()) + { + gcc_assert (m_call_stmt); + gcc_assert (m_callee_fndecl); + } + + const char *get_kind () const final override + { + return "undefined_behavior"; + } + + bool operator== (const undefined_function_behavior &other) const + { + return (m_call_stmt == other.m_call_stmt + && m_callee_fndecl == other.m_callee_fndecl); + } + + bool terminate_path_p () const final override { return true; } + + tree get_callee_fndecl () const { return m_callee_fndecl; } + +private: + const gimple *m_call_stmt; + tree m_callee_fndecl; +}; + /* class pure_known_function_with_default_return : public known_function. */ void @@ -1679,6 +1713,288 @@ kf_strstr::impl_call_post (const call_details &cd) const } } +/* Handle calls to "strtok". + See e.g. + https://en.cppreference.com/w/c/string/byte/strtok + https://man7.org/linux/man-pages/man3/strtok.3.html */ + +class kf_strtok : public known_function +{ +public: + class undefined_behavior : public undefined_function_behavior + { + public: + undefined_behavior (const call_details &cd) + : undefined_function_behavior (cd) + { + } + int get_controlling_option () const final override + { + return OPT_Wanalyzer_undefined_behavior_strtok; + } + + bool emit (rich_location *rich_loc, logger *) final override + { + /* CWE-476: NULL Pointer Dereference. */ + diagnostic_metadata m; + m.add_cwe (476); + if (warning_meta + (rich_loc, m, get_controlling_option (), + "calling %qD for first time with NULL as argument 1" + " has undefined behavior", + get_callee_fndecl ())) + { + inform (rich_loc->get_loc (), + "some implementations of %qD may crash on such input", + get_callee_fndecl ()); + return true; + } + return false; + } + + label_text describe_final_event (const evdesc::final_event &ev) + final override + { + return ev.formatted_print + ("calling %qD for first time with NULL as argument 1" + " has undefined behavior", + get_callee_fndecl ()); + } + }; + + /* An outcome of a "strtok" call. + We have a four-way bifurcation of the analysis via the + 4 combinations of two flags: + - m_nonnull_str covers whether the "str" param was null or non-null + - m_found covers whether the result is null or non-null + */ + class strtok_call_info : public call_info + { + public: + strtok_call_info (const call_details &cd, + const private_region &private_reg, + bool nonnull_str, + bool found) + : call_info (cd), + m_private_reg (private_reg), + m_nonnull_str (nonnull_str), + m_found (found) + { + } + + label_text get_desc (bool can_colorize) const final override + { + if (m_nonnull_str) + { + if (m_found) + return make_label_text + (can_colorize, + "when %qE on non-NULL string returns non-NULL", + get_fndecl ()); + else + return make_label_text + (can_colorize, + "when %qE on non-NULL string returns NULL", + get_fndecl ()); + } + else + { + if (m_found) + return make_label_text + (can_colorize, + "when %qE with NULL string (using prior) returns non-NULL", + get_fndecl ()); + else + return make_label_text + (can_colorize, + "when %qE with NULL string (using prior) returns NULL", + get_fndecl ()); + } + } + + bool update_model (region_model *model, + const exploded_edge *, + region_model_context *ctxt) const final override + { + region_model_manager *mgr = model->get_manager (); + const call_details cd (get_call_details (model, ctxt)); + const svalue *str_sval = cd.get_arg_svalue (0); + /* const svalue *delim_sval = cd.get_arg_svalue (1); */ + + cd.check_for_null_terminated_string_arg (1); + /* We check that either arg 0 or the private region is null + terminated below. */ + + const svalue *null_ptr_sval + = mgr->get_or_create_null_ptr (cd.get_arg_type (0));; + if (!model->add_constraint (str_sval, + m_nonnull_str ? NE_EXPR : EQ_EXPR, + null_ptr_sval, + cd.get_ctxt ())) + return false; + + if (m_nonnull_str) + { + /* Update internal buffer. */ + model->set_value (&m_private_reg, + mgr->get_or_create_unmergeable (str_sval), + ctxt); + } + else + { + /* Read from internal buffer. */ + str_sval = model->get_store_value (&m_private_reg, ctxt); + + /* The initial value of the private region is NULL when we're + on a path from main. */ + if (const initial_svalue *initial_sval + = str_sval->dyn_cast_initial_svalue ()) + if (initial_sval->get_region () == &m_private_reg + && model->called_from_main_p ()) + { + /* Implementations of strtok do not necessarily check for NULL + here, and may crash; see PR analyzer/107573. + Warn for this, if we were definitely passed NULL. */ + if (cd.get_arg_svalue (0)->all_zeroes_p ()) + { + if (ctxt) + ctxt->warn (::make_unique (cd)); + } + + /* Assume that "str" was actually non-null; terminate + this path. */ + return false; + } + + /* Now assume str_sval is non-null. */ + if (!model->add_constraint (str_sval, + NE_EXPR, + null_ptr_sval, + cd.get_ctxt ())) + return false; + } + + const region *buf_reg = model->deref_rvalue (str_sval, NULL_TREE, ctxt); + model->scan_for_null_terminator (buf_reg, + NULL_TREE, + nullptr, + ctxt); + + if (m_found) + { + const region *str_reg + = model->deref_rvalue (str_sval, cd.get_arg_tree (0), + cd.get_ctxt ()); + /* We want to figure out the start and nul terminator + for the token. + For each, we want str_sval + OFFSET for some unknown OFFSET. + Use a conjured_svalue to represent the offset, + using the str_reg as the id of the conjured_svalue. */ + const svalue *start_offset + = mgr->get_or_create_conjured_svalue (size_type_node, + cd.get_call_stmt (), + str_reg, + conjured_purge (model, + ctxt), + 0); + const svalue *nul_offset + = mgr->get_or_create_conjured_svalue (size_type_node, + cd.get_call_stmt (), + str_reg, + conjured_purge (model, + ctxt), + 1); + + tree char_ptr_type = build_pointer_type (char_type_node); + const svalue *result + = mgr->get_or_create_binop (char_ptr_type, POINTER_PLUS_EXPR, + str_sval, start_offset); + cd.maybe_set_lhs (result); + + /* nul_offset + 1; the offset to use for the next call. */ + const svalue *next_offset + = mgr->get_or_create_binop (size_type_node, PLUS_EXPR, + nul_offset, + mgr->get_or_create_int_cst + (char_type_node, 1)); + + /* Write '\0' to str_sval[nul_offset]. */ + const svalue *ptr_to_term + = mgr->get_or_create_binop (char_ptr_type, POINTER_PLUS_EXPR, + str_sval, nul_offset); + const region *terminator_reg + = model->deref_rvalue (ptr_to_term, NULL_TREE, cd.get_ctxt ()); + model->set_value (terminator_reg, + mgr->get_or_create_unmergeable + (mgr->get_or_create_int_cst (char_type_node, + 0)), + cd.get_ctxt ()); + + /* Update saved ptr to be at [nul_offset + 1]. */ + const svalue *ptr_to_next + = mgr->get_or_create_binop (cd.get_lhs_type (), POINTER_PLUS_EXPR, + str_sval, next_offset); + model->set_value (&m_private_reg, ptr_to_next, ctxt); + } + else + if (tree lhs_type = cd.get_lhs_type ()) + { + const svalue *result + = mgr->get_or_create_int_cst (lhs_type, 0); + cd.maybe_set_lhs (result); + } + return true; + } + private: + const private_region &m_private_reg; + bool m_nonnull_str; + bool m_found; + }; // class strtok_call_info + + kf_strtok (region_model_manager &mgr) + : m_private_reg (mgr.alloc_symbol_id (), + mgr.get_root_region (), + get_region_type (), + "strtok buffer") + { + } + + bool matches_call_types_p (const call_details &cd) const final override + { + return (cd.num_args () == 2 + && POINTER_TYPE_P (cd.get_arg_type (0)) + && POINTER_TYPE_P (cd.get_arg_type (1))); + } + + void impl_call_post (const call_details &cd) const final override + { + if (cd.get_ctxt ()) + { + /* Four-way bifurcation, based on whether: + - the str is non-null + - the result is non-null + Typically the str is either null or non-null at a particular site, + so hopefully this will generally just lead to two out-edges. */ + cd.get_ctxt ()->bifurcate + (make_unique (cd, m_private_reg, false, false)); + cd.get_ctxt ()->bifurcate + (make_unique (cd, m_private_reg, false, true)); + cd.get_ctxt ()->bifurcate + (make_unique (cd, m_private_reg, true, false)); + cd.get_ctxt ()->bifurcate + (make_unique (cd, m_private_reg, true, true)); + cd.get_ctxt ()->terminate_path (); + } + } + +private: + static tree get_region_type () + { + return build_pointer_type (char_type_node); + } + const private_region m_private_reg; +}; + class kf_ubsan_bounds : public internal_known_function { /* Empty. */ @@ -1823,7 +2139,8 @@ register_atomic_builtins (known_function_manager &kfm) analyzer (as opposed to plugins). */ void -register_known_functions (known_function_manager &kfm) +register_known_functions (known_function_manager &kfm, + region_model_manager &rmm) { /* Debugging/test support functions, all with a "__analyzer_" prefix. */ register_known_analyzer_functions (kfm); @@ -1911,6 +2228,7 @@ register_known_functions (known_function_manager &kfm) { kfm.add ("fopen", make_unique ()); kfm.add ("putenv", make_unique ()); + kfm.add ("strtok", make_unique (rmm)); register_known_fd_functions (kfm); register_known_file_functions (kfm); diff --git a/gcc/analyzer/region-model-manager.cc b/gcc/analyzer/region-model-manager.cc index 22246876f8f92..921edc558681a 100644 --- a/gcc/analyzer/region-model-manager.cc +++ b/gcc/analyzer/region-model-manager.cc @@ -1261,7 +1261,8 @@ conjured_purge::purge (const conjured_svalue *sval) const } /* Return the svalue * of type TYPE for the value conjured for ID_REG - at STMT, creating it if necessary. + at STMT (using IDX for any further disambiguation), + creating it if necessary. Use P to purge existing state from the svalue, for the case where a conjured_svalue would be reused along an execution path. */ @@ -1269,9 +1270,10 @@ const svalue * region_model_manager::get_or_create_conjured_svalue (tree type, const gimple *stmt, const region *id_reg, - const conjured_purge &p) + const conjured_purge &p, + unsigned idx) { - conjured_svalue::key_t key (type, stmt, id_reg); + conjured_svalue::key_t key (type, stmt, id_reg, idx); if (conjured_svalue **slot = m_conjured_values_map.get (key)) { const conjured_svalue *sval = *slot; @@ -1283,7 +1285,7 @@ region_model_manager::get_or_create_conjured_svalue (tree type, return sval; } conjured_svalue *conjured_sval - = new conjured_svalue (alloc_symbol_id (), type, stmt, id_reg); + = new conjured_svalue (alloc_symbol_id (), type, stmt, id_reg, idx); RETURN_UNKNOWN_IF_TOO_COMPLEX (conjured_sval); m_conjured_values_map.put (key, conjured_sval); return conjured_sval; diff --git a/gcc/analyzer/region-model-manager.h b/gcc/analyzer/region-model-manager.h index a5281819a69be..2ddd7de7ccf9d 100644 --- a/gcc/analyzer/region-model-manager.h +++ b/gcc/analyzer/region-model-manager.h @@ -79,7 +79,8 @@ class region_model_manager const binding_map &map); const svalue *get_or_create_conjured_svalue (tree type, const gimple *stmt, const region *id_reg, - const conjured_purge &p); + const conjured_purge &p, + unsigned idx = 0); const svalue * get_or_create_asm_output_svalue (tree type, const gasm *asm_stmt, @@ -105,6 +106,7 @@ class region_model_manager const svalue *create_unique_svalue (tree type); /* region consolidation. */ + const root_region *get_root_region () const { return &m_root_region; } const stack_region * get_stack_region () const { return &m_stack_region; } const heap_region *get_heap_region () const { return &m_heap_region; } const code_region *get_code_region () const { return &m_code_region; } diff --git a/gcc/analyzer/region-model.cc b/gcc/analyzer/region-model.cc index 052c47d38f179..420c10380a4e7 100644 --- a/gcc/analyzer/region-model.cc +++ b/gcc/analyzer/region-model.cc @@ -3884,8 +3884,10 @@ region_model::scan_for_null_terminator (const region *reg, byte_range bytes_to_read (src_byte_offset, 1); const svalue *sval = get_store_bytes (base_reg, bytes_to_read, ctxt); tree byte_expr - = get_tree_for_byte_offset (expr, - src_byte_offset - initial_src_byte_offset); + = (expr + ? get_tree_for_byte_offset (expr, + src_byte_offset - initial_src_byte_offset) + : NULL_TREE); check_for_poison (sval, byte_expr, nullptr, ctxt); if (base_reg->can_have_initial_svalue_p ()) { @@ -5077,6 +5079,7 @@ region_model::get_representative_path_var_1 (const region *reg, case RK_VAR_ARG: case RK_ERRNO: case RK_UNKNOWN: + case RK_PRIVATE: return path_var (NULL_TREE, 0); } } diff --git a/gcc/analyzer/region-model.h b/gcc/analyzer/region-model.h index e26d18713b1de..2e15924fddb8f 100644 --- a/gcc/analyzer/region-model.h +++ b/gcc/analyzer/region-model.h @@ -559,6 +559,8 @@ class region_model callback (model, prev_model, retval, ctxt); } + bool called_from_main_p () const; + private: const region *get_lvalue_1 (path_var pv, region_model_context *ctxt) const; const svalue *get_rvalue_1 (path_var pv, region_model_context *ctxt) const; @@ -606,7 +608,6 @@ class region_model bool nonnull, region_model_context *ctxt); - bool called_from_main_p () const; const svalue *get_initial_value_for_global (const region *reg) const; const region * get_region_for_poisoned_expr (tree expr) const; diff --git a/gcc/analyzer/region.cc b/gcc/analyzer/region.cc index 730dab3d707bb..4feb9721ae78d 100644 --- a/gcc/analyzer/region.cc +++ b/gcc/analyzer/region.cc @@ -514,6 +514,8 @@ region::get_memory_space () const return MEMSPACE_HEAP; case RK_STRING: return MEMSPACE_READONLY_DATA; + case RK_PRIVATE: + return MEMSPACE_PRIVATE; } if (iter->get_kind () == RK_CAST) iter = iter->dyn_cast_cast_region ()->get_original_region (); @@ -543,6 +545,7 @@ region::can_have_initial_svalue_p () const case MEMSPACE_CODE: case MEMSPACE_GLOBALS: case MEMSPACE_READONLY_DATA: + case MEMSPACE_PRIVATE: /* Such regions have initial_svalues. */ return true; @@ -2259,6 +2262,17 @@ errno_region::dump_to_pp (pretty_printer *pp, bool simple) const pp_string (pp, "errno_region()"); } +/* class private_region : public region. */ + +void +private_region::dump_to_pp (pretty_printer *pp, bool simple) const +{ + if (simple) + pp_printf (pp, "PRIVATE_REG(%qs)", m_desc); + else + pp_printf (pp, "private_region(%qs)", m_desc); +} + /* class unknown_region : public region. */ /* Implementation of region::dump_to_pp vfunc for unknown_region. */ diff --git a/gcc/analyzer/region.h b/gcc/analyzer/region.h index 47242385fd156..fee161cf86301 100644 --- a/gcc/analyzer/region.h +++ b/gcc/analyzer/region.h @@ -35,7 +35,8 @@ enum memory_space MEMSPACE_STACK, MEMSPACE_HEAP, MEMSPACE_READONLY_DATA, - MEMSPACE_THREAD_LOCAL + MEMSPACE_THREAD_LOCAL, + MEMSPACE_PRIVATE }; /* An enum for discriminating between the different concrete subclasses @@ -65,6 +66,7 @@ enum region_kind RK_BIT_RANGE, RK_VAR_ARG, RK_ERRNO, + RK_PRIVATE, RK_UNKNOWN, }; @@ -108,6 +110,7 @@ enum region_kind var_arg_region (RK_VAR_ARG): a region for the N-th vararg within a frame_region for a variadic call errno_region (RK_ERRNO): a region for holding "errno" + private_region (RK_PRIVATE): a region for internal state of an API unknown_region (RK_UNKNOWN): for handling unimplemented tree codes. */ /* Abstract base class for representing ways of accessing chunks of memory. @@ -1434,6 +1437,42 @@ is_a_helper ::test (const region *reg) namespace ana { +/* Similar to a decl region, but we don't have the decl. + For implementing e.g. static buffers of known_functions, + or other internal state of an API. + + These are owned by known_function instances, rather than the + region_model_manager. */ + +class private_region : public region +{ +public: + private_region (unsigned id, const region *parent, tree type, + const char *desc) + : region (complexity (parent), id, parent, type), + m_desc (desc) + {} + + enum region_kind get_kind () const final override { return RK_PRIVATE; } + + void dump_to_pp (pretty_printer *pp, bool simple) const final override; + +private: + const char *m_desc; +}; + +} // namespace ana + +template <> +template <> +inline bool +is_a_helper ::test (const region *reg) +{ + return reg->get_kind () == RK_PRIVATE; +} + +namespace ana { + /* An unknown region, for handling unimplemented tree codes. */ class unknown_region : public region diff --git a/gcc/analyzer/store.cc b/gcc/analyzer/store.cc index aeea69311378d..602508598cc95 100644 --- a/gcc/analyzer/store.cc +++ b/gcc/analyzer/store.cc @@ -3372,6 +3372,7 @@ store::replay_call_summary_cluster (call_summary_replay &r, case RK_HEAP_ALLOCATED: case RK_DECL: case RK_ERRNO: + case RK_PRIVATE: { const region *caller_dest_reg = r.convert_region_from_summary (summary_base_reg); diff --git a/gcc/analyzer/svalue.h b/gcc/analyzer/svalue.h index 263a0d7af6f58..9415e9808959b 100644 --- a/gcc/analyzer/svalue.h +++ b/gcc/analyzer/svalue.h @@ -1360,8 +1360,8 @@ class conjured_svalue : public svalue /* A support class for uniquifying instances of conjured_svalue. */ struct key_t { - key_t (tree type, const gimple *stmt, const region *id_reg) - : m_type (type), m_stmt (stmt), m_id_reg (id_reg) + key_t (tree type, const gimple *stmt, const region *id_reg, unsigned idx) + : m_type (type), m_stmt (stmt), m_id_reg (id_reg), m_idx (idx) {} hashval_t hash () const @@ -1377,7 +1377,8 @@ class conjured_svalue : public svalue { return (m_type == other.m_type && m_stmt == other.m_stmt - && m_id_reg == other.m_id_reg); + && m_id_reg == other.m_id_reg + && m_idx == other.m_idx); } /* Use m_stmt to mark empty/deleted, as m_type can be NULL for @@ -1393,12 +1394,13 @@ class conjured_svalue : public svalue tree m_type; const gimple *m_stmt; const region *m_id_reg; + unsigned m_idx; }; conjured_svalue (symbol::id_t id, tree type, const gimple *stmt, - const region *id_reg) + const region *id_reg, unsigned idx) : svalue (complexity (id_reg), id, type), - m_stmt (stmt), m_id_reg (id_reg) + m_stmt (stmt), m_id_reg (id_reg), m_idx (idx) { gcc_assert (m_stmt != NULL); } @@ -1419,6 +1421,7 @@ class conjured_svalue : public svalue private: const gimple *m_stmt; const region *m_id_reg; + unsigned m_idx; }; } // namespace ana diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 557d613a1e63a..1f109679c7030 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -490,6 +490,7 @@ Objective-C and Objective-C++ Dialects}. -Wno-analyzer-tainted-offset -Wno-analyzer-tainted-size -Wanalyzer-too-complex +-Wno-analyzer-undefined-behavior-strtok -Wno-analyzer-unsafe-call-within-signal-handler -Wno-analyzer-use-after-free -Wno-analyzer-use-of-pointer-in-stale-stack-frame @@ -10424,6 +10425,7 @@ Enabling this option effectively enables the following warnings: -Wanalyzer-tainted-divisor -Wanalyzer-tainted-offset -Wanalyzer-tainted-size +-Wanalyzer-undefined-behavior-strtok -Wanalyzer-unsafe-call-within-signal-handler -Wanalyzer-use-after-free -Wanalyzer-use-of-pointer-in-stale-stack-frame @@ -11060,6 +11062,18 @@ attacker could inject an out-of-bounds access. See @uref{https://cwe.mitre.org/data/definitions/129.html, CWE-129: Improper Validation of Array Index}. +@opindex Wanalyzer-undefined-behavior-strtok +@opindex Wno-analyzer-undefined-behavior-strtok +@item -Wno-analyzer-undefined-behavior-strtok +This warning requires @option{-fanalyzer}, which enables it; use +@option{-Wno-analyzer-undefined-behavior-strtok} to disable it. + +This diagnostic warns for paths through the code in which a +call is made to @code{strtok} with undefined behavior. + +Specifically, passing NULL as the first parameter for the initial +call to @code{strtok} within a process has undefined behavior. + @opindex Wanalyzer-unsafe-call-within-signal-handler @opindex Wno-analyzer-unsafe-call-within-signal-handler @item -Wno-analyzer-unsafe-call-within-signal-handler diff --git a/gcc/testsuite/c-c++-common/analyzer/strtok-1.c b/gcc/testsuite/c-c++-common/analyzer/strtok-1.c new file mode 100644 index 0000000000000..33150ced90a54 --- /dev/null +++ b/gcc/testsuite/c-c++-common/analyzer/strtok-1.c @@ -0,0 +1,62 @@ +/* { dg-additional-options "-fpermissive" { target c++ } } */ + +#include "../../gcc.dg/analyzer/analyzer-decls.h" + +typedef __SIZE_TYPE__ size_t; + +extern char *strtok (char *str, const char *delim) + __attribute__((nonnull (2))); + +char * +test_passthrough (char *str, const char *delim) +{ + return strtok (str, delim); +} + +char * +test_null_str (const char *delim) +{ + return strtok (NULL, delim); +} + +char * +test_null_delim (char *str) +{ + return strtok (str, NULL); /* { dg-warning "use of NULL where non-null expected" } */ + /* This is from the attribute. */ +} + +char * +test_write_to_literal (void) +{ + const char *str = "hello world"; + return strtok ((char *)str, " "); /* { dg-warning "write to string literal" } */ +} + +char * +test_unterminated_str (const char *delim) +{ + char str[3] = "abc"; /* { dg-warning "initializer-string for '\[^\n\]*' is too long" "" { target c++ } } */ + return strtok (str, delim); /* { dg-warning "stack-based buffer over-read" } */ +} + +char * +test_unterminated_delimstr (char *str) +{ + char delim[3] = "abc"; /* { dg-warning "initializer-string for '\[^\n\]*' is too long" "" { target c++ } } */ + return strtok (str, delim); /* { dg-warning "stack-based buffer over-read" } */ + /* { dg-message "while looking for null terminator for argument 2" "" { target *-*-* } .-1 } */ +} + +size_t +test_use_after_free_via_null_2 (char *p) +{ + strtok (p, " "); + __builtin_free (p); + + char *q = strtok (NULL, " "); /* TODO: should complain about this. */ + if (q) + return __builtin_strlen (q); /* TODO: should complain about this. */ + else + return 0; +} diff --git a/gcc/testsuite/c-c++-common/analyzer/strtok-2.c b/gcc/testsuite/c-c++-common/analyzer/strtok-2.c new file mode 100644 index 0000000000000..0336bf0cfe9d6 --- /dev/null +++ b/gcc/testsuite/c-c++-common/analyzer/strtok-2.c @@ -0,0 +1,18 @@ +#include "../../gcc.dg/analyzer/analyzer-decls.h" + +extern char *strtok (char *str, const char *delim) + __attribute__((nonnull (2))); + +int +main(int argc, char *argv[]) +{ + char *cmd; + char *arg; + + if (argc < 2) + return -1; + + cmd = strtok (argv[1], " "); /* { dg-bogus "undefined behavior" } */ + arg = strtok (NULL, " "); + return 0; +} diff --git a/gcc/testsuite/c-c++-common/analyzer/strtok-3.c b/gcc/testsuite/c-c++-common/analyzer/strtok-3.c new file mode 100644 index 0000000000000..f18f1a9eebe82 --- /dev/null +++ b/gcc/testsuite/c-c++-common/analyzer/strtok-3.c @@ -0,0 +1,26 @@ +#include "../../gcc.dg/analyzer/analyzer-decls.h" + +extern char *strtok (char *str, const char *delim) + __attribute__((nonnull (2))); + +int +main (int argc, char *argv[]) +{ + char *cmd; + char *arg; + + if (argc < 2) + return -1; + + cmd = strtok (NULL, " "); /* { dg-line "first_call" } */ + arg = strtok (NULL, " "); + return 0; + + /* C: + { dg-warning "calling 'strtok' for first time with NULL as argument 1 has undefined behavior \\\[CWE-476\\\] \\\[-Wanalyzer-undefined-behavior-strtok\\\]" "" { target c } first_call } + { dg-message "some implementations of 'strtok' may crash on such input" "" { target c } first_call } */ + + /* C++: + { dg-warning "calling 'char\\* strtok\\(char\\*, const char\\*\\)' for first time with NULL as argument 1 has undefined behavior \\\[CWE-476\\\] \\\[-Wanalyzer-undefined-behavior-strtok\\\]" "" { target c++ } first_call } + { dg-message "some implementations of 'char\\* strtok\\(char\\*, const char\\*\\)' may crash on such input" "" { target c++ } first_call } */ +} diff --git a/gcc/testsuite/c-c++-common/analyzer/strtok-4.c b/gcc/testsuite/c-c++-common/analyzer/strtok-4.c new file mode 100644 index 0000000000000..b6b7d49e3c3c2 --- /dev/null +++ b/gcc/testsuite/c-c++-common/analyzer/strtok-4.c @@ -0,0 +1,42 @@ +#include "../../gcc.dg/analyzer/analyzer-decls.h" + +extern char *strtok (char *str, const char *delim); + +void test (void) +{ + char buffer[] = { 'a', 'x', 'b', 'y', 'c', '\0' }; + + char *p1 = strtok (buffer, "x"); + /* Should result in: + | buffer[] = { 'a', '\0', 'b', 'y', 'c', '\0' }, + | ^ ^ ^ + | | | | + | | | internal ptr + | p1 modified. */ + + char *p2 = strtok (NULL, "y"); /* note new delimiter. */ + /* Should result in: + | buffer[] = { 'a', '\0', 'b', '\0', 'c', '\0' }, + | ^ ^ ^ + | | | | + | | | internal ptr + | p2 modified. */ + + char *p3 = strtok (NULL, "z"); /* again new delimiter. */ + /* Should result in: + | buffer[] = { 'a', '\0', 'b', '\0', 'c', '\0' }, + | ^ ^~ + | | | + | | internal ptr + | p3. */ + + char *p4 = strtok (NULL, "z"); + /* Should result in p4 == NULL, and: + | buffer[] = { 'a', '\0', 'b', '\0', 'c', '\0' }, + | ^~ + | | + | internal ptr. */ + + /* We don't yet model strtok closely enough to capture + these exact behaviors. */ +} diff --git a/gcc/testsuite/c-c++-common/analyzer/strtok-cppreference.c b/gcc/testsuite/c-c++-common/analyzer/strtok-cppreference.c new file mode 100644 index 0000000000000..a2e912341d687 --- /dev/null +++ b/gcc/testsuite/c-c++-common/analyzer/strtok-cppreference.c @@ -0,0 +1,50 @@ +/* Example of strtok adapted from: + https://en.cppreference.com/w/c/string/byte/strtok + which is + "licensed under Creative Commons Attribution-Sharealike 3.0 + Unported License (CC-BY-SA) and by the GNU Free Documentation License + (GFDL) (unversioned, with no invariant sections, front-cover texts, or + back-cover texts). That means that you can use this site in almost any way + you like, including mirroring, copying, translating, etc. All we would ask + is to provide link back to cppreference.com so that people know where to + get the most up-to-date content. In addition to that, any modified content + should be released under an equivalent license so that everyone could + benefit from the modified versions. " */ + +#define __STDC_WANT_LIB_EXT1__ 0 +#include +#include + +int main(void) +{ + char input[] = "A bird came down the walk"; + printf("Parsing the input string '%s'\n", input); + char *token = strtok(input, " "); + while(token) { + puts(token); + token = strtok(NULL, " "); + } + + printf("Contents of the input string now: '"); + for(size_t n = 0; n < sizeof input; ++n) + input[n] ? putchar(input[n]) : fputs("\\0", stdout); + puts("'"); + +#ifdef __STDC_LIB_EXT1__ + char str[] = "A bird came down the walk"; + rsize_t strmax = sizeof str; + const char *delim = " "; + char *next_token; + printf("Parsing the input string '%s'\n", str); + token = strtok_s(str, &strmax, delim, &next_token); + while(token) { + puts(token); + token = strtok_s(NULL, &strmax, delim, &next_token); + } + + printf("Contents of the input string now: '"); + for(size_t n = 0; n < sizeof str; ++n) + str[n] ? putchar(str[n]) : fputs("\\0", stdout); + puts("'"); +#endif +}