From c7fd1cd4f918bbe46b24249f39169d03d2bc9f59 Mon Sep 17 00:00:00 2001 From: Kamil Dudka Date: Sat, 4 Mar 2023 11:47:07 +0100 Subject: [PATCH 1/5] writer-json-common: common utils for specific JSON writers This commit moves sanitizeUTF8() from writer-json.cc to writer-json-common.cc. Related: https://github.com/csutils/csdiff/pull/115 --- src/lib/CMakeLists.txt | 1 + src/lib/writer-json-common.cc | 31 +++++++++++++++++++++++++++++++ src/lib/writer-json-common.hh | 28 ++++++++++++++++++++++++++++ src/lib/writer-json.cc | 11 +---------- 4 files changed, 61 insertions(+), 10 deletions(-) create mode 100644 src/lib/writer-json-common.cc create mode 100644 src/lib/writer-json-common.hh diff --git a/src/lib/CMakeLists.txt b/src/lib/CMakeLists.txt index 28ee9ce4..b370d08a 100644 --- a/src/lib/CMakeLists.txt +++ b/src/lib/CMakeLists.txt @@ -44,4 +44,5 @@ add_library(cs STATIC writer-cov.cc writer-html.cc writer-json.cc + writer-json-common.cc ) diff --git a/src/lib/writer-json-common.cc b/src/lib/writer-json-common.cc new file mode 100644 index 00000000..af788838 --- /dev/null +++ b/src/lib/writer-json-common.cc @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2011 - 2023 Red Hat, Inc. + * + * This file is part of csdiff. + * + * csdiff is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * any later version. + * + * csdiff is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with csdiff. If not, see . 
+ */ + +#include "writer-json-common.hh" + +#include + +std::string sanitizeUTF8(const std::string &str) +{ + using boost::nowide::utf::convert_string; + + // every non-UTF8 sequence will be replaced with 0xEF 0xBF 0xBD which + // corresponds to REPLACEMENT CHARACTER U+FFFD + return convert_string(str.data(), str.data() + str.size()); +} diff --git a/src/lib/writer-json-common.hh b/src/lib/writer-json-common.hh new file mode 100644 index 00000000..9f159d3c --- /dev/null +++ b/src/lib/writer-json-common.hh @@ -0,0 +1,28 @@ +/* + * Copyright (C) 2011 - 2023 Red Hat, Inc. + * + * This file is part of csdiff. + * + * csdiff is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * any later version. + * + * csdiff is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with csdiff. If not, see . 
+ */ + +#ifndef H_GUARD_WRITER_JSON_COMMON_H +#define H_GUARD_WRITER_JSON_COMMON_H + +#include + +/// sanitize byte sequences that are not valid in UTF-8 encoding +std::string sanitizeUTF8(const std::string &str); + +#endif /* H_GUARD_WRITER_JSON_COMMON_H */ diff --git a/src/lib/writer-json.cc b/src/lib/writer-json.cc index 51c7719e..fe484ea4 100644 --- a/src/lib/writer-json.cc +++ b/src/lib/writer-json.cc @@ -22,25 +22,16 @@ #include "abstract-tree.hh" #include "regex.hh" #include "version.hh" +#include "writer-json-common.hh" #include #include #include #include -#include using namespace boost::json; -static inline std::string sanitizeUTF8(const std::string &str) -{ - using boost::nowide::utf::convert_string; - - // every non-UTF8 sequence will be replaced with 0xEF 0xBF 0xBD which - // corresponds to REPLACEMENT CHARACTER U+FFFD - return convert_string(str.data(), str.data() + str.size()); -} - static void prettyPrint(std::ostream&, const value&, std::string* = nullptr); static inline void prettyPrintArray( From 184c906270a36e8c823cdc4d1f551d8dfd676170 Mon Sep 17 00:00:00 2001 From: Kamil Dudka Date: Sat, 4 Mar 2023 11:59:46 +0100 Subject: [PATCH 2/5] writer-json-common: adopt jsonSerializeScanProps() ... 
from the writer-json module Related: https://github.com/csutils/csdiff/pull/115 --- src/lib/writer-json-common.cc | 21 +++++++++++++++++++++ src/lib/writer-json-common.hh | 7 +++++++ src/lib/writer-json.cc | 22 ++-------------------- 3 files changed, 30 insertions(+), 20 deletions(-) diff --git a/src/lib/writer-json-common.cc b/src/lib/writer-json-common.cc index af788838..00e4e3e8 100644 --- a/src/lib/writer-json-common.cc +++ b/src/lib/writer-json-common.cc @@ -20,6 +20,9 @@ #include "writer-json-common.hh" #include +#include + +using namespace boost::json; std::string sanitizeUTF8(const std::string &str) { @@ -29,3 +32,21 @@ std::string sanitizeUTF8(const std::string &str) // corresponds to REPLACEMENT CHARACTER U+FFFD return convert_string(str.data(), str.data() + str.size()); } + +// TODO: This should not be necessary! TScanProps should be able to contain +// any type so that no conversions here are needed (empty values stay strings). +object jsonSerializeScanProps(const TScanProps &scanProps) +{ + static auto isDigit = [](unsigned char c){ return std::isdigit(c); }; + + object scan; + for (const auto &prop : scanProps) { + const auto &val = prop.second; + if (!val.empty() && std::all_of(val.begin(), val.end(), isDigit)) + scan[prop.first] = boost::lexical_cast(val); + else + scan[prop.first] = val; + } + + return scan; +} diff --git a/src/lib/writer-json-common.hh b/src/lib/writer-json-common.hh index 9f159d3c..8bfaad36 100644 --- a/src/lib/writer-json-common.hh +++ b/src/lib/writer-json-common.hh @@ -20,9 +20,16 @@ #ifndef H_GUARD_WRITER_JSON_COMMON_H #define H_GUARD_WRITER_JSON_COMMON_H +#include "parser.hh" // for TScanProps + #include +#include + /// sanitize byte sequences that are not valid in UTF-8 encoding std::string sanitizeUTF8(const std::string &str); +/// serialize scan properties as a JSON object +boost::json::object jsonSerializeScanProps(const TScanProps &scanProps); + #endif /* H_GUARD_WRITER_JSON_COMMON_H */ diff --git a/src/lib/writer-json.cc b/src/lib/writer-json.cc index 
fe484ea4..35dc5675 100644 --- a/src/lib/writer-json.cc +++ b/src/lib/writer-json.cc @@ -28,7 +28,6 @@ #include #include -#include using namespace boost::json; @@ -132,23 +131,6 @@ static void prettyPrint( os << "\n"; } -// TODO: This should not necessary! TScanProps should be able to contain -// any type so that no conversions here are needed. -static object serializeScanProps(const TScanProps &scanProps) { - static auto isDigit = [](unsigned char c){ return std::isdigit(c); }; - - object scan; - for (const auto &prop : scanProps) { - const auto &val = prop.second; - if (std::all_of(val.begin(), val.end(), isDigit)) - scan[prop.first] = boost::lexical_cast(val); - else - scan[prop.first] = val; - } - - return scan; -} - class SimpleTreeEncoder: public AbstractTreeEncoder { public: /// import supported scan properties @@ -170,7 +152,7 @@ void SimpleTreeEncoder::importScanProps(const TScanProps &scanProps) if (scanProps.empty()) return; - root_["scan"] = serializeScanProps(scanProps); + root_["scan"] = jsonSerializeScanProps(scanProps); } void SimpleTreeEncoder::appendDef(const Defect &def) @@ -572,7 +554,7 @@ void SarifTreeEncoder::writeTo(std::ostream &str) if (!scanProps_.empty()) { // scan props root["inlineExternalProperties"] = { - {{ "externalizedProperties", serializeScanProps(scanProps_) }} + {{ "externalizedProperties", jsonSerializeScanProps(scanProps_) }} }; } From c436c719f89e7f071c423c5e658a42b6b48a6801 Mon Sep 17 00:00:00 2001 From: Kamil Dudka Date: Sat, 4 Mar 2023 12:27:40 +0100 Subject: [PATCH 3/5] writer-json-common: adopt jsonPrettyPrint() ... 
from the writer-json module Related: https://github.com/csutils/csdiff/pull/115 --- src/lib/writer-json-common.cc | 97 +++++++++++++++++++++++++++++++ src/lib/writer-json-common.hh | 6 ++ src/lib/writer-json.cc | 104 +--------------------------------- 3 files changed, 105 insertions(+), 102 deletions(-) diff --git a/src/lib/writer-json-common.cc b/src/lib/writer-json-common.cc index 00e4e3e8..3de30f8b 100644 --- a/src/lib/writer-json-common.cc +++ b/src/lib/writer-json-common.cc @@ -50,3 +50,100 @@ object jsonSerializeScanProps(const TScanProps &scanProps) return scan; } + +static inline void prettyPrintArray( + std::ostream &os, + const array &arr, + std::string *indent = nullptr) +{ + os << '['; + if (arr.empty()) { + os << ']'; + return; + } + + indent->append(4, ' '); + + std::string sep{'\n'}; + for (const auto &elem : arr) { + os << sep << *indent; + jsonPrettyPrint(os, elem, indent); + sep = ",\n"; + } + os << '\n'; + + indent->resize(indent->size() - 4); + os << *indent << ']'; +} + +static inline void prettyPrintObject( + std::ostream &os, + const object &obj, + std::string *indent = nullptr) +{ + os << '{'; + if (obj.empty()) { + os << '}'; + return; + } + + indent->append(4, ' '); + + std::string sep{'\n'}; + for (const auto &elem : obj) { + os << sep << *indent << serialize(elem.key()) << ": "; + jsonPrettyPrint(os, elem.value(), indent); + sep = ",\n"; + } + os << '\n'; + + indent->resize(indent->size() - 4); + os << *indent << '}'; +} + +void jsonPrettyPrint( + std::ostream &os, + const value &jv, + std::string *indent) +{ + std::string indent_; + if (!indent) + indent = &indent_; + + switch (jv.kind()) { + case kind::array: + prettyPrintArray(os, jv.get_array(), indent); + break; + + case kind::object: + prettyPrintObject(os, jv.get_object(), indent); + break; + + case kind::string: + os << serialize(jv.get_string()); + break; + + case kind::uint64: + os << jv.get_uint64(); + break; + + case kind::int64: + os << jv.get_int64(); + break; + + case 
kind::double_: + os << jv.get_double(); + break; + + case kind::bool_: + os << (jv.get_bool() ? "true" : "false"); + break; + + case kind::null: + os << "null"; + break; + } + + if (indent->empty()) + os << "\n"; +} diff --git a/src/lib/writer-json-common.hh b/src/lib/writer-json-common.hh index 8bfaad36..8f65be5a 100644 --- a/src/lib/writer-json-common.hh +++ b/src/lib/writer-json-common.hh @@ -32,4 +32,10 @@ std::string sanitizeUTF8(const std::string &str); /// serialize scan properties as a JSON object boost::json::object jsonSerializeScanProps(const TScanProps &scanProps); +/// serialize JSON value into the given output stream +void jsonPrettyPrint( + std::ostream &os, + const boost::json::value &jv, + std::string *indent = nullptr); + #endif /* H_GUARD_WRITER_JSON_COMMON_H */ diff --git a/src/lib/writer-json.cc b/src/lib/writer-json.cc index 35dc5675..d8e1d6b5 100644 --- a/src/lib/writer-json.cc +++ b/src/lib/writer-json.cc @@ -31,106 +31,6 @@ using namespace boost::json; -static void prettyPrint(std::ostream&, const value&, std::string* = nullptr); - -static inline void prettyPrintArray( - std::ostream &os, - const array &arr, - std::string *indent = nullptr) -{ - os << '['; - if (arr.empty()) { - os << ']'; - return; - } - - indent->append(4, ' '); - - std::string sep{'\n'}; - for (const auto &elem : arr) { - os << sep << *indent; - prettyPrint(os, elem, indent); - sep = ",\n"; - } - os << '\n'; - - indent->resize(indent->size() - 4); - os << *indent << ']'; - -} - -static inline void prettyPrintObject( - std::ostream &os, - const object &obj, - std::string *indent = nullptr) -{ - os << '{'; - if (obj.empty()) { - os << '}'; - return; - } - - indent->append(4, ' '); - - std::string sep{'\n'}; - for (const auto &elem : obj) { - os << sep << *indent << serialize(elem.key()) << ": "; - prettyPrint(os, elem.value(), indent); - sep = ",\n"; - } - os << '\n'; - - indent->resize(indent->size() - 4); - os << *indent << '}'; -} - -static void prettyPrint( - std::ostream &os, - const value 
&jv, - std::string *indent) -{ - std::string indent_; - if (!indent) - indent = &indent_; - - switch (jv.kind()) { - case kind::array: - prettyPrintArray(os, jv.get_array(), indent); - break; - - case kind::object: - prettyPrintObject(os, jv.get_object(), indent); - break; - - case kind::string: - os << serialize(jv.get_string()); - break; - - case kind::uint64: - os << jv.get_uint64(); - break; - - case kind::int64: - os << jv.get_int64(); - break; - - case kind::double_: - os << jv.get_double(); - break; - - case kind::bool_: - os << jv.get_bool(); - break; - - case kind::null: - os << "null"; - break; - } - - if (indent->empty()) - os << "\n"; -} - class SimpleTreeEncoder: public AbstractTreeEncoder { public: /// import supported scan properties @@ -214,7 +114,7 @@ void SimpleTreeEncoder::writeTo(std::ostream &str) // create an empty "defects" node to keep format detection working pDefects_ = &root_["defects"].emplace_array(); - prettyPrint(str, root_); + jsonPrettyPrint(str, root_); } // SARIF 2.1.0 is documented at: @@ -577,7 +477,7 @@ void SarifTreeEncoder::writeTo(std::ostream &str) root["runs"] = array{std::move(run0)}; // encode as JSON - prettyPrint(str, root); + jsonPrettyPrint(str, root); } struct JsonWriter::Private { From 81638f03f138431658296e24a03d59243a16edf1 Mon Sep 17 00:00:00 2001 From: Kamil Dudka Date: Sat, 4 Mar 2023 12:30:03 +0100 Subject: [PATCH 4/5] writer-json-simple: separate module for SimpleTreeEncoder Related: https://github.com/csutils/csdiff/pull/115 --- src/lib/CMakeLists.txt | 1 + src/lib/writer-json-simple.cc | 94 +++++++++++++++++++++++++++++++++++ src/lib/writer-json-simple.hh | 43 ++++++++++++++++ src/lib/writer-json.cc | 87 +------------------------------- 4 files changed, 139 insertions(+), 86 deletions(-) create mode 100644 src/lib/writer-json-simple.cc create mode 100644 src/lib/writer-json-simple.hh diff --git a/src/lib/CMakeLists.txt b/src/lib/CMakeLists.txt index b370d08a..09e5ad3e 100644 --- a/src/lib/CMakeLists.txt 
+++ b/src/lib/CMakeLists.txt @@ -45,4 +45,5 @@ add_library(cs STATIC writer-html.cc writer-json.cc writer-json-common.cc + writer-json-simple.cc ) diff --git a/src/lib/writer-json-simple.cc b/src/lib/writer-json-simple.cc new file mode 100644 index 00000000..cb3448bf --- /dev/null +++ b/src/lib/writer-json-simple.cc @@ -0,0 +1,94 @@ +/* + * Copyright (C) 2011 - 2023 Red Hat, Inc. + * + * This file is part of csdiff. + * + * csdiff is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * any later version. + * + * csdiff is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with csdiff. If not, see . 
+ */ + +#include "writer-json-simple.hh" + +#include "writer-json-common.hh" + +using namespace boost::json; + +void SimpleTreeEncoder::importScanProps(const TScanProps &scanProps) +{ + if (scanProps.empty()) + return; + + root_["scan"] = jsonSerializeScanProps(scanProps); +} + +void SimpleTreeEncoder::appendDef(const Defect &def) +{ + // go through events + array evtList; + for (const DefEvent &evt : def.events) { + object evtNode; + + // describe the location + evtNode["file_name"] = evt.fileName; + evtNode["line"] = evt.line; + if (0 < evt.column) + evtNode["column"] = evt.column; + + // describe the event + evtNode["event"] = evt.event; + evtNode["message"] = sanitizeUTF8(evt.msg); + evtNode["verbosity_level"] = evt.verbosityLevel; + + // append the event to the list + evtList.push_back(std::move(evtNode)); + } + + // create a node for a single defect + object defNode; + defNode["checker"] = def.checker; + if (!def.annotation.empty()) + defNode["annotation"] = def.annotation; + + // write "defect_id", "cwe", etc. 
if available + if (0 < def.defectId) + defNode["defect_id"] = def.defectId; + if (0 < def.cwe) + defNode["cwe"] = def.cwe; + if (0 < def.imp) + defNode["imp"] = def.imp; + if (!def.function.empty()) + defNode["function"] = def.function; + if (!def.language.empty()) + defNode["language"] = def.language; + if (!def.tool.empty()) + defNode["tool"] = def.tool; + + defNode["key_event_idx"] = def.keyEventIdx; + defNode["events"] = std::move(evtList); + + // create the node representing the list of defects + if (!pDefects_) + pDefects_ = &root_["defects"].emplace_array(); + + // append the node to the list + pDefects_->push_back(std::move(defNode)); +} + +void SimpleTreeEncoder::writeTo(std::ostream &str) +{ + if (!pDefects_) + // create an empty "defects" node to keep format detection working + pDefects_ = &root_["defects"].emplace_array(); + + jsonPrettyPrint(str, root_); +} diff --git a/src/lib/writer-json-simple.hh b/src/lib/writer-json-simple.hh new file mode 100644 index 00000000..44bf8eca --- /dev/null +++ b/src/lib/writer-json-simple.hh @@ -0,0 +1,43 @@ +/* + * Copyright (C) 2011 - 2023 Red Hat, Inc. + * + * This file is part of csdiff. + * + * csdiff is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * any later version. + * + * csdiff is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with csdiff. If not, see . 
+ */ + +#ifndef H_GUARD_WRITER_JSON_SIMPLE_H +#define H_GUARD_WRITER_JSON_SIMPLE_H + +#include "abstract-tree.hh" + +#include + +class SimpleTreeEncoder: public AbstractTreeEncoder { + public: + /// import supported scan properties + void importScanProps(const TScanProps &) override; + + /// append single defect + void appendDef(const Defect &) override; + + /// write everything to the given output stream + void writeTo(std::ostream &) override; + + private: + boost::json::object root_; + boost::json::array *pDefects_ = nullptr; +}; + +#endif /* H_GUARD_WRITER_JSON_SIMPLE_H */ diff --git a/src/lib/writer-json.cc b/src/lib/writer-json.cc index d8e1d6b5..b99332fc 100644 --- a/src/lib/writer-json.cc +++ b/src/lib/writer-json.cc @@ -23,6 +23,7 @@ #include "regex.hh" #include "version.hh" #include "writer-json-common.hh" +#include "writer-json-simple.hh" #include #include @@ -31,92 +32,6 @@ using namespace boost::json; -class SimpleTreeEncoder: public AbstractTreeEncoder { - public: - /// import supported scan properties - void importScanProps(const TScanProps &) override; - - /// append single defect - void appendDef(const Defect &) override; - - /// write everything to the given output stream - void writeTo(std::ostream &) override; - - private: - object root_; - array *pDefects_ = nullptr; -}; - -void SimpleTreeEncoder::importScanProps(const TScanProps &scanProps) -{ - if (scanProps.empty()) - return; - - root_["scan"] = jsonSerializeScanProps(scanProps); -} - -void SimpleTreeEncoder::appendDef(const Defect &def) -{ - // go through events - array evtList; - for (const DefEvent &evt : def.events) { - object evtNode; - - // describe the location - evtNode["file_name"] = evt.fileName; - evtNode["line"] = evt.line; - if (0 < evt.column) - evtNode["column"] = evt.column; - - // describe the event - evtNode["event"] = evt.event; - evtNode["message"] = sanitizeUTF8(evt.msg); - evtNode["verbosity_level"] = evt.verbosityLevel; - - // append the event to the list - 
evtList.push_back(std::move(evtNode)); - } - - // create a node for a single defect - object defNode; - defNode["checker"] = def.checker; - if (!def.annotation.empty()) - defNode["annotation"] = def.annotation; - - // write "defect_id", "cwe", etc. if available - if (0 < def.defectId) - defNode["defect_id"] = def.defectId; - if (0 < def.cwe) - defNode["cwe"] = def.cwe; - if (0 < def.imp) - defNode["imp"] = def.imp; - if (!def.function.empty()) - defNode["function"] = def.function; - if (!def.language.empty()) - defNode["language"] = def.language; - if (!def.tool.empty()) - defNode["tool"] = def.tool; - - defNode["key_event_idx"] = def.keyEventIdx; - defNode["events"] = std::move(evtList); - - // create the node representing the list of defects - if (!pDefects_) - pDefects_ = &root_["defects"].emplace_array(); - - // append the node to the list - pDefects_->push_back(std::move(defNode)); -} - -void SimpleTreeEncoder::writeTo(std::ostream &str) -{ - if (!pDefects_) - // create an empty "defects" node to keep format detection working - pDefects_ = &root_["defects"].emplace_array(); - - jsonPrettyPrint(str, root_); -} - // SARIF 2.1.0 is documented at: // https://docs.github.com/en/code-security/code-scanning/integrating-with-code-scanning/sarif-support-for-code-scanning // specification: https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html From 6163c0e41a5cf114be4c43f678af3987fabc5c51 Mon Sep 17 00:00:00 2001 From: Kamil Dudka Date: Sat, 4 Mar 2023 12:39:01 +0100 Subject: [PATCH 5/5] writer-json-sarif: separate module for SarifTreeEncoder Resolves: https://github.com/csutils/csdiff/pull/115 --- src/lib/CMakeLists.txt | 1 + src/lib/writer-json-sarif.cc | 357 +++++++++++++++++++++++++++++++++ src/lib/writer-json-sarif.hh | 59 ++++++ src/lib/writer-json.cc | 371 +---------------------------------- 4 files changed, 418 insertions(+), 370 deletions(-) create mode 100644 src/lib/writer-json-sarif.cc create mode 100644 src/lib/writer-json-sarif.hh diff 
--git a/src/lib/CMakeLists.txt b/src/lib/CMakeLists.txt index 09e5ad3e..d0bc4ad1 100644 --- a/src/lib/CMakeLists.txt +++ b/src/lib/CMakeLists.txt @@ -45,5 +45,6 @@ add_library(cs STATIC writer-html.cc writer-json.cc writer-json-common.cc + writer-json-sarif.cc writer-json-simple.cc ) diff --git a/src/lib/writer-json-sarif.cc b/src/lib/writer-json-sarif.cc new file mode 100644 index 00000000..2f3b3a64 --- /dev/null +++ b/src/lib/writer-json-sarif.cc @@ -0,0 +1,357 @@ +/* + * Copyright (C) 2011 - 2023 Red Hat, Inc. + * + * This file is part of csdiff. + * + * csdiff is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * any later version. + * + * csdiff is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with csdiff. If not, see . 
+ */ + +#include "writer-json-sarif.hh" + +#include "regex.hh" +#include "version.hh" +#include "writer-json-common.hh" + +using namespace boost::json; + +void SarifTreeEncoder::initToolVersion() +{ + std::string tool; + auto it = scanProps_.find("tool"); + if (scanProps_.end() != it) + // read "tool" scan property + tool = it->second; + + std::string ver; + it = scanProps_.find("tool-version"); + if (scanProps_.end() != it) { + // read "tool-version" scan property + ver = it->second; + + if (tool.empty()) { + // try to split the "{tool}-{version}" string by the last '-' + const size_t lastDashAt = ver.rfind('-'); + if (std::string::npos != lastDashAt) { + // read tool from the "{tool}-{version}" string + tool = ver.substr(0, lastDashAt); + + // remove "{tool}-" (including the dash) from "{tool}-{version}" + ver.erase(0U, lastDashAt + 1U); + } + } + else { + // try to find "{tool}-" prefix in the "tool-version" scan property + const std::string prefix = tool + "-"; + if (0U == ver.find(prefix)) + ver.erase(0U, prefix.size()); + } + } + + std::string uri; + if (tool.empty()) { + // unable to read tool name --> fallback to csdiff as the tool + tool = "csdiff"; + ver = CS_VERSION; + uri = "https://github.com/csutils/csdiff"; + } + else if (scanProps_.end() != (it = scanProps_.find("tool-url"))) + // read "tool-url" scan property + uri = it->second; + + driver_["name"] = std::move(tool); + + if (!ver.empty()) + driver_["version"] = std::move(ver); + + if (!uri.empty()) + driver_["informationUri"] = std::move(uri); +} + +static void sarifEncodeShellCheckRule(object *rule, const std::string &ruleID) +{ + // name + rule->emplace("name", ruleID); + + // properties.tags[] + object props = { + { "tags", { "ShellCheck" } } + }; + rule->emplace("properties", std::move(props)); + + // help.text && help.markdown + auto helpURI = "https://github.com/koalaman/shellcheck/wiki/" + ruleID; + auto helpMarkdown = "Defect reference: [" + ruleID +"](" + helpURI + ")"; + + object help = { + { "text", "Defect 
reference: " + helpURI }, + { "markdown", std::move(helpMarkdown) } + }; + + rule->emplace("help", std::move(help)); +} + +static void sarifEncodeCweRule(object *rule, const int cwe, bool append = false) +{ + auto cweStr = std::to_string(cwe); + array cweList = { "CWE-" + cweStr }; + + // properties.cwe[] + if (append) { + object &props = rule->at("properties").as_object(); + props["cwe"] = std::move(cweList); + } else { + object props = { + { "cwe", std::move(cweList) } + }; + rule->emplace("properties", std::move(props)); + } + + // help.text + auto helpText = + "https://cwe.mitre.org/data/definitions/" + cweStr + ".html"; + + if (append) { + object &help = rule->at("help").as_object(); + help["text"].as_string() += '\n' + std::move(helpText); + } else { + object help = { + { "text", std::move(helpText) } + }; + rule->emplace("help", std::move(help)); + } +} + +void SarifTreeEncoder::serializeRules() +{ + array ruleList; + for (const auto &item : shellCheckMap_) { + const auto &id = item.first; + object rule = { + { "id", id } + }; + + sarifEncodeShellCheckRule(&rule, item.second); + if (1U == cweMap_.count(id)) + sarifEncodeCweRule(&rule, cweMap_[id], /*append =*/ true); + + ruleList.push_back(std::move(rule)); + } + + for (const auto &item : cweMap_) { + const auto &id = item.first; + if (1U == shellCheckMap_.count(id)) + continue; + + object rule = { + { "id", id } + }; + + sarifEncodeCweRule(&rule, item.second); + ruleList.push_back(std::move(rule)); + } + + driver_["rules"] = std::move(ruleList); +} + +void SarifTreeEncoder::importScanProps(const TScanProps &scanProps) +{ + scanProps_ = scanProps; +} + +static void sarifEncodeMsg(object *pDst, const std::string& text) +{ + object message = { + { "text", sanitizeUTF8(text) } + }; + + pDst->emplace("message", std::move(message) ); +} + +static void sarifEncodeLevel(object *result, const std::string &event) +{ + std::string level = event; + + // cut the [...] 
suffix from event if present + size_t pos = event.find('['); + if (std::string::npos != pos) + level = event.substr(0U, pos); + + // go through events that denote warning level + for (const char *str : {"error", "warning", "note"}) { + if (str == level) { + // encode in the output if matched + result->emplace("level", std::move(level)); + return; + } + } +} + +static void sarifEncodeLoc(object *pLoc, const Defect &def, unsigned idx) +{ + // location ID within the result + pLoc->emplace("id", idx); + + const DefEvent &evt = def.events[idx]; + + // file name + object locPhy = { + { "artifactLocation", { + { "uri", evt.fileName } + }} + }; + + // line/col + if (evt.line) { + object reg = { + { "startLine", evt.line } + }; + + if (evt.column) + reg["startColumn"] = evt.column; + + locPhy["region"] = std::move(reg); + } + + // location + pLoc->emplace("physicalLocation", std::move(locPhy)); +} + +static void sarifEncodeComment(array *pDst, const Defect &def, unsigned idx) +{ + object comment; + + // needed for Github to see the SARIF data as valid + sarifEncodeLoc(&comment, def, idx); + + sarifEncodeMsg(&comment, def.events[idx].msg); + pDst->push_back(std::move(comment)); +} + +static void sarifEncodeEvt(array *pDst, const Defect &def, unsigned idx) +{ + const DefEvent &evt = def.events[idx]; + + // location + message + object loc; + sarifEncodeLoc(&loc, def, idx); + sarifEncodeMsg(&loc, evt.msg); + + // threadFlowLocation + object tfLoc = { + { "location", std::move(loc) }, + // verbosityLevel + { "nestingLevel", evt.verbosityLevel }, + // event + { "kinds", { evt.event } } + }; + + // append the threadFlowLocation object to the destination array + pDst->push_back(std::move(tfLoc)); +} + +void SarifTreeEncoder::appendDef(const Defect &def) +{ + const DefEvent &keyEvt = def.events[def.keyEventIdx]; + object result; + + // checker (FIXME: suboptimal mapping to SARIF) + const std::string ruleId = def.checker + ": " + keyEvt.event; + result["ruleId"] = ruleId; + + if 
(def.checker == "SHELLCHECK_WARNING") { + boost::smatch sm; + static const RE reShellCheckMsg("(\\[)?(SC[0-9]+)(\\])?$"); + boost::regex_search(keyEvt.event, sm, reShellCheckMsg); + + // update ShellCheck rule map + shellCheckMap_[ruleId] = sm[2]; + } + + if (def.cwe) + // update CWE map + cweMap_[ruleId] = def.cwe; + + // key event severity level + sarifEncodeLevel(&result, keyEvt.event); + + // key event location + object loc; + sarifEncodeLoc(&loc, def, def.keyEventIdx); + result["locations"] = array{std::move(loc)}; + + // key msg + sarifEncodeMsg(&result, keyEvt.msg); + + // other events + array flowLocs, relatedLocs; + for (unsigned i = 0; i < def.events.size(); ++i) { + if (def.events[i].event == "#") + sarifEncodeComment(&relatedLocs, def, i); + else + sarifEncodeEvt(&flowLocs, def, i); + } + + // codeFlows + result["codeFlows"] = { + // threadFlows + {{ "threadFlows", { + // locations + {{ "locations", std::move(flowLocs) }} + }}} + }; + + if (!relatedLocs.empty()) + // our stash for comments + result["relatedLocations"] = std::move(relatedLocs); + + // append the `result` object to the `results` array + results_.push_back(std::move(result)); +} + +void SarifTreeEncoder::writeTo(std::ostream &str) +{ + object root = { + // mandatory: schema/version + { "$schema", "https://json.schemastore.org/sarif-2.1.0.json" }, + { "version", "2.1.0" } + }; + + if (!scanProps_.empty()) { + // scan props + root["inlineExternalProperties"] = { + {{ "externalizedProperties", jsonSerializeScanProps(scanProps_) }} + }; + } + + this->initToolVersion(); + + if (!cweMap_.empty() || !shellCheckMap_.empty()) + // needs to run before we pick driver_ + this->serializeRules(); + + object run0 = { + { "tool", { + { "driver", std::move(driver_) } + }} + }; + + // results + run0["results"] = std::move(results_); + + // mandatory: runs + root["runs"] = array{std::move(run0)}; + + // encode as JSON + jsonPrettyPrint(str, root); +} diff --git a/src/lib/writer-json-sarif.hh 
b/src/lib/writer-json-sarif.hh new file mode 100644 index 00000000..3acf9489 --- /dev/null +++ b/src/lib/writer-json-sarif.hh @@ -0,0 +1,59 @@ +/* + * Copyright (C) 2011 - 2023 Red Hat, Inc. + * + * This file is part of csdiff. + * + * csdiff is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * any later version. + * + * csdiff is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with csdiff. If not, see . + */ + +#ifndef H_GUARD_WRITER_JSON_SARIF_H +#define H_GUARD_WRITER_JSON_SARIF_H + +#include "abstract-tree.hh" + +#include + +// SARIF 2.1.0 is documented at: +// https://docs.github.com/en/code-security/code-scanning/integrating-with-code-scanning/sarif-support-for-code-scanning +// specification: https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html +// validation: https://sarifweb.azurewebsites.net/Validation +class SarifTreeEncoder: public AbstractTreeEncoder { + public: + SarifTreeEncoder() = default; + + /// import supported scan properties + void importScanProps(const TScanProps &) override; + + /// append single defect + void appendDef(const Defect &) override; + + /// write everything to the given output stream + void writeTo(std::ostream &) override; + + private: + void initToolVersion(); + void serializeRules(); + + using TCweMap = std::map; + TCweMap cweMap_; + + using TShellCheckMap = std::map; + TShellCheckMap shellCheckMap_; + + TScanProps scanProps_; + boost::json::object driver_; + boost::json::array results_; +}; + +#endif /* H_GUARD_WRITER_JSON_SARIF_H */ diff --git a/src/lib/writer-json.cc b/src/lib/writer-json.cc index 
b99332fc..30e9d450 100644 --- a/src/lib/writer-json.cc +++ b/src/lib/writer-json.cc @@ -19,382 +19,13 @@ #include "writer-json.hh" -#include "abstract-tree.hh" -#include "regex.hh" -#include "version.hh" -#include "writer-json-common.hh" +#include "writer-json-sarif.hh" #include "writer-json-simple.hh" -#include #include #include -using namespace boost::json; - -// SARIF 2.1.0 is documented at: -// https://docs.github.com/en/code-security/code-scanning/integrating-with-code-scanning/sarif-support-for-code-scanning -// specification: https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html -// validation: https://sarifweb.azurewebsites.net/Validation -class SarifTreeEncoder: public AbstractTreeEncoder { - public: - SarifTreeEncoder() = default; - - /// import supported scan properties - void importScanProps(const TScanProps &) override; - - /// append single defect - void appendDef(const Defect &) override; - - /// write everything to the given output stream - void writeTo(std::ostream &) override; - - private: - void initToolVersion(); - void serializeRules(); - - using TCweMap = std::map; - TCweMap cweMap_; - - using TShellCheckMap = std::map; - TShellCheckMap shellCheckMap_; - - TScanProps scanProps_; - object driver_; - array results_; -}; - -void SarifTreeEncoder::initToolVersion() -{ - std::string tool; - auto it = scanProps_.find("tool"); - if (scanProps_.end() != it) - // read "tool" scan property - tool = it->second; - - std::string ver; - it = scanProps_.find("tool-version"); - if (scanProps_.end() != it) { - // read "tool-version" scan property - ver = it->second; - - if (tool.empty()) { - // try to split the "{tool}-{version}" string by the last '-' - const size_t lastDashAt = ver.rfind('-'); - if (std::string::npos != lastDashAt) { - // read tool from the "{tool}-{version}" string - tool = ver.substr(0, lastDashAt); - - // remove "{tool}-" from "{tool}-{version}" - ver.erase(0U, lastDashAt); - } - } - else { - // try to find "{tool}-" 
prefix in the "tool-version" scan property - const std::string prefix = tool + "-"; - if (0U == ver.find(prefix)) - ver.erase(0U, prefix.size()); - } - } - - std::string uri; - if (tool.empty()) { - // unable to read tool name --> fallback to csdiff as the tool - tool = "csdiff"; - ver = CS_VERSION; - uri = "https://github.com/csutils/csdiff"; - } - else if (scanProps_.end() != (it = scanProps_.find("tool-url"))) - // read "tool-url" scan property - uri = it->second; - - driver_["name"] = std::move(tool); - - if (!ver.empty()) - driver_["version"] = std::move(ver); - - if (!uri.empty()) - driver_["informationUri"] = std::move(uri); -} - -static void sarifEncodeShellCheckRule(object *rule, const std::string &ruleID) -{ - // name - rule->emplace("name", ruleID); - - // properties.tags[] - object props = { - { "tags", { "ShellCheck" } } - }; - rule->emplace("properties", std::move(props)); - - // help.text && help.markdown - auto helpURI = "https://github.com/koalaman/shellcheck/wiki/" + ruleID; - auto helpMarkdown = "Defect reference: [" + ruleID +"](" + helpURI + ")"; - - object help = { - { "text", "Defect reference: " + helpURI }, - { "markdown", std::move(helpMarkdown) } - }; - - rule->emplace("help", std::move(help)); -} - -static void sarifEncodeCweRule(object *rule, const int cwe, bool append = false) -{ - auto cweStr = std::to_string(cwe); - array cweList = { "CWE-" + cweStr }; - - // properties.cwe[] - if (append) { - object &props = rule->at("properties").as_object(); - props["cwe"] = std::move(cweList); - } else { - object props = { - { "cwe", std::move(cweList) } - }; - rule->emplace("properties", std::move(props)); - } - - // help.text - auto helpText = - "https://cwe.mitre.org/data/definitions/" + cweStr + ".html"; - - if (append) { - object &help = rule->at("help").as_object(); - help["text"].as_string() += '\n' + std::move(helpText); - } else { - object help = { - { "text", std::move(helpText) } - }; - rule->emplace("help", help); - } -} - -void 
SarifTreeEncoder::serializeRules() -{ - array ruleList; - for (const auto &item : shellCheckMap_) { - const auto &id = item.first; - object rule = { - { "id", id } - }; - - sarifEncodeShellCheckRule(&rule, item.second); - if (1U == cweMap_.count(id)) - sarifEncodeCweRule(&rule, cweMap_[id], /*append =*/ true); - - ruleList.push_back(std::move(rule)); - } - - for (const auto &item : cweMap_) { - const auto &id = item.first; - if (1U == shellCheckMap_.count(id)) - continue; - - object rule = { - { "id", id } - }; - - sarifEncodeCweRule(&rule, item.second); - ruleList.push_back(std::move(rule)); - } - - driver_["rules"] = std::move(ruleList); -} - -void SarifTreeEncoder::importScanProps(const TScanProps &scanProps) -{ - scanProps_ = scanProps; -} - -static void sarifEncodeMsg(object *pDst, const std::string& text) -{ - object message = { - { "text", sanitizeUTF8(text) } - }; - - pDst->emplace("message", std::move(message) ); -} - -static void sarifEncodeLevel(object *result, const std::string &event) -{ - std::string level = event; - - // cut the [...] 
suffix from event if present - size_t pos = event.find('['); - if (std::string::npos != pos) - level = event.substr(0U, pos); - - // go through events that denote warning level - for (const char *str : {"error", "warning", "note"}) { - if (str == level) { - // encode in the output if matched - result->emplace("level", std::move(level)); - return; - } - } -} - -static void sarifEncodeLoc(object *pLoc, const Defect &def, unsigned idx) -{ - // location ID within the result - pLoc->emplace("id", idx); - - const DefEvent &evt = def.events[idx]; - - // file name - object locPhy = { - { "artifactLocation", { - { "uri", evt.fileName } - }} - }; - - // line/col - if (evt.line) { - object reg = { - { "startLine", evt.line } - }; - - if (evt.column) - reg["startColumn"] = evt.column; - - locPhy["region"] = std::move(reg); - } - - // location - pLoc->emplace("physicalLocation", std::move(locPhy)); -} - -static void sarifEncodeComment(array *pDst, const Defect &def, unsigned idx) -{ - object comment; - - // needed for Github to see the SARIF data as valid - sarifEncodeLoc(&comment, def, idx); - - sarifEncodeMsg(&comment, def.events[idx].msg); - pDst->push_back(std::move(comment)); -} - -static void sarifEncodeEvt(array *pDst, const Defect &def, unsigned idx) -{ - const DefEvent &evt = def.events[idx]; - - // location + message - object loc; - sarifEncodeLoc(&loc, def, idx); - sarifEncodeMsg(&loc, evt.msg); - - // threadFlowLocation - object tfLoc = { - { "location", std::move(loc) }, - // verbosityLevel - { "nestingLevel", evt.verbosityLevel }, - // event - { "kinds", { evt.event } } - }; - - // append the threadFlowLocation object to the destination array - pDst->push_back(std::move(tfLoc)); -} - -void SarifTreeEncoder::appendDef(const Defect &def) -{ - const DefEvent &keyEvt = def.events[def.keyEventIdx]; - object result; - - // checker (FIXME: suboptimal mapping to SARIF) - const std::string ruleId = def.checker + ": " + keyEvt.event; - result["ruleId"] = ruleId; - - if 
(def.checker == "SHELLCHECK_WARNING") { - boost::smatch sm; - static const RE reShellCheckMsg("(\\[)?(SC[0-9]+)(\\])?$"); - boost::regex_search(keyEvt.event, sm, reShellCheckMsg); - - // update ShellCheck rule map - shellCheckMap_[ruleId] = sm[2]; - } - - if (def.cwe) - // update CWE map - cweMap_[ruleId] = def.cwe; - - // key event severity level - sarifEncodeLevel(&result, keyEvt.event); - - // key event location - object loc; - sarifEncodeLoc(&loc, def, def.keyEventIdx); - result["locations"] = array{std::move(loc)}; - - // key msg - sarifEncodeMsg(&result, keyEvt.msg); - - // other events - array flowLocs, relatedLocs; - for (unsigned i = 0; i < def.events.size(); ++i) { - if (def.events[i].event == "#") - sarifEncodeComment(&relatedLocs, def, i); - else - sarifEncodeEvt(&flowLocs, def, i); - } - - // codeFlows - result["codeFlows"] = { - // threadFlows - {{ "threadFlows", { - // locations - {{ "locations", std::move(flowLocs) }} - }}} - }; - - if (!relatedLocs.empty()) - // our stash for comments - result["relatedLocations"] = std::move(relatedLocs); - - // append the `result` object to the `results` array - results_.push_back(std::move(result)); -} - -void SarifTreeEncoder::writeTo(std::ostream &str) -{ - object root = { - // mandatory: schema/version - { "$schema", "https://json.schemastore.org/sarif-2.1.0.json" }, - { "version", "2.1.0" } - }; - - if (!scanProps_.empty()) { - // scan props - root["inlineExternalProperties"] = { - {{ "externalizedProperties", jsonSerializeScanProps(scanProps_) }} - }; - } - - this->initToolVersion(); - - if (!cweMap_.empty() || !shellCheckMap_.empty()) - // needs to run before we pick driver_ - this->serializeRules(); - - object run0 = { - { "tool", { - { "driver", std::move(driver_) } - }} - }; - - // results - run0["results"] = std::move(results_); - - // mandatory: runs - root["runs"] = array{std::move(run0)}; - - // encode as JSON - jsonPrettyPrint(str, root); -} - struct JsonWriter::Private { std::ostream &str; 
std::queue defQueue;