Skip to content

Commit

Permalink
feat(copyright): New directory scan and better JSON
Browse files Browse the repository at this point in the history
1. Add a new flag to allow directory scan similar to NOMOS
2. Provide a better JSON output with filename

Signed-off-by: Gaurav Mishra <mishra.gaurav@siemens.com>
  • Loading branch information
GMishx committed Aug 16, 2019
1 parent 8bb6b09 commit 8989c1e
Show file tree
Hide file tree
Showing 9 changed files with 307 additions and 59 deletions.
7 changes: 4 additions & 3 deletions src/copyright/agent/Makefile
Expand Up @@ -13,8 +13,9 @@ include $(VARS)
CXXFLAGS_LOCAL = $(FO_CXXFLAGS) -I. -Wall -Wextra -fopenmp $(shell pkg-config --cflags jsoncpp)
DEF = -DDATADIR='"$(MODDIR)"'
CONFDIR = $(DESTDIR)$(SYSCONFDIR)
CXXFLAGS_LINK = -lboost_regex -lboost_program_options $(ALL_CXXFLAGS) -lm -fopenmp $(FO_CXXLDFLAGS) \
$(shell pkg-config --libs jsoncpp)
CXXFLAGS_LINK = -lboost_regex -lboost_program_options -lboost_system \
-lboost_filesystem $(ALL_CXXFLAGS) -lm -fopenmp \
$(FO_CXXLDFLAGS) $(shell pkg-config --libs jsoncpp)

DEF_ID_COP = -DIDENTITY_COPYRIGHT
DEF_ID_ECC = -DIDENTITY_ECC
Expand All @@ -28,7 +29,7 @@ EXE_COV = $(EXE_COP)_cov



OBJECTS = copyright.o regscan.o scanners.o cleanEntries.o regexConfProvider.o regexConfParser.o
OBJECTS = copyright.o regscan.o scanners.o cleanEntries.o regexConfProvider.o regexConfParser.o directoryScan.o
OBJECTS_COP = copyscan_cop.o copyrightUtils_cop.o copyrightState_cop.o database_cop.o
OBJECTS_ECC = copyrightUtils_ecc.o copyrightState_ecc.o database_ecc.o
OBJECTS_KW = copyrightUtils_kw.o copyrightState_kw.o database_kw.o
Expand Down
69 changes: 25 additions & 44 deletions src/copyright/agent/copyright.cc
Expand Up @@ -41,6 +41,7 @@ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* || e.g. 'linux@@1@@(linus) torvalds' |
* | --files arg | Files to scan |
* | -J [--json] | Output JSON |
* | -d [--directory] | Directory to scan (recursive) |
* \section copyrightsource Agent source
* - \link src/copyright/agent \endlink
* - \link src/copyright/ui \endlink
Expand All @@ -53,8 +54,6 @@ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

#include "copyright.hpp"

#include <json/json.h>

using namespace std;
using namespace fo;

Expand All @@ -71,7 +70,8 @@ int main(int argc, char** argv)

CliOptions cliOptions;
vector<string> fileNames;
if (!parseCliOptions(argc, argv, cliOptions, fileNames))
string directoryToScan;
if (!parseCliOptions(argc, argv, cliOptions, fileNames, directoryToScan))
{
return_sched(1);
}
Expand All @@ -81,65 +81,46 @@ int main(int argc, char** argv)

if (!fileNames.empty())
{
const list<unptr::shared_ptr<scanner>>& scanners = state.getScanners();

const unsigned long fileNamesCount = fileNames.size();
bool fileError = false;
bool printComma = false;

if (json)
{
cout << "[" << endl;
}

#pragma omp parallel
{
#pragma omp for
for (unsigned int argn = 0; argn < fileNamesCount; ++argn)
{
const string fileName = fileNames[argn];
// Read file into one string
string s;
if (!ReadFileToString(fileName, s))
pair<string, list<match>> scanResult = processSingleFile(state, fileName);
if (json)
{
// File error
fileError = true;
appendToJson(fileName, scanResult, printComma);
}
else
{
list<match> l;
for (auto sc = scanners.begin(); sc != scanners.end(); ++sc)
{
(*sc)->ScanString(s, l);
}

if (json) {
Json::Value results;
for (auto m = l.begin(); m != l.end(); ++m)
{
Json::Value j;
j["start"] = m->start;
j["end"] = m->end;
j["type"] = m->type;
j["content"] = cleanMatch(s, *m);
results.append(j);
}
Json::Value output;
output["results"] = results;
Json::FastWriter builder;
cout << builder.write(output);
} else {
stringstream ss;
ss << fileName << " ::" << endl;
// Output matches
for (auto m = l.begin(); m != l.end(); ++m)
{
ss << "\t[" << m->start << ':' << m->end << ':' << m->type << "] '"
<< cleanMatch(s, *m)
<< "'" << endl;
}
// Thread-Safety: output all matches (collected in ss) at once to cout
cout << ss.str();
}
printResultToStdout(fileName, scanResult);
}
if (scanResult.first.empty())
{
fileError = true;
}
}
}
if (json)
{
cout << endl << "]" << endl;
}
return fileError ? 1 : 0;
}
else if (directoryToScan.length() > 0)
{
scanDirectory(state, json, directoryToScan);
}
else
{
DbManager dbManager(&argc, argv);
Expand Down
5 changes: 3 additions & 2 deletions src/copyright/agent/copyright.hpp
Expand Up @@ -5,12 +5,12 @@
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2
* as published by the Free Software Foundation.
*
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
* See the GNU General Public License for more details.
*
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
Expand All @@ -21,6 +21,7 @@

#include <vector>
#include "copyrightUtils.hpp"
#include "directoryScan.hpp"

extern "C" {
#include "libfossagent.h"
Expand Down
142 changes: 141 additions & 1 deletion src/copyright/agent/copyrightUtils.cc
Expand Up @@ -82,10 +82,12 @@ void bail(int exitval)
* \param[in] argv
* \param[out] dest The parsed CliOptions object
* \param[out] fileNames List of files to be scanned
* \param[out] directoryToScan Directory to be scanned
* \return True if success, false otherwise
* \todo Change and add help based on IDENTITY
*/
bool parseCliOptions(int argc, char** argv, CliOptions& dest, std::vector<std::string>& fileNames)
bool parseCliOptions(int argc, char** argv, CliOptions& dest,
std::vector<std::string>& fileNames, std::string& directoryToScan)
{
unsigned type = 0;

Expand Down Expand Up @@ -129,6 +131,9 @@ bool parseCliOptions(int argc, char** argv, CliOptions& dest, std::vector<std::s
(
"jobId", boost::program_options::value<int>(), "the id of the job (only in combination with --scheduler_start)"
)
(
"directory,d", boost::program_options::value<string>(), "directory to scan (recursive)"
)
;

boost::program_options::positional_options_description p;
Expand Down Expand Up @@ -176,6 +181,18 @@ bool parseCliOptions(int argc, char** argv, CliOptions& dest, std::vector<std::s
}
}

if (vm.count("directory"))
{
if (vm.count("files"))
{
cout << "cannot pass files and directory at the same time" << endl;
cout << desc << endl;
fileNames.clear();
return false;
}
directoryToScan = vm["directory"].as<std::string>();
}

return true;
}
catch (boost::bad_any_cast&) {
Expand Down Expand Up @@ -413,3 +430,126 @@ bool processUploadId(const CopyrightState& state, int agentId, int uploadId, Cop
return true;
}

/**
 * Read a single file and run all scanners provided by the CopyrightState
 * on its content.
 * @param state    Copyright state holding the scanners to run
 * @param fileName Location of the file to be scanned
 * @return A pair of the file content and the list of matches found.
 *         If the file cannot be read, the content is empty and no scanner
 *         is run.
 * @note An empty content string is the only failure signal carried by the
 *       return value, so a readable but empty file cannot be told apart
 *       from a read error by looking at the result alone.
 */
pair<string, list<match>> processSingleFile(const CopyrightState& state,
  const string fileName)
{
  const list<unptr::shared_ptr<scanner>>& scanners = state.getScanners();
  list<match> matchList;

  // Read file into one string
  string content;
  if (!ReadFileToString(fileName, content))
  {
    // File error: make sure no partially read data leaks to the caller
    content.clear();
  }
  else
  {
    for (const auto& sc : scanners)
    {
      sc->ScanString(content, matchList);
    }
  }
  // Move the (potentially large) file content and the match list into the
  // result instead of deep-copying them.
  return make_pair(std::move(content), std::move(matchList));
}

/**
 * Serialise the scan result of one file as a JSON object and print it to
 * STDOUT as one element of a comma separated list.
 *
 * The printed object has the keys `file` (the file name) and `results`.
 * `results` is an array of objects with `start`, `end`, `type` and
 * `content`, or the string "Unable to read file" when the content in
 * \p resultPair is empty.
 *
 * @param fileName   File which was scanned
 * @param resultPair The result pair from processSingleFile()
 * @param printComma Set true to print comma. Will be set true after first
 *                   data is printed
 */
void appendToJson(const std::string fileName,
  const std::pair<string, list<match>> resultPair, bool &printComma)
{
  Json::Value result;
#if JSONCPP_VERSION_HEXA < ((1 << 24) | (4 << 16))
  // Use FastWriter for versions below 1.4.0
  Json::FastWriter jsonWriter;
#else
  // Since version 1.4.0, FastWriter is deprecated and replaced with
  // StreamWriterBuilder
  Json::StreamWriterBuilder jsonWriter;
  jsonWriter["commentStyle"] = "None";
  jsonWriter["indentation"] = "";
#endif

  result["file"] = fileName;
  if (resultPair.first.empty())
  {
    result["results"] = "Unable to read file";
  }
  else
  {
    Json::Value results;
    // Iterate the matches in place; no copy of the list or its elements
    for (const auto& m : resultPair.second)
    {
      Json::Value j;
      j["start"] = m.start;
      j["end"] = m.end;
      j["type"] = m.type;
      j["content"] = cleanMatch(resultPair.first, m);
      results.append(j);
    }
    result["results"] = results;
  }

  // Serialise before entering the critical section so that other threads
  // are blocked only for the actual write to STDOUT.
  string jsonString;
#if JSONCPP_VERSION_HEXA < ((1 << 24) | (4 << 16))
  // For version below 1.4.0, every writer append `\n` at end.
  // Strip it, but only if it is really there: an unconditional
  // replace(find("\n"), ...) throws std::out_of_range when nothing is found.
  jsonString = jsonWriter.write(result);
  if (!jsonString.empty() && jsonString.back() == '\n')
  {
    jsonString.pop_back();
  }
#else
  // For version >= 1.4.0, \n is not appended.
  jsonString = Json::writeString(jsonWriter, result);
#endif

  // Thread-Safety: output all matches JSON at once to STDOUT
#pragma omp critical (jsonPrinter)
  {
    if (printComma)
    {
      cout << "," << endl;
    }
    else
    {
      printComma = true;
    }
    cout << "  " << jsonString << flush;
  }
}

/**
* Print the result of current scan to stdout
* @param fileName File which was scanned
* @param resultPair Result pair from scanSingleFile()
*/
void printResultToStdout(const std::string fileName,
const std::pair<string, list<match>> resultPair)
{
if (resultPair.first.empty())
{
cout << fileName << " :: Unable to read file" << endl;
return;
}
stringstream ss;
ss << fileName << " ::" << endl;
// Output matches
list<match> resultList = resultPair.second;
for (auto m = resultList.begin(); m != resultList.end(); ++m)
{
ss << "\t[" << m->start << ':' << m->end << ':' << m->type << "] '"
<< cleanMatch(resultPair.first, *m)
<< "'" << endl;
}
// Thread-Safety: output all matches (collected in ss) at once to cout
cout << ss.str();
}
16 changes: 13 additions & 3 deletions src/copyright/agent/copyrightUtils.hpp
Expand Up @@ -5,12 +5,12 @@
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2
* as published by the Free Software Foundation.
*
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
* See the GNU General Public License for more details.
*
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
Expand All @@ -28,6 +28,7 @@
#include <string>
#include <vector>
#include <list>
#include <json/json.h>

#include "scanners.hpp"
#include "regscan.hpp"
Expand All @@ -47,7 +48,8 @@ void bail(int exitval);

int writeARS(int agentId, int arsId, int uploadId, int success, const fo::DbManager& dbManager);

bool parseCliOptions(int argc, char** argv, CliOptions& dest, std::vector<std::string>& fileNames);
bool parseCliOptions(int argc, char** argv, CliOptions& dest,
std::vector<std::string>& fileNames, std::string& directoryToScan);

CopyrightState getState(CliOptions&& cliOptions);

Expand All @@ -59,6 +61,14 @@ void normalizeContent(std::string& content);

bool processUploadId(const CopyrightState& state, int agentId, int uploadId, CopyrightDatabaseHandler& handler);

std::pair<std::string, std::list<match>> processSingleFile(const CopyrightState& state,
const std::string fileName);

// Print one file's scan result as a JSON list element on STDOUT.
// Names are fully qualified so this header does not rely on a
// using-directive being in effect where it is included.
void appendToJson(const std::string fileName,
    const std::pair<std::string, std::list<match>> resultPair, bool &printComma);

// Print one file's scan result as plain text on STDOUT.
void printResultToStdout(const std::string fileName,
    const std::pair<std::string, std::list<match>> resultPair);

#endif /* COPYRIGHTUTILS_HPP_ */

0 comments on commit 8989c1e

Please sign in to comment.