diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 0000000..cedf30c --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1,3 @@ +# This project is maintained with love by: + +* @geekmasher diff --git a/.github/workflows/python-build-test.yml b/.github/workflows/python-build-test.yml index 1fed2ac..beb96f8 100644 --- a/.github/workflows/python-build-test.yml +++ b/.github/workflows/python-build-test.yml @@ -31,10 +31,13 @@ jobs: - name: End-2-end Tests env: - CODEQL_TOKEN: ${{ secrets.CODEQL_TOKEN }} + GITHUB_TOKEN: ${{ secrets.CODEQL_TOKEN }} run: | - ./gh-codeql-summarize --github-token $CODEQL_TOKEN -i ./examples/projects.json -f bundle -o ./examples - + ./gh-codeql-summarize \ + -i ./examples/projects.json \ + -f bundle \ + -o ./examples \ + --disable-banner # Pull Request auto-linting - name: Lint diff --git a/LICENSE b/LICENSE index d32937f..fac6e63 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2022 Mathew Payne +Copyright (c) 2022 GitHub Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/codeqlsummarize/__init__.py b/codeqlsummarize/__init__.py index fce2006..0f7f1d0 100644 --- a/codeqlsummarize/__init__.py +++ b/codeqlsummarize/__init__.py @@ -1,3 +1,7 @@ import os as _os __MODULE_PATH__ = _os.path.dirname(_os.path.realpath(__file__)) + +DOCUMENTATION = { + "codeql_setup": "https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/setting-up-code-scanning-for-a-repository" +} diff --git a/codeqlsummarize/__main__.py b/codeqlsummarize/__main__.py index a8625e4..802a13d 100644 --- a/codeqlsummarize/__main__.py +++ b/codeqlsummarize/__main__.py @@ -7,7 +7,8 @@ sys.path.append(".") -from codeqlsummarize import __MODULE_PATH__ +from codeqlsummarize import __MODULE_PATH__, DOCUMENTATION +from codeqlsummarize.__version__ import __banner__ from codeqlsummarize.generator import Generator, QUERIES from codeqlsummarize.models import CodeQLDatabase, GitHub from codeqlsummarize.exporters import EXPORTERS @@ -27,9 +28,11 @@ help="Export format (`json`, `customizations`, `mad`, `bundle`)", ) parser.add_argument("-i", "--input", help="Input / Project File") -parser.add_argument("-o", "--output", default=os.getcwd(), help="Output directory / file") - -parser.add_argument("--disable-cache", action="store_true") +parser.add_argument( + "-o", "--output", default=os.getcwd(), help="Output directory / file" +) +parser.add_argument("--disable-banner", action="store_true", help="Disable Banner") +parser.add_argument("--disable-cache", action="store_true", help="Disable Caching Databases and other files") parser_codeql = parser.add_argument_group("CodeQL") parser_codeql.add_argument("--codeql-base", default="./codeql", help="CodeQL Base Path") @@ -48,9 +51,9 @@ "-t", "--github-token", default=os.environ.get("GITHUB_TOKEN") ) + def main(arguments): - """ Main workflow - """ + """Main workflow""" github = GitHub(token=arguments.github_token) languages: list[str] = [] databases: list[CodeQLDatabase] = [] @@ -59,6 +62,8 @@ def main(arguments): level=logging.DEBUG if arguments.debug else logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", ) + if not arguments.disable_banner: + print(__banner__) logger.debug("Debugging is enabled") if not arguments.format: @@ -83,8 +88,7 @@ def main(arguments): database=arguments.database, project_repo=arguments.project_repo ) - if langs: - languages.extend(langs) + languages.extend(langs) if arguments.github_repository: owner, repo = arguments.github_repository.split("/", 1) @@ -107,16 +111,19 @@ def main(arguments): _, repo = arguments.project_repo.split("/", 1) for language in languages: - logger.info( - f"Analysing remote repo: {arguments.project_repo} ({language})" - ) + logger.info(f"Analyzing remote repo: {arguments.project_repo} ({language})") database = CodeQLDatabase( repo, language=language, repository=arguments.project_repo ) - if github.avalible: - database.path = database.downloadDatabase(github, temppath) + if github.available: + try: + database.path = database.downloadDatabase(github, temppath) + except Exception as err: + logger.warning( + f"Error encountered while downloading CodeQL Database: {err}" + ) elif arguments.database: logger.debug("Setting database to arguments.database ") database.path = arguments.database @@ -142,14 +149,17 @@ def main(arguments): db = CodeQLDatabase(name=name, language=lang, repository=repo) - if github.avalible and db.repository: + if github.available and db.repository: logger.info(f"Downloading database for :: {repo}") - download_path = db.downloadDatabase( - github, temppath, use_cache=not arguments.disable_cache - ) - - db.path = download_path + try: + db.path = db.downloadDatabase( + github, temppath, use_cache=not arguments.disable_cache + ) + except Exception as err: + logger.warning( + f"Error encountered while downloading CodeQL Database: {err}" + ) if not db.path: logger.warning(f"CodeQL Database path is not set") @@ -178,7 +188,15 @@ def main(arguments): logger.info(f"Database setup complete: {database}") if not database.exists(): - raise Exception("CodeQL Database does not exist...") + logger.warning( + f"Failed to find or download the CodeQL Database for '{database.name}'" + ) + logger.warning( + "Please consult the GitHub docs to find out how to build a CodeQL Database" + ) + logger.warning(DOCUMENTATION.get("codeql_setup")) + logger.warning("Skipping project until Database is available...") + continue # find codeql generator = Generator(database) diff --git a/codeqlsummarize/__version__.py b/codeqlsummarize/__version__.py new file mode 100644 index 0000000..573ae24 --- /dev/null +++ b/codeqlsummarize/__version__.py @@ -0,0 +1,34 @@ + +__title__ = "CodeQL Summarize" +__name__ = "codeqlsummarize" +__version__ = "0.1.0" + +__description__ = "GitHub CodeQL Summaries Toolkit" +__summary__ = """\ +This is the GitHub CodeQL Summarize project and Actions which allows users to generate Models as Data (MaD) from CodeQL databases. +""" + +__url__ = "https://github.com/advanced-security/gh-codeql-summarize" + +__license__ = "MIT License" +__copyright__ = "Copyright (c) 2022, GitHub" + +__author__ = "GitHub Field Team" +__email__ = "" +__maintainer__ = "GeekMasher" +__mEmail__ = "" + +__contributors__ = [ + "GeekMasher", + "zbazztian" +] + +__banner__ = f"""\ + _____ _ _____ _ _____ _ +/ __ \ | | | _ | | / ___| (_) +| / \/ ___ __| | ___ | | | | | \ `--. _ _ _ __ ___ _ __ ___ __ _ _ __ _ _______ +| | / _ \ / _` |/ _ \| | | | | `--. \ | | | '_ ` _ \| '_ ` _ \ / _` | '__| |_ / _ \\ +| \__/\ (_) | (_| | __/\ \/' / |____ /\__/ / |_| | | | | | | | | | | | (_| | | | |/ / __/ + \____/\___/ \__,_|\___| \_/\_\_____/ \____/ \__,_|_| |_| |_|_| |_| |_|\__,_|_| |_/___\___| + By {__author__} - v{__version__} +""" diff --git a/codeqlsummarize/exporters/customizations.py b/codeqlsummarize/exporters/customizations.py index 661f816..050c6e0 100644 --- a/codeqlsummarize/exporters/customizations.py +++ b/codeqlsummarize/exporters/customizations.py @@ -116,9 +116,9 @@ def exportCustomizations( def exportBundle(database: CodeQLDatabase, output: str, github: GitHub, **kargs): logger.debug(f"Output directory :: {output}") - + owner = github.owner.replace("-", "_") - + if not github or not github.owner: raise Exception("Failed to export Bundle: No owner / repo name set") diff --git a/codeqlsummarize/exporters/exptjson.py b/codeqlsummarize/exporters/exptjson.py index 2923ba6..74f1d86 100644 --- a/codeqlsummarize/exporters/exptjson.py +++ b/codeqlsummarize/exporters/exptjson.py @@ -9,14 +9,13 @@ def exportToJson(database: CodeQLDatabase, output: str, **kargs): - """ Export to JSON - """ + """Export to JSON""" logger.info("Running export to JSON") data = {} for name, summary in database.summaries.items(): data[name] = summary.rows - + logger.info(f"Saving output to file: {output}") with open(output, "w") as handle: js.dump(data, handle, indent=2, sort_keys=True) @@ -24,4 +23,3 @@ def exportToJson(database: CodeQLDatabase, output: str, **kargs): logger.info("Completed writing to output") return - diff --git a/codeqlsummarize/generator.py b/codeqlsummarize/generator.py index 7289bdf..6d82bbd 100644 --- a/codeqlsummarize/generator.py +++ b/codeqlsummarize/generator.py @@ -3,19 +3,15 @@ import json import os -from os.path import ( - join, - exists, - realpath -) +from os.path import join, exists, realpath import shlex import tempfile import logging from typing import * from codeqlsummarize.utils import ( - findCodeQLCli, - exec_from_path_env, - print_to_stream, + findCodeQLCli, + exec_from_path_env, + print_to_stream, ) from codeqlsummarize import __MODULE_PATH__ from codeqlsummarize.models import CodeQLDatabase, Summaries @@ -40,13 +36,10 @@ def __init__(self, database: CodeQLDatabase): self.database = database self.codeql = findCodeQLCli() if not self.codeql: - raise Exception('Failed to find CodeQL distribution!') + raise Exception("Failed to find CodeQL distribution!") - self.pack_name = f'codeql/{database.language}-queries' - self.codeql( - 'pack', 'download', - self.pack_name - ) + self.pack_name = f"codeql/{database.language}-queries" + self.codeql("pack", "download", self.pack_name) def getModelGeneratorQuery(self, name) -> Optional[str]: logger.info(f"Finding query name: {name}") @@ -63,8 +56,8 @@ def runQuery(self, query: str) -> Summaries: logger.info("Running Query :: " + query) resultBqrs = join( self.database.path, - 'results', - query.replace(':', '/').replace('.ql', '.bqrs') + "results", + query.replace(":", "/").replace(".ql", ".bqrs"), ) output_std = join(Generator.TEMP_PATH, "runquery.txt") @@ -72,8 +65,10 @@ def runQuery(self, query: str) -> Summaries: print(f'Running query "{query}"...') with open(output_std, "wb") as std: self.codeql( - "database", "run-queries", - "--threads", "0", + "database", + "run-queries", + "--threads", + "0", self.database.path, query, outconsumer=print_to_stream(std), @@ -91,9 +86,12 @@ def readRows(self, bqrsFile): with open(output_std, "wb") as std: self.codeql( - "bqrs", "decode", - "--format", "json", - "--output", generatedJson, + "bqrs", + "decode", + "--format", + "json", + "--output", + generatedJson, bqrsFile, outconsumer=print_to_stream(std), ) diff --git a/codeqlsummarize/models.py b/codeqlsummarize/models.py index de570ea..aaeec32 100644 --- a/codeqlsummarize/models.py +++ b/codeqlsummarize/models.py @@ -34,9 +34,10 @@ def __post_init__(self): logger.debug(f"GitHub Token is set") @property - def avalible(self): + def available(self): return self.token is not None + @dataclass class CodeQLDatabase: name: str @@ -134,7 +135,7 @@ def downloadDatabase( # SECURITY: Do we trust this DB? with zipfile.ZipFile(output_zip) as zf: zf.extractall(output_db) - + logger.info(f" >>> {output_db}") codeql_lang_path = os.path.join(output_db, self.language) if os.path.exists(codeql_lang_path): @@ -144,4 +145,3 @@ def downloadDatabase( codeql_dir = os.path.join(output_db, codeql_dir) if os.path.isdir(codeql_dir): return codeql_dir - diff --git a/codeqlsummarize/utils.py b/codeqlsummarize/utils.py index 6983bc1..7066649 100644 --- a/codeqlsummarize/utils.py +++ b/codeqlsummarize/utils.py @@ -55,8 +55,7 @@ def request( def loadYaml(path: str) -> Any: - """ Loading YAML files - """ + """Loading YAML files""" try: # TODO: Replace with a native solution import yaml @@ -68,8 +67,10 @@ def loadYaml(path: str) -> Any: return yaml.safe_load(handle) -def detectLanguage(database: str = "", project_repo: str = "", github = None) -> Optional[list[str]]: - """ Detect languages based on: +def detectLanguage( + database: str = "", project_repo: str = "", github=None +) -> List[str]: + """Detect languages based on: - the database - the repo languages """ @@ -82,17 +83,19 @@ def detectLanguage(database: str = "", project_repo: str = "", github = None) -> if project_repo: # TODO: get from GitHub API languages pass - return - + return [] + + def print_to_stream(f): def impl(cmd, stream): while True: chunk = stream.readline() - if chunk == b'': + if chunk == b"": break f.write(chunk) f.flush() stream.close() + return impl @@ -111,10 +114,10 @@ def __call__( errconsumer=None, combine_std_out_err=True, inprovider=close_stream, - cwd='.', - **kwargs + cwd=".", + **kwargs, ): - with open(os.devnull, 'wb') as devnull: + with open(os.devnull, "wb") as devnull: outconsumer = outconsumer or print_to_stream(devnull) errconsumer = errconsumer or print_to_stream(devnull) @@ -133,50 +136,53 @@ def __call__( cwd=cwd, ) as proc: - commandstr = ' '.join(command) - tout = threading.Thread(target=outconsumer, args=(commandstr, proc.stdout)) - tout.start() - terr = None - if not combine_std_out_err: - terr = threading.Thread(target=errconsumer, args=(commandstr, proc.stderr)) - terr.start() - tin = threading.Thread(target=inprovider, args=(commandstr, proc.stdin)) - tin.start() - - ret = proc.wait() - tout.join() - tin.join() - if terr: - terr.join() - if ret != 0: - raise CalledProcessError(cmd=commandstr, returncode=ret) + commandstr = " ".join(command) + tout = threading.Thread( + target=outconsumer, args=(commandstr, proc.stdout) + ) + tout.start() + terr = None + if not combine_std_out_err: + terr = threading.Thread( + target=errconsumer, args=(commandstr, proc.stderr) + ) + terr.start() + tin = threading.Thread(target=inprovider, args=(commandstr, proc.stdin)) + tin.start() + + ret = proc.wait() + tout.join() + tin.join() + if terr: + terr.join() + if ret != 0: + raise CalledProcessError(cmd=commandstr, returncode=ret) def exec_from_path_env(execname): - """ Find CodeQL in PATH - """ + """Find CodeQL in PATH""" e = shutil.which(execname) return Executable(e) if e else None def codeql_from_gh_codeql(): - """ Find CodeQL using GitHub CLI CodeQL Extension - """ - gh = exec_from_path_env('gh') + """Find CodeQL using GitHub CLI CodeQL Extension""" + gh = exec_from_path_env("gh") if gh: try: output = io.BytesIO() gh( - 'codeql', - 'version', - '--format', 'json', + "codeql", + "version", + "--format", + "json", combine_std_out_err=False, - outconsumer=print_to_stream(output) + outconsumer=print_to_stream(output), ) output.seek(0) return Executable( os.path.join( - json.load(output)['unpackedLocation'], + json.load(output)["unpackedLocation"], codeql_exec_name(), ) ) @@ -186,14 +192,12 @@ def codeql_from_gh_codeql(): def codeql_exec_name(): - """ Check CodeQL CLI name based on OS Type - """ + """Check CodeQL CLI name based on OS Type""" return "codeql" + ("" if os.name == "posix" else ".exe") def codeql_from_actions(): - """ Find CodeQL in GitHub Actions - """ + """Find CodeQL in GitHub Actions""" actions = glob.glob( os.path.join( os.environ.get("RUNNER_TOOL_CACHE", ""), @@ -211,10 +215,9 @@ def codeql_from_actions(): def findCodeQLCli(): - """ Find CodeQL executable - """ - return \ - exec_from_path_env(codeql_exec_name()) or \ - codeql_from_gh_codeql() or \ - codeql_from_actions() - + """Find CodeQL executable""" + return ( + exec_from_path_env(codeql_exec_name()) + or codeql_from_gh_codeql() + or codeql_from_actions() + )