Fix an issue with relative paths in a target for truffle projects

Consensys · May 24, 2021 · 83aaa45 · 83aaa45
1 parent 2f90ec1
commit 83aaa45
Show file tree

Hide file tree

Showing 7 changed files with 360 additions and 239 deletions.
diff --git a/mythx_cli/fuzz/ide/brownie.py b/mythx_cli/fuzz/ide/brownie.py
@@ -6,8 +6,7 @@
 from mythx_cli.fuzz.exceptions import BuildArtifactsError
 from mythx_cli.fuzz.ide.generic import IDEArtifacts, JobBuilder
 
-from ...util import sol_files_by_directory
-from ...util import files_by_directory
+from ...util import files_by_directory, sol_files_by_directory
 
 LOGGER = logging.getLogger("mythx-cli")
 
@@ -130,8 +129,12 @@ def _get_build_artifacts(build_dir) -> Dict:
 
 
 class BrownieJob:
-    def __init__(self, target: List[str], build_dir: Path, map_to_original_source: bool):
-        artifacts = BrownieArtifacts(build_dir, targets=target, map_to_original_source=map_to_original_source)
+    def __init__(
+        self, target: List[str], build_dir: Path, map_to_original_source: bool
+    ):
+        artifacts = BrownieArtifacts(
+            build_dir, targets=target, map_to_original_source=map_to_original_source
+        )
         self._jb = JobBuilder(artifacts)
         self.payload = None
 

diff --git a/mythx_cli/fuzz/ide/hardhat.py b/mythx_cli/fuzz/ide/hardhat.py
@@ -1,5 +1,5 @@
 import json
-from os.path import commonpath, relpath
+from os.path import abspath, commonpath, relpath
 from pathlib import Path
 from typing import List
 
@@ -15,7 +15,9 @@ def __init__(self, build_dir=None, targets=None):
         if targets:
             include = []
             for target in targets:
-                include.extend(sol_files_by_directory(target))
+                include.extend(
+                    [abspath(file_path) for file_path in sol_files_by_directory(target)]
+                )
             self._include = include
 
         self._build_dir = Path(build_dir).absolute() or Path("./artifacts").absolute()

diff --git a/mythx_cli/fuzz/ide/truffle.py b/mythx_cli/fuzz/ide/truffle.py
@@ -1,31 +1,39 @@
 import json
+from os.path import abspath
 from pathlib import Path
-from typing import List, Dict
+from subprocess import Popen, TimeoutExpired
+from tempfile import TemporaryFile
+from typing import Any, Dict, List
 
 from mythx_cli.fuzz.exceptions import BuildArtifactsError
-from mythx_cli.fuzz.ide.generic import JobBuilder, IDEArtifacts
-from mythx_cli.util import sol_files_by_directory, LOGGER
-
-from gql import Client, gql
-from gql.transport.requests import RequestsHTTPTransport
+from mythx_cli.fuzz.ide.generic import IDEArtifacts, JobBuilder
+from mythx_cli.util import LOGGER, sol_files_by_directory
 
 
 class TruffleArtifacts(IDEArtifacts):
-    def __init__(self, db_url: str, project_dir: str, build_dir=None, targets=None):
-        self._include = []
+    def __init__(self, project_dir: str, build_dir=None, targets=None):
+        self._include: List[str] = []
         if targets:
             include = []
             for target in targets:
-                include.extend(sol_files_by_directory(target))
+                # Targets may be specified as relative paths, but sourcePath in Truffle artifacts
+                # is always absolute, so we convert the target paths to absolute paths as well.
+                include.extend(
+                    [abspath(file_path) for file_path in sol_files_by_directory(target)]
+                )
             self._include = include
 
         self._build_dir = build_dir or Path("./build/contracts")
         build_files_by_source_file = self._get_build_artifacts(self._build_dir)
-        project_sources = self._get_project_sources(db_url, project_dir)
+        project_sources = self._get_project_sources(project_dir)
 
-        self._contracts, self._sources = self.fetch_data(build_files_by_source_file, project_sources)
+        self._contracts, self._sources = self.fetch_data(
+            build_files_by_source_file, project_sources
+        )
 
-    def fetch_data(self, build_files_by_source_file, project_sources: Dict[str, List[str]]):
+    def fetch_data(
+        self, build_files_by_source_file, project_sources: Dict[str, List[str]]
+    ):
         result_contracts = {}
         result_sources = {}
         for source_file, contracts in build_files_by_source_file.items():
@@ -56,7 +64,9 @@ def fetch_data(self, build_files_by_source_file, project_sources: Dict[str, List
                         f"Build artifact did not contain expected key. Contract: {contract}: \n{e}"
                     )
 
-                for file_index, source_file_dep in enumerate(project_sources[contract["contractName"]]):
+                for file_index, source_file_dep in enumerate(
+                    project_sources[contract["contractName"]]
+                ):
                     if source_file_dep in result_sources.keys():
                         continue
 
@@ -75,20 +85,56 @@ def fetch_data(self, build_files_by_source_file, project_sources: Dict[str, List
         return result_contracts, result_sources
 
     @staticmethod
-    def _get_project_sources(db_url: str, project_dir: str) -> Dict[str, List[str]]:
-        transport = RequestsHTTPTransport(url=db_url)
-        client = Client(transport=transport, fetch_schema_from_transport=True)
-        get_project_id_query = gql(
-            f"""
-            query {{
-                projectId(input: {{ directory: "{project_dir}" }})
-            }}
-            """
+    def query_truffle_db(query: str, project_dir: str) -> Dict[str, Any]:
+        try:
+            # We use temporary files here to work around subprocess.PIPE's buffer size limit (65536 bytes).
+            # With that limit, large output would be truncated, resulting in invalid JSON.
+            with TemporaryFile() as stdout_file, TemporaryFile() as stderr_file:
+                with Popen(
+                    ["truffle", "db", "query", f"{query}"],
+                    stdout=stdout_file,
+                    stderr=stderr_file,
+                    cwd=project_dir,
+                ) as p:
+                    p.communicate(timeout=3 * 60)
+                    if stdout_file.tell() == 0:
+                        error = ""
+                        if stderr_file.tell() > 0:
+                            stderr_file.seek(0)
+                            error = f"\nError: {str(stderr_file.read())}"
+                        raise BuildArtifactsError(
+                            f'Empty response from the Truffle DB.\nQuery: "{query}"{error}'
+                        )
+                    stdout_file.seek(0)
+                    result = json.load(stdout_file)
+        except BuildArtifactsError as e:
+            raise e
+        except TimeoutExpired:
+            raise BuildArtifactsError(f'Truffle DB query timeout.\nQuery: "{query}"')
+        except Exception as e:
+            raise BuildArtifactsError(
+                f'Truffle DB query error.\nQuery: "{query}"'
+            ) from e
+        if not result.get("data"):
+            raise BuildArtifactsError(
+                f'"data" field is not found in the query result.\n Result: "{json.dumps(result)}".\nQuery: "{query}"'
+            )
+        return result.get("data")
+
+    @staticmethod
+    def _get_project_sources(project_dir: str) -> Dict[str, List[str]]:
+        result = TruffleArtifacts.query_truffle_db(
+            f'query {{ projectId(input: {{ directory: "{project_dir}" }}) }}',
+            project_dir,
         )
-        result = client.execute(get_project_id_query)
         project_id = result.get("projectId")
 
-        get_project_contracts_query = gql(
+        if not project_id:
+            raise BuildArtifactsError(
+                f'No project artifacts found. Path: "{project_dir}"'
+            )
+
+        result = TruffleArtifacts.query_truffle_db(
             f"""
             {{
               project(id:"{project_id}") {{
@@ -104,18 +150,24 @@ def _get_project_sources(db_url: str, project_dir: str) -> Dict[str, List[str]]:
                 }}
               }}
             }}
-            """
+            """,
+            project_dir,
         )
 
-        result = client.execute(get_project_contracts_query)
-
         contracts = {}
 
+        if not result.get("project") or not result["project"]["contracts"]:
+            raise BuildArtifactsError(
+                f'No project artifacts found. Path: "{project_dir}". Project ID "{project_id}"'
+            )
+
         for contract in result["project"]["contracts"]:
-            contracts[contract["name"]] = list(map(
-                lambda x: x["source"]["sourcePath"],
-                contract["compilation"]["processedSources"],
-            ))
+            contracts[contract["name"]] = list(
+                map(
+                    lambda x: x["source"]["sourcePath"],
+                    contract["compilation"]["processedSources"],
+                )
+            )
         return contracts
 
     @staticmethod
@@ -170,8 +222,8 @@ def sources(self):
 
 
 class TruffleJob:
-    def __init__(self, db_url: str, project_dir: str, target: List[str], build_dir: Path):
-        artifacts = TruffleArtifacts(db_url, project_dir, build_dir, targets=target)
+    def __init__(self, project_dir: str, target: List[str], build_dir: Path):
+        artifacts = TruffleArtifacts(project_dir, build_dir, targets=target)
         self._jb = JobBuilder(artifacts)
         self.payload = None
 

diff --git a/mythx_cli/fuzz/run.py b/mythx_cli/fuzz/run.py
@@ -68,10 +68,12 @@ def determine_ide() -> IDE:
     is_flag=True,
     default=False,
     help="Map the analyses results to the original source code, instead of the instrumented one. "
-         "This is meant to be used with Scribble.",
+    "This is meant to be used with Scribble.",
 )
 @click.pass_obj
-def fuzz_run(ctx, address, more_addresses, corpus_target, map_to_original_source, target):
+def fuzz_run(
+    ctx, address, more_addresses, corpus_target, map_to_original_source, target
+):
     # read YAML config params from ctx dict, e.g. ganache rpc url
     #   Introduce a separate `fuzz` section in the YAML file
 
@@ -120,11 +122,11 @@ def fuzz_run(ctx, address, more_addresses, corpus_target, map_to_original_source
     if not target:
         target = analyze_config["targets"]
     if not map_to_original_source:
-       map_to_original_source = (
-        analyze_config["map_to_original_source"]
-        if "map_to_original_source" in config_options
-        else default_config["map_to_original_source"]
-    )
+        map_to_original_source = (
+            analyze_config["map_to_original_source"]
+            if "map_to_original_source" in config_options
+            else default_config["map_to_original_source"]
+        )
     # Optional config parameters
     # Here we parse the config parameters from the config file and use defaults for non available values
     contract_address = analyze_config["deployed_contract_address"]
@@ -175,22 +177,18 @@ def fuzz_run(ctx, address, more_addresses, corpus_target, map_to_original_source
     ide = determine_ide()
 
     if ide == IDE.BROWNIE:
-        artifacts = BrownieJob(target, analyze_config["build_directory"], map_to_original_source=map_to_original_source)
+        artifacts = BrownieJob(
+            target,
+            analyze_config["build_directory"],
+            map_to_original_source=map_to_original_source,
+        )
         artifacts.generate_payload()
     elif ide == IDE.HARDHAT:
         artifacts = HardhatJob(target, analyze_config["build_directory"])
         artifacts.generate_payload()
     elif ide == IDE.TRUFFLE:
-        db_url = analyze_config.get("truffle_db_url", None)
-        if not db_url:
-            raise click.exceptions.UsageError(
-                f"Truffle DB URL must be specified in config file"
-            )
         artifacts = TruffleJob(
-            db_url,
-            str(Path.cwd().absolute()),
-            target,
-            analyze_config["build_directory"],
+            str(Path.cwd().absolute()), target, analyze_config["build_directory"]
         )
         artifacts.generate_payload()
     else:

diff --git a/mythx_cli/util.py b/mythx_cli/util.py
@@ -250,6 +250,7 @@ def write_or_print(ctx, data: str, mode="a+") -> None:
         LOGGER.debug(f"Writing data to {ctx['output']}")
         outfile.write(data + "\n")
 
+
 def sol_files_by_directory(target_path: AnyStr) -> List:
     """Gathers all the .sol files inside the target path
     including sub-directories and returns them as a List.
@@ -260,6 +261,7 @@ def sol_files_by_directory(target_path: AnyStr) -> List:
     """
     return files_by_directory(target_path, ".sol")
 
+
 def files_by_directory(target_path: AnyStr, extension: AnyStr) -> List:
     """Gathers all the target extension files inside the target path
     including sub-directories and returns them as a List.
@@ -281,7 +283,7 @@ def files_by_directory(target_path: AnyStr, extension: AnyStr) -> List:
         else:
             """ If it's a valid target extension file there is no need to search further and we just append it to our
             list to be returned, removing the .original extension, leaving only the .sol """
-            target_files.append(target_path.replace(".original",""))
+            target_files.append(target_path.replace(".original", ""))
     source_dir = os.walk(target_path)
     for sub_dir in source_dir:
         if len(sub_dir[2]) > 0:
@@ -298,5 +300,5 @@ def files_by_directory(target_path: AnyStr, extension: AnyStr) -> List:
                 file_name = file_prefix + "/" + file
                 LOGGER.debug(f"Found target extension file: {file_name}")
                 # We remove the .original extension, added by Scribble
-                target_files.append(file_name.replace(".original",""))
+                target_files.append(file_name.replace(".original", ""))
     return target_files