Skip to content

Commit

Permalink
infra: make it possible to download public corpus (google#9155)
Browse files Browse the repository at this point in the history
Ref: ossf/fuzz-introspector#587

CC @evverx 

Signed-off-by: David Korczynski <david@adalogics.com>

Signed-off-by: David Korczynski <david@adalogics.com>
  • Loading branch information
DavidKorczynski authored and eamonnmcmanus committed Mar 15, 2023
1 parent e0ed4a3 commit 0a57be1
Showing 1 changed file with 69 additions and 8 deletions.
77 changes: 69 additions & 8 deletions infra/helper.py
Expand Up @@ -63,6 +63,10 @@
'gs://{project_name}-backup.clusterfuzz-external.appspot.com/corpus/'
'libFuzzer/{fuzz_target}/')

HTTPS_CORPUS_BACKUP_URL_FORMAT = (
'https://storage.googleapis.com/{project_name}-backup.clusterfuzz-external'
'.appspot.com/corpus/libFuzzer/{fuzz_target}/public.zip')

LANGUAGE_REGEX = re.compile(r'[^\s]+')
PROJECT_LANGUAGE_REGEX = re.compile(r'\s*language\s*:\s*([^\s]+)')

Expand Down Expand Up @@ -340,6 +344,10 @@ def get_parser(): # pylint: disable=too-many-statements
'download_corpora', help='Download all corpora for a project.')
download_corpora_parser.add_argument('--fuzz-target',
help='specify name of a fuzz target')
download_corpora_parser.add_argument('--public',
action='store_true',
help='if set, will download public '
'corpus using wget')
download_corpora_parser.add_argument(
'project', help='name of the project or path (external)')

Expand Down Expand Up @@ -861,19 +869,69 @@ def _get_latest_corpus(project, fuzz_target, base_corpus_dir):
subprocess.check_call(command)


def download_corpora(args):
"""Downloads most recent corpora from GCS for the given project."""
if not check_project_exists(args.project):
return False
def _get_latest_public_corpus(args, fuzzer):
"""Downloads the public corpus"""
target_corpus_dir = "build/corpus/%s" % args.project.name
if not os.path.isdir(target_corpus_dir):
os.makedirs(target_corpus_dir)

target_zip = os.path.join(target_corpus_dir, fuzzer + ".zip")

project_qualified_fuzz_target_name = fuzzer
qualified_name_prefix = args.project.name + '_'
if not fuzzer.startswith(qualified_name_prefix):
project_qualified_fuzz_target_name = qualified_name_prefix + fuzzer

download_url = HTTPS_CORPUS_BACKUP_URL_FORMAT.format(
project_name=args.project.name,
fuzz_target=project_qualified_fuzz_target_name)

cmd = ['wget', download_url, '-O', target_zip]
try:
with open(os.devnull, 'w') as stdout:
subprocess.check_call(cmd, stdout=stdout)
except OSError:
logging.error('Failed to download corpus')

target_fuzzer_dir = os.path.join(target_corpus_dir, fuzzer)
if not os.path.isdir(target_fuzzer_dir):
os.mkdir(target_fuzzer_dir)

target_corpus_dir = os.path.join(target_corpus_dir, fuzzer)
try:
with open(os.devnull, 'w') as stdout:
subprocess.check_call(['gsutil', '--version'], stdout=stdout)
subprocess.check_call(['unzip', target_zip, '-d', target_fuzzer_dir],
stdout=stdout)
except OSError:
logging.error('gsutil not found. Please install it from '
'https://cloud.google.com/storage/docs/gsutil_install')
logging.error('Failed to unzip corpus')

# Remove the downloaded zip
os.remove(target_zip)
return True


def download_corpora(args):
"""Downloads most recent corpora from GCS for the given project."""
if not check_project_exists(args.project):
return False

if args.public:
logging.info("Downloading public corpus")
try:
with open(os.devnull, 'w') as stdout:
subprocess.check_call(['wget', '--version'], stdout=stdout)
except OSError:
logging.error('wget not found')
return False
else:
try:
with open(os.devnull, 'w') as stdout:
subprocess.check_call(['gsutil', '--version'], stdout=stdout)
except OSError:
logging.error('gsutil not found. Please install it from '
'https://cloud.google.com/storage/docs/gsutil_install')
return False

if args.fuzz_target:
fuzz_targets = [args.fuzz_target]
else:
Expand All @@ -883,7 +941,10 @@ def download_corpora(args):

def _download_for_single_target(fuzz_target):
try:
_get_latest_corpus(args.project, fuzz_target, corpus_dir)
if args.public:
_get_latest_public_corpus(args, fuzz_target)
else:
_get_latest_corpus(args.project, fuzz_target, corpus_dir)
return True
except Exception as error: # pylint:disable=broad-except
logging.error('Corpus download for %s failed: %s.', fuzz_target,
Expand Down

0 comments on commit 0a57be1

Please sign in to comment.