Skip to content

Commit

Permalink
fix: copy raw reports for parallel experiment (#414)
Browse files — browse the repository at this point in the history
* copy raw reports for parallel

* fix tests
  • Branch information (captured while still loading)
daniel-codecov committed Apr 25, 2024
1 parent bcec347 commit c71ddfd
Show file tree
Hide file tree
Showing 5 changed files with 48 additions and 29 deletions.
3 changes: 2 additions & 1 deletion helpers/parallel_upload_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,9 +117,10 @@ def save_final_serial_report_results(
report_code,
f"serial/chunks<latest_upload_pk:{latest_upload_pk}>",
)
archive_service.write_parallel_experiment_file(
report_url = archive_service.write_parallel_experiment_file(
commitid,
files_and_sessions,
report_code,
f"serial/files_and_sessions<latest_upload_pk:{latest_upload_pk}>",
)
return report_url
37 changes: 29 additions & 8 deletions services/report/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -830,7 +830,7 @@ async def create_new_report_for_commit(self, commit: Commit) -> Report:

@sentry_sdk.trace
def parse_raw_report_from_storage(
self, repo: Repository, upload: Upload, is_parallel=False
self, repo: Repository, upload: Upload, is_parallel=False, is_error_case=False
) -> ParsedRawReport:
"""Pulls the raw uploaded report from storage and parses it do it's
easier to access different parts of the raw upload.
Expand All @@ -841,14 +841,35 @@ def parse_raw_report_from_storage(
archive_service = self.get_archive_service(repo)
archive_url = upload.storage_path

# for the parallel experiment: since the parallel version runs after the old behaviour
# has finished, the current uploads have already been rewritten in a human readable
# format, so we need to use the legacy parser here for the parallel version.
parser = get_proper_parser(upload, use_legacy=is_parallel)
# For the parallel upload verification experiment, we need to make a copy of the raw uploaded reports
# so that the parallel pipeline can use those to parse. The serial pipeline rewrites the raw uploaded
# reports to a human readable version that doesn't include file fixes, so that's why copying is necessary.
if PARALLEL_UPLOAD_PROCESSING_BY_REPO.check_value(
repo_id=repo.repoid, default=False
) and (not is_error_case):
parallel_url = archive_url.removesuffix(".txt") + "_PARALLEL.txt"
if not is_parallel:
archive_file = archive_service.read_file(archive_url)
archive_service.write_file(parallel_url, archive_file)
log.info(
"Copying raw report file for parallel experiment to: "
+ str(parallel_url),
extra=dict(commit=upload.report.commit_id, repoid=repo.repoid),
)
else:
archive_url = parallel_url
archive_file = archive_service.read_file(archive_url)
log.info(
"Reading raw report file for parallel experiment from: "
+ str(archive_url),
extra=dict(commit=upload.report.commit_id, repoid=repo.repoid),
)
else:
archive_file = archive_service.read_file(archive_url)

raw_uploaded_report = parser.parse_raw_report_from_bytes(
archive_service.read_file(archive_url)
)
parser = get_proper_parser(upload)

raw_uploaded_report = parser.parse_raw_report_from_bytes(archive_file)
return raw_uploaded_report

@sentry_sdk.trace
Expand Down
4 changes: 1 addition & 3 deletions services/report/parser/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,7 @@
from services.report.parser.version_one import VersionOneReportParser


def get_proper_parser(upload: Upload):
    """Pick the parser matching the raw report format of *upload*.

    An upload whose ``upload_extras`` carries ``format_version == "v1"``
    is parsed with the v1 parser; any other upload (including one with
    no ``upload_extras`` at all) falls back to the legacy parser.
    """
    extras = upload.upload_extras
    is_v1 = bool(extras) and extras.get("format_version") == "v1"
    return VersionOneReportParser() if is_v1 else LegacyReportParser()
2 changes: 1 addition & 1 deletion tasks/tests/unit/test_upload_processing_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -555,7 +555,7 @@ def test_upload_task_call_exception_within_individual_upload(
assert upload.state_id == UploadState.ERROR.db_id
assert upload.state == "error"
assert not mocked_3.called
mocked_4.assert_called_with(commit.repository, upload)
mocked_4.assert_called_with(commit.repository, upload, is_error_case=True)
mocked_5.assert_called()

@pytest.mark.django_db(databases={"default"})
Expand Down
31 changes: 15 additions & 16 deletions tasks/upload_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,27 +354,26 @@ def process_impl_within_lock(
# experiment. The report being saved is not necessarily the final
# report for the commit, as more uploads can still be made.
if is_final and (not in_parallel):
save_final_serial_report_results(
final_serial_report_url = save_final_serial_report_results(
report_service, commit, report, report_code, arguments_list
)
log.info(
"Saved final serial report results to storage",
extra=dict(
repoid=repoid,
commit=commitid,
final_serial_result_path=final_serial_report_url,
),
)

for processed_individual_report in processings_so_far:
# We delete and rewrite the artifacts when the serial flow runs first. When
# the parallel flow runs second, it parses the human readable artifacts instead
# since the serial flow already rewrote it
if not (
PARALLEL_UPLOAD_PROCESSING_BY_REPO.check_value(
repo_id=repository.repoid
)
and in_parallel
):
deleted_archive = self._possibly_delete_archive(
deleted_archive = self._possibly_delete_archive(
processed_individual_report, report_service, commit
)
if not deleted_archive:
self._rewrite_raw_report_readable(
processed_individual_report, report_service, commit
)
if not deleted_archive:
self._rewrite_raw_report_readable(
processed_individual_report, report_service, commit
)
processed_individual_report.pop("upload_obj", None)
processed_individual_report.pop("raw_report", None)
log.info(
Expand Down Expand Up @@ -503,7 +502,7 @@ def _attempt_rewrite_raw_report_readable_error_case(
)
try:
raw_report = report_service.parse_raw_report_from_storage(
commit.repository, upload
commit.repository, upload, is_error_case=True
)
self._rewrite_raw_report_readable(
processing_result={"raw_report": raw_report, "upload_object": upload},
Expand Down

0 comments on commit c71ddfd

Please sign in to comment.