From a149581db938a177750a301990d8b6aa4e08c8a3 Mon Sep 17 00:00:00 2001 From: Daniel Griesser Date: Fri, 9 Feb 2018 10:43:25 +0100 Subject: [PATCH 1/4] feat: Add dif to assemble response, Add error handling --- src/sentry/api/bases/chunk.py | 15 ++-- src/sentry/api/endpoints/dif_files.py | 28 ++++++- src/sentry/models/file.py | 2 +- src/sentry/tasks/assemble.py | 8 +- .../sentry/api/endpoints/test_dif_assemble.py | 77 +++++++++++++++++++ 5 files changed, 117 insertions(+), 13 deletions(-) diff --git a/src/sentry/api/bases/chunk.py b/src/sentry/api/bases/chunk.py index e75db30411e38a..03f6da23d75faf 100644 --- a/src/sentry/api/bases/chunk.py +++ b/src/sentry/api/bases/chunk.py @@ -14,7 +14,7 @@ def _create_file_response(self, state, missing_chunks=[]): 'missingChunks': missing_chunks } - def _check_chunk_ownership(self, organization, file_blobs, chunks, file_exists): + def _check_chunk_ownership(self, organization, file_blobs, chunks, file): # Check the ownership of these blobs with the org all_owned_blobs = FileBlobOwner.objects.filter( blob__in=file_blobs, @@ -35,9 +35,9 @@ def _check_chunk_ownership(self, organization, file_blobs, chunks, file_exists): # Only if this org already has the ownership of all blobs # and the count of chunks is the same as in the request # and the file already exists, we say this file is OK - elif len(file_blobs) == len(owned_blobs) == len(chunks) and file_exists: + elif len(file_blobs) == len(owned_blobs) == len(chunks) and file is not None: return self._create_file_response( - ChunkFileState.OK + file.headers.get('__state', ChunkFileState.OK) ) # If the length of owned and sent chunks is not the same # we return all missing blobs @@ -66,15 +66,18 @@ def _check_file_blobs(self, organization, checksum, chunks): file_blobs = FileBlob.objects.filter( checksum__in=chunks ).all() - return self._check_chunk_ownership(organization, file_blobs, chunks, False) + return ( + None, + self._check_chunk_ownership(organization, file_blobs, chunks) + ) # It is possible to have multiple files in the db because # we do not have a unique on the checksum for file in files: # We need to fetch all blobs file_blobs = file.blobs.all() - rv = self._check_chunk_ownership(organization, file_blobs, chunks, True) + rv = self._check_chunk_ownership(organization, file_blobs, chunks, file) if rv is not None: - return rv + return (file, rv) def _create_file_for_assembling(self, name, checksum, chunks): # If we have all chunks and the file wasn't found before diff --git a/src/sentry/api/endpoints/dif_files.py b/src/sentry/api/endpoints/dif_files.py index b68f18320f6118..4bce2f4d5973fe 100644 --- a/src/sentry/api/endpoints/dif_files.py +++ b/src/sentry/api/endpoints/dif_files.py @@ -6,14 +6,30 @@ from rest_framework.response import Response from sentry.utils import json +from sentry.api.serializers import serialize from sentry.api.bases.chunk import ChunkAssembleMixin from sentry.api.bases.project import ProjectEndpoint, ProjectReleasePermission -from sentry.models import File, ChunkFileState +from sentry.models import File, ChunkFileState, ProjectDSymFile class DifAssembleEndpoint(ChunkAssembleMixin, ProjectEndpoint): permission_classes = (ProjectReleasePermission, ) + def _add_project_dsym_to_reponse(self, found_file, response): + if found_file is not None: + if found_file.headers.get('error', None) is not None: + response['error'] = found_file.headers.get('error') + try: + dsym = ProjectDSymFile.objects.filter( + file=found_file + ).first() + if dsym is not None: + response['dif'] = serialize(dsym) + except ProjectDSymFile.DoesNotExist: + pass + + return response + def post(self, request, project): """ Assmble one or multiple chunks (FileBlob) into dsym files @@ -58,11 +74,15 @@ def post(self, request, project): chunks = file_to_assemble.get('chunks', []) try: - result = self._check_file_blobs(project.organization, checksum, chunks) + found_file, response = self._check_file_blobs( + project.organization, checksum, chunks) # This either returns a file OK because we already own all chunks # OR we return not_found with the missing chunks (or not owned) - if result is not None: - file_response[checksum] = result + if response is not None: + # We also found a file, we try to fetch project dsym to return more + # information in the request + file_response[checksum] = self._add_project_dsym_to_reponse( + found_file, response) continue except File.MultipleObjectsReturned: return Response({'error': 'Duplicate checksum'}, diff --git a/src/sentry/models/file.py b/src/sentry/models/file.py index 2e1dd2e9da05a5..57966a48fad503 100644 --- a/src/sentry/models/file.py +++ b/src/sentry/models/file.py @@ -292,7 +292,7 @@ def assemble_from_file_blob_ids(self, file_blob_ids, checksum, commit=True): if checksum != self.checksum: self.headers['__state'] = ChunkFileState.ERROR - self.headers['error'] = 'invalid_checksum' + self.headers['error'] = 'Checksum missmatch between chunks and file' metrics.timing('filestore.file-size', offset) if commit: diff --git a/src/sentry/tasks/assemble.py b/src/sentry/tasks/assemble.py index 4b709c609e82cb..8ae983d21e979b 100644 --- a/src/sentry/tasks/assemble.py +++ b/src/sentry/tasks/assemble.py @@ -11,11 +11,15 @@ @instrumented_task(name='sentry.tasks.assemble.assemble_dif', queue='assemble') def assemble_dif(project_id, file_id, file_blob_ids, checksum, **kwargs): + from sentry.models import ChunkFileState, dsymfile, Project with transaction.atomic(): + # Assemble the chunks into files file = assemble_chunks(file_id, file_blob_ids, checksum) - from sentry.models import ChunkFileState, dsymfile, Project + # If an error happend during assembling, we early return here + if file.headers.get('__state') == ChunkFileState.ERROR: + return project = Project.objects.filter( id=project_id @@ -75,7 +79,7 @@ def assemble_chunks(file_id, file_blob_ids, checksum, **kwargs): 'file_id': file.id } ) - return + return file file.headers['__state'] = ChunkFileState.OK file.save() return file diff --git a/tests/sentry/api/endpoints/test_dif_assemble.py b/tests/sentry/api/endpoints/test_dif_assemble.py index 9b024d18d80ef7..793350f35a1d84 100644 --- a/tests/sentry/api/endpoints/test_dif_assemble.py +++ b/tests/sentry/api/endpoints/test_dif_assemble.py @@ -9,6 +9,7 @@ from sentry.models import ApiToken, FileBlob, File, FileBlobIndex, FileBlobOwner from sentry.models.file import ChunkFileState from sentry.testutils import APITestCase +from sentry.tasks.assemble import assemble_dif class DifAssembleEndpoint(APITestCase): @@ -236,3 +237,79 @@ def test_assemble(self, mock_assemble_dif): file_blob_index = FileBlobIndex.objects.all() assert len(file_blob_index) == 3 + + def test_dif_reponse(self): + sym_file = self.load_fixture('crash.sym') + bolb1 = FileBlob.from_file(ContentFile(sym_file)) + + total_checksum = sha1(sym_file).hexdigest() + + file = File.objects.create( + name='test.sym', + checksum=total_checksum, + type='chunked', + headers={'__state': ChunkFileState.CREATED} + ) + + file_blob_id_order = [bolb1.id] + + assemble_dif( + project_id=self.project.id, + file_id=file.id, + file_blob_ids=file_blob_id_order, + checksum=total_checksum, + ) + + response = self.client.post( + self.url, + data={ + total_checksum: { + 'name': 'test.sym', + 'chunks': [ + bolb1.checksum + ] + } + }, + HTTP_AUTHORIZATION='Bearer {}'.format(self.token.token) + ) + + assert response.status_code == 200, response.content + assert response.data[total_checksum]['dif']['cpuName'] == 'x86_64' + + def test_dif_error_reponse(self): + sym_file = 'fail' + bolb1 = FileBlob.from_file(ContentFile(sym_file)) + + total_checksum = sha1(sym_file).hexdigest() + + file = File.objects.create( + name='test.sym', + checksum=total_checksum, + type='chunked', + headers={'__state': ChunkFileState.CREATED} + ) + + file_blob_id_order = [bolb1.id] + + assemble_dif( + project_id=self.project.id, + file_id=file.id, + file_blob_ids=file_blob_id_order, + checksum=total_checksum, + ) + + response = self.client.post( + self.url, + data={ + total_checksum: { + 'name': 'test.sym', + 'chunks': [ + bolb1.checksum + ] + } + }, + HTTP_AUTHORIZATION='Bearer {}'.format(self.token.token) + ) + + assert response.status_code == 200, response.content + assert response.data[total_checksum]['error'] == 'Invalid object file' From 041456097d627ab9e26b661dbde02fac472eb5e1 Mon Sep 17 00:00:00 2001 From: Daniel Griesser Date: Fri, 9 Feb 2018 11:33:10 +0100 Subject: [PATCH 2/4] fix: Tests --- src/sentry/api/bases/chunk.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sentry/api/bases/chunk.py b/src/sentry/api/bases/chunk.py index 03f6da23d75faf..992ed5aa8b4169 100644 --- a/src/sentry/api/bases/chunk.py +++ b/src/sentry/api/bases/chunk.py @@ -14,7 +14,7 @@ def _create_file_response(self, state, missing_chunks=[]): 'missingChunks': missing_chunks } - def _check_chunk_ownership(self, organization, file_blobs, chunks, file): + def _check_chunk_ownership(self, organization, file_blobs, chunks, file=None): # Check the ownership of these blobs with the org all_owned_blobs = FileBlobOwner.objects.filter( blob__in=file_blobs, From 5f55c8c9c0372a185faf2838308c69edc2e0da0c Mon Sep 17 00:00:00 2001 From: Daniel Griesser Date: Fri, 9 Feb 2018 12:40:37 +0100 Subject: [PATCH 3/4] fix: Remove unnecessary code --- src/sentry/api/endpoints/dif_files.py | 17 ++++++----------- .../sentry/api/endpoints/test_dif_assemble.py | 18 +++++++++--------- tests/sentry/tasks/test_assemble.py | 8 ++++---- 3 files changed, 19 insertions(+), 24 deletions(-) diff --git a/src/sentry/api/endpoints/dif_files.py b/src/sentry/api/endpoints/dif_files.py index 4bce2f4d5973fe..7413520f28e6eb 100644 --- a/src/sentry/api/endpoints/dif_files.py +++ b/src/sentry/api/endpoints/dif_files.py @@ -19,14 +19,12 @@ def _add_project_dsym_to_reponse(self, found_file, response): if found_file is not None: if found_file.headers.get('error', None) is not None: response['error'] = found_file.headers.get('error') - try: - dsym = ProjectDSymFile.objects.filter( - file=found_file - ).first() - if dsym is not None: - response['dif'] = serialize(dsym) - except ProjectDSymFile.DoesNotExist: - pass + + dsym = ProjectDSymFile.objects.filter( + file=found_file + ).first() + if dsym is not None: + response['dif'] = serialize(dsym) return response @@ -84,9 +82,6 @@ def post(self, request, project): file_response[checksum] = self._add_project_dsym_to_reponse( found_file, response) continue - except File.MultipleObjectsReturned: - return Response({'error': 'Duplicate checksum'}, - status=400) except File.DoesNotExist: pass diff --git a/tests/sentry/api/endpoints/test_dif_assemble.py b/tests/sentry/api/endpoints/test_dif_assemble.py index 793350f35a1d84..84969698ecf136 100644 --- a/tests/sentry/api/endpoints/test_dif_assemble.py +++ b/tests/sentry/api/endpoints/test_dif_assemble.py @@ -166,10 +166,10 @@ def test_assemble(self, mock_assemble_dif): total_checksum = sha1(content2 + content1 + content3).hexdigest() # The order here is on purpose because we check for the order of checksums - bolb1 = FileBlob.from_file(fileobj1) + blob1 = FileBlob.from_file(fileobj1) FileBlobOwner.objects.get_or_create( organization=self.organization, - blob=bolb1 + blob=blob1 ) bolb3 = FileBlob.from_file(fileobj3) FileBlobOwner.objects.get_or_create( @@ -218,7 +218,7 @@ def test_assemble(self, mock_assemble_dif): assert response.data[total_checksum]['state'] == ChunkFileState.CREATED assert response.data[total_checksum]['missingChunks'] == [] - file_blob_id_order = [bolb2.id, bolb1.id, bolb3.id] + file_blob_id_order = [bolb2.id, blob1.id, bolb3.id] mock_assemble_dif.apply_async.assert_called_once_with( kwargs={ @@ -240,7 +240,7 @@ def test_assemble(self, mock_assemble_dif): def test_dif_reponse(self): sym_file = self.load_fixture('crash.sym') - bolb1 = FileBlob.from_file(ContentFile(sym_file)) + blob1 = FileBlob.from_file(ContentFile(sym_file)) total_checksum = sha1(sym_file).hexdigest() @@ -251,7 +251,7 @@ def test_dif_reponse(self): headers={'__state': ChunkFileState.CREATED} ) - file_blob_id_order = [bolb1.id] + file_blob_id_order = [blob1.id] assemble_dif( project_id=self.project.id, @@ -266,7 +266,7 @@ def test_dif_reponse(self): total_checksum: { 'name': 'test.sym', 'chunks': [ - bolb1.checksum + blob1.checksum ] } }, @@ -278,7 +278,7 @@ def test_dif_reponse(self): def test_dif_error_reponse(self): sym_file = 'fail' - bolb1 = FileBlob.from_file(ContentFile(sym_file)) + blob1 = FileBlob.from_file(ContentFile(sym_file)) total_checksum = sha1(sym_file).hexdigest() @@ -289,7 +289,7 @@ def test_dif_error_reponse(self): headers={'__state': ChunkFileState.CREATED} ) - file_blob_id_order = [bolb1.id] + file_blob_id_order = [blob1.id] assemble_dif( project_id=self.project.id, @@ -304,7 +304,7 @@ def test_dif_error_reponse(self): total_checksum: { 'name': 'test.sym', 'chunks': [ - bolb1.checksum + blob1.checksum ] } }, diff --git a/tests/sentry/tasks/test_assemble.py b/tests/sentry/tasks/test_assemble.py index 28ca11b2261c27..e9913babef9d27 100644 --- a/tests/sentry/tasks/test_assemble.py +++ b/tests/sentry/tasks/test_assemble.py @@ -33,7 +33,7 @@ def test_wrong_dif(self): total_checksum = sha1(content2 + content1 + content3).hexdigest() # The order here is on purpose because we check for the order of checksums - bolb1 = FileBlob.from_file(fileobj1) + blob1 = FileBlob.from_file(fileobj1) bolb3 = FileBlob.from_file(fileobj3) bolb2 = FileBlob.from_file(fileobj2) @@ -44,7 +44,7 @@ def test_wrong_dif(self): headers={'__state': ChunkFileState.CREATED} ) - file_blob_id_order = [bolb2.id, bolb1.id, bolb3.id] + file_blob_id_order = [bolb2.id, blob1.id, bolb3.id] file = File.objects.create( name='test', @@ -68,7 +68,7 @@ def test_wrong_dif(self): def test_dif(self): sym_file = self.load_fixture('crash.sym') - bolb1 = FileBlob.from_file(ContentFile(sym_file)) + blob1 = FileBlob.from_file(ContentFile(sym_file)) total_checksum = sha1(sym_file).hexdigest() @@ -79,7 +79,7 @@ def test_dif(self): headers={'__state': ChunkFileState.CREATED} ) - file_blob_id_order = [bolb1.id] + file_blob_id_order = [blob1.id] assemble_dif( project_id=self.project.id, From 386d5f30bd2895b0ab9d1313bf3ce915eb8c3f2b Mon Sep 17 00:00:00 2001 From: Daniel Griesser Date: Fri, 9 Feb 2018 13:19:15 +0100 Subject: [PATCH 4/4] ref: Use constant for __state --- src/sentry/api/bases/chunk.py | 6 +++--- src/sentry/models/file.py | 1 + src/sentry/tasks/assemble.py | 14 +++++++------- tests/sentry/api/endpoints/test_dif_assemble.py | 6 +++--- tests/sentry/tasks/test_assemble.py | 10 +++++----- 5 files changed, 19 insertions(+), 18 deletions(-) diff --git a/src/sentry/api/bases/chunk.py b/src/sentry/api/bases/chunk.py index 992ed5aa8b4169..39524a5fe9f1fa 100644 --- a/src/sentry/api/bases/chunk.py +++ b/src/sentry/api/bases/chunk.py @@ -1,7 +1,7 @@ from __future__ import absolute_import from sentry.models import File, FileBlob, FileBlobOwner -from sentry.models.file import ChunkFileState +from sentry.models.file import ChunkFileState, CHUNK_STATE_HEADER class ChunkAssembleMixin(object): @@ -37,7 +37,7 @@ def _check_chunk_ownership(self, organization, file_blobs, chunks, file=None): # and the file already exists, we say this file is OK elif len(file_blobs) == len(owned_blobs) == len(chunks) and file is not None: return self._create_file_response( - file.headers.get('__state', ChunkFileState.OK) + file.headers.get(CHUNK_STATE_HEADER, ChunkFileState.OK) ) # If the length of owned and sent chunks is not the same # we return all missing blobs @@ -87,7 +87,7 @@ def _create_file_for_assembling(self, name, checksum, chunks): name=name, checksum=checksum, type='chunked', - headers={'__state': ChunkFileState.CREATED} + headers={CHUNK_STATE_HEADER: ChunkFileState.CREATED} ) # Load all FileBlobs from db since we can be sure here we already own all diff --git a/src/sentry/models/file.py b/src/sentry/models/file.py index 57966a48fad503..818f57d5ff6d85 100644 --- a/src/sentry/models/file.py +++ b/src/sentry/models/file.py @@ -33,6 +33,7 @@ ONE_DAY = 60 * 60 * 24 DEFAULT_BLOB_SIZE = 1024 * 1024 # one mb +CHUNK_STATE_HEADER = '__state' def enum(**named_values): diff --git a/src/sentry/tasks/assemble.py b/src/sentry/tasks/assemble.py index 8ae983d21e979b..1cfd321d138566 100644 --- a/src/sentry/tasks/assemble.py +++ b/src/sentry/tasks/assemble.py @@ -11,14 +11,14 @@ @instrumented_task(name='sentry.tasks.assemble.assemble_dif', queue='assemble') def assemble_dif(project_id, file_id, file_blob_ids, checksum, **kwargs): - from sentry.models import ChunkFileState, dsymfile, Project + from sentry.models import ChunkFileState, dsymfile, Project, CHUNK_STATE_HEADER with transaction.atomic(): # Assemble the chunks into files file = assemble_chunks(file_id, file_blob_ids, checksum) # If an error happend during assembling, we early return here - if file.headers.get('__state') == ChunkFileState.ERROR: + if file.headers.get(CHUNK_STATE_HEADER) == ChunkFileState.ERROR: return project = Project.objects.filter( @@ -42,7 +42,7 @@ def assemble_dif(project_id, file_id, file_blob_ids, checksum, **kwargs): # We can delete the original chunk file since we created new dsym files file.delete() else: - file.headers['__state'] = ChunkFileState.ERROR + file.headers[CHUNK_STATE_HEADER] = ChunkFileState.ERROR file.headers['error'] = 'Invalid object file' file.save() logger.error( @@ -61,17 +61,17 @@ def assemble_chunks(file_id, file_blob_ids, checksum, **kwargs): 'error': 'Empty file blobs' }) - from sentry.models import File, ChunkFileState + from sentry.models import File, ChunkFileState, CHUNK_STATE_HEADER file = File.objects.filter( id=file_id, ).get() - file.headers['__state'] = ChunkFileState.ASSEMBLING + file.headers[CHUNK_STATE_HEADER] = ChunkFileState.ASSEMBLING # Do the actual assembling here file.assemble_from_file_blob_ids(file_blob_ids, checksum) - if file.headers.get('__state', '') == ChunkFileState.ERROR: + if file.headers.get(CHUNK_STATE_HEADER, '') == ChunkFileState.ERROR: logger.error( 'assemble_chunks.assemble_error', extra={ @@ -80,6 +80,6 @@ def assemble_chunks(file_id, file_blob_ids, checksum, **kwargs): } ) return file - file.headers['__state'] = ChunkFileState.OK + file.headers[CHUNK_STATE_HEADER] = ChunkFileState.OK file.save() return file diff --git a/tests/sentry/api/endpoints/test_dif_assemble.py b/tests/sentry/api/endpoints/test_dif_assemble.py index 84969698ecf136..2cebdcf9d86cee 100644 --- a/tests/sentry/api/endpoints/test_dif_assemble.py +++ b/tests/sentry/api/endpoints/test_dif_assemble.py @@ -7,7 +7,7 @@ from django.core.files.base import ContentFile from sentry.models import ApiToken, FileBlob, File, FileBlobIndex, FileBlobOwner -from sentry.models.file import ChunkFileState +from sentry.models.file import ChunkFileState, CHUNK_STATE_HEADER from sentry.testutils import APITestCase from sentry.tasks.assemble import assemble_dif @@ -248,7 +248,7 @@ def test_dif_reponse(self): name='test.sym', checksum=total_checksum, type='chunked', - headers={'__state': ChunkFileState.CREATED} + headers={CHUNK_STATE_HEADER: ChunkFileState.CREATED} ) file_blob_id_order = [blob1.id] @@ -286,7 +286,7 @@ def test_dif_error_reponse(self): name='test.sym', checksum=total_checksum, type='chunked', - headers={'__state': ChunkFileState.CREATED} + headers={CHUNK_STATE_HEADER: ChunkFileState.CREATED} ) file_blob_id_order = [blob1.id] diff --git a/tests/sentry/tasks/test_assemble.py b/tests/sentry/tasks/test_assemble.py index e9913babef9d27..5808ba05eec157 100644 --- a/tests/sentry/tasks/test_assemble.py +++ b/tests/sentry/tasks/test_assemble.py @@ -7,7 +7,7 @@ from sentry.testutils import TestCase from sentry.tasks.assemble import assemble_dif from sentry.models import FileBlob, File -from sentry.models.file import ChunkFileState +from sentry.models.file import ChunkFileState, CHUNK_STATE_HEADER class AssembleTest(TestCase): @@ -41,7 +41,7 @@ def test_wrong_dif(self): name='test', checksum=total_checksum, type='chunked', - headers={'__state': ChunkFileState.CREATED} + headers={CHUNK_STATE_HEADER: ChunkFileState.CREATED} ) file_blob_id_order = [bolb2.id, blob1.id, bolb3.id] @@ -50,7 +50,7 @@ def test_wrong_dif(self): name='test', checksum=total_checksum, type='chunked', - headers={'__state': ChunkFileState.CREATED} + headers={CHUNK_STATE_HEADER: ChunkFileState.CREATED} ) assemble_dif( @@ -64,7 +64,7 @@ def test_wrong_dif(self): id=file.id, ).get() - assert file.headers.get('__state') == ChunkFileState.ERROR + assert file.headers.get(CHUNK_STATE_HEADER) == ChunkFileState.ERROR def test_dif(self): sym_file = self.load_fixture('crash.sym') @@ -76,7 +76,7 @@ def test_dif(self): name='test.sym', checksum=total_checksum, type='chunked', - headers={'__state': ChunkFileState.CREATED} + headers={CHUNK_STATE_HEADER: ChunkFileState.CREATED} ) file_blob_id_order = [blob1.id]