diff --git a/src/sentry/api/bases/chunk.py b/src/sentry/api/bases/chunk.py index e75db30411e38a..39524a5fe9f1fa 100644 --- a/src/sentry/api/bases/chunk.py +++ b/src/sentry/api/bases/chunk.py @@ -1,7 +1,7 @@ from __future__ import absolute_import from sentry.models import File, FileBlob, FileBlobOwner -from sentry.models.file import ChunkFileState +from sentry.models.file import ChunkFileState, CHUNK_STATE_HEADER class ChunkAssembleMixin(object): @@ -14,7 +14,7 @@ def _create_file_response(self, state, missing_chunks=[]): 'missingChunks': missing_chunks } - def _check_chunk_ownership(self, organization, file_blobs, chunks, file_exists): + def _check_chunk_ownership(self, organization, file_blobs, chunks, file=None): # Check the ownership of these blobs with the org all_owned_blobs = FileBlobOwner.objects.filter( blob__in=file_blobs, @@ -35,9 +35,9 @@ def _check_chunk_ownership(self, organization, file_blobs, chunks, file_exists): # Only if this org already has the ownership of all blobs # and the count of chunks is the same as in the request # and the file already exists, we say this file is OK - elif len(file_blobs) == len(owned_blobs) == len(chunks) and file_exists: + elif len(file_blobs) == len(owned_blobs) == len(chunks) and file is not None: return self._create_file_response( - ChunkFileState.OK + file.headers.get(CHUNK_STATE_HEADER, ChunkFileState.OK) ) # If the length of owned and sent chunks is not the same # we return all missing blobs @@ -66,15 +66,18 @@ def _check_file_blobs(self, organization, checksum, chunks): file_blobs = FileBlob.objects.filter( checksum__in=chunks ).all() - return self._check_chunk_ownership(organization, file_blobs, chunks, False) + return ( + None, + self._check_chunk_ownership(organization, file_blobs, chunks) + ) # It is possible to have multiple files in the db because # we do not have a unique on the checksum for file in files: # We need to fetch all blobs file_blobs = file.blobs.all() - rv = self._check_chunk_ownership(organization, file_blobs, chunks, True) + rv = self._check_chunk_ownership(organization, file_blobs, chunks, file) if rv is not None: - return rv + return (file, rv) def _create_file_for_assembling(self, name, checksum, chunks): # If we have all chunks and the file wasn't found before @@ -84,7 +87,7 @@ def _create_file_for_assembling(self, name, checksum, chunks): name=name, checksum=checksum, type='chunked', - headers={'__state': ChunkFileState.CREATED} + headers={CHUNK_STATE_HEADER: ChunkFileState.CREATED} ) # Load all FileBlobs from db since we can be sure here we already own all diff --git a/src/sentry/api/endpoints/dif_files.py b/src/sentry/api/endpoints/dif_files.py index b68f18320f6118..7413520f28e6eb 100644 --- a/src/sentry/api/endpoints/dif_files.py +++ b/src/sentry/api/endpoints/dif_files.py @@ -6,14 +6,28 @@ from rest_framework.response import Response from sentry.utils import json +from sentry.api.serializers import serialize from sentry.api.bases.chunk import ChunkAssembleMixin from sentry.api.bases.project import ProjectEndpoint, ProjectReleasePermission -from sentry.models import File, ChunkFileState +from sentry.models import File, ChunkFileState, ProjectDSymFile class DifAssembleEndpoint(ChunkAssembleMixin, ProjectEndpoint): permission_classes = (ProjectReleasePermission, ) + def _add_project_dsym_to_reponse(self, found_file, response): + if found_file is not None: + if found_file.headers.get('error', None) is not None: + response['error'] = found_file.headers.get('error') + + dsym = ProjectDSymFile.objects.filter( + file=found_file + ).first() + if dsym is not None: + response['dif'] = serialize(dsym) + + return response + def post(self, request, project): """ Assmble one or multiple chunks (FileBlob) into dsym files @@ -58,15 +72,16 @@ def post(self, request, project): chunks = file_to_assemble.get('chunks', []) try: - result = self._check_file_blobs(project.organization, checksum, chunks) + found_file, response = self._check_file_blobs( + project.organization, checksum, chunks) # This either returns a file OK because we already own all chunks # OR we return not_found with the missing chunks (or not owned) - if result is not None: - file_response[checksum] = result + if response is not None: + # We also found a file, we try to fetch project dsym to return more + # information in the request + file_response[checksum] = self._add_project_dsym_to_reponse( + found_file, response) continue - except File.MultipleObjectsReturned: - return Response({'error': 'Duplicate checksum'}, - status=400) except File.DoesNotExist: pass diff --git a/src/sentry/models/file.py b/src/sentry/models/file.py index 2e1dd2e9da05a5..818f57d5ff6d85 100644 --- a/src/sentry/models/file.py +++ b/src/sentry/models/file.py @@ -33,6 +33,7 @@ ONE_DAY = 60 * 60 * 24 DEFAULT_BLOB_SIZE = 1024 * 1024 # one mb +CHUNK_STATE_HEADER = '__state' def enum(**named_values): @@ -292,7 +293,7 @@ def assemble_from_file_blob_ids(self, file_blob_ids, checksum, commit=True): if checksum != self.checksum: self.headers['__state'] = ChunkFileState.ERROR - self.headers['error'] = 'invalid_checksum' + self.headers['error'] = 'Checksum missmatch between chunks and file' metrics.timing('filestore.file-size', offset) if commit: diff --git a/src/sentry/tasks/assemble.py b/src/sentry/tasks/assemble.py index 4b709c609e82cb..1cfd321d138566 100644 --- a/src/sentry/tasks/assemble.py +++ b/src/sentry/tasks/assemble.py @@ -11,11 +11,15 @@ @instrumented_task(name='sentry.tasks.assemble.assemble_dif', queue='assemble') def assemble_dif(project_id, file_id, file_blob_ids, checksum, **kwargs): + from sentry.models import ChunkFileState, dsymfile, Project, CHUNK_STATE_HEADER with transaction.atomic(): + # Assemble the chunks into files file = assemble_chunks(file_id, file_blob_ids, checksum) - from sentry.models import ChunkFileState, dsymfile, Project + # If an error happend during assembling, we early return here + if file.headers.get(CHUNK_STATE_HEADER) == ChunkFileState.ERROR: + return project = Project.objects.filter( id=project_id @@ -38,7 +42,7 @@ def assemble_dif(project_id, file_id, file_blob_ids, checksum, **kwargs): # We can delete the original chunk file since we created new dsym files file.delete() else: - file.headers['__state'] = ChunkFileState.ERROR + file.headers[CHUNK_STATE_HEADER] = ChunkFileState.ERROR file.headers['error'] = 'Invalid object file' file.save() logger.error( @@ -57,17 +61,17 @@ def assemble_chunks(file_id, file_blob_ids, checksum, **kwargs): 'error': 'Empty file blobs' }) - from sentry.models import File, ChunkFileState + from sentry.models import File, ChunkFileState, CHUNK_STATE_HEADER file = File.objects.filter( id=file_id, ).get() - file.headers['__state'] = ChunkFileState.ASSEMBLING + file.headers[CHUNK_STATE_HEADER] = ChunkFileState.ASSEMBLING # Do the actual assembling here file.assemble_from_file_blob_ids(file_blob_ids, checksum) - if file.headers.get('__state', '') == ChunkFileState.ERROR: + if file.headers.get(CHUNK_STATE_HEADER, '') == ChunkFileState.ERROR: logger.error( 'assemble_chunks.assemble_error', extra={ @@ -75,7 +79,7 @@ def assemble_chunks(file_id, file_blob_ids, checksum, **kwargs): 'file_id': file.id } ) - return - file.headers['__state'] = ChunkFileState.OK + return file + file.headers[CHUNK_STATE_HEADER] = ChunkFileState.OK file.save() return file diff --git a/tests/sentry/api/endpoints/test_dif_assemble.py b/tests/sentry/api/endpoints/test_dif_assemble.py index 9b024d18d80ef7..2cebdcf9d86cee 100644 --- a/tests/sentry/api/endpoints/test_dif_assemble.py +++ b/tests/sentry/api/endpoints/test_dif_assemble.py @@ -7,8 +7,9 @@ from django.core.files.base import ContentFile from sentry.models import ApiToken, FileBlob, File, FileBlobIndex, FileBlobOwner -from sentry.models.file import ChunkFileState +from sentry.models.file import ChunkFileState, CHUNK_STATE_HEADER from sentry.testutils import APITestCase +from sentry.tasks.assemble import assemble_dif class DifAssembleEndpoint(APITestCase): @@ -165,10 +166,10 @@ def test_assemble(self, mock_assemble_dif): total_checksum = sha1(content2 + content1 + content3).hexdigest() # The order here is on purpose because we check for the order of checksums - bolb1 = FileBlob.from_file(fileobj1) + blob1 = FileBlob.from_file(fileobj1) FileBlobOwner.objects.get_or_create( organization=self.organization, - blob=bolb1 + blob=blob1 ) bolb3 = FileBlob.from_file(fileobj3) FileBlobOwner.objects.get_or_create( @@ -217,7 +218,7 @@ def test_assemble(self, mock_assemble_dif): assert response.data[total_checksum]['state'] == ChunkFileState.CREATED assert response.data[total_checksum]['missingChunks'] == [] - file_blob_id_order = [bolb2.id, bolb1.id, bolb3.id] + file_blob_id_order = [bolb2.id, blob1.id, bolb3.id] mock_assemble_dif.apply_async.assert_called_once_with( kwargs={ @@ -236,3 +237,79 @@ def test_assemble(self, mock_assemble_dif): file_blob_index = FileBlobIndex.objects.all() assert len(file_blob_index) == 3 + + def test_dif_reponse(self): + sym_file = self.load_fixture('crash.sym') + blob1 = FileBlob.from_file(ContentFile(sym_file)) + + total_checksum = sha1(sym_file).hexdigest() + + file = File.objects.create( + name='test.sym', + checksum=total_checksum, + type='chunked', + headers={CHUNK_STATE_HEADER: ChunkFileState.CREATED} + ) + + file_blob_id_order = [blob1.id] + + assemble_dif( + project_id=self.project.id, + file_id=file.id, + file_blob_ids=file_blob_id_order, + checksum=total_checksum, + ) + + response = self.client.post( + self.url, + data={ + total_checksum: { + 'name': 'test.sym', + 'chunks': [ + blob1.checksum + ] + } + }, + HTTP_AUTHORIZATION='Bearer {}'.format(self.token.token) + ) + + assert response.status_code == 200, response.content + assert response.data[total_checksum]['dif']['cpuName'] == 'x86_64' + + def test_dif_error_reponse(self): + sym_file = 'fail' + blob1 = FileBlob.from_file(ContentFile(sym_file)) + + total_checksum = sha1(sym_file).hexdigest() + + file = File.objects.create( + name='test.sym', + checksum=total_checksum, + type='chunked', + headers={CHUNK_STATE_HEADER: ChunkFileState.CREATED} + ) + + file_blob_id_order = [blob1.id] + + assemble_dif( + project_id=self.project.id, + file_id=file.id, + file_blob_ids=file_blob_id_order, + checksum=total_checksum, + ) + + response = self.client.post( + self.url, + data={ + total_checksum: { + 'name': 'test.sym', + 'chunks': [ + blob1.checksum + ] + } + }, + HTTP_AUTHORIZATION='Bearer {}'.format(self.token.token) + ) + + assert response.status_code == 200, response.content + assert response.data[total_checksum]['error'] == 'Invalid object file' diff --git a/tests/sentry/tasks/test_assemble.py b/tests/sentry/tasks/test_assemble.py index 28ca11b2261c27..5808ba05eec157 100644 --- a/tests/sentry/tasks/test_assemble.py +++ b/tests/sentry/tasks/test_assemble.py @@ -7,7 +7,7 @@ from sentry.testutils import TestCase from sentry.tasks.assemble import assemble_dif from sentry.models import FileBlob, File -from sentry.models.file import ChunkFileState +from sentry.models.file import ChunkFileState, CHUNK_STATE_HEADER class AssembleTest(TestCase): @@ -33,7 +33,7 @@ def test_wrong_dif(self): total_checksum = sha1(content2 + content1 + content3).hexdigest() # The order here is on purpose because we check for the order of checksums - bolb1 = FileBlob.from_file(fileobj1) + blob1 = FileBlob.from_file(fileobj1) bolb3 = FileBlob.from_file(fileobj3) bolb2 = FileBlob.from_file(fileobj2) @@ -41,16 +41,16 @@ def test_wrong_dif(self): name='test', checksum=total_checksum, type='chunked', - headers={'__state': ChunkFileState.CREATED} + headers={CHUNK_STATE_HEADER: ChunkFileState.CREATED} ) - file_blob_id_order = [bolb2.id, bolb1.id, bolb3.id] + file_blob_id_order = [bolb2.id, blob1.id, bolb3.id] file = File.objects.create( name='test', checksum=total_checksum, type='chunked', - headers={'__state': ChunkFileState.CREATED} + headers={CHUNK_STATE_HEADER: ChunkFileState.CREATED} ) assemble_dif( @@ -64,11 +64,11 @@ def test_wrong_dif(self): id=file.id, ).get() - assert file.headers.get('__state') == ChunkFileState.ERROR + assert file.headers.get(CHUNK_STATE_HEADER) == ChunkFileState.ERROR def test_dif(self): sym_file = self.load_fixture('crash.sym') - bolb1 = FileBlob.from_file(ContentFile(sym_file)) + blob1 = FileBlob.from_file(ContentFile(sym_file)) total_checksum = sha1(sym_file).hexdigest() @@ -76,10 +76,10 @@ def test_dif(self): name='test.sym', checksum=total_checksum, type='chunked', - headers={'__state': ChunkFileState.CREATED} + headers={CHUNK_STATE_HEADER: ChunkFileState.CREATED} ) - file_blob_id_order = [bolb1.id] + file_blob_id_order = [blob1.id] assemble_dif( project_id=self.project.id,