Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 11 additions & 8 deletions src/sentry/api/bases/chunk.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from __future__ import absolute_import

from sentry.models import File, FileBlob, FileBlobOwner
from sentry.models.file import ChunkFileState
from sentry.models.file import ChunkFileState, CHUNK_STATE_HEADER


class ChunkAssembleMixin(object):
Expand All @@ -14,7 +14,7 @@ def _create_file_response(self, state, missing_chunks=[]):
'missingChunks': missing_chunks
}

def _check_chunk_ownership(self, organization, file_blobs, chunks, file_exists):
def _check_chunk_ownership(self, organization, file_blobs, chunks, file=None):
# Check the ownership of these blobs with the org
all_owned_blobs = FileBlobOwner.objects.filter(
blob__in=file_blobs,
Expand All @@ -35,9 +35,9 @@ def _check_chunk_ownership(self, organization, file_blobs, chunks, file_exists):
# Only if this org already has the ownership of all blobs
# and the count of chunks is the same as in the request
# and the file already exists, we say this file is OK
elif len(file_blobs) == len(owned_blobs) == len(chunks) and file_exists:
elif len(file_blobs) == len(owned_blobs) == len(chunks) and file is not None:
return self._create_file_response(
ChunkFileState.OK
file.headers.get(CHUNK_STATE_HEADER, ChunkFileState.OK)
)
# If the length of owned and sent chunks is not the same
# we return all missing blobs
Expand Down Expand Up @@ -66,15 +66,18 @@ def _check_file_blobs(self, organization, checksum, chunks):
file_blobs = FileBlob.objects.filter(
checksum__in=chunks
).all()
return self._check_chunk_ownership(organization, file_blobs, chunks, False)
return (
None,
self._check_chunk_ownership(organization, file_blobs, chunks)
)
# It is possible to have multiple files in the db because
# we do not have a unique on the checksum
for file in files:
# We need to fetch all blobs
file_blobs = file.blobs.all()
rv = self._check_chunk_ownership(organization, file_blobs, chunks, True)
rv = self._check_chunk_ownership(organization, file_blobs, chunks, file)
if rv is not None:
return rv
return (file, rv)

def _create_file_for_assembling(self, name, checksum, chunks):
# If we have all chunks and the file wasn't found before
Expand All @@ -84,7 +87,7 @@ def _create_file_for_assembling(self, name, checksum, chunks):
name=name,
checksum=checksum,
type='chunked',
headers={'__state': ChunkFileState.CREATED}
headers={CHUNK_STATE_HEADER: ChunkFileState.CREATED}
)

# Load all FileBlobs from db since we can be sure here we already own all
Expand Down
29 changes: 22 additions & 7 deletions src/sentry/api/endpoints/dif_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,28 @@
from rest_framework.response import Response

from sentry.utils import json
from sentry.api.serializers import serialize
from sentry.api.bases.chunk import ChunkAssembleMixin
from sentry.api.bases.project import ProjectEndpoint, ProjectReleasePermission
from sentry.models import File, ChunkFileState
from sentry.models import File, ChunkFileState, ProjectDSymFile


class DifAssembleEndpoint(ChunkAssembleMixin, ProjectEndpoint):
permission_classes = (ProjectReleasePermission, )

def _add_project_dsym_to_reponse(self, found_file, response):
if found_file is not None:
if found_file.headers.get('error', None) is not None:
response['error'] = found_file.headers.get('error')
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe just a minor consistency thing, but: what if there is an error header while the state is OK? In that case it is odd that you're adding that field.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The database doesn't prevent this from happening, but no code path sets both OK and error. So whenever there is an error header, the state is also ERROR.


dsym = ProjectDSymFile.objects.filter(
file=found_file
).first()
if dsym is not None:
response['dif'] = serialize(dsym)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should even be required if the state is OK and log an error if the ProjectDsymFile is missing since it indicates that something went wrong and we weren't able to write the correct state header.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can't check this here, since the original chunked file will be deleted; only the new dsym file remains in the database, without the __state header at all.
So effectively the API returns OK whenever there is no state header but a file with owned blobs exists.


return response

def post(self, request, project):
"""
Assmble one or multiple chunks (FileBlob) into dsym files
Expand Down Expand Up @@ -58,15 +72,16 @@ def post(self, request, project):
chunks = file_to_assemble.get('chunks', [])

try:
result = self._check_file_blobs(project.organization, checksum, chunks)
found_file, response = self._check_file_blobs(
project.organization, checksum, chunks)
# This either returns a file OK because we already own all chunks
# OR we return not_found with the missing chunks (or not owned)
if result is not None:
file_response[checksum] = result
if response is not None:
# We also found a file, we try to fetch project dsym to return more
# information in the request
file_response[checksum] = self._add_project_dsym_to_reponse(
found_file, response)
continue
except File.MultipleObjectsReturned:
return Response({'error': 'Duplicate checksum'},
status=400)
except File.DoesNotExist:
pass

Expand Down
3 changes: 2 additions & 1 deletion src/sentry/models/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
ONE_DAY = 60 * 60 * 24  # seconds in one day

DEFAULT_BLOB_SIZE = 1024 * 1024  # one mb
# Key in File.headers under which the chunked-assembly state
# (a ChunkFileState value) is stored while a file is being assembled.
CHUNK_STATE_HEADER = '__state'


def enum(**named_values):
Expand Down Expand Up @@ -292,7 +293,7 @@ def assemble_from_file_blob_ids(self, file_blob_ids, checksum, commit=True):

if checksum != self.checksum:
self.headers['__state'] = ChunkFileState.ERROR
self.headers['error'] = 'invalid_checksum'
self.headers['error'] = 'Checksum missmatch between chunks and file'

metrics.timing('filestore.file-size', offset)
if commit:
Expand Down
18 changes: 11 additions & 7 deletions src/sentry/tasks/assemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,15 @@

@instrumented_task(name='sentry.tasks.assemble.assemble_dif', queue='assemble')
def assemble_dif(project_id, file_id, file_blob_ids, checksum, **kwargs):
from sentry.models import ChunkFileState, dsymfile, Project, CHUNK_STATE_HEADER
with transaction.atomic():

# Assemble the chunks into files
file = assemble_chunks(file_id, file_blob_ids, checksum)

from sentry.models import ChunkFileState, dsymfile, Project
# If an error happend during assembling, we early return here
if file.headers.get(CHUNK_STATE_HEADER) == ChunkFileState.ERROR:
return

project = Project.objects.filter(
id=project_id
Expand All @@ -38,7 +42,7 @@ def assemble_dif(project_id, file_id, file_blob_ids, checksum, **kwargs):
# We can delete the original chunk file since we created new dsym files
file.delete()
else:
file.headers['__state'] = ChunkFileState.ERROR
file.headers[CHUNK_STATE_HEADER] = ChunkFileState.ERROR
file.headers['error'] = 'Invalid object file'
file.save()
logger.error(
Expand All @@ -57,25 +61,25 @@ def assemble_chunks(file_id, file_blob_ids, checksum, **kwargs):
'error': 'Empty file blobs'
})

from sentry.models import File, ChunkFileState
from sentry.models import File, ChunkFileState, CHUNK_STATE_HEADER

file = File.objects.filter(
id=file_id,
).get()

file.headers['__state'] = ChunkFileState.ASSEMBLING
file.headers[CHUNK_STATE_HEADER] = ChunkFileState.ASSEMBLING
# Do the actual assembling here

file.assemble_from_file_blob_ids(file_blob_ids, checksum)
if file.headers.get('__state', '') == ChunkFileState.ERROR:
if file.headers.get(CHUNK_STATE_HEADER, '') == ChunkFileState.ERROR:
logger.error(
'assemble_chunks.assemble_error',
extra={
'error': file.headers.get('error', ''),
'file_id': file.id
}
)
return
file.headers['__state'] = ChunkFileState.OK
return file
file.headers[CHUNK_STATE_HEADER] = ChunkFileState.OK
file.save()
return file
85 changes: 81 additions & 4 deletions tests/sentry/api/endpoints/test_dif_assemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@
from django.core.files.base import ContentFile

from sentry.models import ApiToken, FileBlob, File, FileBlobIndex, FileBlobOwner
from sentry.models.file import ChunkFileState
from sentry.models.file import ChunkFileState, CHUNK_STATE_HEADER
from sentry.testutils import APITestCase
from sentry.tasks.assemble import assemble_dif


class DifAssembleEndpoint(APITestCase):
Expand Down Expand Up @@ -165,10 +166,10 @@ def test_assemble(self, mock_assemble_dif):
total_checksum = sha1(content2 + content1 + content3).hexdigest()

# The order here is on purpose because we check for the order of checksums
bolb1 = FileBlob.from_file(fileobj1)
blob1 = FileBlob.from_file(fileobj1)
FileBlobOwner.objects.get_or_create(
organization=self.organization,
blob=bolb1
blob=blob1
)
bolb3 = FileBlob.from_file(fileobj3)
FileBlobOwner.objects.get_or_create(
Expand Down Expand Up @@ -217,7 +218,7 @@ def test_assemble(self, mock_assemble_dif):
assert response.data[total_checksum]['state'] == ChunkFileState.CREATED
assert response.data[total_checksum]['missingChunks'] == []

file_blob_id_order = [bolb2.id, bolb1.id, bolb3.id]
file_blob_id_order = [bolb2.id, blob1.id, bolb3.id]

mock_assemble_dif.apply_async.assert_called_once_with(
kwargs={
Expand All @@ -236,3 +237,79 @@ def test_assemble(self, mock_assemble_dif):

file_blob_index = FileBlobIndex.objects.all()
assert len(file_blob_index) == 3

def test_dif_reponse(self):
    # Assemble a dsym file from a single fixture blob, then post the same
    # checksum to the endpoint and expect the serialized dif in the reply.
    fixture = self.load_fixture('crash.sym')
    blob = FileBlob.from_file(ContentFile(fixture))
    checksum = sha1(fixture).hexdigest()

    chunked_file = File.objects.create(
        name='test.sym',
        checksum=checksum,
        type='chunked',
        headers={CHUNK_STATE_HEADER: ChunkFileState.CREATED},
    )

    # Run the assemble task synchronously so the dsym exists before the poll.
    assemble_dif(
        project_id=self.project.id,
        file_id=chunked_file.id,
        file_blob_ids=[blob.id],
        checksum=checksum,
    )

    payload = {
        checksum: {
            'name': 'test.sym',
            'chunks': [blob.checksum],
        }
    }
    response = self.client.post(
        self.url,
        data=payload,
        HTTP_AUTHORIZATION='Bearer {}'.format(self.token.token),
    )

    assert response.status_code == 200, response.content
    assert response.data[checksum]['dif']['cpuName'] == 'x86_64'

def test_dif_error_reponse(self):
    # 'fail' is not a valid object file, so assembling marks the chunked
    # file with an error header; the endpoint must surface that error.
    fixture = 'fail'
    blob = FileBlob.from_file(ContentFile(fixture))
    checksum = sha1(fixture).hexdigest()

    chunked_file = File.objects.create(
        name='test.sym',
        checksum=checksum,
        type='chunked',
        headers={CHUNK_STATE_HEADER: ChunkFileState.CREATED},
    )

    # Run the assemble task synchronously; it should fail on the bad input.
    assemble_dif(
        project_id=self.project.id,
        file_id=chunked_file.id,
        file_blob_ids=[blob.id],
        checksum=checksum,
    )

    payload = {
        checksum: {
            'name': 'test.sym',
            'chunks': [blob.checksum],
        }
    }
    response = self.client.post(
        self.url,
        data=payload,
        HTTP_AUTHORIZATION='Bearer {}'.format(self.token.token),
    )

    assert response.status_code == 200, response.content
    assert response.data[checksum]['error'] == 'Invalid object file'
18 changes: 9 additions & 9 deletions tests/sentry/tasks/test_assemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from sentry.testutils import TestCase
from sentry.tasks.assemble import assemble_dif
from sentry.models import FileBlob, File
from sentry.models.file import ChunkFileState
from sentry.models.file import ChunkFileState, CHUNK_STATE_HEADER


class AssembleTest(TestCase):
Expand All @@ -33,24 +33,24 @@ def test_wrong_dif(self):
total_checksum = sha1(content2 + content1 + content3).hexdigest()

# The order here is on purpose because we check for the order of checksums
bolb1 = FileBlob.from_file(fileobj1)
blob1 = FileBlob.from_file(fileobj1)
bolb3 = FileBlob.from_file(fileobj3)
bolb2 = FileBlob.from_file(fileobj2)

file = File.objects.create(
name='test',
checksum=total_checksum,
type='chunked',
headers={'__state': ChunkFileState.CREATED}
headers={CHUNK_STATE_HEADER: ChunkFileState.CREATED}
)

file_blob_id_order = [bolb2.id, bolb1.id, bolb3.id]
file_blob_id_order = [bolb2.id, blob1.id, bolb3.id]

file = File.objects.create(
name='test',
checksum=total_checksum,
type='chunked',
headers={'__state': ChunkFileState.CREATED}
headers={CHUNK_STATE_HEADER: ChunkFileState.CREATED}
)

assemble_dif(
Expand All @@ -64,22 +64,22 @@ def test_wrong_dif(self):
id=file.id,
).get()

assert file.headers.get('__state') == ChunkFileState.ERROR
assert file.headers.get(CHUNK_STATE_HEADER) == ChunkFileState.ERROR

def test_dif(self):
sym_file = self.load_fixture('crash.sym')
bolb1 = FileBlob.from_file(ContentFile(sym_file))
blob1 = FileBlob.from_file(ContentFile(sym_file))

total_checksum = sha1(sym_file).hexdigest()

file = File.objects.create(
name='test.sym',
checksum=total_checksum,
type='chunked',
headers={'__state': ChunkFileState.CREATED}
headers={CHUNK_STATE_HEADER: ChunkFileState.CREATED}
)

file_blob_id_order = [bolb1.id]
file_blob_id_order = [blob1.id]

assemble_dif(
project_id=self.project.id,
Expand Down