Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 11 additions & 8 deletions src/sentry/api/bases/chunk.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from __future__ import absolute_import

from sentry.models import File, FileBlob, FileBlobOwner
from sentry.models.file import ChunkFileState
from sentry.models.file import ChunkFileState, CHUNK_STATE_HEADER


class ChunkAssembleMixin(object):
Expand All @@ -14,7 +14,7 @@ def _create_file_response(self, state, missing_chunks=[]):
'missingChunks': missing_chunks
}

def _check_chunk_ownership(self, organization, file_blobs, chunks, file_exists):
def _check_chunk_ownership(self, organization, file_blobs, chunks, file=None):
# Check the ownership of these blobs with the org
all_owned_blobs = FileBlobOwner.objects.filter(
blob__in=file_blobs,
Expand All @@ -35,9 +35,9 @@ def _check_chunk_ownership(self, organization, file_blobs, chunks, file_exists):
# Only if this org already has the ownership of all blobs
# and the count of chunks is the same as in the request
# and the file already exists, we say this file is OK
elif len(file_blobs) == len(owned_blobs) == len(chunks) and file_exists:
elif len(file_blobs) == len(owned_blobs) == len(chunks) and file is not None:
return self._create_file_response(
ChunkFileState.OK
file.headers.get(CHUNK_STATE_HEADER, ChunkFileState.OK)
)
# If the length of owned and sent chunks is not the same
# we return all missing blobs
Expand Down Expand Up @@ -66,15 +66,18 @@ def _check_file_blobs(self, organization, checksum, chunks):
file_blobs = FileBlob.objects.filter(
checksum__in=chunks
).all()
return self._check_chunk_ownership(organization, file_blobs, chunks, False)
return (
None,
self._check_chunk_ownership(organization, file_blobs, chunks)
)
# It is possible to have multiple files in the db because
# we do not have a unique on the checksum
for file in files:
# We need to fetch all blobs
file_blobs = file.blobs.all()
rv = self._check_chunk_ownership(organization, file_blobs, chunks, True)
rv = self._check_chunk_ownership(organization, file_blobs, chunks, file)
if rv is not None:
return rv
return (file, rv)

def _create_file_for_assembling(self, name, checksum, chunks):
# If we have all chunks and the file wasn't found before
Expand All @@ -84,7 +87,7 @@ def _create_file_for_assembling(self, name, checksum, chunks):
name=name,
checksum=checksum,
type='chunked',
headers={'__state': ChunkFileState.CREATED}
headers={CHUNK_STATE_HEADER: ChunkFileState.CREATED}
)

# Load all FileBlobs from db since we can be sure here we already own all
Expand Down
29 changes: 22 additions & 7 deletions src/sentry/api/endpoints/dif_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,28 @@
from rest_framework.response import Response

from sentry.utils import json
from sentry.api.serializers import serialize
from sentry.api.bases.chunk import ChunkAssembleMixin
from sentry.api.bases.project import ProjectEndpoint, ProjectReleasePermission
from sentry.models import File, ChunkFileState
from sentry.models import File, ChunkFileState, ProjectDSymFile


class DifAssembleEndpoint(ChunkAssembleMixin, ProjectEndpoint):
permission_classes = (ProjectReleasePermission, )

def _add_project_dsym_to_reponse(self, found_file, response):
if found_file is not None:
if found_file.headers.get('error', None) is not None:
response['error'] = found_file.headers.get('error')
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe just a minor consistency thing, but: what if there is an error header while the state is OK? In that case it is odd that you're adding that field.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The database doesn't prevent this from happening, but no code path sets both OK and error. So whenever there is an error header, the state is also ERROR.


dsym = ProjectDSymFile.objects.filter(
file=found_file
).first()
if dsym is not None:
response['dif'] = serialize(dsym)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should even be required if the state is OK and log an error if the ProjectDsymFile is missing since it indicates that something went wrong and we weren't able to write the correct state header.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can't check this here, since the original chunked file will be deleted; only the new dsym file remains in the database, without the __state header at all.
So effectively the API returns OK whenever there is no state header but a file with owned blobs exists.


return response

def post(self, request, project):
"""
Assmble one or multiple chunks (FileBlob) into dsym files
Expand Down Expand Up @@ -58,15 +72,16 @@ def post(self, request, project):
chunks = file_to_assemble.get('chunks', [])

try:
result = self._check_file_blobs(project.organization, checksum, chunks)
found_file, response = self._check_file_blobs(
project.organization, checksum, chunks)
# This either returns a file OK because we already own all chunks
# OR we return not_found with the missing chunks (or not owned)
if result is not None:
file_response[checksum] = result
if response is not None:
# We also found a file, we try to fetch project dsym to return more
# information in the request
file_response[checksum] = self._add_project_dsym_to_reponse(
found_file, response)
continue
except File.MultipleObjectsReturned:
return Response({'error': 'Duplicate checksum'},
status=400)
except File.DoesNotExist:
pass

Expand Down
3 changes: 2 additions & 1 deletion src/sentry/models/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
ONE_DAY = 60 * 60 * 24  # seconds in one day

DEFAULT_BLOB_SIZE = 1024 * 1024  # one mb
# Key in File.headers under which the chunked-assembly state
# (a ChunkFileState value) is stored while a file is being assembled.
CHUNK_STATE_HEADER = '__state'


def enum(**named_values):
Expand Down Expand Up @@ -292,7 +293,7 @@ def assemble_from_file_blob_ids(self, file_blob_ids, checksum, commit=True):

if checksum != self.checksum:
self.headers['__state'] = ChunkFileState.ERROR
self.headers['error'] = 'invalid_checksum'
self.headers['error'] = 'Checksum missmatch between chunks and file'

metrics.timing('filestore.file-size', offset)
if commit:
Expand Down
18 changes: 11 additions & 7 deletions src/sentry/tasks/assemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,15 @@

@instrumented_task(name='sentry.tasks.assemble.assemble_dif', queue='assemble')
def assemble_dif(project_id, file_id, file_blob_ids, checksum, **kwargs):
from sentry.models import ChunkFileState, dsymfile, Project, CHUNK_STATE_HEADER
with transaction.atomic():

# Assemble the chunks into files
file = assemble_chunks(file_id, file_blob_ids, checksum)

from sentry.models import ChunkFileState, dsymfile, Project
# If an error happend during assembling, we early return here
if file.headers.get(CHUNK_STATE_HEADER) == ChunkFileState.ERROR:
return

project = Project.objects.filter(
id=project_id
Expand All @@ -38,7 +42,7 @@ def assemble_dif(project_id, file_id, file_blob_ids, checksum, **kwargs):
# We can delete the original chunk file since we created new dsym files
file.delete()
else:
file.headers['__state'] = ChunkFileState.ERROR
file.headers[CHUNK_STATE_HEADER] = ChunkFileState.ERROR
file.headers['error'] = 'Invalid object file'
file.save()
logger.error(
Expand All @@ -57,25 +61,25 @@ def assemble_chunks(file_id, file_blob_ids, checksum, **kwargs):
'error': 'Empty file blobs'
})

from sentry.models import File, ChunkFileState
from sentry.models import File, ChunkFileState, CHUNK_STATE_HEADER

file = File.objects.filter(
id=file_id,
).get()

file.headers['__state'] = ChunkFileState.ASSEMBLING
file.headers[CHUNK_STATE_HEADER] = ChunkFileState.ASSEMBLING
# Do the actual assembling here

file.assemble_from_file_blob_ids(file_blob_ids, checksum)
if file.headers.get('__state', '') == ChunkFileState.ERROR:
if file.headers.get(CHUNK_STATE_HEADER, '') == ChunkFileState.ERROR:
logger.error(
'assemble_chunks.assemble_error',
extra={
'error': file.headers.get('error', ''),
'file_id': file.id
}
)
return
file.headers['__state'] = ChunkFileState.OK
return file
file.headers[CHUNK_STATE_HEADER] = ChunkFileState.OK
file.save()
return file
85 changes: 81 additions & 4 deletions tests/sentry/api/endpoints/test_dif_assemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@
from django.core.files.base import ContentFile

from sentry.models import ApiToken, FileBlob, File, FileBlobIndex, FileBlobOwner
from sentry.models.file import ChunkFileState
from sentry.models.file import ChunkFileState, CHUNK_STATE_HEADER
from sentry.testutils import APITestCase
from sentry.tasks.assemble import assemble_dif


class DifAssembleEndpoint(APITestCase):
Expand Down Expand Up @@ -165,10 +166,10 @@ def test_assemble(self, mock_assemble_dif):
total_checksum = sha1(content2 + content1 + content3).hexdigest()

# The order here is on purpose because we check for the order of checksums
bolb1 = FileBlob.from_file(fileobj1)
blob1 = FileBlob.from_file(fileobj1)
FileBlobOwner.objects.get_or_create(
organization=self.organization,
blob=bolb1
blob=blob1
)
bolb3 = FileBlob.from_file(fileobj3)
FileBlobOwner.objects.get_or_create(
Expand Down Expand Up @@ -217,7 +218,7 @@ def test_assemble(self, mock_assemble_dif):
assert response.data[total_checksum]['state'] == ChunkFileState.CREATED
assert response.data[total_checksum]['missingChunks'] == []

file_blob_id_order = [bolb2.id, bolb1.id, bolb3.id]
file_blob_id_order = [bolb2.id, blob1.id, bolb3.id]

mock_assemble_dif.apply_async.assert_called_once_with(
kwargs={
Expand All @@ -236,3 +237,79 @@ def test_assemble(self, mock_assemble_dif):

file_blob_index = FileBlobIndex.objects.all()
assert len(file_blob_index) == 3

def test_dif_reponse(self):
    # Assemble a dsym file from a single fixture blob, then post the same
    # checksum to the endpoint and expect the serialized dif in the reply.
    fixture = self.load_fixture('crash.sym')
    blob = FileBlob.from_file(ContentFile(fixture))
    checksum = sha1(fixture).hexdigest()

    chunked_file = File.objects.create(
        name='test.sym',
        checksum=checksum,
        type='chunked',
        headers={CHUNK_STATE_HEADER: ChunkFileState.CREATED},
    )

    # Run the assemble task synchronously so the dsym exists before the poll.
    assemble_dif(
        project_id=self.project.id,
        file_id=chunked_file.id,
        file_blob_ids=[blob.id],
        checksum=checksum,
    )

    payload = {
        checksum: {
            'name': 'test.sym',
            'chunks': [blob.checksum],
        }
    }
    response = self.client.post(
        self.url,
        data=payload,
        HTTP_AUTHORIZATION='Bearer {}'.format(self.token.token),
    )

    assert response.status_code == 200, response.content
    assert response.data[checksum]['dif']['cpuName'] == 'x86_64'

def test_dif_error_reponse(self):
    # 'fail' is not a valid object file, so assembling marks the chunked
    # file with an error header; the endpoint must surface that error.
    fixture = 'fail'
    blob = FileBlob.from_file(ContentFile(fixture))
    checksum = sha1(fixture).hexdigest()

    chunked_file = File.objects.create(
        name='test.sym',
        checksum=checksum,
        type='chunked',
        headers={CHUNK_STATE_HEADER: ChunkFileState.CREATED},
    )

    # Run the assemble task synchronously; it should fail on the bad input.
    assemble_dif(
        project_id=self.project.id,
        file_id=chunked_file.id,
        file_blob_ids=[blob.id],
        checksum=checksum,
    )

    payload = {
        checksum: {
            'name': 'test.sym',
            'chunks': [blob.checksum],
        }
    }
    response = self.client.post(
        self.url,
        data=payload,
        HTTP_AUTHORIZATION='Bearer {}'.format(self.token.token),
    )

    assert response.status_code == 200, response.content
    assert response.data[checksum]['error'] == 'Invalid object file'
18 changes: 9 additions & 9 deletions tests/sentry/tasks/test_assemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from sentry.testutils import TestCase
from sentry.tasks.assemble import assemble_dif
from sentry.models import FileBlob, File
from sentry.models.file import ChunkFileState
from sentry.models.file import ChunkFileState, CHUNK_STATE_HEADER


class AssembleTest(TestCase):
Expand All @@ -33,24 +33,24 @@ def test_wrong_dif(self):
total_checksum = sha1(content2 + content1 + content3).hexdigest()

# The order here is on purpose because we check for the order of checksums
bolb1 = FileBlob.from_file(fileobj1)
blob1 = FileBlob.from_file(fileobj1)
bolb3 = FileBlob.from_file(fileobj3)
bolb2 = FileBlob.from_file(fileobj2)

file = File.objects.create(
name='test',
checksum=total_checksum,
type='chunked',
headers={'__state': ChunkFileState.CREATED}
headers={CHUNK_STATE_HEADER: ChunkFileState.CREATED}
)

file_blob_id_order = [bolb2.id, bolb1.id, bolb3.id]
file_blob_id_order = [bolb2.id, blob1.id, bolb3.id]

file = File.objects.create(
name='test',
checksum=total_checksum,
type='chunked',
headers={'__state': ChunkFileState.CREATED}
headers={CHUNK_STATE_HEADER: ChunkFileState.CREATED}
)

assemble_dif(
Expand All @@ -64,22 +64,22 @@ def test_wrong_dif(self):
id=file.id,
).get()

assert file.headers.get('__state') == ChunkFileState.ERROR
assert file.headers.get(CHUNK_STATE_HEADER) == ChunkFileState.ERROR

def test_dif(self):
sym_file = self.load_fixture('crash.sym')
bolb1 = FileBlob.from_file(ContentFile(sym_file))
blob1 = FileBlob.from_file(ContentFile(sym_file))

total_checksum = sha1(sym_file).hexdigest()

file = File.objects.create(
name='test.sym',
checksum=total_checksum,
type='chunked',
headers={'__state': ChunkFileState.CREATED}
headers={CHUNK_STATE_HEADER: ChunkFileState.CREATED}
)

file_blob_id_order = [bolb1.id]
file_blob_id_order = [blob1.id]

assemble_dif(
project_id=self.project.id,
Expand Down