From 3280e9ef457da29ebc4d667442b9dad716ed1347 Mon Sep 17 00:00:00 2001 From: Anton Ovchinnikov Date: Fri, 23 Nov 2018 17:08:55 +0100 Subject: [PATCH 1/3] ref(fileblob): Change naming convention for file blobs --- src/sentry/models/file.py | 14 +++++++------- tests/sentry/models/test_file.py | 12 ++++++++++++ 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/src/sentry/models/file.py b/src/sentry/models/file.py index 3f4860b6f7c01c..ff0a9edb7bcf18 100644 --- a/src/sentry/models/file.py +++ b/src/sentry/models/file.py @@ -58,7 +58,7 @@ def enum(**named_values): def _get_size_and_checksum(fileobj): size = 0 checksum = sha1() - while 1: + while True: chunk = fileobj.read(65536) if not chunk: break @@ -134,7 +134,7 @@ def from_files(cls, files, organization=None): def _upload_and_pend_chunk(fileobj, size, checksum, lock): blob = cls(size=size, checksum=checksum) - blob.path = cls.generate_unique_path(blob.timestamp) + blob.path = cls.generate_unique_path() storage = get_storage() storage.save(blob.path, fileobj) blobs_to_save.append((blob, lock)) @@ -156,7 +156,7 @@ def _save_blob(blob): _ensure_blob_owned(blob) def _flush_blobs(): - while 1: + while True: try: blob, lock = blobs_to_save.pop() except IndexError: @@ -229,7 +229,7 @@ def from_file(cls, fileobj): return existing blob = cls(size=size, checksum=checksum) - blob.path = cls.generate_unique_path(blob.timestamp) + blob.path = cls.generate_unique_path() storage = get_storage() storage.save(blob.path, fileobj) blob.save() @@ -238,9 +238,9 @@ def from_file(cls, fileobj): return blob @classmethod - def generate_unique_path(cls, timestamp): - pieces = [six.text_type(x) for x in divmod(int(timestamp.strftime('%s')), ONE_DAY)] - pieces.append(uuid4().hex) + def generate_unique_path(cls): + uuid_hex = uuid4().hex + pieces = [uuid_hex[:2], uuid_hex[2:6], uuid_hex[6:]] return u'/'.join(pieces) def delete(self, *args, **kwargs): diff --git a/tests/sentry/models/test_file.py b/tests/sentry/models/test_file.py index d26b7981be8d09..f7bf4d51a60e52 100644 --- a/tests/sentry/models/test_file.py +++ b/tests/sentry/models/test_file.py @@ -24,6 +24,18 @@ def test_from_file(self): assert my_file1.checksum == my_file2.checksum assert my_file1.path == my_file2.path + def test_generate_unique_path(self): + path = FileBlob.generate_unique_path() + assert path + + parts = path.split('/') + assert len(parts) == 3 + assert map(len, parts) == [2, 4, 26] + + # Check uniqueness + path2 = FileBlob.generate_unique_path() + assert path != path2 + class FileTest(TestCase): def test_file_handling(self): From db96c9fc809d46e0b6b187ca772502458c6e04c5 Mon Sep 17 00:00:00 2001 From: Anton Ovchinnikov Date: Mon, 26 Nov 2018 11:56:25 +0100 Subject: [PATCH 2/3] doc: Add a clarifying comment --- src/sentry/models/file.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/sentry/models/file.py b/src/sentry/models/file.py index ff0a9edb7bcf18..3c3e9252f65e63 100644 --- a/src/sentry/models/file.py +++ b/src/sentry/models/file.py @@ -239,6 +239,8 @@ def from_file(cls, fileobj): @classmethod def generate_unique_path(cls): + # We intentionally do not use checksums as path names to avoid concurrency issues + # when we attempt concurrent uploads for any reason. uuid_hex = uuid4().hex pieces = [uuid_hex[:2], uuid_hex[2:6], uuid_hex[6:]] return u'/'.join(pieces) From 4100136e39e52a8880f199c1426df1c768eb9bfc Mon Sep 17 00:00:00 2001 From: Anton Ovchinnikov Date: Mon, 26 Nov 2018 14:18:14 +0100 Subject: [PATCH 3/3] ci: Kick bot