Skip to content
This repository has been archived by the owner on Nov 22, 2022. It is now read-only.

Commit

Permalink
Don't mirror chunks that are all zeroes. (closes #1)
Browse files Browse the repository at this point in the history
  • Loading branch information
YoRyan committed Nov 13, 2019
1 parent 7246851 commit a2cc0bf
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 8 deletions.
32 changes: 24 additions & 8 deletions siaslice.py
Original file line number Diff line number Diff line change
Expand Up @@ -331,14 +331,23 @@ async def read():
if eof:
break

md5_hash = await md5_hasher(
region_read(source_afp, pos, storage.block_size))
block_file = storage.block_files.get(index, None)
if (block_file is None or block_file.md5_hash != md5_hash
or block_file.partial or block_file.stalled):
data = lzma_compress(
region_read(source_afp, pos, storage.block_size))
await storage.upload(index, md5_hash, data, overwrite=True)
def region_agen(): return region_read(source_afp, pos,
storage.block_size)
if is_zeroes(region_agen()):
try:
await storage.delete(index)
except FileNotFoundError:
pass
else:
md5_hash = await md5_hasher(region_agen())
block_file = storage.block_files.get(index, None)
if (block_file is None
or block_file.md5_hash != md5_hash
or block_file.partial
or block_file.stalled):
await storage.upload(
index, md5_hash, lzma_compress(region_agen()),
overwrite=True)

async def schedule_reads():
nonlocal status, current_index
Expand Down Expand Up @@ -412,6 +421,13 @@ async def region_read(afp, start, max_length, readsize=DEFAULT_BUFFER_SIZE):
break


async def is_zeroes(abytesgen):
    """Report whether every chunk produced by *abytesgen* is all zero bytes.

    *abytesgen* is an async iterable of bytes-like chunks. Iteration stops
    at the first chunk holding a non-zero byte, and False is returned. An
    iterable that yields nothing, or only empty chunks, counts as all
    zeroes and gives True.
    """
    async for piece in abytesgen:
        zero_total = piece.count(0)
        # The zero count can never exceed the length, so falling short of
        # it means at least one byte in this chunk is non-zero.
        if zero_total < len(piece):
            return False
    return True


async def md5_hasher(adata):
loop = asyncio.get_running_loop()
hasher = md5()
Expand Down
11 changes: 11 additions & 0 deletions tests/test_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,17 @@ async def test_afp_generator(self):
in ss.region_read(afp, 0, 40*1000*1000)])
self.assertEqual(read, reference)

async def test_is_zeroes(self):
    """is_zeroes() must be true for a region read from the empty image.

    NOTE(review): presumably '40MiBempty.img' is a fixture made up
    entirely of zero bytes -- confirm against the test setup.
    """
    async with AIOFile('40MiBempty.img', mode='rb') as afp:
        region = ss.region_read(afp, 0, 40*1000*1000)
        all_zero = await ss.is_zeroes(region)
        self.assertTrue(all_zero)

async def test_is_not_zeroes(self):
    """is_zeroes() must be false when any chunk holds a non-zero byte."""
    # The third chunk carries the only non-zero byte ('X').
    chunks = [b'\0\0\0\0', b'\0\0\0\0', b'\0\0\0X', b'\0\0\0\0']

    async def agen(gen):
        # Wrap a plain iterator so it can be consumed with `async for`.
        for item in gen:
            yield item

    outcome = await ss.is_zeroes(agen(iter(chunks)))
    self.assertFalse(outcome)

async def test_md5_hasher(self):
async with AIOFile('40MiBempty.img', mode='rb') as afp:
reference = md5(await afp.read()).hexdigest()
Expand Down

0 comments on commit a2cc0bf

Please sign in to comment.