Skip to content

Commit

Permalink
Use 8MB as average block size.
Browse files Browse the repository at this point in the history
And be compatible with files chunked with the old block size.
  • Loading branch information
killing committed Jun 30, 2017
1 parent 0ea8158 commit 31dcd92
Show file tree
Hide file tree
Showing 5 changed files with 63 additions and 213 deletions.
9 changes: 9 additions & 0 deletions common/cdc/cdc.c
Expand Up @@ -22,6 +22,15 @@
/* Local aliases for rabin_checksum / rabin_rolling_checksum. */
#define finger rabin_checksum
#define rolling_finger rabin_rolling_checksum

/* Block size constants; the arithmetic is kept spelled out. */
#define BLOCK_SZ (1024*1024*1)      /* 1048576 */
#define BLOCK_MIN_SZ (1024*256)     /* 262144 */
#define BLOCK_MAX_SZ (1024*1024*4)  /* 4194304 */
#define BLOCK_WIN_SZ 48

#define NAME_MAX_SZ 4096

#define BREAK_VALUE 0x0013 ///0x0513

/*
 * Parenthesized so the macro behaves as a single value inside a larger
 * expression: without the parentheses `x / READ_SIZE` would expand to
 * `x / 1024 * 4`.
 */
#define READ_SIZE (1024 * 4)

/*
 * Maps a value in 0..15 to its lowercase hexadecimal digit character.
 * Every use of the argument is parenthesized so that expressions such as
 * `BYTE_TO_HEX(v & 0xf)` expand correctly.  The argument is evaluated more
 * than once, so it must not have side effects.
 */
#define BYTE_TO_HEX(b) (((b)>=10)?('a'+(b)-10):('0'+(b)))
Expand Down
9 changes: 0 additions & 9 deletions common/cdc/cdc.h
Expand Up @@ -6,15 +6,6 @@
#include <glib.h>
#include <stdint.h>

/* Block size constants, written as powers of two. */
#define BLOCK_SZ (1 << 20)      /* 1048576 */
#define BLOCK_MIN_SZ (1 << 18)  /* 262144 */
#define BLOCK_MAX_SZ (1 << 22)  /* 4194304 */
#define BLOCK_WIN_SZ 48

#define NAME_MAX_SZ 4096

#define BREAK_VALUE 0x0013 ///0x0513

/* Number of bytes in one checksum. */
#define CHECKSUM_LENGTH 20

#ifndef O_BINARY
Expand Down
183 changes: 3 additions & 180 deletions common/fs-mgr.c
Expand Up @@ -659,160 +659,6 @@ create_cdc_for_empty_file (CDCFileDescriptor *cdc)
memset (cdc, 0, sizeof(CDCFileDescriptor));
}

#if defined SEAFILE_SERVER && defined FULL_FEATURE

/*
 * Length of every block produced by split_file_to_block() except possibly
 * the last one, which holds whatever is left of the file.
 */
#define FIXED_BLOCK_SIZE (1<<20)

/*
 * Context handed to the thread pool as user data; one instance is shared by
 * all chunking_worker() invocations of a single split_file_to_block() call.
 */
typedef struct ChunkingData {
/* Forwarded unchanged to seafile_write_chunk(). */
const char *repo_id;
/* Forwarded unchanged to seafile_write_chunk(). */
int version;
/* File that each worker opens read-only for itself. */
const char *file_path;
/* Forwarded unchanged to seafile_write_chunk(). */
SeafileCrypt *crypt;
/*
 * Output array of checksums, CHECKSUM_LENGTH bytes per block; a worker
 * writes the slot at index (chunk offset / FIXED_BLOCK_SIZE).
 */
guint8 *blk_sha1s;
/* Queue onto which every worker pushes its chunk when it is done. */
GAsyncQueue *finished_tasks;
} ChunkingData;

/*
 * Thread-pool worker: reads one block of the file and stores it.
 *
 * vdata     - CDCDescriptor of the block.  `offset` and `len` are inputs;
 *             `result` and `checksum` are filled in here.
 * user_data - the ChunkingData shared by all workers of this file.
 *
 * On success chunk->result is the value returned by seafile_write_chunk()
 * and the block checksum has been copied into data->blk_sha1s.  On every
 * failure chunk->result is set to a negative value.  In both cases the
 * chunk is pushed onto data->finished_tasks exactly once.
 */
static void
chunking_worker (gpointer vdata, gpointer user_data)
{
    ChunkingData *data = user_data;
    CDCDescriptor *chunk = vdata;
    int fd = -1;
    ssize_t n;
    int idx;

    chunk->block_buf = g_new0 (char, chunk->len);
    if (!chunk->block_buf) {
        seaf_warning ("Failed to allocate chunk buffer\n");
        /* Without this the caller would see result == 0 and treat the
         * never-written block as a success. */
        chunk->result = -1;
        goto out;
    }

    fd = seaf_util_open (data->file_path, O_RDONLY | O_BINARY);
    if (fd < 0) {
        seaf_warning ("Failed to open %s: %s\n", data->file_path, strerror(errno));
        chunk->result = -1;
        goto out;
    }

    if (seaf_util_lseek (fd, chunk->offset, SEEK_SET) == (gint64)-1) {
        seaf_warning ("Failed to lseek %s: %s\n", data->file_path, strerror(errno));
        chunk->result = -1;
        goto out;
    }

    n = readn (fd, chunk->block_buf, chunk->len);
    if (n < 0) {
        seaf_warning ("Failed to read chunk from %s: %s\n",
                      data->file_path, strerror(errno));
        chunk->result = -1;
        goto out;
    }
    /* NOTE(review): assumes readn() returns the number of bytes actually
     * read.  A short count means the file ended before the expected block
     * length; writing the zero-padded buffer would store a block that does
     * not match the file, so report it as a failure instead. */
    if ((gint64)n != (gint64)chunk->len) {
        seaf_warning ("Short read from %s: file changed while indexing?\n",
                      data->file_path);
        chunk->result = -1;
        goto out;
    }

    chunk->result = seafile_write_chunk (data->repo_id, data->version,
                                         chunk, data->crypt,
                                         chunk->checksum, 1);
    if (chunk->result < 0)
        goto out;

    /* Each block owns a distinct slot, so workers never write the same bytes. */
    idx = chunk->offset / FIXED_BLOCK_SIZE;
    memcpy (data->blk_sha1s + idx * CHECKSUM_LENGTH, chunk->checksum, CHECKSUM_LENGTH);

out:
    g_free (chunk->block_buf);
    chunk->block_buf = NULL;
    /* fd is still -1 when we bail out before or during the open. */
    if (fd >= 0)
        close (fd);
    g_async_queue_push (data->finished_tasks, chunk);
}

/*
 * Splits a file into FIXED_BLOCK_SIZE blocks and processes them in parallel
 * with chunking_worker() on a thread pool.
 *
 * repo_id, version, crypt - forwarded to the workers.
 * file_path               - file to split; each worker opens it itself.
 * file_size               - size of the file in bytes; must be positive.
 * cdc                     - on success receives block_nr and blk_sha1s
 *                           (ownership of the array passes to the caller).
 * write_data              - NOTE(review): not consulted here; the workers
 *                           always pass 1 to seafile_write_chunk().
 *
 * Returns 0 on success and -1 on failure.  On failure cdc is left untouched
 * and the checksum array is released.
 */
static int
split_file_to_block (const char *repo_id,
                     int version,
                     const char *file_path,
                     gint64 file_size,
                     SeafileCrypt *crypt,
                     CDCFileDescriptor *cdc,
                     gboolean write_data)
{
    int n_blocks;
    uint8_t *block_sha1s = NULL;
    GThreadPool *tpool = NULL;
    GAsyncQueue *finished_tasks = NULL;
    GList *pending_tasks = NULL;
    int n_pending = 0;
    CDCDescriptor *chunk;
    gint64 blocks64;
    int ret = 0;

    /* A zero size already failed before (zero-length allocation yields
     * NULL); a negative size would turn into a huge unsigned loop bound. */
    if (file_size <= 0) {
        seaf_warning ("Cannot split file with non-positive size.\n");
        return -1;
    }

    /* Must use the same block size as the loop below and as the slot index
     * computed by chunking_worker(), otherwise the array is mis-sized. */
    blocks64 = (file_size + FIXED_BLOCK_SIZE - 1) / FIXED_BLOCK_SIZE;
    if (blocks64 > G_MAXINT / CHECKSUM_LENGTH) {
        seaf_warning ("File is too large to split into blocks.\n");
        return -1;
    }
    n_blocks = (int)blocks64;

    block_sha1s = g_new0 (uint8_t, n_blocks * CHECKSUM_LENGTH);
    if (!block_sha1s) {
        seaf_warning ("Failed to allocate block_sha1s.\n");
        ret = -1;
        goto out;
    }

    finished_tasks = g_async_queue_new ();

    /* Lives on this stack frame; safe because the pool is freed (and its
     * running tasks waited for) before this function returns. */
    ChunkingData data;
    memset (&data, 0, sizeof(data));
    data.repo_id = repo_id;
    data.version = version;
    data.file_path = file_path;
    data.crypt = crypt;
    data.blk_sha1s = block_sha1s;
    data.finished_tasks = finished_tasks;

    tpool = g_thread_pool_new (chunking_worker, &data,
                               seaf->http_server->max_indexing_threads, FALSE, NULL);
    if (!tpool) {
        seaf_warning ("Failed to allocate thread pool\n");
        ret = -1;
        goto out;
    }

    /* Queue one task per block; only the last block may be shorter. */
    guint64 offset = 0;
    guint64 len;
    guint64 left = (guint64)file_size;
    while (left > 0) {
        len = ((left >= FIXED_BLOCK_SIZE) ? FIXED_BLOCK_SIZE : left);

        chunk = g_new0 (CDCDescriptor, 1);
        chunk->offset = offset;
        chunk->len = (guint32)len;

        g_thread_pool_push (tpool, chunk, NULL);
        pending_tasks = g_list_prepend (pending_tasks, chunk);
        n_pending++;

        left -= len;
        offset += len;
    }

    /* Wait for every queued task, bailing out on the first failure. */
    while (n_pending > 0) {
        chunk = g_async_queue_pop (finished_tasks);
        if (chunk->result < 0) {
            ret = -1;
            goto out;
        }
        n_pending--;
    }

    cdc->block_nr = n_blocks;
    cdc->blk_sha1s = block_sha1s;

out:
    /* Freeing the pool first guarantees no worker still touches `data`,
     * the queue or the chunk descriptors released below. */
    if (tpool)
        g_thread_pool_free (tpool, TRUE, TRUE);
    if (finished_tasks)
        g_async_queue_unref (finished_tasks);
    g_list_free_full (pending_tasks, g_free);
    if (ret < 0)
        g_free (block_sha1s);

    return ret;
}

#endif /* SEAFILE_SERVER */

int
seaf_fs_manager_index_blocks (SeafFSManager *mgr,
const char *repo_id,
Expand Down Expand Up @@ -841,39 +687,16 @@ seaf_fs_manager_index_blocks (SeafFSManager *mgr,
} else {
memset (&cdc, 0, sizeof(cdc));

#if defined SEAFILE_SERVER && defined FULL_FEATURE
if (use_cdc || version == 0) {
cdc.block_sz = calculate_chunk_size (sb.st_size);
cdc.block_min_sz = cdc.block_sz >> 2;
cdc.block_max_sz = cdc.block_sz << 2;
cdc.write_block = seafile_write_chunk;
memcpy (cdc.repo_id, repo_id, 36);
cdc.version = version;
if (filename_chunk_cdc (file_path, &cdc, crypt, write_data) < 0) {
seaf_warning ("Failed to chunk file with CDC.\n");
return -1;
}
} else {
memcpy (cdc.repo_id, repo_id, 36);
cdc.version = version;
cdc.file_size = sb.st_size;
if (split_file_to_block (repo_id, version, file_path, sb.st_size,
crypt, &cdc, write_data) < 0) {
return -1;
}
}
#else
cdc.block_sz = calculate_chunk_size (sb.st_size);
cdc.block_min_sz = cdc.block_sz >> 2;
cdc.block_max_sz = cdc.block_sz << 2;
cdc.block_sz = CDC_AVERAGE_BLOCK_SIZE;
cdc.block_min_sz = CDC_MIN_BLOCK_SIZE;
cdc.block_max_sz = CDC_MAX_BLOCK_SIZE;
cdc.write_block = seafile_write_chunk;
memcpy (cdc.repo_id, repo_id, 36);
cdc.version = version;
if (filename_chunk_cdc (file_path, &cdc, crypt, write_data) < 0) {
seaf_warning ("Failed to chunk file with CDC.\n");
return -1;
}
#endif

if (write_data && write_seafile (mgr, repo_id, version, &cdc, sha1) < 0) {
g_free (cdc.blk_sha1s);
Expand Down
4 changes: 4 additions & 0 deletions common/fs-mgr.h
Expand Up @@ -15,6 +15,10 @@
#define CURRENT_DIR_OBJ_VERSION 1
#define CURRENT_SEAFILE_OBJ_VERSION 1

/* Average, minimum and maximum block sizes passed to the CDC chunker. */
#define CDC_AVERAGE_BLOCK_SIZE (8 << 20)  /* 8MB */
#define CDC_MIN_BLOCK_SIZE (6 << 20)      /* 6MB */
#define CDC_MAX_BLOCK_SIZE (10 << 20)     /* 10MB */

/* Type names for the struct tags below; the struct definitions themselves
 * are not part of this excerpt. */
typedef struct _SeafFSManager SeafFSManager;
typedef struct _SeafFSObject SeafFSObject;
typedef struct _Seafile Seafile;
Expand Down
71 changes: 47 additions & 24 deletions daemon/vc-utils.c
Expand Up @@ -337,43 +337,66 @@ unlink_entry (struct cache_entry *ce, struct unpack_trees_options *o)
return 0;
}

/*
 * Chunks the file at `path` with the given CDC block-size parameters and
 * stores the resulting file id in `sha1`.
 *
 * filename_chunk_cdc() is called with FALSE as its last argument.  For
 * repo_version > 0 the id comes from
 * seaf_fs_manager_calculate_seafile_id_json(); otherwise the 20-byte
 * cdc file_sum is copied out.
 *
 * `st` is accepted but not used by this function.
 *
 * Returns 0 on success, -1 if chunking fails.
 */
static int
compute_file_id_with_cdc (const char *path, SeafStat *st,
                          SeafileCrypt *crypt, int repo_version,
                          uint32_t blk_avg_size, uint32_t blk_min_size, uint32_t blk_max_size,
                          unsigned char sha1[])
{
    CDCFileDescriptor desc;
    int rc;

    memset (&desc, 0, sizeof(desc));
    desc.write_block = seafile_write_chunk;
    desc.block_max_sz = blk_max_size;
    desc.block_min_sz = blk_min_size;
    desc.block_sz = blk_avg_size;

    rc = filename_chunk_cdc (path, &desc, crypt, FALSE);
    if (rc < 0) {
        seaf_warning ("Failed to chunk file.\n");
        return -1;
    }

    if (repo_version <= 0) {
        memcpy (sha1, desc.file_sum, 20);
    } else {
        seaf_fs_manager_calculate_seafile_id_json (repo_version, &desc, sha1);
    }

    /* free(NULL) is a no-op, so no guard is needed. */
    free (desc.blk_sha1s);

    return 0;
}

int
compare_file_content (const char *path, SeafStat *st, const unsigned char *ce_sha1,
SeafileCrypt *crypt, int repo_version)
{
CDCFileDescriptor cdc;
unsigned char sha1[20];

if (st->st_size == 0) {
memset (sha1, 0, 20);
return hashcmp (sha1, ce_sha1);
} else {
memset (&cdc, 0, sizeof(cdc));
cdc.block_sz = calculate_chunk_size (st->st_size);
cdc.block_min_sz = cdc.block_sz >> 2;
cdc.block_max_sz = cdc.block_sz << 2;
cdc.write_block = seafile_write_chunk;
if (filename_chunk_cdc (path, &cdc, crypt, FALSE) < 0) {
seaf_warning ("Failed to chunk file.\n");
if (compute_file_id_with_cdc (path, st, crypt, repo_version,
CDC_AVERAGE_BLOCK_SIZE,
CDC_MIN_BLOCK_SIZE,
CDC_MAX_BLOCK_SIZE,
sha1) < 0) {
return -1;
}
if (hashcmp (sha1, ce_sha1) == 0)
return 0;

if (repo_version > 0)
seaf_fs_manager_calculate_seafile_id_json (repo_version, &cdc, sha1);
else
memcpy (sha1, cdc.file_sum, 20);

if (cdc.blk_sha1s)
free (cdc.blk_sha1s);
/* Compare with old cdc block size. */
uint32_t block_size = calculate_chunk_size (st->st_size);
if (compute_file_id_with_cdc (path, st, crypt, repo_version,
block_size,
block_size >> 2,
block_size << 2,
sha1) < 0) {
return -1;
}
return hashcmp (sha1, ce_sha1);
}

#if 0
char id1[41], id2[41];
rawdata_to_hex (sha1, id1, 20);
rawdata_to_hex (ce_sha1, id2, 20);
seaf_debug ("id1: %s, id2: %s.\n", id1, id2);
#endif

return hashcmp (sha1, ce_sha1);
}

#if defined WIN32 || defined __APPLE__
Expand Down

0 comments on commit 31dcd92

Please sign in to comment.