Skip to content

Commit

Permalink
[PATCH] Deltification library work by Nicolas Pitre.
Browse files Browse the repository at this point in the history
This patch adds the basic library functions to create and replay delta
information.  Also included is a test-delta utility to validate the
code.

diff-delta was based on LibXDiff written by Davide Libenzi

Signed-off-by: Nicolas Pitre <nico@cam.org>
Signed-off-by: Davide Libenzi <davidel@xmailserver.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
  • Loading branch information
Nicolas Pitre authored and Linus Torvalds committed May 19, 2005
1 parent 2194925 commit a310d43
Show file tree
Hide file tree
Showing 5 changed files with 511 additions and 2 deletions.
7 changes: 5 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,9 @@ install: $(PROG) $(SCRIPTS)
$(INSTALL) $(PROG) $(SCRIPTS) $(dest)$(bin)

LIB_OBJS=read-cache.o sha1_file.o usage.o object.o commit.o tree.o blob.o \
tag.o date.o
tag.o date.o diff-delta.o patch-delta.o
LIB_FILE=libgit.a
LIB_H=cache.h object.h blob.h tree.h commit.h tag.h
LIB_H=cache.h object.h blob.h tree.h commit.h tag.h delta.h

LIB_H += strbuf.h
LIB_OBJS += strbuf.o
Expand Down Expand Up @@ -72,6 +72,9 @@ $(LIB_FILE): $(LIB_OBJS)
test-date: test-date.c date.o
$(CC) $(CFLAGS) -o $@ test-date.c date.o

test-delta: test-delta.c diff-delta.o patch-delta.o
$(CC) $(CFLAGS) -o $@ $^

git-%: %.c $(LIB_FILE)
$(CC) $(CFLAGS) -o $@ $(filter %.c,$^) $(LIBS)

Expand Down
6 changes: 6 additions & 0 deletions delta.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
extern void *diff_delta(void *from_buf, unsigned long from_size,
void *to_buf, unsigned long to_size,
unsigned long *delta_size);
extern void *patch_delta(void *src_buf, unsigned long src_size,
void *delta_buf, unsigned long delta_size,
unsigned long *dst_size);
333 changes: 333 additions & 0 deletions diff-delta.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,333 @@
/*
* diff-delta.c: generate a delta between two buffers
*
* Many parts of this file have been lifted from LibXDiff version 0.10.
* http://www.xmailserver.org/xdiff-lib.html
*
* LibXDiff was written by Davide Libenzi <davidel@xmailserver.org>
* Copyright (C) 2003 Davide Libenzi
*
* Many mods for GIT usage by Nicolas Pitre <nico@cam.org>, (C) 2005.
*
* This file is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Use of this within git automatically means that the LGPL
* licensing gets turned into GPLv2 within this project.
*/

#include <stdlib.h>
#include "delta.h"


/* block size: min = 16, max = 64k, power of 2 */
#define BLK_SIZE 16

#define MIN(a, b) ((a) < (b) ? (a) : (b))

#define GR_PRIME 0x9e370001
#define HASH(v, b) (((unsigned int)(v) * GR_PRIME) >> (32 - (b)))

/* largest prime smaller than 65536 */
#define BASE 65521

/* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */
#define NMAX 5552

#define DO1(buf, i) { s1 += buf[i]; s2 += s1; }
#define DO2(buf, i) DO1(buf, i); DO1(buf, i + 1);
#define DO4(buf, i) DO2(buf, i); DO2(buf, i + 2);
#define DO8(buf, i) DO4(buf, i); DO4(buf, i + 4);
#define DO16(buf) DO8(buf, 0); DO8(buf, 8);

static unsigned int adler32(unsigned int adler, const unsigned char *buf, int len)
{
int k;
unsigned int s1 = adler & 0xffff;
unsigned int s2 = adler >> 16;

while (len > 0) {
k = MIN(len, NMAX);
len -= k;
while (k >= 16) {
DO16(buf);
buf += 16;
k -= 16;
}
if (k != 0)
do {
s1 += *buf++;
s2 += s1;
} while (--k);
s1 %= BASE;
s2 %= BASE;
}

return (s2 << 16) | s1;
}

static unsigned int hashbits(unsigned int size)
{
unsigned int val = 1, bits = 0;
while (val < size && bits < 32) {
val <<= 1;
bits++;
}
return bits ? bits: 1;
}

typedef struct s_chanode {
struct s_chanode *next;
int icurr;
} chanode_t;

typedef struct s_chastore {
chanode_t *head, *tail;
int isize, nsize;
chanode_t *ancur;
chanode_t *sncur;
int scurr;
} chastore_t;

static void cha_init(chastore_t *cha, int isize, int icount)
{
cha->head = cha->tail = NULL;
cha->isize = isize;
cha->nsize = icount * isize;
cha->ancur = cha->sncur = NULL;
cha->scurr = 0;
}

static void *cha_alloc(chastore_t *cha)
{
chanode_t *ancur;
void *data;

ancur = cha->ancur;
if (!ancur || ancur->icurr == cha->nsize) {
ancur = malloc(sizeof(chanode_t) + cha->nsize);
if (!ancur)
return NULL;
ancur->icurr = 0;
ancur->next = NULL;
if (cha->tail)
cha->tail->next = ancur;
if (!cha->head)
cha->head = ancur;
cha->tail = ancur;
cha->ancur = ancur;
}

data = (void *)ancur + sizeof(chanode_t) + ancur->icurr;
ancur->icurr += cha->isize;
return data;
}

static void cha_free(chastore_t *cha)
{
chanode_t *cur = cha->head;
while (cur) {
chanode_t *tmp = cur;
cur = cur->next;
free(tmp);
}
}

typedef struct s_bdrecord {
struct s_bdrecord *next;
unsigned int fp;
const unsigned char *ptr;
} bdrecord_t;

typedef struct s_bdfile {
const unsigned char *data, *top;
chastore_t cha;
unsigned int fphbits;
bdrecord_t **fphash;
} bdfile_t;

static int delta_prepare(const unsigned char *buf, int bufsize, bdfile_t *bdf)
{
unsigned int fphbits;
int i, hsize;
const unsigned char *base, *data, *top;
bdrecord_t *brec;
bdrecord_t **fphash;

fphbits = hashbits(bufsize / BLK_SIZE + 1);
hsize = 1 << fphbits;
fphash = malloc(hsize * sizeof(bdrecord_t *));
if (!fphash)
return -1;
for (i = 0; i < hsize; i++)
fphash[i] = NULL;
cha_init(&bdf->cha, sizeof(bdrecord_t), hsize / 4 + 1);

bdf->data = data = base = buf;
bdf->top = top = buf + bufsize;
data += (bufsize / BLK_SIZE) * BLK_SIZE;
if (data == top)
data -= BLK_SIZE;

for ( ; data >= base; data -= BLK_SIZE) {
brec = cha_alloc(&bdf->cha);
if (!brec) {
cha_free(&bdf->cha);
free(fphash);
return -1;
}
brec->fp = adler32(0, data, MIN(BLK_SIZE, top - data));
brec->ptr = data;
i = HASH(brec->fp, fphbits);
brec->next = fphash[i];
fphash[i] = brec;
}

bdf->fphbits = fphbits;
bdf->fphash = fphash;

return 0;
}

static void delta_cleanup(bdfile_t *bdf)
{
free(bdf->fphash);
cha_free(&bdf->cha);
}

#define COPYOP_SIZE(o, s) \
(!!(o & 0xff) + !!(o & 0xff00) + !!(o & 0xff0000) + !!(o & 0xff000000) + \
!!(s & 0xff) + !!(s & 0xff00) + 1)

void *diff_delta(void *from_buf, unsigned long from_size,
void *to_buf, unsigned long to_size,
unsigned long *delta_size)
{
int i, outpos, outsize, inscnt, csize, msize, moff;
unsigned int fp;
const unsigned char *data, *top, *ptr1, *ptr2;
unsigned char *out, *orig;
bdrecord_t *brec;
bdfile_t bdf;

if (!from_size || !to_size || delta_prepare(from_buf, from_size, &bdf))
return NULL;

outpos = 0;
outsize = 8192;
out = malloc(outsize);
if (!out) {
delta_cleanup(&bdf);
return NULL;
}

data = to_buf;
top = to_buf + to_size;

/* store reference buffer size */
orig = out + outpos++;
*orig = i = 0;
do {
if (from_size & 0xff) {
*orig |= (1 << i);
out[outpos++] = from_size;
}
i++;
from_size >>= 8;
} while (from_size);

/* store target buffer size */
orig = out + outpos++;
*orig = i = 0;
do {
if (to_size & 0xff) {
*orig |= (1 << i);
out[outpos++] = to_size;
}
i++;
to_size >>= 8;
} while (to_size);

inscnt = 0;
moff = 0;
while (data < top) {
msize = 0;
fp = adler32(0, data, MIN(top - data, BLK_SIZE));
i = HASH(fp, bdf.fphbits);
for (brec = bdf.fphash[i]; brec; brec = brec->next) {
if (brec->fp == fp) {
csize = bdf.top - brec->ptr;
if (csize > top - data)
csize = top - data;
for (ptr1 = brec->ptr, ptr2 = data;
csize && *ptr1 == *ptr2;
csize--, ptr1++, ptr2++);

csize = ptr1 - brec->ptr;
if (csize > msize) {
moff = brec->ptr - bdf.data;
msize = csize;
if (msize >= 0x10000) {
msize = 0x10000;
break;
}
}
}
}

if (!msize || msize < COPYOP_SIZE(moff, msize)) {
if (!inscnt)
outpos++;
out[outpos++] = *data++;
inscnt++;
if (inscnt == 0x7f) {
out[outpos - inscnt - 1] = inscnt;
inscnt = 0;
}
} else {
if (inscnt) {
out[outpos - inscnt - 1] = inscnt;
inscnt = 0;
}

data += msize;
orig = out + outpos++;
i = 0x80;

if (moff & 0xff) { out[outpos++] = moff; i |= 0x01; }
moff >>= 8;
if (moff & 0xff) { out[outpos++] = moff; i |= 0x02; }
moff >>= 8;
if (moff & 0xff) { out[outpos++] = moff; i |= 0x04; }
moff >>= 8;
if (moff & 0xff) { out[outpos++] = moff; i |= 0x08; }

if (msize & 0xff) { out[outpos++] = msize; i |= 0x10; }
msize >>= 8;
if (msize & 0xff) { out[outpos++] = msize; i |= 0x20; }

*orig = i;
}

/* next time around the largest possible output is 1 + 4 + 3 */
if (outpos > outsize - 8) {
void *tmp = out;
outsize = outsize * 3 / 2;
out = realloc(out, outsize);
if (!out) {
free(tmp);
delta_cleanup(&bdf);
return NULL;
}
}
}

if (inscnt)
out[outpos - inscnt - 1] = inscnt;

delta_cleanup(&bdf);
*delta_size = outpos;
return out;
}
Loading

0 comments on commit a310d43

Please sign in to comment.