Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Initial BDelta project taken from BDelta version 0.1.0 + code style c…

…hanges.

git-svn-id: svn://deltup.org/bdelta@1 0b3c7260-62b7-4bd3-8502-661595b7e89c
  • Loading branch information...
commit 251c9189dce4218d62713ee48c513949ad238e59 0 parents
John Whitney authored
2  Changelog
@@ -0,0 +1,2 @@
+19 July 2003; John Whitney <jjw@linuxmail.org> :
+ Initial release of bdelta
12 Format
@@ -0,0 +1,12 @@
+(All numbers are stored in little-endian format)
+char[3] magic "BDT"
+unsigned short version;
+unsigned char intsize; (remaining ints are all unsigned and "intsize" bytes)
+unsigned file 1 size
+unsigned file 2 size
+unsigned number of matches
+for (number of matches) {
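+ unsigned match relative location 1 (offset in file 1 from the end of the previous match; may be negative, in which case it is stored as the wrapped-around unsigned value)
+ unsigned match relative location 2 (offset in file 2 from the end of the previous match, i.e. the number of unmatched file 2 bytes that precede this match)
+ unsigned match size
+}
+(The unmatched bytes of file 2 follow the match table, stored back to back in file order.)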
4 README
@@ -0,0 +1,4 @@
+Author: John Whitney (jjw@deltup.org)
+License: GNU General Public License v2
+
+ BDelta is an advanced delta creator, patcher, and library. This code is still beta quality, so don't expect too much! It might not even produce correct deltas in some cases. The patch file format is described in the file "Format".
19 src/Makefile
@@ -0,0 +1,19 @@
+DESTDIR=
+PREFIX=/usr
+BINDIR=$(PREFIX)/bin
+LIBDIR=${PREFIX}/lib
+CXXFLAGS=-O2
+
+# Neither target names a file, so never treat them as up to date.
+.PHONY: all install
+
+all: bpatch libbdelta.so bdelta
+# Shared library containing the delta algorithm.
+libbdelta.so: libbdelta.cpp container.h bdelta.h
+	${CXX} -shared -fPIC ${CXXFLAGS} $< -o $@
+# bdelta links against libbdelta.so, so the library must be built first
+# (matters for parallel builds). $< is still bdelta.cpp.
+bdelta: bdelta.cpp container.h bdelta.h file.h libbdelta.so
+	${CXX} $< -o $@ ${CXXFLAGS} -L. -lbdelta
+# bpatch has no recipe of its own; it is built by the pattern rule below.
+bpatch: bpatch.cpp file.h
+% : %.cpp
+	${CXX} $< -o $@ ${CXXFLAGS}
+install: libbdelta.so bdelta bpatch
+	mkdir -p $(DESTDIR)$(BINDIR) $(DESTDIR)$(LIBDIR)
+	install -m 755 libbdelta.so $(DESTDIR)$(LIBDIR)
+	install -m 755 bdelta $(DESTDIR)$(BINDIR)
+	install -m 755 bpatch $(DESTDIR)$(BINDIR)
152 src/bdelta.cpp
@@ -0,0 +1,152 @@
+/* Copyright (C) 2003-2008 John Whitney
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * Author: John Whitney <jjw@deltup.org>
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "bdelta.h"
+#include "file.h"
+
+const int BUFNUM=16;
+
+// Read-only view of a file that keeps the BUFNUM most recently loaded
+// windows in memory. Each window is `maxread` bytes long; read() hands out
+// pointers straight into these windows, so a returned pointer is only valid
+// until the window it points into has been recycled (BUFNUM misses later).
+class Buffered_File {
+    FILE *f;
+    char *buf[BUFNUM];
+    unsigned bufplace[BUFNUM];  // file offset at which buf[i] starts
+    bool bufvalid[BUFNUM];      // false until buf[i] has been filled once
+    unsigned bufsize;
+public:
+    // fname:   path of the file to read.
+    // maxread: window size; callers must never request more than this
+    //          many bytes in a single read().
+    Buffered_File(char *fname, unsigned maxread) {
+        f = fopen(fname, "rb");
+        bufsize = maxread;
+        for (int i = 0; i < BUFNUM; ++i) {
+            buf[i] = new char[bufsize];
+            bufplace[i] = 0;
+            bufvalid[i] = false;
+        }
+    }
+    ~Buffered_File() {
+        for (int i = 0; i < BUFNUM; ++i)
+            delete[] buf[i];    // allocated with new[]
+        if (f) fclose(f);
+    }
+    // Returns a pointer to `num` bytes of the file starting at offset
+    // `place`. Bytes past the end of the file (or all bytes, if the file
+    // could not be opened) read as zero.
+    void *read(unsigned place, unsigned num) {
+        // Serve the request from a cached window when it lies entirely
+        // inside one. A separate validity flag is used so that a window
+        // starting at offset 0 can be cached like any other.
+        for (int i = 0; i < BUFNUM; ++i) {
+            if (!bufvalid[i] || place < bufplace[i]) continue;
+            unsigned off = place - bufplace[i];
+            if (off <= bufsize && num <= bufsize - off)
+                return buf[i] + off;
+        }
+        // Miss: recycle the oldest window and move it to the front.
+        char *lastbuf = buf[BUFNUM-1];
+        for (int i = BUFNUM-1; i > 0; --i) {
+            buf[i] = buf[i-1];
+            bufplace[i] = bufplace[i-1];
+            bufvalid[i] = bufvalid[i-1];
+        }
+        buf[0] = lastbuf;
+        bufplace[0] = place;
+        bufvalid[0] = true;
+        size_t got = 0;
+        if (f && fseek(f, place, SEEK_SET) == 0)
+            got = fread(buf[0], 1, bufsize, f);
+        // Do not expose stale data from the window's previous use.
+        for (size_t k = got; k < bufsize; ++k)
+            buf[0][k] = 0;
+        return buf[0];
+    }
+};
+
+// The two input files; created by main() before the algorithm runs.
+Buffered_File *f1, *f2;
+
+// Read callback handed to libbdelta for the first (old) file.
+void *f1_read(unsigned place, unsigned num) {
+    Buffered_File &source = *f1;
+    return source.read(place, num);
+}
+// Read callback handed to libbdelta for the second (new) file.
+void *f2_read(unsigned place, unsigned num) {
+    Buffered_File &source = *f2;
+    return source.read(place, num);
+}
+
+// bdelta <oldfile> <newfile> <patchfile>
+//
+// Runs the delta algorithm with block sizes 512, 256, ... 16 and writes the
+// patch: header, match table, then the bytes of the new file that are not
+// covered by any match. Returns 0 on success and 1 on any failure.
+int main(int argc, char **argv) {
+    if (argc!=4) {
+        printf("needs two files to compare + output file:\n");
+        printf("delta oldfile newfile patchfile\n");
+        exit(1);
+    }
+    if (!fileExists(argv[1]) || !fileExists(argv[2])) {
+        printf("one of the input files does not exist\n");
+        exit(1);
+    }
+    unsigned size = getLenOfFile(argv[1]);
+    unsigned size2 = getLenOfFile(argv[2]);
+    f1 = new Buffered_File(argv[1], 4096);
+    f2 = new Buffered_File(argv[2], 4096);
+
+    void *b = bdelta_init_alg(size, size2, f1_read, f2_read);
+    if (!b) {
+        printf("out of memory\n");
+        exit(1);
+    }
+    unsigned nummatches = 0;
+    for (unsigned blocksize = 512; blocksize >= 16; blocksize/=2)
+        nummatches = bdelta_pass(b, blocksize);
+
+    // Heap arrays: the match count is unbounded, so variable-length stack
+    // arrays (a non-standard extension in C++) could overflow the stack.
+    // One extra slot is reserved for the trailing unmatched section.
+    unsigned *copyloc2 = new unsigned[nummatches+1];
+    unsigned *copynum = new unsigned[nummatches+1];
+
+    FILE *fout = fopen(argv[3], "wb");
+    if (!fout) {
+        printf("couldn't open output file\n");
+        exit(1);
+    }
+
+    // Header (see file "Format").
+    const char *magic = "BDT";
+    fwrite(magic, 1, 3, fout);
+    unsigned short version = 1;
+    write_word(fout, version);
+    unsigned char intsize = 4;
+    fwrite(&intsize, 1, 1, fout);
+    write_dword(fout, size);
+    write_dword(fout, size2);
+    write_dword(fout, nummatches);
+
+    // Match table: each position is stored relative to the end of the
+    // previous match. The file 1 offset may wrap around (negative offset).
+    unsigned lastp1 = 0,
+             lastp2 = 0;
+    for (unsigned i = 0; i < nummatches; ++i) {
+        unsigned p1, p2, num;
+        bdelta_getMatch(b, i, &p1, &p2, &num);
+        write_dword(fout, p1-lastp1);
+        copyloc2[i] = p2-lastp2;
+        write_dword(fout, copyloc2[i]);
+        copynum[i] = num;
+        write_dword(fout, copynum[i]);
+        lastp1=p1+num;
+        lastp2=p2+num;
+    }
+    // Bytes of file 2 after the last match form one more literal section.
+    // It is not counted in the header; bpatch derives it from the sizes.
+    unsigned numsections = nummatches;
+    if (size2!=lastp2) {
+        copynum[numsections]=0;
+        copyloc2[numsections]=size2-lastp2;
+        ++numsections;
+    }
+
+    // Literal data: for every section, the unmatched file 2 bytes that
+    // precede the match, copied in chunks of at most one read buffer.
+    unsigned fp = 0;
+    for (unsigned i = 0; i < numsections; ++i) {
+        unsigned num = copyloc2[i];
+        while (num>0) {
+            unsigned towrite = num;
+            if (towrite>4096) towrite=4096;
+            void *buf = f2->read(fp, towrite);
+            fwrite(buf, 1, towrite, fout);
+            num-=towrite;
+            fp+=towrite;
+        }
+        fp+=copynum[i];     // skip the bytes covered by the match
+    }
+
+    // Report write failures instead of silently leaving a broken patch.
+    bool ok = !ferror(fout);
+    if (fclose(fout) != 0) ok = false;
+    if (!ok) printf("error while writing output file\n");
+
+    bdelta_done_alg(b);
+
+    delete[] copyloc2;
+    delete[] copynum;
+    delete f1;
+    delete f2;
+    return ok ? 0 : 1;
+}
36 src/bdelta.h
@@ -0,0 +1,36 @@
+/* Copyright (C) 2003-2008 John Whitney
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * Author: John Whitney <jjw@deltup.org>
+ */
+
+// bdelta uses two callback functions to supply it with the
+// data to be compared, callback functions must keep data
+// incorrupt until two more calls for data
+typedef void *(*bdelta_readCallback)(unsigned place, unsigned num);
+
+void *bdelta_init_alg(unsigned f1_size, unsigned f2_size,
+ bdelta_readCallback f1, bdelta_readCallback f2);
+void bdelta_done_alg(void *instance);
+
+//returns the total number of matches found
+unsigned bdelta_pass(void *instance, unsigned blocksize);
+
+void bdelta_getMatch(void *instance, unsigned matchNum,
+ unsigned *p1, unsigned *p2, unsigned *num);
+
+int bdelta_getError(void *instance);
+
+const int
+ BDELTA_OK = 0,
+ BDELTA_MEM_ERROR = -1,
+ BDELTA_READ_ERROR = -2;
105 src/bpatch.cpp
@@ -0,0 +1,105 @@
+/* Copyright (C) 2003-2008 John Whitney
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * Author: John Whitney <jjw@deltup.org>
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include "file.h"
+
+// Copies `numleft` bytes from infile to outfile in chunks of up to 1024
+// bytes. Returns true when every requested byte was copied; returns false
+// when the input ran short or a write failed (a message is printed for
+// the write failure only).
+bool copy_bytes_to_file(FILE *infile, FILE *outfile, unsigned numleft) {
+    const size_t chunkSize = 1024;
+    char chunk[1024];
+    for (;;) {
+        const size_t wanted = numleft > chunkSize ? chunkSize : numleft;
+        const size_t got = fread(chunk, 1, wanted, infile);
+        if (fwrite(chunk, 1, got, outfile) != got) {
+            printf("Could not write temporary data. Possibly out of space\n");
+            return false;
+        }
+        numleft -= got;
+        if (numleft == 0)
+            return true;
+        // Anything short of a full chunk with bytes still owed means the
+        // input is exhausted.
+        if (got < chunkSize)
+            return false;
+    }
+}
+
+// bpatch <reference file> <output file> <patchfile>
+//
+// Rebuilds the output file by alternating between literal bytes taken from
+// the patch and runs copied from the reference file. Returns 0 on success,
+// 1 for usage / unrecognized-input errors and -1 when copying fails.
+int main(int argc, char **argv) {
+    if (argc!=4) {
+        printf("needs a reference file, file to output, and patchfile:\n");
+        printf("delta oldfile newfile patchfile\n");
+        return 1;
+    }
+
+    if (!fileExists(argv[1]) || !fileExists(argv[3])) {
+        printf("one of the input files does not exist\n");
+        return 1;
+    }
+
+    FILE *patchfile = fopen(argv[3], "rb");
+    if (!patchfile) {
+        printf("one of the input files does not exist\n");
+        return 1;
+    }
+    char magic[3];
+    if (fread(magic, 1, 3, patchfile) != 3 || strncmp(magic, "BDT", 3)) {
+        printf("Given file is not a recognized patchfile\n");
+        fclose(patchfile);
+        return 1;
+    }
+    unsigned short version = read_word(patchfile);
+    if (version!=1) {
+        printf("unsupported patch version\n");
+        fclose(patchfile);
+        return 1;
+    }
+    unsigned char intsize = 0;
+    if (fread(&intsize, 1, 1, patchfile) != 1 || intsize!=4) {
+        printf("unsupported file pointer size\n");
+        fclose(patchfile);
+        return 1;
+    }
+    unsigned size1 = read_dword(patchfile),
+             size2 = read_dword(patchfile);
+    (void)size1;    // stored in the patch but not needed for patching
+
+    unsigned nummatches = read_dword(patchfile);
+    // Every table entry occupies three 4-byte integers, so a count larger
+    // than the patch could possibly hold means a damaged file. This also
+    // keeps nummatches+1 below from wrapping around.
+    if (nummatches > getLenOfFile(argv[3]) / 12) {
+        printf("Error. patchfile is truncated\n");
+        fclose(patchfile);
+        return -1;
+    }
+
+    unsigned
+        *copyloc1 = new unsigned[nummatches+1],
+        *copyloc2 = new unsigned[nummatches+1],
+        *copynum = new unsigned[nummatches+1];
+
+    // size2 ends up as the number of output bytes not accounted for by the
+    // table, i.e. the literal bytes that follow the last match.
+    for (unsigned i = 0; i < nummatches; ++i) {
+        copyloc1[i] = read_dword(patchfile);
+        copyloc2[i] = read_dword(patchfile);
+        copynum[i] = read_dword(patchfile);
+        size2-=copyloc2[i]+copynum[i];
+    }
+    if (size2) {
+        copyloc1[nummatches]=0; copynum[nummatches]=0;
+        copyloc2[nummatches]=size2;
+        ++nummatches;
+    }
+
+    int status = 0;
+    FILE *ref = fopen(argv[1], "rb");
+    FILE *outfile = ref ? fopen(argv[2], "wb") : 0;
+    if (!ref) {
+        printf("one of the input files does not exist\n");
+        status = 1;
+    } else if (!outfile) {
+        printf("couldn't open output file\n");
+        status = 1;
+    }
+
+    for (unsigned i = 0; status == 0 && i < nummatches; ++i) {
+        // Literal bytes that precede this match come from the patch.
+        if (!copy_bytes_to_file(patchfile, outfile, copyloc2[i])) {
+            printf("Error. patchfile is truncated\n");
+            status = -1;
+            break;
+        }
+
+        // The file 1 offset is relative to the end of the previous copy
+        // and may be negative, hence the conversion to int.
+        int copyloc = copyloc1[i];
+        if (fseek(ref, copyloc, SEEK_CUR) != 0 ||
+            !copy_bytes_to_file(ref, outfile, copynum[i])) {
+            printf("Error while copying from reference file\n");
+            status = -1;
+        }
+    }
+
+    if (outfile && fclose(outfile) != 0 && status == 0) {
+        printf("Could not write temporary data. Possibly out of space\n");
+        status = -1;
+    }
+    if (ref) fclose(ref);
+    fclose(patchfile);
+    delete[] copyloc1;
+    delete[] copyloc2;
+    delete[] copynum;
+    return status;
+}
82 src/container.h
@@ -0,0 +1,82 @@
+/* Copyright (C) 2003-2008 John Whitney
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * Author: John Whitney <jjw@deltup.org>
+ */
+
+/*
+struct Stack_node {
+ Stack_node(void *datA, Stack_node *nexT) {data = datA; next = nexT;}
+ void *data;
+ Stack_node *next;
+};
+
+template <class T>
+class Stack {
+ Stack_node *s;
+public:
+ Stack() {s = 0;}
+ void push(T* t) {s = new Stack_node(t, s);}
+ void pop() {Stack_node *old = s; s = s->next; delete old;}
+ T& top() {return *((T*)s->data);}
+ bool empty() {return (s==0);}
+ void clear() {while (!empty()) pop();}
+};
+*/
+
+// Node of a doubly linked list. The node stores a pointer to its payload
+// but never deletes the payload itself.
+template <class T>
+struct DLink {
+    T *obj;
+    DLink *prev, *next;
+    // Creates a node between preV and nexT (either may be null) and hooks
+    // the neighbours up to it.
+    DLink(T *obJ, DLink *preV, DLink *nexT) {
+        obj = obJ;
+        prev = preV;
+        next = nexT;
+        if (prev) prev->next = this;
+        if (next) next->prev = this;
+    }
+    // Unlinks this node from its neighbours and destroys it.
+    void erase() {
+        DLink *before = prev;
+        DLink *after = next;
+        if (before) before->next = after;
+        if (after) after->prev = before;
+        delete this;
+    }
+};
+
+// Doubly linked list of T pointers built from DLink nodes. The list tracks
+// only its two end nodes; payload objects are not owned by it.
+template <class T>
+struct DList {
+    DLink<T> *first, *last;
+    DList() {
+        first = 0;
+        last = 0;
+    }
+    // Returns the first node whose payload pointer equals o, or 0.
+    DLink<T> *find_first(T *o) {
+        DLink<T> *cur = first;
+        while (cur && cur->obj != o)
+            cur = cur->next;
+        return cur;
+    }
+
+    // Counts the nodes by walking the whole list.
+    int size() {
+        int count = 0;
+        DLink<T> *cur = first;
+        while (cur) {
+            ++count;
+            cur = cur->next;
+        }
+        return count;
+    }
+    DLink<T> *insert(T *o, DLink<T> *prev, DLink<T> *next);
+    void push_front(T *o) {insert(o, 0, first);}
+    void push_back(T *o) {insert(o, last, 0);}
+    void erase(DLink<T> *o);
+    bool empty() {return first == 0;}
+};
+
+// Inserts o between prev and next (either may be null) and returns the new
+// node. The end pointers move when the node lands at an end of the list.
+template <class T>
+DLink<T> *DList<T>::insert(T *o, DLink<T> *prev, DLink<T> *next) {
+    const bool becomesLast = (prev == last);
+    const bool becomesFirst = (next == first);
+    DLink<T> *node = new DLink<T>(o, prev, next);
+    if (becomesLast) last = node;
+    if (becomesFirst) first = node;
+    return node;
+}
+
+// Removes and destroys the node obj, fixing up the end pointers first.
+template <class T>
+void DList<T>::erase(DLink<T> *obj) {
+    if (first == obj) first = obj->next;
+    if (last == obj) last = obj->prev;
+    obj->erase();
+}
53 src/file.h
@@ -0,0 +1,53 @@
+/* Copyright (C) 2003-2008 John Whitney
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * Author: John Whitney <jjw@deltup.org>
+ */
+
+// Reads a 16-bit little-endian value from f. Bytes that cannot be read
+// (end of file or read error) count as zero instead of being left
+// uninitialized.
+unsigned read_word(FILE *f) {
+    unsigned char bytes[2] = {0, 0};
+    size_t got = fread(bytes, 1, 2, f);
+    (void)got;      // a short read simply leaves the zeros in place
+    return ((unsigned)bytes[1]<<8)+bytes[0];
+}
+
+// Reads a 32-bit little-endian value from f as two 16-bit words, low word
+// first.
+unsigned read_dword(FILE *f) {
+    const unsigned lowWord = read_word(f);
+    const unsigned highWord = read_word(f);
+    return (highWord<<16)+lowWord;
+}
+
+// Writes the low 16 bits of number to f in little-endian byte order.
+void write_word(FILE *f, unsigned number) {
+    unsigned char bytes[2];
+    bytes[0] = number&255;
+    bytes[1] = number>>8;   // truncated to the second-lowest byte
+    fwrite(bytes, 1, 2, f);
+}
+
+// Writes number to f as a 32-bit little-endian value: low word first, then
+// high word.
+void write_dword(FILE *f, unsigned number) {
+    const unsigned lowWord = number&65535;
+    const unsigned highWord = number>>16;
+    write_word(f, lowWord);
+    write_word(f, highWord);
+}
+
+// Reports whether fname can be opened for reading.
+bool fileExists(char *fname) {
+    FILE *probe = fopen(fname, "rb");
+    if (probe == NULL)
+        return false;
+    fclose(probe);
+    return true;
+}
+
+// Returns the length of the file fname in bytes, or 0 when the file cannot
+// be opened or its size cannot be determined.
+unsigned getLenOfFile(char *fname) {
+    FILE *f = fopen(fname, "rb");
+    if (!f) return 0;       // previously a NULL stream was passed on
+    long len = 0;
+    if (fseek(f, 0, SEEK_END) == 0) {
+        len = ftell(f);
+        if (len < 0) len = 0;   // ftell reports failure as -1
+    }
+    fclose(f);
+    return (unsigned)len;
+}
394 src/libbdelta.cpp
@@ -0,0 +1,394 @@
+/* Copyright (C) 2003-2008 John Whitney
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * Author: John Whitney <jjw@deltup.org>
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "container.h"
+#include "bdelta.h"
+// Compile-time switch for the progress messages printed by bdelta_pass().
+const bool verbose = false;
+typedef unsigned char byte;
+typedef unsigned long long Checksum;
+
+// Hash table entry describing one checksummed block of file 1.
+struct checksum_entry {
+    Checksum cksum; //Rolling checksums
+    unsigned loc;   // offset of the block in file 1
+    int next;       // index of the next entry in the same bucket's circular chain
+};
+
+// A run of `num` bytes starting at offset `p`.
+struct Range {
+    unsigned p, num;
+    Range(unsigned p, unsigned num) : p(p), num(num) {}
+    Range() {}      // members are left uninitialized
+};
+
+// `num` bytes at offset p1 of file 1 that equal the bytes at offset p2 of
+// file 2.
+struct Match {
+    unsigned p1, p2, num;
+    Match(unsigned p1, unsigned p2, unsigned num)
+        : p1(p1), p2(p2), num(num) {}
+};
+
+// State behind the opaque handle returned by bdelta_init_alg().
+struct BDelta_Instance {
+    bdelta_readCallback f1, f2;     // data sources for the two files
+    unsigned f1_size, f2_size;
+    DList<Match> matches;
+    DLink<Match> *accessplace;      // cursor used by bdelta_getMatch()
+    int access_int;                 // index of accessplace, -1 = cursor not set
+    int errorcode;
+};
+
+// Hash table over the file 1 blocks examined during one pass.
+struct Checksums_Instance {
+    unsigned blocksize;
+    unsigned hashsize;              // number of buckets (a power of two)
+    int *hash;                      // bucket -> index into hash_items, -1 = empty
+    checksum_entry *hash_items;
+    unsigned numhashitems;          // entries of hash_items in use
+};
+
+// Multiplier of the rolling checksum.
+const unsigned multiplyamount = 181;
+
+// Returns the length of the common prefix of the two num-byte buffers.
+//
+// The former word-at-a-time loop compared the buffer *addresses* rather
+// than their contents: it never ran for distinct buffers, and for aliased
+// buffers it could return a value larger than num. A plain byte comparison
+// is correct for every input and never reads past num bytes.
+unsigned match_buf_forward(void *buf1, void *buf2, unsigned num) {
+    const unsigned char *a = (const unsigned char*)buf1;
+    const unsigned char *b = (const unsigned char*)buf2;
+    unsigned i = 0;
+    while (i<num && a[i]==b[i]) ++i;
+    return i;
+}
+// Returns the length of the common suffix of the two num-byte buffers,
+// comparing from the last byte towards the first.
+unsigned match_buf_backward(void *buf1, void *buf2, unsigned num) {
+    const unsigned char *a = (const unsigned char*)buf1;
+    const unsigned char *b = (const unsigned char*)buf2;
+    unsigned matched = 0;
+    while (matched < num && a[num-1-matched]==b[num-1-matched])
+        ++matched;
+    return matched;
+}
+// Returns the smaller of the two values.
+inline unsigned lesser(unsigned a, unsigned b) {
+    if (b < a) return b;
+    return a;
+}
+// Counts how many consecutive bytes starting at p1 in file 1 equal those
+// starting at p2 in file 2. The files are compared in chunks of at most
+// 4096 bytes, and the scan stops at the first difference or at the end of
+// either file.
+unsigned match_forward(BDelta_Instance *b, unsigned p1, unsigned p2) {
+    unsigned total = 0;
+    for (;;) {
+        unsigned chunk = lesser(b->f1_size-p1, b->f2_size-p2);
+        if (chunk>4096) chunk=4096;
+        void *data1 = b->f1(p1, chunk);
+        void *data2 = b->f2(p2, chunk);
+        p1+=chunk;
+        p2+=chunk;
+        const unsigned same = match_buf_forward(data1, data2, chunk);
+        total+=same;
+        // Continue only while whole, non-empty chunks keep matching.
+        if (same==0 || same!=chunk) break;
+    }
+    return total;
+}
+
+// Counts how many consecutive bytes ending just before p1 in file 1 equal
+// those ending just before p2 in file 2. The files are walked backwards in
+// chunks of at most blocksize bytes, and the scan stops at the first
+// difference or at the start of either file.
+unsigned match_backward(BDelta_Instance *b, unsigned p1, unsigned p2, unsigned blocksize) {
+    unsigned total = 0;
+    for (;;) {
+        unsigned chunk = lesser(p1, p2);
+        if (chunk > blocksize) chunk = blocksize;
+        p1-=chunk;
+        p2-=chunk;
+        void *data1 = b->f1(p1, chunk);
+        void *data2 = b->f2(p2, chunk);
+        const unsigned same = match_buf_backward(data1, data2, chunk);
+        total+=same;
+        // Continue only while whole, non-empty chunks keep matching.
+        if (same==0 || same!=chunk) break;
+    }
+    return total;
+}
+
+
+// Computes the checksum state of one block from scratch.
+//   sum:   receives the plain sum of the block's bytes.
+//   accum: receives the rolling checksum; for every byte the value is
+//          multiplied by multiplyamount and the running byte sum is added.
+void calculate_block_checksum(byte *blockptr, unsigned blocksize,
+    unsigned &sum, Checksum &accum) {
+    unsigned byteSum = 0;
+    Checksum rolling = 0;
+    for (byte *cur = blockptr, *stop = blockptr+blocksize; cur != stop; ++cur) {
+        byteSum += *cur;
+        rolling = rolling*multiplyamount + byteSum;
+    }
+    sum = byteSum;
+    accum = rolling;
+}
+
+// Inserts the match (p1, p2, num) into b->matches after `place`, keeping
+// the list ordered and non-overlapping in file 2:
+//   - matches at or before `place` that start at or after p2 are removed,
+//   - the remaining predecessor is shortened so that it ends at p2,
+//   - the new match is shortened so that it ends where its successor starts.
+// On return `place` refers to the newly inserted link.
+void addMatch(BDelta_Instance *b, unsigned p1, unsigned p2, unsigned num, DLink<Match> *&place) {
+    while (place && place->obj->p2>=p2) {
+        DLink<Match> *toerase = place;
+        place=place->prev;
+        // The list owns its Match objects (bdelta_done_alg deletes them),
+        // so a superseded match must be freed along with its link.
+        delete toerase->obj;
+        b->matches.erase(toerase);
+    }
+    if (place && place->obj->p2+place->obj->num>p2)
+        place->obj->num=p2-place->obj->p2;
+    DLink<Match> *next = place?place->next:b->matches.first;
+    if (next && p2+num>next->obj->p2)
+        num=next->obj->p2-p2;
+    place = b->matches.insert(new Match(p1, p2, num), place, next);
+}
+
+/* Scans file 2 between offsets `start` and `end` for blocks whose rolling
+ * checksum is present in the hash table `h`, and records every candidate
+ * that really matches through addMatch().
+ *
+ *   b              instance providing the read callbacks and the match list
+ *   h              checksums of the file 1 blocks and the block size in use
+ *   start, end     range of file 2 to scan
+ *   place          link after which new matches are inserted (may be null)
+ *   oldcoefficient factor applied to the byte that leaves the rolling window
+ */
+void findMatches(BDelta_Instance *b, Checksums_Instance *h, unsigned start, unsigned end,
+ DLink<Match> *place, Checksum oldcoefficient) {
+ byte *inbuf, *outbuf;
+ unsigned buf_loc;
+ const unsigned blocksize = h->blocksize;
+
+ unsigned sum;
+ Checksum accum;
+
+ const unsigned maxSectionMatches = 128;//16+b->f2_size/262140;
+ int checkMatches[maxSectionMatches]; // hash_items indices of the candidates
+ int matchP2[maxSectionMatches]; // file 2 offset of each candidate block
+ int numcheckMatches;
+ int challengerP1, challengerP2 = end, challengerNum=0;
+ int j = start;
+ while (j < end) {
+ inbuf = (byte*)b->f2(j, blocksize);
+ calculate_block_checksum(inbuf, blocksize, sum, accum); // checksum of the first window
+ buf_loc=blocksize;
+ j+=blocksize;
+
+ numcheckMatches = 0;
+
+ unsigned endi = end;
+ int i;
+ for (i = j; i < endi; ++i) { // i is the offset just past the current window
+ if (buf_loc==blocksize) { // current buffer used up: fetch the next block
+ buf_loc=0;
+ outbuf=inbuf;
+ inbuf=(byte*)b->f2(i, blocksize);
+ }
+ const Checksum ck = accum;
+ int c = h->hash[ck&(h->hashsize-1)];
+ if (c!=-1) {
+ // ++stata;
+ // if (c!=-1) {
+ int start = c;
+ do { // walk the bucket's circular chain once
+ c=h->hash_items[c].next;
+ if (h->hash_items[c].cksum==ck) {
+ // printf("%i\n", numcheckMatches);
+ if (numcheckMatches>=maxSectionMatches) { // too many candidates: drop them all and end this section
+ i = endi;
+ numcheckMatches=0;//printf("too many matches\n");
+ break;
+ }
+ matchP2[numcheckMatches] = i-blocksize;
+ checkMatches[numcheckMatches++] = c;
+ if (endi==end) endi = i+blocksize; // after the first candidate, scan at most one more block
+ if (endi>end) endi=end;
+ }
+ } while (c!=start);
+ } //else ++statb;
+ const byte
+ oldbyte = outbuf[buf_loc],
+ newbyte = inbuf[buf_loc];
+ ++buf_loc;
+ accum -= oldcoefficient*oldbyte; // slide the window forward by one byte
+ accum*=multiplyamount;
+ sum = sum - oldbyte + newbyte;
+ accum += sum;
+
+ // static int lastmark = 0;
+ // if (start==0 && end==size2 && i>lastmark*(size2/20)) {
+ // fprintf(stderr, "checkpoint %i\n", lastmark);
+ // lastmark++;
+ // }
+ }
+
+ j=i;
+ // again:
+ if (numcheckMatches) {
+ unsigned lastf1Place = place?place->obj->p1+place->obj->num:0; // file 1 offset where the previous match ends
+ int closestMatch=0;
+ for (int i = 1; i < numcheckMatches; ++i) // prefer the candidate nearest to lastf1Place in file 1
+ if (abs(lastf1Place-h->hash_items[checkMatches[i]].loc) <
+ abs(lastf1Place-h->hash_items[checkMatches[closestMatch]].loc))
+ closestMatch=i;
+
+ unsigned p1 = h->hash_items[checkMatches[closestMatch]].loc, p2 = matchP2[closestMatch];
+ unsigned fnum = match_forward(b, p1, p2);
+ // if (fnum<blocksize) falsematches++; else truematches++;
+
+ if (fnum >= blocksize) { // accept only if the bytes really match for at least one block
+ unsigned bnum = match_backward(b, p1, p2, blocksize);
+ unsigned num=fnum+bnum;
+ p1 -= bnum; p2 -= bnum;
+ addMatch(b, p1, p2, num, place);
+ j=p2+num; // resume scanning after the match
+ }
+ }
+ }
+}
+
+// TODO: maybe make this function a member of Checksums_Instance?
+// Checksums the block of file 1 at offset `place` and appends it to the
+// hash table. The entries of one bucket form a circular chain through
+// `next`, and the bucket always points at the entry added last.
+void add_cksum(BDelta_Instance *b, Checksums_Instance *h, unsigned place) {
+    unsigned sum;
+    Checksum ck;
+    byte *block = (byte*)b->f1(place, h->blocksize);
+    calculate_block_checksum(block, h->blocksize, sum, ck);
+
+    const unsigned index = h->numhashitems;
+    checksum_entry &entry = h->hash_items[index];
+    entry.cksum = ck;
+    entry.loc = place;
+
+    int &bucket = h->hash[ck&(h->hashsize-1)];
+    if (bucket == -1) {
+        // First entry of this bucket: a chain of one, linked to itself.
+        entry.next = index;
+    } else {
+        // Splice the new entry in right after the current bucket entry.
+        checksum_entry &previous = h->hash_items[bucket];
+        entry.next = previous.next;
+        previous.next = index;
+    }
+    bucket = index;
+    ++h->numhashitems;
+}
+
+// qsort comparator ordering Range objects by ascending start offset `p`.
+// Equal offsets compare as 0; returning 1 for both argument orders, as
+// before, gave qsort an inconsistent ordering.
+int comparep1(const void *r1, const void *r2) {
+    const unsigned a = ((const Range*)r1)->p;
+    const unsigned b = ((const Range*)r2)->p;
+    if (a < b) return -1;
+    if (a > b) return 1;
+    return 0;
+}
+
+// Creates an algorithm instance for two files of the given sizes, read
+// through the callbacks f1 and f2. The instance starts with an empty match
+// list, no access cursor and error code BDELTA_OK. Returns the instance as
+// an opaque pointer, or 0 if it could not be allocated.
+void *bdelta_init_alg(unsigned f1_size, unsigned f2_size,
+    bdelta_readCallback f1, bdelta_readCallback f2) {
+    BDelta_Instance *b = new BDelta_Instance;
+    if (!b) return 0;
+    b->f1_size=f1_size;
+    b->f2_size=f2_size;
+    b->f1=f1;
+    b->f2=f2;
+    b->accessplace=0;
+    b->access_int=-1;
+    // Previously left uninitialized, so bdelta_getError() could return
+    // garbage when no error had occurred.
+    b->errorcode=BDELTA_OK;
+    return b;
+}
+
+// Destroys an instance: frees every Match together with its list link,
+// then the instance itself.
+void bdelta_done_alg(void *instance) {
+    BDelta_Instance *inst = static_cast<BDelta_Instance*>(instance);
+    DList<Match> &list = inst->matches;
+    for (DLink<Match> *head = list.first; head; head = list.first) {
+        delete head->obj;
+        list.erase(head);
+    }
+    delete inst;
+}
+
+// Runs one matching pass with the given block size.
+//
+// 1. Collects the regions of file 1 that no existing match covers.
+// 2. Checksums every whole block of `blocksize` bytes inside those regions
+//    and stores the checksums in a hash table.
+// 3. Scans the regions of file 2 that no existing match covers for blocks
+//    with a known checksum (findMatches) and records the real matches.
+//
+// Returns the total number of matches held by the instance afterwards.
+// On allocation failure the error code is set to BDELTA_MEM_ERROR and 0 is
+// returned.
+unsigned bdelta_pass(void *instance, unsigned blocksize) {
+    if (verbose) printf("Organizing leftover blocks\n");
+
+    Checksums_Instance h;
+    h.blocksize = blocksize;
+    BDelta_Instance *b = (BDelta_Instance*)instance;
+    b->access_int=-1;   // the match list is about to change; drop the cursor
+
+    // Start with the file 1 ranges covered by matches, sorted by offset...
+    Range *unused = new Range[b->matches.size() + 1];
+    if (!unused) {b->errorcode=BDELTA_MEM_ERROR; return 0;}
+    unsigned numunused = 0;
+    for (DLink<Match> *l = b->matches.first; l; l=l->next)
+        unused[numunused++] = Range(l->obj->p1, l->obj->num);
+
+    qsort(unused, numunused, sizeof(Range), comparep1);
+
+    // ...and turn them, in place, into the gaps between those ranges.
+    // Ranges that leave no gap in front of them are counted in `missing`
+    // and squeezed out.
+    unsigned last = 0;
+    unsigned missing = 0;
+    for (unsigned i = 0; i < numunused; ++i) {
+        unsigned nextstart = unused[i].p + unused[i].num;
+        if (unused[i].p<=last)
+            ++missing;
+        else
+            unused[i-missing] = Range(last, unused[i].p-last);
+        last = nextstart;
+    }
+    numunused-=missing;
+    unused[numunused++] = Range(last, b->f1_size-last);
+
+    unsigned numblocks = 0;
+    for (unsigned i = 0; i < numunused; ++i) {
+        numblocks+=unused[i].num/blocksize;
+    }
+
+    if (verbose) printf("Starting search for matching blocks of size %i\n", blocksize);
+    if (verbose) printf("found %i blocks\n", numblocks);
+    // The bucket count is a power of two (at least 65536) so that a
+    // checksum can be reduced to a bucket with a bit mask.
+    h.hashsize = 1<<16;
+    while (h.hashsize<numblocks) h.hashsize<<=1;
+    if (verbose) printf("creating hash of size %i\n", h.hashsize);
+    h.hash = new int[h.hashsize];
+    if (!h.hash) {
+        delete[] unused;
+        b->errorcode=BDELTA_MEM_ERROR;
+        return 0;
+    }
+    h.hash_items = new checksum_entry[numblocks];
+    if (!h.hash_items) {
+        delete[] h.hash;
+        delete[] unused;
+        b->errorcode=BDELTA_MEM_ERROR;
+        return 0;
+    }
+
+    if (verbose) printf("find checksums\n");
+    for (unsigned i = 0; i < h.hashsize; ++i) h.hash[i]=-1;
+
+    h.numhashitems=0;
+    for (unsigned i = 0; i < numunused; ++i) {
+        unsigned p1 = unused[i].p, p2 = unused[i].p + unused[i].num;
+        while (p1+blocksize <= p2) {
+            add_cksum(b, &h, p1);
+            p1+=blocksize;
+        }
+    }
+    if (verbose) printf("compare files\n");
+
+    // Factor by which the byte leaving the rolling window is multiplied
+    // before it is subtracted from the checksum (see findMatches).
+    Checksum oldcoefficient = 1;
+    for (unsigned i = 1; i < blocksize; ++i) {
+        oldcoefficient*=multiplyamount;
+        ++oldcoefficient;
+    }
+
+    // Search every gap of file 2 that is at least one block long: the gaps
+    // in front of each existing match, then the tail of the file.
+    last = 0;
+    for (DLink<Match> *l = b->matches.first; l; l=l->next) {
+        if (l->obj->p2 - last >= blocksize)
+            findMatches(b, &h, last, l->obj->p2, l->prev, oldcoefficient);
+        last = l->obj->p2+l->obj->num;
+    }
+    if (b->f2_size-last>=blocksize)
+        findMatches(b, &h, last, b->f2_size, b->matches.last, oldcoefficient);
+    // All three were allocated with new[].
+    delete[] unused;
+    delete[] h.hash;
+    delete[] h.hash_items;
+    return b->matches.size();
+}
+
+
+// Copies match number matchNum into *p1, *p2 and *num. The instance keeps
+// a cursor (accessplace / access_int) at the most recently requested match
+// and walks from there, so sequential access does not rescan the list.
+// matchNum is not range-checked.
+void bdelta_getMatch(void *instance, unsigned matchNum,
+    unsigned *p1, unsigned *p2, unsigned *num) {
+    BDelta_Instance *inst = (BDelta_Instance*)instance;
+    if (inst->access_int==-1) {
+        // No cursor yet: start at the head of the list.
+        inst->access_int = 0;
+        inst->accessplace = inst->matches.first;
+    }
+    for (; inst->access_int<matchNum; ++inst->access_int)
+        inst->accessplace = inst->accessplace->next;
+    for (; inst->access_int>matchNum; --inst->access_int)
+        inst->accessplace = inst->accessplace->prev;
+    const Match *found = inst->accessplace->obj;
+    *p1 = found->p1;
+    *p2 = found->p2;
+    *num = found->num;
+}
+
+int bdelta_getError(void *instance) {
+ return ((BDelta_Instance*)instance)->errorcode;
+}
Please sign in to comment.
Something went wrong with that request. Please try again.