From 4610a070c0b95fd5e6afce69ee60d806edd7c25e Mon Sep 17 00:00:00 2001
From: Colin Phipps
Date: Sun, 15 Jan 2006 17:39:56 +0000
Subject: [PATCH] 0.2.1

---
 c/NEWS           | 60 ++++++++++++++++++++++++++++++++++++++++++++++++
 c/README         |  2 +-
 c/client.c       |  2 +-
 c/configure.in   |  2 +-
 c/lib/hash.c     | 10 +++++++-
 c/lib/internal.h |  2 +-
 c/lib/rsum.c     |  5 +++-
 c/lib/state.c    | 22 ++++++++----------
 c/zlib/inflate.c |  4 ++--
 paper/paper.xml  | 50 +++++++++++++++++++++++++++++++++++-----
 10 files changed, 132 insertions(+), 27 deletions(-)
 create mode 100644 c/NEWS

diff --git a/c/NEWS b/c/NEWS
new file mode 100644
index 0000000..e70a8cb
--- /dev/null
+++ b/c/NEWS
@@ -0,0 +1,60 @@
+Changes in 0.2.1
+- fixed bug where zsync would loop if fed more than one local source file
+- enabled zsync to handle gzip files containing stored (uncompressed) blocks (so we should do slightly better on gzipped binaries now - but this is still incomplete and will not work for all files)
+
+Changes in 0.2.0
+- major reduction in the size of the control file, due to some new optimisations
+- further fixes for servers refusing to do multipart/byteranges responses
+
+Changes in 0.1.6
+- fixes some problems with compressed file downloads and unusual server responses
+- improved http_proxy parsing
+
+Changes in 0.1.5
+- fixes some minor HTTP problems
+
+Changes in 0.1.4
+- fixed compilation with gcc-3.4
+
+Changes in 0.1.3
+- HTTP proxy support
+- better HTTP support - we deal better with unusual server responses
+
+Changes in 0.1.2
+- fixes for Solaris/sparc
+
+Changes in 0.1.1
+- more efficient compressed stream support
+- code cleanups
+
+Changes in 0.1.0
+- finished the cleanup of the compressed file transfer code
+- major improvement in efficiency for compressed file transfers
+- OpenSSL is no longer required
+- now under the v2 Artistic License
+
+Changes in 0.0.6
+- just code cleanups and documentation updates
+
+Changes in 0.0.5
+- switch to OpenSSL's MD4 code
+- fix checksumming at stream end
+- fix various portability problems
+
+Changes in 0.0.4
+- relative URLs in .zsync files supported
+- HTTP redirects are followed for the metafile
+- now uses both compressed and uncompressed sources to further reduce download sizes
+
+Changes in 0.0.3
+- fix HTTP connection termination handling
+- speed up local file reading
+
+Changes in 0.0.2
+- HTTP/1.1, with pipelining
+- cleaning up more of the libc streams and mmap clutter
+- progress displays
+- lots of bugfixes
+
+First release 0.0.1.
+
diff --git a/c/README b/c/README
index ccf933e..d53dce1 100644
--- a/c/README
+++ b/c/README
@@ -1,4 +1,4 @@
-zsync 0.2.0
+zsync 0.2.1
 ===========
 zsync is a file transfer program.  It allows you to download a file from a
diff --git a/c/client.c b/c/client.c
index 59227fc..f15cc65 100644
--- a/c/client.c
+++ b/c/client.c
@@ -150,7 +150,7 @@ int read_zsync_control_stream(FILE* f, struct zsync_state** z, const char* sourc
       fprintf(stderr,"nonsensical blocksize %d\n",blocksize);
       return -1;
     }
   } else if (!strcmp(buf, "Hash-Lengths")) {
-    if (sscanf(p,"%d,%d,%d",&seq_matches,&rsum_bytes,&checksum_bytes) != 3 || rsum_bytes < 1 || rsum_bytes > 4 || checksum_bytes < 4 || checksum_bytes > 16 || seq_matches > 2 || seq_matches < 1) {
+    if (sscanf(p,"%d,%d,%d",&seq_matches,&rsum_bytes,&checksum_bytes) != 3 || rsum_bytes < 1 || rsum_bytes > 4 || checksum_bytes < 3 || checksum_bytes > 16 || seq_matches > 2 || seq_matches < 1) {
       fprintf(stderr,"nonsensical hash lengths line %s\n",p);
       return -1;
     }

diff --git a/c/configure.in b/c/configure.in
index 82adc0a..f173219 100644
--- a/c/configure.in
+++ b/c/configure.in
@@ -5,7 +5,7 @@ AC_CONFIG_AUX_DIR(autotools)
 dnl --- Set version strings
 MAJOR_VERSION=0
 MINOR_VERSION=2
-MICRO_VERSION=0
+MICRO_VERSION=1
 VERSION=$MAJOR_VERSION.$MINOR_VERSION.$MICRO_VERSION
 AC_SUBST(VERSION)

diff --git a/c/lib/hash.c b/c/lib/hash.c
index f4515c6..9bc7c3d 100644
--- a/c/lib/hash.c
+++ b/c/lib/hash.c
@@ -31,13 +31,20 @@ void add_target_block(struct zsync_state* z, zs_blockid b, struct rsum r, void*
     memcpy(e->checksum, checksum, z->checksum_bytes);
     e->r.a = r.a & z->rsum_a_mask;
     e->r.b = r.b;
+    if (z->rsum_hash) {
+      free(z->rsum_hash); z->rsum_hash = NULL;
+    }
   }
 }

-void build_hash(struct zsync_state* z)
+int build_hash(struct zsync_state* z)
 {
   zs_blockid id;

+  z->hashmask = 0xffff;
+  z->rsum_hash = calloc(z->hashmask+1, sizeof *(z->rsum_hash));
+  if (!z->rsum_hash) return 0;
+
   for (id = 0; id < z->blocks; id++) {
     struct hash_entry* e = z->blockhashes + id;
     /* Prepend to linked list for this hash entry */
@@ -46,5 +53,6 @@ void build_hash(struct zsync_state* z)
     e->next = z->rsum_hash[h];
     z->rsum_hash[h] = e;
   }
+  return 1;
 }

diff --git a/c/lib/internal.h b/c/lib/internal.h
index 86b534d..d1e8a78 100644
--- a/c/lib/internal.h
+++ b/c/lib/internal.h
@@ -74,4 +74,4 @@ static inline unsigned calc_rhash(const struct zsync_state* const z, const struc
   return h & z->hashmask;
 }

-void build_hash(struct zsync_state* z);
+int build_hash(struct zsync_state* z);

diff --git a/c/lib/rsum.c b/c/lib/rsum.c
index 1c8e15d..6ad8edc 100644
--- a/c/lib/rsum.c
+++ b/c/lib/rsum.c
@@ -294,7 +294,10 @@ int submit_source_file(struct zsync_state* z, FILE* f)
   if (!buf) return 0;

-  build_hash(z);
+  if (!z->rsum_hash)
+    if (!build_hash(z))
+      return 0;
+
   while (!feof(f)) {
     size_t len;
     long long start_in = in;

diff --git a/c/lib/state.c b/c/lib/state.c
index 4c4ba06..f75deec 100644
--- a/c/lib/state.c
+++ b/c/lib/state.c
@@ -29,7 +29,7 @@ struct zsync_state* zsync_init(zs_blockid nblocks, size_t blocksize, int rsum_by
 {
   struct zsync_state* z = malloc(sizeof(struct zsync_state));

-  if (z != NULL){
+  if (z != NULL) {
     /* Setup blocksize and shift. Size must be a power of two. */
    z->blocksize = blocksize;
    z->blocks = nblocks;
@@ -53,19 +53,15 @@ struct zsync_state* zsync_init(zs_blockid nblocks, size_t blocksize, int rsum_by
      }
    }

-    z->hashmask = 0xffff;
-    z->rsum_hash = calloc(z->hashmask+1, sizeof *(z->rsum_hash));
-    if (z->rsum_hash != NULL) {
-      z->ranges = NULL;
-      z->numranges = 0;
+    z->ranges = NULL;
+    z->rsum_hash = NULL;
+    z->numranges = 0;

-      z->blockhashes = malloc(sizeof(z->blockhashes[0]) * (z->blocks+z->seq_matches));
-      if (z->blockhashes != NULL)
-        return z;
+    z->blockhashes = malloc(sizeof(z->blockhashes[0]) * (z->blocks+z->seq_matches));
+    if (z->blockhashes != NULL)
+      return z;

-      /* All below is error handling */
-      free(z->rsum_hash);
-    }
+    /* All below is error handling */
  }
 }
 free(z->filename);
@@ -96,7 +92,7 @@ void zsync_end(struct zsync_state* z)
    unlink(z->filename);
    free(z->filename);
  }
-  free(z->rsum_hash);
+  if (z->rsum_hash) free(z->rsum_hash);
  free(z->ranges); // Should be NULL already
  fprintf(stderr,"hashhit %d, weakhit %d, checksummed %d, stronghit %d\n",z->stats.hashhit, z->stats.weakhit, z->stats.checksummed, z->stats.stronghit);
  free(z);

diff --git a/c/zlib/inflate.c b/c/zlib/inflate.c
index 7c2c37c..5c1320e 100644
--- a/c/zlib/inflate.c
+++ b/c/zlib/inflate.c
@@ -1295,7 +1295,7 @@ void inflate_advance_bits(strm,b,s)
        state->bits = 0;
        state->hold = 0;
    }
-    state->mode = s ? TYPEDO : LENDO;
+    state->mode = s ? TYPEDO : state->mode == COPY ? COPY : LENDO;
 }

 int ZEXPORT inflateSafePoint(strm)
@@ -1305,6 +1305,6 @@ z_streamp strm;
    if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR;
    state = (struct inflate_state FAR *)strm->state;

-    return state->mode == LENDO;
+    return (state->mode == LENDO || state->mode == COPY);
 }

diff --git a/paper/paper.xml b/paper/paper.xml
index 6467f6d..7207bc3 100644
--- a/paper/paper.xml
+++ b/paper/paper.xml
@@ -7,7 +7,9 @@
 2004-10-24 — First published
 2004-12-04 — Updated for zsync 0.1.2's improved compressed file support
-2005-01-29 — Completed references, and other minor ammendments
+2005-01-29 — Completed references, and other minor amendments for first static release
+2005-01-30 — Added hash length calculations
+2005-02-05 — Added half-block alignment calculations
@@ -82,8 +84,20 @@
 drarrow k > d + log_2 N + log_2 { N over b}
 To get a reasonable certainty of no false matches, say one in a million, we can take, say, d=20, so this formula then gives us an easy way to calculate how many bits of the strong checksum have to be included in the .zsync file. It can be rounded up to the nearest byte for convenience; but by keeping this value low, we reduce the size of the .zsync. This reduces the storage requirement on the server, and the total amount that the client must download.
+Match Continuation
+Another source of information that can help in determining a match is the matching status of neighbouring blocks. There is no reason to believe that matching data in the target file will end neatly on block boundaries - quite the opposite, we expect that after one block matches, neighbouring blocks of the source data will match the corresponding neighbours in the target data, giving long areas in the source file that can be copied to the target file.
+One way to use this is by rejecting matches unless a certain number of consecutive neighbouring blocks also match (see ).
If we insist on, say, 2 matching blocks in sequence, we greatly reduce the chance of false positives - assuming the checksums of these blocks remain independent, we can halve the number of bytes of strong checksum transmitted per block. The only matches we lose by this restriction are single-block matches - but these are rare anyway, and are the least serious matches to miss (because we normally have unmatched data either side that needs to be fetched, so the cost of transmitting the data for the extra block is partially offset by the reduced overhead of downloading a single range instead of the two ranges either side). (Alternatively, one can think of this as splitting a larger matching block into two parts and allowing half-block aligned matches, as discussed in .)
+The first match of a sequence will involve two neighbouring blocks matching together; assuming this is equivalent to a single block matching with the hash lengths combined, we can directly halve the required checksum bits from the previous section. For subsequent blocks, while we are testing with a reduced hash length, we are only testing against a single, specific location in the target file, so again the chance of a false match is reduced. So, to avoid false positives in these two cases, we must have enough hash bits to satisfy the following two conditions:
+
+2k > d + log_2 N + log_2 { N over b} newline k > d + log_2 { N over b}
+
+
+(that's a rough argument off the top of my head and with reference to other work - I am not claiming the above as a fully-reasoned result.)
+At the block sizes that zsync uses, the latter inequality is usually the stricter one, but the difference is small, so the saving in transmitted data is near, if not quite, 50%. For zsync in particular — which, unlike rsync, must always calculate and transmit the strong checksum data for every block in the target file — this is a worthwhile saving.
+Note that we can also reduce the amount of weak checksum data transmitted, given that matches for consecutive blocks are required - in testing it proved more efficient to calculate the weak checksum for both blocks, rather than testing the weak checksum of only the first and then calculating the strong checksum for both (because in a situation where a given block occurs very often in a file, for example an all-null block in an ISO image, a prior block match provides much weaker information about the likelihood of a following block match). Once we are calculating both weak checksums, we can halve the amount of weak checksum data transmitted. Testing bore this out: checking both weak checksums did not significantly harm performance, while providing a definite reduction in the metadata transferred.
+
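To make the arithmetic concrete, here is a short C sketch that turns the two inequalities above into a whole-byte strong checksum length per block. This is an illustration only, not the code zsyncmake actually uses; the function name, the d=20 choice and the 640MB/2048-byte example are assumptions made for the example.

/* Sketch only (not zsyncmake's implementation): per-block strong checksum
 * length from the inequalities above.  N = file size in bytes, b = blocksize,
 * d = bits of certainty wanted against any false match over the whole file
 * (d = 20 is the "one in a million" figure from the text). */
#include <math.h>
#include <stdio.h>

static int strong_checksum_bytes(double N, double b, double d, int seq_matches)
{
    double k;

    if (seq_matches == 2) {
        /* 2k > d + log2(N) + log2(N/b)   and   k > d + log2(N/b) */
        double k1 = (d + log2(N) + log2(N / b)) / 2;
        double k2 = d + log2(N / b);
        k = k1 > k2 ? k1 : k2;
    } else {
        /* single-block matching: k > d + log2(N) + log2(N/b) */
        k = d + log2(N) + log2(N / b);
    }
    return (int)ceil(k / 8);   /* round up to a whole number of bytes for the .zsync file */
}

int main(void)
{
    printf("1 block to match:  %d bytes/block\n", strong_checksum_bytes(640e6, 2048, 20, 1));
    printf("2 blocks to match: %d bytes/block\n", strong_checksum_bytes(640e6, 2048, 20, 2));
    return 0;
}

For a 640MB file with 2048-byte blocks this drops from 9 to 5 bytes per block - close to, but not quite, the 50% saving described above. These are the kind of seq_matches and checksum_bytes values that end up on the Hash-Lengths line parsed by client.c earlier in this patch.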
rsync Speed
Moving the work to the client relieves the server, but the client then has to deal with the problem of computing the rolling checksum over the old file. As explained in , although it is necessary to calculate the weak checksum at every possible offset in the old file, due to the choice of checksum the checksum at offset x+1 can be calculated using the checksum at offset x in combination with the bytes at x and x+blocksize.
-Despite this, when working on large files, for instance ISO files, the calculation can take some time — my Athlon XP 1600+ takes roughly 3 minutes to pass over an entire ISO file in zsync-0.1.6. One possible optimisation is skipping forward to the end of the block when a match is found — once one match is found, there is a good chance of a match at exactly one block further forward (where files have a section of more than once block in common), while a match before then is unlikely (this would mean that target file contained redundancy, with blocks containing similar but offset content). Skipping forward to the next block after a match can halve the time to process a file, if it has a lot in common with the target file, but may slightly increase the amount of data transfer required afterwards. More investigation is needed into this.
+Despite this, when working on large files, for instance ISO files, the calculation can take some time — my Athlon XP 1600+ takes roughly 3 minutes to pass over an entire ISO file in zsync-0.2.0. Various optimisations in the implementation have helped get it to this level. The client skips forward to the end of a block once a match is obtained (there would not be a match overlapping with an existing match except if the target file contains unusual redundancy), allowing it to parse files faster when there is a large amount in common with the target. The half-block alignment data transfer optimisation also helps speed up strong checksum checking, because often only the first of the two blocks needs to be checksummed in order to get a rejection (whereas using a larger blocksize, we would have to strong checksum the entire block to get a rejection).
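As a hedged illustration of the rolling property just described (written for clarity - this is not the implementation in lib/rsum.c, and the struct and function names are invented):

/* rsync/zsync-style weak checksum: a is the plain byte sum, b weights each
 * byte by its distance from the end of the block; both wrap modulo 2^16. */
#include <stddef.h>

struct rsum_sketch { unsigned short a, b; };

/* checksum of one whole block, computed from scratch */
static struct rsum_sketch rsum_block(const unsigned char *data, size_t blocksize)
{
    struct rsum_sketch r = { 0, 0 };
    size_t i;

    for (i = 0; i < blocksize; i++) {
        r.a += data[i];
        r.b += (unsigned short)((blocksize - i) * data[i]);
    }
    return r;
}

/* advance the window from offset x to offset x+1: only the byte leaving the
 * window (oldc = buf[x]) and the byte entering it (newc = buf[x+blocksize])
 * are needed, not the whole block */
static void rsum_roll(struct rsum_sketch *r, unsigned char oldc, unsigned char newc,
                      size_t blocksize)
{
    r->a += (unsigned short)(newc - oldc);
    r->b += (unsigned short)(r->a - blocksize * oldc);
}

So a full pass over the old file costs a couple of additions per byte, rather than a fresh checksum per offset. The real code also masks r.a with rsum_a_mask (visible in the hash.c hunk above) so that only the rsum bytes actually carried in the .zsync file are compared.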
Networking
HTTP is widely deployed and accepted, and supports Range: requests. But is it optimal from our point of view? HTTP's control data is text key: value pairs, and some control data is sent for every distinct block of data to be transferred. If a file is downloaded all at once, there is only one set of HTTP headers, so the overhead is negligible; once we begin transferring lots of disjoint blocks, this overhead must be quantified.
At its most basic, HTTP transfers one block of data per connection. Each request has a header like Range: bytes=1024-2047 and each response contains a header Content-range: bytes 1024-2047. But the full set of headers can be 6 or 7 lines:
HTTP/1.1 206 Partial Content
Date: Sat, 30 Oct 2004 17:28:36 GMT
@@ -120,7 +134,7 @@
Content-range: bytes 3072-4095
[end]
This reduces the overhead per block to around 90 bytes, a significant saving (but with the full HTTP headers once per request, so the total overhead remains higher). There is the risk that this encoding puts more load back on the server — it would not be advisable to request very large numbers of ranges in a single request. This area needs discussion with some web server developers, to decide where the balance lies between less traffic and more server overhead.
Using multiple ranges alleviates the network-level problems too — it means fewer requests, and servers (Apache, at least) do not issue the boundary headers in separate packets, so the total number of packets will fall too. Note that HTTP servers are required not to issue a multipart/byteranges response if there is only a single range given.
-HTTP/1.1 allows a further improvement, because the client and server can hold a connection open and issue multiple requests. The client can send multiple requests (each of which can include multiple ranges, as described above) over the ssame connection. This saves the overhead of connection setup and shutdown. It also allows the TCP stacks to get up to their best data transfer speed: TCP implementations usually use a slow start algorithm, where data is transmitted slowly at first, then increasing the speed until packet loss begins; this is a way of feeling out the available bandwidth between the two ends. Transfer speed is important, because even though zsync transmits less data, it could still take longer than a full transfer if the speed was much lower. TCP stacks are also free to perform other optimisations, like the Nagle algorithm, where packets are delayed so that ACK packets can be merged with outgoing data packets.
+HTTP/1.1 allows a further improvement, because the client and server can hold a connection open and issue multiple requests. The client can send multiple requests (each of which can include multiple ranges, as described above) over the same connection. This saves the overhead of connection setup and shutdown. It also allows the TCP stacks to get up to their best data transfer speed: TCP implementations usually use a slow start algorithm, where data is transmitted slowly at first, then increasing the speed until packet loss begins; this is a way of feeling out the available bandwidth between the two ends. Transfer speed is important, because even though zsync transmits less data, it could still take longer than a full transfer if the speed was much lower. TCP stacks are also free to perform other optimisations, like the Nagle algorithm, where packets are delayed so that ACK packets can be merged with outgoing data packets.
Finally, HTTP/1.1 allows pipelining. This allows the client to submit multiple requests without waiting for responses to each request before issuing the next. This is the difference between a full-duplex and a half-duplex connection between the two ends — while the client will be transmitting little to the server, it would clearly be less than ideal if the server has to pause and wait after finishing one block before receiving instructions for the next. While this could be worked around by having multiple connections to the server (so while one was waiting the other would still be transmitting), this would be far more complicated to implement and would be subject to the arbitrary choice of the server and of the network as to which connection used the most bandwidth.
zsync-0.0.1 used HTTP/1.0, with 5 ranges per request, a single connection to the server, and a new connection for every request. It could manage 200-350kbps on my ADSL line. zsync-0.0.2 uses HTTP/1.1, keeping the connection open as long as possible, and pipelining its requests, as well as asking for up to 20 ranges per request — it achieves a sustained 480kbps — which is about the normal limit of my 512kbps ADSL line.
To minimise network load and maximise transfer speed, it is essential for any zsync implementation to use multiple ranges per request, HTTP/1.1 persistent connections and pipelining. See for more discussion of the performance advantage of HTTP/1.1 - although much of this paper is concerned with links between documents and retrieving links from small, partially downloaded files, some of the HTTP/1.1 and pipelining material is very relevant.
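To illustrate what that recommendation amounts to on the wire, here is a hedged sketch (not zsync's actual http.c; the host, path and helper function are invented for the example) that builds two pipelined HTTP/1.1 requests, each carrying several byte ranges, ready to be written down a single persistent connection:

/* Sketch only: format pipelined HTTP/1.1 multi-range requests into one buffer. */
#include <stdio.h>
#include <string.h>

struct byterange { long long start, end; };   /* inclusive, as in Range: bytes=start-end */

static void append_request(char *buf, size_t buflen, const char *path, const char *host,
                           const struct byterange *r, int nranges)
{
    char line[512];
    int i;

    snprintf(line, sizeof line, "GET %s HTTP/1.1\r\nHost: %s\r\nRange: bytes=", path, host);
    strncat(buf, line, buflen - strlen(buf) - 1);
    for (i = 0; i < nranges; i++) {
        snprintf(line, sizeof line, "%s%lld-%lld", i ? "," : "", r[i].start, r[i].end);
        strncat(buf, line, buflen - strlen(buf) - 1);
    }
    /* no "Connection: close" header - the connection stays open for the next request */
    strncat(buf, "\r\n\r\n", buflen - strlen(buf) - 1);
}

int main(void)
{
    struct byterange first[] = { {1024, 2047}, {6144, 8191}, {12288, 13311} };
    struct byterange second[] = { {20480, 24575}, {28672, 29695} };
    char pipeline[4096] = "";

    /* both requests are queued before any response is read - that is the pipelining */
    append_request(pipeline, sizeof pipeline, "/current.iso", "example.com", first, 3);
    append_request(pipeline, sizeof pipeline, "/current.iso", "example.com", second, 2);
    fputs(pipeline, stdout);
    return 0;
}

The ranges and counts here are arbitrary; the point is that several ranges share one request's headers, and several requests share one connection, which is where the savings described in this section come from.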
Comparison with rsync
Really this section is only of theoretical value. The relevant difference between zsync and rsync is that rsync requires special server support, and uses this to enable a richer data transfer, with file permissions, tunnelling over SSH, etc., whereas zsync can be used with no active server support. But it is interesting to compare the effect that this has on their data transfer abilities.
zsync incurs overheads due to HTTP headers. rsync must wrap the data in its own protocol, but has presumably chosen an efficient protocol for this purpose. rsync also has the overhead of rsh, ssh or whatever protocol it uses to talk to rsyncd, but again this will be rather smaller than the overhead for HTTP.
@@ -170,15 +184,39 @@ SHA-1: 97edb7d0d7daa7864c45edf14add33ec23ae94f8
Work out what should go in the library, to be useful to other client programs. In theory, any web browser could have a zsync plugin and use it to save on large, regular downloads. A web proxy could even implement this transparently.
Integrate my local modifications back into zlib.
-Empirical Results
As zsync develops, I am performing a number of test runs, and cataloguing the results here. The numbers here must be taken in the context that the current implementation is not yet fully optimised.
Numbers given here reflect application layer traffic only - I have not attempted to account for TCP/IP headers. Generally speaking, provided the algorithm does not result in more data being transmitted, and provided it does not needlessly fragment packets or require lots of separate connections, there should be no extra overhead at the network level relative to a full download. zsync-0.0.2 and up satisfy these requirements in my view. I have done some empirical verification of this, but not to the same precision as the other numbers here.
Numbers for zsync are the figures given by zsync itself when exiting - this includes only downstream traffic (upstream traffic is typically negligible with zsync - necessarily so, as the client is doing all the work). Numbers for rsync are downstream, but with upstream traffic given in brackets afterwards, as returned by rsync -vv (note in particular that rsync's figures appear to neglect the transport overhead or rsh/ssh, although for rsh I assume this overhead would be negligible anyway). zsync downloads the checksums and then downloads the data, whereas rsync uploads the checksums and then downloads the data, so roughly speaking the up+down data for rsync should equal the down data for zsync, if all is well.
Uncompressed Data
This section deals with data files which are not generally compressed, perhaps because the data they contain is already compressed, albeit not in a form recognisable to file handling tools - e.g. ISO files containing compressed data files, or JPEG images.
The first test file is sarge-i386-netinst.iso (Debian-Installer CD image), with the user updating from the 2004-10-29 snapshot (md5sum ca5b63d27a3bf2d30fe65429879f630b) to the 2004-10-30 snapshot (md5sum ef8bd520026cef6090a18d1c7ac66a39). Inter-day snapshots like this should have large amounts in common. Both files are around 110MB.
I tried various block sizes.

Method      | Block size (bytes)    | Transferred (bytes)
zsync-0.0.6 | 512                   | 13278966
zsync-0.0.6 | 1024                  | 11347004
zsync-0.0.6 | 2048                  | 10784543
zsync-0.0.6 | 4096                  | 10409473
zsync-0.0.6 | 8192                  | 10357172
zsync-0.0.6 | 16384                 | 10562326
rsync       | 1024                  | 9479309 (+770680 up)
rsync       | 2048                  | 9867587 (+385358 up)
rsync       | 4096                  | 9946883 (+192697)
rsync       | 8192                  | 10109455 (+96370)
rsync       | default (auto-select) | 10210013 (+74380)

zsync transferred more file data as the block size was increased, but this was more than offset by a smaller .zsync file to download initially. At a block size of 512, the .zsync file was over 4.4MB - this fell to 2,2MB, 1.1MB and 550kB and so on for the larger blocksizes. It is clear that rsync defaults to a much larger blocksize on files of this type, with under 100k of metadata transmitted up to the server. All the results were very close, however: the most obvious feature of the results is that in all cases only about 10MB was transferred, a saving of around 90% on the full download of 113MB.
Next, I tested an update from a Fedora Core 3 test2 iso image (668MB, md5sum ) to Fedora Core 3 test3 (640MB) (two Linux distribution CD images, with significant differences between them).

Method      | Blocksize (bytes) | Transferred (bytes)
rsync       | 8192              | 363312079 (+571453)
zsync-0.0.6 | 8192              | 366356894

zsync closely parallels rsync's result here. Roughly 50% of the files are in common I guess from these results, and somewhere around 60% is being transferred. zsync (paired with apache 2.0.52) took about 6 minutes in a local to local transfer, while rsync took about 7 minutes (over rsh).
-Compressed Files
There are more combinations to consider in the case of compressed files. I have only got rsync numbers for a few of the files here so far. I have broken them down by how the file to be transferred is compressed (none, gzip, or gzip --rsync) and whether zsync's look-inside-gzip functionality was used. I have also included numbers for I took two Debian Packages files, downloaded a day apart, as the source and target files. The target file was 12.1MB, or 3.1MB gzipped. A diff of the two (deflated) files took 58kb. I have included the transferred data as (file data + control data), where control data is just the size of the .zsync file (which clearly cannot be neglected as it must be downloaded, so it is an overhead of the algorithm); except for rsync, where the checksums are transmitted upstream and are shown separately. Several methods were used. Firstly, for comparison, working on the full deflated 12.1MB:
+Empirical Results
+As zsync develops, I am performing a number of test runs, and cataloguing the results here. The numbers here must be taken in the context that the current implementation is not yet fully optimised.
+Numbers given here reflect application layer traffic only - I have not attempted to account for TCP/IP headers. Generally speaking, provided the algorithm does not result in more data being transmitted, and provided it does not needlessly fragment packets or require lots of separate connections, there should be no extra overhead at the network level relative to a full download. zsync-0.0.2 and up satisfy these requirements in my view. I have done some empirical verification of this, but not to the same precision as the other numbers here.
+Numbers for zsync are the figures given by zsync itself when exiting - this includes only downstream traffic (upstream traffic is typically negligible with zsync - necessarily so, as the client is doing all the work).
Numbers for rsync are downstream, but with upstream traffic given in brackets afterwards, as returned by rsync -vv (note in particular that rsync's figures appear to neglect the transport overhead of rsh/ssh, although for rsh I assume this overhead would be negligible anyway). zsync downloads the checksums and then downloads the data, whereas rsync uploads the checksums and then downloads the data, so roughly speaking the up+down data for rsync should equal the down data for zsync, if all is well.
+Uncompressed Data
+This section deals with data files which are not generally compressed, perhaps because the data they contain is already compressed, albeit not in a form recognisable to file handling tools - e.g. ISO files containing compressed data files, or JPEG images.
+The first test file is sarge-i386-netinst.iso (Debian-Installer CD image), with the user updating from the 2004-10-29 snapshot (md5sum ca5b63d27a3bf2d30fe65429879f630b) to the 2004-10-30 snapshot (md5sum ef8bd520026cef6090a18d1c7ac66a39). Inter-day snapshots like this should have large amounts in common. Both files are around 110MB.
+I tried various block sizes (rsync's default for files of this size is around 8kB). I have included zsync prior to the checksum length optimisations, for historical reference. Bear in mind that zsync-0.2.0's block sizes are not directly comparable to rsync or earlier zsync, because it requires 2 consecutive matches; hence zsync-0.2.0 with a block size of 1024 may be more directly comparable to rsync with a block size of 2048.
+
+Block size (bytes)        | 512      | 1024                 | 2048                 | 4096              | 8192              | 16384
+zsync-0.0.6               | 13278966 | 11347004             | 10784543             | 10409473          | 10357172          | 10562326
+rsync                     | -        | 9479309 (+770680 up) | 9867587 (+385358 up) | 9946883 (+192697) | 10109455 (+96370) | -
+zsync-0.2.0 (pre-release) | 10420370 | 10367061             | 10093596             | 10111121          | 10250799          | 10684655
+
+zsync transferred more file data as the block size was increased, as expected. At a block size of 512, the .zsync file was around 1.5MB - this fell to 660kB, 330kB and so on for the larger blocksizes. All the results were very close, however: the most obvious feature of the results is that in all cases only about 10MB was transferred, a saving of around 90% on the full download of 113MB.
+Next, I tested an update from a Fedora Core 3 test2 iso image (668MB, md5sum ) to Fedora Core 3 test3 (640MB) (two Linux distribution CD images, with significant differences between them).
+
+Blocksize (bytes) | 512       | 1024                    | 2048                    | 4096                    | 8192                | 16384
+rsync             | -         | 339684147 (+5224424 up) | 345822571 (+2612232 up) | 353812835 (+1306136 up) | 363311939 (+571457) | 374611439 (+285752 up)
+zsync-0.0.6       | -         | -                       | -                       | -                       | 366356894           | -
+zsync-0.2.0       | 347181962 | 347151941               | 352041787               | 359541472               | 369585481           | 380574374
+
+zsync closely parallels rsync's result here. From these results I guess that roughly 50% of the files are in common, and somewhere around 60% is being transferred. zsync (paired with apache 2.0.52) took about 6 minutes in a local to local transfer, while rsync took about 7 minutes (over rsh).
+For reference, here are the CPU times used corresponding to the table above, in seconds. These are just indicative, as they include downloading the control files and the final checksum verification (except for rsync, which does not do this), and the machine was not idle, nor did I flush disk cache etc. between runs. Nonetheless, this gives an indication of how expensive the smaller block sizes are, which is an important consideration for larger files.
+
+Blocksize (bytes) | 512  | 1024 | 2048 | 4096 | 8192 | 16384
+rsync             | -    | 1113 | 570  | 418  | 314  | 205
+zsync-0.2.0       | 1785 | 931  | 520  | 297  | 219  | 158
+
+zsync appears to be very close to rsync, both in CPU usage and transfer efficiency.
+
+Compressed Files
+This section pertains to zsync-0.1.x, and needs updating for the newest version. There are more combinations to consider in the case of compressed files. I have only got rsync numbers for a few of the files here so far. I have broken them down by how the file to be transferred is compressed (none, gzip, or gzip --rsync) and whether zsync's look-inside-gzip functionality was used. I have also included numbers for I took two Debian Packages files, downloaded a day apart, as the source and target files. The target file was 12.1MB, or 3.1MB gzipped. A diff of the two (deflated) files took 58kb. I have included the transferred data as (file data + control data), where control data is just the size of the .zsync file (which clearly cannot be neglected as it must be downloaded, so it is an overhead of the algorithm); except for rsync, where the checksums are transmitted upstream and are shown separately.
+Several methods were used. Firstly, for comparison, working on the full deflated 12.1MB:

Blocksize (bytes) | 512    | 1024   | 2048   | 4096   | 8192   | 16384
zsync-0.1.0 (pr)  | 564709 | 353690 | 279580 | 306050 | 468092 | 723102
rsync-2.6.3       | 175004 (+142414 up) | 161581 (+71226 up) | 190176 (+35634 up) | 258256 (+17838 up) | 403904 (+8940 up) | 728634 (+4488 up)

Next, on the file compressed with gzip --best. For a fairer comparison with rsync, and to show the difference that the look-inside method makes, zsync without the look-inside method is shown too. As expected, without look-inside or with rsync, almost the entire 3.1MB compressed file is transferred.

Blocksize (bytes)                    | 512     | 1024    | 2048    | 4096    | 8192    | 16384
zsync-0.1.2 with look-inside         | 613532  | 339527  | 217883  | 190338  | 230413  | failed
zsync 0.1.0 (pr) without look-inside | 3134061 | 3074067 | 3044591 | 3033427 | 3033999 | 3046564
rsync-2.6.3                          | 3012791 (+36636 up) | 3013371 (+18336 up) | 3014172 (+9186 up) | 3018156 (+4614 up) | 3026296 (+2328 up) | 3042650 (+1182 up)

Finally, the file is compressed before-and-after with gzip --best --rsync.

Blocksize (bytes)                    | 512    | 1024   | 2048   | 4096   | 8192   | 16384
zsync 0.1.2 with look-inside         | 625590 | 351942 | 228179 | 263135 | 354503 | 300098
zsync 0.1.0 (pr) without look-inside | 496567 | 449475 | 444663 | 492377 | 607225 | 840588
rsync-2.6.3                          | 390270 (+37632 up) | 392418 (+18834 up) | 417550 (+9438 up) | 472108 (+4740 up) | 581312 (+2388 up) | 818212 (+1212 up)

Debian Package files contain textual data. This is about half and half between plain English package descriptions, and key:value pairs of text data containing package names, versions, and such. The changes week to week are widespread and very scattered. Thus the compressed transfer, which effectively has larger blocks relative to the underlying content, is less efficient here.
gzip --rsync does fairly well, with rsync transferring about 420KB and zsync transferring about 450KB. zsync with the look-inside method does much better than either of these, with as little as 190K transferred. At this optimum point, zsync transferred 75KB of (compressed) file data - close to the diff in size - and 142KB of the .zsync.
Note that the look-inside and uncompressed figures at a blocksize of 1024 bytes include 250kb of data just transferring the .zsync file (and the 512 byte blocksize transfer had a 478kb control file, representing over one third of the data transfer) - given that the underlying data is plain text, transmitting a full 20 checksum bytes per block is probably excessive (especially for smaller blocks), so significant savings could be made here.
The methods just looking at the compressed data only had to transfer a 60kb .zsync file at blocksize 1024 bytes (smaller stream, so fewer blocks, so fewer checksums), but their greater inefficiency in identifying common data easily wiped out this saving.
The uncompressed data does quite well, better than most of the compressed transfers. However this good performance will only occur where the data files are very close - for updates more than a few days apart, where there is less data in common, compressed transfers can be expected to take a clear lead. Providing only a compressed stream also saves on disk space on the server.
-Observations
It is early to draw conclusions, but we can have a few observations: zsync without look-inside closely matches rsync's performance. I should investigate the block size that rsync uses. It seems that zsync's default closely parallels rsync's default on files of the order of megabytes in size ; but for files of hundreds of megabytes, rsync is using a rather larger blocksize. zsync with look-inside is significantly ahead of rsync, and is a clear winner with data compressed without --rsync. Where available, it appears that zsync does best using the compressed data and the look-inside method. Look-inside is beneficial, particularly on non---rsync gzip files. There is not yet any evidence that the larger .zsync files it needs, relative to just using gzipped files with --rsync, are ever harmful, and certainly it appears beneficial. gzip --rsync produces files that work with zsync whether or not look-inside is used. But, in the current version of zsync, it is less effective than zsync's look-inside behaviour. zsync currently does worse in some situations when --rsync is used - but it is possible that zsync could be tuned so that --rsync was, at least, not harmful to its performance. Some of the results for gzip --rsync files look anomalous.
+
+Observations
+It is early to draw conclusions, but we can make a few observations:
+zsync without look-inside closely matches rsync's performance.
+The checksum length and consecutive-matches optimisations have moved zsync to being competitive with rsync even at small block sizes, and have lowered the optimum block size to 1024 or 2048 bytes in my test cases.
+The difference in data transferred, for large composite files like ISO images, between blocksizes of 1024 and 4096, is negligible, and so 4096 is probably optimal once performance and disk usage are taken into account.
+rsync's default choice of blocksize is designed to minimise load and not to obtain minimal data transfer: its default block size was well above the optimum for data transfer that I measured.
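To put the blocksize trade-off in these observations into rough numbers, a back-of-the-envelope sketch; the 2-byte rsum and 4-byte strong checksum are illustrative values, not necessarily what zsyncmake would pick, and the textual header of the .zsync file is ignored:

#include <stdio.h>

int main(void)
{
    const long long filesize = 113LL * 1024 * 1024;   /* roughly the ISO used in the tests above */
    const int rsum_bytes = 2, checksum_bytes = 4;     /* assumed per-block hash lengths */
    long long bs;

    for (bs = 512; bs <= 16384; bs *= 2) {
        long long blocks = (filesize + bs - 1) / bs;
        printf("blocksize %5lld: %7lld blocks, ~%4lld kB of block checksums in the .zsync\n",
               bs, blocks, blocks * (rsum_bytes + checksum_bytes) / 1024);
    }
    return 0;
}

Halving the blocksize roughly doubles this metadata, which then has to be won back through better-targeted file data before the smaller blocksize pays off - the trade described in the last two observations.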