From b148474ef84e6062178a8cb3ed2bdbdb098b633b Mon Sep 17 00:00:00 2001 From: Steeve Morin Date: Sun, 13 Jan 2013 20:20:55 +0100 Subject: [PATCH] Updating to lz4 r88 --- src/lz4.c | 933 ++++++++++++++++++++++++++++------------------------ src/lz4.h | 94 +++--- src/lz4hc.c | 799 ++++++++++++++++++++++++-------------------- 3 files changed, 982 insertions(+), 844 deletions(-) diff --git a/src/lz4.c b/src/lz4.c index 06e2829..a35f12b 100644 --- a/src/lz4.c +++ b/src/lz4.c @@ -34,31 +34,24 @@ //************************************** // Tuning parameters //************************************** -// COMPRESSIONLEVEL : -// Increasing this value improves compression ratio -// Lowering this value reduces memory usage -// Reduced memory usage typically improves speed, due to cache effect (ex : L1 32KB for Intel, L1 64KB for AMD) -// Memory usage formula : N->2^(N+2) Bytes (examples : 12 -> 16KB ; 17 -> 512KB) -#define COMPRESSIONLEVEL 12 - -// NOTCOMPRESSIBLE_CONFIRMATION : +// MEMORY_USAGE : +// Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.) +// Increasing memory usage improves compression ratio +// Reduced memory usage can improve speed, due to cache effect +// Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache +#define MEMORY_USAGE 14 + +// NOTCOMPRESSIBLE_DETECTIONLEVEL : // Decreasing this value will make the algorithm skip faster data segments considered "incompressible" // This may decrease compression ratio dramatically, but will be faster on incompressible data // Increasing this value will make the algorithm search more before declaring a segment "incompressible" // This could improve compression a bit, but will be slower on incompressible data // The default value (6) is recommended -#define NOTCOMPRESSIBLE_CONFIRMATION 6 - -// LZ4_COMPRESSMIN : -// Compression function will *fail* if it is not successful at compressing input by at least LZ4_COMPRESSMIN bytes -// Since the compression function stops working prematurely, it results in a speed gain -// The output however is unusable. Compression function result will be zero. -// Default : 0 = disabled -#define LZ4_COMPRESSMIN 0 +#define NOTCOMPRESSIBLE_DETECTIONLEVEL 6 // BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE : -// This will provide a boost to performance for big endian cpu, but the resulting compressed stream will be incompatible with little-endian CPU. -// You can set this option to 1 in situations where data will stay within closed environment +// This will provide a small boost to performance for big endian cpu, but the resulting compressed stream will be incompatible with little-endian CPU. +// You can set this option to 1 in situations where data will remain within closed environment // This option is useless on Little_Endian CPU (such as x86) //#define BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 @@ -75,8 +68,18 @@ #endif // Little Endian or Big Endian ? 
-// Note : overwrite the below #define if you know your architecture endianess -#if (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || defined(_BIG_ENDIAN) || defined(_ARCH_PPC) || defined(__PPC__) || defined(__PPC) || defined(PPC) || defined(__powerpc__) || defined(__powerpc) || defined(powerpc) || ((defined(__BYTE_ORDER__)&&(__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))) ) +// Overwrite the #define below if you know your architecture endianness +#if defined (__GLIBC__) +# include <endian.h> +# if (__BYTE_ORDER == __BIG_ENDIAN) +# define LZ4_BIG_ENDIAN 1 +# endif +#elif (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || defined(_BIG_ENDIAN)) && !(defined(__LITTLE_ENDIAN__) || defined(__LITTLE_ENDIAN) || defined(_LITTLE_ENDIAN)) +# define LZ4_BIG_ENDIAN 1 +#elif defined(__sparc) || defined(__sparc__) \ + || defined(__ppc__) || defined(_POWER) || defined(__powerpc__) || defined(_ARCH_PPC) || defined(__PPC__) || defined(__PPC) || defined(PPC) || defined(__powerpc__) || defined(__powerpc) || defined(powerpc) \ + || defined(__hpux) || defined(__hppa) \ + || defined(_MIPSEB) || defined(__s390__) # define LZ4_BIG_ENDIAN 1 #else // Little Endian assumed. PDP Endian and other very rare endian format are unsupported. @@ -84,7 +87,7 @@ // Unaligned memory access is automatically enabled for "common" CPU, such as x86. // For others CPU, the compiler will be more cautious, and insert extra code to ensure aligned access is respected -// If you know your target CPU supports unaligned memory access, you may want to force this option manually to improve performance +// If you know your target CPU supports unaligned memory access, you may want to force this option manually to improve performance #if defined(__ARM_FEATURE_UNALIGNED) # define LZ4_FORCE_UNALIGNED_ACCESS 1 #endif @@ -98,7 +101,7 @@ //************************************** // Compiler Options //************************************** -#if __STDC_VERSION__ >= 199901L // C99 +#if __STDC_VERSION__ >= 199901L // C99 /* "restrict" is a known keyword */ #else # define restrict // Disable restrict @@ -107,7 +110,7 @@ #define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) #ifdef _MSC_VER // Visual Studio -# define inline __forceinline // Visual is not C99, but supports some kind of inline +# include <intrin.h> // For Visual 2005 # if LZ4_ARCH64 // 64-bit # pragma intrinsic(_BitScanForward64) // For Visual 2005 # pragma intrinsic(_BitScanReverse64) // For Visual 2005 @@ -181,11 +184,11 @@ typedef struct _U64_S { U64 v; } U64_S; //************************************** #define MINMATCH 4 -#define HASH_LOG COMPRESSIONLEVEL +#define HASH_LOG (MEMORY_USAGE-2) #define HASHTABLESIZE (1 << HASH_LOG) #define HASH_MASK (HASHTABLESIZE - 1) -#define SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION>2?NOTCOMPRESSIBLE_CONFIRMATION:2) +#define SKIPSTRENGTH (NOTCOMPRESSIBLE_DETECTIONLEVEL>2?NOTCOMPRESSIBLE_DETECTIONLEVEL:2) #define STACKLIMIT 13 #define HEAPMODE (HASH_LOG>STACKLIMIT) // Defines if memory is allocated into the stack (local variable), or into the heap (malloc()).
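The sizing arithmetic behind these new macros, for reference: hash table entries are 4 bytes wide, so HASH_LOG = MEMORY_USAGE-2 gives a table of exactly 2^MEMORY_USAGE bytes, and HEAPMODE switches the allocation from a stack array to malloc() once HASH_LOG exceeds STACKLIMIT. A minimal standalone sketch of that arithmetic (plain C reusing the macro values from the hunk above; not part of the patch itself):

#include <stdio.h>

#define MEMORY_USAGE  14                      /* default from this patch */
#define HASH_LOG      (MEMORY_USAGE-2)        /* -2 because each entry is 4 bytes */
#define HASHTABLESIZE (1 << HASH_LOG)         /* number of entries */
#define STACKLIMIT    13
#define HEAPMODE      (HASH_LOG>STACKLIMIT)   /* 1 = malloc(), 0 = stack array */

int main(void)
{
    /* 2^HASH_LOG entries * 4 bytes/entry == 2^MEMORY_USAGE bytes == 16KB */
    printf("entries=%d bytes=%d heapmode=%d\n",
           HASHTABLESIZE, HASHTABLESIZE * 4, HEAPMODE);
    return 0;
}

With the default MEMORY_USAGE of 14 this prints entries=4096 bytes=16384 heapmode=0, matching the "16KB nicely fits into Intel x86 L1 cache" comment above.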
#define COPYLENGTH 8 @@ -196,8 +199,8 @@ typedef struct _U64_S { U64 v; } U64_S; #define MAXD_LOG 16 #define MAX_DISTANCE ((1 << MAXD_LOG) - 1) -#define ML_BITS 4 -#define ML_MASK ((1U<= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) return (__builtin_clzll(val) >> 3); #else - int r; - if (!(val>>32)) { r=4; } else { r=0; val>>=32; } - if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } - r += (!val); - return r; + int r; + if (!(val>>32)) { r=4; } else { r=0; val>>=32; } + if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } + r += (!val); + return r; #endif #else #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) @@ -281,15 +284,15 @@ inline static int LZ4_NbCommonBytes (register U64 val) #elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) return (__builtin_ctzll(val) >> 3); #else - static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 }; - return DeBruijnBytePos[((U64)((val & -val) * 0x0218A392CDABBD3F)) >> 58]; + static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 }; + return DeBruijnBytePos[((U64)((val & -val) * 0x0218A392CDABBD3F)) >> 58]; #endif #endif } #else -inline static int LZ4_NbCommonBytes (register U32 val) +static inline int LZ4_NbCommonBytes (register U32 val) { #if defined(LZ4_BIG_ENDIAN) #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) @@ -299,10 +302,10 @@ inline static int LZ4_NbCommonBytes (register U32 val) #elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) return (__builtin_clz(val) >> 3); #else - int r; - if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } - r += (!val); - return r; + int r; + if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } + r += (!val); + return r; #endif #else #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) @@ -312,8 +315,8 @@ inline static int LZ4_NbCommonBytes (register U32 val) #elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) return (__builtin_ctz(val) >> 3); #else - static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 }; - return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; + static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 }; + return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; #endif #endif } @@ -321,154 +324,173 @@ inline static int LZ4_NbCommonBytes (register U32 val) #endif -//**************************** -// Public functions -//**************************** - -int LZ4_compressBound(int isize) -{ - return (isize + (isize/255) + 16); -} - - //****************************** // Compression functions //****************************** -int LZ4_compressCtx(void** ctx, - const char* source, - char* dest, - int isize) +// LZ4_compressCtx : +// ----------------- +// Compress 'isize' bytes from 'source' into an output buffer 'dest' of maximum size 'maxOutputSize'. +// If it cannot achieve it, compression will stop, and result of the function will be zero. 
+// return : the number of bytes written in buffer 'dest', or 0 if the compression fails + +static inline int LZ4_compressCtx(void** ctx, + const char* source, + char* dest, + int isize, + int maxOutputSize) { #if HEAPMODE - struct refTables *srt = (struct refTables *) (*ctx); - HTYPE* HashTable; + struct refTables *srt = (struct refTables *) (*ctx); + HTYPE* HashTable; #else - HTYPE HashTable[HASHTABLESIZE] = {0}; + HTYPE HashTable[HASHTABLESIZE] = {0}; #endif - const BYTE* ip = (BYTE*) source; - INITBASE(base); - const BYTE* anchor = ip; - const BYTE* const iend = ip + isize; - const BYTE* const mflimit = iend - MFLIMIT; + const BYTE* ip = (BYTE*) source; + INITBASE(base); + const BYTE* anchor = ip; + const BYTE* const iend = ip + isize; + const BYTE* const mflimit = iend - MFLIMIT; #define matchlimit (iend - LASTLITERALS) - BYTE* op = (BYTE*) dest; + BYTE* op = (BYTE*) dest; + BYTE* const oend = op + maxOutputSize; - int len, length; - const int skipStrength = SKIPSTRENGTH; - U32 forwardH; + int len, length; + const int skipStrength = SKIPSTRENGTH; + U32 forwardH; - // Init - if (isizehashTable); - memset((void*)HashTable, 0, sizeof(srt->hashTable)); + if (*ctx == NULL) + { + srt = (struct refTables *) malloc ( sizeof(struct refTables) ); + *ctx = (void*) srt; + } + HashTable = (HTYPE*)(srt->hashTable); + memset((void*)HashTable, 0, sizeof(srt->hashTable)); #else - (void) ctx; + (void) ctx; #endif - // First Byte - HashTable[LZ4_HASH_VALUE(ip)] = ip - base; - ip++; forwardH = LZ4_HASH_VALUE(ip); + // First Byte + HashTable[LZ4_HASH_VALUE(ip)] = ip - base; + ip++; forwardH = LZ4_HASH_VALUE(ip); - // Main Loop + // Main Loop for ( ; ; ) - { - int findMatchAttempts = (1U << skipStrength) + 3; - const BYTE* forwardIp = ip; - const BYTE* ref; - BYTE* token; - - // Find a match - do { - U32 h = forwardH; - int step = findMatchAttempts++ >> skipStrength; - ip = forwardIp; - forwardIp = ip + step; - - if unlikely(forwardIp > mflimit) { goto _last_literals; } - - forwardH = LZ4_HASH_VALUE(forwardIp); - ref = base + HashTable[h]; - HashTable[h] = ip - base; - - } while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip))); - - // Catch up - while ((ip>anchor) && (ref>(BYTE*)source) && unlikely(ip[-1]==ref[-1])) { ip--; ref--; } - - // Encode Literal length - length = ip - anchor; - token = op++; - if (length>=(int)RUN_MASK) { *token=(RUN_MASK< 254 ; len-=255) *op++ = 255; *op++ = (BYTE)len; } - else *token = (length<> skipStrength; + ip = forwardIp; + forwardIp = ip + step; + + if unlikely(forwardIp > mflimit) { goto _last_literals; } + + forwardH = LZ4_HASH_VALUE(forwardIp); + ref = base + HashTable[h]; + HashTable[h] = ip - base; + + } while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip))); + + // Catch up + while ((ip>anchor) && (ref>(BYTE*)source) && unlikely(ip[-1]==ref[-1])) { ip--; ref--; } + + // Encode Literal length + length = (int)(ip - anchor); + token = op++; + if unlikely(op + length + (2 + 1 + LASTLITERALS) + (length>>8) > oend) return 0; // Check output limit +#ifdef _MSC_VER + if (length>=(int)RUN_MASK) + { + int len = length-RUN_MASK; + *token=(RUN_MASK<254) + { + do { *op++ = 255; len -= 255; } while (len>254); + *op++ = (BYTE)len; + memcpy(op, anchor, length); + op += length; + goto _next_match; + } + else + *op++ = (BYTE)len; + } + else *token = (length<=(int)RUN_MASK) { *token=(RUN_MASK< 254 ; len-=255) *op++ = 255; *op++ = (BYTE)len; } + else *token = (length<=(int)ML_MASK) { *token+=ML_MASK; len-=ML_MASK; for(; len > 509 ; len-=510) { *op++ = 255; *op++ = 255; } if (len 
> 254) { len-=255; *op++ = 255; } *op++ = (BYTE)len; } - else *token += len; + // Encode MatchLength + len = (int)(ip - anchor); + if unlikely(op + (1 + LASTLITERALS) + (len>>8) > oend) return 0; // Check output limit + if (len>=(int)ML_MASK) { *token+=ML_MASK; len-=ML_MASK; for(; len > 509 ; len-=510) { *op++ = 255; *op++ = 255; } if (len > 254) { len-=255; *op++ = 255; } *op++ = (BYTE)len; } + else *token += len; - // Test end of chunk - if (ip > mflimit) { anchor = ip; break; } + // Test end of chunk + if (ip > mflimit) { anchor = ip; break; } - // Fill table - HashTable[LZ4_HASH_VALUE(ip-2)] = ip - 2 - base; + // Fill table + HashTable[LZ4_HASH_VALUE(ip-2)] = ip - 2 - base; - // Test next position - ref = base + HashTable[LZ4_HASH_VALUE(ip)]; - HashTable[LZ4_HASH_VALUE(ip)] = ip - base; - if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) { token = op++; *token=0; goto _next_match; } + // Test next position + ref = base + HashTable[LZ4_HASH_VALUE(ip)]; + HashTable[LZ4_HASH_VALUE(ip)] = ip - base; + if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) { token = op++; *token=0; goto _next_match; } - // Prepare next loop - anchor = ip++; - forwardH = LZ4_HASH_VALUE(ip); - } + // Prepare next loop + anchor = ip++; + forwardH = LZ4_HASH_VALUE(ip); + } _last_literals: - // Encode Last Literals - { - int lastRun = iend - anchor; - if ((LZ4_COMPRESSMIN>0) && (((op - (BYTE*)dest) + lastRun + 1 + ((lastRun-15)/255)) > isize - LZ4_COMPRESSMIN)) return 0; - if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK< 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; } - else *op++ = (lastRun< (U32)maxOutputSize) return 0; + if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK< 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; } + else *op++ = (lastRun<> ((MINMATCH*8)-HASHLOG64K)) #define LZ4_HASH64K_VALUE(p) LZ4_HASH64K_FUNCTION(A32(p)) -int LZ4_compress64kCtx(void** ctx, - const char* source, - char* dest, - int isize) +static inline int LZ4_compress64kCtx(void** ctx, + const char* source, + char* dest, + int isize, + int maxOutputSize) { #if HEAPMODE - struct refTables *srt = (struct refTables *) (*ctx); - U16* HashTable; + struct refTables *srt = (struct refTables *) (*ctx); + U16* HashTable; #else - U16 HashTable[HASH64KTABLESIZE] = {0}; + U16 HashTable[HASH64KTABLESIZE] = {0}; #endif - const BYTE* ip = (BYTE*) source; - const BYTE* anchor = ip; - const BYTE* const base = ip; - const BYTE* const iend = ip + isize; - const BYTE* const mflimit = iend - MFLIMIT; + const BYTE* ip = (BYTE*) source; + const BYTE* anchor = ip; + const BYTE* const base = ip; + const BYTE* const iend = ip + isize; + const BYTE* const mflimit = iend - MFLIMIT; #define matchlimit (iend - LASTLITERALS) - BYTE* op = (BYTE*) dest; + BYTE* op = (BYTE*) dest; + BYTE* const oend = op + maxOutputSize; - int len, length; - const int skipStrength = SKIPSTRENGTH; - U32 forwardH; + int len, length; + const int skipStrength = SKIPSTRENGTH; + U32 forwardH; - // Init - if (isizehashTable); - memset((void*)HashTable, 0, sizeof(srt->hashTable)); + if (*ctx == NULL) + { + srt = (struct refTables *) malloc ( sizeof(struct refTables) ); + *ctx = (void*) srt; + } + HashTable = (U16*)(srt->hashTable); + memset((void*)HashTable, 0, sizeof(srt->hashTable)); #else - (void) ctx; + (void) ctx; #endif - // First Byte - ip++; forwardH = LZ4_HASH64K_VALUE(ip); + // First Byte + ip++; forwardH = LZ4_HASH64K_VALUE(ip); - // Main Loop + // Main Loop for ( ; ; ) - { - int findMatchAttempts = (1U << skipStrength) + 3; - const BYTE* forwardIp 
= ip; - const BYTE* ref; - BYTE* token; - - // Find a match - do { - U32 h = forwardH; - int step = findMatchAttempts++ >> skipStrength; - ip = forwardIp; - forwardIp = ip + step; - - if (forwardIp > mflimit) { goto _last_literals; } - - forwardH = LZ4_HASH64K_VALUE(forwardIp); - ref = base + HashTable[h]; - HashTable[h] = ip - base; - - } while (A32(ref) != A32(ip)); - - // Catch up - while ((ip>anchor) && (ref>(BYTE*)source) && (ip[-1]==ref[-1])) { ip--; ref--; } - - // Encode Literal length - length = ip - anchor; - token = op++; - if (length>=(int)RUN_MASK) { *token=(RUN_MASK< 254 ; len-=255) *op++ = 255; *op++ = (BYTE)len; } - else *token = (length<> skipStrength; + ip = forwardIp; + forwardIp = ip + step; + + if (forwardIp > mflimit) { goto _last_literals; } + + forwardH = LZ4_HASH64K_VALUE(forwardIp); + ref = base + HashTable[h]; + HashTable[h] = (U16)(ip - base); + + } while (A32(ref) != A32(ip)); + + // Catch up + while ((ip>anchor) && (ref>(BYTE*)source) && (ip[-1]==ref[-1])) { ip--; ref--; } + + // Encode Literal length + length = (int)(ip - anchor); + token = op++; + if unlikely(op + length + (2 + 1 + LASTLITERALS) + (length>>8) > oend) return 0; // Check output limit +#ifdef _MSC_VER + if (length>=(int)RUN_MASK) + { + int len = length-RUN_MASK; + *token=(RUN_MASK<254) + { + do { *op++ = 255; len -= 255; } while (len>254); + *op++ = (BYTE)len; + memcpy(op, anchor, length); + op += length; + goto _next_match; + } + else + *op++ = (BYTE)len; + } + else *token = (length<=(int)RUN_MASK) { *token=(RUN_MASK< 254 ; len-=255) *op++ = 255; *op++ = (BYTE)len; } + else *token = (length<=(int)ML_MASK) { *token+=ML_MASK; len-=ML_MASK; for(; len > 509 ; len-=510) { *op++ = 255; *op++ = 255; } if (len > 254) { len-=255; *op++ = 255; } *op++ = (BYTE)len; } - else *token += len; + // Encode MatchLength + len = (int)(ip - anchor); + if unlikely(op + (1 + LASTLITERALS) + (len>>8) > oend) return 0; // Check output limit + if (len>=(int)ML_MASK) { *token+=ML_MASK; len-=ML_MASK; for(; len > 509 ; len-=510) { *op++ = 255; *op++ = 255; } if (len > 254) { len-=255; *op++ = 255; } *op++ = (BYTE)len; } + else *token += len; - // Test end of chunk - if (ip > mflimit) { anchor = ip; break; } + // Test end of chunk + if (ip > mflimit) { anchor = ip; break; } - // Fill table - HashTable[LZ4_HASH64K_VALUE(ip-2)] = ip - 2 - base; + // Fill table + HashTable[LZ4_HASH64K_VALUE(ip-2)] = (U16)(ip - 2 - base); - // Test next position - ref = base + HashTable[LZ4_HASH64K_VALUE(ip)]; - HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base; - if (A32(ref) == A32(ip)) { token = op++; *token=0; goto _next_match; } + // Test next position + ref = base + HashTable[LZ4_HASH64K_VALUE(ip)]; + HashTable[LZ4_HASH64K_VALUE(ip)] = (U16)(ip - base); + if (A32(ref) == A32(ip)) { token = op++; *token=0; goto _next_match; } - // Prepare next loop - anchor = ip++; - forwardH = LZ4_HASH64K_VALUE(ip); - } + // Prepare next loop + anchor = ip++; + forwardH = LZ4_HASH64K_VALUE(ip); + } _last_literals: - // Encode Last Literals - { - int lastRun = iend - anchor; - if ((LZ4_COMPRESSMIN>0) && (((op - (BYTE*)dest) + lastRun + 1 + ((lastRun-15)/255)) > isize - LZ4_COMPRESSMIN)) return 0; - if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK< 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; } - else *op++ = (lastRun< oend) return 0; + if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK< 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; } + else *op++ = (lastRun<>ML_BITS)) == RUN_MASK) { for (;(len=*ip++)==255;length+=255){} length += len; } - - // 
copy literals - cpy = op+length; - if unlikely(cpy>oend-COPYLENGTH) - { - if (cpy > oend) goto _output_error; // Error : request to write beyond destination buffer - memcpy(op, ip, length); - ip += length; - break; // Necessarily EOF - } - LZ4_WILDCOPY(ip, op, cpy); ip -= (op-cpy); op = cpy; - - // get offset - LZ4_READ_LITTLEENDIAN_16(ref,cpy,ip); ip+=2; - if (ref < (BYTE* const)dest) goto _output_error; // Error : offset create reference outside destination buffer - - // get matchlength - if ((length=(token&ML_MASK)) == ML_MASK) { for (;*ip==255;length+=255) {ip++;} length += *ip++; } - - // copy repeated sequence - if unlikely(op-ref>ML_BITS)) == RUN_MASK) { size_t len; for (;(len=*ip++)==255;length+=255){} length += len; } + + // copy literals + cpy = op+length; + if unlikely(cpy>oend-COPYLENGTH) + { + if (cpy != oend) goto _output_error; // Error : not enough place for another match (min 4) + 5 literals + memcpy(op, ip, length); + ip += length; + break; // EOF + } + LZ4_WILDCOPY(ip, op, cpy); ip -= (op-cpy); op = cpy; + + // get offset + LZ4_READ_LITTLEENDIAN_16(ref,cpy,ip); ip+=2; + if unlikely(ref < (BYTE* const)dest) goto _output_error; // Error : offset create reference outside destination buffer + + // get matchlength + if ((length=(token&ML_MASK)) == ML_MASK) { for (;*ip==255;length+=255) {ip++;} length += *ip++; } + + // copy repeated sequence + if unlikely((op-ref)oend-COPYLENGTH) - { - if (cpy > oend) goto _output_error; // Error : request to write beyond destination buffer - LZ4_SECURECOPY(ref, op, (oend-COPYLENGTH)); - while(opoend-COPYLENGTH) + { + if (cpy > oend) goto _output_error; // Error : request to write beyond destination buffer + LZ4_SECURECOPY(ref, op, (oend-COPYLENGTH)); + while(op>ML_BITS)) == RUN_MASK) { int s=255; while ((ipoend-COPYLENGTH) || (ip+length>iend-COPYLENGTH)) - { - if (cpy > oend) goto _output_error; // Error : request to write beyond destination buffer - if (ip+length > iend) goto _output_error; // Error : request to read beyond source buffer - memcpy(op, ip, length); - op += length; - ip += length; - if (ip>ML_BITS)) == RUN_MASK) { int s=255; while ((ipoend-COPYLENGTH) || (ip+length>iend-COPYLENGTH)) + { + if (cpy > oend) goto _output_error; // Error : writes beyond output buffer + if (ip+length != iend) goto _output_error; // Error : LZ4 format requires to consume all input at this stage + memcpy(op, ip, length); + op += length; + break; // Necessarily EOF, due to parsing restrictions + } + LZ4_WILDCOPY(ip, op, cpy); ip -= (op-cpy); op = cpy; + + // get offset + LZ4_READ_LITTLEENDIAN_16(ref,cpy,ip); ip+=2; + if (ref < (BYTE* const)dest) goto _output_error; // Error : offset creates reference outside of destination buffer + + // get matchlength + if ((length=(token&ML_MASK)) == ML_MASK) { while (ipoend-COPYLENGTH) - { - if (cpy > oend) goto _output_error; // Error : request to write outside of destination buffer - LZ4_SECURECOPY(ref, op, (oend-COPYLENGTH)); - while(opoend-COPYLENGTH) + { + if (cpy > oend) goto _output_error; // Error : request to write outside of destination buffer + LZ4_SECURECOPY(ref, op, (oend-COPYLENGTH)); + while(op +# if (__BYTE_ORDER == __BIG_ENDIAN) +# define LZ4_BIG_ENDIAN 1 +# endif +#elif (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || defined(_BIG_ENDIAN)) && !(defined(__LITTLE_ENDIAN__) || defined(__LITTLE_ENDIAN) || defined(_LITTLE_ENDIAN)) +# define LZ4_BIG_ENDIAN 1 +#elif defined(__sparc) || defined(__sparc__) \ + || defined(__ppc__) || defined(_POWER) || defined(__powerpc__) || defined(_ARCH_PPC) || 
defined(__PPC__) || defined(__PPC) || defined(PPC) || defined(__powerpc__) || defined(__powerpc) || defined(powerpc) \ + || defined(__hpux) || defined(__hppa) \ + || defined(_MIPSEB) || defined(__s390__) +# define LZ4_BIG_ENDIAN 1 #else // Little Endian assumed. PDP Endian and other very rare endian format are unsupported. #endif // Unaligned memory access is automatically enabled for "common" CPU, such as x86. // For others CPU, the compiler will be more cautious, and insert extra code to ensure aligned access is respected -// If you know your target CPU supports unaligned memory access, you may want to force this option manually to improve performance +// If you know your target CPU supports unaligned memory access, you may want to force this option manually to improve performance #if defined(__ARM_FEATURE_UNALIGNED) -#define LZ4_FORCE_UNALIGNED_ACCESS 1 +# define LZ4_FORCE_UNALIGNED_ACCESS 1 +#endif + +// Define this parameter if your target system or compiler does not support hardware bit count +#if defined(_MSC_VER) && defined(_WIN32_WCE) // Visual Studio for Windows CE does not support Hardware bit count +# define LZ4_FORCE_SW_BITCOUNT #endif @@ -63,17 +79,32 @@ #if __STDC_VERSION__ >= 199901L // C99 /* "restrict" is a known keyword */ #else -#define restrict // Disable restrict +# define restrict // Disable restrict #endif #ifdef _MSC_VER -#define inline __forceinline // Visual is not C99, but supports some kind of inline +# define inline __inline // Visual is not C99, but supports some kind of inline +# define forceinline __forceinline +# include <intrin.h> // For Visual 2005 +# if LZ4_ARCH64 // 64-bit +# pragma intrinsic(_BitScanForward64) // For Visual 2005 +# pragma intrinsic(_BitScanReverse64) // For Visual 2005 +# else +# pragma intrinsic(_BitScanForward) // For Visual 2005 +# pragma intrinsic(_BitScanReverse) // For Visual 2005 +# endif +#else +# ifdef __GNUC__ +# define forceinline inline __attribute__((always_inline)) +# else +# define forceinline inline +# endif #endif #ifdef _MSC_VER // Visual Studio -#define bswap16(x) _byteswap_ushort(x) +#define lz4_bswap16(x) _byteswap_ushort(x) #else -#define bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | (((x) & 0xffu) << 8))) +#define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | (((x) & 0xffu) << 8))) #endif @@ -174,8 +205,8 @@ typedef struct _U64_S { U64 v; } U64_S; #endif #if defined(LZ4_BIG_ENDIAN) -#define LZ4_READ_LITTLEENDIAN_16(d,s,p) { U16 v = A16(p); v = bswap16(v); d = (s) - v; } -#define LZ4_WRITE_LITTLEENDIAN_16(p,i) { U16 v = (U16)(i); v = bswap16(v); A16(p) = v; p+=2; } +#define LZ4_READ_LITTLEENDIAN_16(d,s,p) { U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; } +#define LZ4_WRITE_LITTLEENDIAN_16(p,i) { U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p+=2; } #else // Little Endian #define LZ4_READ_LITTLEENDIAN_16(d,s,p) { d = (s) - A16(p); } #define LZ4_WRITE_LITTLEENDIAN_16(p,v) { A16(p) = v; p+=2; } @@ -187,24 +218,23 @@ typedef struct _U64_S { U64 v; } U64_S; //************************************************************ typedef struct { - const BYTE* base; - HTYPE hashTable[HASHTABLESIZE]; - U16 chainTable[MAXD]; - const BYTE* nextToUpdate; + const BYTE* base; + HTYPE hashTable[HASHTABLESIZE]; + U16 chainTable[MAXD]; + const BYTE* nextToUpdate; } LZ4HC_Data_Structure; //************************************** // Macros //************************************** -#define LZ4_WILDCOPY(s,d,e) do { LZ4_COPYPACKET(s,d) } while (d<e); -#define LZ4_BLINDCOPY(s,d,l) { BYTE* e=d+l; LZ4_WILDCOPY(s,d,e); d=e; } -#define HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH*8)-HASH_LOG)) -#define HASH_VALUE(p) HASH_FUNCTION(*(U32*)(p))
-#define HASH_POINTER(p) (HashTable[HASH_VALUE(p)] + base) -#define DELTANEXT(p) chainTable[(size_t)(p) & MAXD_MASK] -#define GETNEXT(p) ((p) - (size_t)DELTANEXT(p)) -#define ADD_HASH(p) { size_t delta = (p) - HASH_POINTER(p); if (delta>MAX_DISTANCE) delta = MAX_DISTANCE; DELTANEXT(p) = (U16)delta; HashTable[HASH_VALUE(p)] = (p) - base; } +#define LZ4_WILDCOPY(s,d,e) do { LZ4_COPYPACKET(s,d) } while (d<e); +#define LZ4_BLINDCOPY(s,d,l) { BYTE* e=d+l; LZ4_WILDCOPY(s,d,e); d=e; } +#define HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH*8)-HASH_LOG)) +#define HASH_VALUE(p) HASH_FUNCTION(A32(p)) +#define HASH_POINTER(p) (HashTable[HASH_VALUE(p)] + base) +#define DELTANEXT(p) chainTable[(size_t)(p) & MAXD_MASK] +#define GETNEXT(p) ((p) - (size_t)DELTANEXT(p)) //************************************** @@ -222,11 +252,11 @@ inline static int LZ4_NbCommonBytes (register U64 val) #elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) return (__builtin_clzll(val) >> 3); #else - int r; - if (!(val>>32)) { r=4; } else { r=0; val>>=32; } - if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } - r += (!val); - return r; + int r; + if (!(val>>32)) { r=4; } else { r=0; val>>=32; } + if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } + r += (!val); + return r; #endif #else #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) @@ -236,8 +266,8 @@ inline static int LZ4_NbCommonBytes (register U64 val) #elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) return (__builtin_ctzll(val) >> 3); #else - static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 }; - return DeBruijnBytePos[((U64)((val & -val) * 0x0218A392CDABBD3F)) >> 58]; + static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 }; + return DeBruijnBytePos[((U64)((val & -val) * 0x0218A392CDABBD3F)) >> 58]; #endif #endif } @@ -248,27 +278,27 @@ inline static int LZ4_NbCommonBytes (register U32 val) { #if defined(LZ4_BIG_ENDIAN) #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) - unsigned long r = 0; + unsigned long r; _BitScanReverse( &r, val ); return (int)(r>>3); #elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) return (__builtin_clz(val) >> 3); #else - int r; - if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } - r += (!val); - return r; + int r; + if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } + r += (!val); + return r; #endif #else #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) - unsigned long r = 0; + unsigned long r; _BitScanForward( &r, val ); return (int)(r>>3); #elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) return (__builtin_ctz(val) >> 3); #else - static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 }; - return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; + static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 }; + return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; #endif #endif } @@ -278,166 +308,211 @@ inline static int LZ4_NbCommonBytes (register U32 val) 
inline static int LZ4HC_Init (LZ4HC_Data_Structure* hc4, const BYTE* base) { - MEM_INIT((void*)hc4->hashTable, 0, sizeof(hc4->hashTable)); - MEM_INIT(hc4->chainTable, 0xFF, sizeof(hc4->chainTable)); - hc4->nextToUpdate = base + LZ4_ARCH64; - hc4->base = base; - return 1; + MEM_INIT((void*)hc4->hashTable, 0, sizeof(hc4->hashTable)); + MEM_INIT(hc4->chainTable, 0xFF, sizeof(hc4->chainTable)); + hc4->nextToUpdate = base + LZ4_ARCH64; + hc4->base = base; + return 1; } inline static void* LZ4HC_Create (const BYTE* base) { - void* hc4 = ALLOCATOR(sizeof(LZ4HC_Data_Structure)); + void* hc4 = ALLOCATOR(sizeof(LZ4HC_Data_Structure)); - LZ4HC_Init (hc4, base); - return hc4; + LZ4HC_Init ((LZ4HC_Data_Structure*)hc4, base); + return hc4; } inline static int LZ4HC_Free (void** LZ4HC_Data) { - FREEMEM(*LZ4HC_Data); - *LZ4HC_Data = NULL; - return (1); + FREEMEM(*LZ4HC_Data); + *LZ4HC_Data = NULL; + return (1); } -inline static void LZ4HC_Insert (LZ4HC_Data_Structure* hc4, const BYTE* ip) +// Update chains up to ip (excluded) +forceinline static void LZ4HC_Insert (LZ4HC_Data_Structure* hc4, const BYTE* ip) { - U16* chainTable = hc4->chainTable; - HTYPE* HashTable = hc4->hashTable; - INITBASE(base,hc4->base); - - while(hc4->nextToUpdate < ip) - { - ADD_HASH(hc4->nextToUpdate); - hc4->nextToUpdate++; - } + U16* chainTable = hc4->chainTable; + HTYPE* HashTable = hc4->hashTable; + INITBASE(base,hc4->base); + + while(hc4->nextToUpdate < ip) + { + const BYTE* p = hc4->nextToUpdate; + size_t delta = (p) - HASH_POINTER(p); + if (delta>MAX_DISTANCE) delta = MAX_DISTANCE; + DELTANEXT(p) = (U16)delta; + HashTable[HASH_VALUE(p)] = (p) - base; + hc4->nextToUpdate++; + } } -inline static int LZ4HC_InsertAndFindBestMatch (LZ4HC_Data_Structure* hc4, const BYTE* ip, const BYTE* const matchlimit, const BYTE** matchpos) +forceinline static size_t LZ4HC_CommonLength (const BYTE* p1, const BYTE* p2, const BYTE* const matchlimit) { - U16* const chainTable = hc4->chainTable; - HTYPE* const HashTable = hc4->hashTable; - const BYTE* ref; - INITBASE(base,hc4->base); - int nbAttempts=MAX_NB_ATTEMPTS; - int ml=0; - - // HC4 match finder - LZ4HC_Insert(hc4, ip); - ref = HASH_POINTER(ip); - while ((ref > (ip-MAX_DISTANCE)) && (nbAttempts)) - { - nbAttempts--; - if (*(ref+ml) == *(ip+ml)) - if (*(U32*)ref == *(U32*)ip) - { - const BYTE* reft = ref+MINMATCH; - const BYTE* ipt = ip+MINMATCH; - - while (ipt ml) { ml = ipt-ip; *matchpos = ref; } - } - ref = GETNEXT(ref); - } - return ml; +forceinline static int LZ4HC_InsertAndFindBestMatch (LZ4HC_Data_Structure* hc4, const BYTE* ip, const BYTE* const matchlimit, const BYTE** matchpos) +{ + U16* const chainTable = hc4->chainTable; + HTYPE* const HashTable = hc4->hashTable; + const BYTE* ref; + INITBASE(base,hc4->base); + int nbAttempts=MAX_NB_ATTEMPTS; + size_t ml=0; + + // HC4 match finder + LZ4HC_Insert(hc4, ip); + ref = HASH_POINTER(ip); + +#if 1 + if (ref >= ip-4) // potential repetition + { + if (A32(ref) == A32(ip)) // confirmed + { + const U16 delta = (U16)(ip-ref); + const BYTE* ptr = ip; + const BYTE* end; + ml = LZ4HC_CommonLength(ip+MINMATCH, ref+MINMATCH, matchlimit) + MINMATCH; + end = ip + ml - (MINMATCH-1); + while(ptr < end-delta) + { + DELTANEXT(ptr) = delta; // Pre-Load + ptr++; + } + do + { + DELTANEXT(ptr) = delta; + HashTable[HASH_VALUE(ptr)] = (ptr) - base; // Head of chain + ptr++; + } while(ptr < end); + hc4->nextToUpdate = end; + *matchpos = ref; + } + ref = GETNEXT(ref); + } +#endif + + while ((ref >= (ip-MAX_DISTANCE)) && (nbAttempts)) + { + nbAttempts--; + if 
(*(ref+ml) == *(ip+ml)) + if (A32(ref) == A32(ip)) + { + size_t mlt = LZ4HC_CommonLength(ip+MINMATCH, ref+MINMATCH, matchlimit) + MINMATCH; + if (mlt > ml) { ml = mlt; *matchpos = ref; } + } + ref = GETNEXT(ref); + } + + return (int)ml; } -inline static int LZ4HC_InsertAndGetWiderMatch (LZ4HC_Data_Structure* hc4, const BYTE* ip, const BYTE* startLimit, const BYTE* matchlimit, int longest, const BYTE** matchpos, const BYTE** startpos) +forceinline static int LZ4HC_InsertAndGetWiderMatch (LZ4HC_Data_Structure* hc4, const BYTE* ip, const BYTE* startLimit, const BYTE* matchlimit, int longest, const BYTE** matchpos, const BYTE** startpos) { - U16* const chainTable = hc4->chainTable; - HTYPE* const HashTable = hc4->hashTable; - INITBASE(base,hc4->base); - const BYTE* ref; - int nbAttempts = MAX_NB_ATTEMPTS; - int delta = ip-startLimit; - - // First Match - LZ4HC_Insert(hc4, ip); - ref = HASH_POINTER(ip); - - while ((ref > ip-MAX_DISTANCE) && (ref >= hc4->base) && (nbAttempts)) - { - nbAttempts--; - if (*(startLimit + longest) == *(ref - delta + longest)) - if (*(U32*)ref == *(U32*)ip) - { - const BYTE* reft = ref+MINMATCH; - const BYTE* ipt = ip+MINMATCH; - const BYTE* startt = ip; - - while (iptchainTable; + HTYPE* const HashTable = hc4->hashTable; + INITBASE(base,hc4->base); + const BYTE* ref; + int nbAttempts = MAX_NB_ATTEMPTS; + int delta = (int)(ip-startLimit); + + // First Match + LZ4HC_Insert(hc4, ip); + ref = HASH_POINTER(ip); + + while ((ref >= ip-MAX_DISTANCE) && (ref >= hc4->base) && (nbAttempts)) + { + nbAttempts--; + if (*(startLimit + longest) == *(ref - delta + longest)) + if (A32(ref) == A32(ip)) + { +#if 1 + const BYTE* reft = ref+MINMATCH; + const BYTE* ipt = ip+MINMATCH; + const BYTE* startt = ip; + + while (iptstartLimit) && (reft > hc4->base) && (startt[-1] == reft[-1])) {startt--; reft--;} + while ((startt>startLimit) && (reft > hc4->base) && (startt[-1] == reft[-1])) {startt--; reft--;} - if ((ipt-startt) > longest) - { - longest = ipt-startt; - *matchpos = reft; - *startpos = startt; - } - } - ref = GETNEXT(ref); - } + if ((ipt-startt) > longest) + { + longest = (int)(ipt-startt); + *matchpos = reft; + *startpos = startt; + } + } + ref = GETNEXT(ref); + } - return longest; + return longest; } -inline static int LZ4_encodeSequence(const BYTE** ip, BYTE** op, const BYTE** anchor, int ml, const BYTE* ref) +forceinline static int LZ4_encodeSequence(const BYTE** ip, BYTE** op, const BYTE** anchor, int ml, const BYTE* ref) { - int length, len; - BYTE* token; + int length, len; + BYTE* token; - // Encode Literal length - length = *ip - *anchor; - token = (*op)++; - if (length>=(int)RUN_MASK) { *token=(RUN_MASK< 254 ; len-=255) *(*op)++ = 255; *(*op)++ = (BYTE)len; } - else *token = (length<=(int)RUN_MASK) { *token=(RUN_MASK< 254 ; len-=255) *(*op)++ = 255; *(*op)++ = (BYTE)len; } + else *token = (length<=(int)ML_MASK) { *token+=ML_MASK; len-=ML_MASK; for(; len > 509 ; len-=510) { *(*op)++ = 255; *(*op)++ = 255; } if (len > 254) { len-=255; *(*op)++ = 255; } *(*op)++ = (BYTE)len; } - else *token += len; + // Encode MatchLength + len = (int)(ml-MINMATCH); + if (len>=(int)ML_MASK) { *token+=ML_MASK; len-=ML_MASK; for(; len > 509 ; len-=510) { *(*op)++ = 255; *(*op)++ = 255; } if (len > 254) { len-=255; *(*op)++ = 255; } *(*op)++ = (BYTE)len; } + else *token += len; - // Prepare next loop - *ip += ml; - *anchor = *ip; + // Prepare next loop + *ip += ml; + *anchor = *ip; - return 0; + return 0; } @@ -446,218 +521,218 @@ inline static int LZ4_encodeSequence(const BYTE** ip, BYTE** op, 
const BYTE** an //**************************** int LZ4_compressHCCtx(LZ4HC_Data_Structure* ctx, - const char* source, - char* dest, - int isize) + const char* source, + char* dest, + int isize) { - const BYTE* ip = (const BYTE*) source; - const BYTE* anchor = ip; - const BYTE* const iend = ip + isize; - const BYTE* const mflimit = iend - MFLIMIT; - const BYTE* const matchlimit = (iend - LASTLITERALS); - - BYTE* op = (BYTE*) dest; - - int ml, ml2, ml3, ml0; - const BYTE* ref=NULL; - const BYTE* start2=NULL; - const BYTE* ref2=NULL; - const BYTE* start3=NULL; - const BYTE* ref3=NULL; - const BYTE* start0; - const BYTE* ref0; - - ip++; - - // Main Loop - while (ip < mflimit) - { - ml = LZ4HC_InsertAndFindBestMatch (ctx, ip, matchlimit, (&ref)); - if (!ml) { ip++; continue; } - - // saved, in case we would skip too much - start0 = ip; - ref0 = ref; - ml0 = ml; + const BYTE* ip = (const BYTE*) source; + const BYTE* anchor = ip; + const BYTE* const iend = ip + isize; + const BYTE* const mflimit = iend - MFLIMIT; + const BYTE* const matchlimit = (iend - LASTLITERALS); + + BYTE* op = (BYTE*) dest; + + int ml, ml2, ml3, ml0; + const BYTE* ref=NULL; + const BYTE* start2=NULL; + const BYTE* ref2=NULL; + const BYTE* start3=NULL; + const BYTE* ref3=NULL; + const BYTE* start0; + const BYTE* ref0; + + ip++; + + // Main Loop + while (ip < mflimit) + { + ml = LZ4HC_InsertAndFindBestMatch (ctx, ip, matchlimit, (&ref)); + if (!ml) { ip++; continue; } + + // saved, in case we would skip too much + start0 = ip; + ref0 = ref; + ml0 = ml; _Search2: - if (ip+ml < mflimit) - ml2 = LZ4HC_InsertAndGetWiderMatch(ctx, ip + ml - 2, ip + 1, matchlimit, ml, &ref2, &start2); - else ml2=ml; - - if (ml2 == ml) // No better match - { - LZ4_encodeSequence(&ip, &op, &anchor, ml, ref); - continue; - } - - if (start0 < ip) - { - if (start2 < ip + ml0) // empirical - { - ip = start0; - ref = ref0; - ml = ml0; - } - } - - // Here, start0==ip - if ((start2 - ip) < 3) // First Match too small : removed - { - ml = ml2; - ip = start2; - ref =ref2; - goto _Search2; - } + if (ip+ml < mflimit) + ml2 = LZ4HC_InsertAndGetWiderMatch(ctx, ip + ml - 2, ip + 1, matchlimit, ml, &ref2, &start2); + else ml2=ml; + + if (ml2 == ml) // No better match + { + LZ4_encodeSequence(&ip, &op, &anchor, ml, ref); + continue; + } + + if (start0 < ip) + { + if (start2 < ip + ml0) // empirical + { + ip = start0; + ref = ref0; + ml = ml0; + } + } + + // Here, start0==ip + if ((start2 - ip) < 3) // First Match too small : removed + { + ml = ml2; + ip = start2; + ref =ref2; + goto _Search2; + } _Search3: - // Currently we have : - // ml2 > ml1, and - // ip1+3 <= ip2 (usually < ip1+ml1) - if ((start2 - ip) < OPTIMAL_ML) - { - int correction; - int new_ml = ml; - if (new_ml > OPTIMAL_ML) new_ml = OPTIMAL_ML; - if (ip+new_ml > start2 + ml2 - MINMATCH) new_ml = start2 - ip + ml2 - MINMATCH; - correction = new_ml - (start2 - ip); - if (correction > 0) - { - start2 += correction; - ref2 += correction; - ml2 -= correction; - } - } - // Now, we have start2 = ip+new_ml, with new_ml=min(ml, OPTIMAL_ML=18) - - if (start2 + ml2 < mflimit) - ml3 = LZ4HC_InsertAndGetWiderMatch(ctx, start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3); - else ml3=ml2; - - if (ml3 == ml2) // No better match : 2 sequences to encode - { - // ip & ref are known; Now for ml - if (start2 < ip+ml) - { - if ((start2 - ip) < OPTIMAL_ML) - { - int correction; - if (ml > OPTIMAL_ML) ml = OPTIMAL_ML; - if (ip+ml > start2 + ml2 - MINMATCH) ml = start2 - ip + ml2 - MINMATCH; - correction = ml - (start2 
- ip); - if (correction > 0) - { - start2 += correction; - ref2 += correction; - ml2 -= correction; - } - } - else - { - ml = start2 - ip; - } - } - // Now, encode 2 sequences - LZ4_encodeSequence(&ip, &op, &anchor, ml, ref); - ip = start2; - LZ4_encodeSequence(&ip, &op, &anchor, ml2, ref2); - continue; - } - - if (start3 < ip+ml+3) // Not enough space for match 2 : remove it - { - if (start3 >= (ip+ml)) // can write Seq1 immediately ==> Seq2 is removed, so Seq3 becomes Seq1 - { - if (start2 < ip+ml) - { - int correction = (ip+ml) - start2; - start2 += correction; - ref2 += correction; - ml2 -= correction; - if (ml2 < MINMATCH) - { - start2 = start3; - ref2 = ref3; - ml2 = ml3; - } - } - - LZ4_encodeSequence(&ip, &op, &anchor, ml, ref); - ip = start3; - ref = ref3; - ml = ml3; - - start0 = start2; - ref0 = ref2; - ml0 = ml2; - goto _Search2; - } - - start2 = start3; - ref2 = ref3; - ml2 = ml3; - goto _Search3; - } - - // OK, now we have 3 ascending matches; let's write at least the first one - // ip & ref are known; Now for ml - if (start2 < ip+ml) - { - if ((start2 - ip) < (int)ML_MASK) - { - int correction; - if (ml > OPTIMAL_ML) ml = OPTIMAL_ML; - if (ip + ml > start2 + ml2 - MINMATCH) ml = start2 - ip + ml2 - MINMATCH; - correction = ml - (start2 - ip); - if (correction > 0) - { - start2 += correction; - ref2 += correction; - ml2 -= correction; - } - } - else - { - ml = start2 - ip; - } - } - LZ4_encodeSequence(&ip, &op, &anchor, ml, ref); - - ip = start2; - ref = ref2; - ml = ml2; - - start2 = start3; - ref2 = ref3; - ml2 = ml3; - - goto _Search3; - - } - - // Encode Last Literals - { - int lastRun = iend - anchor; - if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK< 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; } - else *op++ = (lastRun< ml1, and + // ip1+3 <= ip2 (usually < ip1+ml1) + if ((start2 - ip) < OPTIMAL_ML) + { + int correction; + int new_ml = ml; + if (new_ml > OPTIMAL_ML) new_ml = OPTIMAL_ML; + if (ip+new_ml > start2 + ml2 - MINMATCH) new_ml = (int)(start2 - ip) + ml2 - MINMATCH; + correction = new_ml - (int)(start2 - ip); + if (correction > 0) + { + start2 += correction; + ref2 += correction; + ml2 -= correction; + } + } + // Now, we have start2 = ip+new_ml, with new_ml=min(ml, OPTIMAL_ML=18) + + if (start2 + ml2 < mflimit) + ml3 = LZ4HC_InsertAndGetWiderMatch(ctx, start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3); + else ml3=ml2; + + if (ml3 == ml2) // No better match : 2 sequences to encode + { + // ip & ref are known; Now for ml + if (start2 < ip+ml) + { + if ((start2 - ip) < OPTIMAL_ML) + { + int correction; + if (ml > OPTIMAL_ML) ml = OPTIMAL_ML; + if (ip+ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH; + correction = ml - (int)(start2 - ip); + if (correction > 0) + { + start2 += correction; + ref2 += correction; + ml2 -= correction; + } + } + else + { + ml = (int)(start2 - ip); + } + } + // Now, encode 2 sequences + LZ4_encodeSequence(&ip, &op, &anchor, ml, ref); + ip = start2; + LZ4_encodeSequence(&ip, &op, &anchor, ml2, ref2); + continue; + } + + if (start3 < ip+ml+3) // Not enough space for match 2 : remove it + { + if (start3 >= (ip+ml)) // can write Seq1 immediately ==> Seq2 is removed, so Seq3 becomes Seq1 + { + if (start2 < ip+ml) + { + int correction = (int)(ip+ml - start2); + start2 += correction; + ref2 += correction; + ml2 -= correction; + if (ml2 < MINMATCH) + { + start2 = start3; + ref2 = ref3; + ml2 = ml3; + } + } + + LZ4_encodeSequence(&ip, &op, &anchor, ml, ref); + ip = start3; + ref = ref3; + ml = ml3; + + 
start0 = start2; + ref0 = ref2; + ml0 = ml2; + goto _Search2; + } + + start2 = start3; + ref2 = ref3; + ml2 = ml3; + goto _Search3; + } + + // OK, now we have 3 ascending matches; let's write at least the first one + // ip & ref are known; Now for ml + if (start2 < ip+ml) + { + if ((start2 - ip) < (int)ML_MASK) + { + int correction; + if (ml > OPTIMAL_ML) ml = OPTIMAL_ML; + if (ip + ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH; + correction = ml - (int)(start2 - ip); + if (correction > 0) + { + start2 += correction; + ref2 += correction; + ml2 -= correction; + } + } + else + { + ml = (int)(start2 - ip); + } + } + LZ4_encodeSequence(&ip, &op, &anchor, ml, ref); + + ip = start2; + ref = ref2; + ml = ml2; + + start2 = start3; + ref2 = ref3; + ml2 = ml3; + + goto _Search3; + + } + + // Encode Last Literals + { + int lastRun = (int)(iend - anchor); + if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun > 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; } + else *op++ = (lastRun<<ML_BITS); + memcpy(op, anchor, iend - anchor); + op += iend-anchor; + } + + // End + return (int) (((char*)op)-dest); +}
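Since LZ4_COMPRESSMIN is gone, callers now bound the output explicitly through the maxOutputSize parameter threaded into LZ4_compressCtx/LZ4_compress64kCtx above; r88 exposes this as LZ4_compress_limitedOutput() in lz4.h. The lz4.h hunk did not survive in this copy of the patch, so the function names below are taken from that release rather than from the diff, and the worst-case bound uses the formula from the LZ4_compressBound() that this patch removes from lz4.c. A minimal round-trip sketch under those assumptions:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "lz4.h"   /* assumed r88 header: LZ4_compress_limitedOutput(), LZ4_uncompress() */

int main(void)
{
    const char src[] = "LZ4 r88 round-trip test. LZ4 r88 round-trip test.";
    int isize = (int)sizeof(src);
    int bound = isize + (isize/255) + 16;   /* worst-case compressed size */
    char* comp = (char*)malloc(bound);
    char* back = (char*)malloc(isize);

    /* Returns 0 when maxOutputSize is too small -- the behaviour replacing LZ4_COMPRESSMIN */
    int csize = LZ4_compress_limitedOutput(src, comp, isize, bound);
    if (csize == 0) { fprintf(stderr, "dest too small\n"); return 1; }

    /* LZ4_uncompress() takes the decompressed size and returns the number of
       compressed bytes it consumed; a negative result flags malformed input. */
    int rsize = LZ4_uncompress(comp, back, isize);
    printf("in=%d compressed=%d read=%d ok=%d\n",
           isize, csize, rsize, memcmp(src, back, isize) == 0);

    free(comp); free(back);
    return rsize < 0;
}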