From 1521c9697c64ffbcbe4a31aabcd13d182d330f42 Mon Sep 17 00:00:00 2001 From: Olly Date: Wed, 28 Feb 2024 22:46:32 +0000 Subject: [PATCH] add a fast way to compress/decompress images using SynLZ Also include the asm variants of SynLZCompress --- Source/Simba.lpi | 6 +- Source/image/simba.image.pas | 7 +- Source/image/simba.image_fastcompress.pas | 82 ++ Source/image/simba.image_utils.pas | 5 + .../script/imports/simba.import_encoding.pas | 6 +- Source/script/imports/simba.import_image.pas | 15 +- Source/simba.compress.pas | 7 +- Third-Party/synlz.pas | 944 +++++++++++------- 8 files changed, 696 insertions(+), 376 deletions(-) create mode 100644 Source/image/simba.image_fastcompress.pas diff --git a/Source/Simba.lpi b/Source/Simba.lpi index f412832c6..ccf8723be 100644 --- a/Source/Simba.lpi +++ b/Source/Simba.lpi @@ -380,7 +380,7 @@ - + @@ -960,6 +960,10 @@ + + + + diff --git a/Source/image/simba.image.pas b/Source/image/simba.image.pas index 193439027..9fca8f6bc 100644 --- a/Source/image/simba.image.pas +++ b/Source/image/simba.image.pas @@ -24,7 +24,6 @@ interface TSimbaImageLineStarts = array of PColorBGRA; - PSimbaImage = ^TSimbaImage; TSimbaImage = class(TSimbaBaseClass) protected FWidth: Integer; @@ -254,14 +253,16 @@ TSimbaImage = class(TSimbaBaseClass) function PixelDifferenceTPA(Other: TSimbaImage): TPointArray; overload; function PixelDifferenceTPA(Other: TSimbaImage; Tolerance: Single): TPointArray; overload; end; - TSimbaImageArray = array of TSimbaImage; + PSimbaImage = ^TSimbaImage; + PSimbaImageArray = ^TSimbaImageArray; + implementation uses Math, FPImage, - simba.files, simba.zip, simba.box, simba.quad, simba.geometry, simba.nativeinterface, + simba.zip, simba.box, simba.quad, simba.geometry, simba.nativeinterface, simba.matrix_float, simba.matrix_int, simba.array_point, simba.arraybuffer, simba.algo_sort, simba.image_lazbridge, simba.image_integral, simba.image_gaussblur, simba.image_bitmaparealoader, simba.image_stringconv; diff --git a/Source/image/simba.image_fastcompress.pas b/Source/image/simba.image_fastcompress.pas new file mode 100644 index 000000000..51324fef4 --- /dev/null +++ b/Source/image/simba.image_fastcompress.pas @@ -0,0 +1,82 @@ +{ + Author: Raymond van Venetië and Merlijn Wajer + Project: Simba (https://github.com/MerlijnWajer/Simba) + License: GNU General Public License (https://www.gnu.org/licenses/gpl-3.0) + -------------------------------------------------------------------------- + + Compress images fast using SynLZ. +} +unit simba.image_fastcompress; + +{$i simba.inc} + +interface + +uses + Classes, SysUtils, + simba.base, simba.image; + +procedure SimbaImage_FastCompress(Images: TSimbaImageArray; var Data: Pointer; out DataSize: SizeUInt); +function SimbaImage_FastDeCompress(Data: Pointer; DataLen: SizeUInt): TSimbaImageArray; + +implementation + +uses + SynLZ; + +type + PImageHeader = ^TImageHeader; + TImageHeader = packed record + Size: SizeUInt; // compressed size in bytes + Width, Height: Integer; + end; + +procedure SimbaImage_FastCompress(Images: TSimbaImageArray; var Data: Pointer; out DataSize: SizeUInt); +var + TotalSize: SizeUInt; + I: Integer; + Ptr: PByte; +begin + DataSize := 0; + TotalSize := SizeOf(Integer) + (Length(Images) * SizeOf(TImageHeader)); + for I := 0 to High(Images) do + Inc(TotalSize, Images[I].DataSize); + + if (Data = nil) or (MemSize(Data) < SynLZcompressdestlen(TotalSize)) then + ReAllocMem(Data, SynLZcompressdestlen(TotalSize)); + + Ptr := Data; + PInteger(Ptr)^ := Length(Images); + Inc(Ptr, SizeOf(Integer) + (Length(Images) * SizeOf(TImageHeader))); + + for I := 0 to High(Images) do + with PImageHeader(Data + SizeOf(Integer) + (I * SizeOf(TImageHeader)))^ do + begin + Width := Images[I].Width; + Height := Images[I].Height; + Size := SynLZcompress(PByte(Images[I].Data), Images[I].DataSize, Ptr); + Inc(Ptr, Size); + Inc(DataSize, Size); + end; +end; + +function SimbaImage_FastDeCompress(Data: Pointer; DataLen: SizeUInt): TSimbaImageArray; +var + Ptr: PByte; + I: Integer; +begin + Ptr := Data; + SetLength(Result, PInteger(Ptr)^); + Inc(Ptr, SizeOf(Integer) + (Length(Result) * SizeOf(TImageHeader))); + + for I := 0 to High(Result) do + with PImageHeader(Data + SizeOf(Integer) + (I * SizeOf(TImageHeader)))^ do + begin + Result[I] := TSimbaImage.Create(Width, Height); + SynLZdecompress(Ptr, Size, PByte(Result[I].Data)); + Inc(Ptr, Size); + end; +end; + +end. + diff --git a/Source/image/simba.image_utils.pas b/Source/image/simba.image_utils.pas index cdd643036..5ff80c480 100644 --- a/Source/image/simba.image_utils.pas +++ b/Source/image/simba.image_utils.pas @@ -1,3 +1,8 @@ +{ + Author: Raymond van Venetië and Merlijn Wajer + Project: Simba (https://github.com/MerlijnWajer/Simba) + License: GNU General Public License (https://www.gnu.org/licenses/gpl-3.0) +} unit simba.image_utils; {$i simba.inc} diff --git a/Source/script/imports/simba.import_encoding.pas b/Source/script/imports/simba.import_encoding.pas index 445862b2e..564fc0d20 100644 --- a/Source/script/imports/simba.import_encoding.pas +++ b/Source/script/imports/simba.import_encoding.pas @@ -225,7 +225,7 @@ procedure _LapeZDecompressString(const Params: PParamArray; const Result: Pointe *) procedure _LapeSynLZCompress(const Params: PParamArray; const Result: Pointer); LAPE_WRAPPER_CALLING_CONV begin - PInteger(Result)^ := SynLZcompress1pas(PPAnsiChar(Params^[0])^, PInteger(Params^[1])^, PPAnsiChar(Params^[2])^); + PInteger(Result)^ := SynLZcompress(PPointer(Params^[0])^, PInteger(Params^[1])^, PPointer(Params^[2])^); end; (* @@ -235,7 +235,7 @@ procedure _LapeSynLZCompress(const Params: PParamArray; const Result: Pointer); *) procedure _LapeSynLZDecompress(const Params: PParamArray; const Result: Pointer); LAPE_WRAPPER_CALLING_CONV begin - PInteger(Result)^ := SynLZdecompress1pas(PPAnsiChar(Params^[0])^, PInteger(Params^[1])^, PPAnsiChar(Params^[2])^); + PInteger(Result)^ := SynLZdecompress(PPointer(Params^[0])^, PInteger(Params^[1])^, PPointer(Params^[2])^); end; (* @@ -255,7 +255,7 @@ procedure _LapeSynLZCompressDestLen(const Params: PParamArray; const Result: Poi *) procedure _LapeSynLZDecompressDestLen(const Params: PParamArray; const Result: Pointer); LAPE_WRAPPER_CALLING_CONV begin - PInteger(Result)^ := SynLZdecompressdestlen(PPAnsiChar(Params^[0])^); + PInteger(Result)^ := SynLZdecompressdestlen(PPointer(Params^[0])^); end; procedure _LapeLZCompressionThread_Create(const Params: PParamArray; const Result: Pointer); LAPE_WRAPPER_CALLING_CONV diff --git a/Source/script/imports/simba.import_image.pas b/Source/script/imports/simba.import_image.pas index 5e25841ee..35513fe27 100644 --- a/Source/script/imports/simba.import_image.pas +++ b/Source/script/imports/simba.import_image.pas @@ -15,7 +15,7 @@ implementation uses Graphics, lptypes, - simba.image, simba.image_textdrawer; + simba.image, simba.image_textdrawer, simba.image_fastcompress; type PBitmap = ^TBitmap; @@ -1254,6 +1254,16 @@ procedure _LapeImage_DrawCircleAA(const Params: PParamArray); LAPE_WRAPPER_CALLI PSimbaImage(Params^[0])^.DrawCircleAA(PPoint(Params^[1])^, Pinteger(Params^[2])^, PColor(Params^[3])^, PSingle(Params^[4])^); end; +procedure _LapeFastCompressImages(const Params: PParamArray); LAPE_WRAPPER_CALLING_CONV +begin + SimbaImage_FastCompress(TSimbaImageArray(Params^[0]^), PPointer(Params^[1])^, PSizeUInt(Params^[2])^); +end; + +procedure _LapeFastDeCompressImages(const Params: PParamArray; const Result: Pointer); LAPE_WRAPPER_CALLING_CONV +begin + TSimbaImageArray(Result^) := SimbaImage_FastDeCompress(PPointer(Params^[0])^, PSizeUInt(Params^[1])^); +end; + (* TImage.Finder ------------- @@ -1466,6 +1476,9 @@ procedure ImportSimbaImage(Compiler: TSimbaScript_Compiler); addGlobalFunc('procedure TImage.DrawEllipseAA(ACenter: TPoint; XRadius, YRadius: Integer; Color: TColor; Thickness: Single = 1.5);', @_LapeImage_DrawEllipseAA); addGlobalFunc('procedure TImage.DrawCircleAA(ACenter: TPoint; Radius: Integer; Color: TColor; Thickness: Single = 1.5);', @_LapeImage_DrawCircleAA); + addGlobalFunc('procedure FastCompressImages(Images: TImageArray; var Data: Pointer; out DataSize: SizeUInt);', @_LapeFastCompressImages); + addGlobalFunc('function FastDeCompressImages(Data: Pointer; out DataLen: SizeUInt): TImageArray', @_LapeFastDeCompressImages); + ImportingSection := ''; end; end; diff --git a/Source/simba.compress.pas b/Source/simba.compress.pas index 2979b2064..e4dd701a2 100644 --- a/Source/simba.compress.pas +++ b/Source/simba.compress.pas @@ -1,3 +1,8 @@ +{ + Author: Raymond van Venetië and Merlijn Wajer + Project: Simba (https://github.com/MerlijnWajer/Simba) + License: GNU General Public License (https://www.gnu.org/licenses/gpl-3.0) +} unit simba.compress; {$i simba.inc} @@ -138,7 +143,7 @@ function TCompressingStream.Write(const Buffer; Count: LongInt): LongInt; Capacity := NeededCapacity; TimeUsed := HighResolutionTime(); - Result := SynLZcompress1pas(Pointer(Buffer), Count, Self.Memory); + Result := SynLZcompress(Pointer(Buffer), Count, Self.Memory); TimeUsed := HighResolutionTime() - TimeUsed; end; diff --git a/Third-Party/synlz.pas b/Third-Party/synlz.pas index 6ee809fa4..a684f62ef 100644 --- a/Third-Party/synlz.pas +++ b/Third-Party/synlz.pas @@ -1,8 +1,4 @@ -/// SynLZ Compression routines -// - licensed under a MPL/GPL/LGPL tri-license; version 1.18 -unit SynLZ; - -{$mode objfpc}{$H+} +// https://github.com/synopse/mORMot2 { This file is part of Synopse SynLZ Compression. @@ -118,6 +114,9 @@ SynLZ uncompress in 61.27ms, 1.5 GB/s } +unit SynLZ; + +{$mode objfpc}{$H+} interface @@ -125,324 +124,141 @@ interface function SynLZcompressdestlen(in_len: integer): integer; /// get uncompressed size from lz-compressed buffer (to reserve memory, e.g.) -function SynLZdecompressdestlen(in_p: PAnsiChar): integer; +function SynLZdecompressdestlen(in_p: PByte): integer; /// 1st compression algorithm uses hashing with a 32bits control word -function SynLZcompress1pas(src: PAnsiChar; size: integer; dst: PAnsiChar): integer; +function SynLZcompress(src: PByte; size: integer; dst: PByte): integer; /// 1st compression algorithm uses hashing with a 32bits control word // - this is the fastest pure pascal implementation -function SynLZdecompress1pas(src: PAnsiChar; size: integer; dst: PAnsiChar): integer; - -/// 1st compression algorithm uses hashing with a 32bits control word -// - this overload function is slower, but will allow to uncompress only the start -// of the content (e.g. to read some metadata header) -// - it will also check for dst buffer overflow, so will be more secure than -// other functions, which expect the content to be verified (e.g. via CRC) -function SynLZdecompress1partial(src: PAnsiChar; size: integer; dst: PAnsiChar; maxDst: integer): integer; +function SynLZdecompress(src: PByte; size: integer; dst: PByte): integer; implementation +// disabled some FPC paranoid warnings +{$WARN 7119 off : Exported/global symbols should be accessed via the GOT } +{$WARN 7121 off : Check size of memory operand "$1: memory-operand-size is $2 bits, but expected [$3 bits]" } +{$WARN 7122 off : Check size of memory operand "$1: memory-operand-size is $2 bits, but expected [$3 bits + $4 byte offset]" } +{$WARN 7123 off : Check "$1: offset of memory operand is negative "$2 byte" } + {$R-} {$Q-} +procedure MoveByOne(Source, Dest: Pointer; Count: PtrUInt); inline; +var + c: Byte; // better code generation on FPC +begin + inc(PtrUInt(Source), Count); + inc(PtrUInt(Dest), Count); + PtrInt(Count) := -PtrInt(Count); + repeat + c := PByte(Source)[Count]; + PByte(Dest)[Count] := c; + inc(Count); + until Count = 0; +end; + function SynLZcompressdestlen(in_len: integer): integer; begin // get maximum possible (worse) compressed size for out_p result := in_len+in_len shr 3+16; end; type // some cross-platform and cross-compiler definitions - TOffsets = array[0..4095] of PAnsiChar; // 16KB/32KB hashing code + TOffsets = array[0..4095] of PByte; // 16KB/32KB hashing code -function SynLZdecompressdestlen(in_p: PAnsiChar): integer; +function SynLZdecompressdestlen(in_p: PByte): integer; begin // get uncompressed size from lz-compressed buffer (to reserve memory, e.g.) result := PWord(in_p)^; if result and $8000<>0 then result := (result and $7fff) or (integer(PWord(in_p+2)^) shl 15); end; -function SynLZcompress1pas(src: PAnsiChar; size: integer; dst: PAnsiChar): integer; -var dst_beg, // initial dst value - src_end, // real last byte available in src - src_endmatch, // last byte to try for hashing - o: PAnsiChar; - CWbit: byte; - CWpoint: PCardinal; - v, h, cached, t, tmax: PtrUInt; - offset: TOffsets; - cache: array[0..4095] of cardinal; // 16KB+16KB=32KB on stack (48KB under Win64) +procedure SynLZdecompress1passub(src, src_end, dst: PByte; var offset: TOffsets); +var + last_hashed: PByte; // initial src and dst value + {$ifdef CPU64} + o: PByte; + {$endif CPU64} + CW, CWbit: cardinal; + v, t, h: PtrUInt; +label + nextCW; begin - dst_beg := dst; - // 1. store in_len - if size>=$8000 then begin // size in 32KB..2GB -> stored as integer - PWord(dst)^ := $8000 or (size and $7fff); - PWord(dst+2)^ := size shr 15; - inc(dst,4); - end else begin - PWord(dst)^ := size ; // size<32768 -> stored as word - if size=0 then begin - result := 2; - exit; - end; - inc(dst,2); - end; - // 2. compress - src_end := src+size; - src_endmatch := src_end-(6+5); - CWbit := 0; - CWpoint := pointer(dst); - PCardinal(dst)^ := 0; - inc(dst,sizeof(CWpoint^)); - fillchar(offset{%H-},sizeof(offset),0); // fast 16KB reset to 0 - // 1. main loop to search using hash[] - if src<=src_endmatch then - repeat - v := PCardinal(src)^; - h := ((v shr 12) xor v) and 4095; - o := offset[h]; - offset[h] := src; - cached := v xor {%H-}cache[h]; // o=nil if cache[h] is uninitialized - cache[h] := v; - if (cached and $00ffffff=0) and (o<>nil) and (src-o>2) then begin - CWpoint^ := CWpoint^ or (cardinal(1) shl CWbit); - inc(src,2); - inc(o,2); - t := 1; - tmax := src_end-src-1; - if tmax>=(255+16) then - tmax := (255+16); - while (o[t]=src[t]) and (t0 - if t<=15 then begin // mark 2 to 17 bytes -> size=1..15 - PWord(dst)^ := integer(t or h); - inc(dst,2); - end else begin // mark 18 to (255+16) bytes -> size=0, next byte=t - dec(t,16); - PWord(dst)^ := h; // size=0 - dst[2] := ansichar(t); - inc(dst,3); - end; - end else begin - dst^ := src^; - inc(src); - inc(dst); - end; - if CWbit<31 then begin - inc(CWbit); - if src<=src_endmatch then continue else break; - end else begin - CWpoint := pointer(dst); - PCardinal(dst)^ := 0; - inc(dst,sizeof(CWpoint^)); - CWbit := 0; - if src<=src_endmatch then continue else break; - end; - until false; - // 2. store remaining bytes - if src0 then begin - result := (result and $7fff) or (integer(PWord(src)^) shl 15); - inc(src,2); - end; - // 2. decompress - last_hashed := dst-1; - CWbit := 32; + last_hashed := dst - 1; nextCW: CW := PCardinal(src)^; - inc(src,4); - CWbit := CWbit-32; - if src=src_end then break; - while last_hashed=src_end then break; - if last_hashed= src_end then + break; + if last_hashed < dst - 3 then + begin inc(last_hashed); v := PCardinal(last_hashed)^; offset[((v shr 12) xor v) and 4095] := last_hashed; end; - inc(CWbit); - CW := CW shr 1; - if CWbit<32 then - continue else + CWbit := CWbit shl 1; + if CWbit <> 0 then + continue + else + goto nextCW; + end + else + begin + h := PWord(src)^; + inc(src, 2); + t := (h and 15) + 2; + if t = 2 then + begin + t := ord(src^) + (16 + 2); + inc(src); + end; + h := h shr 4; + {$ifdef CPU64} + o := offset[h]; + if PtrUInt(dst - o) < t then // overlap -> move byte-by-byte + MoveByOne(o, dst, t) + else if t <= 8 then + PInt64(dst)^ := PInt64(o)^ // much faster in practice + else + Move(o^, dst^, t); // safe since src_endmatch := src_end-(6+5) + {$else} + if PtrUInt(dst - offset[h]) < t then + MoveByOne(offset[h], dst, t) + else if t > 8 then + Move(offset[h]^, dst^, t) + else + PInt64(dst)^ := PInt64(offset[h])^; + {$endif CPU64} + if src >= src_end then + break; + if last_hashed < dst then + repeat // decompressed bytes should update the hash table + inc(last_hashed); + v := PCardinal(last_hashed)^; + offset[((v shr 12) xor v) and 4095] := last_hashed; + until last_hashed >= dst; + inc(dst, t); + last_hashed := dst - 1; + CWbit := CWbit shl 1; + if CWbit <> 0 then + continue + else goto nextCW; end; - end else begin - h := PWord(src)^; - inc(src,2); - t := (h and 15)+2; - h := h shr 4; - if t=2 then begin - t := ord(src^)+(16+2); - inc(src); - end; - if dst-{%H-}offset[h]=dst; - inc(dst,t); - if src>=src_end then break; - last_hashed := dst-1; - inc(CWbit); - CW := CW shr 1; - if CWbit<32 then - continue else - goto nextCW; - end; - until false; -// assert(result=dst-dst_beg); -end; - -// better code generation with sub-functions for raw decoding -procedure SynLZdecompress1passub(src, src_end, dst: PAnsiChar; var offset: TOffsets); -var last_hashed: PAnsiChar; // initial src and dst value - {$ifdef CPU64} - o: PAnsiChar; - {$endif} - CW, CWbit: cardinal; - v, t, h: PtrUInt; -label nextCW; -begin - last_hashed := dst-1; -nextCW: - CW := PCardinal(src)^; - inc(src,4); - CWbit := 1; - if src=src_end then break; - if last_hashed0 then - continue else - goto nextCW; - end else begin - h := PWord(src)^; - inc(src,2); - t := (h and 15)+2; - h := h shr 4; - if t=2 then begin - t := ord(src^)+(16+2); - inc(src); - end; - {$ifdef CPU64} - o := offset[h]; - if PtrUInt(dst-o)8 then // safe since src_endmatch := src_end-(6+5) - move(offset[h]^,dst^,t) else - PInt64(dst)^ := PInt64(offset[h])^; // much faster in practice - {$endif} - if src>=src_end then break; - if last_hashed=dst; - inc(dst,t); - last_hashed := dst-1; - CWbit := CWbit shl 1; - if CWbit<>0 then - continue else - goto nextCW; - end; - until false; + until false; end; -function SynLZdecompress1pas(src: PAnsiChar; size: integer; dst: PAnsiChar): integer; +function SynLZdecompress(src: PByte; size: integer; dst: PByte): integer; var offset: TOffsets; - src_end: PAnsiChar; + src_end: PByte; begin src_end := src+size; result := PWord(src)^; @@ -455,94 +271,488 @@ function SynLZdecompress1pas(src: PAnsiChar; size: integer; dst: PAnsiChar): int SynLZdecompress1passub(src, src_end, dst, offset{%H-}); end; -procedure SynLZdecompress1partialsub(src, dst, src_end, dst_end: PAnsiChar; var offset: TOffsets); -var last_hashed: PAnsiChar; // initial src and dst value - CWbit, CW: integer; - v, t, h: PtrUInt; - {$ifdef CPU64} - o: PAnsiChar; - {$endif} -label nextCW; +{$IFDEF CPUX86} +{$DEFINE HasSynLZCompress} +{$ASMMODE INTEL} +function SynLZcompress(src: PByte; size: integer; dst: PByte): integer; nostackframe; assembler; +asm + push ebp + push ebx + push esi + push edi + push eax + add esp, -4092 + push eax + add esp, -4092 + push eax + add esp, -4092 + push eax + add esp, -4092 + push eax + add esp, -4092 + push eax + add esp, -4092 + push eax + add esp, -4092 + push eax + add esp, -4092 + push eax + add esp, -32 + mov esi, eax // esi=src + mov edi, ecx // edi=dst + mov [esp+08H], ecx + mov eax, edx + cmp eax, 32768 + jl @@0889 + or ax, 8000H + mov [edi], eax + mov eax, edx + shr eax, 15 + mov [edi + 2], eax + add edi, 4 + jmp @@0891 +@@0890: mov eax, 2 + jmp @@0904 +@@0889: mov [edi], eax + test eax, eax + jz @@0890 + add edi, 2 +@@0891: lea eax, [edx + esi] + mov [esp+18H], edi + mov [esp+0CH], eax + sub eax, 11 + mov [esp+4], eax + xor eax, eax + lea ebx, [esp+24H] // reset offsets lookup table + {$ifdef HASNOSSE2} + mov ecx, 1024 +@@089I: mov [ebx], eax + mov [ebx + 4], eax + mov [ebx + 8], eax + mov [ebx + 12], eax + add ebx, 16 + {$else} + pxor xmm0, xmm0 + mov ecx, 256 +@@089I: movups dqword ptr [ebx], xmm0 + movups dqword ptr [ebx + 16], xmm0 + movups dqword ptr [ebx + 32], xmm0 + movups dqword ptr [ebx + 48], xmm0 + add ebx, 64 + {$endif HASNOSSE2} + dec ecx + jnz @@089I + mov [edi], eax + add edi, 4 + mov ebx, 1 // ebx=1 shl CWbit + // main loop: + cmp esi, [esp+4] + ja @@0900 +@@0892: mov edx, [esi] + mov eax, edx + shr edx, 12 + xor edx, eax + and edx, 0FFFH + mov ebp, [esp+24H + edx * 4] + mov ecx, [esp+4024H + edx * 4] + mov [esp+24H + edx * 4], esi + xor ecx, eax + test ecx, 0FFFFFFH + mov [esp+4024H + edx * 4], eax + jnz @@0897 + mov eax, esi + or ebp, ebp + jz @@0897 + sub eax, ebp + mov ecx, [esp+18H] + cmp eax, 2 + jle @@0897 + add esi, 2 + or dword ptr [ecx], ebx + mov ecx, [esp+0CH] + add ebp, 2 + mov eax, 1 + sub ecx, esi + dec ecx + mov [esp], ecx + cmp ecx, 271 + jl @@0894 + mov dword ptr [esp], 271 + jmp @@0894 +@@0893: inc eax +@@0894: mov ecx, [ebp + eax] + cmp cl, [esi + eax] + jnz @@0895 + cmp eax, [esp] + jge @@0895 + inc eax + cmp ch, [esi + eax] + jnz @@0895 + shr ecx, 16 + cmp eax, [esp] + jge @@0895 + inc eax + cmp cl, [esi + eax] + jnz @@0895 + cmp eax, [esp] + jge @@0895 + inc eax + cmp ch, [esi + eax] + jnz @@0895 + cmp eax, [esp] + jl @@0893 +@@0895: add esi, eax + shl edx, 4 + cmp eax, 15 + jg @@0896 + or eax, edx + mov word ptr [edi], ax + add edi, 2 + jmp @@0898 +@@0896: sub eax, 16 + mov [edi], dx + mov [edi + 2H], al + add edi, 3 + jmp @@0898 +@@0897: mov al, [esi] // movsb is actually slower! + mov [edi], al + inc esi + inc edi +@@0898: add ebx, ebx + jz @@0899 + cmp esi, [esp+4] + jbe @@0892 + jmp @@0900 +@@0899: mov [esp+18H], edi + mov [edi], ebx + inc ebx + add edi, 4 + cmp esi, [esp+4] + jbe @@0892 +@@0900: cmp esi, [esp+0CH] + jnc @@0903 +@@0901: mov al, [esi] + mov [edi], al + inc esi + inc edi + add ebx, ebx + jz @@0902 + cmp esi, [esp+0CH] + jc @@0901 + jmp @@0903 +@@0902: mov [edi], ebx + inc ebx + add edi, 4 + cmp esi, [esp+0CH] + jc @@0901 +@@0903: mov eax, edi + sub eax, [esp+08H] +@@0904: add esp, 32804 + pop edi + pop esi + pop ebx + pop ebp +end; +{$ENDIF} + +{$IFDEF CPUX86_64} +{$DEFINE HasSynLZCompress} +{$ASMMODE INTEL} +function SynLZcompress(src: PByte; size: integer; dst: PByte): integer; +var + off: TOffsets; + cache: array[0..4095] of cardinal; // uses 32KB+16KB=48KB on stack begin - last_hashed := dst-1; -nextCW: - CW := PCardinal(src)^; - inc(src,4); - CWbit := 1; - if src= $8000 then + begin + // size in 32KB..2GB -> stored as integer + PWord(dst)^ := $8000 or (size and $7fff); + PWord(dst + 2)^ := size shr 15; + inc(dst, 4); + end + else + begin + PWord(dst)^ := size; // size<32768 -> stored as word + if size = 0 then + begin + result := 2; + exit; + end; + inc(dst, 2); + end; + // 2. compress + src_end := src + size; + src_endmatch := src_end - (6 + 5); + CWbit := 0; + CWpoint := pointer(dst); + PCardinal(dst)^ := 0; + inc(dst, SizeOf(CWpoint^)); + FillChar(offset, SizeOf(offset), 0); // fast 16KB reset to 0 + // 1. main loop to search using hash[] + if src <= src_endmatch then + repeat + v := PCardinal(src)^; + h := ((v shr 12) xor v) and 4095; + o := offset[h]; + offset[h] := src; + cached := v xor {%H-}cache[h]; // o=nil if cache[h] is uninitialized + cache[h] := v; + if (cached and $00ffffff = 0) and + (o <> nil) and + (src - o > 2) then + begin + CWpoint^ := CWpoint^ or (cardinal(1) shl CWbit); + inc(src, 2); + inc(o, 2); + t := 1; + tmax := src_end - src - 1; + if tmax >= (255 + 16) then + tmax := (255 + 16); + while (o[t] = src[t]) and + (t < tmax) do + inc(t); + inc(src, t); + h := h shl 4; + // here we have always t>0 + if t <= 15 then + begin + // mark 2 to 17 bytes -> size=1..15 + PWord(dst)^ := integer(t or h); + inc(dst, 2); + end + else + begin + // mark 18 to (255+16) bytes -> size=0, next byte=t + dec(t, 16); + PWord(dst)^ := h; // size=0 + dst[2] := Byte(t); + inc(dst, 3); + end; + end + else + begin + dst^ := src^; + inc(src); + inc(dst); + end; + if CWbit < 31 then + begin + inc(CWbit); + if src <= src_endmatch then + continue + else + break; + end + else + begin + CWpoint := pointer(dst); + PCardinal(dst)^ := 0; + inc(dst, SizeOf(CWpoint^)); + CWbit := 0; + if src <= src_endmatch then + continue + else + break; + end; + until false; + // 2. store remaining bytes + if src < src_end then + repeat dst^ := src^; inc(src); inc(dst); - if (src>=src_end) or (dst>=dst_end) then - break; - if last_hashed0 then - continue else - goto nextCW; - end else begin - h := PWord(src)^; - inc(src,2); - t := (h and 15)+2; - h := h shr 4; - if t=2 then begin - t := ord(src^)+(16+2); - inc(src); - end; - if dst+t>=dst_end then begin // avoid buffer overflow by all means - movechars(offset[h],dst,dst_end-dst); - break; + if CWbit < 31 then + begin + inc(CWbit); + if src < src_end then + continue + else + break; + end + else + begin + PCardinal(dst)^ := 0; + inc(dst, 4); + CWbit := 0; + if src < src_end then + continue + else + break; end; - {$ifdef CPU64} - o := offset[h]; - if (t<=8) or (PtrUInt(dst-o)=src_end then - break; - if last_hashed=dst; - inc(dst,t); - last_hashed := dst-1; - CWbit := CWbit shl 1; - if CWbit<>0 then - continue else - goto nextCW; - end; - until false; -end; - -function SynLZdecompress1partial(src: PAnsiChar; size: integer; dst: PAnsiChar; maxDst: integer): integer; -var offset: TOffsets; - src_end: PAnsiChar; -begin - src_end := src+size; - result := PWord(src)^; - if result=0 then exit; - inc(src,2); - if result and $8000<>0 then begin - result := (result and $7fff) or (integer(PWord(src)^) shl 15); - inc(src,2); - end; - if maxDst0 then - SynLZdecompress1partialsub(src, dst, src_end, dst+result, offset{%H-}); + until false; + result := dst - dst_beg; end; +{$ENDIF} end.