Skip to content
Permalink
Branch: master
Find file Copy path
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
5328 lines (4692 sloc) 201 KB
{
A collection of often needed functions missing in FPC
Copyright (C) 2008 - 2019 Benito van der Zander (BeniBela)
benito@benibela.de
www.benibela.de
This file is distributed under under the same license as Lazarus and the LCL itself:
This file is distributed under the Library GNU General Public License
with the following modification:
As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent modules,
and to copy and distribute the resulting executable under terms of your choice,
provided that you also meet, for each linked independent module, the terms
and conditions of the license of that module. An independent module is a
module which is not derived from or based on this library. If you modify this
library, you may extend this exception to your version of the library, but
you are not obligated to do so. If you do not wish to do so, delete this
exception statement from your version.
}
(***
@abstract(This unit contains some basic functions missing in fpc)@br
It uses the following naming convention:@br
@br
All functions starting with @code(str) are related to strings and work on ansistring or pchar,
so you can use them for latin1 and utf-8.@br
The prefix @code(strl) means the string length is given, @code(str?i) means the function is case insensitive@br
@br@br
The prefix @code(array) means the function works with dynamical arrays.@br
If the suffix @code(Fast) is given, the length of the array is different of the count of contained elements i.e.
the standard length is actually a capacity so you can resize it without reallocating the array.@br
Some array functions have two optional slice parameters: if you give none of them the function will affect the whole
array; if you give one of them, the function will affect elements in the inclusive interval [0, slice] and if you give both,
it will affect elements in the inclusive interval [slice1, slice2].
@br@br
Encodings: @br
Most functions are encoding-agnostic and work on CP_ACP strings, they have @code(string) arguments and return @code(string).@br
It is recommended to use the LCL mode with CP_ACP = CP_UTF8, but it is not required. @br
Functions only working on utf-8 take @code(RawByteString) arguments and return @code(UTF8String). They do not take UTF8String arguments as that type behaves weirdly with utf-8 in other string types. @br
Functions that depend on the encoding and are encoding aware, like encoding conversions, take @code(RawByteString) arguments and return @code(RawByteString). @br
pchars are assumed to have CP_ACP encoding. @br
@author Benito van der Zander, (http://www.benibela.de)
*)
unit bbutils;
{$define allowyearzero} //there is no year zero in the BC/AD calendar. But there is in ISO 8601:2004. Although this unit uses the Julian calendar, so it is wrong before years 1582 (Gregorian calendar) anyways
{$DEFINE HASISNAN}
{$IFDEF FPC}
{$mode objfpc}{$H+}
{$ModeSwitch advancedrecords}
{$ModeSwitch typehelpers}
{$COPERATORS OFF}
{$DEFINE HASINLINE}
{$DEFINE HASDefaultFormatSettings}
{$DEFINE HASDeprecated}
{$ELSE} //DELPHI
{$IFDEF VER120}{$UNDEF HASISNAN}{$ENDIF}
{$IFDEF VER110}{$UNDEF HASISNAN}{$ENDIF}
{$IFDEF VER100}{$UNDEF HASISNAN}{$ENDIF}
{$ENDIF}
interface
uses
Classes, SysUtils,math
{$IFDEF windows}
, windows
{$ENDIF};
//-------------------------Array functions-----------------------------
type
{$IFDEF FPC}
{$ifdef FPC_HAS_CPSTRING}{$define HAS_CPSTRING}{$endif}
{$ifdef FPC_HAS_TYPE_Extended}float = extended;
{$else} {$ifdef FPC_HAS_TYPE_Double}float = double;
{$else} {$ifdef FPC_HAS_TYPE_Single}float = single;
{$endif}{$endif}{$endif}
{$else}
//delphi
float = extended;
TTime = TDateTime;
SizeInt = integer;
TValueSign = -1..1;
{$IFDEF CPU386}
PtrUInt = DWORD;
PtrInt = longint;
{$ELSE}{$IFDEF CPUX64}
PtrUInt = QWORD;
PtrInt = int64;
{$ENDIF}{$ENDIF}
{$IFNDEF UNICODE}
UnicodeString = WideString;
PUnicodeChar = ^WideChar;
{$else}
{$define HAS_CPSTRING}
{$ENDIF}
const
NaN = 0.0/0.0;
Infinity = 1.0/0.0;
NegInfinity = -1.0/0.0;
LineEnding = #13#10;
{$endif}
{$ifndef HAS_CPSTRING}
type RawByteString = AnsiString;
{$endif}
{$ifndef FPC_HAS_CPSTRING}
type TSystemCodePage = Word;
const
CP_ACP = 0;
CP_OEMCP = 1;
CP_UTF16 = 1200;
CP_UTF16BE = 1201;
CP_UTF8 = 65001;
CP_NONE = $FFFF;
CP_ASCII = 20127;
{$endif}
const CP_UTF32 = 12000;
CP_UTF32BE = 12001;
CP_WINDOWS1252 = 1252; //a super set of ISO 8859-1. ISO-8859-1 is a different superset of ISO 8859-1 (beware the dash). But most people mean this when they say iso-8859-1
CP_LATIN1 = 28591; //this is the actual ISO-8859-1.
type
TStringArray=array of string;
TLongintArray =array of longint;
TSizeintArray =array of SizeInt;
TLongwordArray =array of longword;
TInt64Array =array of int64;
TFloatArray = array of float;
TCharSet = set of ansichar;
{$ifdef FPC_HAS_CPSTRING}
TBBStringHelper = Type Helper(TStringHelper) for AnsiString
function EncodeHex: String; inline;
function DecodeHex: String; inline;
function RemoveFromLeft(chopoff: SizeInt): String;
{
function AfterOrEmpty(const sep: String): String; inline;
function AfterLastOrEmpty(const sep: String): String; inline;
function BeforeOrEmpty(const sep: String): String; inline;
function BeforeLastOrEmpty(const sep: String): String; inline;
}
end;
{$endif}
//-----------------------Pointer functions------------------------
type TProcedureOfObject=procedure () of object;
TStreamLikeWrite = procedure(const Buffer; Count: Longint) of object;
function procedureToMethod(proc: TProcedure): TMethod;
function makeMethod(code, data: pointer): TMethod; {$IFDEF HASINLINE} inline; {$ENDIF}
function PtrToUInt(p: pointer): UIntPtr; inline;
function UIntToPtr(i: UIntPtr): pointer; inline;
function ObjToUInt(p: TObject): UIntPtr; inline;
function UIntToObj(i: UIntPtr): TObject; inline;
//**Calls proc in an new thread
procedure threadedCall(proc: TProcedureOfObject; isfinished: TNotifyEvent); overload;
//**Calls proc in an new thread
procedure threadedCall(proc: TProcedureOfObject; isfinished: TProcedureOfObject);overload;
//**Calls proc in an new thread
procedure threadedCall(proc: TProcedure; isfinished: TProcedureOfObject);overload;
//------------------------------Charfunctions--------------------------
//Converts 0..9A..Za..z to a corresponding integer digit
function charDecodeDigit(c: char): integer; {$IFDEF HASINLINE} inline; {$ENDIF}
//Converts 0..9A..Fa..f to a corresponding integer digit
function charDecodeHexDigit(c: char): integer; {$IFDEF HASINLINE} inline; {$ENDIF}
function charEncodeHexDigitUp(digit: integer): char;
//------------------------------Stringfunctions--------------------------
//All of them start with 'str' or 'widestr' so can find them easily
//Naming scheme str <l> <i> <name>
//L: use length (ignoring #0 characters, so the string must be at least length characters long)
//I: case insensitive
//copy
//**Copies min(sourceLen, destLen) characters from source to dest and returns dest
function strlmove(dest,source:pansichar;destLen,sourceLen: SizeInt):pansichar;
//**Copies min(sourceLen, destLen) characters from source to dest and returns dest
function widestrlmove(dest,source:pwidechar;destLen,sourceLen: SizeInt):pwidechar;
//**Returns the substring of s containing all characters after start (including s[start]
function strCopyFrom(const s: string; start:SizeInt): string; {$IFDEF HASINLINE} inline; {$ENDIF}
//**Returns a string with all characters between first and last (including first, last)
function strSlice(const first,last:pansichar):string; overload;
//**Returns a string with all characters between start and last (including start, last)
function strSlice(const s: string; start,last:SizeInt): string; overload;
//**Like move: moves count strings from source memory to dest memory. Keeps the reference count intact. Size is count of strings * sizeof(string)!
procedure strMoveRef(var source: string; var dest: string; const size: SizeInt); {$IFDEF HASINLINE} inline; {$ENDIF}
//comparison
//all pansichar<->pansichar comparisons are null-terminated (except strls.. functions with length-strict)
//all pansichar<->string comparisons are null-terminated iff the string doesn't contain #0 characters
//length limited
function strlEqual(const p1,p2:pansichar;const l: SizeInt):boolean; overload; {$IFDEF HASINLINE} inline; {$ENDIF} //**< Tests if the strings are case-sensitive equal (same length and same characters) (null-terminated, stops comparison when meeting #0 )
function strlEqual(const p1,p2:pansichar;const l1,l2: SizeInt):boolean; overload; {$IFDEF HASINLINE} inline; {$ENDIF} //**< Tests if the strings are case-sensitive equal (same length and same characters) (null-terminated, stops comparison when meeting #0 )
function strliEqual(const p1,p2:pansichar;const l: SizeInt):boolean; overload; {$IFDEF HASINLINE} inline; {$ENDIF} //**< Tests if the strings are case-insensitive equal (same length and same characters) (null-terminated, stops comparison when meeting #0 )
function strliEqual(const p1,p2:pansichar;const l1,l2: SizeInt):boolean; overload; {$IFDEF HASINLINE} inline; {$ENDIF} //**< Tests if the strings are case-insensitive equal (same length and same characters) (null-terminated, stops comparison when meeting #0 )
function strlsEqual(const p1,p2:pansichar;const l: SizeInt):boolean; overload; {$IFDEF HASINLINE} inline; {$ENDIF} //**< Tests if the strings are case-sensitive equal (same length and same characters) (strict-length, can continue comparison after #0)
function strlsEqual(const p1,p2:pansichar;const l1,l2: SizeInt):boolean; overload; {$IFDEF HASINLINE} inline; {$ENDIF} //**< Tests if the strings are case-sensitive equal (same length and same characters) (strict-length, can continue comparison after #0)
function strlsiEqual(const p1,p2:pansichar;const l: SizeInt):boolean; overload; //**< Tests if the strings are case-insensitive equal (same length and same characters) (strict-length, can continue comparison after #0)
function strlsiEqual(const p1,p2:pansichar;const l1,l2: SizeInt):boolean; overload; {$IFDEF HASINLINE} inline; {$ENDIF} //**< Tests if the strings are case-insensitive equal (same length and same characters) (strict-length, can continue comparison after #0)
function strlsequal(p: pansichar; const s: string; l: SizeInt): boolean; overload;
//equal comparison, case insensitive, stopping at #0-bytes in p1, ignoring #0-bytes in l2
function strlnsiequal(p1,p2:pansichar;l2: SizeInt):boolean;
//equal comparison, case sensitive, stopping at #0-bytes in p1, ignoring #0-bytes in l2
function strlnsequal(p1,p2:pansichar;l2: SizeInt):boolean;
function strlEqual(p:pansichar;const s:string; l: SizeInt):boolean; overload; //**< Tests if the strings are case-sensitive equal (same length and same characters)
function strliEqual(p:pansichar;const s:string;l: SizeInt):boolean; overload; //**< Tests if the strings are case-insensitive equal (same length and same characters)
function strlBeginsWith(const p:pansichar; l:SizeInt; const expectedStart:string):boolean; //**< Test if p begins with expectedStart (__STRICT_HELP__, case-sensitive)
function strliBeginsWith(const p:pansichar;l: SizeInt;const expectedStart:string):boolean; {$IFDEF HASINLINE} inline; {$ENDIF} //**< Test if p begins with expectedStart (__STRICT_HELP__, case-insensitive)
//not length limited
function strEqual(const s1,s2:RawByteString):boolean; //**< Tests if the strings are case-insensitive equal (same length and same characters)
function striEqual(const s1,s2:RawByteString):boolean; {$IFDEF HASINLINE} inline; {$ENDIF}//**< Tests if the strings are case-insensitive equal (same length and same characters)
function strBeginsWith(const strToBeExaminated,expectedStart:string):boolean; overload; //**< Tests if the @code(strToBeExaminated) starts with @code(expectedStart)
function striBeginsWith(const strToBeExaminated,expectedStart:string):boolean; overload; //**< Tests if the @code(strToBeExaminated) starts with @code(expectedStart)
function strBeginsWith(const p:pansichar; const expectedStart:string):boolean; overload; {$IFDEF HASINLINE} inline; {$ENDIF} //**< Tests if the @code(p) starts with @code(expectedStart) (p is null-terminated)
function striBeginsWith(const p:pansichar; const expectedStart:string):boolean; overload; {$IFDEF HASINLINE} inline; {$ENDIF} //**< Tests if the @code(p) starts with @code(expectedStart) (p is null-terminated)
function strEndsWith(const strToBeExaminated,expectedEnd:string):boolean; //**< Tests if the @code(strToBeExaminated) ends with @code(expectedEnd)
function striEndsWith(const strToBeExaminated,expectedEnd:string):boolean; //**< Tests if the @code(strToBeExaminated) ends with @code(expectedEnd)
//**Case sensitive, clever comparison, that basically splits the string into
//**lexicographical and numerical parts and compares them accordingly
function strCompareClever(const s1, s2: string): longint;
//**Case insensitive, clever comparison, that basically splits the string into
//**lexicographical and numerical parts and compares them accordingly
function striCompareClever(const s1, s2: string): longint; {$IFDEF HASINLINE} inline; {$ENDIF}
//search
//**Searchs the last index of c in s
function strRpos(c:ansichar;const s:string):SizeInt;
//**Counts all occurrences of searched in searchIn (case sensitive)
function strCount(const str: string; const searched: ansichar; from: SizeInt = 1): SizeInt; overload;
//**Counts all occurrences of searched in searchIn (case sensitive)
function strCount(const str: string; const searched: TCharSet; from: SizeInt = 1): SizeInt; overload;
//**Searchs @code(searched) in @code(str) case-sensitive (Attention: opposite parameter to pos) (strict length, this function can find #0-bytes)
function strlsIndexOf(str,searched:pansichar; l1, l2: SizeInt): SizeInt; overload;
//**Searchs @code(searched) in @code(str) case-sensitive (Attention: opposite parameter to pos) (strict length, this function can find #0-bytes)
function strlsIndexOf(str:pansichar; const searched: TCharSet; length: SizeInt): SizeInt; overload;
//**Searchs @code(searched) in @code(str) case-insensitive (Attention: opposite parameter to pos) (strict length, this function can find #0-bytes)
function strlsiIndexOf(str,searched:pansichar; l1, l2: SizeInt): SizeInt;
//**Searchs @code(searched) in @code(str) case-sensitive (Attention: opposite parameter to pos)
function strIndexOf(const str,searched:string):SizeInt; overload; {$IFDEF HASINLINE} inline; {$ENDIF}
//**Searchs @code(searched) in @code(str) case-sensitive (Attention: opposite parameter to pos)
function strIndexOf(const str: string; const searched: TCharSet):SizeInt; overload; {$IFDEF HASINLINE} inline; {$ENDIF}
//**Searchs @code(searched) in @code(str) case-insensitive (Attention: opposite parameter to pos)
function striIndexOf(const str,searched:string):SizeInt; overload; {$IFDEF HASINLINE} inline; {$ENDIF}
//**Searchs @code(searched) in @code(str) case-sensitive (Attention: opposite parameter to pos)
function strIndexOf(const str,searched:string; from: SizeInt):SizeInt; overload; {$IFDEF HASINLINE} inline; {$ENDIF}
//**Searchs @code(searched) in @code(str) case-sensitive (Attention: opposite parameter to pos)
function strIndexOf(const str: string; const searched: TCharSet; from: SizeInt):SizeInt; overload; {$IFDEF HASINLINE} inline; {$ENDIF}
//**Searchs @code(searched) in @code(str) case-insensitive (Attention: opposite parameter to pos)
function striIndexOf(const str,searched:string; from: SizeInt):SizeInt; overload; {$IFDEF HASINLINE} inline; {$ENDIF}
//**Searchs @code(searched) in @code(str), case-sensitive, returns -1 on no occurrence (Attention: opposite parameter to pos) (strict length, this function can find #0-bytes)
function strlsLastIndexOf(str,searched:pansichar; l1, l2: SizeInt): SizeInt; overload;
//**Searchs @code(searched) in @code(str), case-sensitive, returns -1 on no occurrence (Attention: opposite parameter to pos) (strict length, this function can find #0-bytes)
function strlsLastIndexOf(str:pansichar; const searched: TCharSet; length: SizeInt): SizeInt; overload;
//**Searchs @code(searched) in @code(str), case-insensitive, returns -1 on no occurrence (Attention: opposite parameter to pos) (strict length, this function can find #0-bytes)
function strlsiLastIndexOf(str,searched:pansichar; l1, l2: SizeInt): SizeInt;
//**Searchs the last occurrence of @code(searched) in @code(str), case-sensitive, returns 0 on no occurrence (Attention: opposite parameter to pos)
function strLastIndexOf(const str: string; const searched: string):SizeInt; overload; {$IFDEF HASINLINE} inline; {$ENDIF}
//**Searchs the last occurrence of @code(searched) in @code(str), case-sensitive, returns 0 on no occurrence (Attention: opposite parameter to pos)
function strLastIndexOf(const str: string; const searched: string; from: SizeInt):SizeInt; overload; {$IFDEF HASINLINE} inline; {$ENDIF}
//**Searchs the last occurrence of @code(searched) in @code(str), case-sensitive, returns 0 on no occurrence (Attention: opposite parameter to pos)
function strLastIndexOf(const str: string; const searched: TCharSet):SizeInt; overload; {$IFDEF HASINLINE} inline; {$ENDIF}
//**Searchs the last occurrence of @code(searched) in @code(str), case-sensitive, returns 0 on no occurrence (Attention: opposite parameter to pos)
function strLastIndexOf(const str: string; const searched: TCharSet; from: SizeInt):SizeInt; overload; {$IFDEF HASINLINE} inline; {$ENDIF}
//**Searchs the last occurrence of @code(searched) in @code(str), case-insensitive, returns 0 on no occurrence (Attention: opposite parameter to pos)
function striLastIndexOf(const str: string; const searched: string):SizeInt; overload; {$IFDEF HASINLINE} inline; {$ENDIF}
//**Searchs the last occurrence of @code(searched) in @code(str), case-insensitive, returns 0 on no occurrence (Attention: opposite parameter to pos)
function striLastIndexOf(const str: string; const searched: string; from: SizeInt):SizeInt; overload; {$IFDEF HASINLINE} inline; {$ENDIF}
//**Tests if @code(searched) exists in @code(str) case-sensitive (Attention: opposite parameter to pos)
function strContains(const str,searched:string):boolean;overload; {$IFDEF HASINLINE} inline; {$ENDIF}
//**Tests if @code(searched) exists in @code(str) case-sensitive (Attention: opposite parameter to pos)
function strContains(const str:string; const searched: TCharSet):boolean;overload; {$IFDEF HASINLINE} inline; {$ENDIF}
//**Tests if @code(searched) exists in @code(str) case-insensitive (Attention: opposite parameter to pos)
function striContains(const str,searched:string):boolean; overload; {$IFDEF HASINLINE} inline; {$ENDIF}
//**Tests if @code(searched) exists in @code(str) case-sensitive (Attention: opposite parameter to pos)
function strContains(const str,searched:string; from: SizeInt):boolean; overload; {$IFDEF HASINLINE} inline; {$ENDIF}
//**Tests if @code(searched) exists in @code(str) case-sensitive (Attention: opposite parameter to pos)
function strContains(const str:string; const searched: TCharSet; from: SizeInt):boolean; overload; {$IFDEF HASINLINE} inline; {$ENDIF}
//**Tests if @code(searched) exists in @code(str) case-insensitive (Attention: opposite parameter to pos)
function striContains(const str,searched:string; from: SizeInt):boolean; overload; {$IFDEF HASINLINE} inline; {$ENDIF}
//more specialized
//**Removes all occurrences of trimCharacter from the left/right side of the string@br
//**It will move the pointer and change length, not modifying the memory pointed to
procedure strlTrimLeft(var p: pansichar; var l: SizeInt; const trimCharacters: TCharSet = [#0..' ']);
//**Removes all occurrences of trimCharacter from the left/right side of the string@br
//**It will move the pointer and change length, not modifying the memory pointed to
procedure strlTrimRight(var p: pansichar; var l: SizeInt; const trimCharacters: TCharSet = [#0..' ']);
//**Removes all occurrences of trimCharacter from the left/right side of the string@br
//**It will move the pointer and change length, not modifying the memory pointed to
procedure strlTrim(var p: pansichar; var l: SizeInt; const trimCharacters: TCharSet = [#0..' ']);
//for internal use
type TStrTrimProcedure = procedure (var p: pansichar; var l: SizeInt; const trimCharacters: TCharSet);
function strTrimCommon(const s: string; const trimCharacters: TCharSet; const trimProc: TStrTrimProcedure): string;
//**Removes all occurrences of trimCharacter from the left/right side of the string
function strTrimLeft(const s:string; const trimCharacters: TCharSet = [#0..' ']):string; {$IFDEF HASINLINE} inline; {$ENDIF}
function strTrimRight(const s:string; const trimCharacters: TCharSet = [#0..' ']):string; {$IFDEF HASINLINE} inline; {$ENDIF}
function strTrim(const s: string; const trimCharacters: TCharSet = [#0..' ']):string; {$IFDEF HASINLINE} inline; {$ENDIF}
function strTrimAndNormalize(const s: string; const trimCharacters: TCharSet = [#0..' ']):string;
//**<Replaces all #13#10 or #13 by #10
function strNormalizeLineEndings(const s: string): string;
//**<Replaces all #$D#$A, #$D #$85, #$85, #$2028, or #13 by #10. Experimental, behaviour might change in future
function strNormalizeLineEndingsUTF8(const s: RawByteString): UTF8String;
//**< Prepends expectedStart, if s does not starts with expectedStart
function strPrependIfMissing(const s: string; const expectedStart: string): string;
//**< Appends expectedEnd, if s does not end with expectedEnd
function strAppendIfMissing(const s: string; const expectedEnd: string): string;
//**Splits the string remainingPart into two parts at the first position of separator, the
//**first part is returned as function result, the second one is again assign to remainingPart
//**(If remainingPart does not contain separator, it returns remainingPart and sets remainingPart := '')
function strSplitGet(const separator: string; var remainingPart: string):string;overload;
//**Splits the string remainingPart into two parts at the first position of separator, the
//**first is assign to firstPart, the second one is again assign to remainingPart
procedure strSplit(out firstPart: string; const separator: string; var remainingPart: string);overload;
//**Splits the string s into the array splitted at every occurrence of sep
procedure strSplit(out splitted: TStringArray;s: string; sep:string=',';includeEmpty:boolean=true);overload;
//**Splits the string s into the array splitted at every occurrence of sep
function strSplit(s:string;sep:string=',';includeEmpty:boolean=true):TStringArray;overload;
function strWrapSplit(const Line: string; MaxCol: SizeInt = 80; const BreakChars: TCharSet = [' ', #9]): TStringArray;
function strWrap(Line: string; MaxCol: Integer = 80; const BreakChars: TCharSet = [' ', #9]): string;
function strReverse(s: string): string; //**< reverses a string. Assumes the encoding is utf-8
//Given a string like openBracket .. openBracket ... closingBracket closingBracket closingBracket closingBracket , this will return everything between
//the string start and the second last closingBracket (it assumes one bracket is already opened, so 3 open vs. 4 closing => second last).
//If updateText, it will replace text with everything after that closingBracket. (always excluding the bracket itself)
function strSplitGetUntilBracketClosing(var text: string; const openBracket, closingBracket: string; updateText: boolean): string;
function strSplitGetBetweenBrackets(var text: string; const openBracket, closingBracket: string; updateText: boolean): string;
//** If the string s has the form 'STARTsep...' it returns 'START'. E.g. for /foo/bar it returns /foo with AllowDirectorySeparators do
function strBeforeLast(const s: string; const sep: TCharSet): string; overload;
//** If the string s has the form '...sepEND' it returns 'END'. E.g. for /foo/bar it returns bar with AllowDirectorySeparators
function strAfterLast(const s: string; const sep: TCharSet): string; overload;
//**Joins all string list items to a single string separated by @code(sep).@br
//**If @code(limit) is set, the string is limited to @code(abs(limit)) items.
//**if limit is positive, limitStr is appended; if limitStr is negative, limitStr is inserted in the middle
function strJoin(const sl: TStrings; const sep: string = ', '; limit: Integer=0; const limitStr: string='...'): string;overload;
//**Joins all string list items to a single string separated by @code(sep).@br
function strJoin(const sl: TStringArray; const sep: string = ', '; limit: SizeInt=0; const limitStr: string='...'): string;overload;//{$ifdef HASINLINE} inline; {$endif}
function strJoin(strings: PString; stringsLength: SizeInt; const sep: string = ', '): string;overload;
//**Converts a str to a bool (for fpc versions previous 2.2)
function StrToBoolDef(const S: string;const Def:Boolean): Boolean;
//**Removes a file:// prefix from filename if it is there
function strRemoveFileURLPrefix(const filename: string): string;
//**loads a file as string. The filename is directly passed to the fpc rtl and uses the system
//**encoding @seealso(strLoadFromFileUTF8)
function strLoadFromFile(filename:string):string;
//**saves a string as file. The filename is directly passed to the fpc rtl and uses the system
//**encoding @seealso(strSaveToFileUTF8)
procedure strSaveToFile(filename: string;str:string);
//**loads a file as string. The filename should be encoded in utf-8
//**@seealso(strLoadFromFile)
function strLoadFromFileUTF8(filename:RawByteString): string;
//**saves a string as file. The filename should be encoded in utf-8
//**@seealso(strSaveToFile)
procedure strSaveToFileUTF8(filename: RawByteString; str: String);
//**converts a size (measured in bytes) to a string (e.g. 1025 -> 1 KiB)
function strFromSIze(size: int64):string;
//encoding things
//Conversions between UTF-8 and 1-Byte encodings are in strConvert
//Conversions between UTF-16 and UTF-8/32/1-Byte-encodings in the moveProcs
//**length of an utf8 string @br
function strLengthUtf8(const str: RawByteString; out invalid: SizeInt): SizeInt;
function strLengthUtf8(const str: RawByteString): SizeInt;
function strConvertToUtf8(str: RawByteString; from: TSystemCodePage): UTF8String; //**< Returns a utf-8 RawByteString from the string in encoding @code(from)
function strConvertFromUtf8(str: RawByteString; toe: TSystemCodePage): RawByteString; //**< Converts a utf-8 string to the encoding @code(from)
//** Converts a string from one encoding to another. @br
//** It primarily converts between latin-1 and utf-8 without needing a widestring manager.
//** It performs the conversion directly without converting to UTF-16, which should be much faster than fpc's default conversions. But there are no low-level optimizations @br
//** For other encodings it falls back to the moveprocs (which allows to store utf-16/32 in RawByteString) and SetCodePage
function strConvert(const str: RawByteString; from, toCP: TSystemCodePage): RawByteString;
function strChangeEncoding(const str: RawByteString; from, toe: TSystemCodePage): RawByteString; {$ifdef HASINLINE} inline; deprecated 'Use strConvert';{$endif}
function strDecodeUTF16Character(var source: PUnicodeChar): integer;
procedure strUnicode2AnsiMoveProc(source:punicodechar;var dest:RawByteString;cp : TSystemCodePage;len:SizeInt); //**<converts utf16 to other unicode pages and latin1. The signature matches the function of fpc's widestringmanager, so this function replaces cwstring. len is in chars.
procedure strAnsi2UnicodeMoveProc(source:pchar;cp : TSystemCodePage;var dest:unicodestring;len:SizeInt); //**<converts unicode pages and latin1 to utf16. The signature matches the function of fpc's widestringmanager, so this function replaces cwstring. len is in bytes
{$IFDEF fpc}
procedure registerFallbackUnicodeConversion; {$ifndef HAS_CPSTRING} deprecated 'Codepage aware extension requires fpc >=3';{$endif}
function strEncodingFromName(str:string):TSystemCodePage; //**< Gets the encoding from an encoding name (e.g. from http-equiv)
//this can return CP_ACP (perhaps i will change that)
function strActualEncoding(const str: RawByteString): TSystemCodePage; {$ifdef HASINLINE} inline; {$endif}
function strActualEncoding(e: TSystemCodePage): TSystemCodePage; {$ifdef HASINLINE} inline; {$endif}
{$ENDIF}
{$ifndef HAS_CPSTRING}
function StringCodePage(const str: RawByteString): TSystemCodePage;
procedure SetCodePage(var s: RawByteString; CodePage: TSystemCodePage; Convert: Boolean=True); //**< no-op function, so not every SetCodePage has to be wrapped in ifdefs
{$endif}
function strGetUnicodeCharacter(const character: integer; encoding: TSystemCodePage = CP_UTF8): RawByteString; //**< Get unicode character @code(character) in a certain encoding
function strGetUnicodeCharacterUTFLength(const character: integer): integer;
procedure strGetUnicodeCharacterUTF(const character: integer; buffer: pansichar);
function strDecodeUTF8Character(const str: RawByteString; var curpos: SizeInt): integer; overload; deprecated 'Use (pchar,pchar) overload or strIterator.'; //**< Returns the unicode code point of the utf-8 character starting at @code(str[curpos]) and increments @code(curpos) to the next utf-8 character. Returns a negative value if the character is invalid.
function strDecodeUTF8Character(var source: PChar; var remainingLength: SizeInt): integer; overload; deprecated 'Use (pchar,pchar) overload.'; //**< Returns the unicode code point of the utf-8 character starting at @code(source) and decrements @code(remainingLength) to the next utf-8 character. Returns a negative value if the character is invalid.
function strDecodeUTF8Character(var currentpos: pchar; afterlast: PChar): integer; overload; //**< Returns the unicode code point of the utf-8 character starting at @code(source). Returns a negative value if the character is invalid.
function strEncodingFromBOMRemove(var str:RawByteString):TSystemCodePage; //**< Gets the encoding from an unicode bom and removes it
{$ifdef HAS_CPSTRING}function strEncodingFromBOMRemove(var str:string):TSystemCodePage; inline;{$endif}
//** This function converts codePoint to the corresponding uppercase codepoint according to the unconditional cases of SpecialCasing.txt of Unicode 8. @br
//** It cannot be used to convert a character to uppercase, as SpecialCasing.txt is not a map from normal characters to their uppercase variants.
//** It is a collection of special characters that do not have an ordinary uppercase variant and are converted to something else. (e.g. ß -> SS) @br
//** The function signature is preliminary and likely to change.
function strUpperCaseSpecialUTF8(codePoint: integer): string;
//** This function converts codePoint to the corresponding lowercase codepoint according to the unconditional cases of SpecialCasing.txt of Unicode 8. @br
//** It cannot be used to convert a character to lowercase, as SpecialCasing.txt is not a map from normal characters to their lowercase variants.
//** It is a collection of special characters that do not have an ordinary lowercase variant and are converted to something else. @br
//** The function signature is preliminary and likely to change.
function strLowerCaseSpecialUTF8(codePoint: integer): string;
type TDecodeHTMLEntitiesFlags = set of (dhefStrict, dhefAttribute);
EDecodeHTMLEntitiesException = class(Exception);
//**This decodes all html entities to the given encoding. If strict is not set
//**it will ignore wrong entities (so e.g. X&Y will remain X&Y and you can call the function
//**even if it contains rogue &).
function strDecodeHTMLEntities(p:pansichar;l:SizeInt;encoding:TSystemCodePage; flags: TDecodeHTMLEntitiesFlags = []):RawByteString; overload;
//**This decodes all html entities to the given encoding. If strict is not set
//**it will ignore wrong entities (so e.g. X&Y will remain X&Y and you can call the function
//**even if it contains rogue &).
function strDecodeHTMLEntities(s:string;encoding:TSystemCodePage; flags: TDecodeHTMLEntitiesFlags = []):RawByteString; overload;
//**Replace all occurences of x \in toEscape with escapeChar + x
function strEscape(s:string; const toEscape: TCharSet; escapeChar: ansichar = '\'): string;
//**Replace all occurences of x \in toEscape with escape + hex(ord(x))
function strEscapeToHex(s:string; const toEscape: TCharSet; escape: string = '\x'): string;
//**Replace all occurences of escape + XX with chr(XX)
function strUnescapeHex(s:string; escape: string = '\x'): string;
//**Returns a regex matching s
function strEscapeRegex(const s:string): string;
//**Decodes a binary hex string like 202020 where every pair of hex digits corresponds to one char (deprecated, use strUnescapeHex)
function strDecodeHex(s:string):string; {$ifdef HASDeprecated}deprecated;{$endif}
//**Encodes to a binary hex string like 202020 where every pair of hex digits corresponds to one char (deprecated, use strEscapeToHex)
function strEncodeHex(s:string; const code: string = '0123456789ABCDEF'):string;{$ifdef HASDeprecated}deprecated;{$endif}
//**Returns the first l bytes of p (copies them so O(n))
function strFromPchar(p:pansichar;l:SizeInt):string;
//function strFromPchar(p:pansichar;l:SizeInt; encoding: TSystemCodePage):RawByteString;
//**Creates a string to display the value of a pointer (e.g. 0xDEADBEEF)
function strFromPtr(p: pointer): string;
//**Creates a string to display an integer. The result will have at least displayLength digits (digits, not characters, so -1 with length 2, will become -02).
function strFromInt(i: int64; displayLength: longint): string;
//**Creates count copies of rep
function strDup(rep: string; const count: SizeInt): string;
//**Checks if s is an absolute uri (i.e. has a [a-zA-Z][a-zA-Z0-9+-.]:// prefix)
function strIsAbsoluteURI(const s: string): boolean;
//**Returns a absolute uri for a uri relative to the uri base.@br
//**E.g. strResolveURI('foo/bar', 'http://example.org/abc/def') returns 'http://example.org/abc/foo/bar'@br
//**Or. strResolveURI('foo/bar', 'http://example.org/abc/def/') returns 'http://example.org/abc/def/foo/bar'@br
//**base may be relative itself (e.g. strResolveURI('foo/bar', 'test/') becomes 'test/foo/bar')
function strResolveURI(rel, base: string): string;
//**Expands a path to an absolute path, if it not already is one
function fileNameExpand(const rel: string): string;
//**Expands a path to an absolute path starting with file://
function fileNameExpandToURI(const rel: string): string;
//**Moves oldname to newname, replacing newname if it exists
function fileMoveReplace(const oldname,newname: string): boolean;
type TFileSaveSafe = procedure (stream: TStream; data: pointer);
procedure fileSaveSafe(filename: string; callback: TFileSaveSafe; data: pointer);
//**Levenshtein distance between s and t
//**(i.e. the minimal count of characters to change/add/remove to convert s to t). O(n**2) time, O(n) space
function strSimilarity(const s, t: string): SizeInt;
{$ifdef fpc}
//** Str iterator. Preliminary. Interface might change at any time
type TStrIterator = record
FCurrent: integer;
s: RawByteString;
pos: SizeInt;
property Current: integer read FCurrent;
function MoveNext: Boolean;
function GetEnumerator: TStrIterator;
end;
//** Str iterator. Preliminary. Interface might change at any time
function strIterator(const s: RawByteString): TStrIterator;
//** Str builder. Preliminary. Interface might change at any time
type TStrBuilder = object
protected
next, bufferend: pchar; //next empty pchar and first pos after the string
encoding: TSystemCodePage;
procedure appendWithEncodingConversion(const s: RawByteString);
procedure appendCodePointToUtf8String(const codepoint: integer); inline;
procedure appendCodePointWithEncodingConversion(const codepoint: integer);
procedure appendRaw(const s: RawByteString); inline;
public
buffer: pstring;
procedure init(abuffer:pstring; basecapacity: SizeInt = 64; aencoding: TSystemCodePage = {$ifdef HAS_CPSTRING}CP_ACP{$else}CP_UTF8{$endif});
procedure clear;
procedure final;
function count: SizeInt; inline;
function isEmpty: boolean; inline;
procedure reserveadd(delta: SizeInt);
procedure append(c: char); inline;
procedure append(const s: RawByteString);
procedure appendCodePoint(const codepoint: integer);
procedure append(const p: pchar; const l: SizeInt);
procedure appendBuffer(const block; l: LongInt);
procedure appendHexNumber(number: integer);
procedure appendHexNumber(number, digits: integer);
procedure appendNumber(number: Int64);
procedure chop(removedCount: SizeInt);
end;
type
generic TBaseArrayList<TElement> = object
type PElement = ^TElement;
TArrayBuffer = array of TElement;
private
function getCapacity: SizeInt; inline;
protected
FBuffer: TArrayBuffer;
FCount: SizeInt;
procedure setCount(NewCount: SizeInt);
procedure checkIndex(AIndex : SizeInt); inline;
public
procedure init;
procedure addAll(other: TBaseArrayList);
procedure addAll(const other: array of TElement);
procedure add(const item: TElement);
procedure clear;
procedure delete(Index: SizeInt);
procedure exchange(Index1, Index2: SizeInt);
procedure expand;
function toSharedArray: TArrayBuffer;
property capacity: SizeInt read getCapacity write setCount;
property count: SizeInt read fcount write setCount;
end;
generic TCopyingArrayList<TElement> = object(specialize TBaseArrayList<TElement>)
type TEnumerator = record
private
function GetCurrent: TElement;
public
fcurrent, fend: PElement;
function MoveNext: Boolean;
property current: TElement read GetCurrent;
end;
protected
function get(Index: SizeInt): TElement; inline;
procedure put(Index: SizeInt; const Item: TElement); inline;
function first: TElement;
function last: TElement;
public
function GetEnumerator: TEnumerator;
property Items[Index: SizeInt]: TElement read get write put; default;
end;
generic TCopyingPtrArrayList<TElement> = object(specialize TCopyingArrayList<TElement>)
end;
TPointerArrayList = specialize TCopyingArrayList<pointer>;
TObjectArrayList = specialize TCopyingArrayList<TObject>;
TSizeIntArrayList = specialize TCopyingArrayList<SizeInt>;
TStringArrayList = specialize TCopyingArrayList<String>;
generic TRecordArrayList<TElement> = object(specialize TBaseArrayList<TElement>)
protected
function get(Index: SizeInt): PElement; inline;
procedure put(Index: SizeInt; Item: PElement); inline;
function first: PElement;
function last: PElement;
public
property Items[Index: SizeInt]: PElement read get write put; default;
end;
{$endif}
//----------------Mathematical functions-------------------------------
{$IFNDEF FPC}
function SwapEndian(const w: Word): Word; overload;
function SwapEndian(const w: DWord): DWord; overload;
{$ENDIF}
const powersOf10: array[0..9] of longint = (1,10,100,1000,10000,100000,1000000,10000000,100000000,1000000000);
//**log 10 rounded down (= number of digits in base 10 - 1)
function intLog10(i:longint):longint; overload;
//**log_b n rounded down (= number of digits of n in base b - 1)
function intLog(n,b: longint): longint; overload;
//**Given a number n, this procedure calculates the maximal integer e, so that n = p^e * r
procedure intFactor(const n,p: longint; out e, r:longint);
function gcd(a,b: integer): integer; overload; //**< Calculates the greatest common denominator
function gcd(a,b: cardinal): cardinal; overload; //**< Calculates the greatest common denominator
function gcd(a,b: int64): int64; overload; //**< Calculates the greatest common denominator
function lcm(a,b: int64): int64; //**< Calculates the least common multiple (just a*b div gcd(a,b), so it can easily overflow)
function coprime(a,b:cardinal): boolean; //**< Checks if two numbers are coprime
function factorial(i:longint):float; //**< Calculates i!
function binomial(n,k: longint): float;//**< Calculates n|k = n!/k!(n-k)!
//probability
//**expectated value of a binomial distribution
function binomialExpectation(n:longint;p:float):float;
//**variance of a binomial distribution
function binomialVariance(n:longint;p:float):float;
//**deviation(=sqrt(variance)) of a binomial distribution
function binomialDeviation(n:longint;p:float):float;
//**probability: P(X = k) where X is binomial distributed with n possible values (exact value calculated
//**with binomial coefficients, @seealso(binomialProbabilityApprox))
function binomialProbability(n:longint;p:float;k:longint):float; //P(X = k)
//**probability: P(X >= k) where X is binomial distributed with n possible values
function binomialProbabilityGE(n:longint;p:float;k:longint):float; //P(X >= k)
//**probability: P(X <= k) where X is binomial distributed with n possible values
function binomialProbabilityLE(n:longint;p:float;k:longint):float; //P(X <= k)
//**probability: P(X >= mu + d or X <= mu - d) where X is binomial distributed with n possible values
function binomialProbabilityDeviationOf(n:longint;p:float;dif:float):float; //P(X >= � + d or X <= � - d)
//**expectated value of a binomial distribution (approximates the value with either Poisson or
//**Moivre and Laplace, depending on the variance of the distribution) @seealso(binomialProbability))
function binomialProbabilityApprox(n:longint;p:float;k:longint):float;
//**Z-Score of the value k in a distribution with n outcomes
function binomialZScore(n:longint;p:float;k:longint):float;
//**This calculates the euler phi function totient[i] := phi(i) = |{1 <= j <= i | gcd(i,j) = 0}| for all i <= n.@br
//**It uses a sieve approach and is quite fast (10^7 in 3s)@br
//**You can also use it to calculate all primes (i is prime iff phi(i) = i - 1)
procedure intSieveEulerPhi(const n: cardinal; var totient: TLongwordArray);
//**This calculates the number of divisors: divcount[i] := |{1 <= j <= i | i mod j = 0}| for all i <= n.@br
//**Speed: 10^7 in 5s@br
procedure intSieveDivisorCount(n: integer; var divcount: TLongintArray);
//--------------------Time functions-----------------------------------
{$IFDEF windows}
function dateTimeToFileTime(const date: TDateTime): TFileTime;
function fileTimeToDateTime(const fileTime: TFileTime;convertTolocalTimeZone: boolean=true): TDateTime;
{$ENDIF}
//**cumulative sum of month days (so. days in month i = dmdcs[i] - dmdcs[i-1])
const DateMonthDaysCumSum: array[false..true,0..12] of integer =
((00, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365),
(00, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366));
//**Week of year
function dateWeekOfYear(const date:TDateTime):word; overload;
function dateWeekOfYear(year, month, day: integer):word; overload;
//**@returns if year is a leap year (supports negative years, i think)
function dateIsLeapYear(const year: integer): boolean; {$IFDEF HASINLINE} inline; {$ENDIF}
//**Encodes a date time
function dateTimeEncode(const y,m,d,h,n,s:integer; nanoseconds: integer = 0): TDateTime;
type EDateTimeParsingException = class(Exception);
type TDateTimeParsingFlag = (dtpfStrict);
TDateTimeParsingFlags = set of TDateTimeParsingFlag;
TDateTimeParsingResult = (dtprSuccess, dtprFailureValueTooHigh, dtprFailureValueTooHigh2, dtprFailure);
//**Reads a date time string given a certain mask (mask is case-sensitive)@br
//**The uses the same mask types as FormatDate:@br
//**s or ss for a second @br
//**n or nn for a minute @br
//**h or hh for a hour @br
//**d or dd for a numerical day @br
//**m or mm for a numerical month, mmm for a short month name, mmmm for a long month name@br
//**am/pm or a/p match am/pm or a/p
//**yy, yyyy or [yy]yy for the year. (if the year is < 90, it will become 20yy, else if it is < 100, it will become 19yy, unless you use uppercase Y instead of y) @br
//**YY, YYYY or [YY]YY for the year @br
//**z, zz, zzz, zzzz for microseconds (e.g. use [.zzzzzz] for optional ms with exactly 6 digit precision, use [.z[z[z[z[z[z]]]]]] for optional µs with up to 6 digit precision)
//**Z for the ISO time zone (written as regular expressions, it matches 'Z | [+-]hh(:?mm)?'. Z is the only format ansichar (except mmm) matching several characters)
//**The letter formats d/y/h/n/s matches one or two digits, the dd/mm/yy formats require exactly two.@br
//**yyyy requires exactly 4 digits, and [yy]yy works with 2 or 4 (there is also [y]yyy for 3 to 4). The year always matches an optional - (e.g. yyyy also matches -0012, but not -012)@br
//**Generally [x] marks the part x as optional (it tries all possible combinations, so you shouldn't have more than 10 optional parts)@br
//**x+ will match any additional amount of x. (e.g. yy -> 2 digit year, yy+ -> at least 2 digit year, yyyy -> 4 digit year, [yy]yy -> 2 or 4 digit year) (mmm+ for short or long dates)@br
//**"something" can be used to match the input verbatim@br
//**whitespace is matched against whitespace (i.e. [ #9#10#13]+ matches [ #9#10#13]+)
//**The function works if the string is latin-1 or utf-8, and it also supports German month names@br
//**If a part is not found, it returns high(integer) there@br@br
//**There are old and new functions, because the signature has changed from double to int. Do not use the OLD functions unless you are porting existing code.@br@br
//**@return(If input could be matched with mask. It does not check, if the returned values are valid (e.g. month = 13 is allowed, in case you have to match durations))
function dateTimeParsePartsTry(const input,mask:string; outYear, outMonth, outDay: PInteger; outHour, outMinutes, outSeconds: PInteger; outSecondFraction: PInteger = nil; outtimezone: PInteger = nil; options: TDateTimeParsingFlags = []): TDateTimeParsingResult;
//**Reads date/time parts from a input matching a given mask (@see dateTimeParsePartsTry)
procedure dateTimeParseParts(const input,mask:string; outYear, outMonth, outDay: PInteger; outHour, outMinutes, outSeconds: PInteger; outSecondFraction: PInteger = nil; outtimezone: PInteger = nil);
//**Reads date/time from a input matching a given mask (@see dateTimeParsePartsTry)
function dateTimeParse(const input,mask:string; outtimezone: PInteger = nil): TDateTime;
//**Converts a dateTime to a string corresponding to the given mask (same mask as dateTimeParsePartsTry)
function dateTimeFormat(const mask: string; y, m,d, h, n, s: Integer; nanoseconds: integer = 0; timezone: integer = high(integer)): string; overload;
//**Converts a dateTime to a string corresponding to the given mask (same mask as dateTimeParsePartsTry)
function dateTimeFormat(const mask: string; const dateTime: TDateTime): string; overload;
//**Reads a time string given a certain mask (@see dateTimeParsePartsTry)@br
procedure timeParseParts(const input,mask:string; outHour, outMinutes, outSeconds: PInteger; outSecondFraction: PInteger = nil; outtimezone: PInteger = nil);
//**Reads a time string given a certain mask (@see dateTimeParsePartsTry).@br This function checks, if the time is valid.
function timeParse(const input,mask:string): TTime;
//**Converts a time to a string corresponding to the given mask (same mask as dateTimeParsePartsTry)
function timeFormat(const mask: string; h, n, s: Integer): string; overload;
function timeFormatNEW(const mask: string; h, n, s: Integer; nanoseconds: integer; timezone: integer = high(integer)): string; overload;
//**Reads a date string given a certain mask (@see dateTimeParsePartsTry)@br
procedure dateParseParts(const input,mask:string; outYear, outMonth, outDay: PInteger; outtimezone: PInteger = nil);
//**Reads a date string given a certain mask (@see dateTimeParsePartsTry)@br This function checks, if the date is valid.
function dateParse(const input,mask:string): longint;
//**Converts a date to a string corresponding to the given mask (same mask as dateTimeParsePartsTry)
function dateFormat(const mask: string; const y, m, d: integer): string;
function dateFormatNew(const mask: string; const y, m, d: integer; timezone: integer): string;
//**Converts a date to a string corresponding to the given mask (same mask as dateTimeParsePartsTry)
function dateFormatOld(const mask: string; const y, m, d: integer; const timezone: TDateTime): string; {$ifdef HASDeprecated}deprecated 'use dateFormatNew';{$endif}
//**Encodes a date as datetime (supports negative years)
function dateEncodeTry(year, month, day: integer; out dt: TDateTime): boolean;
//**Encodes a date as datetime (supports negative years)
function dateEncode(year, month, day: integer): TDateTime;
//**Encodes a date as datetime (supports negative years)
procedure dateDecode(date: TDateTime; year, month, day: PInteger);
const WHITE_SPACE=[#9,#10,#13,' '];
//----------------------------Others-----------------------------------
//**Compare function to compare the two values to which a and b, ideally returning -1 for a^<b^, 0 for a^=b^, +1 for a^>b^
//**The data is an TObject to prevent confusing it with a and b. It is the first parameter,
//**so the function use the same call convention like a method
type TPointerCompareFunction = function (data: TObject; a, b: pointer): longint;
//**General stable sort function @br
//**a is the first element in the array to sort, and b is the last. size is the size of every element@br
//**compareFunction is a function which compares two pointer to elements of the array, if it is nil, it will compare the raw bytes (which will correspond to an ascending sorting of positive integers). @br
//**Only the > 0 and <= 0 return values are discerned. (i.e. you can safely use a comparison function that e.g. only returns +7 and 0) @br
//**Currently it uses a combination of merge and insert sort. Merge requires the allocation of additional memory.
procedure stableSort(a,b: pointer; size: SizeInt; compareFunction: TPointerCompareFunction = nil; compareFunctionData: TObject=nil); overload;
//**general stable sort functions for arrays (modifying the array inline and returning it)
function stableSort(intArray: TLongintArray; compareFunction: TPointerCompareFunction = nil; compareFunctionData: TObject=nil): TLongintArray; overload;
function stableSort(strArray: TStringArray; compareFunction: TPointerCompareFunction = nil; compareFunctionData: TObject=nil): TStringArray; overload;
function stableSort(intArray: TSizeintArray; compareFunction: TPointerCompareFunction = nil; compareFunctionData: TObject=nil): TSizeintArray; overload;
type TBinarySearchChoosen = (bsAny, bsFirst, bsLast);
TBinarySearchAcceptedCondition = (bsLower, bsEqual, bsGreater);
type TBinarySearchAcceptedConditions = set of TBinarySearchAcceptedCondition;
//**Should return 0 if the searched element is equal to a,
//** -1 if the searched element is smaller than a, and
//** +1 if the searched element is larger than a.
//**(that is the opposite of what you might expect, but it is logical: the data parameter has to come first to match a method signature. The data parameter is compared to a parameter (to match a standalone comparison function signature))
type TBinarySearchFunction = function (data: TObject; a: pointer): longint;
//** General binary search function. It can find an element or a lower/upper bound.
//** @br @code(a) points to the first element in the (ascending, sorted) array, @code(b) to the last, @code(size) the size of each element
//** @br @code(compareFunction) is a TBinarySearchFunction comparing the searched element to another element
//** @br @code(compareFunctionData) is the data passed to the comparison function as first argument (you can think of it as searched element)
//** @br @code(choosen) is the element that should be returned, if there are multiple matches (bsFirst, bsLast or bsAny) .
//** @br @code(condition) the comparison relation between the returned and searched element (E.g. for [bsGreater, bsEqual] the returned element satisfies @code(compareFunction(reference, returned) <= 0).)
//** @br returns a pointer to the found match or nil if there is none.
//** @br (note that you can combine, e.g. bsGreater and bsLast, which will always return the last element, unless all are lower)
//** @br
//** @br Beware of the pointers. You need to be very carefully. @code(a) and @code(b) point to the first and last elements. They are not the element, they are pointers to the location of the element in the array. If a = b, the array has one element. You need to pass a < b for empty arrays.
//** @br The first parameter of @code(compareFunction) is user defined data, so it can be anything. The second parameter is a pointer to the location of an array element. It is not the array element itself, not even on a list of pointers.
//** @br The same holds for the pointer returned by the function, i.e. it is like that second parameter.
function binarySearch(a,b: pointer; size: SizeInt; compareFunction: TBinarySearchFunction = nil; compareFunctionData: TObject=nil; choosen: TBinarySearchChoosen = bsAny; condition: TBinarySearchAcceptedConditions = [bsEqual]): pointer;
function eUTF8: TSystemCodePage; {$IFDEF HASINLINE} inline; {$ENDIF} {$ifdef HASDeprecated}deprecated;{$endif}
function eWindows1252: TSystemCodePage; {$IFDEF HASINLINE} inline; {$ENDIF} {$ifdef HASDeprecated}deprecated;{$endif}
{$I bbutilsh.inc}
implementation
const MinsPerDay = 24 * 60;
{$IFNDEF HASSIGN}
function Sign(a: SizeInt): TValueSign;
begin
if a < 0 then result := -1
else if a > 0 then result := 1
else result := 0;
end;
{$ENDIF}
{$IFNDEF HASISNAN}
function IsNan(const d: double): boolean;
var data: array[0..1] of longword absolute d;
const LO = 0; HI = 1;
begin
//sign := (PQWord(@d)^ shr 63) <> 0;
result := ((data[HI] and $7FF00000) = $7FF00000) and
((data[LO] <> 0) or (data[HI] and not $FFF00000 <> 0));
end;
{$ENDIF}
//========================array functions========================
procedure arraySliceIndices(const higha: SizeInt; var slice1, slice2: SizeInt); overload;
begin
if (slice2 = -1) and (slice1 = -1) then begin
slice2 := higha;
slice1 := 0;
end else if slice2 = -1 then begin
slice2 := slice1;
slice1 := 0;
end;
end;
//=========================Flow control functions======================
type
{ TThreadedCall }
TThreadedCall = class(TThread)
proc: TProcedureOfObject;
procedure Execute; override;
constructor create(aproc: TProcedureOfObject;isfinished: TNotifyEvent);
end;
function TCopyingArrayList.TEnumerator.GetCurrent: TElement;
begin
result := fcurrent^;
end;
function TCopyingArrayList.TEnumerator.MoveNext: Boolean;
begin
inc(fcurrent);
result := fcurrent < fend;
end;
procedure TThreadedCall.Execute;
begin
proc();
end;
constructor TThreadedCall.create(aproc: TProcedureOfObject;isfinished: TNotifyEvent);
begin
self.proc:=aproc;
FreeOnTerminate:=true;
OnTerminate:=isfinished;
inherited create(false);
end;
function procedureToMethod(proc: TProcedure): TMethod;
begin
assert(sizeof(result.code) = sizeof(proc));
result.Data:=nil;
move(proc, result.code, sizeof(proc));
//result.code:=proc;
end;
function makeMethod(code, data: pointer): TMethod;
begin
result.Code:=code;
result.Data:=data;
end;
procedure threadedCallBase(proc: TProcedureOfObject; isfinished: TNotifyEvent);
begin
TThreadedCall.Create(proc,isfinished);
end;
function PtrToUInt(p: pointer): UIntPtr;
begin
result := {%H-}UIntPtr(p);
end;
function UIntToPtr(i: UIntPtr): pointer;
begin
result := {%H-}pointer(i);
end;
function ObjToUInt(p: TObject): UIntPtr;
begin
result := UIntPtr(p);
end;
function UIntToObj(i: UIntPtr): TObject;
begin
result := TObject(i);
end;
procedure threadedCall(proc: TProcedureOfObject; isfinished: TNotifyEvent);
begin
threadedCallBase(proc,isfinished);
end;
procedure threadedCall(proc: TProcedureOfObject; isfinished: TProcedureOfObject);
begin
threadedCallBase(proc, TNotifyEvent(isfinished));
end;
procedure threadedCall(proc: TProcedure; isfinished: TProcedureOfObject);
begin
threadedCallBase(TProcedureOfObject(procedureToMethod(proc)),TNotifyEvent(isfinished));
end;
{$ImplicitExceptions off}
function charDecodeDigit(c: char): integer;
begin
case c of
'0'..'9': result := ord(c) - ord('0');
'a'..'z': result := ord(c) - ord('a') + 10;
'A'..'Z': result := ord(c) - ord('A') + 10;
else raise Exception.Create('Character '+c+' is not a valid digit');
end;
end;
function charDecodeHexDigit(c: char): integer;
begin
case c of
'0'..'9': result := ord(c) - ord('0');
'a'..'f': result := ord(c) - ord('a') + 10;
'A'..'F': result := ord(c) - ord('A') + 10;
else raise Exception.Create('Character '+c+' is not a valid hex digit');
end;
end;
function charEncodeHexDigitUp(digit: integer): char;
begin
case digit of
0..9: result := chr(ord('0') + digit);
$A..$F: result := chr(ord('A') - $A + digit);
else begin assert(false); result := #0; end;
end;
end;
{$ImplicitExceptions on}
//=========================String functions======================
function strActualEncoding(e: TSystemCodePage): TSystemCodePage; {$ifdef HASINLINE} inline; {$endif}
begin
//this is basically TranslatePlaceholderCP, but that is unaccessible in fpc's astrings.inc
case e of
CP_ACP: result := {$IFDEF FPC_HAS_CPSTRING}DefaultSystemCodePage
{$else}{$ifdef windows}GetACP
{$else}CP_UTF8
{$endif}{$endif};
{$ifdef windows}{$ifndef WINCE}
CP_OEMCP: result := GetOEMCP;
{$endif}{$endif}
else result := e;
end;
end;
function strActualEncoding(const str: RawByteString): TSystemCodePage;
begin
result := strActualEncoding(StringCodePage(str));
end;
{$ifdef fpc}
function TStrIterator.MoveNext: Boolean;
begin
result := pos <= length(s);
fcurrent := strDecodeUTF8Character(s, pos);
end;
function TStrIterator.GetEnumerator: TStrIterator;
begin
result := self;
end;
function strIterator(const s: RawByteString): TStrIterator;
begin
result.s := s;
result.pos := 1;
end;
procedure TStrBuilder.appendWithEncodingConversion(const s: RawByteString);
var temp: RawByteString;
begin
temp := s;
SetCodePage(temp, encoding);
append(pchar(temp), length(temp));
end;
procedure TStrBuilder.appendCodePointToUtf8String(const codepoint: integer);
var
l: Integer;
begin
l := strGetUnicodeCharacterUTFLength(codepoint);
if next + l > bufferend then reserveadd(l);
strGetUnicodeCharacterUTF(codepoint, next);
inc(next, l);
end;
procedure TStrBuilder.appendCodePointWithEncodingConversion(const codepoint: integer);
var temp: RawByteString;
p: PChar;
begin
temp := strGetUnicodeCharacter(codepoint, encoding);
p := pchar(pointer(temp));
append(p, length(temp));
end;
procedure TStrBuilder.init(abuffer: pstring; basecapacity: SizeInt; aencoding: TSystemCodePage);
begin
buffer := abuffer;
if basecapacity <= 0 then basecapacity := 1;
SetLength(buffer^, basecapacity); //need to create a new string to prevent aliasing
//if length(buffer^) < basecapacity then
//else UniqueString(buffer^); //or could uniquestring be enough?
next := pchar(buffer^);
bufferend := next + length(buffer^);
//encoding := strActualEncoding(buffer^);
SetCodePage(RawByteString(buffer^), aencoding, false);
encoding := strActualEncoding(aencoding);
end;
procedure TStrBuilder.clear;
begin
next := Pointer(buffer^);
end;
function TStrBuilder.count: SizeInt;
begin
result := next - pointer(buffer^);
end;
function TStrBuilder.isEmpty: boolean;
begin
result := pchar(buffer^) = next;
end;
procedure TStrBuilder.final;
begin
if next <> bufferend then begin
setlength(buffer^, count);
next := pchar(buffer^) + length(buffer^);
bufferend := next;
end;
end;
procedure TStrBuilder.reserveadd(delta: SizeInt);
var
oldlen: SizeInt;
begin
if next + delta > bufferend then begin
oldlen := count;
SetLength(buffer^, max(2*length(buffer^), oldlen + delta));
next := pchar(buffer^) + oldlen;
bufferend := pchar(buffer^) + length(buffer^);
end;
end;
procedure TStrBuilder.append(c: char);
begin
if next >= bufferend then reserveadd(1);
next^ := c;
inc(next);
end;
procedure TStrBuilder.append(const s: RawByteString);
var
l: SizeInt;
begin
if length(s) <= 0 then exit;
if strActualEncoding(s) = encoding then begin
l := length(s);
if next + l > bufferend then reserveadd(l);
move(pbyte(s)^, next^, l);
inc(next, l);
end else
appendWithEncodingConversion(s);
end;
procedure TStrBuilder.appendRaw(const s: RawByteString);
begin
append(pchar(pointer(s)), length(s));
end;
procedure TStrBuilder.appendCodePoint(const codepoint: integer);
begin
case encoding of
CP_NONE, CP_UTF8: begin
appendCodePointToUtf8String(codepoint);
exit;
end;
CP_ASCII, CP_LATIN1:
if codepoint <= 127 then begin
append(chr(codepoint));
exit;
end
end;
appendCodePointWithEncodingConversion(codepoint);
end;
procedure TStrBuilder.append(const p: pchar; const l: SizeInt);
begin
if l <= 0 then exit;
if next + l > bufferend then reserveadd(l);
move(p^, next^, l);
inc(next, l);
end;
procedure TStrBuilder.appendBuffer(const block; l: LongInt);
begin
if l <= 0 then exit;
if next + l > bufferend then reserveadd(l);
move(block, next^, l);
inc(next, l);
end;
procedure TStrBuilder.chop(removedCount: SizeInt);
begin
dec(next, removedCount);
if next < pointer(buffer^) then next := pointer(buffer^);
end;
procedure TStrBuilder.appendHexNumber(number: integer);
var
digits, tempnumber: Integer;
begin
assert(number >= 0);
tempnumber := number;
digits := 0;
while tempnumber > 0 do begin
inc(digits);
tempnumber := tempnumber shr 4;
end;
appendHexNumber(number, digits);
end;
procedure TStrBuilder.appendHexNumber(number, digits: integer);
var
i: Integer;
e: pchar;
begin
assert(number >= 0);
if digits <= 0 then digits := 1;
while (digits < sizeof(integer)*2) and (number shr (4 * digits) > 0) do inc(digits);
reserveadd(digits);
e := next + digits - 1;
for i := 1 to digits do begin
e^ := charEncodeHexDigitUp(number and $F);
dec(e);
number := number shr 4;
end;
next := next + digits;
end;
procedure TStrBuilder.appendNumber(number: Int64);
var s: shortstring;
begin
Str(number, s);
append(@s[1], length(s));
end;
function TBaseArrayList.getCapacity: SizeInt;
begin
result := length(FBuffer)
end;
procedure TBaseArrayList.setCount(NewCount: SizeInt);
begin
SetLength(FBuffer, NewCount);
FCount := NewCount;
end;
procedure TBaseArrayList.checkIndex(AIndex: SizeInt);
begin
if (AIndex < 0) or (AIndex >= FCount) then raise ERangeError.Create('Invalid array list index: ' + IntToStr(AIndex) + ', count: ' +IntToStr(FCount));
end;
procedure TBaseArrayList.init;
begin
FBuffer := nil;
FCount := 0;
end;
procedure TBaseArrayList.addAll(other: TBaseArrayList);
var
i: SizeInt;
begin
for i := 0 to other.FCount - 1 do
add(other.FBuffer[i]);
end;
procedure TBaseArrayList.addAll(const other: array of TElement);
var
i: SizeInt;
begin
for i := 0 to high(other) do
add(other[i]);
end;
procedure TBaseArrayList.add(const item: TElement);
begin
if FCount = capacity then expand;
fbuffer[fcount] := item;
inc(fcount);
end;
procedure TBaseArrayList.clear;
begin
SetLength(FBuffer, 0);
FCount := 0;
end;
procedure TBaseArrayList.delete(Index: SizeInt);
var
p: PElement;
moveCount: SizeInt;
begin
checkIndex(index);
FBuffer[index] := Default(TElement);
moveCount := FCount - index - 1;
p := @fbuffer[index];
move((p + index)^, p, sizeof(TElement) * moveCount);
dec(fcount);
fillchar(fbuffer[fcount], sizeof(TElement), 0);
end;
procedure TBaseArrayList.exchange(Index1, Index2: SizeInt);
var temp: TElement;
begin
//checkIndex(index1);
//checkIndex(index2);
temp := FBuffer[index1];
FBuffer[index1] := FBuffer[index2];
FBuffer[index2] := temp;
end;
procedure TBaseArrayList.expand;
var
cap: SizeInt;
begin
cap := length(FBuffer);
if cap < 16 then SetLength(FBuffer, cap + 4)
else if cap < 1024 then SetLength(FBuffer, cap * 2)
else SetLength(FBuffer, cap + 512);
end;
function TBaseArrayList.toSharedArray: TArrayBuffer;
begin
SetLength(FBuffer, fcount);
result := fbuffer;
end;
function TRecordArrayList.get(Index: SizeInt): PElement;
begin
//checkIndex(index);
result := @FBuffer[index];
end;
procedure TRecordArrayList.put(Index: SizeInt; Item: PElement);
begin
//checkIndex(index);
FBuffer[index] := item^;
end;
function TRecordArrayList.first: PElement;
begin
result := @FBuffer[0];
end;
function TRecordArrayList.last: PElement;
begin
result := @FBuffer[fcount - 1];
end;
function TCopyingArrayList.get(Index: SizeInt): TElement;
begin
//checkIndex(index);
result := FBuffer[index];
end;
procedure TCopyingArrayList.put(Index: SizeInt; const Item: TElement);
begin
//checkIndex(index);
FBuffer[index] := item;
end;
function TCopyingArrayList.first: TElement;
begin
result := FBuffer[0];
end;
function TCopyingArrayList.last: TElement;
begin
result := FBuffer[fcount - 1];
end;
function TCopyingArrayList.GetEnumerator: TEnumerator;
begin
if fbuffer <> nil then begin
result.FCurrent := @FBuffer[0];
result.fend := result.fcurrent + fcount;
dec(result.fcurrent);
end else begin
result.fcurrent := nil;
result.fend := nil;
end
end;
{$endif}
function strlmove(dest, source: pansichar; destLen, sourceLen: SizeInt): pansichar;
begin
move(source^,dest^,min(sourceLen,destLen));
result:=dest;
end;
function widestrlmove(dest, source: pwidechar; destLen, sourceLen: SizeInt): pwidechar;
begin
move(source^,dest^,min(sourceLen,destLen)*sizeof(widechar));
result:=dest;
end;
{$ifndef HAS_CPSTRING}
function StringCodePage(const str: RawByteString): TSystemCodePage;
begin
result := CP_ACP;
end;
procedure SetCodePage(var s: RawByteString; CodePage: TSystemCodePage; Convert: Boolean);
begin
UniqueString(s); //it does have some side effects
end;
{$endif}
//---------------------Comparison----------------------------
//--Length-limited
function strlEqual(const p1, p2: pansichar; const l: SizeInt): boolean;
begin
result:=(strlcomp(p1, p2, l) = 0);
end;
//Length limited && null terminated
//equal comparison, case sensitive, stopping at #0-bytes
function strlequal(const p1,p2:pansichar;const l1,l2: SizeInt):boolean;
begin
result:=(l1=l2) and (strlcomp(p1, p2,l1) = 0);
end;
//equal comparison, case insensitive, stopping at #0-bytes
function strliEqual(const p1, p2: pansichar; const l: SizeInt): boolean;
begin
result:=(strlicomp(p1,p2,l)=0);
end;
//equal comparison, case insensitive, stopping at #0-bytes
function strliequal(const p1,p2:pansichar;const l1,l2: SizeInt):boolean;
begin
result:=(l1=l2) and (strlicomp(p1,p2,l1)=0);
end;
{$IFNDEF FPC}
function compareByte(const a, b; size: SizeInt): SizeInt;
var ap, bp: pansichar;
i: SizeInt;
begin
ap := @a;
bp := @b;
for i:=1 to size do begin
if ap^ < bp^ then begin result := -1; exit; end;
if ap^ > bp^ then begin result := 1; exit; end;
inc(ap);
inc(bp);
end;
begin result := 0; exit; end;
end;
{$ENDIF}
//equal comparison, case sensitive, ignoring #0-bytes
function strlsequal(const p1,p2:pansichar;const l: SizeInt):boolean; {$IFDEF HASINLINE} inline; {$ENDIF}
begin
result:= (CompareByte(p1^, p2^, l) = 0);
end;
//equal comparison, case sensitive, ignoring #0-bytes
function strlsequal(const p1,p2:pansichar;const l1,l2: SizeInt):boolean; {$IFDEF HASINLINE} inline; {$ENDIF}
begin
result:= (l1=l2) and (CompareByte(p1^, p2^, l1) = 0);
end;
function strlsiEqual(const p1, p2: pansichar; const l: SizeInt): boolean;
var i: SizeInt;
c1, c2:integer;
begin
result := true;
for i := 0 to l-1 do
if p1[i] <> p2[i] then begin
c1 := ord(p1[i]);
c2 := ord(p2[i]);
if c1 in [97..122] then dec(c1, 32);
if c2 in [97..122] then dec(c2, 32);
if c1 <> c2 then begin result := false; exit; end;
end;
end;
//equal comparison, case insensitive, ignoring #0-bytes
function strlsiequal(const p1, p2: pansichar; const l1, l2: SizeInt): boolean;
begin
result:=(l1=l2) and strlsiequal(p1, p2, l1);
end;
//equal comparison, case sensitive, stopping at #0-bytes in p1, ignoring #0-bytes in l2
function strlnsequal(p1,p2:pansichar;l2: SizeInt):boolean;
var i:SizeInt;
begin
for i:=0 to l2-1 do begin
if p1[i]<>p2[i] then
begin result := false; exit; end;
if p1[i]=#0 then
begin result := i = l2-1; exit; end
end;
result:=true;
end;
//equal comparison, case insensitive, stopping at #0-bytes in p1, ignoring #0-bytes in l2
function strlnsiequal(p1,p2:pansichar;l2: SizeInt):boolean;
var i:SizeInt;
begin
for i:=0 to l2-1 do begin
if upcase(p1[i])<>upcase(p2[i]) then
begin result := false; exit; end;
if p1[i]=#0 then
begin result := i = l2-1; exit; end
end;
result:=true;
end;
function strlsequal(p: pansichar; const s: string; l: SizeInt): boolean;
var
q: PAnsiChar;
begin
q := pansichar(pointer(s));
result:=(l = length(s)) and ((l = 0) or (strlsequal(p, q, l,l)));
end;
function strlequal(p: pansichar; const s: string; l: SizeInt): boolean;
var
q: PAnsiChar;
begin
q := pansichar(pointer(s));
result := (l = length(s)) and ( (l = 0) or strlsequal(p, q, l, l));
end;
function strliequal(p: pansichar; const s:string;l: SizeInt): boolean;
var
q: PAnsiChar;
begin
q := pansichar(pointer(s));
result := (l = length(s)) and ( (l = 0) or strlsiequal(p, q, l, l));
end;
function strEqual(const s1, s2: RawByteString): boolean;
begin
{$if (FPC_FULLVERSION >= 20700) and (FPC_FULLVERSION <= 30200)}
//the first fpc versions with codepage aware strings had really bad equality tests
if pointer(s1) = pointer(s2) then begin
result := true;
exit;
end;
if StringCodePage(s1) <> StringCodePage(s2) then
if strActualEncoding(StringCodePage(s1)) <> strActualEncoding(StringCodePage(s2)) then begin
result := s1 = s2; //this is slow due to encoding conversion
exit;
end;
if length(s1) <> length(s2) then begin
result := false;
exit;
end;
result:=CompareByte(pchar(pointer(s1))^, pchar(pointer(s2))^, length(s1)) = 0;
{$else}
result := s1 = s2;
{$endif}
end;
function striequal(const s1, s2: rawbytestring): boolean;
begin
result:=CompareText(s1,s2)=0;
end;
function strlbeginswith(const p: pansichar; l: SizeInt; const expectedStart: string): boolean;
var
q: PAnsiChar;
begin
q := pansichar(pointer(expectedStart));
result:=(expectedStart='') or ((l>=length(expectedStart)) and (strlsequal(p,q,length(expectedStart),length(expectedStart))));
end;
function strlibeginswith(const p: pansichar; l: SizeInt; const expectedStart: string): boolean;
var
q: PAnsiChar;
begin
q := pansichar(pointer(expectedStart));
result:=(expectedStart='') or ((l>=length(expectedStart)) and (strlsiequal(p,q,length(expectedStart),length(expectedStart))));
end;
function strbeginswith(const p: pansichar; const expectedStart: string): boolean;
var
q: PAnsiChar;
begin
q := pansichar(pointer(expectedStart));
result:=(expectedStart='') or (strlnsequal(p, q, length(expectedStart)));
end;
function stribeginswith(const p: pansichar; const expectedStart: string): boolean;
var
q: PAnsiChar;
begin
q := pansichar(pointer(expectedStart));
result:=(expectedStart='') or (strlnsiequal(p, q, length(expectedStart)));
end;
function strbeginswith(const strToBeExaminated,expectedStart: string): boolean;
var
strLength, expectedLength: SizeInt;
begin
strLength := length(strToBeExaminated);
expectedLength := length(expectedStart);
result:= (strLength >= expectedLength) and ( (expectedStart='') or (CompareByte(PByte(strToBeExaminated)^, pbyte(expectedStart)^, expectedLength ) = 0));
end;
function stribeginswith(const strToBeExaminated,expectedStart: string): boolean;
var
strLength, expectedLength: SizeInt;
begin
strLength := length(strToBeExaminated);
expectedLength := length(expectedStart);
result:= (strLength >= expectedLength) and ( (expectedStart='') or strlsiequal(pansichar(pointer(strToBeExaminated)), pansichar(pointer(expectedStart)), expectedLength));
end;
function strendswith(const strToBeExaminated, expectedEnd: string): boolean;
var
strLength, expectedLength: SizeInt;
begin
strLength := length(strToBeExaminated);
expectedLength := length(expectedEnd);
result := ( strLength >= expectedLength ) and
( (expectedEnd='') or
(CompareByte(strToBeExaminated[strLength-expectedLength+1], PByte(expectedEnd)^, expectedLength) = 0) );
end;
function striendswith(const strToBeExaminated, expectedEnd: string): boolean;
begin
result := (length(strToBeExaminated)>=Length(expectedEnd)) and
( (expectedEnd='') or
(strlsiequal(@strToBeExaminated[length(strToBeExaminated)-length(expectedEnd)+1],pansichar(pointer(expectedEnd)),length(expectedEnd))) );
end;
function strlsIndexOf(str, searched: pansichar; l1, l2: SizeInt): SizeInt;
var last: pansichar;
begin
if l2<=0 then begin result := 0; exit; end;
if l1<l2 then begin result := -1; exit; end;
last:=str+(l1-l2);
result:=0;
while str <= last do begin
if str^ = searched^ then
if strlsequal(str, searched, l2) then
exit;
inc(str);
inc(result);
end;
result:=-1;
end;
function strlsIndexOf(str:pansichar; const searched: TCharSet; length: SizeInt): SizeInt;
var last: pansichar;
begin
if length<1 then begin result := -1; exit; end;
last:=str+(length-1);
result:=0;
while str <= last do begin
if str^ in searched then
exit;
inc(str);
inc(result);
end;
result:=-1;
end;
function strlsiIndexOf(str, searched: pansichar; l1, l2: SizeInt): SizeInt;
var last, startstr: pansichar;
searchedLow, searchedUp: char;
begin
if l2<=0 then begin result := 0; exit; end;
if l1<l2 then begin result := -1; exit; end;
startstr := str;
last:=str+(l1-l2);
result:=0;
searchedLow := lowercase(searched^);
searchedUp := upcase(searched^);
while str <= last do begin
if (str^ = searchedLow) or (str^ = searchedUp) then
if strlsiequal(str+1, searched+1, l2-1, l2-1) then
exit(str - startstr);
inc(str);
end;
result:=-1;
end;
function strindexof(const str, searched: string; from: SizeInt): SizeInt;
begin
if from > length(str) then begin result := 0; exit; end;
result := strlsIndexOf(pansichar(pointer(str))+from-1, pansichar(pointer(searched)), length(str) - from + 1, length(searched));
if result < 0 then begin result := 0; exit; end;
inc(result, from);
end;
function strIndexOf(const str: string; const searched: TCharSet; from: SizeInt): SizeInt;
var
i: SizeInt;
begin
for i := from to length(str) do
if str[i] in searched then begin
result := i;
exit;
end;
result := 0;
end;
function striindexof(const str, searched: string; from: SizeInt): SizeInt;
begin
if from > length(str) then begin result := 0; exit; end;
result := strlsiIndexOf(pansichar(pointer(str))+from-1, pansichar(pointer(searched)), length(str) - from + 1, length(searched));
if result < 0 then begin result := 0; exit; end;
inc(result, from);
end;
function strIndexOf(const str, searched: string): SizeInt;
begin
result := strIndexOf(str, searched, 1); //no default paramert, so you can take the address of both functions
end;
function strIndexOf(const str: string; const searched: TCharSet): SizeInt;
begin
result := strIndexOf(str, searched, 1);
end;
function striIndexOf(const str, searched: string): SizeInt;
begin
result := striIndexOf(str, searched, 1);
end;
function strlsLastIndexOf(str, searched: pansichar; l1, l2: SizeInt): SizeInt;
var last: pansichar;
begin
if l2<=0 then begin result := 0; exit; end;
if l1<l2 then begin result := -1; exit; end;
last:=str+(l1-l2);
result:=l1-l2;
while str <= last do begin
if last^ = searched^ then
if strlsequal(last, searched, l2) then
exit;
dec(last);
dec(result);
end;
result:=-1;
end;
function strlsLastIndexOf(str: pansichar; const searched: TCharSet; length: SizeInt): SizeInt;
var last: pansichar;
begin
if length<1 then begin result := -1; exit; end;
last:=str+(length-1);
result:=length-1;
while str <= last do begin
if last^ in searched then
exit;
dec(last);
dec(result);
end;
result:=-1;
end;
function strlsiLastIndexOf(str, searched: pansichar; l1, l2: SizeInt): SizeInt;
var last: pansichar;
begin
if l2<=0 then begin result := 0; exit; end;
if l1<l2 then begin result := -1; exit; end;
last:=str+(l1-l2);
result:=l1-l2;
while str <= last do begin
if upcase(last^) = upcase(searched^) then
if strlsiequal(last+1, searched+1, l2-1, l2-1) then
exit;
dec(last);
dec(result);
end;
result:=-1;
end;
function strLastIndexOf(const str: string; const searched: string; from: SizeInt): SizeInt;
begin
if from > length(str) then begin result := 0; exit; end;
result := strlsLastIndexOf(pansichar(pointer(str))+from-1, pansichar(pointer(searched)), length(str) - from + 1, length(searched));
if result < 0 then begin result := 0; exit; end;
inc(result, from);
end;
function strLastIndexOf(const str: string; const searched: TCharSet; from: SizeInt): SizeInt;
var
i: SizeInt;
begin
for i := length(str) downto from do
if str[i] in searched then begin
result := i;
exit;
end;
result := 0;
end;
function striLastIndexOf(const str: string; const searched: string; from: SizeInt): SizeInt;
begin
if from > length(str) then begin result := 0; exit; end;
result := strlsiLastIndexOf(pansichar(pointer(str))+from-1, pansichar(pointer(searched)), length(str) - from + 1, length(searched));
if result < 0 then begin result := 0; exit; end;
inc(result, from);
end;
function strLastIndexOf(const str: string; const searched: TCharSet): SizeInt;
begin
result := strLastIndexOf(str, searched, 1);
end;
function strcontains(const str, searched: string; from: SizeInt): boolean;
begin
result:=strindexof(str, searched, from) > 0;
end;
function strContains(const str: string; const searched: TCharSet; from: SizeInt): boolean;
begin
result:=strindexof(str, searched, from) > 0;
end;
function stricontains(const str, searched: string; from: SizeInt): boolean;
begin
result:=striindexof(str, searched, from) > 0;
end;
function strContains(const str, searched: string): boolean;
begin
result := strContains(str, searched, 1);
end;
function strContains(const str: string; const searched: TCharSet): boolean;
begin
result := strContains(str, searched, 1);
end;
function striContains(const str, searched: string): boolean;
begin
result := striContains(str, searched, 1);
end;
function strcopyfrom(const s: string; start: SizeInt): string; {$IFDEF HASINLINE} inline; {$ENDIF}
begin
result:=copy(s,start,length(s)-start+1);
end;
function strslice(const s: string; start, last: SizeInt): string;
begin
result:=copy(s,start,last-start+1);
end;
procedure strMoveRef(var source: string; var dest: string; const size: SizeInt); {$IFDEF HASINLINE} inline; {$ENDIF}
var clearFrom: PAnsiChar;
clearTo: PAnsiChar;
countHighSize: SizeInt;
begin
if size <= 0 then exit;
countHighSize := size - sizeof(string);
//clear reference count of target ( [dest:0..size-1] - [source:0..size-1] )
clearFrom := PAnsiChar(@dest);
clearTo := clearFrom + countHighSize;
if (clearFrom >= PAnsiChar(@source)) and (clearFrom <= PAnsiChar(@source) + countHighSize) then
clearFrom := PAnsiChar(@source) + countHighSize + sizeof(string);
if (clearTo >= PAnsiChar(@source)) and (clearTo <= PAnsiChar(@source) + countHighSize) then
clearTo := PAnsiChar(@source) - sizeof(string);
while clearFrom <= clearTo do begin
PString(clearFrom)^ := '';
inc(clearFrom, sizeof(string));
end;
//move
move(source, dest, size);
//remove source ( [source:0..size-1] - [dest:0..size-1] )
clearFrom := PAnsiChar(@source);
clearTo := clearFrom + countHighSize;
if (clearFrom >= PAnsiChar(@dest)) and (clearFrom <= PAnsiChar(@dest) + countHighSize) then
clearFrom := PAnsiChar(@dest) + countHighSize + sizeof(string);
if (clearTo >= PAnsiChar(@dest)) and (clearTo <= PAnsiChar(@dest) + countHighSize) then
clearTo := PAnsiChar(@dest) - sizeof(string);
if clearFrom <= clearTo then
FillChar(clearFrom^, PtrUInt(clearTo - clearFrom) + sizeof(string), 0);
end;
function strrpos(c: ansichar; const s: string): SizeInt;
var i:SizeInt;
begin
for i:=length(s) downto 1 do
if s[i]=c then
begin result := i; exit; end;
result := 0;
end;
function strlcount(const search: ansichar; const searchIn: pansichar; const len: SizeInt): SizeInt;
var
i: SizeInt;
begin
result:=0;
for i:=0 to len-1 do begin
if searchIn[i]=search then
inc(result);
if searchIn[i] = #0 then
exit;
end;
end;
function strCount(const str: string; const searched: ansichar; from: SizeInt): SizeInt;
var
i: SizeInt;
begin
result := 0;
for i := from to length(str) do
if str[i] = searched then inc(result);
end;
function strCount(const str: string; const searched: TCharSet; from: SizeInt): SizeInt;
var
i: SizeInt;
begin
result := 0;
for i := from to length(str) do
if str[i] in searched then inc(result);
end;
function strslice(const first, last: pansichar): string;
begin
result := '';
if first>last then exit;
SetLength(result,last-first+1);
move(first^,result[1],length(result));
end;
procedure strlTrimLeft(var p: pansichar; var l: SizeInt; const trimCharacters: TCharSet);
begin
while (l > 0) and (p^ in trimCharacters) do begin
inc(p);
dec(l);
end;
end;
procedure strlTrimRight(var p: pansichar; var l: SizeInt; const trimCharacters: TCharSet);
begin
while (l > 0) and (p[l-1] in trimCharacters) do
dec(l);
end;
procedure strlTrim(var p: pansichar; var l: SizeInt; const trimCharacters: TCharSet);
begin
strlTrimLeft(p,l,trimCharacters);
strlTrimRight(p,l,trimCharacters);
end;
function strTrimCommon(const s: string; const trimCharacters: TCharSet; const trimProc: TStrTrimProcedure): string;
var p: pansichar;
l: SizeInt;
cutOffFront: SizeInt;
begin
result := s;
l := length(Result);
if l = 0 then exit;
p := pansichar(pointer(result));
trimProc(p, l, trimCharacters);
if (p = pansichar(pointer(result))) and (l = length(result)) then exit;
cutOffFront := p - pansichar(pointer(result));
result := copy(result, 1 + cutOffFront, l);
end;
function strTrimLeft(const s: string; const trimCharacters: TCharSet): string;
begin
result:=strTrimCommon(s, trimCharacters, @strlTrimLeft);
end;
function strTrimRight(const s: string; const trimCharacters: TCharSet): string;
begin
result:=strTrimCommon(s, trimCharacters, @strlTrimRight);
end;
function strTrim(const s: string; const trimCharacters: TCharSet): string;
begin
result:=strTrimCommon(s, trimCharacters, @strlTrim);
end;
function strTrimAndNormalize(const s: string; const trimCharacters: TCharSet
): string;
var i,j: SizeInt;
begin
result:=strTrim(s,trimCharacters);
j:=1;
for i:=1 to length(result) do begin
if not (result[i] in trimCharacters) then begin
result[j]:=result[i];
inc(j);
end else if result[j-1] <> ' ' then begin
result[j]:=' ';
inc(j);
end;
end;
if j -1 <> length(result) then
setlength(result,j-1);
end;
function strNormalizeLineEndings(const s: string): string;
var
i, p: SizeInt;
begin
result := s;
if s = '' then exit;
p := 1;
for i :=1 to length(result) - 1 do begin
case result[i] of
#13: begin
result[p] := #10;
if result[i + 1] = #10 then continue;
end
else result[p] := result[i];
end;
inc(p);
end;
case result[length(result)] of
#13: result[p] := #10;
else result[p] := result[length(result)];
end;
setlength(result, p{ + 1 - 1});
{str := StringReplace(str, #13#10, #10, [rfReplaceAll]);
sr := StringReplace(str, #13, #10, [rfReplaceAll]);}
end;
function strNormalizeLineEndingsUTF8(const s: RawByteString): utf8string;
var
i, p: SizeInt;
begin
//utf 8 $2028 = e280a8, $85 = C285
result := s;
if s = '' then exit;
p := 1;
i := 1;
while i <= length(result) do begin
case result[i] of
#13: begin
result[p] := #10;
if (i + 1 <= length(Result)) then
case result[i + 1] of
#10: inc(i);
#$C2: if (i + 2 <= length(Result)) and (result[i + 2] = #$85) then inc(i, 2);
end;
end;
#$C2: begin
result[p] := result[i];
inc(i);
if (i <= length(result)) then
case result[i] of
#$85: result[p] := #10;
else begin
inc(p);
result[p] := result[i];
end;
end;
end;
#$E2: if (i + 2 <= length(result)) and (result[i + 1] = #$80) and (result[i + 2] = #$A8) then begin
result[p] := #10;
inc(i, 2);
end else result[p] := result[i];
else result[p] := result[i];
end;
inc(i);
inc(p);
end;
setlength(result, p - 1)
end;
function strPrependIfMissing(const s: string; const expectedStart: string): string;
begin
if strbeginswith(s, expectedStart) then result := s
else result := expectedStart + s;
end;
function strAppendIfMissing(const s: string; const expectedEnd: string): string;
begin
if strendswith(s, expectedEnd) then result := s
else result := s + expectedEnd;
end;
function strSplitGet(const separator: string; var remainingPart: string): string;
begin
strsplit(result,separator,remainingPart);
end;
procedure strSplit(out firstPart: string; const separator: string; var remainingPart: string);
var p:SizeInt;
begin
p:=pos(separator,remainingPart);
if p<=0 then begin
firstPart:=remainingPart;
remainingPart:='';
end else begin
firstPart:=copy(remainingPart,1,p-1);
delete(remainingPart,1,p+length(separator)-1);
end;
end;
function strWrapSplit(const Line: string; MaxCol: SizeInt; const BreakChars: TCharSet): TStringArray;
var i: SizeInt;
lastTextStart, lastBreakChance: SizeInt;
tempBreak: SizeInt;
begin
result := nil;
lastTextStart:=1;
lastBreakChance:=0;
for i := 1 to length(line) do begin
if line[i] in [#13,#10] then begin
if lastTextStart > i then continue;
arrayAdd(result, copy(Line,lastTextStart,i-lastTextStart));
lastTextStart:=i+1;
if (i < length(line)) and (line[i] <> line[i+1]) and (line[i+1] in [#13, #10]) then inc(lastTextStart);
end;
if (i < length(line)) and (line[i+1] in BreakChars) then begin
lastBreakChance:=i+1;
if lastTextStart = lastBreakChance then inc(lastTextStart); //merge seveal break characters into a single new line
end;
if i - lastTextStart + 1 >= MaxCol then begin
if lastBreakChance >= lastTextStart then begin
tempBreak := lastBreakChance;
while (tempBreak > 1) and (line[tempBreak-1] in BreakChars) do dec(tempBreak); //remove spaces before line wrap
arrayAdd(result, copy(Line,lastTextStart,tempBreak-lastTextStart));
lastTextStart:=lastBreakChance+1;
end else begin
arrayAdd(result, copy(Line, lastTextStart, MaxCol));
lastTextStart:=i+1;
end;
end;
end;
if lastTextStart <= length(line) then arrayAdd(result, strcopyfrom(line, lastTextStart));
if length(result) = 0 then arrayAdd(result, '');
end;
function strWrap(Line: string; MaxCol: Integer; const BreakChars: TCharSet): string;
begin
result := strJoin(strWrapSplit(line, MaxCol, BreakChars), LineEnding);
end;
function strReverse(s: string): string;
var
oldlen, charlen: SizeInt;
len: sizeint;
p: PChar;
q: Pchar;
begin
p := pointer(s);
len := length(s);
result := '';
SetLength(result, len);
q := pointer(result) + len;
while len > 0 do begin
oldlen := len;
strDecodeUTF8Character(p, len);
charlen := oldlen - len;
q := q - charlen;
move((p-charlen)^, q^, charlen);
end;
end;
//Given a string like openBracket .. openBracket ... closingBracket closingBracket closingBracket closingBracket , this will return everything between
//the string start and the second last closingBracket (it assumes one bracket is already opened, so 3 open vs. 4 closing => second last).
//If updateText, it will replace text with everything after that closingBracket. (always excluding the bracket itself)
function strSplitGetUntilBracketClosing(var text: string; const openBracket, closingBracket: string; updateText: boolean): string;
var pos, opened: SizeInt;
begin
opened := 1;
pos := 1;
while (pos <= length(text)) and (opened >= 1) do begin
if strlcomp(@text[pos], @openBracket[1], length(openBracket)) = 0 then begin
inc(opened);
inc(pos, length(openBracket));
end else if strlcomp(@text[pos], @closingBracket[1], length(closingBracket)) = 0 then begin
dec(opened);
inc(pos, length(closingBracket));
end else inc(pos);
end;
if opened < 1 then begin
dec(pos);
result := copy(text, 1, pos - length(closingBracket));
if updateText then delete(text, 1, pos);
end else begin
result := text;
if updateText then text := '';
end;
end;
function strSplitGetBetweenBrackets(var text: string; const openBracket, closingBracket: string; updateText: boolean): string;
var
start: SizeInt;
temp: string;
begin
start := pos(openBracket, text);
if start = 0 then begin result := ''; exit; end;
if updateText then begin
delete(text, 1, start + length(openBracket) - 1);
result := strSplitGetUntilBracketClosing(text, openBracket, closingBracket, updateText);
end else begin
temp := copy(text, start + length(openBracket), length(text));
result := strSplitGetUntilBracketClosing(temp, openBracket, closingBracket, updateText);
end;
end;
procedure strSplit(out splitted: TStringArray; s, sep: string; includeEmpty: boolean);
var p:SizeInt;
m: SizeInt;
reslen: SizeInt;
begin
splitted := nil;
reslen := 0;
if s='' then begin
if includeEmpty then begin
SetLength(splitted, 1);
splitted[0] := '';
end;
exit;
end;
p:=pos(sep,s);
m:=1;
while p>0 do begin
if p=m then begin
if includeEmpty then
arrayAddFast(splitted, reslen, '');
end else
arrayAddFast(splitted, reslen, copy(s,m,p-m));
m:=p+length(sep);
p:=strindexof(s, sep, m);
end;
if (m<>length(s)+1) or includeEmpty then
arrayAddFast(splitted, reslen, strcopyfrom(s,m));
SetLength(splitted, reslen);
end;
function strSplit(s, sep: string; includeEmpty: boolean): TStringArray;
begin
strSplit(result, s, sep, includeEmpty);
end;
//based on wikipedia
function strLengthUtf8(const str: RawByteString; out invalid: SizeInt): SizeInt;
var
p, e: pchar;
code: Integer;
begin
result := 0;
invalid := 0;
if length(str) = 0 then exit;
p := pchar(pointer(str));
e := p + length(str);
while p < e do begin
code := strDecodeUTF8Character(p, e);
inc(result);
if code < 0 then inc(invalid);
end;
end;
function strLengthUtf8(const str: RawByteString): SizeInt;
var invalid: SizeInt;
begin
result := strLengthUtf8(str, invalid);
end;
//native does not mean it is the encoding used on the platform, but that it is the endianness you get when accessing them as writing pword or pinteger
const CP_UTF16_NATIVE = {$IFDEF ENDIAN_BIG}CP_UTF16BE{$ELSE}CP_UTF16{$ENDIF};
CP_UTF32_NATIVE = {$IFDEF ENDIAN_BIG}CP_UTF32BE{$ELSE}CP_UTF32{$ENDIF};
const ENCODING_MAP_WINDOWS1252_TO_UNICODE: array[$80..$9F] of word = ( //from html5 standard. perhaps it is a windows-1252 -> unicode map?
$20AC,
$81, //2 digit code points remain unchanged
$201A, $0192, $201E, $2026, $2020, $2021, $02C6, $2030, $0160, $2039, $0152,
$8D,
$017D,
$8F, $90,
$2018, $2019, $201C, $201D, $2022, $2013, $2014, $02DC, $2122, $0161, $203A, $0153,
$9D,
$017E, $0178);
procedure strSwapEndianWord(str: PWord; countofword: SizeInt); overload;
begin
while countofword > 0 do begin
PWord(str)^ := SwapEndian(PWord(str)^);
inc(str);
dec(countofword);
end;
end;
procedure strSwapEndianWord(var str: RawByteString); overload;
begin
UniqueString(str);
assert(length(str) and 1 = 0);
strSwapEndianWord(pointer(str), length(str) div 2);
end;
procedure strSwapEndianDWord(str: PDWord; countofdword: SizeInt); overload;
begin
while countofdword > 0 do begin
PDWord(str)^ := SwapEndian(PDWord(str)^);
inc(str);
dec(countofdword);
end;
end;
procedure strSwapEndianDWord(var str: RawByteString); overload;
begin
UniqueString(str);
assert(length(str) and 3 = 0);
strSwapEndianDWord(pointer(str), length(str) div 4);
end;
function strConvertToUtf8(str: RawByteString; from: TSystemCodePage): UTF8String;
begin
result := strConvert(str, from, CP_UTF8);
end;
function strConvertFromUtf8(str: RawByteString; toe: TSystemCodePage): RawByteString;
begin
result := strConvert(str, CP_UTF8, toe);
end;
const INVALID_CHAR_1BYTE = '?';
function charCodePointToCP1252Extensions(const cp: integer): char;
begin
if (cp <= $7F) or ((cp > $9F) and (cp <= $FF)) then
result := chr(cp)
else
case cp of
//reverse of ENCODING_MAP_WINDOWS1252_TO_UNICODE
//these are mandatory for roundtrip
8364: result := #128;
129: result := #129;
8218: result := #130;
402: result := #131;
8222: result := #132;
8230: result := #133;
8224: result := #134;
8225: result := #135;
710: result := #136;
8240: result := #137;
352: result := #138;
8249: result := #139;
338: result := #140;
141: result := #141;
381: result := #142;
143: result := #143;
144: result := #144;
8216: result := #145;
8217: result := #146;
8220: result := #147;
8221: result := #148;
8226: result := #149;
8211: result := #150;
8212: result := #151;
732: result := #152;
8482: result := #153;
353: result := #154;
8250: result := #155;
339: result := #156;
157: result := #157;
382: result := #158;
376: result := #159;
//some of http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WindowsBestFit/bestfit1252.txt
//not all, just those that look nice to me. this table is already bigger than I want it to be
$2000..$2006: result := #$20;
$2010, $2011: result := #$2d;
$2205: result := #$d8; //Empty Set
$2212: result := #$2d; //Minus Sign
$2213: result := #$b1; //Minus-Or-Plus Sign
$2215: result := #$2f; //Division Slash
$2216: result := #$5c; //Set Minus
$2217: result := #$2a; //Asterisk Operator
$2218: result := #$b0; //Ring Operator
$2219: result := #$b7; //Bullet Operator
$221a: result := #$76; //Square Root
$221e: result := #$38; //Infinity
$2223: result := #$7c; //Divides
$2229: result := #$6e; //Intersection
$2236: result := #$3a; //Ratio
$223c: result := #$7e; //Tilde Operator
$2248: result := #$98; //Almost Equal To
$FF01..$FF5e: result := chr(cp - $FEE0); //fullwidth
else result := INVALID_CHAR_1BYTE;
end;
end;
function charCodePointToCP1252(const cp: integer): char; {$ifdef HASINLINE}inline;{$endif}
begin
if (cp <= $7F) or ((cp > $9F) and (cp <= $FF)) then
result := chr(cp)
else
result := charCodePointToCP1252Extensions(cp);
end;
function charCodePointToLatin1(const cp: integer): char; {$ifdef HASINLINE}inline;{$endif}
begin
if cp <= $FF then result := chr(cp)
else result := INVALID_CHAR_1BYTE;;
end;
procedure strRemoveNonASCIIFromANSI(var s: RawByteString);
var
i: SizeInt;
begin
for i := 1 to length(s) do
if s[i] > #127 then s[i] := INVALID_CHAR_1BYTE;
end;
function strConvert(const str: RawByteString; from, toCP: TSystemCodePage): RawByteString;
//beware the const aliasing! we might have pointer(result) = pointer(str), so result must not be changed before being recreated
var toCPActual: TSystemCodePage;
procedure convertUtf8ToWesternEurope(out result: RawByteString);
var
cp: Integer;
i, reslen: SizeInt;
p, e: pchar;
invalidutf8: SizeInt;
begin
reslen:=strLengthUtf8(str, invalidutf8);//character len (this is the exact result length for valid utf-8, but invalid characters confuse it)
if (reslen = length(str)) and (invalidutf8 = 0) then begin
//no special chars in str => utf-8=latin-8 => no conversion necessary
result := str;
SetCodePage(result, toCP, false);
exit;
end;
result := '';
SetLength(result,reslen);
p := pchar(pointer(str));
e := p + length(str);
i := 1;
case toCPActual of
CP_WINDOWS1252: while p < e do begin
cp := strDecodeUTF8Character(p, e);
if cp >= 0 then result[i] := charCodePointToCP1252(cp)
else result[i] := '?';
inc(i);
end;
else{CP_LATIN1:} while p < e do begin
cp := strDecodeUTF8Character(p, e);
if cp >= 0 then result[i] := charCodePointToLatin1(cp)
else result[i] := '?';
inc(i);
end;
end ;
if i - 1 <> reslen then SetLength(result, i - 1);
SetCodePage(result, toCP, false);
end;
procedure convertWesternEurope(out result: RawByteString);
var
i: SizeInt;
begin
result := str;
SetCodePage(result, toCP, false);
for i := 1 to length(result) do
if (result[i] >= #$80) and (result[i] <= #$9F) then
result := INVALID_CHAR_1BYTE;
SetCodePage(result, toCP, false);
end;
procedure convertWesternEuropeToUtf8(out result: RawByteString);
var
len, reslen, i: SizeInt;
builder: TStrBuilder;
begin ;
len:=length(str); //character and byte length of latin1-str
//guess length of resulting utf-8 string (for latin1 it is the exact length, but some 1252 characters map to 3 utf8 bytes)
reslen:=len;
for i:=1 to len do
if str[i] >= #$80 then inc(reslen);
//optimization
if reslen = len then begin
result := str;
SetCodePage(RawByteString(result), toCP, false);
exit;
end; //no special chars in str => utf-8=latin-8 => no conversion necessary
//reserve string
result := '';
builder.init(@result, reslen, CP_UTF8);
for i:=1 to len do begin
if str[i] < #$80 then
builder.append(str[i])
else if (ord(str[i]) <= high(ENCODING_MAP_WINDOWS1252_TO_UNICODE)) and (from = CP_WINDOWS1252) then
builder.appendCodePointToUtf8String(ENCODING_MAP_WINDOWS1252_TO_UNICODE[ord(str[i])])
else begin
//between $80.$FF: latin-1( abcdefgh ) = utf-8 ( 110000ab 10cdefgh )
builder.append(chr($C0 or (ord(str[i]) shr 6)));
builder.append(chr($80 or (ord(str[i]) and $3F)));
end
end;
builder.final;
end;
//convert between utf-8, 16, 32
//this function works like fpc, but fpc does not officially support storing utf-16 or utf-32 in ansistrings
procedure convertExtendedUnicode;
var temp: UnicodeString; //temporary utf-16 representation. todo: convert directly. at least for utf-16
begin
temp := '';
strAnsi2UnicodeMoveProc(pchar(pointer(str)), from, temp, length(str));
strUnicode2AnsiMoveProc(PUnicodeChar(pointer(temp)), strConvert, toCP, length(temp));
end;
begin
if (from=toCP) or (from=CP_NONE) or (toCP=CP_NONE) or (str = '') then begin
result := str;
if (toCP <> CP_NONE) then SetCodePage(result, toCP, false);
exit;
end;
from := strActualEncoding(from);
toCPActual := strActualEncoding(toCP);
if (from=toCPActual) then begin
result := str;
SetCodePage(result, toCP, false);
exit;
end;
//quick conversion for 1-byte encodings: CP_UTF8 <-> CP_LATIN1, CP_WINDOWS1252
case from of
CP_ASCII:
case toCPActual of
CP_LATIN1, CP_WINDOWS1252, CP_UTF8: begin
result := str;
SetCodePage(result, toCP, false);
exit;
end;
end;
CP_UTF8:
case toCPActual of
CP_LATIN1, CP_WINDOWS1252: begin
convertUtf8ToWesternEurope(result);
exit;
end;
CP_ASCII: begin
convertUtf8ToWesternEurope(result);
strRemoveNonASCIIFromANSI(result);
SetCodePage(result, toCP, false);
exit;
end;
end;
CP_LATIN1, CP_WINDOWS1252:
case toCPActual of
CP_LATIN1, CP_WINDOWS1252: begin
convertWesternEurope(result);
exit;
end;
CP_ASCII: begin
result := str;
strRemoveNonASCIIFromANSI(result);
SetCodePage(result, toCP, false);
exit;
end;
CP_UTF8: begin
convertWesternEuropeToUtf8(result);
exit;
end;
end;
end;
//extended unicode conversion
case from of
CP_WINDOWS1252, CP_LATIN1, CP_UTF8, CP_UTF16, CP_UTF16BE, CP_UTF32, CP_UTF32BE, CP_ASCII: case toCPActual of
CP_WINDOWS1252, CP_LATIN1, CP_UTF8, CP_UTF16, CP_UTF16BE, CP_UTF32, CP_UTF32BE, CP_ASCII: begin
strConvert := '';
convertExtendedUnicode();
exit;
end;
end;
end;
{$ifndef FPC_HAS_CPSTRING}
raise EConvertError.Create('Unknown encoding conversion: from ' + IntToStr(from) + ' to ' + IntToStr(toCP));
{$endif}
result := str;
SetCodePage(result, from, false);
SetCodePage(result, toCP, true);
end;
function strChangeEncoding(const str: RawByteString; from, toe: TSystemCodePage): RawByteString;
begin
result := strConvert(str, from, toe);
end;
function strDecodeUTF16Character(var source: PUnicodeChar): integer;
begin
result := Ord(source^);
inc(source);
if result and $f800 = $d800 then begin
//this might return nonsense, if the string ends with an incomplete surrogate
//However, as the string should be 0-terminated, this should be safe
result := ((result and $03ff) shl 10) or (ord(source^) and $03ff);
inc(source);
inc(result, $10000);
end;
end;
{$IFDEF fpc}
var
oldUnicode2AnsiMoveProc : procedure(source:punicodechar;var dest:RawByteString;cp : TSystemCodePage;len:SizeInt);
oldAnsi2UnicodeMoveProc : procedure(source:pchar;cp : TSystemCodePage;var dest:unicodestring;len:SizeInt);
procedure myUnicode2AnsiMoveProc(source:punicodechar;var dest:RawByteString;cp : TSystemCodePage;len:SizeInt);
begin
case strActualEncoding(cp) of
CP_UTF32, CP_UTF32BE,
CP_UTF16, CP_UTF16BE,
CP_UTF8,
CP_WINDOWS1252, CP_LATIN1: strUnicode2AnsiMoveProc(source, dest, cp, len);
else oldUnicode2AnsiMoveProc(source, dest, cp, len);
end;
end;
procedure myAnsi2UnicodeMoveProc(source:pchar;cp : TSystemCodePage;var dest:unicodestring;len:SizeInt);
begin
case strActualEncoding(cp) of
CP_UTF32, CP_UTF32BE,
CP_UTF16, CP_UTF16BE,
CP_UTF8,
CP_WINDOWS1252, CP_LATIN1: strAnsi2UnicodeMoveProc(source, cp, dest, len);
else oldAnsi2UnicodeMoveProc(source, cp, dest, len);
end;
end;
procedure registerFallbackUnicodeConversion;
begin
{$ifdef FPC_HAS_CPSTRING}
oldUnicode2AnsiMoveProc := widestringmanager.Unicode2AnsiMoveProc;
oldAnsi2UnicodeMoveProc := widestringmanager.Ansi2UnicodeMoveProc;
widestringmanager.Unicode2AnsiMoveProc := @myUnicode2AnsiMoveProc;
widestringmanager.Ansi2UnicodeMoveProc := @myAnsi2UnicodeMoveProc;
{$endif}
end;
procedure strUnicode2AnsiMoveProc(source:punicodechar;var dest:RawByteString;cp : TSystemCodePage;len:SizeInt);
var
destptr: PInteger;
byteptr: PAnsiChar;
last: Pointer;
builder: TStrBuilder;
cpactual: TSystemCodePage;
begin
if len = 0 then begin
dest := '';
exit;
end;
cpactual := strActualEncoding(cp);
case cpactual of
CP_UTF16, CP_UTF16BE: begin
SetLength(dest, 2*len);
move(source^, dest[1], 2 * len);
if cp <> CP_UTF16_NATIVE then strSwapEndianWord(dest);
end;
CP_UTF32, CP_UTF32BE: begin
SetLength(dest, 4*len);
last := source + len;
destptr := PInteger(@dest[1]);
len := 0;
while source < last do begin
destptr^ := strDecodeUTF16Character(source);
inc(destptr);
inc(len);
end;
if 4 * len <> length(dest) then SetLength(dest, 4*len);
if cp <> CP_UTF32_NATIVE then strSwapEndianDWord(dest);
end;
CP_WINDOWS1252, CP_LATIN1, CP_ASCII: begin
SetLength(dest, len);
last := source + len;
byteptr := @dest[1];
case cpactual of
CP_WINDOWS1252:
while source < last do begin
byteptr^ := charCodePointToCP1252(strDecodeUTF16Character(source));
inc(byteptr);
end;
else begin
while source < last do begin
byteptr^ := charCodePointToLatin1(strDecodeUTF16Character(source));
inc(byteptr);
end;
if cpactual = CP_ASCII then strRemoveNonASCIIFromANSI(dest);
end;
end;
len := byteptr - @dest[1];
if len <> length(dest) then SetLength(dest, len); //if there were surrogates, the string becomes smaller
end
else begin//default utf8
builder.init(@dest, len, CP_UTF8);
last := source + len;
while source < last do
builder.appendCodePointToUtf8String(strDecodeUTF16Character(source));
builder.final;
end
end;
SetCodePage(dest, cp, false);
end;
procedure strAnsi2UnicodeMoveProc(source:pchar;cp : TSystemCodePage;var dest:unicodestring;len:SizeInt);
var
outlen: SizeInt;
procedure writeCodepoint(codepoint: integer);
begin
inc(outlen);
if outlen > length(dest) then SetLength(dest, 2*length(dest));
if codepoint <= $FFFF then dest[outlen] := WideChar(codepoint)
else begin
dec(codepoint, $10000);
dest[outlen] := WideChar((codepoint shr 10) or %1101100000000000);
inc(outlen);
if outlen > length(dest) then SetLength(dest, 2*length(dest));
dest[outlen] := WideChar((codepoint and %0000001111111111) or %1101110000000000);
end;
end;
var
i: SizeInt;
begin
dest := '';
if len = 0 then exit;
case strActualEncoding(cp) of
CP_UTF16, CP_UTF16BE: begin
len := len - len and 1;
if len = 0 then exit;
SetLength(dest, len div 2);
move(source^, dest[1], len);
if cp <> CP_UTF16_NATIVE then strSwapEndianWord(pointer(dest), length(dest));
end;
CP_UTF32, CP_UTF32BE: begin
len := len - len and 3;
if len = 0 then exit;
SetLength(dest, len div 4);
outlen := 0;
if cp = CP_UTF32_NATIVE then begin
for i := 1 to length(dest) do begin
writeCodepoint(PInteger(source)^);
inc(source, 4);
end;
end else begin
for i := 1 to length(dest) do begin
writeCodepoint(SwapEndian(PInteger(source)^));
inc(source, 4);
end;
end;
if outlen <> length(dest) then SetLength(dest, outlen);
end;
CP_UTF8: begin
SetLength(dest, len);
outlen := 0;
while len > 0 do
writeCodepoint(strDecodeUTF8Character(source, len));
if outlen <> length(dest) then SetLength(dest, outlen);
end;
CP_WINDOWS1252: begin
SetLength(dest, len);
for i := 1 to len do begin
if (ord(source^) < low(ENCODING_MAP_WINDOWS1252_TO_UNICODE)) or (ord(source^) > high(ENCODING_MAP_WINDOWS1252_TO_UNICODE)) then
dest[i] := widechar(byte(source^))
else
dest[i] := widechar(ENCODING_MAP_WINDOWS1252_TO_UNICODE[ord(source^)]);
inc(source);
end;
end
else begin
SetLength(dest, len);
for i := 0 to len - 1 do
dest[i+1] := widechar(byte(source[i]));
end;
end;
end;
{$endif}
function strGetUnicodeCharacterUTFLength(const character: integer): integer;
begin
case character of
$00 .. $7F: result:=1;
$80 .. $7FF: result:=2;
$800 .. $FFFF: result:=3;
$10000 ..$10FFFF: result:=4;
else result := 0;
end;
end;
procedure strGetUnicodeCharacterUTF(const character: integer; buffer: pansichar);
begin
//result:=UnicodeToUTF8(character);
case character of
$00 .. $7F: buffer[0]:=chr(character);
$80 .. $7FF: begin
buffer[0] := chr($C0 or (character shr 6));
buffer[1] := chr($80 or (character and $3F));
end;
$800 .. $FFFF: begin
buffer[0] := chr($E0 or (character shr 12));
buffer[1] := chr($80 or ((character shr 6) and $3F));
buffer[2] := chr($80 or (character and $3F));
end;
$10000 ..$10FFFF: begin
buffer[0] := chr($F0 or (character shr 18));
buffer[1] := chr($80 or ((character shr 12) and $3F));
buffer[2] := chr($80 or ((character shr 6) and $3F));
buffer[3] := chr($80 or (character and $3F));
end;
end;
end;
function strGetUnicodeCharacter(const character: integer; encoding: TSystemCodePage): RawByteString;
begin
result := '';
setlength(result, strGetUnicodeCharacterUTFLength(character));
strGetUnicodeCharacterUTF(character, @result[1]);
case encoding of
CP_NONE, CP_UTF8: SetCodePage(result, CP_UTF8, false);
else result:=strConvertFromUtf8(result, encoding);
end;
end;
function strDecodeUTF8Character(const str: RawByteString; var curpos: SizeInt): integer;
var
temp: PChar;
source: Pointer;
begin
temp := PChar(pointer(str)); //this is nil for '', but that is fine
source := temp + curpos - 1;
result := strDecodeUTF8Character(source, temp + length(str));
curpos := source - temp + 1;
end;
function strDecodeUTF8Character(var source: PChar; var remainingLength: SizeInt): integer;
var
temp: PChar;
begin
temp := source;
result := strDecodeUTF8Character(source, source + remainingLength);
dec(remainingLength, source - temp);
end;
function strDecodeUTF8Character(var currentpos: pchar; afterlast: PChar): integer;
const ERROR_INVALID_STARTING_BYTE = -1;
ERROR_TRUNCATED = -2;
ERROR_INVALID_CONTINUATION_BYTE = -3;
var p: pchar;
b: byte;
label ErrorTruncated, ErrorInvalidContinuationByte;
begin
p := currentpos;
if p >= afterlast then goto ErrorTruncated;
b := ord(p^);
inc(p);
case b of
$00..$7F: result := b;
$C2..$DF: begin
result:= (b and not $C0) shl 6;
if p >= afterlast then goto ErrorTruncated; //all source >= afterlast checks could be removed, if we assume the string is 0-terminated
b := ord(p^);
if (b and $C0) <> $80 then goto ErrorInvalidContinuationByte; //these gotos are always the same in every branch
result := result or (b and not $80);
inc(p);
end;
$E0..$EF: begin
result:=(b and not $E0) shl 12;
if p >= afterlast then goto ErrorTruncated;
b := ord(p^);
if (b and $C0) <> $80 then goto ErrorInvalidContinuationByte;
case result of
{E}$0: if (b < $A0) {or (b > #$BF)} then goto ErrorInvalidContinuationByte;
{E}$D shl 12: if {(b < #$80) or} (b > $9F) then goto ErrorInvalidContinuationByte;
end;
result := result or ((b and not $80) shl 6);
inc(p);
if p >= afterlast then goto ErrorTruncated;
b := ord(p^);
if (b and $C0) <> $80 then goto ErrorInvalidContinuationByte;
result := result or ((b and not $80));
inc(p);
end;
$F0..$F4: begin
result:=((b and not $F0) shl 18);
if p >= afterlast then goto ErrorTruncated;
b := ord(p^);
if (b and $C0) <> $80 then goto ErrorInvalidContinuationByte;
case result of
{E}$0: if (b < $90) {or (b > $BF)} then goto ErrorInvalidContinuationByte;
{E}$4 shl 18: if {(b < $80) or} (b > $8F) then goto ErrorInvalidContinuationByte;
end;
result := result or ((b and not $80) shl 12);
inc(p);
if p >= afterlast then goto ErrorTruncated;
b := ord(p^);
if (b and $C0) <> $80 then goto ErrorInvalidContinuationByte;
result := result or ((b and not $80) shl 6);
inc(p);
if p >= afterlast then goto ErrorTruncated;
b := ord(p^);
if (b and $C0) <> $80 then goto ErrorInvalidContinuationByte;
result := result or ((b and not $80));
inc(p);
end;
(*
$80..$BF //in multibyte character (should never appear)
$C0..$C1: //invalid (two bytes used for single byte)
$F5..$F7 //not allowed after rfc3629
$F8..$FB //"
$FC..$FD //"
$FE..$FF //invalid*)
else result := ERROR_INVALID_STARTING_BYTE;
end;
currentpos := p;
exit;
ErrorTruncated: result := ERROR_TRUNCATED; currentpos := p; exit;
ErrorInvalidContinuationByte: result := ERROR_INVALID_CONTINUATION_BYTE; currentpos := p; exit;
end;
{$IFDEF fpc}
function strEncodingFromName(str: string): TSystemCodePage;
begin
result := {$ifdef HAS_CPSTRING}CodePageNameToCodePage(str){$else}$FFFF{$endif};
if result = $FFFF then begin
str := LowerCase(str);
case str of
//missing in fpc3
'utf8': result := CP_UTF8;
'utf-32le': result := CP_UTF32;
'oem': result := CP_OEMCP;
//fpc 2 compatibility
{$ifndef FPC_HAS_CPSTRING}
'utf-8': result := CP_UTF8;
'latin1': result := CP_LATIN1;
'utf-16', 'utf-16le': result := CP_UTF16;
'utf-16be': result := CP_UTF16BE;
'utf-32be': result := CP_UTF32BE;
'utf-32': result := CP_UTF32;
{$endif}
else if strBeginsWith(str, 'cp') then result := StrToIntDef(strAfter(str, 'cp'), CP_NONE);
end;
end;
end;
{$ENDIF}
function strEncodingFromBOMRemove(var str: RawByteString): TSystemCodePage;
begin
if strbeginswith(str,#$ef#$bb#$bf) then begin
delete(str,1,3);
result:=CP_UTF8;
end else if strbeginswith(str,#$fe#$ff) then begin
delete(str,1,2);
result:=CP_UTF16BE;
end else if strbeginswith(str,#$ff#$fe) then begin
delete(str,1,2);
result:=CP_UTF16;
end else if strbeginswith(str,#00#00#$fe#$ff) then begin
delete(str,1,4);
result:=CP_UTF32BE;
end else if strbeginswith(str,#$ff#$fe#00#00) then begin
delete(str,1,4);
result:=CP_UTF32;
end else result := CP_NONE;
end;
{$ifdef HAS_CPSTRING}
function strEncodingFromBOMRemove(var str:string):TSystemCodePage;
begin
result := strEncodingFromBOMRemove(RawByteString(str));
end;
{$endif}
function strUpperCaseSpecialUTF8(codePoint: integer): string;
const block: array[0..465] of byte = ( $53, $53, $46, $46, $46, $49, $46, $4C, $46, $46, $49, $46, $46, $4C, $53, $54, $53, $54, $D4, $B5, $D5, $92, $D5, $84, $D5, $86, $D5, $84, $D4, $B5, $D5, $84, $D4, $BB, $D5, $8E, $D5, $86, $D5, $84, $D4, $BD, $CA, $BC, $4E, $CE, $99, $CC, $88, $CC, $81, $CE, $A5, $CC, $88, $CC, $81, $4A, $CC, $8C, $48, $CC, $B1, $54, $CC, $88,
$57, $CC, $8A, $59, $CC, $8A, $41, $CA, $BE, $CE, $A5, $CC, $93, $CE, $A5, $CC, $93, $CC, $80, $CE, $A5, $CC, $93, $CC, $81, $CE, $A5, $CC, $93, $CD, $82, $CE, $91, $CD, $82, $CE, $97, $CD, $82, $CE, $99, $CC, $88, $CC, $80, $CE, $99, $CC, $88, $CC, $81, $CE, $99, $CD, $82, $CE, $99, $CC, $88, $CD, $82, $CE, $A5, $CC, $88, $CC, $80, $CE, $A5, $CC, $88, $CC, $81, $CE,
$A1, $CC, $93, $CE, $A5, $CD, $82, $CE, $A5, $CC, $88, $CD, $82, $CE, $A9, $CD, $82, $E1, $BC, $88, $CE, $99, $E1, $BC, $89, $CE, $99, $E1, $BC, $8A, $CE, $99, $E1, $BC, $8B, $CE, $99, $E1, $BC, $8C, $CE, $99, $E1, $BC, $8D, $CE, $99, $E1, $BC, $8E, $CE, $99, $E1, $BC, $8F, $CE, $99, $E1, $BC, $88, $CE, $99, $E1, $BC, $89, $CE, $99, $E1, $BC, $8A, $CE, $99, $E1, $BC,
$8B, $CE, $99, $E1, $BC, $8C, $CE, $99, $E1, $BC, $8D, $CE, $99, $E1, $BC, $8E, $CE, $99, $E1, $BC, $8F, $CE, $99, $E1, $BC, $A8, $CE, $99, $E1, $BC, $A9, $CE, $99, $E1, $BC, $AA, $CE, $99, $E1, $BC, $AB, $CE, $99, $E1, $BC, $AC, $CE, $99, $E1, $BC, $AD, $CE, $99, $E1, $BC, $AE, $CE, $99, $E1, $BC, $AF, $CE, $99, $E1, $BC, $A8, $CE, $99, $E1, $BC, $A9, $CE, $99, $E1,
$BC, $AA, $CE, $99, $E1, $BC, $AB, $CE, $99, $E1, $BC, $AC, $CE, $99, $E1, $BC, $AD, $CE, $99, $E1, $BC, $AE, $CE, $99, $E1, $BC, $AF, $CE, $99, $E1, $BD, $A8, $CE, $99, $E1, $BD, $A9, $CE, $99, $E1, $BD, $AA, $CE, $99, $E1, $BD, $AB, $CE, $99, $E1, $BD, $AC, $CE, $99, $E1, $BD, $AD, $CE, $99, $E1, $BD, $AE, $CE, $99, $E1, $BD, $AF, $CE, $99, $E1, $BD, $A8, $CE, $99,
$E1, $BD, $A9, $CE, $99, $E1, $BD, $AA, $CE, $99, $E1, $BD, $AB, $CE, $99, $E1, $BD, $AC, $CE, $99, $E1, $BD, $AD, $CE, $99, $E1, $BD, $AE, $CE, $99, $E1, $BD, $AF, $CE, $99, $CE, $91, $CE, $99, $CE, $91, $CE, $99, $CE, $97, $CE, $99, $CE, $97, $CE, $99, $CE, $A9, $CE, $99, $CE, $A9, $CE, $99, $E1, $BE, $BA, $CE, $99, $CE, $86, $CE, $99, $E1, $BF, $8A, $CE, $99, $CE,
$89, $CE, $99, $E1, $BF, $BA, $CE, $99, $CE, $8F, $CE, $99, $CE, $91, $CD, $82, $CE, $99, $CE, $97, $CD, $82, $CE, $99, $CE, $A9, $CD, $82, $CE, $99);
var special: integer;
begin
special := 0;
case codePoint of
$00DF: special := $00000002; //ß 00DF; 00DF; 0053 0073; 0053 0053;
$FB00: special := $00020002; //ff FB00; FB00; 0046 0066; 0046 0046;
$FB01: special := $00040002; //fi FB01; FB01; 0046 0069; 0046 0049;
$FB02: special := $00060002; //fl FB02; FB02; 0046 006C; 0046 004C;
$FB03: special := $00080003; //ffi FB03; FB03; 0046 0066 0069; 0046 0046 0049;
$FB04: special := $000B0003; //ffl FB04; FB04; 0046 0066 006C; 0046 0046 004C;
$FB05: special := $000E0002; //ſt FB05; FB05; 0053 0074; 0053 0054;
$FB06: special := $00100002; //st FB06; FB06; 0053 0074; 0053 0054;
$0587: special := $00120004; //և 0587; 0587; 0535 0582; 0535 0552;
$FB13: special := $00160004; //ﬓ FB13; FB13; 0544 0576; 0544 0546;
$FB14: special := $001A0004; //ﬔ FB14; FB14; 0544 0565; 0544 0535;
$FB15: special := $001E0004; //ﬕ FB15; FB15; 0544 056B; 0544 053B;
$FB16: special := $00220004; //ﬖ FB16; FB16; 054E 0576; 054E 0546;
$FB17: special := $00260004; //ﬗ FB17; FB17; 0544 056D; 0544 053D;
$0149: special := $002A0003; //ʼn 0149; 0149; 02BC 004E; 02BC 004E;
$0390: special := $002D0006; //ΐ 0390; 0390; 0399 0308 0301; 0399 0308 0301;
$03B0: special := $00330006; //ΰ 03B0; 03B0; 03A5 0308 0301; 03A5 0308 0301;
$01F0: special := $00390003; //ǰ 01F0; 01F0; 004A 030C; 004A 030C;
$1E96: special := $003C0003; //ẖ 1E96; 1E96; 0048 0331; 0048 0331;
$1E97: special := $003F0003; //ẗ 1E97; 1E97; 0054 0308; 0054 0308;
$1E98: special := $00420003; //ẘ 1E98; 1E98; 0057 030A; 0057 030A;
$1E99: special := $00450003; //ẙ 1E99; 1E99; 0059 030A; 0059 030A;
$1E9A: special := $00480003; //ẚ 1E9A; 1E9A; 0041 02BE; 0041 02BE;
$1F50: special := $004B0004; //ὐ 1F50; 1F50; 03A5 0313; 03A5 0313;
$1F52: special := $004F0006; //ὒ 1F52; 1F52; 03A5 0313 0300; 03A5 0313 0300;
$1F54: special := $00550006; //ὔ 1F54; 1F54; 03A5 0313 0301; 03A5 0313 0301;
$1F56: special := $005B0006; //ὖ 1F56; 1F56; 03A5 0313 0342; 03A5 0313 0342;
$1FB6: special := $00610004; //ᾶ 1FB6; 1FB6; 0391 0342; 0391 0342;
$1FC6: special := $00650004; //ῆ 1FC6; 1FC6; 0397 0342; 0397 0342;
$1FD2: special := $00690006; //ῒ 1FD2; 1FD2; 0399 0308 0300; 0399 0308 0300;
$1FD3: special := $006F0006; //ΐ 1FD3; 1FD3; 0399 0308 0301; 0399 0308 0301;
$1FD6: special := $00750004; //ῖ 1FD6; 1FD6; 0399 0342; 0399 0342;
$1FD7: special := $00790006; //ῗ 1FD7; 1FD7; 0399 0308 0342; 0399 0308 0342;
$1FE2: special := $007F0006; //ῢ 1FE2; 1FE2; 03A5 0308 0300; 03A5 0308 0300;
$1FE3: special := $00850006; //ΰ 1FE3; 1FE3; 03A5 0308 0301; 03A5 0308 0301;
$1FE4: special := $008B0004; //ῤ 1FE4; 1FE4; 03A1 0313; 03A1 0313;
$1FE6: special := $008F0004; //ῦ 1FE6; 1FE6; 03A5 0342; 03A5 0342;
$1FE7: special := $00930006; //ῧ 1FE7; 1FE7; 03A5 0308 0342; 03A5 0308 0342;
$1FF6: special := $00990004; //ῶ 1FF6; 1FF6; 03A9 0342; 03A9 0342;
$1F80: special := $009D0005; //ᾀ 1F80; 1F80; 1F88; 1F08 0399;
$1F81: special := $00A20005; //ᾁ 1F81; 1F81; 1F89; 1F09 0399;
$1F82: special := $00A70005; //ᾂ 1F82; 1F82; 1F8A; 1F0A 0399;
$1F83: special := $00AC0005; //ᾃ 1F83; 1F83; 1F8B; 1F0B 0399;
$1F84: special := $00B10005; //ᾄ 1F84; 1F84; 1F8C; 1F0C 0399;
$1F85: special := $00B60005; //ᾅ 1F85; 1F85; 1F8D; 1F0D 0399;
$1F86: special := $00BB0005; //ᾆ 1F86; 1F86; 1F8E; 1F0E 0399;
$1F87: special := $00C00005; //ᾇ 1F87; 1F87; 1F8F; 1F0F 0399;
$1F88: special := $00C50005; //ᾈ 1F88; 1F80; 1F88; 1F08 0399;
$1F89: special := $00CA0005; //ᾉ 1F89; 1F81; 1F89; 1F09 0399;
$1F8A: special := $00CF0005; //ᾊ 1F8A; 1F82; 1F8A; 1F0A 0399;
$1F8B: special := $00D40005; //ᾋ 1F8B; 1F83; 1F8B; 1F0B 0399;
$1F8C: special := $00D90005; //ᾌ 1F8C; 1F84; 1F8C; 1F0C 0399;
$1F8D: special := $00DE0005; //ᾍ 1F8D; 1F85; 1F8D; 1F0D 0399;
$1F8E: special := $00E30005; //ᾎ 1F8E; 1F86; 1F8E; 1F0E 0399;
$1F8F: special := $00E80005; //ᾏ 1F8F; 1F87; 1F8F; 1F0F 0399;
$1F90: special := $00ED0005; //ᾐ 1F90; 1F90; 1F98; 1F28 0399;
$1F91: special := $00F20005; //ᾑ 1F91; 1F91; 1F99; 1F29 0399;
$1F92: special := $00F70005; //ᾒ 1F92; 1F92; 1F9A; 1F2A 0399;
$1F93: special := $00FC0005; //ᾓ 1F93; 1F93; 1F9B; 1F2B 0399;
$1F94: special := $01010005; //ᾔ 1F94; 1F94; 1F9C; 1F2C 0399;
$1F95: special := $01060005; //ᾕ 1F95; 1F95; 1F9D; 1F2D 0399;
$1F96: special := $010B0005; //ᾖ 1F96; 1F96; 1F9E; 1F2E 0399;
$1F97: special := $01100005; //ᾗ 1F97; 1F97; 1F9F; 1F2F 0399;
$1F98: special := $01150005; //ᾘ 1F98; 1F90; 1F98; 1F28 0399;
$1F99: special := $011A0005; //ᾙ 1F99; 1F91; 1F99; 1F29 0399;
$1F9A: special := $011F0005; //ᾚ 1F9A; 1F92; 1F9A; 1F2A 0399;
$1F9B: special := $01240005; //ᾛ 1F9B; 1F93; 1F9B; 1F2B 0399;
$1F9C: special := $01290005; //ᾜ 1F9C; 1F94; 1F9C; 1F2C 0399;
$1F9D: special := $012E0005; //ᾝ 1F9D; 1F95; 1F9D; 1F2D 0399;
$1F9E: special := $01330005; //ᾞ 1F9E; 1F96; 1F9E; 1F2E 0399;
$1F9F: special := $01380005; //ᾟ 1F9F; 1F97; 1F9F; 1F2F 0399;
$1FA0: special := $013D0005; //ᾠ 1FA0; 1FA0; 1FA8; 1F68 0399;
$1FA1: special := $01420005; //ᾡ 1FA1; 1FA1; 1FA9; 1F69 0399;
$1FA2: special := $01470005; //ᾢ 1FA2; 1FA2; 1FAA; 1F6A 0399;
$1FA3: special := $014C0005; //ᾣ 1FA3; 1FA3; 1FAB; 1F6B 0399;
$1FA4: special := $01510005; //ᾤ 1FA4; 1FA4; 1FAC; 1F6C 0399;
$1FA5: special := $01560005; //ᾥ 1FA5; 1FA5; 1FAD; 1F6D 0399;
$1FA6: special := $015B0005; //ᾦ 1FA6; 1FA6; 1FAE; 1F6E 0399;
$1FA7: special := $01600005; //ᾧ 1FA7; 1FA7; 1FAF; 1F6F 0399;
$1FA8: special := $01650005; //ᾨ 1FA8; 1FA0; 1FA8; 1F68 0399;
$1FA9: special := $016A0005; //ᾩ 1FA9; 1FA1; 1FA9; 1F69 0399;
$1FAA: special := $016F0005; //ᾪ 1FAA; 1FA2; 1FAA; 1F6A 0399;
$1FAB: special := $01740005; //ᾫ 1FAB; 1FA3; 1FAB; 1F6B 0399;
$1FAC: special := $01790005; //ᾬ 1FAC; 1FA4; 1FAC; 1F6C 0399;
$1FAD: special := $017E0005; //ᾭ 1FAD; 1FA5; 1FAD; 1F6D 0399;
$1FAE: special := $01830005; //ᾮ 1FAE; 1FA6; 1FAE; 1F6E 0399;
$1FAF: special := $01880005; //ᾯ 1FAF; 1FA7; 1FAF; 1F6F 0399;
$1FB3: special := $018D0004; //ᾳ 1FB3; 1FB3; 1FBC; 0391 0399;
$1FBC: special := $01910004; //ᾼ 1FBC; 1FB3; 1FBC; 0391 0399;
$1FC3: special := $01950004; //ῃ 1FC3; 1FC3; 1FCC; 0397 0399;
$1FCC: special := $01990004; //ῌ 1FCC; 1FC3; 1FCC; 0397 0399;
$1FF3: special := $019D0004; //ῳ 1FF3; 1FF3; 1FFC; 03A9 0399;
$1FFC: special := $01A10004; //ῼ 1FFC; 1FF3; 1FFC; 03A9 0399;
$1FB2: special := $01A50005; //ᾲ 1FB2; 1FB2; 1FBA 0345; 1FBA 0399;
$1FB4: special := $01AA0004; //ᾴ 1FB4; 1FB4; 0386 0345; 0386 0399;
$1FC2: special := $01AE0005; //ῂ 1FC2; 1FC2; 1FCA 0345; 1FCA 0399;
$1FC4: special := $01B30004; //ῄ 1FC4; 1FC4; 0389 0345; 0389 0399;
$1FF2: special := $01B70005; //ῲ 1FF2; 1FF2; 1FFA 0345; 1FFA 0399;
$1FF4: special := $01BC0004; //ῴ 1FF4; 1FF4; 038F 0345; 038F 0399;
$1FB7: special := $01C00006; //ᾷ 1FB7; 1FB7; 0391 0342 0345; 0391 0342 0399;
$1FC7: special := $01C60006; //ῇ 1FC7; 1FC7; 0397 0342 0345; 0397 0342 0399;
$1FF7: special := $01CC0006; //ῷ 1FF7; 1FF7; 03A9 0342 0345; 03A9 0342 0399;
end;
if special <> 0 then begin result := ''; setlength(result, special and $FFFF); move(block[special shr 16], result[1], length(result)); end
else result := strGetUnicodeCharacter(CodePoint);
end;
function strLowerCaseSpecialUTF8(codePoint: integer): string;
const block: array[0..83] of byte = ( $69, $CC, $87, $E1, $BE, $80, $E1, $BE, $81, $E1, $BE, $82, $E1, $BE, $83, $E1, $BE, $84, $E1, $BE, $85, $E1, $BE, $86, $E1, $BE, $87, $E1, $BE, $90, $E1, $BE, $91, $E1, $BE, $92, $E1, $BE, $93, $E1, $BE, $94, $E1, $BE, $95, $E1, $BE, $96, $E1, $BE, $97, $E1, $BE, $A0, $E1, $BE, $A1, $E1, $BE, $A2, $E1, $BE, $A3, $E1, $BE, $A4, $E1, $BE, $A5, $E1, $BE, $A6, $E1, $BE, $A7, $E1, $BE, $B3, $E1, $BF, $83, $E1, $BF, $B3);
var special: integer;
begin
special := 0;
case codePoint of
$0130: special := $00000003; //İ 0130; 0069 0307; 0130; 0130;
$1F88: special := $00030003; //ᾈ 1F88; 1F80; 1F88; 1F08 0399;
$1F89: special := $00060003; //ᾉ 1F89; 1F81; 1F89; 1F09 0399;
$1F8A: special := $00090003; //ᾊ 1F8A; 1F82; 1F8A; 1F0A 0399;
$1F8B: special := $000C0003; //ᾋ 1F8B; 1F83; 1F8B; 1F0B 0399;
$1F8C: special := $000F0003; //ᾌ 1F8C; 1F84; 1F8C; 1F0C 0399;
$1F8D: special := $00120003; //ᾍ 1F8D; 1F85; 1F8D; 1F0D 0399;
$1F8E: special := $00150003; //ᾎ 1F8E; 1F86; 1F8E; 1F0E 0399;
$1F8F: special := $00180003; //ᾏ 1F8F; 1F87; 1F8F; 1F0F 0399;
$1F98: special := $001B0003; //ᾘ 1F98; 1F90; 1F98; 1F28 0399;
$1F99: special := $001E0003; //ᾙ 1F99; 1F91; 1F99; 1F29 0399;
$1F9A: special := $00210003; //ᾚ 1F9A; 1F92; 1F9A; 1F2A 0399;
$1F9B: special := $00240003; //ᾛ 1F9B; 1F93; 1F9B; 1F2B 0399;
$1F9C: special := $00270003; //ᾜ 1F9C; 1F94; 1F9C; 1F2C 0399;
$1F9D: special := $002A0003; //ᾝ 1F9D; 1F95; 1F9D; 1F2D 0399;
$1F9E: special := $002D0003; //ᾞ 1F9E; 1F96; 1F9E; 1F2E 0399;
$1F9F: special := $00300003; //ᾟ 1F9F; 1F97; 1F9F; 1F2F 0399;
$1FA8: special := $00330003; //ᾨ 1FA8; 1FA0; 1FA8; 1F68 0399;
$1FA9: special := $00360003; //ᾩ 1FA9; 1FA1; 1FA9; 1F69 0399;
$1FAA: special := $00390003; //ᾪ 1FAA; 1FA2; 1FAA; 1F6A 0399;
$1FAB: special := $003C0003; //ᾫ 1FAB; 1FA3; 1FAB; 1F6B 0399;
$1FAC: special := $003F0003; //ᾬ 1FAC; 1FA4; 1FAC; 1F6C 0399;
$1FAD: special := $00420003; //ᾭ 1FAD; 1FA5; 1FAD; 1F6D 0399;
$1FAE: special := $00450003; //ᾮ 1FAE; 1FA6; 1FAE; 1F6E 0399;
$1FAF: special := $00480003; //ᾯ 1FAF; 1FA7; 1FAF; 1F6F 0399;
$1FBC: special := $004B0003; //ᾼ 1FBC; 1FB3; 1FBC; 0391 0399;
$1FCC: special := $004E0003; //ῌ 1FCC; 1FC3; 1FCC; 0397 0399;
$1FFC: special := $00510003; //ῼ 1FFC; 1FF3; 1FFC; 03A9 0399;
end;
if special <> 0 then begin result := ''; setlength(result, special and $FFFF); move(block[special shr 16], result[1], length(result)); end
else result := strGetUnicodeCharacter(CodePoint);
end;
function strEscape(s: string; const toEscape: TCharSet; escapeChar: ansichar): string;
var
i: SizeInt;
sb: TStrBuilder;
begin
result := '';
if length(s) = 0 then exit;
sb.init(@result, length(s));
for i:=1 to length(s) do begin
if s[i] in toEscape then sb.append(escapeChar);
sb.append(s[i]);
end;
sb.final;
end;
function strEscapeToHex(s:string; const toEscape: TCharSet; escape: string): string;
var
p, i: SizeInt;
escapeCount: SizeInt;
escapeP: pansichar;
begin
result := s;
escapeCount := strCount(s, toEscape);
if escapeCount = 0 then exit
else if length(s) = 0 then exit;
if length(escape) > 0 then escapeP := @escape[1]
else escapeP := @s[1]; //value is not used, but
SetLength(result, length(s) + escapeCount * ( 2 + length(escape) - 1 ));
p := 1;
for i := 1 to length(s) do
if not (s[i] in toEscape) then begin
result[p] := s[i];
inc(p);
end else begin
move(escapeP^, result[p], length(escape));
inc(p, length(escape));
result[p] := charEncodeHexDigitUp(ord(s[i]) shr 4);
result[p+1] := charEncodeHexDigitUp(ord(s[i]) and $F);
inc(p, 2);
end;
Assert(p = length(result) + 1);
//setlength(result, p-1);
end;
function strUnescapeHex(s: string; escape: string): string;
var
f, t: SizeInt;
start: SizeInt;
last: SizeInt;
pescape: PChar;
begin
if escape = '' then begin
result := {%H-}strDecodeHex(s);
exit;
end;
start := pos(escape, s);
if start <= 0 then begin
result := s;
exit;
end;
SetLength(result, length(s));
move(s[1], result[1], start-1);
f := start;
t := start;
last := length(s) - length(escape) + 1 - 2;
pescape := pchar(escape);
while f <= last do begin
if strlsequal(@s[f], pescape, length(escape)) then begin
inc(f, length(escape));
result[t] := chr(charDecodeHexDigit(s[f]) shl 4 or charDecodeHexDigit(s[f+1]));
inc(f, 2);
inc(t, 1);
end else begin
result[t] := s[f];
inc(f, 1);
inc(t, 1);
end;
end;
if (f > last) and (f <= length(s)) then begin
move(s[f], result[t], length(s) - f + 1);
inc(t, length(s) - f + 1);
end;
SetLength(result, t-1);
end;
function strEscapeRegex(const s: string): string;
begin
result := strEscape(s, ['(','|', '.', '*', '?', '^', '$', '-', '[', '{', '}', ']', ')', '\'], '\');
end;
function strDecodeHTMLEntities(s: string; encoding: TSystemCodePage; flags: TDecodeHTMLEntitiesFlags = []): RawByteString;
begin
result:=strDecodeHTMLEntities(pansichar(s), length(s), encoding, flags);
end;
function strDecodeHex(s: string): string;
var
i: SizeInt;
begin
assert(length(s) and 1 = 0);
result := '';
setlength(result, length(s) div 2);
for i:=1 to length(result) do
result[i] := chr((charDecodeHexDigit(s[2*i-1]) shl 4) or charDecodeHexDigit(s[2*i]));
end;
function strEncodeHex(s: string; const code: string): string;
var
o: Integer;
pcode: pansichar;
i: SizeInt;
begin
assert(length(code) = 16);
pcode := @code[1];
result := '';
setlength(result, length(s) * 2);
for i:=1 to length(s) do begin
o := ord(s[i]);
result[2*i - 1] := pcode[o shr 4];
result[2*i ] := pcode[o and $F];
end;
end;
function strFromPchar(p: pansichar; l: SizeInt): string;
begin
if l=0 then begin result := ''; exit; end;
result := '';
setlength(result,l);
move(p^,result[1],l);
end;
{function strFromPchar(p: pansichar; l: SizeInt; encoding: TSystemCodePage): RawByteString;
begin
if l=0 then begin result := ''; exit; end;
result := '';
setlength(result,l);
move(p^,result[1],l);
SetCodePage(result, encoding, false);
end;}
function strBeforeLast(const s: string; const sep: TCharSet): string;
var i: SizeInt;
begin
i := strLastIndexOf(s, sep);
if i = 0 then result := ''
else result := copy(s, 1, i-1);
end;
function strAfterLast(const s: string; const sep: TCharSet): string;
var
i: SizeInt;
begin
i := strLastIndexOf(s, sep);
if i = 0 then result := ''
else result := strcopyfrom(s, i + 1);
end;
function strJoin(const sl: TStrings; const sep: string = ', '; limit: Integer=0; const limitStr: string='...'): string; overload;
var i:Integer;
begin
Result:='';
if sl.Count=0 then exit;
result:=sl[0];
if (limit = 0) or (sl.count <= abs(limit)) then begin
for i:=1 to sl.Count-1 do
result := result + sep+sl[i];
end else if limit > 0 then begin
for i:=1 to limit-1 do
result := result + sep+sl[i];
result := result + limitStr;
end else begin
for i:=1 to (-limit-1) div 2 do
result := result + sep+sl[i];
result := result + sep+limitStr;
for i:=sl.Count - (-limit) div 2 to sl.Count-1 do
result := result + sep+sl[i];
end;
end;
{function strJoin(const sl: TStringArray; const sep: string = ', '; limit: SizeInt = 0;
const limitStr: string = '...'): string; overload;
begin
if length(sl) = 0 then exit('');
result := strJoin(@sl[0], length(sl), sep, limit, limitStr);
end;}
function strJoin(const sl: TStringArray; const sep: string = ', '; limit: SizeInt=0; const limitStr: string='...'): string;overload;//{$ifdef HASINLINE} inline; {$endif}
var i:SizeInt;
begin
Result:='';
if length(sl)=0 then exit;
result:=sl[0];
if (limit = 0) or (length(sl) <= abs(limit)) then begin
for i:=1 to high(sl) do
result := result + sep+sl[i];
end else if limit > 0 then begin
for i:=1 to limit-1 do
result := result + sep+sl[i];
result := result + limitStr;
end else begin
for i:=1 to (-limit-1) div 2 do
result := result + sep+sl[i];
result := result + sep+limitStr;
for i:=length(sl) - (-limit) div 2 to high(sl) do
result := result + sep+sl[i];
end;
end;
function strJoin(strings: PString; stringsLength: SizeInt; const sep: string = ', '): string; overload;
var sb: TStrBuilder;
reslen: SizeInt;
i: SizeInt;
begin
case stringsLength of
0: exit('');
1: exit(strings^);
end;
reslen := length(sep) * (stringsLength - 1);
for i := 0 to stringsLength - 1 do reslen := reslen + length(strings[i]);
sb.init(@result, reslen);
sb.append(strings^);
for i := 1 to stringsLength - 1 do begin
sb.append(sep);
inc(strings);
sb.append(strings^);
end;
sb.final;
end;
function StrToBoolDef(const S: string;const Def:Boolean): Boolean;
Var
foundDot, foundExp: boolean;
i: Integer;
begin
if s = '' then
result := def //good idea? probably for StrToBoolDef(@attribute, def) and if @attribute is missing (=> '') it should def
else if striequal(S, 'TRUE') then
result:=true
else if striequal(S, 'FALSE') then
result:=false
else begin
i := 1;
if s[i] in ['-', '+'] then inc(i);
foundDot := false; foundExp := false;
result := def;
while i <= length(s) do begin
case s[i] of
'.': if foundDot then exit else foundDot := true;
'e', 'E': if foundExp then exit else begin
foundExp := true;
if i < length(s) then if s[i+1] in ['+', '-'] then inc(i);
end;
'0': ;
'1'..'9': if not foundExp then begin result := true; exit; end;
else exit;
end;
inc(i);
end;
result := false;
end;
end;
function strRemoveFileURLPrefix(const filename: string): string;
begin
result := filename;
if not stribeginswith(result, 'file://') then
begin result := result; exit; end;
delete(result, 1, 7);
if (length(result) >= 4) and (result[1] = '/') and (result[3] = ':') and (result[4] = '\') then
delete(result, 1, 1); //Windows like file:///C:\abc\def url
end;
function strLoadFromFile(filename: string): string;
var f:TFileStream;
begin
f:=TFileStream.Create(strRemoveFileURLPrefix(filename),fmOpenRead);
result := '';
SetLength(result,f.Size);
if f.size>0 then
f.Read(Result[1],length(result));
f.Free;
end;
procedure strSaveToFileCallback(stream: TStream; data: pointer);
begin
stream.Write(Pstring(data)^[1], length(Pstring(data)^));
end;
procedure strSaveToFile(filename: string;str:string);
var f:TFileStream;
begin
filename := strRemoveFileURLPrefix(filename);
if length(str) = 0 then begin
f:=TFileStream.Create(filename,fmCreate);
f.free;
end else
fileSaveSafe(filename, @strSaveToFileCallback, @str);
end;
{$IFNDEF FPC}
var codePage: integer = -1;
function UTF8ToAnsi(const s: UTF8String): String;
var temp: RawByteString;
tempws: WideString;
begin
if s = '' then exit;
if codePage = -1 then codePage := getACP;
if codePage = CP_UTF8 then result := s
else if (codePage = {CP_LATIN1} 28591) or (codePage = 1252) then result := strConvertFromUtf8(s, eWindows1252)
else begin
temp := strConvertFromUtf8(s, {$IFDEF ENDIAN_BIG}CP_UTF16BE{$ELSE}CP_UTF16{$ENDIF});
setlength(tempws, (length(temp) + 1) div 2);
move(s[1], tempws[1], length(temp));
result := AnsiString(tempws); //todo
end;
end;
{$ENDIF}
function utf8toSys(const filename: UTF8String): string;
begin
result := filename;
{$IFnDEF FPC_HAS_CPSTRING}{$ifdef windows}
result := Utf8ToAnsi(result);
{$endif}{$endif}
end;
function strLoadFromFileUTF8(filename: RawByteString): string;
begin
result:=strLoadFromFile(utf8toSys(filename));
end;
procedure strSaveToFileUTF8(filename: RawByteString; str: String);
begin
strSaveToFile(utf8toSys(filename),str);
end;
function strFromSize(size: int64): string;
const iec: string='KMGTPEZY';
var res: int64;
i:longint;
begin
i:=0;
res := 0;
while (i<=length(iec)) and (size>=2048) do begin
res:=size mod 1024;
size:=size div 1024;
inc(i);
end;
if i=0 then result:=IntToStr(size)+' B'
else result:=format('%4f ',[size+res/1024])+iec[i]+'iB';
end;
function strFromPtr(p: pointer): string;
begin
result:=IntToHex(PtrToUInt(p), 2*sizeof(Pointer));
end;
function strFromInt(i: int64; displayLength: longint): string;
begin
if i < 0 then begin result := '-'+strFromInt(-i, displayLength); exit; end;
result := IntToStr(i);
if length(result) < (displayLength) then
result := strDup('0', (displayLength) - length(Result)) + result;
end;
//case-sensitive, intelligent string compare (splits in text, number parts)
function strCompareClever(const s1, s2: string): longint;
var t1,t2:string; //lowercase text
i,j,ib,jb,p: SizeInt;
iz, jz: SizeInt;
begin
result:=0;
t1 := s1;
t2 := s2;
i:=1;
j:=1;
while (i<=length(t1)) and (j<=length(t2)) do begin
if (t1[i] in ['0'..'9']) and (t2[j] in ['0'..'9']) then begin
iz := i;
jz := j;
while (i<=length(t1)) and (t1[i] = '0') do inc(i);
while (j<=length(t2)) and (t2[j] = '0') do inc(j);
ib:=i;
jb:=j;
while (i<=length(t1)) and (t1[i] in ['0'..'9']) do inc(i);
while (j<=length(t2)) and (t2[j] in ['0'..'9']) do inc(j);
if i-ib<>j-jb then begin
result:=sign(i-ib - (j-jb)); //find longer number
exit;
end;
for p:=0 to i-ib-1 do //numerical == lexical
if t1[ib+p]<>t2[jb+p] then begin
result:=sign(ord(t1[ib+p]) - ord(t2[jb+p]));
exit;
end;
if result = 0 then result := sign ( i - iz - (j - jz) );
end else begin
if t1[i]<>t2[j] then begin
result:=sign(ord(t1[i]) - ord(t2[j]));
exit;
end;
inc(i);
inc(j);
end;
end;
if result = 0 then
result:=sign(length(t1) - length(t2));
end;
function striCompareClever(const s1, s2: string): longint;
begin
result := strCompareClever(lowercase(s1), lowercase(s2)); //todo optimize
end;
function strDup(rep: string; const count: SizeInt): string;
var
i: SizeInt;
builder: TStrBuilder;
begin
builder.init(@result, length(rep) * count);
for i:=1 to count do
builder.append(rep);
builder.final;
end;
function strIsAbsoluteURI(const s: string): boolean;
var
p: SizeInt;
i: SizeInt;
begin
result := false;
if s = '' then exit;
if not (s[1] in ['A'..'Z','a'..'z']) then exit;
p := pos(':', s);
if (p = 0) or (p + 2 > length(s)) then exit;
for i:=2 to p-1 do
if not (s[i] in ['A'..'Z','a'..'z','0'..'9','+','-','.']) then exit;
if (s[p+1] <> '/') or (s[p+2] <> '/') then exit;
result := true;
end;
function strResolveURIReal(rel, base: string): string; //base must be an absolute uri
function isWindowsFileUrl(): boolean;
begin
result := stribeginswith(base, 'file:///') and (length(base) >= 11) and (base[10] = ':') and (base[11] in ['/', '\']);
end;
var
schemeLength: SizeInt;
p: SizeInt;
relsplit, basesplit: TStringArray;
i: SizeInt;
relparams: string;
begin
p := pos('#', base);
if p > 0 then delete(base, p, length(base) - p + 1);
if (rel <> '') and not strbeginswith(rel, '#') then begin //keeping base query, if rel is empty except for fragment
p := pos('?', base);
if p > 0 then delete(base, p, length(base) - p + 1);
end;
schemeLength := pos(':', base); inc(schemeLength);
if (schemeLength <= length(base)) and (base[schemeLength] = '/') then inc(schemeLength);
if (schemeLength <= length(base)) and (base[schemeLength] = '/') then inc(schemeLength);
if strBeginsWith(rel, '/') then begin
if isWindowsFileUrl() then //Windows file:///c:/ special case
schemeLength := schemeLength + 3;
p := strIndexOf(base, '/', schemeLength);
delete(base, p, length(base) - p + 1);
begin result := base+rel; exit; end;
end;
p := pos('#', rel);
if p > 0 then begin relparams:=strCopyFrom(rel, p); delete(rel, p, length(rel) - p + 1);end else relparams := '';
p := pos('?', rel);
if p > 0 then begin relparams:=strCopyFrom(rel, p) + relparams; delete(rel, p, length(rel) - p + 1);end;
if rel = '' then begin result := base + relparams; exit; end;
relsplit:=strSplit(rel, '/');
basesplit:=strSplit(strCopyFrom(base,schemeLength),'/');
basesplit[0] := copy(base,1,schemeLength-1) + basesplit[0];
if isWindowsFileUrl() then begin basesplit[0] := basesplit[0] + '/' + basesplit[1]; arrayDelete(basesplit, 1); end;
for i:=high(relsplit) downto 0 do if relsplit[i] = '.' then arrayDelete(relsplit, i);
if (length(basesplit) &