Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docs/testing.md
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,8 @@ python3 scripts/run_toml_test_suite.py --harness=./build/GocciaTOMLCheck --outpu

Unlike the YAML script, this harness compares both parse/fail behavior and the official tagged JSON fixtures for valid cases. It uses a Pascal decoder built around `TGocciaTOMLParser.ParseDocument(...)` so TOML scalar kinds like `integer`, `float`, `datetime`, `datetime-local`, `date-local`, and `time-local` remain visible during compliance checks even though the normal runtime surface still maps date/time values to strings.

The harness reads TOML suite files as raw bytes and tags the resulting buffer with the UTF-8 code page (65001), without transcoding it, before passing the text into the Pascal parser. That extra step matters on Windows, where an implicit conversion through the local ANSI code page would otherwise corrupt Unicode keys and string values and produce false TOML compliance failures.

The TOML runner exits non-zero when any case fails or times out, so it is safe to use directly in CI. When `--harness` is omitted, it compiles `scripts/GocciaTOMLCheck.dpr` automatically; CI uses a prebuilt harness from the matrix build artifacts instead.

### Run Pascal Unit Tests
Expand Down
19 changes: 13 additions & 6 deletions scripts/GocciaTOMLCheck.dpr
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,22 @@ uses
Goccia.TOML,
Goccia.Values.Primitives;

function LoadUTF8File(const APath: string): UTF8String;
function LoadUTF8File(const APath: string): string;
const
UTF8_CODE_PAGE = 65001;
var
Stream: TFileStream;
UTF8Text: RawByteString;
begin
Stream := TFileStream.Create(APath, fmOpenRead or fmShareDenyWrite);
try
SetLength(Result, Stream.Size);
if Length(Result) > 0 then
Stream.ReadBuffer(Pointer(Result)^, Length(Result));
SetLength(UTF8Text, Stream.Size);
if Length(UTF8Text) > 0 then
begin
Stream.ReadBuffer(Pointer(UTF8Text)^, Length(UTF8Text));
SetCodePage(UTF8Text, UTF8_CODE_PAGE, False);
end;
Result := UTF8Text;
finally
Stream.Free;
end;
Expand Down Expand Up @@ -134,7 +141,7 @@ var
ExitCode: Integer;
Parser: TGocciaTOMLParser;
Root: TGocciaTOMLNode;
SourceText: UTF8String;
SourceText: string;
begin
if ParamCount <> 1 then
Halt(2);
Expand All @@ -146,7 +153,7 @@ begin
try
try
SourceText := LoadUTF8File(ParamStr(1));
Root := Parser.ParseDocument(string(SourceText));
Root := Parser.ParseDocument(SourceText);
try
WriteLn(SerializeNode(Root));
ExitCode := 0;
Expand Down
43 changes: 43 additions & 0 deletions units/Goccia.TOML.Test.pas
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ TTOMLParserTests = class(TTestSuite)
function GetChildOrFail(const AParent: TGocciaTOMLNode;
const AKey: string): TGocciaTOMLNode;
procedure TestParseDocumentNormalizesCRLFInMultilineStrings;
procedure TestParseDocumentPreservesUTF8BytesForUnicodeKeysAndValues;
procedure TestParseDocumentTracksScalarKinds;
procedure TestParseDocumentTracksArrayElementKinds;
public
Expand All @@ -30,6 +31,8 @@ procedure TTOMLParserTests.SetupTests;
TestParseDocumentTracksArrayElementKinds);
Test('ParseDocument normalizes CRLF in multiline strings',
TestParseDocumentNormalizesCRLFInMultilineStrings);
Test('ParseDocument preserves UTF-8 bytes for Unicode keys and values',
TestParseDocumentPreservesUTF8BytesForUnicodeKeysAndValues);
end;

function TTOMLParserTests.GetChildOrFail(const AParent: TGocciaTOMLNode;
Expand Down Expand Up @@ -150,6 +153,46 @@ procedure TTOMLParserTests.TestParseDocumentNormalizesCRLFInMultilineStrings;
end;
end;

// Regression test: a TOML document whose quoted key and string value contain
// non-ASCII characters, supplied as explicit UTF-8 byte sequences, must parse
// so that the key can be looked up and the value compares equal to the
// expected text.
procedure TTOMLParserTests.TestParseDocumentPreservesUTF8BytesForUnicodeKeysAndValues;
const
// Code page identifier for UTF-8.
UTF8_CODE_PAGE = 65001;
var
Parser: TGocciaTOMLParser;
RawSourceText: RawByteString;
RawUnicodeKey: RawByteString;
RawUnicodeValue: RawByteString;
Root: TGocciaTOMLNode;
SourceText: string;
UnicodeKey: string;
UnicodeNode: TGocciaTOMLNode;
UnicodeValue: string;
begin
// $CE $B4 is the UTF-8 encoding of U+03B4 (Greek small letter delta).
RawUnicodeKey := #$CE#$B4;
// Convert = False: only re-tag the existing bytes as UTF-8; do not transcode.
SetCodePage(RawUnicodeKey, UTF8_CODE_PAGE, False);
UnicodeKey := RawUnicodeKey;

// 'Jos' followed by $C3 $A9, the UTF-8 encoding of U+00E9 (e with acute).
RawUnicodeValue := 'Jos' + #$C3#$A9;
SetCodePage(RawUnicodeValue, UTF8_CODE_PAGE, False);
UnicodeValue := RawUnicodeValue;

// Build a one-line document of the form "<key>" = "<value>" plus a line ending.
// The concatenation result is re-tagged as UTF-8 before it is assigned to the
// default string type handed to the parser.
RawSourceText := '"' + RawUnicodeKey + '" = "' + RawUnicodeValue + '"' + LineEnding;
SetCodePage(RawSourceText, UTF8_CODE_PAGE, False);
SourceText := RawSourceText;

Parser := TGocciaTOMLParser.Create;
try
Root := Parser.ParseDocument(SourceText);
try
// NOTE(review): GetChildOrFail presumably fails the test when the key is
// absent; its body is not visible here - confirm.
UnicodeNode := GetChildOrFail(Root, UnicodeKey);
Expect<string>(UnicodeNode.CanonicalValue).ToBe(UnicodeValue);
finally
// The test frees the node returned by ParseDocument itself.
Root.Free;
end;
finally
Parser.Free;
end;
end;

begin
TestRunnerProgram.AddSuite(TTOMLParserTests.Create('TOML Parser'));
TestRunnerProgram.Run;
Expand Down
Loading