Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WI #2632 Use correct default encoding for alphanumeric literals written using hex notation #2633

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion TypeCobol.LanguageServer/Workspace.cs
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,8 @@ internal void BindFileCompilerSourceTextDocument(DocumentContext docContext, str
StopDocumentBackgroundCompilation(docContext);
CompilationProject compilationProject = docContext.Project.Project;
string fileName = Path.GetFileName(docContext.Uri.LocalPath);
ITextDocument initialTextDocumentLines = new ReadOnlyTextDocument(fileName, Configuration.Format.Encoding,
var encodingForAlphanumericLiterals = compilationProject.CompilationOptions.GetEncodingForAlphanumericLiterals();
ITextDocument initialTextDocumentLines = new ReadOnlyTextDocument(fileName, encodingForAlphanumericLiterals,
Configuration.Format.ColumnsLayout, docContext.IsCopy, sourceText);
FileCompiler fileCompiler = null;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ Line 46[24,27] <27, Error, Syntax> - Syntax error : mismatched input 'Var1' expe
- OutputDeviceName = SYSOUT
- WithNoAdvancing

[[DisplayStatement]] [12,18:display]<DISPLAY> --> [24,28+:X'40']<HexadecimalAlphanumericLiteral>(',Y,Y){@}
[[DisplayStatement]] [12,18:display]<DISPLAY> --> [24,28+:X'40']<HexadecimalAlphanumericLiteral>(',Y,Y){ }
- variables = X'40'

[[DisplayStatement]] [12,18:display]<DISPLAY> --> [24,33:HIGH-VALUE]<HIGH_VALUE>
Expand Down
26 changes: 15 additions & 11 deletions TypeCobol.Test/Parser/FileFormat/TestCobolFile.cs
Original file line number Diff line number Diff line change
@@ -1,15 +1,19 @@
using System;
using System.IO;
using System.Linq;
using System.Text;
using System.Text;
using TypeCobol.Compiler;
using TypeCobol.Compiler.File;
using TypeCobol.Compiler.Text;
using TypeCobol.Test.Parser.Scanner;

namespace TypeCobol.Test.Parser.FileFormat
{
static class TestCobolFile {
static class TestCobolFile
{
private static readonly Encoding _EncodingForAlphanumericLiterals =
#if EUROINFO_RULES
IBMCodePages.GetDotNetEncodingFromIBMCCSID(1147);
#else
IBMCodePages.GetDotNetEncodingFromIBMCCSID(1140);
#endif
public static readonly string SampleFolder = "Parser" + Path.DirectorySeparatorChar + "FileFormat" + Path.DirectorySeparatorChar + "Samples";
public static void Check_EBCDICCobolFile()
{
Expand All @@ -27,7 +31,7 @@ public static void Check_EBCDICCobolFile()
if (fileProvider.TryGetFile("EbcdicRefFormat", out cobolFile))
{
// Load the CobolFile in a TextDocument
ReadOnlyTextDocument textDocument = new ReadOnlyTextDocument("EbcdicRefFormat.TXT", docFormat.Encoding, docFormat.ColumnsLayout, false, cobolFile.ReadChars());
ReadOnlyTextDocument textDocument = new ReadOnlyTextDocument("EbcdicRefFormat.TXT", _EncodingForAlphanumericLiterals, docFormat.ColumnsLayout, false, cobolFile.ReadChars());
// Send all text lines in one batch to the test observer
textDocument.TextChanged += textSourceListener.OnTextChanged;
textDocument.StartSendingChangeEvents();
Expand Down Expand Up @@ -84,7 +88,7 @@ public static void Check_EBCDICCobolFileWithUnsupportedChar()
try
{
// Load the CobolFile in a TextDocument;
ReadOnlyTextDocument textDocument = new ReadOnlyTextDocument("EbcdicRefFormatWithBadChars.TXT", docFormat.Encoding, docFormat.ColumnsLayout, false, cobolFile.ReadChars());
ReadOnlyTextDocument textDocument = new ReadOnlyTextDocument("EbcdicRefFormatWithBadChars.TXT", _EncodingForAlphanumericLiterals, docFormat.ColumnsLayout, false, cobolFile.ReadChars());
}
catch(Exception e)
{
Expand Down Expand Up @@ -117,7 +121,7 @@ public static void Check_ASCIICobolFile_ReferenceFormat()
if (fileProvider.TryGetFile("AsciiRefFormat", out cobolFile))
{
// Load the CobolFile in a TextDocument
ReadOnlyTextDocument textDocument = new ReadOnlyTextDocument("MSVCOUT.cpy", docFormat.Encoding, docFormat.ColumnsLayout, true, cobolFile.ReadChars());
ReadOnlyTextDocument textDocument = new ReadOnlyTextDocument("MSVCOUT.cpy", _EncodingForAlphanumericLiterals, docFormat.ColumnsLayout, true, cobolFile.ReadChars());
// Send all text lines in one batch to the test observer
textDocument.TextChanged += textSourceListener.OnTextChanged;
textDocument.StartSendingChangeEvents();
Expand Down Expand Up @@ -172,7 +176,7 @@ public static void Check_ASCIICobolFile_LinuxReferenceFormat()
if (fileProvider.TryGetFile("AsciiLinuxFormat.14", out cobolFile))
{
// Load the CobolFile in a TextDocument
ReadOnlyTextDocument textDocument = new ReadOnlyTextDocument("AsciiLinuxFormat.14", docFormat.Encoding, docFormat.ColumnsLayout, false, cobolFile.ReadChars());
ReadOnlyTextDocument textDocument = new ReadOnlyTextDocument("AsciiLinuxFormat.14", _EncodingForAlphanumericLiterals, docFormat.ColumnsLayout, false, cobolFile.ReadChars());
// Send all text lines in one batch to the test observer
textDocument.TextChanged += textSourceListener.OnTextChanged;
textDocument.StartSendingChangeEvents();
Expand Down Expand Up @@ -227,7 +231,7 @@ public static void Check_ASCIICobolFile_FreeTextFormat()
if (fileProvider.TryGetFile("AsciiFreeFormat", out cobolFile))
{
// Load the CobolFile in a TextDocument
ReadOnlyTextDocument textDocument = new ReadOnlyTextDocument("AsciiFreeFormat.cpy", docFormat.Encoding, docFormat.ColumnsLayout, true, cobolFile.ReadChars());
ReadOnlyTextDocument textDocument = new ReadOnlyTextDocument("AsciiFreeFormat.cpy", _EncodingForAlphanumericLiterals, docFormat.ColumnsLayout, true, cobolFile.ReadChars());
// Send all text lines in one batch to the test observer
textDocument.TextChanged += textSourceListener.OnTextChanged;
textDocument.StartSendingChangeEvents();
Expand Down Expand Up @@ -307,7 +311,7 @@ public static void Check_UTF8File()
if (fileProvider.TryGetFile(filename, out cobolFile))
{
// Load the CobolFile in a TextDocument
ReadOnlyTextDocument textDocument = new ReadOnlyTextDocument(filename, docFormat.Encoding, docFormat.ColumnsLayout, false, cobolFile.ReadChars());
ReadOnlyTextDocument textDocument = new ReadOnlyTextDocument(filename, _EncodingForAlphanumericLiterals, docFormat.ColumnsLayout, false, cobolFile.ReadChars());
// Send all text lines in one batch to the test observer
textDocument.TextChanged += textSourceListener.OnTextChanged;
textDocument.StartSendingChangeEvents();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- Line 1 --
[1,9+:X'C085D0']<HexadecimalAlphanumericLiteral>(',Y,Y){{e}}

Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- Line 1 --
[1,9+:X'C085D0']<HexadecimalAlphanumericLiteral>(',Y,Y){éeè}

6 changes: 2 additions & 4 deletions TypeCobol.Test/Parser/Scanner/ScannerUtils.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
using System.Collections.Generic;
using System.IO;
using System.Text;
using System.Text;
using TypeCobol.Compiler.Concurrency;
using TypeCobol.Compiler.Diagnostics;
using TypeCobol.Compiler.Directives;
Expand Down Expand Up @@ -44,8 +42,8 @@ public TextChangeMap(TextChange change, ColumnsLayout columnsLayout)

internal static class ScannerUtils
{
public static TextSourceInfo TextSourceInfo = new TextSourceInfo("test", IBMCodePages.GetDotNetEncodingFromIBMCCSID(1147), ColumnsLayout.FreeTextFormat, false);//Assuming a program here, not a copy.
public static TypeCobolOptions CompilerOptions = new TypeCobolOptions();
public static TextSourceInfo TextSourceInfo = new TextSourceInfo("test", CompilerOptions.GetEncodingForAlphanumericLiterals(), ColumnsLayout.FreeTextFormat, false);//Assuming a program here, not a copy.
public static List<RemarksDirective.TextNameVariation> CopyTextNameVariations = new List<RemarksDirective.TextNameVariation>();

public static string ScanLine(string testLine)
Expand Down
12 changes: 10 additions & 2 deletions TypeCobol.Test/Parser/Scanner/TestTokenTypes.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
using System;
using TypeCobol.Compiler.Scanner;
using TypeCobol.Compiler.Scanner;

namespace TypeCobol.Test.Parser.Scanner
{
Expand Down Expand Up @@ -139,6 +138,15 @@ public static void CheckAlphanumericLiterals()
result = ScannerUtils.ScanLines(testLines);
ScannerUtils.CheckWithResultFile(result, testName);

#if EUROINFO_RULES
testName = "AlphanumericLiterals4-1147";
#else
testName = "AlphanumericLiterals4-1140";
#endif
testLines = new string[] { "X'C085D0'" }; // 'éeè' in EBCDIC 1147, '{e}' in EBCDIC 1140
result = ScannerUtils.ScanLines(testLines);
ScannerUtils.CheckWithResultFile(result, testName);

testName = "UTF8Literals";
testLines = new string[] {
@"U""This text does not include any escaped char""",
Expand Down
11 changes: 3 additions & 8 deletions TypeCobol.Test/Parser/TestParserRobustness.cs
Original file line number Diff line number Diff line change
@@ -1,9 +1,4 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using TypeCobol.Compiler;
using TypeCobol.Compiler;
using TypeCobol.Compiler.CodeElements;
using TypeCobol.Compiler.Diagnostics;
using TypeCobol.Compiler.Directives;
Expand All @@ -22,11 +17,11 @@ public static void CheckProgramCodeElements()
private static CodeElement[] ParseCodeElements(string cobolString, bool asPartOfACopy, out Diagnostic[] parserDiagnostics)
{
// Load text document from string
var textDocument = new ReadOnlyTextDocument("test string", Encoding.Default, ColumnsLayout.FreeTextFormat, asPartOfACopy, string.Empty);
var typeCobolOptions = new TypeCobolOptions();
var textDocument = new ReadOnlyTextDocument("test string", typeCobolOptions.GetEncodingForAlphanumericLiterals(), ColumnsLayout.FreeTextFormat, asPartOfACopy, string.Empty);
textDocument.LoadChars(cobolString);

// Create a compilation project and a compiler for this document
var typeCobolOptions = new TypeCobolOptions();
var project = new CompilationProject("test project", ".", new[] { ".cbl", ".cpy" },
DocumentFormat.FreeTextFormat, typeCobolOptions, null);
var compiler = new FileCompiler(textDocument, project.SourceFileProvider, project, typeCobolOptions, project);
Expand Down
15 changes: 8 additions & 7 deletions TypeCobol.Test/Parser/Text/TestReadOnlyTextDocument.cs
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
using System;
using System.IO;
using System.Linq;
using System.Text;
using System.Text;
using TypeCobol.Compiler;
using TypeCobol.Compiler.Directives;
using TypeCobol.Compiler.File;
using TypeCobol.Compiler.Text;

Expand Down Expand Up @@ -49,7 +47,8 @@ public static void Check_DocumentFormatExceptions()

public static void Check_EmptyDocument()
{
ReadOnlyTextDocument textDocument = new ReadOnlyTextDocument("empty", Encoding.Default, ColumnsLayout.CobolReferenceFormat, false, String.Empty);
var options = new TypeCobolOptions();
ReadOnlyTextDocument textDocument = new ReadOnlyTextDocument("empty", options.GetEncodingForAlphanumericLiterals(), ColumnsLayout.CobolReferenceFormat, false, String.Empty);

Exception resultException = null;
try
Expand Down Expand Up @@ -176,7 +175,8 @@ public static void Check_ReferenceFormatDocument()
}

// Load the CobolFile in a TextDocument
ReadOnlyTextDocument textDocument = new ReadOnlyTextDocument("MSVCOUT.cpy", docFormat.Encoding, docFormat.ColumnsLayout, true, cobolFile.ReadChars());
var options = new TypeCobolOptions();
ReadOnlyTextDocument textDocument = new ReadOnlyTextDocument("MSVCOUT.cpy", options.GetEncodingForAlphanumericLiterals(), docFormat.ColumnsLayout, true, cobolFile.ReadChars());

if(textDocument.CharAt(0) != '0')
{
Expand Down Expand Up @@ -269,7 +269,8 @@ public static void Check_FreeFormatDocument()
}

// Load the CobolFile in a TextDocument
ReadOnlyTextDocument textDocument = new ReadOnlyTextDocument("MSVCINP free format.cpy", docFormat.Encoding, docFormat.ColumnsLayout, true, cobolFile.ReadChars());
var options = new TypeCobolOptions();
ReadOnlyTextDocument textDocument = new ReadOnlyTextDocument("MSVCINP free format.cpy", options.GetEncodingForAlphanumericLiterals(), docFormat.ColumnsLayout, true, cobolFile.ReadChars());

if (textDocument.CharAt(0) != '/')
{
Expand Down
10 changes: 3 additions & 7 deletions TypeCobol.Test/Utils/ParserUtils.cs
Original file line number Diff line number Diff line change
@@ -1,8 +1,4 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Text;
using Antlr4.Runtime;
using TypeCobol.Compiler;
using TypeCobol.Compiler.AntlrUtils;
Expand Down Expand Up @@ -60,10 +56,10 @@ public static CompilationUnit ParseCobolFile(string textName, string folder, boo
public static CompilationUnit ParseCobolString(string cobolString, bool asPartOfACopy)
{
//Prepare
var textDocument = new ReadOnlyTextDocument("Empty doc", Encoding.Default, ColumnsLayout.FreeTextFormat, asPartOfACopy, string.Empty);
var typeCobolOptions = new TypeCobolOptions();
var textDocument = new ReadOnlyTextDocument("Empty doc", typeCobolOptions.GetEncodingForAlphanumericLiterals(), ColumnsLayout.FreeTextFormat, asPartOfACopy, string.Empty);
textDocument.LoadChars(cobolString);

var typeCobolOptions = new TypeCobolOptions();
var project = new CompilationProject("Empty project", ".", new[] { ".cbl", ".cpy" },
DocumentFormat.FreeTextFormat, typeCobolOptions, null);

Expand Down
36 changes: 35 additions & 1 deletion TypeCobol/Compiler/Directives/IBMCompilerOptions.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#nullable enable

using System.Diagnostics.CodeAnalysis;
using System.Text;
using TypeCobol.Compiler.File;

namespace TypeCobol.Compiler.Directives
{
Expand Down Expand Up @@ -137,7 +139,14 @@ internal IBMCompilerOptionStatus(IBMCompilerOptionName name)
case IBMCompilerOptionName.BLOCK0: IsActivated = false; Value = null; break;
case IBMCompilerOptionName.BUFSIZE: IsActivated = true; Value = "4096"; break;
case IBMCompilerOptionName.CICS: IsActivated = false; Value = null; break;
case IBMCompilerOptionName.CODEPAGE: IsActivated = true; Value = "1140"; break;
case IBMCompilerOptionName.CODEPAGE:
IsActivated = true;
#if EUROINFO_RULES
Value = "1147"; //IBM EBCDIC (France-Euro)
#else
Value = "1140"; //IBM EBCDIC (EU-Canada-Euro)
#endif
break;
case IBMCompilerOptionName.COMPILE: IsActivated = false; Value = "S"; break;
case IBMCompilerOptionName.COPYLOC: IsActivated = false; Value = null; break;
case IBMCompilerOptionName.CURRENCY: IsActivated = false; Value = null; break;
Expand Down Expand Up @@ -1015,4 +1024,29 @@ public enum IBMCompilerOptionName
/* If you compile using ZWB, the compiler removes the sign from a signed zoned decimal (DISPLAY) field before comparing this field to an alphanumeric elementary field during execution. */
ZWB
}

public static class IBMCompilerOptionsExtensions
{
    /// <summary>
    /// Get from the CODEPAGE compiler option:
    /// • The encoding of literals in the source program
    /// • The encoding for data items described with USAGE DISPLAY or DISPLAY-1
    /// • The encoding for XML parsing and XML generation
    ///
    /// The encoding of national and UTF-8 data is not affected by the CODEPAGE compiler option. The encoding
    /// for national literals and data items described with usage NATIONAL is UTF-16BE (big endian), CCSID
    /// 1200. A reference to UTF-16 in this document is a reference to UTF-16BE. The encoding for UTF-8 literals
    /// and data items described with usage UTF-8 is UTF-8, CCSID 1208.
    /// </summary>
    /// <param name="ibmCompilerOptions">Compiler options whose CODEPAGE value is read.</param>
    /// <returns>
    /// The encoding returned by <c>IBMCodePages.GetDotNetEncodingFromIBMCCSID</c> for the CCSID
    /// parsed from the CODEPAGE option value.
    /// </returns>
    /// <exception cref="ArgumentException">
    /// The CODEPAGE option value is null or cannot be parsed as an integer.
    /// </exception>
    public static Encoding GetEncodingForAlphanumericLiterals(this IBMCompilerOptions ibmCompilerOptions)
    {
        string? codePageOption = ibmCompilerOptions.CODEPAGE.Value;

        // A CCSID is a machine-readable identifier, not user-facing text: parse it with the
        // invariant culture so the result never depends on the culture of the current thread.
        bool isValidCodePage = int.TryParse(
            codePageOption,
            System.Globalization.NumberStyles.Integer,
            System.Globalization.CultureInfo.InvariantCulture,
            out int codePage);

        if (isValidCodePage)
        {
            return IBMCodePages.GetDotNetEncodingFromIBMCCSID(codePage);
        }

        throw new ArgumentException($"Invalid CODEPAGE compiler option: '{codePageOption}'.");
    }
}
}
6 changes: 3 additions & 3 deletions TypeCobol/Compiler/FileCompiler.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
using System;
using System.Diagnostics;
using System.Diagnostics;
using JetBrains.Annotations;
using TypeCobol.Compiler.CodeModel;
using TypeCobol.Compiler.Directives;
Expand Down Expand Up @@ -165,7 +164,8 @@ private FileCompiler(Tuple<string, string, ColumnsLayout, bool> fileInfo, ITextD
{
// 2.a Load it in a new text document in memory
Debug.Assert(sourceFile != null);
TextDocument = new ReadOnlyTextDocument(sourceFile.Name, sourceFile.Encoding, fileInfo.Item3, fileInfo.Item4, sourceFile.ReadChars());
var encodingForAlphanumericLiterals = compilerOptions.GetEncodingForAlphanumericLiterals();
TextDocument = new ReadOnlyTextDocument(sourceFile.Name, encodingForAlphanumericLiterals, fileInfo.Item3, fileInfo.Item4, sourceFile.ReadChars());
}
else
{
Expand Down
3 changes: 1 addition & 2 deletions TypeCobol/Compiler/Text/CobolTextLine.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

using TypeCobol.Compiler.Concurrency;
using TypeCobol.Compiler.Directives;
using TypeCobol.Compiler.File;
using TypeCobol.Compiler.Scanner;

namespace TypeCobol.Compiler.Text
Expand Down Expand Up @@ -193,7 +192,7 @@ private static IList<Tuple<string, bool> > Split(string line, int max, int min,
}
}
TokensLine tempTokensLine = TokensLine.CreateVirtualLineForInsertedToken(0, line, layout);
tempTokensLine.InitializeScanState(new MultilineScanState(IBMCodePages.GetDotNetEncodingFromIBMCCSID(1147)));
tempTokensLine.InitializeScanState(new MultilineScanState(scannerOptions.GetEncodingForAlphanumericLiterals()));

Scanner.Scanner scanner = new Scanner.Scanner(line, 0, line.Length - 1, tempTokensLine, scannerOptions, false);
Token? t;
Expand Down
Loading