From df16847061b0ee22fba4277afeb9ef0eb0a9ea71 Mon Sep 17 00:00:00 2001 From: codemanyak Date: Sun, 17 Mar 2024 22:13:42 +0100 Subject: [PATCH] Fixes #1136 w.r.t arrays over parameterized types as well now. --- src/lu/fisch/structorizer/gui/changelog.txt | 17 +++-- .../structorizer/parsers/JavaParser.java | 76 ++++++++++++------- 2 files changed, 57 insertions(+), 36 deletions(-) diff --git a/src/lu/fisch/structorizer/gui/changelog.txt b/src/lu/fisch/structorizer/gui/changelog.txt index 975ae25e..eb492b92 100644 --- a/src/lu/fisch/structorizer/gui/changelog.txt +++ b/src/lu/fisch/structorizer/gui/changelog.txt @@ -8,17 +8,17 @@ Legend: Known issues (also see https://github.com/fesch/Structorizer.Desktop/issues): - Copying diagram images to the clipboard may fail with some OS/JRE combination (#685), on Windows the image format may be JPG instead of PNG. -- Pascal import does not cope with unit name aliases. +- Pascal import does not cope with unit name aliases. ObjectPascal code should + be tolerated by the parser but may produce nonsense from the OOP parts. - COBOL import may fail if certain single-letter identifiers are used (rename them!), it does not cope with some statement variants and variable redefinitions, either. -- Java import may not cope with type arguments in array element types. The - import option to mitigate ambiguities between closing angular brackets (in +- The import option to mitigate ambiguities between closing angular brackets (in nested type arguments) and shift operators >> may fail in some cases. Then it should be switched off and closing angular brackets be separated manually. - It does not cope with lambda expressions and some annotations. The imported - diagrams will usually not be executable, anyway (because of unsupported OOP - context). + It does not cope with lambda expressions and some annotations. 
Because OOP + context is not actually supported in Structorizer, the imported diagrams will + usually not be executable and will raise a lot of Analyser warnings. - Shell export neither copes with nested array/record initialisers and component access nor with cleanly handling usual and associative arrays as parameters or results. @@ -207,8 +207,9 @@ Current development version 3.32-18 (2024-03-17) - 18: Enh. #1084 Pascal import toughened up for ObjectPascal/Delphi 12 <2> - 18: Bugfix #1135 Two Java import bugs - unicode escape sequences in string literals and identifiers 'str', 'binary', 'hex' caused failure. <2> -- 18: Bugfix #1136 Three Java import problems due to angular brackets of type - arguments (nested parameterized types, casting, "" parameters). <2> +- 18: Bugfix #1136 Four Java import problems due to angular brackets of type + arguments (nested parameterized types, casting, "" parameters, + arrays over parameterized types). <2> - 18: Bugfix #1137 Workaround for Java import errors due to ".this" <2> - 18: Issue #1138 Arranger collision warnings could become a nuisance <2> - 18: Bugfix #1139 TRY elements without catch variable caused defects <2> diff --git a/src/lu/fisch/structorizer/parsers/JavaParser.java b/src/lu/fisch/structorizer/parsers/JavaParser.java index b7092fd9..f19a0459 100644 --- a/src/lu/fisch/structorizer/parsers/JavaParser.java +++ b/src/lu/fisch/structorizer/parsers/JavaParser.java @@ -1083,6 +1083,7 @@ protected File prepareTextfile(String _textToParse, String _encoding) throws Par * beyond the power of our grammar to solve this. 
*/ final Matcher castMatcher = Pattern.compile(".*[>]\\s*[)].*").matcher(""); + final Matcher arrayDeclMatcher = Pattern.compile(".*[>]\\s*\\[\\s*\\].*").matcher(""); // START KGU#1122 2024-03-12: Bugfix #1136 New option to separate angular type parameter brackets boolean separateAngularBrackets = (Boolean)this.getPluginOption("separate_angular_brackets", true); @@ -1149,7 +1150,9 @@ protected File prepareTextfile(String _textToParse, String _encoding) throws Par } // START KGU#1122 2024-03-12: Bugfix #1136 castMatcher.reset(strLine); - if (castMatcher.matches() || strLine.contains(">>") || strLine.contains("?>")) { + arrayDeclMatcher.reset(strLine); + if (castMatcher.matches() || arrayDeclMatcher.matches() + || strLine.contains(">>") || strLine.contains("?>")) { boolean replaced = false; StringList tokens = Element.splitLexically(strLine, true); // Initially decompose all ">>" tokens into ">", ">" ... @@ -1174,8 +1177,8 @@ protected File prepareTextfile(String _textToParse, String _encoding) throws Par } } - // Now we first look for type casts with type arguments and modify them - replaced = transformCastsWithTypeParameters(tokens) || replaced; + // Now we first look for type casts or array specs with type arguments and modify them + replaced = transformParameterisedTypes(tokens) || replaced; // Second, we look for clashing closing angular brackets. if (separateAngularBrackets) { replaced = separateAngularTypeBrackets(tokens) || replaced; @@ -1210,21 +1213,24 @@ protected File prepareTextfile(String _textToParse, String _encoding) throws Par return interm; } - // START KGU#1122 2024-03-12: Bugfix #1136 Methods to solve angular bracket trouble + // START KGU#1122 2024-03-17: Bugfix #1136 Methods to solve angular bracket trouble /** * Derives a single pseudo identifier from parameterized types with casting - * parentheses, e.g.
- * {@code (Vector<Integer>)expression} → {@code (VectorííIntegerìì)expression} + * parentheses or in array specifications, e.g.
+ * {@code (Vector<Integer>)expression} → {@code (VectorííIntegerìì)expression}
+ * {@code JComboBox<Integer>[]} → {@code JComboBoxííIntegerìì[]}
+ * If a replacement takes place adds its backward mapping to {@link CodeParser#replacedIds}. * * @param tokens - a tokenized line to be transformed in the described way * at all matching places. May be modified (then returns {@code true}). - * @return {@code true} if {@code tokens} was modified, {@code false} otherwise. + * @return {@code true} if {@code tokens} was modified and a related entry is member + * of {@link CodeParser#replacedIds}, {@code false} otherwise. */ - private boolean transformCastsWithTypeParameters(StringList tokens) { + private boolean transformParameterisedTypes(StringList tokens) { boolean replaced = false; int posAngBr = -1; while ((posAngBr = tokens.indexOf(">", posAngBr+1)) >= 0) { - // Go ahead and look for a closing parenthesis... + // Go ahead and look for a closing parenthesis or an opening bracket... int posPar1 = posAngBr + 1; while (posPar1 < tokens.count() && tokens.get(posPar1).isBlank()) posPar1++; /* If there is a closing parenthesis then we walk backwards along @@ -1236,7 +1242,8 @@ private boolean transformCastsWithTypeParameters(StringList tokens) { * are also necessary. The major question here is, how precise our syntax * check has to be. */ - if (posPar1 < tokens.count() && tokens.get(posPar1).equals(")")) { + boolean isCast = false; + if (posPar1 < tokens.count() && ((isCast = tokens.get(posPar1).equals(")")) || tokens.get(posPar1).equals("["))) { /* First make sure the construct is complete within this line * We must only find identifiers, '.', ',', '[]', '<', and '>' * (and possibly comments!?) between the parentheses... 
@@ -1245,6 +1252,7 @@ private boolean transformCastsWithTypeParameters(StringList tokens) { */ int posPar0 = posAngBr - 1; int angCount = 1; + int posId = -1; while (posPar0 >= 0 && !tokens.get(posPar0).equals("(") && angCount >= 0) { String token = tokens.get(posPar0); if (token.isBlank()) { @@ -1252,12 +1260,23 @@ private boolean transformCastsWithTypeParameters(StringList tokens) { tokens.remove(posPar0); posPar1--; posAngBr--; + posId--; } else if (token.equals(">")) { angCount++; } else if (token.equals("<")) { angCount--; + // There must be an identifier before the opening '<' + posId = posPar0 - 1; + while (angCount >= 0 && posId >= 0 && tokens.get(posId).isBlank()) posId--; + if (posId < 0 || !Function.testIdentifier(tokens.get(posId), false, "$")) { + break; + } + else if (!isCast && angCount == 0) { + posPar0 = posId; + break; + } } else if (!Function.testIdentifier(token, false, "$") && !(token.length() == 1 && "[],.".contains(token))) { @@ -1265,22 +1284,26 @@ else if (!Function.testIdentifier(token, false, "$") } posPar0--; } - if (posPar0 >= 0 && tokens.get(posPar0).equals("(") && angCount == 0 - && isTypeSpecificationList(tokens.subSequence(posPar0+1, posPar1))) { + if (posPar0 >= 0 && (!isCast || tokens.get(posPar0).equals("(")) && angCount == 0 + && isTypeSpecificationList(tokens.subSequence(posId, posAngBr+1))) { + String origSequence = tokens.concatenate("", posId, posAngBr+1); // We should have a dense token sequence now and produce a pseudo-identifier tokens.replaceAllBetween("<", ANG_BRACK_SUBST[0], true, posPar0+1, posPar1); tokens.replaceAllBetween(">", ANG_BRACK_SUBST[2], true, posPar0+1, posPar1); tokens.replaceAllBetween(",", ANG_BRACK_SUBST[1], true, posPar0+1, posPar1); tokens.replaceAllBetween("[", ANG_BRACK_SUBST[3], true, posPar0+1, posPar1); tokens.replaceAllBetween("]", ANG_BRACK_SUBST[4], true, posPar0+1, posPar1); - // (The operator symbols will be restored after the parsing) + // Ensure the operator symbols will be restored 
after the parsing + this.replacedIds.putIfAbsent(tokens.concatenate("", posId, posAngBr+1), origSequence); replaced = true; } } } return replaced; } + // END KGU#1122 2024-03-17 + // START KGU#1122 2024-03-12: Bugfix #1136 Methods to solve angular bracket trouble /** * Tries to separate closing angular brackets of nested type parmeters as * in {@code HashMap>} within the given token @@ -1329,12 +1352,12 @@ else if (!token.isBlank() * Checks the syntax of (possibly nested) type specifications like * {@code HashMap>} * - * @param subSequence - token sequence assumed to specify a (possibly parameterized) - * type. + * @param subSequence - token sequence assumed to specify a (possibly + * parameterized) type. * @return */ private boolean isTypeSpecificationList(StringList subSequence) { - // TODO Auto-generated method stub + // TODO (by now the parsing in transformParameterisedTypes() was good enough) //System.out.println("(Not) checking type list \"" + subSequence + "\" in JavaParser.isTypeSpecification()"); return true; } @@ -3907,8 +3930,9 @@ protected String translateContent(String _content) /** * Convenience method for the string content retrieval from a {@link Token} * that may be either represent a content symbol or a {@link Reduction}. 
+ * * @param _token - the {@link Token} the content is to be appended to - * {@code _content} + * {@code _content} * @return the content string (may be empty in case of noise) * @throws ParserCancelled */ @@ -3918,12 +3942,13 @@ protected String getContent_R(Token _token) throws ParserCancelled return getContent_R(_token.asReduction(), ""); } else if (_token.getType() == SymbolType.CONTENT) { - // START KGU#1122 2024-03-12: Bugfix #1136 revert preprocessing substitutions + // START KGU#1122 2024-03-17: Bugfix #1136 revert preprocessing substitutions //return _token.asString(); - return _token.asString().replace(ANG_BRACK_SUBST[0], "<").replace(ANG_BRACK_SUBST[2], ">") - .replace(ANG_BRACK_SUBST[1], ",") - .replace(ANG_BRACK_SUBST[3], "[").replace(ANG_BRACK_SUBST[4], "]"); - // END KGU#1122 2024-03-12 + //return _token.asString().replace(ANG_BRACK_SUBST[0], "<").replace(ANG_BRACK_SUBST[2], ">") + // .replace(ANG_BRACK_SUBST[1], ",") + // .replace(ANG_BRACK_SUBST[3], "[").replace(ANG_BRACK_SUBST[4], "]"); + return undoIdReplacements(_token.asString()); + // END KGU#1122 2024-03-17 } return ""; } @@ -4041,12 +4066,7 @@ else if (idx == SymbolConstants.SYM_FLOATINGPOINTLITERAL && toAdd.matches(".+?[f /* -------- End code example for text retrieval and translation -------- */ } - // START KGU#1122 2024-03-12: Bugfix #1136 revert preprocessing substitutions - //return _content; - return _content.replace(ANG_BRACK_SUBST[0], "<").replace(ANG_BRACK_SUBST[2], ">") - .replace(ANG_BRACK_SUBST[1], ",") - .replace(ANG_BRACK_SUBST[3], "[").replace(ANG_BRACK_SUBST[4], "]"); - // END KGU#1122 2024-03-12 + return _content; } //------------------------- Postprocessor ---------------------------