Fixes #1136 w.r.t. arrays over parameterized types as well now.

fesch · Mar 17, 2024 · df16847 · df16847
1 parent 944de6d
commit df16847
Show file tree

Hide file tree

Showing 2 changed files with 57 additions and 36 deletions.
diff --git a/src/lu/fisch/structorizer/gui/changelog.txt b/src/lu/fisch/structorizer/gui/changelog.txt
@@ -8,17 +8,17 @@ Legend:
 Known issues (also see https://github.com/fesch/Structorizer.Desktop/issues):
 - Copying diagram images to the clipboard may fail with some OS/JRE combinations
   (#685), on Windows the image format may be JPG instead of PNG.
-- Pascal import does not cope with unit name aliases.
+- Pascal import does not cope with unit name aliases. ObjectPascal code should
+  be tolerated by the parser but may produce nonsense from the OOP parts.
 - COBOL import may fail if certain single-letter identifiers are used (rename
   them!), it does not cope with some statement variants and variable
   redefinitions, either.
-- Java import may not cope with type arguments in array element types. The
-  import option to mitigate ambiguities between closing angular brackets (in
+- The import option to mitigate ambiguities between closing angular brackets (in
   nested type arguments) and shift operators >> may fail in some cases. Then
   it should be switched off and closing angular brackets be separated manually.
-  It does not cope with lambda expressions and some annotations. The imported
-  diagrams will usually not be executable, anyway (because of unsupported OOP
-  context).
+  It does not cope with lambda expressions and some annotations. Because OOP
+  context is not actually supported in Structorizer, the imported diagrams will
+  usually not be executable and will raise a lot of Analyser warnings.
 - Shell export neither copes with nested array/record initialisers and
   component access nor with cleanly handling usual and associative arrays as
   parameters or results.
@@ -207,8 +207,9 @@ Current development version 3.32-18 (2024-03-17)
 - 18: Enh. #1084 Pascal import toughened up for ObjectPascal/Delphi 12 <2>
 - 18: Bugfix #1135 Two Java import bugs - unicode escape sequences in string
       literals and identifiers 'str', 'binary', 'hex' caused failure. <2>
-- 18: Bugfix #1136 Three Java import problems due to angular brackets of type
-      arguments (nested parameterized types, casting, "<?>" parameters). <2>
+- 18: Bugfix #1136 Four Java import problems due to angular brackets of type
+      arguments (nested parameterized types, casting, "<?>" parameters,
+      arrays over parameterized types). <2>
 - 18: Bugfix #1137 Workaround for Java import errors due to ".this" <2>
 - 18: Issue #1138 Arranger collision warnings could become a nuisance <2>
 - 18: Bugfix #1139 TRY elements without catch variable caused defects <2>

diff --git a/src/lu/fisch/structorizer/parsers/JavaParser.java b/src/lu/fisch/structorizer/parsers/JavaParser.java
@@ -1083,6 +1083,7 @@ protected File prepareTextfile(String _textToParse, String _encoding) throws Par
 		 * beyond the power of our grammar to solve this.
 		 */
 		final Matcher castMatcher = Pattern.compile(".*[>]\\s*[)].*").matcher("");
+		final Matcher arrayDeclMatcher = Pattern.compile(".*[>]\\s*\\[\\s*\\].*").matcher("");
 
 		// START KGU#1122 2024-03-12: Bugfix #1136 New option to separate angular type parameter brackets
 		boolean separateAngularBrackets = (Boolean)this.getPluginOption("separate_angular_brackets", true);
@@ -1149,7 +1150,9 @@ protected File prepareTextfile(String _textToParse, String _encoding) throws Par
 					}
 					// START KGU#1122 2024-03-12: Bugfix #1136
 					castMatcher.reset(strLine);
-					if (castMatcher.matches() || strLine.contains(">>") || strLine.contains("?>")) {
+					arrayDeclMatcher.reset(strLine);
+					if (castMatcher.matches() || arrayDeclMatcher.matches()
+							|| strLine.contains(">>") || strLine.contains("?>")) {
 						boolean replaced = false;
 						StringList tokens = Element.splitLexically(strLine, true);
 						// Initially decompose all ">>" tokens into ">", ">" ...
@@ -1174,8 +1177,8 @@ protected File prepareTextfile(String _textToParse, String _encoding) throws Par
 							}
 						}
 
-						// Now we first look for type casts with type arguments and modify them
-						replaced = transformCastsWithTypeParameters(tokens) || replaced;
+						// Now we first look for type casts or array specs with type arguments and modify them
+						replaced = transformParameterisedTypes(tokens) || replaced;
 						// Second, we look for clashing closing angular brackets.
 						if (separateAngularBrackets) {
 							replaced = separateAngularTypeBrackets(tokens) || replaced;
@@ -1210,21 +1213,24 @@ protected File prepareTextfile(String _textToParse, String _encoding) throws Par
 		return interm;
 	}
 
-	// START KGU#1122 2024-03-12: Bugfix #1136 Methods to solve angular bracket trouble
+	// START KGU#1122 2024-03-17: Bugfix #1136 Methods to solve angular bracket trouble
 	/**
 	 * Derives a single pseudo identifier from parameterized types with casting
-	 * parentheses, e.g.<br/>
-	 * {@code (Vector<Integer>)expression} &rarr; {@code (VectorííIntegerìì)expression}
+	 * parentheses or in array specifications, e.g.<br/>
+	 * {@code (Vector<Integer>)expression} &rarr; {@code (VectorííIntegerìì)expression}<br/>
+	 * {@code JComboBox<String>[]} &rarr; {@code JComboBoxííStringìì[]}<br/>
+	 * If a replacement takes place, adds its backward mapping to {@link CodeParser#replacedIds}.
 	 * 
 	 * @param tokens - a tokenized line to be transformed in the described way
 	 *    at all matching places. May be modified (then returns {@code true}).
-	 * @return {@code true} if {@code tokens} was modified, {@code false} otherwise.
+	 * @return {@code true} if {@code tokens} was modified and a related entry is a member
+	 *    of {@link CodeParser#replacedIds}, {@code false} otherwise.
 	 */
-	private boolean transformCastsWithTypeParameters(StringList tokens) {
+	private boolean transformParameterisedTypes(StringList tokens) {
 		boolean replaced = false;
 		int posAngBr = -1;
 		while ((posAngBr = tokens.indexOf(">", posAngBr+1)) >= 0) {
-			// Go ahead and look for a closing parenthesis...
+			// Go ahead and look for a closing parenthesis or an opening bracket...
 			int posPar1 = posAngBr + 1;
 			while (posPar1 < tokens.count() && tokens.get(posPar1).isBlank()) posPar1++;
 			/* If there is a closing parenthesis then we walk backwards along
@@ -1236,7 +1242,8 @@ private boolean transformCastsWithTypeParameters(StringList tokens) {
 			 * are also necessary. The major question here is, how precise our syntax
 			 * check has to be.
 			 */
-			if (posPar1 < tokens.count() && tokens.get(posPar1).equals(")")) {
+			boolean isCast = false;
+			if (posPar1 < tokens.count() && ((isCast = tokens.get(posPar1).equals(")")) || tokens.get(posPar1).equals("["))) {
 				/* First make sure the construct is complete within this line
 				 * We must only find identifiers, '.', ',', '[]', '<', and '>'
 				 * (and possibly comments!?) between the parentheses...
@@ -1245,42 +1252,58 @@ private boolean transformCastsWithTypeParameters(StringList tokens) {
 				 */
 				int posPar0 = posAngBr - 1;
 				int angCount = 1;
+				int posId = -1;
 				while (posPar0 >= 0 && !tokens.get(posPar0).equals("(") && angCount >= 0) {
 					String token = tokens.get(posPar0);
 					if (token.isBlank()) {
 						// We want to produce a single pseudo identifier, so remove all blanks
 						tokens.remove(posPar0);
 						posPar1--;
 						posAngBr--;
+						posId--;
 					}
 					else if (token.equals(">")) {
 						angCount++;
 					}
 					else if (token.equals("<")) {
 						angCount--;
+						// There must be an identifier before the opening '<'
+						posId = posPar0 - 1;
+						while (angCount >= 0 && posId >= 0 && tokens.get(posId).isBlank()) posId--;
+						if (posId < 0 || !Function.testIdentifier(tokens.get(posId), false, "$")) {
+							break;
+						}
+						else if (!isCast && angCount == 0) {
+							posPar0 = posId;
+							break;
+						}
 					}
 					else if (!Function.testIdentifier(token, false, "$")
 							&& !(token.length() == 1 && "[],.".contains(token))) {
 						break;
 					}
 					posPar0--;
 				}
-				if (posPar0 >= 0 && tokens.get(posPar0).equals("(") && angCount == 0
-						&& isTypeSpecificationList(tokens.subSequence(posPar0+1, posPar1))) {
+				if (posPar0 >= 0 && (!isCast || tokens.get(posPar0).equals("(")) && angCount == 0
+						&& isTypeSpecificationList(tokens.subSequence(posId, posAngBr+1))) {
+					String origSequence = tokens.concatenate("", posId, posAngBr+1);
 					// We should have a dense token sequence now and produce a pseudo-identifier
 					tokens.replaceAllBetween("<", ANG_BRACK_SUBST[0], true, posPar0+1, posPar1);
 					tokens.replaceAllBetween(">", ANG_BRACK_SUBST[2], true, posPar0+1, posPar1);
 					tokens.replaceAllBetween(",", ANG_BRACK_SUBST[1], true, posPar0+1, posPar1);
 					tokens.replaceAllBetween("[", ANG_BRACK_SUBST[3], true, posPar0+1, posPar1);
 					tokens.replaceAllBetween("]", ANG_BRACK_SUBST[4], true, posPar0+1, posPar1);
-					// (The operator symbols will be restored after the parsing)
+					// Ensure the operator symbols will be restored after the parsing
+					this.replacedIds.putIfAbsent(tokens.concatenate("", posId, posAngBr+1), origSequence);
 					replaced = true;
 				}
 			}
 		}
 		return replaced;
 	}
+	// END KGU#1122 2024-03-17
 
+	// START KGU#1122 2024-03-12: Bugfix #1136 Methods to solve angular bracket trouble
 	/**
 	 * Tries to separate closing angular brackets of nested type parameters as
 	 * in {@code HashMap<String, ArrayList<String[]>>} within the given token
@@ -1329,12 +1352,12 @@ else if (!token.isBlank()
 	 * Checks the syntax of (possibly nested) type specifications like
 	 * {@code HashMap<String, Vector<String[]>>}
 	 * 
-	 * @param subSequence - token sequence assumed to specify a (possibly parameterized)
-	 *    type.
+	 * @param subSequence - token sequence assumed to specify a (possibly
+	 *    parameterized) type.
 	 * @return
 	 */
 	private boolean isTypeSpecificationList(StringList subSequence) {
-		// TODO Auto-generated method stub
+		// TODO (so far the parsing in transformParameterisedTypes() has been good enough)
 		//System.out.println("(Not) checking type list \"" + subSequence + "\" in JavaParser.isTypeSpecification()");
 		return true;
 	}
@@ -3907,8 +3930,9 @@ protected String translateContent(String _content)
 	/**
 	 * Convenience method for the string content retrieval from a {@link Token}
 	 * that may either represent a content symbol or a {@link Reduction}.
+	 * 
 	 * @param _token - the {@link Token} the content is to be appended to
-	 *        {@code _content}
+	 *    {@code _content}
 	 * @return the content string (may be empty in case of noise)
 	 * @throws ParserCancelled
 	 */
@@ -3918,12 +3942,13 @@ protected String getContent_R(Token _token) throws ParserCancelled
 			return getContent_R(_token.asReduction(), "");
 		}
 		else if (_token.getType() == SymbolType.CONTENT) {
-			// START KGU#1122 2024-03-12: Bugfix #1136 revert preprocessing substitutions
+			// START KGU#1122 2024-03-17: Bugfix #1136 revert preprocessing substitutions
 			//return _token.asString();
-			return _token.asString().replace(ANG_BRACK_SUBST[0], "<").replace(ANG_BRACK_SUBST[2], ">")
-					.replace(ANG_BRACK_SUBST[1], ",")
-					.replace(ANG_BRACK_SUBST[3], "[").replace(ANG_BRACK_SUBST[4], "]");
-			// END KGU#1122 2024-03-12
+			//return _token.asString().replace(ANG_BRACK_SUBST[0], "<").replace(ANG_BRACK_SUBST[2], ">")
+			//		.replace(ANG_BRACK_SUBST[1], ",")
+			//		.replace(ANG_BRACK_SUBST[3], "[").replace(ANG_BRACK_SUBST[4], "]");
+			return undoIdReplacements(_token.asString());
+			// END KGU#1122 2024-03-17
 		}
 		return "";
 	}
@@ -4041,12 +4066,7 @@ else if (idx == SymbolConstants.SYM_FLOATINGPOINTLITERAL && toAdd.matches(".+?[f
 			/* -------- End code example for text retrieval and translation -------- */
 		}
 
-		// START KGU#1122 2024-03-12: Bugfix #1136 revert preprocessing substitutions
-		//return _content;
-		return _content.replace(ANG_BRACK_SUBST[0], "<").replace(ANG_BRACK_SUBST[2], ">")
-				.replace(ANG_BRACK_SUBST[1], ",")
-				.replace(ANG_BRACK_SUBST[3], "[").replace(ANG_BRACK_SUBST[4], "]");
-		// END KGU#1122 2024-03-12
+		return _content;
 	}
 
 	//------------------------- Postprocessor ---------------------------