-
Notifications
You must be signed in to change notification settings - Fork 13
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
T256312: same reference used twice in the same place
- Loading branch information
Nicolas
authored and
Nicolas
committed
Nov 4, 2020
1 parent
9520408
commit 71d184b
Showing
11 changed files
with
356 additions
and
26 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
DoTasks _Common.txt | ||
Set Prefix [[Utilisateur:WikiCleanerBot#T4|Bot_T4]] | ||
ListCheckWiki -onlyRecheck C:\Users\Nicolas\Downloads\frwiki-$-pages-articles.xml.bz2 wiki:Projet:Correction_syntaxique/Analyse_{0} 1 2 3 4 5 7 8 9 13 14 15 16 17 18 19 20 21 22 23 24 25 26 28 38 42 48 51 52 54 55 64 69 70 71 72 73 83 88 90 98 99 102 104 105 106 107 108 109 111 112 513 526 542 543 544 546 547 548 549 550 551 | ||
ListCheckWiki -onlyRecheck C:\Users\Nicolas\Downloads\frwiki-$-pages-articles.xml.bz2 wiki:Projet:Correction_syntaxique/Analyse_{0} 1 2 3 4 5 7 8 9 10 13 14 15 16 17 18 19 20 21 22 23 24 25 26 28 38 42 46 48 51 52 54 55 64 69 70 71 72 73 83 85 88 90 92 98 99 102 104 105 106 107 108 109 111 112 513 526 542 543 544 546 547 548 549 550 551 552 553 554 555 557 558 | ||
DoTasks ISBN_ISSN.txt |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,3 @@ | ||
Set AdditionalAlgorithms 1 2 7 9 10 16 17 18 19 22 25 46 48 52 55 64 83 85 88 90 91 92 104 106 513 524 526 532 533 534 537 538 539 540 541 542 543 547 548 549 550 551 553 554 555 557 | ||
Set AdditionalAlgorithms 1 2 7 9 10 16 17 18 19 22 25 46 48 52 55 64 83 85 88 90 91 92 104 106 513 524 526 532 533 534 537 538 539 540 541 542 543 547 548 549 550 551 553 554 555 557 558 | ||
Set Configuration TimeBetweenEdit 5 | ||
Set Configuration MaxEditsPerMinute 10 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
292 changes: 292 additions & 0 deletions
292
WikipediaCleaner/src/org/wikipediacleaner/api/check/algorithm/CheckErrorAlgorithm558.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,292 @@ | ||
/* | ||
* WPCleaner: A tool to help on Wikipedia maintenance tasks. | ||
* Copyright (C) 2013 Nicolas Vervelle | ||
* | ||
* See README.txt file for licensing information. | ||
*/ | ||
|
||
package org.wikipediacleaner.api.check.algorithm; | ||
|
||
import java.util.ArrayList; | ||
import java.util.Collection; | ||
import java.util.Collections; | ||
import java.util.HashSet; | ||
import java.util.List; | ||
import java.util.Set; | ||
|
||
import org.apache.commons.lang3.StringUtils; | ||
import org.wikipediacleaner.api.algorithm.AlgorithmParameter; | ||
import org.wikipediacleaner.api.algorithm.AlgorithmParameterElement; | ||
import org.wikipediacleaner.api.check.CheckErrorResult; | ||
import org.wikipediacleaner.api.configuration.WPCConfiguration; | ||
import org.wikipediacleaner.api.configuration.WPCConfigurationString; | ||
import org.wikipediacleaner.api.configuration.WPCConfigurationStringList; | ||
import org.wikipediacleaner.api.data.Page; | ||
import org.wikipediacleaner.api.data.PageElement; | ||
import org.wikipediacleaner.api.data.PageElementFullTag; | ||
import org.wikipediacleaner.api.data.PageElementTag; | ||
import org.wikipediacleaner.api.data.PageElementTag.Parameter; | ||
import org.wikipediacleaner.api.data.PageElementTemplate; | ||
import org.wikipediacleaner.api.data.analysis.PageAnalysis; | ||
import org.wikipediacleaner.api.data.contents.IntervalComparator; | ||
import org.wikipediacleaner.i18n.GT; | ||
|
||
|
||
/** | ||
* Algorithm for analyzing error 558 of check wikipedia project. | ||
* Error 558: Duplicated reference | ||
*/ | ||
public class CheckErrorAlgorithm558 extends CheckErrorAlgorithmBase { | ||
|
||
public CheckErrorAlgorithm558() { | ||
super("Duplicated reference"); | ||
} | ||
|
||
/** | ||
* Analyze a page to check if errors are present. | ||
* | ||
* @param analysis Page analysis. | ||
* @param errors Errors found in the page. | ||
* @param onlyAutomatic True if analysis could be restricted to errors automatically fixed. | ||
* @return Flag indicating if the error was found. | ||
*/ | ||
@Override | ||
public boolean analyze( | ||
PageAnalysis analysis, | ||
Collection<CheckErrorResult> errors, boolean onlyAutomatic) { | ||
if (analysis == null) { | ||
return false; | ||
} | ||
|
||
// Analyze from the beginning | ||
List<PageElement> refs = getRefs(analysis); | ||
if ((refs == null) || (refs.isEmpty())) { | ||
return false; | ||
} | ||
boolean result = false; | ||
String contents = analysis.getContents(); | ||
int refIndex = 0; | ||
int maxRefs = refs.size(); | ||
while (refIndex < maxRefs) { | ||
|
||
// Group references separated only by punctuation characters | ||
int lastRefIndex = PageElement.groupElements(refs, refIndex, contents, ",;.\'", separators); | ||
result |= analyzeGroupOfTags(analysis, contents, errors, refs, refIndex, lastRefIndex); | ||
refIndex = lastRefIndex + 1; | ||
} | ||
return result; | ||
} | ||
|
||
/** | ||
* Analyze a group of tags. | ||
* | ||
* @param analysis Page analysis. | ||
* @param contents Page contents. | ||
* @param errors Errors found in the page. | ||
* @param refs List of references. | ||
* @param firstRefIndex Index of the first reference of the group. | ||
* @param lastRefIndex Index of the last reference of the group. | ||
* @return True if the error was found in the group of tags. | ||
*/ | ||
private boolean analyzeGroupOfTags( | ||
PageAnalysis analysis, String contents, | ||
Collection<CheckErrorResult> errors, | ||
List<PageElement> refs, | ||
int firstRefIndex, int lastRefIndex) { | ||
|
||
if (lastRefIndex == firstRefIndex) { | ||
return false; | ||
} | ||
for (int firstIndex = firstRefIndex; firstIndex < lastRefIndex; firstIndex++) { | ||
PageElement firstRef = refs.get(firstIndex); | ||
PageElementTag firstRefTag = (firstRef instanceof PageElementTag) ? (PageElementTag) firstRef : null; | ||
String firstContent = contents.substring(firstRef.getBeginIndex(), firstRef.getEndIndex()); | ||
for (int secondIndex = firstIndex + 1; secondIndex <= lastRefIndex; secondIndex++) { | ||
PageElement secondRef = refs.get(secondIndex); | ||
String secondContent = contents.substring(secondRef.getBeginIndex(), secondRef.getEndIndex()); | ||
if (firstContent.equals(secondContent)) { | ||
if (errors == null) { | ||
return true; | ||
} | ||
CheckErrorResult errorResult = createCheckErrorResult(analysis, firstRef.getBeginIndex(), secondRef.getEndIndex()); | ||
errorResult.addReplacement( | ||
contents.substring(firstRef.getBeginIndex(), refs.get(secondIndex - 1).getEndIndex()), | ||
canRemoveBetween(contents, refs.get(secondIndex - 1), refs.get(secondIndex))); | ||
errors.add(errorResult); | ||
return true; | ||
} | ||
PageElementTag secondRefTag = null; | ||
if ((firstRefTag != null) && (secondRef instanceof PageElementTag)) { | ||
PageElementTag tmpTag = (PageElementTag) secondRef; | ||
Parameter firstName = firstRefTag.getParameter("name"); | ||
Parameter secondName = tmpTag.getParameter("name"); | ||
if ((firstName != null) && | ||
(secondName != null) && | ||
StringUtils.equals(firstName.getValue(), secondName.getValue())) { | ||
Parameter firstGroup = firstRefTag.getParameter("group"); | ||
Parameter secondGroup = tmpTag.getParameter("group"); | ||
if ((firstGroup != null) && | ||
(secondGroup != null) && | ||
StringUtils.equals(firstGroup.getValue(), secondGroup.getValue())) { | ||
secondRefTag = tmpTag; | ||
} else if ((firstGroup == null) && (secondGroup == null)) { | ||
secondRefTag = tmpTag; | ||
} | ||
} | ||
} | ||
if ((firstRefTag != null) && (secondRefTag != null)) { | ||
CheckErrorResult errorResult = createCheckErrorResult(analysis, firstRef.getBeginIndex(), secondRef.getEndIndex()); | ||
if (secondRefTag.isFullTag()) { | ||
errorResult.addReplacement( | ||
contents.substring(firstRef.getBeginIndex(), refs.get(secondIndex - 1).getEndIndex()), | ||
canRemoveBetween(contents, refs.get(secondIndex - 1), refs.get(secondIndex))); | ||
} else if (firstRefTag.isFullTag()) { | ||
errorResult.addReplacement( | ||
contents.substring(refs.get(firstIndex + 1).getBeginIndex(), secondRef.getEndIndex()), | ||
canRemoveBetween(contents, refs.get(firstIndex), refs.get(firstIndex + 1))); | ||
} | ||
errors.add(errorResult); | ||
return true; | ||
} | ||
} | ||
} | ||
|
||
return false; | ||
} | ||
|
||
/** | ||
* Check if text can be removed between references. | ||
* | ||
* @param contents Page contents. | ||
* @param previousRef Previous reference. | ||
* @param nextRef Next reference. | ||
* @return True if the texte between the two references can be safely removed. | ||
*/ | ||
private boolean canRemoveBetween( | ||
String contents, | ||
PageElement previousRef, | ||
PageElement nextRef) { | ||
String text = contents.substring(previousRef.getEndIndex(), nextRef.getBeginIndex()); | ||
return !text.contains("''"); | ||
} | ||
|
||
/** | ||
* @param analysis Page analysis. | ||
* @return List of references (tags, templates, ...). | ||
*/ | ||
private List<PageElement> getRefs(PageAnalysis analysis) { | ||
List<PageElement> refs = new ArrayList<PageElement>(); | ||
|
||
// Retrieve references defined by tags | ||
List<PageElementTag> refTags = analysis.getCompleteTags(PageElementTag.TAG_WIKI_REF); | ||
if (refTags != null) { | ||
for (PageElementTag refTag : refTags) { | ||
refs.add(new PageElementFullTag(refTag)); | ||
} | ||
} | ||
|
||
// Retrieve references defined by templates | ||
if (!templatesName.isEmpty()) { | ||
List<PageElementTemplate> templates = analysis.getTemplates(); | ||
for (PageElementTemplate template : templates) { | ||
if (templatesName.contains(template.getTemplateName())) { | ||
refs.add(template); | ||
} | ||
} | ||
} | ||
|
||
Collections.sort(refs, new IntervalComparator()); | ||
return refs; | ||
} | ||
|
||
/** | ||
* Automatic fixing of all the errors in the page. | ||
* | ||
* @param analysis Page analysis. | ||
* @return Page contents after fix. | ||
*/ | ||
@Override | ||
protected String internalAutomaticFix(PageAnalysis analysis) { | ||
if (!analysis.getPage().isArticle() || | ||
!analysis.getPage().isInMainNamespace()) { | ||
return analysis.getContents(); | ||
} | ||
return fixUsingAutomaticReplacement(analysis); | ||
} | ||
|
||
/* ====================================================================== */ | ||
/* PARAMETERS */ | ||
/* ====================================================================== */ | ||
|
||
/** Separator between consecutive tags */ | ||
private static final String PARAMETER_SEPARATOR = "separator"; | ||
|
||
/** Templates that can replace a tag */ | ||
private static final String PARAMETER_TEMPLATES = "templates"; | ||
|
||
/** | ||
* Initialize settings for the algorithm. | ||
* | ||
* @see org.wikipediacleaner.api.check.algorithm.CheckErrorAlgorithmBase#initializeSettings() | ||
*/ | ||
@Override | ||
protected void initializeSettings() { | ||
separators.clear(); | ||
separator = getSpecificProperty(PARAMETER_SEPARATOR, true, false, false); | ||
if (separator == null) { | ||
separator = getWPCConfiguration().getString(WPCConfigurationString.REF_SEPARATOR); | ||
} | ||
if (separator == null) { | ||
separator = ""; | ||
} else { | ||
separators.add(separator); | ||
} | ||
List<String> tmpList = getWPCConfiguration().getStringList(WPCConfigurationStringList.REF_OTHER_SEPARATORS); | ||
if (tmpList != null) { | ||
for (String tmp : tmpList) { | ||
if (!separators.contains(tmp)) { | ||
separators.add(tmp); | ||
} | ||
} | ||
} | ||
|
||
String tmp = getSpecificProperty(PARAMETER_TEMPLATES, true, true, false); | ||
templatesName.clear(); | ||
if (tmp != null) { | ||
tmpList = WPCConfiguration.convertPropertyToStringList(tmp); | ||
for (String tmpElement : tmpList) { | ||
templatesName.add(Page.normalizeTitle(tmpElement)); | ||
} | ||
} | ||
} | ||
|
||
/** Valid separator between consecutive tags */ | ||
private String separator = ""; | ||
|
||
/** Separators between consecutive tags */ | ||
private final List<String> separators = new ArrayList<>(); | ||
|
||
/** Templates that can replace a tag */ | ||
private final Set<String> templatesName = new HashSet<>(); | ||
|
||
/** | ||
* Build the list of parameters for this algorithm. | ||
*/ | ||
@Override | ||
protected void addParameters() { | ||
super.addParameters(); | ||
addParameter(new AlgorithmParameter( | ||
PARAMETER_SEPARATOR, | ||
GT._T("Used as a separator between consecutive {0} tags", "<ref>"), | ||
new AlgorithmParameterElement( | ||
"text", | ||
GT._T("Used as a separator between consecutive {0} tags", "<ref>")))); | ||
addParameter(new AlgorithmParameter( | ||
PARAMETER_TEMPLATES, | ||
GT._T("Templates that can be used to replace {0} tags", "<ref>"), | ||
new AlgorithmParameterElement( | ||
"template name", | ||
GT._T("Template that can be used to replace {0} tags", "<ref>")), | ||
true)); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.