Permalink
Switch branches/tags
Nothing to show
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
148 lines (130 sloc) 5.32 KB
/*
* WPCleaner: A tool to help on Wikipedia maintenance tasks.
* Copyright (C) 2013 Nicolas Vervelle
*
* See README.txt file for licensing information.
*/
package org.wikipediacleaner.api.check.algorithm;
import java.util.Collection;
import java.util.List;
import org.wikipediacleaner.api.check.CheckErrorResult;
import org.wikipediacleaner.api.constants.ArticleUrl;
import org.wikipediacleaner.api.data.Namespace;
import org.wikipediacleaner.api.data.PageAnalysis;
import org.wikipediacleaner.api.data.PageElementExternalLink;
import org.wikipediacleaner.api.data.PageElementInternalLink;
import org.wikipediacleaner.api.data.PageElementTag;
/**
 * Algorithm for analyzing error 004 of check wikipedia project.
 * Error 004: {@code <a>} tags in main namespace
 */
public class CheckErrorAlgorithm004 extends CheckErrorAlgorithmBase {

  /**
   * Creates the algorithm, passing its short description to the base class.
   */
  public CheckErrorAlgorithm004() {
    super("<a> tags");
  }

  /**
   * Looks for {@code <a>} tags in a page of the main namespace and reports
   * each one together with a list of suggested replacements.
   *
   * @param analysis Page analysis.
   * @param errors Collection receiving the errors found; when <code>null</code>,
   *               the method returns as soon as a first error is detected.
   * @param onlyAutomatic True if analysis could be restricted to errors automatically fixed
   *                      (this implementation does not consult it).
   * @return True if at least one error was found.
   */
  @Override
  public boolean analyze(
      PageAnalysis analysis,
      Collection<CheckErrorResult> errors, boolean onlyAutomatic) {

    // Without a page there is nothing to analyze
    if (analysis == null) {
      return false;
    }
    if (analysis.getPage() == null) {
      return false;
    }

    // Only pages of the main namespace are concerned
    Integer ns = analysis.getPage().getNamespace();
    if (ns == null) {
      return false;
    }
    if (ns.intValue() != Namespace.MAIN) {
      return false;
    }

    // Retrieve every <a> tag of the page
    List<PageElementTag> anchors = analysis.getTags(PageElementTag.TAG_HTML_A);
    if (anchors == null) {
      return false;
    }
    if (anchors.isEmpty()) {
      return false;
    }

    boolean found = false;
    for (PageElementTag anchor : anchors) {

      // The closing tag of a complete pair is ignored:
      // the report built for this kind of pair spans the complete tag.
      boolean closingOfPair =
          !anchor.isFullTag() && anchor.isEndTag() && anchor.isComplete();

      // Tags located inside one of these surrounding tags are ignored
      int position = anchor.getBeginIndex();
      boolean insideIgnoredTag =
          (analysis.getSurroundingTag(PageElementTag.TAG_HTML_CODE, position) != null) ||
          (analysis.getSurroundingTag(PageElementTag.TAG_WIKI_IMAGEMAP, position) != null) ||
          (analysis.getSurroundingTag(PageElementTag.TAG_WIKI_MATH, position) != null) ||
          (analysis.getSurroundingTag(PageElementTag.TAG_WIKI_MATH_CHEM, position) != null) ||
          (analysis.getSurroundingTag(PageElementTag.TAG_WIKI_NOWIKI, position) != null) ||
          (analysis.getSurroundingTag(PageElementTag.TAG_WIKI_PRE, position) != null) ||
          (analysis.getSurroundingTag(PageElementTag.TAG_WIKI_SCORE, position) != null) ||
          (analysis.getSurroundingTag(PageElementTag.TAG_WIKI_SOURCE, position) != null) ||
          (analysis.getSurroundingTag(PageElementTag.TAG_WIKI_SYNTAXHIGHLIGHT, position) != null);

      if (closingOfPair || insideIgnoredTag) {
        continue;
      }

      // The caller only wants to know whether the error exists
      if (errors == null) {
        return true;
      }
      found = true;

      CheckErrorResult report = createCheckErrorResult(
          analysis, anchor.getCompleteBeginIndex(), anchor.getCompleteEndIndex());

      if (anchor.isFullTag() || !anchor.isComplete()) {
        // Full tag or unpaired tag: the only suggestion is to remove it
        report.addReplacement("");
      } else {
        // Complete pair: suggestions are built from the text between the tags
        String label = analysis.getContents().substring(
            anchor.getValueBeginIndex(), anchor.getValueEndIndex());

        PageElementTag.Parameter hrefParam = anchor.getParameter("href");
        String target = null;
        if (hrefParam != null) {
          target = hrefParam.getTrimmedValue();
        }

        if (target != null) {

          // href with "tel:" protocol: keep only the text, flagged as automatic
          if (target.startsWith("tel:")) {
            report.addReplacement(label, true);
          }

          // href recognized as an article URL: suggest an internal link
          ArticleUrl wikiUrl = ArticleUrl.isArticleUrl(analysis.getWikipedia(), target);
          if (wikiUrl != null) {
            String title = wikiUrl.getTitle();
            if ((title != null) && !title.isEmpty()) {
              // Automatic only when the URL has neither attributes nor fragment
              boolean automatic =
                  (wikiUrl.getAttributes() == null) && (wikiUrl.getFragment() == null);
              report.addReplacement(
                  PageElementInternalLink.createInternalLink(title, label),
                  automatic);
            }
          }

          // href starting with a possible protocol: suggest an external link
          if (!target.isEmpty() &&
              PageElementExternalLink.isPossibleProtocol(target, 0)) {
            report.addReplacement(
                PageElementExternalLink.createExternalLink(target, label));
          }
        }

        // Last suggestion: keep only the text between the tags
        report.addReplacement(label);
      }

      errors.add(report);
    }

    return found;
  }

  /**
   * Automatic fixing of some errors in the page.
   * Delegates to {@code fixUsingAutomaticReplacement}.
   *
   * @param analysis Page analysis.
   * @return Page contents after fix.
   */
  @Override
  protected String internalAutomaticFix(PageAnalysis analysis) {
    return fixUsingAutomaticReplacement(analysis);
  }
}