Skip to content

Commit

Permalink
Retrieving list of pages linking to a page: do not use "backlinks"
Browse files Browse the repository at this point in the history
anymore; use "redirects" and "linkshere" instead, to avoid problems with
redirects that also have content
  • Loading branch information
Nicolas Vervelle committed May 30, 2016
1 parent c9d057b commit 6481f87
Show file tree
Hide file tree
Showing 55 changed files with 1,579 additions and 963 deletions.
4 changes: 2 additions & 2 deletions WikipediaCleaner/src/org/wikipediacleaner/Version.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@
*/
public final class Version {

public final static String VERSION = "1.38";
public final static Date DATE = new GregorianCalendar(2016, Calendar.JANUARY, 3).getTime();
public final static String VERSION = "1.39";
public final static Date DATE = new GregorianCalendar(2016, Calendar.MAY, 30).getTime();

public final static String PROGRAM = "WPCleaner";

Expand Down
20 changes: 18 additions & 2 deletions WikipediaCleaner/src/org/wikipediacleaner/api/API.java
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,21 @@ public void initializeDisambiguationStatus(
public void retrieveLinks(
EnumWikipedia wiki, Collection<Page> pages) throws APIException;

/**
* Retrieves the links to the <code>page</code> and initializes its redirect status.
* (<code>action=query</code>, <code>prop=linkshere</code>).
* <p>
* This is the replacement for the former back links retrieval
* (<code>retrieveBackLinks</code>), which is no longer part of this interface.
*
* @param wiki Wiki.
* @param page The page for which the linking pages are requested.
* @param redirects True if it should also retrieve links through redirects.
* @throws APIException Exception thrown by the API.
* @see <a href="http://www.mediawiki.org/wiki/API:Linkshere">API:Linkshere</a>
*/
public void retrieveLinksHere(
EnumWikipedia wiki, Page page,
boolean redirects)
throws APIException;

/**
* Retrieves internal links of one page.
* (<code>action=query</code>, <code>prop=links</code>).
Expand Down Expand Up @@ -298,7 +313,7 @@ public String getLanguageLink(
*
* @param wiki Wiki.
* @throws APIException
* @see <a href="http://www.mediawiki.org/wiki/API:Backlinks">API:Backlinks</a>
* @see <a href="http://www.mediawiki.org/wiki/API:Abusefilters">API:Abusefilters</a>
*/
public List<AbuseFilter> retrieveAbuseFilters(
EnumWikipedia wiki) throws APIException;
Expand Down Expand Up @@ -327,9 +342,10 @@ public List<Page> retrieveAbuseLog(
* @throws APIException
* @see <a href="http://www.mediawiki.org/wiki/API:Backlinks">API:Backlinks</a>
*/
/* @Deprecated
public void retrieveBackLinks(
EnumWikipedia wiki, Page page,
boolean redirects) throws APIException;
boolean redirects) throws APIException;*/

/**
* Retrieves the pages in which <code>page</code> is embedded.
Expand Down
14 changes: 7 additions & 7 deletions WikipediaCleaner/src/org/wikipediacleaner/api/MediaWiki.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
import org.wikipediacleaner.api.data.AutomaticFixing;
import org.wikipediacleaner.api.data.AutomaticFormatter;
import org.wikipediacleaner.api.data.Page;
import org.wikipediacleaner.api.execution.BacklinksWRCallable;
import org.wikipediacleaner.api.execution.AllLinksToPageCallable;
import org.wikipediacleaner.api.execution.ContentsCallable;
import org.wikipediacleaner.api.execution.DisambiguationStatusCallable;
import org.wikipediacleaner.api.execution.EmbeddedInCallable;
Expand Down Expand Up @@ -392,39 +392,39 @@ public void retrieveAllTemplates(
}

/**
* Retrieve all backlinks (with redirects) of a page.
* Retrieve all links to a page (with redirects).
*
* @param wikipedia Wikipedia.
* @param page Page.
* @param block Flag indicating if the call should block until completed.
* @throws APIException
*/
public void retrieveAllBacklinks(
public void retrieveAllLinksToPage(
EnumWikipedia wikipedia,
Page page, boolean block) throws APIException {
if (page == null) {
return;
}
retrieveAllBacklinks(wikipedia, Collections.singleton(page), block);
retrieveAllLinksToPages(wikipedia, Collections.singleton(page), block);
}

/**
* Retrieve all backlinks (with redirects) of a list of pages.
* Retrieve all links to a list of pages (with redirects).
*
* @param wikipedia Wikipedia.
* @param pageList List of pages.
* @param block Flag indicating if the call should block until completed.
* @throws APIException
*/
public void retrieveAllBacklinks(
public void retrieveAllLinksToPages(
EnumWikipedia wikipedia,
Collection<Page> pageList, boolean block) throws APIException {
if ((pageList == null) || (pageList.size() == 0)) {
return;
}
final API api = APIFactory.getAPI();
for (final Page page : pageList) {
addTask(new BacklinksWRCallable(wikipedia, this, api, page));
addTask(new AllLinksToPageCallable(wikipedia, this, api, page));
}
block(block);
}
Expand Down
44 changes: 19 additions & 25 deletions WikipediaCleaner/src/org/wikipediacleaner/api/data/Page.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,12 @@ public class Page implements Comparable<Page> {
* Kinds of related pages.
*/
public static enum RelatedPages {
BACKLINKS,
@Deprecated BACKLINKS, // Replaced by LINKS_HERE (back links notion is too problematic in API)
CATEGORIES,
CATEGORY_MEMBERS,
EMBEDDED_IN,
LINKS_HERE,
REDIRECTS,
SIMILAR_PAGES;
}

Expand Down Expand Up @@ -632,30 +634,22 @@ public void setLinks(List<Page> links) {
}

/**
* @return Back links of the page (including redirects).
* @return Links to the page (including through redirects).
*/
public List<Page> getBackLinksWithRedirects() {
List<Page> backLinks = getRelatedPages(RelatedPages.BACKLINKS);
List<Page> result = backLinks;
public List<Page> getAllLinksToPage() {
List<Page> linksHere = getRelatedPages(RelatedPages.LINKS_HERE);
List<Page> result = linksHere;
boolean originalList = true;
if (backLinks != null) {
for (Page p : backLinks) {
if (p.isRedirect()) {
List<Page> tmpRedirects = p.getRedirects();
for (int i = 0; i < tmpRedirects.size(); i++) {
Page tmp = tmpRedirects.get(i);
if (areSameTitle(title, tmp.getTitle())) {
List<Page> tmpBackLinks = p.getBackLinksWithRedirects();
if ((tmpBackLinks != null) && (!tmpBackLinks.isEmpty())) {
if (originalList) {
result = new ArrayList<Page>(result);
originalList = false;
}
result.addAll(tmpBackLinks);
}
break;
}
List<Page> tmpRedirects = getRelatedPages(RelatedPages.REDIRECTS);
if (tmpRedirects != null) {
for (Page p : tmpRedirects) {
List<Page> tmpLinksHere = p.getAllLinksToPage();
if ((tmpLinksHere != null) && (!tmpLinksHere.isEmpty())) {
if (originalList) {
result = new ArrayList<Page>(result);
originalList = false;
}
result.addAll(tmpLinksHere);
}
}
}
Expand All @@ -676,7 +670,7 @@ public List<Page> getBackLinksWithRedirects() {
* @return Backlinks count.
*/
public Integer getBacklinksCount() {
List<Page> backlinks = getBackLinksWithRedirects();
List<Page> backlinks = getAllLinksToPage();
if (backlinks != null) {
return backlinks.size();
}
Expand All @@ -687,7 +681,7 @@ public Integer getBacklinksCount() {
* @return Backlinks count in article namespace.
*/
public Integer getBacklinksCountInMainNamespace() {
List<Page> backlinks = getBackLinksWithRedirects();
List<Page> backlinks = getAllLinksToPage();
if (backlinks != null) {
int count = 0;
for (int i = 0; i < backlinks.size(); i++) {
Expand All @@ -704,7 +698,7 @@ public Integer getBacklinksCountInMainNamespace() {
* @return Backlinks count in template namespace.
*/
public Integer getBacklinksCountInTemplateNamespace() {
List<Page> backlinks = getBackLinksWithRedirects();
List<Page> backlinks = getAllLinksToPage();
if (backlinks != null) {
int count = 0;
for (int i = 0; i < backlinks.size(); i++) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
/**
* A Callable implementation for retrieving all links to a page (including through redirects).
*/
public class BacklinksWRCallable extends MediaWikiCallable<Page> {
public class AllLinksToPageCallable extends MediaWikiCallable<Page> {

private final Page page;

Expand All @@ -28,7 +28,7 @@ public class BacklinksWRCallable extends MediaWikiCallable<Page> {
* @param api MediaWiki API.
* @param page Page.
*/
public BacklinksWRCallable(
public AllLinksToPageCallable(
EnumWikipedia wikipedia, MediaWikiListener listener, API api,
Page page) {
super(wikipedia, listener, api);
Expand All @@ -40,8 +40,8 @@ public BacklinksWRCallable(
*/
@Override
public Page call() throws APIException {
setText(GT._("Retrieving page back links") + " - " + page.getTitle());
api.retrieveBackLinks(getWikipedia(), page, true);
setText(GT._("Retrieving all links to page") + " - " + page.getTitle());
api.retrieveLinksHere(getWikipedia(), page, true);
return page;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,6 @@
import org.wikipediacleaner.api.request.query.list.ApiAbuseFiltersResult;
import org.wikipediacleaner.api.request.query.list.ApiAbuseLogRequest;
import org.wikipediacleaner.api.request.query.list.ApiAbuseLogResult;
import org.wikipediacleaner.api.request.query.list.ApiBacklinksRequest;
import org.wikipediacleaner.api.request.query.list.ApiBacklinksResult;
import org.wikipediacleaner.api.request.query.list.ApiCategoryMembersRequest;
import org.wikipediacleaner.api.request.query.list.ApiCategoryMembersResult;
import org.wikipediacleaner.api.request.query.list.ApiEmbeddedInRequest;
Expand All @@ -103,7 +101,6 @@
import org.wikipediacleaner.api.request.query.list.ApiUsersResult;
import org.wikipediacleaner.api.request.query.list.ApiXmlAbuseFiltersResult;
import org.wikipediacleaner.api.request.query.list.ApiXmlAbuseLogResult;
import org.wikipediacleaner.api.request.query.list.ApiXmlBacklinksResult;
import org.wikipediacleaner.api.request.query.list.ApiXmlCategoryMembersResult;
import org.wikipediacleaner.api.request.query.list.ApiXmlEmbeddedInResult;
import org.wikipediacleaner.api.request.query.list.ApiXmlPagesWithPropResult;
Expand All @@ -129,20 +126,26 @@
import org.wikipediacleaner.api.request.query.prop.ApiInfoResult;
import org.wikipediacleaner.api.request.query.prop.ApiLanguageLinksRequest;
import org.wikipediacleaner.api.request.query.prop.ApiLanguageLinksResult;
import org.wikipediacleaner.api.request.query.prop.ApiLinksHereRequest;
import org.wikipediacleaner.api.request.query.prop.ApiLinksHereResult;
import org.wikipediacleaner.api.request.query.prop.ApiLinksRequest;
import org.wikipediacleaner.api.request.query.prop.ApiLinksResult;
import org.wikipediacleaner.api.request.query.prop.ApiPagePropsRequest;
import org.wikipediacleaner.api.request.query.prop.ApiPagePropsResult;
import org.wikipediacleaner.api.request.query.prop.ApiRedirectsRequest;
import org.wikipediacleaner.api.request.query.prop.ApiRedirectsResult;
import org.wikipediacleaner.api.request.query.prop.ApiRevisionsRequest;
import org.wikipediacleaner.api.request.query.prop.ApiRevisionsResult;
import org.wikipediacleaner.api.request.query.prop.ApiTemplatesRequest;
import org.wikipediacleaner.api.request.query.prop.ApiTemplatesResult;
import org.wikipediacleaner.api.request.query.prop.ApiXmlCategoriesResult;
import org.wikipediacleaner.api.request.query.prop.ApiXmlInfoResult;
import org.wikipediacleaner.api.request.query.prop.ApiXmlLanguageLinksResult;
import org.wikipediacleaner.api.request.query.prop.ApiXmlLinksHereResult;
import org.wikipediacleaner.api.request.query.prop.ApiXmlLinksResult;
import org.wikipediacleaner.api.request.query.prop.ApiXmlPagePropsResult;
import org.wikipediacleaner.api.request.query.prop.ApiXmlPropertiesResult;
import org.wikipediacleaner.api.request.query.prop.ApiXmlRedirectsResult;
import org.wikipediacleaner.api.request.query.prop.ApiXmlRevisionsResult;
import org.wikipediacleaner.api.request.query.prop.ApiXmlTemplatesResult;
import org.wikipediacleaner.api.request.templatedata.ApiJsonTemplateDataResult;
Expand Down Expand Up @@ -1092,6 +1095,30 @@ public void retrieveLinks(
}
}

/**
 * Loads the redirects of the <code>page</code>, then the pages linking to it.
 * (<code>action=query</code>, <code>prop=linkshere</code>).
 * <p>
 * The redirects request is always issued, whatever the value of
 * <code>redirects</code>; the flag is only forwarded to the links here request.
 *
 * @param wiki Wiki.
 * @param page The page.
 * @param redirects True if it should also retrieve links through redirects.
 * @throws APIException Exception thrown by the API.
 * @see <a href="http://www.mediawiki.org/wiki/API:Linkshere">API:Linkshere</a>
 */
@Override
public void retrieveLinksHere(
    EnumWikipedia wiki, Page page,
    boolean redirects)
        throws APIException {
  // First request: redirects of the page (must run before the links here request).
  ApiRedirectsResult redirectsResult = new ApiXmlRedirectsResult(wiki, httpClient);
  new ApiRedirectsRequest(wiki, redirectsResult).loadRedirects(page);

  // Second request: pages linking to the page, optionally through redirects.
  ApiLinksHereResult linksHereResult = new ApiXmlLinksHereResult(wiki, httpClient);
  new ApiLinksHereRequest(wiki, linksHereResult).loadLinksHere(page, redirects);
}

/**
* Retrieve a specific language link in a page.
* (<code>action=query</code>, <code>prop=langlinks</code>).
Expand Down Expand Up @@ -1161,15 +1188,15 @@ public List<Page> retrieveAbuseLog(
* @throws APIException
* @see <a href="http://www.mediawiki.org/wiki/API:Backlinks">API:Backlinks</a>
*/
@Override
/* @Override
public void retrieveBackLinks(
EnumWikipedia wiki, Page page,
boolean redirects)
throws APIException {
ApiBacklinksResult result = new ApiXmlBacklinksResult(wiki, httpClient);
ApiBacklinksRequest request = new ApiBacklinksRequest(wiki, result);
request.loadBacklinks(page, redirects);
}
}*/

/**
* Retrieves the pages in which <code>page</code> is embedded.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
/**
* MediaWiki back links requests.
*/
@Deprecated
public class ApiBacklinksRequest extends ApiListRequest {

// ==========================================================================
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
/**
* MediaWiki API XML back links results.
*/
@Deprecated
public class ApiXmlBacklinksResult extends ApiXmlResult implements ApiBacklinksResult {

/**
Expand Down
Loading

0 comments on commit 6481f87

Please sign in to comment.