Skip to content

Commit

Permalink
T257236: Performance improvement for page initialization
Browse files Browse the repository at this point in the history
  • Loading branch information
Nicolas authored and Nicolas committed Feb 8, 2021
1 parent c39ea32 commit 311eddb
Show file tree
Hide file tree
Showing 44 changed files with 180 additions and 116 deletions.
3 changes: 2 additions & 1 deletion WikipediaCleaner/src/org/wikipediacleaner/Bot.java
Original file line number Diff line number Diff line change
Expand Up @@ -374,7 +374,8 @@ private BasicWorker executeFixCheckWiki(Action actionConfig) {
private BasicWorker executeFixListCheckWiki(Action actionConfig) {
Page page = null;
if (actionConfig.actionArgs.length > 0) {
page = DataManager.getPage(wiki, actionConfig.actionArgs[0], null, null, null);
page = DataManager.createSimplePage(
wiki, actionConfig.actionArgs[0], null, null, null);
}
List<CheckErrorAlgorithm> algorithms = new ArrayList<>();
List<CheckErrorAlgorithm> allAlgorithms = new ArrayList<>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -400,7 +400,8 @@ public Page getPage(int index) {
* @param pageId Page id.
*/
private void addPage(String page, Integer pageId) {
Page tmpPage = DataManager.getPage(wikipedia, page, pageId, null, null);
Page tmpPage = DataManager.createSimplePage(
wikipedia, page, pageId, null, null);
if (!errors.contains(tmpPage)) {
errors.add(tmpPage);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,7 @@ public void retrieveConfiguration(
try {
String translationPage = wpcConfiguration.getString(WPCConfigurationString.CW_TRANSLATION_PAGE);
if (translationPage != null) {
Page page = DataManager.getPage(
Page page = DataManager.createSimplePage(
wiki, translationPage,
null, null, null);
api.retrieveContents(wiki, Collections.singleton(page), false, false);
Expand All @@ -313,7 +313,7 @@ public void retrieveConfiguration(
for (int i = 0; i < CWConfiguration.MAX_ERROR_NUMBER; i++) {
CWConfigurationError error = cwConfiguration.getErrorConfiguration(i);
if ((error != null) && (error.getWhiteListPageName() != null)) {
Page page = DataManager.getPage(
Page page = DataManager.createSimplePage(
wiki, error.getWhiteListPageName(), null, null, null);
whiteListPages.put(error.getWhiteListPageName(), page);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -823,7 +823,7 @@ public void computeLinkInformation() {
endIndex++;
}
}
Page articlePage = DataManager.getPage(
Page articlePage = DataManager.createSimplePage(
analysis.getWikipedia(), article, null, null, null);
needColon = Boolean.FALSE;
if (articlePage.getNamespace() != null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ public boolean analyze(
if ((linkDest != null) &&
(linkDest.trim().length() > 0) &&
(linkDest.indexOf(':') >= 0)) {
Page page = DataManager.getPage(wiki, linkDest, null, null, null);
Page page = DataManager.createSimplePage(wiki, linkDest, null, null, null);
Integer namespace = page.getNamespace();
if ((namespace != null) &&
((namespace.intValue() == Namespace.USER) ||
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -308,7 +308,7 @@ public List<Page> getSpecialList(EnumWikipedia wiki, int limit) {
if (category != null) {
API api = APIFactory.getAPI();
String title = wiki.getWikiConfiguration().getPageTitle(Namespace.CATEGORY, category);
Page categoryPage = DataManager.getPage(wiki, title, null, null, null);
Page categoryPage = DataManager.createSimplePage(wiki, title, null, null, Namespace.CATEGORY);
try {
api.retrieveCategoryMembers(wiki, categoryPage, 0, false, limit);
result = categoryPage.getRelatedPages(RelatedPages.CATEGORY_MEMBERS);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ public List<Page> getSpecialList(EnumWikipedia wiki, int limit) {
// Use internal links
if (dumpAnalysis != null) {
API api = APIFactory.getAPI();
Page page = DataManager.getPage(wiki, dumpAnalysis, null, null, null);
Page page = DataManager.createSimplePage(wiki, dumpAnalysis, null, null, null);
try {
api.retrieveLinks(wiki, page, null, null, false, false);
if (page.getLinks() != null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ public List<Page> getSpecialList(EnumWikipedia wiki, int limit) {
if (category != null) {
API api = APIFactory.getAPI();
String title = wiki.getWikiConfiguration().getPageTitle(Namespace.CATEGORY, category);
Page categoryPage = DataManager.getPage(wiki, title, null, null, null);
Page categoryPage = DataManager.createSimplePage(wiki, title, null, null, Namespace.CATEGORY);
try {
api.retrieveCategoryMembers(wiki, categoryPage, 0, false, limit);
result = categoryPage.getRelatedPages(RelatedPages.CATEGORY_MEMBERS);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ public List<Page> getSpecialList(EnumWikipedia wiki, int limit) {
if (category != null) {
API api = APIFactory.getAPI();
String title = wiki.getWikiConfiguration().getPageTitle(Namespace.CATEGORY, category);
Page categoryPage = DataManager.getPage(wiki, title, null, null, null);
Page categoryPage = DataManager.createSimplePage(wiki, title, null, null, Namespace.CATEGORY);
try {
api.retrieveCategoryMembers(wiki, categoryPage, 0, false, limit);
result = categoryPage.getRelatedPages(RelatedPages.CATEGORY_MEMBERS);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,7 @@ public List<Page> getSpecialList(EnumWikipedia wiki, int limit) {
if (category != null) {
API api = APIFactory.getAPI();
String title = wiki.getWikiConfiguration().getPageTitle(Namespace.CATEGORY, category);
Page categoryPage = DataManager.getPage(wiki, title, null, null, null);
Page categoryPage = DataManager.createSimplePage(wiki, title, null, null, Namespace.CATEGORY);
try {
api.retrieveCategoryMembers(wiki, categoryPage, 0, false, limit);
result = categoryPage.getRelatedPages(RelatedPages.CATEGORY_MEMBERS);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ public List<Page> getSpecialList(EnumWikipedia wiki, int limit) {
if (category != null) {
API api = APIFactory.getAPI();
String title = wiki.getWikiConfiguration().getPageTitle(Namespace.CATEGORY, category);
Page categoryPage = DataManager.getPage(wiki, title, null, null, null);
Page categoryPage = DataManager.createSimplePage(wiki, title, null, null, Namespace.CATEGORY);
try {
api.retrieveCategoryMembers(wiki, categoryPage, 0, false, limit);
result = categoryPage.getRelatedPages(RelatedPages.CATEGORY_MEMBERS);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ public List<Page> getSpecialList(EnumWikipedia wiki, int limit) {
API api = APIFactory.getAPI();
for (String categoryName : categoriesName) {
String title = wiki.getWikiConfiguration().getPageTitle(Namespace.CATEGORY, categoryName);
Page category = DataManager.getPage(wiki, title, null, null, null);
Page category = DataManager.createSimplePage(wiki, title, null, null, Namespace.CATEGORY);
try {
api.retrieveCategoryMembers(wiki, category, 0, false, limit);
List<Page> tmp = category.getRelatedPages(RelatedPages.CATEGORY_MEMBERS);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -628,7 +628,7 @@ public void initSuggestions(API api, boolean forceInit) {
if (elements.length >= 4) {
String pageName = elements[0];
if (!pages.containsKey(pageName)) {
pages.put(pageName, DataManager.getPage(wiki, pageName, null, null, null));
pages.put(pageName, DataManager.createSimplePage(wiki, pageName, null, null, null));
}
}
}
Expand All @@ -637,7 +637,7 @@ public void initSuggestions(API api, boolean forceInit) {
if (suggestionTypoPages != null) {
for (String suggestionPage : suggestionTypoPages) {
if (!pages.containsKey(suggestionPage)) {
pages.put(suggestionPage, DataManager.getPage(wiki, suggestionPage, null, null, null));
pages.put(suggestionPage, DataManager.createSimplePage(wiki, suggestionPage, null, null, null));
}
}
}
Expand Down Expand Up @@ -996,7 +996,7 @@ private void setDisambiguationCategories(String value) {
this.disambiguationCategories = new ArrayList<>(tmp.size());
for (String category : tmp) {
this.disambiguationCategories.add(
DataManager.getPage(wiki, category, null, null, null));
DataManager.createSimplePage(wiki, category, null, null, null));
}
} else {
this.disambiguationCategories = null;
Expand Down Expand Up @@ -1356,7 +1356,7 @@ public List<Page> getTemplatesForHelpRequested() {
for (String template : templatesForHelpRequested) {
String title = wiki.getWikiConfiguration().getPageTitle(
Namespace.TEMPLATE, template);
tmp.add(DataManager.getPage(wiki, title, null, null, null));
tmp.add(DataManager.createSimplePage(wiki, title, null, null, Namespace.TEMPLATE));
}
return tmp;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -458,7 +458,7 @@ public Boolean isDisambiguationPage(Page page) {
public void initDisambiguationTemplates(API api) {
if (disambiguationTemplates == null) {
synchronized (api) {
Page page = DataManager.getPage(
Page page = DataManager.createSimplePage(
this, "Mediawiki:Disambiguationspage",
null, null, null);
try {
Expand Down
78 changes: 55 additions & 23 deletions WikipediaCleaner/src/org/wikipediacleaner/api/data/DataManager.java
Original file line number Diff line number Diff line change
Expand Up @@ -54,47 +54,79 @@ public static Page getExistingPage(
}

/**
* Create a page.
*
* @param wiki Wiki.
* @param title Page title.
* @param pageId Page id.
* @param revisionId Revision id.
* @param knownPages Already known pages.
* @return The requested page.
* @param title Page title (full title including the optional namespace)
* @param pageId Page identifier.
* @param revisionId Revision identifier.
* @param namespaceHint Suggestion for the namespace.
* @return Page.
*/
public static Page getPage(
public static Page createSimplePage(
EnumWikipedia wiki,
String title, Integer pageId, String revisionId,
List<Page> knownPages) {
Integer namespaceHint) {

// Check in the known pages
Page page = getExistingPage(wiki, title, revisionId, knownPages);
if (page != null) {
return page;
}

// Retrieve page
page = new Page(wiki, title);
// Create page
Page page = new Page(wiki, title);
page.setPageId(pageId);
page.setRevisionId(revisionId);

// Manage namespace
Integer namespaceId = null;
if (page.getTitle() != null) {
int colonIndex = page.getTitle().indexOf(':');
if (colonIndex > 0) {
String namespaceText = page.getTitle().substring(0, colonIndex);
List<Namespace> namespaces = wiki.getWikiConfiguration().getNamespaces();
if (namespaces != null) {
for (Namespace namespace : namespaces) {
if (namespace.isPossibleName(namespaceText)) {
page.setNamespace(namespace.getId());
if (namespaceHint != null) {
Namespace namespace = wiki.getWikiConfiguration().getNamespace(namespaceHint);
if ((namespace != null) && (namespace.isPossibleName(namespaceText))) {
namespaceId = namespace.getId();
}
}
if (namespaceId == null) {
List<Namespace> namespaces = wiki.getWikiConfiguration().getNamespaces();
if (namespaces != null) {
for (Namespace namespace : namespaces) {
if (namespace.isPossibleName(namespaceText)) {
namespaceId = namespace.getId();
break;
}
}
}
}
}
if (page.getNamespace() == null) {
page.setNamespace(Namespace.MAIN);
}
}
if (namespaceId == null) {
namespaceId = Namespace.MAIN;
}
page.setNamespace(namespaceId);

return page;
}

/**
* @param wiki Wiki.
* @param title Page title.
* @param pageId Page id.
* @param revisionId Revision id.
* @param knownPages Already known pages.
* @return The requested page.
*/
public static Page getPage(
EnumWikipedia wiki,
String title, Integer pageId, String revisionId,
List<Page> knownPages) {

// Check in the known pages
Page page = getExistingPage(wiki, title, revisionId, knownPages);
if (page != null) {
return page;
}

// Create page
page = createSimplePage(wiki, title, pageId, revisionId, null);

// Manage comments
Configuration config = Configuration.getConfiguration();
Expand Down
9 changes: 5 additions & 4 deletions WikipediaCleaner/src/org/wikipediacleaner/api/data/Page.java
Original file line number Diff line number Diff line change
Expand Up @@ -471,7 +471,7 @@ public Page getArticlePage() {
if (articlePageName == null) {
return null;
}
Page articlePage = DataManager.getPage(
Page articlePage = DataManager.createSimplePage(
getWikipedia(), articlePageName, null, null, null);
return articlePage;
}
Expand Down Expand Up @@ -514,7 +514,7 @@ public Page getTalkPage() {
if (talkPageName == null) {
return null;
}
Page talkPage = DataManager.getPage(
Page talkPage = DataManager.createSimplePage(
getWikipedia(), talkPageName, null, null, null);
return talkPage;
}
Expand Down Expand Up @@ -553,8 +553,9 @@ public String getTalkPageName() {
* @return Subpage.
*/
public Page getSubPage(String subpage) {
Page subPage = DataManager.getPage(
getWikipedia(), getTitle() + "/" + subpage, null, null, null);
Page subPage = DataManager.createSimplePage(
getWikipedia(), getTitle() + "/" + subpage,
null, null, getNamespace());
return subPage;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ public class StaticPageListProvider implements PageListProvider {
* @param title Title of the page to provide.
*/
public StaticPageListProvider(EnumWikipedia wiki, String title) {
this.pages = Collections.singletonList(DataManager.getPage(wiki, title, null, null, null));
this.pages = Collections.singletonList(DataManager.createSimplePage(wiki, title, null, null, null));
}

/**
Expand Down
26 changes: 12 additions & 14 deletions WikipediaCleaner/src/org/wikipediacleaner/api/dump/PageHandler.java
Original file line number Diff line number Diff line change
Expand Up @@ -184,21 +184,19 @@ public void endElement(String uri, String localName, String qName) throws SAXExc
if (processor != null) {
increasePageCount();
try {
Integer namespaceNum = Integer.valueOf(namespace.toString());
if (processor.isForNamespace(namespaceNum)) {
Page page = DataManager.getPage(
processor.getWiki(), title.toString(),
Integer.valueOf(pageId.toString(), 10), revisionId.toString(),
null);
page.setNamespace(namespaceNum);
page.setContents(revisionText.toString());
if (redirect.length() > 0) {
PageRedirect redirects = page.getRedirects();
redirects.isRedirect(true);
redirects.add(DataManager.getPage(processor.getWiki(), redirect.toString(), null, null, null), null);
}
processor.processPage(page);
Integer namespaceId = Integer.valueOf(namespace.toString());
Page currentPage = DataManager.createSimplePage(
processor.getWiki(), title.toString(),
Integer.valueOf(pageId.toString(), 10), revisionId.toString(),
namespaceId);
currentPage.setNamespace(namespaceId);
currentPage.setContents(revisionText.toString());
if (redirect.length() > 0) {
PageRedirect redirects = currentPage.getRedirects();
redirects.isRedirect(true);
redirects.add(DataManager.createSimplePage(processor.getWiki(), redirect.toString(), null, null, null), null);
}
processor.processPage(currentPage);
} catch (NumberFormatException e) {
log.error("Problem in endElement: " + e.getMessage());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -237,13 +237,13 @@ public void loadConfiguration(

// Decide which pages to be retrieved
String configPageName = wiki.getConfigurationPage();
Page page = DataManager.getPage(
Page page = DataManager.createSimplePage(
wiki, configPageName, null, null, null);
Page userConfigPage = null;
if ((userName != null) && (userName.trim().length() > 0) &&
(wiki.getUserConfigurationPage(userName) != null) &&
(!Page.areSameTitle(wiki.getUserConfigurationPage(userName), configPageName))) {
userConfigPage = DataManager.getPage(
userConfigPage = DataManager.createSimplePage(
wiki,
wiki.getUserConfigurationPage(userName),
null, null, null);
Expand Down Expand Up @@ -1168,7 +1168,7 @@ public String getLanguageLink(EnumWikipedia from, EnumWikipedia to, String title
throws APIException {
ApiLanguageLinksResult result = new ApiXmlLanguageLinksResult(from, httpClient);
ApiLanguageLinksRequest request = new ApiLanguageLinksRequest(from, result);
return request.getLanguageLink(DataManager.getPage(from, title, null, null, null), to);
return request.getLanguageLink(DataManager.createSimplePage(from, title, null, null, null), to);
}

// ==========================================================================
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,8 @@ public void recentChanges(List<RecentChange> newRC, Date currentTime) {
// Check if an update has been made on a monitored page
for (RecentChange rc : filteredNewRC) {
if (monitoredPages.containsKey(rc.getTitle())) {
Page page = DataManager.getPage(getWikipedia(), rc.getTitle(), null, null, null);
Page page = DataManager.createSimplePage(
getWikipedia(), rc.getTitle(), null, null, null);
try {
updateDabWarning.updateWarning(
Collections.singletonList(page), null, null, null);
Expand Down Expand Up @@ -295,7 +296,7 @@ public void recentChanges(List<RecentChange> newRC, Date currentTime) {
if (oldEnough) {
modelRCInteresting.removeRecentChanges(title);
if (!redirect) {
Page page = DataManager.getPage(getWikipedia(), title, null, null, null);
Page page = DataManager.createSimplePage(getWikipedia(), title, null, null, null);
pages.add(page);
creators.put(title, creator);
modifiers.put(title, pageModifiers);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ protected void clean() {
if (getTextContents() != null) {
getTextContents().setText(null);
}
page = DataManager.getPage(getWikipedia(), getTextPageName(), null, null, null);
page = DataManager.createSimplePage(getWikipedia(), getTextPageName(), null, null, null);
updateComponentState();
}

Expand Down
Loading

0 comments on commit 311eddb

Please sign in to comment.