Permalink
Browse files

Adds a method that gets all distinct documents from MediaWiki.

  • Loading branch information...
1 parent 9f1625c commit dea12e1f2fbaa1bbb237d00be249ea18b3ef3e74 Jim Safley committed May 16, 2011
Showing with 78 additions and 5 deletions.
  1. +62 −5 lib/Scripto.php
  2. +16 −0 lib/Scripto/Service/MediaWiki.php
View
@@ -11,6 +11,11 @@
require_once 'Scripto/Exception.php';
/**
+ * @see Scripto_Document
+ */
+require_once 'Scripto/Document.php';
+
+/**
* @see Scripto_Service_MediaWiki
*/
require_once 'Scripto/Service/MediaWiki.php';
@@ -100,7 +105,6 @@ public function documentExists($id)
*/
public function getDocument($id)
{
- require_once 'Scripto/Document.php';
return new Scripto_Document($id, $this->_adapter, $this->_mediawiki);
}
@@ -206,8 +210,6 @@ public function getUserName()
*/
public function getUserDocumentPages($limit = 10)
{
- require_once 'Scripto/Document.php';
-
$limit = (int) $limit;
$userDocumentPages = array();
$documentTitles = array();
@@ -288,8 +290,6 @@ public function getUserDocumentPages($limit = 10)
*/
public function getRecentChanges($limit = 10)
{
- require_once 'Scripto/Document.php';
-
$start = null;
$recentChanges = array();
$documentTitles = array();
@@ -377,6 +377,63 @@ public function getRecentChanges($limit = 10)
}
/**
+ * Get all documents from MediaWiki that have at least one page with text.
+ *
+ * Walks the MediaWiki "allpages" list in batches of 500, restricted to
+ * pages whose title starts with Scripto_Document::BASE_TITLE_PREFIX and
+ * whose size is at least 1, and reports each distinct document once.
+ * Documents the adapter does not recognize are left out.
+ *
+ * @uses Scripto_Service_MediaWiki::getAllPages()
+ * @return array One entry per distinct document, each an array with the
+ * keys 'mediawiki_title_prefix', 'document_id' and 'document_title'.
+ */
+ public function getAllDocuments()
+ {
+     $from = null;
+     // Document IDs already examined (accepted or rejected), so the
+     // adapter is consulted at most once per document.
+     $examinedDocumentIds = array();
+     $allDocuments = array();
+     do {
+         $response = $this->_mediawiki->getAllPages(
+             array('aplimit' => 500,
+                   'apminsize' => 1,
+                   'apprefix' => Scripto_Document::BASE_TITLE_PREFIX,
+                   'apfrom' => $from)
+         );
+
+         foreach ($response['query']['allpages'] as $value) {
+
+             // Element 0 of the decoded base title is used as the document
+             // ID (presumably element 1 is the page ID -- TODO confirm
+             // against Scripto_Document::decodeBaseTitle()).
+             $documentIds = Scripto_Document::decodeBaseTitle($value['title']);
+             $documentId = $documentIds[0];
+
+             // Skip documents that were already handled, including those
+             // previously rejected by the adapter.
+             if (array_key_exists($documentId, $examinedDocumentIds)) {
+                 continue;
+             }
+             $examinedDocumentIds[$documentId] = true;
+
+             // Filter out pages that do not belong to a valid document.
+             if (!$this->_adapter->documentExists($documentId)) {
+                 continue;
+             }
+
+             $allDocuments[] = array(
+                 'mediawiki_title_prefix' => Scripto_Document::BASE_TITLE_PREFIX
+                     . Scripto_Document::base64UrlEncode($documentId),
+                 'document_id' => $documentId,
+                 'document_title' => $this->_adapter->getDocumentTitle($documentId),
+             );
+         }
+
+         // Set the query continue, if any. The full path is checked so a
+         // response without an allpages continuation ends the loop cleanly.
+         if (isset($response['query-continue']['allpages']['apfrom'])) {
+             $from = $response['query-continue']['allpages']['apfrom'];
+         } else {
+             $from = null;
+         }
+
+     // Compare explicitly rather than by truthiness so that a continuation
+     // value of "0" does not end paging early.
+     } while (null !== $from && '' !== $from);
+
+     return $allDocuments;
+ }
+
+ /**
* Get the difference between two page revisions.
*
* @uses Scripto_Service_MediaWiki::getRevisionDiff()
@@ -79,6 +79,9 @@ class Scripto_Service_MediaWiki extends Zend_Service_Abstract
'uccontinue', 'ucdir', 'uclimit', 'ucnamespace', 'ucshow',
'rcprop', 'rcstart', 'rcend', 'rcdir', 'rclimit', 'rcnamespace',
'rcuser', 'rcexcludeuser', 'rctype', 'rcshow',
+ 'aplimit', 'apminsize', 'apmaxsize', 'apprefix', 'apfrom',
+ 'apnamespace', 'apfilterredir', 'apfilterlanglinks', 'apprtype',
+ 'apprlevel', 'apdir',
),
'login' => array(
'lgname', 'lgpassword', 'lgtoken'
@@ -178,6 +181,19 @@ public function getRecentChanges(array $params = array())
}
/**
+ * Queries the MediaWiki API for its "allpages" list.
+ *
+ * @link http://www.mediawiki.org/wiki/API:Allpages
+ * @param array $params Query parameters to send; any 'list' entry is
+ * replaced with 'allpages'.
+ * @return array The result of query() for the assembled parameters.
+ */
+ public function getAllPages(array $params = array())
+ {
+     // Work on a copy and force the list type before delegating to query().
+     $allPagesParams = $params;
+     $allPagesParams['list'] = 'allpages';
+
+     return $this->query($allPagesParams);
+ }
+
+ /**
* Gets basic page information.
*
* @link http://www.mediawiki.org/wiki/API:Properties#info_.2F_in

0 comments on commit dea12e1

Please sign in to comment.