Skip to content

Commit

Permalink
Merge pull request #148 from michitux/indexer
Browse files Browse the repository at this point in the history
Indexer: Add the PID to INDEXER_PAGE_ADD and add getPID/getPageFromPID functions
  • Loading branch information
michitux committed Jan 6, 2013
2 parents b9ad156 + bff17c5 commit fdf855d
Show file tree
Hide file tree
Showing 4 changed files with 122 additions and 69 deletions.
2 changes: 2 additions & 0 deletions _test/core/DokuWikiTest.php
Expand Up @@ -30,6 +30,8 @@ public static function setUpBeforeClass() {

// remove any leftovers from the last run
if(is_dir(DOKU_TMP_DATA)){
// clear indexer data and cache
idx_get_indexer()->clear();
TestUtils::rdelete(DOKU_TMP_DATA);
}

Expand Down
18 changes: 18 additions & 0 deletions _test/tests/inc/indexer_pid.test.php
@@ -0,0 +1,18 @@
<?php
/**
* Tests the pid functions of the indexer.
*
* @author Michael Hamann <michael@content-space.de>
*/
class indexer_pid_test extends DokuWikiTest {
    /**
     * Checks the PID round trip of the indexer:
     *  - getPageFromPID(getPID($page)) yields $page again,
     *  - requesting the PID of another page does not disturb that mapping,
     *  - getPID() returns the same PID when called twice for the same page,
     *  - two different pages get two different PIDs,
     *  - PIDs are numeric.
     */
    function test_pid() {
        $indexer = idx_get_indexer();

        $syntaxPID = $indexer->getPID('wiki:syntax');
        $this->assertEquals('wiki:syntax', $indexer->getPageFromPID($syntaxPID), 'getPageFromPID(getPID(\'wiki:syntax\')) != \'wiki:syntax\'');

        $dokuwikiPID = $indexer->getPID('wiki:dokuwiki');
        $this->assertEquals('wiki:syntax', $indexer->getPageFromPID($syntaxPID), 'getPageFromPID(getPID(\'wiki:syntax\')) != \'wiki:syntax\' after getting the PID for wiki:dokuwiki');

        // the message describes the failure case: the two calls disagreed
        $this->assertEquals($syntaxPID, $indexer->getPID('wiki:syntax'), 'getPID(\'wiki:syntax\') returned different PIDs when called twice');
        $this->assertNotEquals($syntaxPID, $dokuwikiPID, 'Same PID returned for different pages');
        $this->assertTrue(is_numeric($syntaxPID) && is_numeric($dokuwikiPID), 'PIDs are not numeric');
    }
}
62 changes: 1 addition & 61 deletions bin/indexer.php
Expand Up @@ -5,11 +5,6 @@
ini_set('memory_limit','128M');
if(!defined('DOKU_INC')) define('DOKU_INC',realpath(dirname(__FILE__).'/../').'/');
require_once(DOKU_INC.'inc/init.php');
require_once(DOKU_INC.'inc/common.php');
require_once(DOKU_INC.'inc/pageutils.php');
require_once(DOKU_INC.'inc/search.php');
require_once(DOKU_INC.'inc/indexer.php');
require_once(DOKU_INC.'inc/auth.php');
require_once(DOKU_INC.'inc/cliopts.php');
session_write_close();

Expand Down Expand Up @@ -67,10 +62,6 @@ function _usage() {

function _update(){
global $conf;
global $INDEXER;

$INDEXER = idx_get_indexer();

$data = array();
_quietecho("Searching pages... ");
search($data,$conf['datadir'],'search_allpages',array('skipacl' => true));
Expand All @@ -82,7 +73,6 @@ function _update(){
}

function _index($id){
global $INDEXER;
global $CLEAR;
global $QUIET;

Expand All @@ -91,63 +81,13 @@ function _index($id){
_quietecho("done.\n");
}

/**
 * Acquire the indexer lock
 *
 * The lock is a directory named _indexer.lock inside $conf['lockdir'].
 * The function keeps trying to create it until it succeeds. A lock whose
 * modification time is more than five minutes old is considered stale and
 * removed; otherwise a progress message is printed through _quietecho()
 * and the next attempt is made after 15 seconds.
 */
function _lock(){
    global $conf;

    $lockDir   = $conf['lockdir'].'/_indexer.lock';
    $announced = false;

    while(true){
        // creating the directory is the actual lock acquisition
        if(@mkdir($lockDir, $conf['dmode'])) break;

        $age = time()-@filemtime($lockDir);
        if($age > 60*5){
            // looks like a stale lock - remove it and retry right away
            @rmdir($lockDir);
            continue;
        }

        // first wait prints the explanation, later waits only a dot
        _quietecho($announced ? "." : "Waiting for lockfile (max. 5 min)");
        $announced = true;
        sleep(15);
    }

    if($conf['dperm']) chmod($lockDir, $conf['dperm']);

    // terminate the progress line if anything was printed
    if($announced) _quietecho("\n");
}

/**
 * Release the indexer lock
 *
 * Removes the _indexer.lock directory inside $conf['lockdir'];
 * a failure to remove it is silently ignored.
 */
function _unlock(){
    global $conf;
    @rmdir($conf['lockdir'].'/_indexer.lock');
}

/**
 * Clear all index files
 *
 * Delegates to Doku_Indexer::clear(), which takes the indexer lock itself,
 * removes the index files and resets the indexer's PID cache. Deleting the
 * files by hand here as well would only duplicate that work (and would leave
 * the PID cache untouched if done without the indexer), so no separate
 * locking or file handling is done in this function.
 */
function _clearindex(){
    _quietecho("Clearing index... ");
    idx_get_indexer()->clear();
    _quietecho("done.\n");
}

function _quietecho($msg) {
Expand Down
109 changes: 101 additions & 8 deletions inc/indexer.php
Expand Up @@ -102,6 +102,10 @@ function wordlen($w){
* @author Tom N Harris <tnharris@whoopdedo.org>
*/
class Doku_Indexer {
/**
 * @var array $pidCache Cache for getPID()
 *
 * Maps a page name to the PID that was looked up for it. It is consulted
 * by getPID() and getPIDNoLock() before the page index is touched, kept
 * small by getPIDNoLock(), and emptied by clear().
 */
protected $pidCache = array();

/**
* Adds the contents of a page to the fulltext index
Expand All @@ -120,7 +124,7 @@ public function addPageWords($page, $text) {
return "locked";

// load known documents
$pid = $this->addIndexKey('page', '', $page);
$pid = $this->getPIDNoLock($page);
if ($pid === false) {
$this->unlock();
return false;
Expand Down Expand Up @@ -256,7 +260,7 @@ public function addMetaKeys($page, $key, $value=null) {
return "locked";

// load known documents
$pid = $this->addIndexKey('page', '', $page);
$pid = $this->getPIDNoLock($page);
if ($pid === false) {
$this->unlock();
return false;
Expand Down Expand Up @@ -348,7 +352,7 @@ public function deletePage($page) {
return "locked";

// load known documents
$pid = $this->addIndexKey('page', '', $page);
$pid = $this->getPIDNoLock($page);
if ($pid === false) {
$this->unlock();
return false;
Expand Down Expand Up @@ -397,6 +401,38 @@ public function deletePage($page) {
return true;
}

/**
 * Clear the whole index
 *
 * Under the indexer lock this removes page.idx, title.idx, pageword.idx,
 * metadata.idx and lengths.idx as well as every *.idx file in the index
 * directory whose name starts with 'i' or 'w' or ends in _w.idx, _i.idx
 * or _p.idx. Afterwards the PID cache is emptied.
 *
 * File removal is best effort: a file that cannot be deleted is ignored
 * and does not change the return value.
 *
 * @return bool false if the index could not be locked, true otherwise
 */
public function clear() {
    global $conf;

    if (!$this->lock()) return false;

    $indexdir = $conf['indexdir'];

    @unlink($indexdir.'/page.idx');
    @unlink($indexdir.'/title.idx');
    @unlink($indexdir.'/pageword.idx');
    @unlink($indexdir.'/metadata.idx');

    $dir = @opendir($indexdir);
    if($dir!==false){
        while(($f = readdir($dir)) !== false){
            if(substr($f,-4)=='.idx' &&
                (substr($f,0,1)=='i' || substr($f,0,1)=='w'
                || substr($f,-6)=='_w.idx' || substr($f,-6)=='_i.idx' || substr($f,-6)=='_p.idx'))
                @unlink($indexdir."/$f");
        }
        // release the directory handle instead of leaking it
        closedir($dir);
    }
    @unlink($indexdir.'/lengths.idx');

    // clear the pid cache - the cached PIDs refer to the deleted page index
    $this->pidCache = array();

    $this->unlock();
    return true;
}

/**
* Split the text into words for fulltext search
*
Expand Down Expand Up @@ -453,6 +489,58 @@ public function tokenizer($text, $wc=false) {
return array_values($wordlist);
}

/**
 * Get the numeric PID of a page
 *
 * A cached PID is returned directly without taking the index lock.
 * Otherwise the index is locked for the duration of the lookup done by
 * getPIDNoLock() and unlocked again before returning.
 *
 * @param string $page The page to get the PID for
 * @return bool|int The page id on success, false on error
 */
public function getPID($page) {
    // cache hit: no locking needed
    if (isset($this->pidCache[$page])) {
        return $this->pidCache[$page];
    }

    if (!$this->lock()) {
        return false;
    }

    // $result is either the PID or false; the lock is released in both cases
    $result = $this->getPIDNoLock($page);
    $this->unlock();

    return $result;
}

/**
 * Get the numeric PID of a page without locking the index.
 * Only use this function when the index is already locked.
 *
 * Successful lookups are remembered in a small cache so that repeated
 * requests for the same page do not hit the page index again. Failed
 * lookups are not cached, so a later call tries again.
 *
 * @param string $page The page to get the PID for
 * @return bool|int The page id on success, false on error
 */
protected function getPIDNoLock($page) {
    // avoid expensive addIndexKey operation for the most recently requested pages by using a cache
    if (isset($this->pidCache[$page])) return $this->pidCache[$page];

    $pid = $this->addIndexKey('page', '', $page);
    // addIndexKey returns false when writing the index failed - don't remember that
    if ($pid === false) return false;

    // limit cache to 10 entries by discarding the oldest element as in DokuWiki usually only the most recently
    // added item will be requested again
    if (count($this->pidCache) >= 10) {
        // array_shift() would renumber integer keys, and PHP turns purely numeric page
        // names (e.g. "2013") into integer keys - so drop the first entry by key instead
        reset($this->pidCache);
        unset($this->pidCache[key($this->pidCache)]);
    }
    $this->pidCache[$page] = $pid;
    return $pid;
}

/**
 * Get the page id of a numeric PID
 *
 * Reads the entry for the given PID from the 'page' index via getIndexKey().
 *
 * @param int $pid The PID to get the page id for
 * @return string The page id
 */
public function getPageFromPID($pid) {
    $page = $this->getIndexKey('page', '', $pid);
    return $page;
}

/**
* Find pages in the fulltext index containing the words,
*
Expand Down Expand Up @@ -946,7 +1034,7 @@ protected function saveIndexKey($idx, $suffix, $id, $line) {
* @param string $idx name of the index
* @param string $suffix subpart identifier
* @param string $value line to find in the index
* @return int line number of the value in the index
* @return int|bool line number of the value in the index or false if writing the index failed
* @author Tom N Harris <tnharris@whoopdedo.org>
*/
protected function addIndexKey($idx, $suffix, $value) {
Expand Down Expand Up @@ -1140,8 +1228,8 @@ protected function countTuples($line) {
* @author Tom N Harris <tnharris@whoopdedo.org>
*/
function idx_get_indexer() {
static $Indexer = null;
if (is_null($Indexer)) {
static $Indexer;
if (!isset($Indexer)) {
$Indexer = new Doku_Indexer();
}
return $Indexer;
Expand Down Expand Up @@ -1223,21 +1311,26 @@ function idx_addPage($page, $verbose=false, $force=false) {
return $result;
}

$Indexer = idx_get_indexer();
$pid = $Indexer->getPID($page);
if ($pid === false) {
if ($verbose) print("Indexer: getting the PID failed for $page".DOKU_LF);
return false;
}
$body = '';
$metadata = array();
$metadata['title'] = p_get_metadata($page, 'title', METADATA_RENDER_UNLIMITED);
if (($references = p_get_metadata($page, 'relation references', METADATA_RENDER_UNLIMITED)) !== null)
$metadata['relation_references'] = array_keys($references);
else
$metadata['relation_references'] = array();
$data = compact('page', 'body', 'metadata');
$data = compact('page', 'body', 'metadata', 'pid');
$evt = new Doku_Event('INDEXER_PAGE_ADD', $data);
if ($evt->advise_before()) $data['body'] = $data['body'] . " " . rawWiki($page);
$evt->advise_after();
unset($evt);
extract($data);

$Indexer = idx_get_indexer();
$result = $Indexer->addPageWords($page, $body);
if ($result === "locked") {
if ($verbose) print("Indexer: locked".DOKU_LF);
Expand Down

0 comments on commit fdf855d

Please sign in to comment.