-
Notifications
You must be signed in to change notification settings - Fork 81
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #507 from geekwright/addstopwords
Add Xmf\StopWords class
- Loading branch information
Showing
4 changed files
with
149 additions
and
50 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
<?php | ||
/* | ||
You may not change or alter any portion of this comment or credits | ||
of supporting developers from this source code or any supporting source code | ||
which is considered copyrighted (c) material of the original comment or credit authors. | ||
This program is distributed in the hope that it will be useful, | ||
but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
*/ | ||
|
||
namespace Xmf; | ||
|
||
/** | ||
* StopWords - facilitate filtering of common or purely connective words for natural language processing | ||
* | ||
* @category Xmf\StopWords | ||
* @package Xmf | ||
* @author Richard Griffith <richard@geekwright.com> | ||
* @author trabis <lusopoemas@gmail.com> | ||
* @copyright 2011-2016 XOOPS Project (http://xoops.org) | ||
* @license GNU GPL 2 or later (http://www.gnu.org/licenses/gpl-2.0.html) | ||
* @link http://xoops.org | ||
* @see https://en.wikipedia.org/wiki/Stop_words | ||
*/ | ||
class StopWords | ||
{ | ||
|
||
/** | ||
* mbstring encoding | ||
*/ | ||
const ENCODING = 'UTF-8'; | ||
|
||
/** @var string[] */ | ||
protected $stopwordList = array(); | ||
|
||
/** | ||
* StopWords constructor - load stop words for current locale | ||
* | ||
* @todo specify locale to constructor, will require shift away from defined constant | ||
*/ | ||
public function __construct() | ||
{ | ||
if (!defined('_XMF_STOPWORDS')) { | ||
Language::load('stopwords'); | ||
} | ||
if (defined('_XMF_STOPWORDS')) { | ||
$sw = explode(' ', _XMF_STOPWORDS); | ||
$this->stopwordList = array_fill_keys($sw, true); | ||
} | ||
} | ||
|
||
/** | ||
* check - look up a word in a list of stop words and | ||
* classify it as a significant word or a stop word. | ||
* | ||
* @param string $key the word to check | ||
* | ||
* @return bool True if word is significant, false if it is a stop word | ||
*/ | ||
public function check($key) | ||
{ | ||
$key = function_exists('mb_strtolower') | ||
? mb_strtolower($key, static::ENCODING) | ||
: strtolower($key); | ||
return !isset($this->stopwordList[$key]); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
<?php | ||
namespace Xmf; | ||
|
||
require_once(dirname(__FILE__).'/../../init_new.php'); | ||
|
||
/** | ||
* Generated by PHPUnit_SkeletonGenerator 1.2.1 on 2014-05-22 at 19:56:36. | ||
*/ | ||
|
||
/** | ||
* PHPUnit special settings : | ||
* @backupGlobals disabled | ||
* @backupStaticAttributes disabled | ||
*/ | ||
|
||
class StopWordsTest extends \PHPUnit_Framework_TestCase | ||
{ | ||
/** | ||
* @var StopWords | ||
*/ | ||
protected $object; | ||
|
||
/** | ||
* Sets up the fixture, for example, opens a network connection. | ||
* This method is called before a test is executed. | ||
*/ | ||
protected function setUp() | ||
{ | ||
$this->object = new StopWords; | ||
} | ||
|
||
/** | ||
* Tears down the fixture, for example, closes a network connection. | ||
* This method is called after a test is executed. | ||
*/ | ||
protected function tearDown() | ||
{ | ||
} | ||
|
||
/** | ||
* @covers Xmf\StopWords::check | ||
*/ | ||
public function testCheck() | ||
{ | ||
$this->assertTrue($this->object->check('XOOPS')); | ||
$this->assertFalse($this->object->check('is')); | ||
$this->assertFalse($this->object->check('IS')); | ||
} | ||
} |