Skip to content

Commit

Permalink
Merge pull request #507 from geekwright/addstopwords
Browse files Browse the repository at this point in the history
Add Xmf\StopWords class
  • Loading branch information
geekwright committed Jan 11, 2017
2 parents 602fa9f + 5370af6 commit 7be879d
Show file tree
Hide file tree
Showing 4 changed files with 149 additions and 50 deletions.
70 changes: 32 additions & 38 deletions htdocs/xoops_lib/Xmf/Metagen.php
Original file line number Diff line number Diff line change
Expand Up @@ -148,10 +148,10 @@ public static function generateKeywords(
);

foreach ($originalKeywords as $originalKeyword) {
if (static::checkStopWords($originalKeyword)) {
if (static::stopWordsObject()->check($originalKeyword)) {
$secondRoundKeywords = explode("'", $originalKeyword);
foreach ($secondRoundKeywords as $secondRoundKeyword) {
if (static::checkStopWords($secondRoundKeyword)
if (static::stopWordsObject()->check($secondRoundKeyword)
&& strlen($secondRoundKeyword) >= $minLength
) {
$keyCount[$secondRoundKeyword] =
Expand All @@ -173,39 +173,6 @@ public static function generateKeywords(
return $keywords;
}

/**
* checkStopWords - look up a word in a list of stop words and
* classify it as a significant word or a stop word.
*
* @param string $key the word to check
*
* @return bool True if word is significant, false if it is a stop word
*/
public static function checkStopWords($key)
{
    // lookup table is built once and reused on later calls
    static $stopwords = null;

    if (!$stopwords) {
        // give the language file a chance to define the stop word constant
        if (!defined('_XMF_STOPWORDS')) {
            Language::load('stopwords');
        }
        // fall back to a single placeholder entry when no list is available
        $stopwords = defined('_XMF_STOPWORDS')
            ? array_fill_keys(explode(' ', _XMF_STOPWORDS), true)
            : array('_' => true);
    }

    if (empty($stopwords)) {
        return true;
    }

    // compare case-insensitively, using mbstring when it is installed
    $needle = function_exists('mb_strtolower')
        ? mb_strtolower($key, static::ENCODING)
        : strtolower($key);

    // a word is significant when it is NOT present in the stop word table
    return !isset($stopwords[$needle]);
}

/**
* generateDescription - generate a short description from a body of text
*
Expand Down Expand Up @@ -306,7 +273,7 @@ public static function generateSeoTitle($title = '', $extension = '')

$tableau = explode("-", $title);
$tableau = array_filter($tableau, 'static::nonEmptyString');
$tableau = array_filter($tableau, 'static::checkStopWords');
$tableau = array_filter($tableau, array(static::stopWordsObject(), 'check'));
$title = implode("-", $tableau);

$title = (empty($title)) ? '' : $title . $extension;
Expand Down Expand Up @@ -432,14 +399,13 @@ protected static function getNeedlePositions($haystack, $needles)
*/
protected static function purifyText($text, $keyword = false)
{
$myts = \MyTextSanitizer::getInstance();
$text = str_replace(' ', ' ', $text);
$text = str_replace('<br />', ' ', $text);
$text = str_replace('<br/>', ' ', $text);
$text = str_replace('<br', ' ', $text);
$text = strip_tags($text);
$text = html_entity_decode($text);
$text = $myts->undoHtmlSpecialChars($text);
$text = htmlspecialchars_decode($text, ENT_QUOTES);
$text = str_replace(')', ' ', $text);
$text = str_replace('(', ' ', $text);
$text = str_replace(':', ' ', $text);
Expand Down Expand Up @@ -518,4 +484,32 @@ function ($matches) {

return $text;
}

/**
* checkStopWords - look up a word in a list of stop words and
* classify it as a significant word or a stop word.
*
* @param string $key the word to check
*
* @return bool True if word is significant, false if it is a stop word
* @deprecated since v1.2.0 - use Xmf\StopWords::check()
*/
public static function checkStopWords($key)
{
    // deprecated entry point: forward to the shared StopWords object
    $stopWords = static::stopWordsObject();

    return $stopWords->check($key);
}

/**
* Get a StopWords object
*
* @return StopWords
*/
protected static function stopWordsObject()
{
    // created on first use, then the same object is handed back on later calls
    static $instance = null;

    if ($instance === null) {
        $instance = new StopWords();
    }

    return $instance;
}
}
68 changes: 68 additions & 0 deletions htdocs/xoops_lib/Xmf/StopWords.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
<?php
/*
You may not change or alter any portion of this comment or credits
of supporting developers from this source code or any supporting source code
which is considered copyrighted (c) material of the original comment or credit authors.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*/

namespace Xmf;

/**
* StopWords - facilitate filtering of common or purely connective words for natural language processing
*
* @category Xmf\StopWords
* @package Xmf
* @author Richard Griffith <richard@geekwright.com>
* @author trabis <lusopoemas@gmail.com>
* @copyright 2011-2016 XOOPS Project (http://xoops.org)
* @license GNU GPL 2 or later (http://www.gnu.org/licenses/gpl-2.0.html)
* @link http://xoops.org
* @see https://en.wikipedia.org/wiki/Stop_words
*/
class StopWords
{

    /**
     * mbstring encoding
     */
    const ENCODING = 'UTF-8';

    /**
     * Lookup table of stop words: each key is a lower-cased stop word, each value is true.
     *
     * @var bool[]
     */
    protected $stopwordList = array();

    /**
     * StopWords constructor - load stop words for current locale
     *
     * If the _XMF_STOPWORDS constant is not defined yet, Language::load('stopwords')
     * is called so that it can be defined. When the constant is still missing the
     * list stays empty and every word is treated as significant.
     *
     * @todo specify locale to constructor, will require shift away from defined constant
     */
    public function __construct()
    {
        if (!defined('_XMF_STOPWORDS')) {
            Language::load('stopwords');
        }
        if (!defined('_XMF_STOPWORDS')) {
            return;
        }

        // Split on runs of ASCII whitespace so repeated spaces, tabs or line breaks
        // in the constant do not produce empty or fused entries. An explicit
        // character class is used (not \s) so bytes of multi-byte UTF-8 characters
        // are never mistaken for whitespace.
        $words = preg_split('/[ \t\r\n]+/', _XMF_STOPWORDS, -1, PREG_SPLIT_NO_EMPTY);
        if (empty($words)) {
            return;
        }

        // Store the words with the same case folding check() applies to its
        // argument, otherwise a stop word written with capitals could never match.
        $normalized = array_map(array($this, 'normalize'), $words);
        $this->stopwordList = array_fill_keys($normalized, true);
    }

    /**
     * check - look up a word in a list of stop words and
     * classify it as a significant word or a stop word.
     *
     * The comparison is case-insensitive.
     *
     * @param string $key the word to check
     *
     * @return bool True if word is significant, false if it is a stop word
     */
    public function check($key)
    {
        return !isset($this->stopwordList[$this->normalize($key)]);
    }

    /**
     * normalize - fold a word to lower case for storage and lookup
     *
     * Uses mb_strtolower() with the class encoding when mbstring is available,
     * and falls back to strtolower() otherwise.
     *
     * @param string $word the word to normalize
     *
     * @return string the lower-cased word
     */
    protected function normalize($word)
    {
        return function_exists('mb_strtolower')
            ? mb_strtolower($word, static::ENCODING)
            : strtolower($word);
    }
}
12 changes: 0 additions & 12 deletions tests/unit/xoopsLib/Xmf/MetagenTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -134,18 +134,6 @@ public function testGenerateKeywords()
$this->assertFalse(in_array('wombat', $keys));
}

/**
* @covers Xmf\Metagen::checkStopWords
*/
public function testCheckStopWords()
{
$method = new \ReflectionMethod('Xmf\Metagen', 'checkStopWords');
$method->setAccessible(true);
$this->assertTrue($method->invokeArgs($this->object, array('XOOPS')));
$this->assertFalse($method->invokeArgs($this->object, array('is')));
$this->assertFalse($method->invokeArgs($this->object, array('IS')));
}

/**
* @covers Xmf\Metagen::generateDescription
*/
Expand Down
49 changes: 49 additions & 0 deletions tests/unit/xoopsLib/Xmf/StopWordsTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
<?php
namespace Xmf;

require_once(dirname(__FILE__).'/../../init_new.php');

/**
* Generated by PHPUnit_SkeletonGenerator 1.2.1 on 2014-05-22 at 19:56:36.
*/

/**
* PHPUnit special settings :
* @backupGlobals disabled
* @backupStaticAttributes disabled
*/

class StopWordsTest extends \PHPUnit_Framework_TestCase
{
    /**
     * Instance under test, assigned in setUp().
     *
     * @var StopWords
     */
    protected $object;

    /**
     * Build a fresh StopWords instance; called before a test is executed.
     */
    protected function setUp()
    {
        $this->object = new StopWords();
    }

    /**
     * Called after a test is executed; there is nothing to release here.
     */
    protected function tearDown()
    {
    }

    /**
     * check() must report 'XOOPS' as significant and 'is' as a stop word,
     * regardless of the letter case of the stop word.
     *
     * @covers Xmf\StopWords::check
     */
    public function testCheck()
    {
        // word => expected result of check() (true means significant)
        $expectations = array(
            'XOOPS' => true,
            'is'    => false,
            'IS'    => false,
        );

        foreach ($expectations as $word => $significant) {
            $this->assertSame($significant, $this->object->check($word));
        }
    }
}

0 comments on commit 7be879d

Please sign in to comment.