Skip to content

Commit

Permalink
Initial commit.
Browse files Browse the repository at this point in the history
  • Loading branch information
drupol committed Feb 5, 2018
1 parent 1c9069c commit 553f570
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 26 deletions.
13 changes: 9 additions & 4 deletions README.md
Expand Up @@ -11,7 +11,7 @@ An n-gram of size 1 is referred to as a "unigram"; size 2 is a "bigram" (or, les

## Requirements

* PHP >= 7.0,
* PHP >= 7.0

## Installation

Expand All @@ -26,9 +26,9 @@ Let's say you want to find all the N-Gram of size 3 of the string **hello world*
```php
$word = 'hello world';
$ngram = new \drupol\phpngrams\NGrams();
$ngrams = $ngram->ngramString($word, 3);
$ngrams = $ngram->ngramsString($word, 3);

print_r($ngrams);
print_r(iterator_to_array($ngrams));
/*
[0] => hel
[1] => ell
Expand All @@ -49,7 +49,7 @@ Instead of using a string, you may also use an array as input.
```php
$word = ['h', 'e', 'l', 'l', 'o'];
$ngram = new \drupol\phpngrams\NGrams();
$ngrams = $ngram->ngramArray($word, 3);
$ngrams = $ngram->ngramsArray($word, 3);

print_r(iterator_to_array($ngrams));
/*
Expand Down Expand Up @@ -94,6 +94,11 @@ Array
*/
```

To keep the memory footprint as low as possible, the library returns Generators. If you want to get the complete resulting array, use [iterator_to_array()](https://secure.php.net/manual/en/function.iterator-to-array.php).

The library provides an object `NGrams` and a trait `NGramsTrait`.
It's up to you to decide how you want to use the library.

## Tests

Every time the sources are modified, [Travis](https://travis-ci.org/drupol/phpngrams), the continuous integration
Expand Down
12 changes: 6 additions & 6 deletions spec/drupol/phpngrams/NGramsSpec.php
Expand Up @@ -18,7 +18,7 @@ public function it_can_get_ngram_from_a_string()
'he', 'el', 'll', 'lo', 'oh'
];

$this->ngramString('hello', 2)->shouldBe($result);
$this->ngramsString('hello', 2)->shouldIterateAs(new \ArrayIterator($result));
}

public function it_can_get_ngram_from_a_string_with_big_n()
Expand All @@ -27,7 +27,7 @@ public function it_can_get_ngram_from_a_string_with_big_n()
'hello', 'elloh', 'llohe', 'lohel', 'ohell'
];

$this->ngramString('hello', 10)->shouldBe($result);
$this->ngramsString('hello', 10)->shouldIterateAs(new \ArrayIterator($result));
}

public function it_can_get_ngram_from_a_string_without_cycling()
Expand All @@ -36,7 +36,7 @@ public function it_can_get_ngram_from_a_string_without_cycling()
'hel', 'ell', 'llo', 'lo ', 'o w', ' wo', 'wor', 'orl', 'rld'
];

$this->ngramString('hello world', 3, false)->shouldBe($result);
$this->ngramsString('hello world', 3, false)->shouldIterateAs(new \ArrayIterator($result));
}

public function it_can_get_ngram_from_an_array()
Expand All @@ -45,7 +45,7 @@ public function it_can_get_ngram_from_an_array()
['h', 'e'], ['e','l'], ['l','l'], ['l','o'], ['o','h']
];

$this->ngramArray(['h', 'e', 'l', 'l', 'o'], 2)->shouldBe($result);
$this->ngramsArray(['h', 'e', 'l', 'l', 'o'], 2)->shouldIterateAs(new \ArrayIterator($result));
}

public function it_can_get_ngram_from_an_array_with_big_n()
Expand All @@ -58,7 +58,7 @@ public function it_can_get_ngram_from_an_array_with_big_n()
['o','h','e','l','l'],
];

$this->ngramArray(['h', 'e', 'l', 'l', 'o'], 10)->shouldBe($result);
$this->ngramsArray(['h', 'e', 'l', 'l', 'o'], 10)->shouldIterateAs(new \ArrayIterator($result));
}

public function it_can_get_ngram_from_an_array_without_cycling()
Expand All @@ -75,6 +75,6 @@ public function it_can_get_ngram_from_an_array_without_cycling()
['r','l','d'],
];

$this->ngramArray(['h', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd'], 3, false)->shouldBe($result);
$this->ngramsArray(['h', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd'], 3, false)->shouldIterateAs(new \ArrayIterator($result));
}
}
32 changes: 16 additions & 16 deletions src/NGramsTrait.php
Expand Up @@ -9,48 +9,48 @@ trait NGramsTrait
* @param int $n
* @param bool $cyclic
*
* @return array
* @return \Generator
*/
public function ngramArray(array $data, $n = 1, $cyclic = true)
public function ngramsArray(array $data, $n = 1, $cyclic = true)
{
return $this->doNgram($data, $n, $cyclic);
return $this->doNgrams($data, $n, $cyclic);
}

/**
* @param $data
* @param int $n
* @param bool $cyclic
*
* @return array
* @return \Generator
*/
public function ngramString($data, $n = 1, $cyclic = true)
public function ngramsString($data, $n = 1, $cyclic = true)
{
return array_map(function ($data) {
return implode('', $data);
}, $this->doNgram(str_split($data), $n, $cyclic));
foreach ($this->doNgrams(str_split($data), $n, $cyclic) as $data) {
yield implode('', $data);
}
}

/**
* @param $data
* @param $n
* @param $cyclic
*
* @return array
* @return \Generator
*/
private function doNgram($data, $n, $cyclic)
private function doNgrams($data, $n = 1, $cyclic = true)
{
$dataLength = count($data);

$n = $n > $dataLength ? $dataLength : $n;

foreach ($data as $key => $token) {
for ($i = $key; $i < $n + $key; $i++) {
$length = (false === $cyclic ? $dataLength - $n + 1 : $dataLength);

for ($j = 0; $j < $length; $j++) {
$ngrams = [];
for ($i = $j; $i < $n + $j; $i++) {
$ngrams[] = $data[$i%$dataLength];
}
yield $ngrams;
}

$ngrams = array_chunk($ngrams, $n);

return (false === $cyclic ? array_slice($ngrams, 0, $dataLength - $n + 1) : $ngrams);
}
}

0 comments on commit 553f570

Please sign in to comment.