Skip to content

Commit

Permalink
Improve GetColors step
Browse files Browse the repository at this point in the history
Get all colors, not only the ones making up more than 0.5 percent of the
image. But also add a method `onlyAbovePercentageOfImage()` to the
`GetColors` step, to manually set a custom threshold.

And also improve memory usage of getting colors from an image.
  • Loading branch information
otsch committed Feb 22, 2024
1 parent 02e47d1 commit 5a35db4
Show file tree
Hide file tree
Showing 6 changed files with 86 additions and 43 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
Expand Up @@ -6,6 +6,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

## [1.1.0] - 2024-02-22
### Added
* Get all colors by default, not only the ones making up at least 0.5 percent of the image. Also add a method `onlyAbovePercentageOfImage()` to the `GetColors` step to manually set a custom threshold.

### Fixed
* Improve memory usage of getting colors from an image.

## [1.0.0] - 2024-02-17
### Changed
* Change the output of the `Screenshot` step, from an array `['response' => RespondedRequest, 'screenshotPath' => string]` to a `RespondedRequestWithScreenshot` object, that has a `screenshotPath` property. The problem with the previous solution was: when using the response cache, the step failed, because it gets a cached response from the loader that was not actually loaded in the headless browser. When the step afterwards tries to take a screenshot from the page that is still open in the browser, it just fails because there is no open page. Now, with the new `RespondedRequestWithScreenshot` object, the `screenshotPath` is also saved in the cached response.
Expand Down
11 changes: 10 additions & 1 deletion src/Steps/GetColors.php
Expand Up @@ -10,19 +10,28 @@

class GetColors extends Step
{
protected ?float $onlyAbovePercentageOfImage = null;

/**
 * Named constructor: creates a fresh instance of this step.
 *
 * @return self
 */
public static function fromImage(): self
{
    $step = new self();

    return $step;
}

/**
 * Set the minimum share of the image a color must make up to be reported.
 *
 * The value is stored on the step and handed to ImageColors::getFrom() when
 * the step is invoked. There it is compared (with >=) against each color's
 * rounded percentage of all pixels.
 *
 * @param float $percentage threshold, expressed in percent of the image
 * @return self the same step instance, to allow method chaining
 */
public function onlyAbovePercentageOfImage(float $percentage): self
{
$this->onlyAbovePercentageOfImage = $percentage;

return $this;
}

/**
* @param string $input
* @return Generator
*/
protected function invoke(mixed $input): Generator
{
try {
yield ['colors' => ImageColors::getFrom($input)];
yield ['colors' => ImageColors::getFrom($input, $this->onlyAbovePercentageOfImage)];
} catch (Exception $exception) {
$this->logger?->error('Failed to get colors from image: ' . $exception->getMessage());
}
Expand Down
62 changes: 22 additions & 40 deletions src/Utils/ImageColors.php
Expand Up @@ -12,15 +12,18 @@ class ImageColors

private int $height = 0;

public function __construct(private readonly string $imagePath) {}
public function __construct(
private readonly string $imagePath,
private readonly ?float $onlyAbovePercentageOfImage = null,
) {}

/**
* @return array<int, array{ red: int, green: int, blue: int, rgb: string, percentage: float }>
* @throws UnknownImageFileTypeException|Exception
*/
public static function getFrom(string $imagePath): array
public static function getFrom(string $imagePath, ?float $onlyAbovePercentageOfImage = null): array
{
return (new self($imagePath))->getColors();
return (new self($imagePath, $onlyAbovePercentageOfImage))->getColors();
}

/**
Expand All @@ -35,15 +38,17 @@ public function getColors(): array

$colors = [];

foreach ($allColors as $colorData) {
$percentageOfImage = round(($colorData['count'] / $totalPixels) * 100, 1);
foreach ($allColors as $rgb => $pixelCount) {
[$red, $green, $blue] = explode(',', $rgb);

if ($percentageOfImage >= 0.5) {
$percentageOfImage = round(($pixelCount / $totalPixels) * 100, 1);

if ($this->onlyAbovePercentageOfImage === null || $percentageOfImage >= $this->onlyAbovePercentageOfImage) {
$colors[] = [
'red' => $colorData['red'],
'green' => $colorData['green'],
'blue' => $colorData['blue'],
'rgb' => $colorData['rgb'],
'red' => (int) $red,
'green' => (int) $green,
'blue' => (int) $blue,
'rgb' => '(' . $rgb . ')',
'percentage' => $percentageOfImage,
];
}
Expand All @@ -53,7 +58,7 @@ public function getColors(): array
}

/**
* @return array<string, array{ red: int, green: int, blue: int, rgb: string, count: int }>
* @return array<string, int>
* @throws UnknownImageFileTypeException|Exception
*/
protected function getAllColors(): array
Expand All @@ -76,23 +81,19 @@ protected function getAllColors(): array

$blue = $rgb & 0xFF;

$rgbString = '(' . $red . ',' . $green . ',' . $blue . ')';
$rgbString = $red . ',' . $green . ',' . $blue;

if (isset($colors[$rgbString])) {
$colors[$rgbString]['count'] += 1;
$colors[$rgbString] += 1;
} else {
$colors[$rgbString] = [
'red' => $red,
'green' => $green,
'blue' => $blue,
'rgb' => $rgbString,
'count' => 1,
];
$colors[$rgbString] = 1;
}
}
}

return $this->sortColorsByCount($colors);
arsort($colors);

return $colors;
}

/**
Expand Down Expand Up @@ -124,23 +125,4 @@ protected function getImage(): GdImage

throw new Exception('Can\'t read image file');
}

/**
 * Order color entries by their pixel count, most frequent color first.
 *
 * Keys of the given array are preserved; entries with the same count
 * compare as equal.
 *
 * @param array<string, array{ red: int, green: int, blue: int, rgb: string, count: int }> $colors
 * @return array<string, array{ red: int, green: int, blue: int, rgb: string, count: int }>
 */
private function sortColorsByCount(array $colors): array
{
    uasort(
        $colors,
        // Operands are swapped on purpose: a higher count has to sort first.
        static fn ($first, $second): int => $second['count'] <=> $first['count'],
    );

    return $colors;
}
}
2 changes: 1 addition & 1 deletion tests/Pest.php
Expand Up @@ -106,7 +106,7 @@ function helper_cleanFiles(): void

if (is_array($scanDir)) {
foreach ($scanDir as $file) {
if ($file === '.' || $file === '..' || $file === 'demo-screenshot.png') {
if ($file === '.' || $file === '..' || str_starts_with($file, 'demo-screenshot')) {
continue;
}

Expand Down
47 changes: 46 additions & 1 deletion tests/_Integration/GetColorsTest.php
@@ -1,5 +1,7 @@
<?php

use Crwlr\Crawler\Crawler;
use Crwlr\Crawler\Result;
use Crwlr\CrawlerExtBrowser\Steps\GetColors;
use Crwlr\CrawlerExtBrowser\Steps\Screenshot;

Expand Down Expand Up @@ -44,7 +46,10 @@

$crawler
->input(['screenshotPath' => helper_testFilePath('demo-screenshot.png')])
->addStep(GetColors::fromImage());
->addStep(
GetColors::fromImage()
->onlyAbovePercentageOfImage(0.4)
);

$results = iterator_to_array($crawler->run());

Expand All @@ -60,3 +65,43 @@
['red' => 0, 'green' => 0, 'blue' => 0, 'rgb' => '(0,0,0)', 'percentage' => 0.5],
]);
});

// The description names the memory limit that is actually applied below (500M).
it('does not run out of memory with a very colorful image and 500MB of memory', function () {
    Crawler::setMemoryLimit('500M');

    $crawler = helper_getFastCrawler();

    $crawler
        ->input(['screenshotPath' => helper_testFilePath('demo-screenshot2.png')])
        ->addStep(GetColors::fromImage()->addToResult());

    $results = iterator_to_array($crawler->run());

    /** @var Result $result */
    $result = $results[0];

    // No threshold is configured on the step, so all colors found in the image are expected.
    expect(count($result->get('colors')))->toBe(596002);
});

// With a 0.1 percent threshold only a single color of the test image is expected to remain.
it('gets colors that make up at least a certain percentage when onlyAbovePercentageOfImage() was used', function () {
    Crawler::setMemoryLimit('500M');

    $crawler = helper_getFastCrawler();

    $imagePath = helper_testFilePath('demo-screenshot2.png');

    $step = GetColors::fromImage()
        ->onlyAbovePercentageOfImage(0.1)
        ->addToResult();

    $crawler
        ->input(['screenshotPath' => $imagePath])
        ->addStep($step);

    $results = iterator_to_array($crawler->run());

    /** @var Result $result */
    $result = $results[0];

    $colors = $result->get('colors');

    expect(count($colors))->toBe(1);
});
Binary file added tests/_files/demo-screenshot2.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

0 comments on commit 5a35db4

Please sign in to comment.