Skip to content

Commit

Permalink
Add src_lazy_load_attr in siteconfig
Browse files Browse the repository at this point in the history
This allows a site config to define the image attribute whose value replaces `src` for lazy-loaded images
  • Loading branch information
j0k3r committed Mar 4, 2018
1 parent 6b4de37 commit 5ed0fd3
Show file tree
Hide file tree
Showing 5 changed files with 17 additions and 1 deletion.
5 changes: 5 additions & 0 deletions src/Extractor/ContentExtractor.php
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,11 @@ public function process($html, $url, SiteConfig $siteConfig = null, $smartTidy =
$this->siteConfig = $this->buildSiteConfig($url, $html);
}

// register the lazy-load attribute from the siteconfig, if one is set and not already listed
if ($this->siteConfig->src_lazy_load_attr && !in_array($this->siteConfig->src_lazy_load_attr, $this->config['src_lazy_load_attributes'], true)) {
$this->config['src_lazy_load_attributes'][] = $this->siteConfig->src_lazy_load_attr;
}

// do string replacements
if (!empty($this->siteConfig->find_string)) {
if (count($this->siteConfig->find_string) === count($this->siteConfig->replace_string)) {
Expand Down
2 changes: 1 addition & 1 deletion src/SiteConfig/ConfigBuilder.php
Original file line number Diff line number Diff line change
Expand Up @@ -339,7 +339,7 @@ public function parseLines(array $lines)
} elseif (in_array($command, ['tidy', 'prune', 'autodetect_on_failure', 'requires_login'], true)) {
$config->$command = ('yes' === $val || 'true' === $val);
// check for single statement commands stored as strings
} elseif (in_array($command, ['parser', 'login_username_field', 'login_password_field', 'not_logged_in_xpath', 'login_uri'], true)) {
} elseif (in_array($command, ['parser', 'login_username_field', 'login_password_field', 'not_logged_in_xpath', 'login_uri', 'src_lazy_load_attr'], true)) {
$config->$command = $val;
// check for replace_string(find): replace
} elseif ((')' === substr($command, -1)) && preg_match('!^([a-z0-9_]+)\((.*?)\)$!i', $command, $match) && 'replace_string' === $match[1]) {
Expand Down
3 changes: 3 additions & 0 deletions src/SiteConfig/SiteConfig.php
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@ class SiteConfig
// Strip attributes matching these xpath expressions (0 or more)
public $strip_attr = [];

// Attribute holding the real image URL of a lazy-loaded image (like `data-lazy-src`); its value replaces `src`
public $src_lazy_load_attr = null;

// Strip elements which contain these strings (0 or more) in the id or class attribute
public $strip_id_or_class = [];

Expand Down
6 changes: 6 additions & 0 deletions tests/Extractor/ContentExtractorTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -744,6 +744,11 @@ public function dataForlazyLoad()
'<div>' . str_repeat('this is the best part of the show', 10) . '<img src="data:image/gif;base64,R0lGODlhAQABAAAAACH5BAEKAAEALAAAAAABAAEAAAICTAEAOw==" data-sources="http://0.0.0.0/big_image.jpg"/></div>',
'<img src="http://0.0.0.0/big_image.jpg"',
],
// test with img attribute from site config
[
'<div>' . str_repeat('this is the best part of the show', 10) . '<img src="data:image/gif;base64,R0lGODlhAQABAAAAACH5BAEKAAEALAAAAAABAAEAAAICTAEAOw==" data-toto-src="http://0.0.0.0/big_image.jpg"/></div>',
'<img src="http://0.0.0.0/big_image.jpg"',
],
];
}

Expand All @@ -758,6 +763,7 @@ public function testConvertLazyLoadImages($html, $htmlExpected)

$config = new SiteConfig();
$config->body = ['//div'];
$config->src_lazy_load_attr = 'data-toto-src';

$res = $contentExtractor->process(
$html,
Expand Down
2 changes: 2 additions & 0 deletions tests/SiteConfig/ConfigBuilderTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ public function testBuildFromArray()
'title: hoho',
'tidy: yes',
'parser: bob',
'src_lazy_load_attr: data-toto-src',
'date: foo',
'replace_string(toto): titi',
'http_header(user-agent): my-user-agent',
Expand All @@ -47,6 +48,7 @@ public function testBuildFromArray()
$configExpected->title = ['hoho'];
$configExpected->tidy = true;
$configExpected->parser = 'bob';
$configExpected->src_lazy_load_attr = 'data-toto-src';
$configExpected->find_string = ['toto'];
$configExpected->replace_string = ['titi'];
$configExpected->http_header = [
Expand Down

0 comments on commit 5ed0fd3

Please sign in to comment.