Skip to content

Commit

Permalink
fix: Ignore invalid URLs when synchronising feeds
Browse files Browse the repository at this point in the history
  • Loading branch information
marienfressinaud committed Aug 7, 2023
1 parent a82bb53 commit 0021499
Show file tree
Hide file tree
Showing 4 changed files with 125 additions and 0 deletions.
25 changes: 25 additions & 0 deletions lib/SpiderBits/src/Url.php
Original file line number Diff line number Diff line change
Expand Up @@ -378,4 +378,29 @@ public static function buildQuery(array $parameters): string
}
return implode('&', $built_parameters);
}

/**
 * Return true if the given URL is valid, false otherwise.
 *
 * A URL is considered valid when it passes PHP's FILTER_VALIDATE_URL
 * filter, exposes both a scheme and a host once parsed, and its scheme
 * is one of the accepted schemes. Schemes are compared
 * case-insensitively on both sides.
 *
 * @param string[] $accepted_schemes
 */
public static function isValid(string $url, array $accepted_schemes = ['http', 'https']): bool
{
    if (filter_var($url, FILTER_VALIDATE_URL) === false) {
        return false;
    }

    $url_components = parse_url($url);

    if (
        !$url_components ||
        !isset($url_components['scheme']) ||
        !isset($url_components['host'])
    ) {
        return false;
    }

    // Schemes are case-insensitive, so normalise both the URL scheme and
    // the accepted list before comparing them.
    $url_scheme = strtolower($url_components['scheme']);
    $accepted_schemes = array_map('strtolower', $accepted_schemes);

    // Strict comparison prevents PHP from treating numeric-looking strings
    // as equal numbers.
    return in_array($url_scheme, $accepted_schemes, true);
}
}
4 changes: 4 additions & 0 deletions src/services/FeedFetcher.php
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,10 @@ public function fetch(models\Collection $collection): void
$url = \SpiderBits\Url::absolutize($entry->link, $feed_url);
$url = \SpiderBits\Url::sanitize($url);

if (!\SpiderBits\Url::isValid($url)) {
continue;
}

if (isset($link_ids_by_urls[$url])) {
// The URL is already associated to the collection, we have
// nothing more to do.
Expand Down
51 changes: 51 additions & 0 deletions tests/jobs/scheduled/FeedsSyncTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -547,6 +547,57 @@ public function testPerformIgnoresEntriesWithNoLink(): void
$this->assertEmpty($collection->links());
}

/**
 * Check that performing the FeedsSync job leaves the collection without
 * links when the only feed entry points to a URL with the "invalid" scheme.
 */
public function testPerformIgnoresEntriesWithInvalidUrl(): void
{
$feed_url = 'https://flus.fr/carnet/feeds/all.atom.xml';
// A feed collection last fetched two hours ago.
// NOTE(review): presumably old enough for the job to pick it up — confirm
// against the FeedsSync scheduling rules.
$collection = CollectionFactory::create([
'type' => 'feed',
'feed_url' => $feed_url,
'feed_fetched_at' => \Minz\Time::ago(2, 'hours'),
]);
// A validated user following the collection.
// NOTE(review): presumably required for the feed to be synchronised — confirm.
$user = UserFactory::create([
'validated_at' => \Minz\Time::now(),
]);
FollowedCollectionFactory::create([
'collection_id' => $collection->id,
'user_id' => $user->id,
]);
// The scheme is neither http nor https.
$link_url = 'invalid://example.com';
// The canned response below is saved in the cache under the hash of the
// feed URL.
$hash = \SpiderBits\Cache::hash($feed_url);
$raw_response = <<<XML
HTTP/2 200 OK
Content-Type: application/xml
<?xml version='1.0' encoding='UTF-8'?>
<feed xmlns="http://www.w3.org/2005/Atom">
<title>carnet de flus</title>
<link href="https://flus.fr/carnet/feeds/all.atom.xml" rel="self" type="application/atom+xml" />
<link href="https://flus.fr/carnet/" rel="alternate" type="text/html" />
<id>urn:uuid:4c04fe8e-c966-5b7e-af89-74d092a6ccb0</id>
<updated>2021-03-30T11:26:00+02:00</updated>
<entry>
<title>Les nouveautés de mars 2021</title>
<id>urn:uuid:027e66f5-8137-5040-919d-6377c478ae9d</id>
<author><name>Marien</name></author>
<link href="{$link_url}" rel="alternate" type="text/html" />
<published>2021-03-30T11:26:00+02:00</published>
<updated>2021-03-30T11:26:00+02:00</updated>
<content type="html"></content>
</entry>
</feed>
XML;
/** @var string */
$cache_path = \Minz\Configuration::$application['cache_path'];
$cache = new \SpiderBits\Cache($cache_path);
// NOTE(review): presumably lets the job read the feed from the cache
// instead of the network — confirm.
$cache->save($hash, $raw_response);
$feeds_sync_job = new FeedsSync();

$feeds_sync_job->perform();

// Reload the collection to observe what the job persisted: the entry with
// the invalid URL must not have been added as a link.
$collection = $collection->reload();
$this->assertEmpty($collection->links());
}

public function testPerformIgnoresEntriesIfUrlExistsInCollection(): void
{
$support_user = models\User::supportUser();
Expand Down
45 changes: 45 additions & 0 deletions tests/lib/SpiderBits/UrlTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,51 @@ public function testBuildQuery(string $expected_query, array $parameters): void
$this->assertSame($expected_query, $query);
}

public function testIsValid(): void
{
    // An HTTPS URL with a host is accepted with the default schemes.
    $is_valid = Url::isValid('https://example.com');

    $this->assertTrue($is_valid);
}

public function testIsValidFailsWithAnEmptyString(): void
{
    // An empty string is not a URL at all.
    $is_valid = Url::isValid('');

    $this->assertFalse($is_valid);
}

public function testIsValidFailsWithAnInvalidUrl(): void
{
    // The space in the host makes the URL malformed.
    $is_valid = Url::isValid('https://example com');

    $this->assertFalse($is_valid);
}

public function testIsValidFailsWithAMissingScheme(): void
{
    // A bare host name without a scheme must be rejected.
    $is_valid = Url::isValid('example.com');

    $this->assertFalse($is_valid);
}

public function testIsValidFailsWithAnInvalidScheme(): void
{
    // The URL is well-formed, but "http" is not in the accepted schemes.
    $only_https = ['https'];

    $is_valid = Url::isValid('http://example.com', $only_https);

    $this->assertFalse($is_valid);
}

/**
* @return array<array{string, string, string}>
*/
Expand Down

0 comments on commit 0021499

Please sign in to comment.