Skip to content

Commit

Permalink
Merge c3145ed into fd793a3
Browse files Browse the repository at this point in the history
  • Loading branch information
j0k3r committed Jan 4, 2019
2 parents fd793a3 + c3145ed commit bbce48d
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 0 deletions.
5 changes: 5 additions & 0 deletions src/Extractor/ContentExtractor.php
Original file line number Diff line number Diff line change
Expand Up @@ -1131,6 +1131,11 @@ private function extractJsonLdInformation($html)
$this->date = $data['datePublished'];
}

// sometimes the date is an array
if (\is_array($this->date)) {
$this->date = reset($this->date);
}

// body should be a DOMNode
if (isset($data['articlebody'])) {
$dom = new \DOMDocument('1.0', 'utf-8');
Expand Down
12 changes: 12 additions & 0 deletions tests/Extractor/ContentExtractorTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -979,6 +979,18 @@ public function testJsonLdName()
$this->assertSame('name !!', $contentExtractor->getTitle());
}

/**
 * A JSON-LD "datePublished" supplied as a one-element array must be exposed
 * by getDate() as the plain string that array contains.
 */
public function testJsonLdDateArray()
{
    $contentExtractor = new ContentExtractor(self::$contentExtractorConfig);

    // In this fixture "datePublished" is a JSON array (["05/29/2014"]) rather than a string.
    $html = ' <script type="application/ld+json">{ "@context": "http://schema.org", "@type": "NewsArticle", "description": "Smoke rises from the 998-tonne fuel tanker Shoko Maru after it exploded off the coast of Himeji, western Japan, in this photo taken and released May 29, 2014. REUTERS/5th Regional Coast Guard Headqua", "headline": "Editor&#039;s choice", "url": "https://www.reuters.com/news/picture/editors-choice-idUSRTR3RD95", "thumbnailUrl": "https://s3.reutersmedia.net/resources/r/?m=02&d=20140529&t=2&i=901254582&w=&fh=810&fw=545&ll=&pl=&sq=&r=2014-05-29T132753Z_2_GM1EA5T1BTD01_RTRMADP_0_JAPAN", "dateCreated": "2014-05-29T13:27:53+0000", "dateModified": "2014-05-29T13:27:53+0000", "articleSection": "RCOMUS_24", "creator": ["JaShong King"], "keywords": ["24 HOURS IN PICTURES", "Slideshow"], "about": "Slideshow", "author": ["JaShong King"], "datePublished": ["05/29/2014"] }</script><p>hihi</p>';

    // Only the extracted date is asserted below, so the return value of
    // process() is intentionally not captured.
    $contentExtractor->process($html, 'https://nativead.io/jsonld');

    $this->assertSame('05/29/2014', $contentExtractor->getDate());
}

public function testUniqueAuthors()
{
$url = 'https://www.lemonde.fr/pixels/article/2018/05/30/bloodstained-curse-of-the-moon-delicieux-jeu-de-vampires-a-la-mode-des-annees-1980_5307173_4408996.html';
Expand Down

0 comments on commit bbce48d

Please sign in to comment.