Skip to content

Commit 8043ba5

Browse files
committed
improve authorship discovery
closes #79
1 parent 18dc929 commit 8043ba5

File tree

4 files changed

+1994
-0
lines changed

4 files changed

+1994
-0
lines changed

lib/XRay/Formats/Mf2.php

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -812,6 +812,30 @@ private static function findAuthor($mf2, $item, $http, $url) {
812812

813813
}
814814

815+
// The below is not yet in the authorship algorithm.
816+
817+
// If the top object is an h-feed, check for an author property there
818+
if(isset($mf2['items'][0]['type'][0]) && in_array('h-feed', $mf2['items'][0]['type'])) {
819+
if(isset($mf2['items'][0]['properties']['author'][0])) {
820+
$potentialAuthor = $mf2['items'][0]['properties']['author'][0];
821+
if(is_array($potentialAuthor['type']) && in_array('h-card', $potentialAuthor['type'])) {
822+
return self::parseAsHCard($potentialAuthor, $http, $url)['data'];
823+
}
824+
}
825+
}
826+
827+
// If still no author is found, and this page is a feed (list of h-*),
828+
// then use the first h-card in the list of items.
829+
$items = array_filter($mf2['items'], function($item){
830+
return !in_array('h-card', $item['type']);
831+
});
832+
if(count($items) > 1) {
833+
$card = self::_findFirstOfType($mf2, 'h-card');
834+
if($card) {
835+
return self::parseAsHCard($card, $http, $url)['data'];
836+
}
837+
}
838+
815839
if(!$author['name'] && !$author['photo'] && !$author['url'])
816840
return null;
817841

tests/FeedTest.php

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -414,4 +414,36 @@ public function testAscraeus() {
414414
$this->assertEquals(20, count($data->items));
415415
}
416416

417+
/**
 * Feed-parsing test for the adactio-links fixture URL.
 *
 * Requests the URL through $this->parse() with 'expect' => 'feed' and
 * asserts that:
 *  - the response status is 200 and the decoded data has type 'feed';
 *  - exactly 20 items are returned, with the expected URLs at the first
 *    (index 0) and last (index 19) positions;
 *  - the first item carries an author with name 'Jeremy Keith' and
 *    url 'https://adactio.com/'.
 *
 * Per the inline comment below, the source page holds 20 h-entry items
 * followed by a single h-card; that h-card is expected to be dropped from
 * the item list and used as the author instead.
 */
public function testAdactioLinks() {
418+
$url = 'http://feed.example.com/adactio-links';
419+
$response = $this->parse(['url' => $url, 'expect' => 'feed']);
420+
421+
$body = $response->getContent();
422+
$this->assertEquals(200, $response->getStatusCode());
423+
$data = json_decode($body)->data;
424+
425+
$this->assertEquals('feed', $data->type);
426+
// 20 h-entrys followed by one h-card, which should have been removed and used as the author instead
427+
$this->assertEquals(20, count($data->items));
428+
$this->assertEquals('http://feed.example.com/links/14501', $data->items[0]->url);
429+
$this->assertEquals('http://feed.example.com/links/14445', $data->items[19]->url);
430+
$item = $data->items[0];
431+
$this->assertEquals('Jeremy Keith', $item->author->name);
432+
$this->assertEquals('https://adactio.com/', $item->author->url);
433+
}
434+
435+
/**
 * Feed-parsing test for the waterpigs fixture URL.
 *
 * Requests the URL through $this->parse() with 'expect' => 'feed' and
 * asserts that:
 *  - the response status is 200 and the decoded data has type 'feed';
 *  - exactly 21 items are returned;
 *  - the item at index 16 carries an author with name 'Barnaby Walters'
 *    and url 'https://waterpigs.co.uk'.
 *
 * NOTE(review): the reason index 16 is the item under test is not visible
 * here — presumably that entry has no author of its own in the fixture and
 * relies on the author fallback; confirm against the fixture file.
 */
public function testWaterpigsFeed() {
436+
$url = 'http://feed.example.com/waterpigs';
437+
$response = $this->parse(['url' => $url, 'expect' => 'feed']);
438+
439+
$body = $response->getContent();
440+
$this->assertEquals(200, $response->getStatusCode());
441+
$data = json_decode($body)->data;
442+
443+
$this->assertEquals('feed', $data->type);
444+
$this->assertEquals(21, count($data->items));
445+
$item = $data->items[16];
446+
$this->assertEquals('Barnaby Walters', $item->author->name);
447+
$this->assertEquals('https://waterpigs.co.uk', $item->author->url);
448+
}
417449
}

0 commit comments

Comments
 (0)