Skip to content

Commit

Permalink
improve authorship discovery
Browse files Browse the repository at this point in the history
closes #79
  • Loading branch information
aaronpk committed Nov 9, 2018
1 parent 18dc929 commit 8043ba5
Show file tree
Hide file tree
Showing 4 changed files with 1,994 additions and 0 deletions.
24 changes: 24 additions & 0 deletions lib/XRay/Formats/Mf2.php
Original file line number Diff line number Diff line change
Expand Up @@ -812,6 +812,30 @@ private static function findAuthor($mf2, $item, $http, $url) {

}

// The steps below are not yet part of the authorship algorithm.

// If the top object is an h-feed, check for an author property there
if(isset($mf2['items'][0]['type'][0]) && in_array('h-feed', $mf2['items'][0]['type'])) {
if(isset($mf2['items'][0]['properties']['author'][0])) {
$potentialAuthor = $mf2['items'][0]['properties']['author'][0];
if(is_array($potentialAuthor['type']) && in_array('h-card', $potentialAuthor['type'])) {
return self::parseAsHCard($potentialAuthor, $http, $url)['data'];
}
}
}

// If still no author is found, and this page is a feed (list of h-*),
// then use the first h-card in the list of items.
$items = array_filter($mf2['items'], function($item){
return !in_array('h-card', $item['type']);
});
if(count($items) > 1) {
$card = self::_findFirstOfType($mf2, 'h-card');
if($card) {
return self::parseAsHCard($card, $http, $url)['data'];
}
}

if(!$author['name'] && !$author['photo'] && !$author['url'])
return null;

Expand Down
32 changes: 32 additions & 0 deletions tests/FeedTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -414,4 +414,36 @@ public function testAscraeus() {
$this->assertEquals(20, count($data->items));
}

/**
 * Parses the adactio-links fixture URL as a feed and verifies the number of
 * items, the URLs of the first and last items, and the author reported on
 * the first item.
 */
public function testAdactioLinks() {
  $feedUrl = 'http://feed.example.com/adactio-links';
  $result = $this->parse(['url' => $feedUrl, 'expect' => 'feed']);

  $json = $result->getContent();
  $this->assertEquals(200, $result->getStatusCode());
  $feed = json_decode($json)->data;

  $this->assertEquals('feed', $feed->type);

  // The page lists 20 h-entrys followed by one h-card. The h-card should
  // have been removed from the items and used as the author instead.
  $this->assertEquals(20, count($feed->items));
  $this->assertEquals('http://feed.example.com/links/14501', $feed->items[0]->url);
  $this->assertEquals('http://feed.example.com/links/14445', $feed->items[19]->url);

  // The author taken from the trailing h-card is checked on the first entry.
  $firstEntry = $feed->items[0];
  $this->assertEquals('Jeremy Keith', $firstEntry->author->name);
  $this->assertEquals('https://adactio.com/', $firstEntry->author->url);
}

/**
 * Parses the waterpigs fixture URL as a feed and verifies the number of
 * items and the author reported on the item at index 16.
 */
public function testWaterpigsFeed() {
  $feedUrl = 'http://feed.example.com/waterpigs';
  $result = $this->parse(['url' => $feedUrl, 'expect' => 'feed']);

  $json = $result->getContent();
  $this->assertEquals(200, $result->getStatusCode());
  $feed = json_decode($json)->data;

  $this->assertEquals('feed', $feed->type);
  $this->assertEquals(21, count($feed->items));

  // Check the author on one entry from the list (index 16).
  $entry = $feed->items[16];
  $this->assertEquals('Barnaby Walters', $entry->author->name);
  $this->assertEquals('https://waterpigs.co.uk', $entry->author->url);
}
}

0 comments on commit 8043ba5

Please sign in to comment.