Skip to content

Commit

Permalink
Improved code climate & coverage
Browse files Browse the repository at this point in the history
  • Loading branch information
jkphl committed Feb 14, 2017
1 parent 2d3faee commit 0d820b8
Show file tree
Hide file tree
Showing 10 changed files with 219 additions and 76 deletions.
5 changes: 3 additions & 2 deletions src/Rdfalite/Domain/Property/PropertyInterface.php
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@

namespace Jkphl\Rdfalite\Domain\Property;

use Jkphl\Rdfalite\Domain\Thing\ThingInterface;
use Jkphl\Rdfalite\Domain\Vocabulary\VocabularyInterface;

/**
Expand All @@ -61,9 +62,9 @@ public function getName();
public function getVocabulary();

/**
* Property value
* Return the property value
*
* @return string Property value
* @return string|ThingInterface Property value
*/
public function getValue();
}
2 changes: 2 additions & 0 deletions src/Rdfalite/Infrastructure/Factories/HtmlDocumentFactory.php
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,9 @@ class HtmlDocumentFactory implements DocumentFactoryInterface
public function createDocumentFromString($string)
{
    $dom = new \DOMDocument();

    // Buffer libxml parse errors internally instead of letting loadHTML() raise PHP warnings,
    // and remember the previous setting: the flag is process-wide, not per document
    $previousErrorHandling = libxml_use_internal_errors(true);
    try {
        $dom->loadHTML($string);
    } finally {
        // Drop the buffered errors and hand the libxml error handling back exactly as we found it,
        // so that using this factory doesn't silently alter error reporting for unrelated code
        libxml_clear_errors();
        libxml_use_internal_errors($previousErrorHandling);
    }

    return $dom;
}
}
94 changes: 59 additions & 35 deletions src/Rdfalite/Infrastructure/Parser/RdfaliteElementProcessor.php
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,28 @@
*/
class RdfaliteElementProcessor implements ElementProcessorInterface
{
/**
 * Tag name / attribute map
 *
 * Keys are upper-case element tag names, values are the name of the attribute that
 * getPropertyStringValue() reads as the property value for such an element (instead of
 * the element's text content). Lookups are done with the upper-cased tag name, so keys
 * must stay upper-case. TIME is special-cased by the reader: an empty `datetime`
 * attribute makes it use the text content instead.
 *
 * @var array
 */
protected static $tagNameAttributes = [
'META' => 'content',
'AUDIO' => 'src',
'EMBED' => 'src',
'IFRAME' => 'src',
'IMG' => 'src',
'SOURCE' => 'src',
'TRACK' => 'src',
'VIDEO' => 'src',
'A' => 'href',
'AREA' => 'href',
'LINK' => 'href',
'OBJECT' => 'data',
'DATA' => 'value',
'TIME' => 'datetime'
];

/**
* Process a DOM element
*
Expand Down Expand Up @@ -131,9 +153,18 @@ protected function processProperty(\DOMElement $element, Context $context)
// Try to get a resource ID
$resourceId = trim($element->getAttribute('resource')) ?: null;

// Get the property value
$propertyValue = $this->getPropertyValue($element, $context);
$property = new Property($name, $vocabulary, $propertyValue, $resourceId);

// Add the property to the current parent thing
$property = new Property($name, $vocabulary, $this->getPropertyValue($element, $context), $resourceId);
$context->getParentThing()->addProperty($property);

// If the property value is a thing
if ($propertyValue instanceof ThingInterface) {
// Set the thing as parent thing for nested iterations
$context = $context->setParentThing($propertyValue);
}
}
}

Expand Down Expand Up @@ -179,18 +210,21 @@ protected function getThing($typeof, $resourceId, Context $context)
$prefix = array_pop($typeof);

// Determine the vocabulary to use
$vocabulary = empty($prefix) ? $context->getDefaultVocabulary() : $context->getVocabulary($prefix);
if ($vocabulary instanceof VocabularyInterface) {
// Return a new thing
return new Thing($type, $vocabulary, $resourceId);
}
try {
$vocabulary = empty($prefix) ? $context->getDefaultVocabulary() : $context->getVocabulary($prefix);
if ($vocabulary instanceof VocabularyInterface) {
// Return a new thing
return new Thing($type, $vocabulary, $resourceId);
}

// If the default vocabulary is empty
if (empty($prefix)) {
throw new OutOfBoundsException(
OutOfBoundsException::EMPTY_DEFAULT_VOCABULARY_STR,
OutOfBoundsException::EMPTY_DEFAULT_VOCABULARY
);
// If the default vocabulary is empty
if (empty($prefix)) {
throw new OutOfBoundsException(
OutOfBoundsException::EMPTY_DEFAULT_VOCABULARY_STR,
OutOfBoundsException::EMPTY_DEFAULT_VOCABULARY
);
}
} catch (\Jkphl\Rdfalite\Application\Exceptions\OutOfBoundsException $e) {
}

throw new OutOfBoundsException(
Expand All @@ -209,29 +243,17 @@ protected function getPropertyStringValue(\DOMElement $element)
{
$tagName = strtoupper($element->tagName);

// Else: Depend on the tag name
switch (true) {
case $tagName === 'META':
return strval($element->getAttribute('content'));
case in_array($tagName, ['AUDIO', 'EMBED', 'IFRAME', 'IMG', 'SOURCE', 'TRACK', 'VIDEO']):
return strval($element->getAttribute('src'));

case in_array($tagName, ['A', 'AREA', 'LINK']):
return strval($element->getAttribute('href'));
case $tagName === 'OBJECT':
return strval($element->getAttribute('data'));
case $tagName === 'DATA':
return strval($element->getAttribute('value'));
case $tagName === 'TIME':
$datetime = $element->getAttribute('datetime');
if (!empty($datetime)) {
return strval($datetime);
}
// fall through
default:
// trigger_error(sprintf('RDFa Lite 1.1 element processor: Unhandled tag name "%s"', $element->tagName), E_USER_WARNING);
return $element->textContent;
// Map to an attribute (if applicable)
if (array_key_exists($tagName, self::$tagNameAttributes)) {
$value = strval($element->getAttribute(self::$tagNameAttributes[$tagName]));
if (($tagName != 'TIME') || !empty($value)) {
return $value;
}
}

// Return the text content
// trigger_error(sprintf('RDFa Lite 1.1 element processor: Unhandled tag name "%s"', $element->tagName), E_USER_WARNING);
return $element->textContent;
}

/**
Expand All @@ -256,7 +278,9 @@ public function processElementChildren(\DOMElement $element, Context $context)
*/
protected function processTypeof(\DOMElement $element, Context $context)
{
if ($element->hasAttribute('typeof')) {
if ($element->hasAttribute('typeof') &&
(!$element->hasAttribute('property') || empty($element->getAttribute('property')))
) {
$thing = $this->getThing(
$element->getAttribute('typeof'),
trim($element->getAttribute('resource')) ?: null,
Expand Down
6 changes: 3 additions & 3 deletions src/Rdfalite/Tests/Application/DOMIteratorTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ class DOMNodeIteratorTest extends ParserIteratorTestBase
public function testDomNodeIteration()
{
$dom = new \DOMDocument();
$dom->loadHTML(self::$html);
$dom->loadHTML(self::$personRdfa);
$context = new Context();

$elementProcessor = $this->getMock(ElementProcessorInterface::class);
Expand Down Expand Up @@ -87,7 +87,7 @@ public function testDomNodeIteration()
public function testRdfaLiteProcessor()
{
$dom = new \DOMDocument();
$dom->loadHTML(self::$html);
$dom->loadHTML(self::$personRdfa);
$context = new Context();
$domNodeIterator = new DOMIterator($dom->childNodes, $context, new RdfaliteElementProcessor());
$this->assertInstanceOf(DOMIterator::class, $domNodeIterator);
Expand All @@ -106,6 +106,6 @@ public function testRdfaLiteProcessor()
}
}
$this->assertEquals(0, count($elements));
$this->validateIteratorResult($context->getChildren());
$this->validatePersonResult($context->getChildren());
}
}
54 changes: 26 additions & 28 deletions src/Rdfalite/Tests/Application/ParserIteratorTestBase.php
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
use Jkphl\Rdfalite\Domain\Property\PropertyInterface;
use Jkphl\Rdfalite\Domain\Thing\ThingInterface;
use Jkphl\Rdfalite\Domain\Vocabulary\Vocabulary;
use Jkphl\Rdfalite\Domain\Vocabulary\VocabularyInterface;
use Jkphl\Rdfalite\Tests\Domain\VocabularyTest;

/**
Expand All @@ -51,19 +52,19 @@
abstract class ParserIteratorTestBase extends \PHPUnit_Framework_TestCase
{
/**
* Test HTML
* Test HTML with person RDFa Lite 1.1
*
* @var string
*/
protected static $html;
protected static $personRdfa;

/**
* Setup all tests
*/
public static function setUpBeforeClass()
{
self::$html = file_get_contents(
dirname(__DIR__).DIRECTORY_SEPARATOR.'Fixtures'.DIRECTORY_SEPARATOR.'rdfa-lite-1.1.html'
self::$personRdfa = file_get_contents(
dirname(__DIR__).DIRECTORY_SEPARATOR.'Fixtures'.DIRECTORY_SEPARATOR.'person-rdfa-lite.html'
);
}

Expand All @@ -72,7 +73,7 @@ public static function setUpBeforeClass()
*
* @param ThingInterface[] $things Parsing Results
*/
protected function validateIteratorResult(array $things)
protected function validatePersonResult(array $things)
{
$schemaOrgVocabulary = new Vocabulary(VocabularyTest::SCHEMA_ORG);

Expand All @@ -89,29 +90,26 @@ protected function validateIteratorResult(array $things)
$this->assertTrue(is_array($properties));
$this->assertEquals(4, count($properties));

$name = $thing->getProperty('name');
$this->assertTrue(is_array($name));
$this->assertEquals(1, count($name));
$this->assertInstanceOf(PropertyInterface::class, $name[0]);
$this->assertEquals(new Property('name', $schemaOrgVocabulary, 'Joschi Kuphal'), $name[0]);

$telephone = $thing->getProperty('telephone');
$this->assertTrue(is_array($telephone));
$this->assertEquals(1, count($telephone));
$this->assertInstanceOf(PropertyInterface::class, $telephone[0]);
$this->assertEquals(new Property('telephone', $schemaOrgVocabulary, '+49 911 9593945'), $telephone[0]);

$image = $thing->getProperty('image');
$this->assertTrue(is_array($image));
$this->assertEquals(1, count($image));
$this->assertInstanceOf(PropertyInterface::class, $image[0]);
$this->assertEquals(new Property('image', $schemaOrgVocabulary, 'https://jkphl.is/avatar.jpg'), $image[0]);
$this->validateProperty($thing, 'name', $schemaOrgVocabulary, 'Joschi Kuphal');
$this->validateProperty($thing, 'telephone', $schemaOrgVocabulary, '+49 911 9593945');
$this->validateProperty($thing, 'image', $schemaOrgVocabulary, 'https://jkphl.is/avatar.jpg');
$this->validateProperty($thing, 'preferredAnimal', new Vocabulary('http://open.vocab.org/terms/'), 'Unicorn');
}

$preferredAnimal = $thing->getProperty('preferredAnimal');
$this->assertTrue(is_array($preferredAnimal));
$this->assertEquals(1, count($preferredAnimal));
$this->assertInstanceOf(PropertyInterface::class, $preferredAnimal[0]);
$this->assertEquals(new Property('preferredAnimal', new Vocabulary('http://open.vocab.org/terms/'), 'Unicorn'),
$preferredAnimal[0]);
/**
 * Validate a single property
 *
 * Asserts that the thing holds exactly one property with the given name and that this
 * property equals a property built from the expected name, vocabulary and value.
 *
 * @param ThingInterface $thing Thing
 * @param string $name Property name
 * @param VocabularyInterface $vocabulary Property vocabulary
 * @param string|ThingInterface $value Expected property value
 */
protected function validateProperty(ThingInterface $thing, $name, VocabularyInterface $vocabulary, $value)
{
    $properties = $thing->getProperty($name);
    $this->assertTrue(is_array($properties));

    // assertCount() reports the actual number of elements on failure (unlike comparing count() manually)
    $this->assertCount(1, $properties);

    $this->assertInstanceOf(PropertyInterface::class, $properties[0]);
    $this->assertEquals(new Property($name, $vocabulary, $value), $properties[0]);
}
}
66 changes: 62 additions & 4 deletions src/Rdfalite/Tests/Application/ParserTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -49,16 +49,74 @@
class ParserTest extends ParserIteratorTestBase
{
/**
* Test parser context instantiation
* Test person parsing
*/
public function testParser()
public function testPerson()
{
$htmlDocumentFactory = new HtmlDocumentFactory();
$rdfaElementProcessor = new RdfaliteElementProcessor();
$parser = new Parser($htmlDocumentFactory, $rdfaElementProcessor);
$this->assertInstanceOf(Parser::class, $parser);

$things = $parser->parse(self::$html);
$this->validateIteratorResult($things);
$things = $parser->parse(self::$personRdfa);
$this->validatePersonResult($things);
}

/**
 * Test article parsing
 *
 * Runs the parser over the news article fixture (nested things); currently this only
 * verifies that parsing completes.
 */
public function testArticle()
{
    $fixture = dirname(__DIR__).DIRECTORY_SEPARATOR.'Fixtures'.DIRECTORY_SEPARATOR.'article-rdfa-lite.html';

    $parser = new Parser(new HtmlDocumentFactory(), new RdfaliteElementProcessor());
    $this->assertInstanceOf(Parser::class, $parser);

    // TODO: Validate result
    $parser->parse(file_get_contents($fixture));
}

/**
 * Test empty default vocabulary parsing
 *
 * Parsing the fixture is expected to abort with the exception declared below.
 *
 * @expectedException \Jkphl\Rdfalite\Infrastructure\Exceptions\OutOfBoundsException
 * @expectedExceptionCode 1487030264
 */
public function testEmptyDefaultVocabulary()
{
    $fixture = dirname(__DIR__).DIRECTORY_SEPARATOR.'Fixtures'.DIRECTORY_SEPARATOR.'empty-default-vocab-rdfa-lite.html';

    $parser = new Parser(new HtmlDocumentFactory(), new RdfaliteElementProcessor());
    $this->assertInstanceOf(Parser::class, $parser);

    $parser->parse(file_get_contents($fixture));
}

/**
 * Test unknown vocabulary prefix parsing
 *
 * Parsing the fixture is expected to abort with the exception declared below.
 *
 * @expectedException \Jkphl\Rdfalite\Infrastructure\Exceptions\OutOfBoundsException
 * @expectedExceptionCode 1486928423
 */
public function testUnknownVocabularyPrefix()
{
    $fixture = dirname(__DIR__).DIRECTORY_SEPARATOR.'Fixtures'.DIRECTORY_SEPARATOR.'unknown-vocab-prefix-rdfa-lite.html';

    $parser = new Parser(new HtmlDocumentFactory(), new RdfaliteElementProcessor());
    $this->assertInstanceOf(Parser::class, $parser);

    $parser->parse(file_get_contents($fixture));
}
}
25 changes: 25 additions & 0 deletions src/Rdfalite/Tests/Fixtures/article-rdfa-lite.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>News article</title>
</head>
<body vocab="http://schema.org/">
<article typeof="NewsArticle">
<h1 property="name">Nam dictum eros at libero tempor semper</h1>
<p>Published on
<time property="datePublished" datetime="2014-03-04T09:00:00+00:00">4th March 2014</time>
</p>
<p property="description">Nullam in convallis metus, quis hendrerit velit. Nam mollis congue orci, a
lobortis nibh feugiat id.</p>

<div property="articleBody">
<figure property="associatedMedia" typeof="ImageObject">
<img property="contentUrl" src="image.jpg">
<figcaption property="caption">Lorem ipsum dolor.</figcaption>
</figure>
<p>News article content here.</p>
</div>
</article>
</body>
</html>
Loading

0 comments on commit 0d820b8

Please sign in to comment.