Skip to content
Browse files

Updated ARC2 to the latest version, few bug fixes

  • Loading branch information...
1 parent 299af7c commit 9b7fc69e21a5651d8a78801577e1b17456a8bbd1 @WebOrganics committed Mar 17, 2012
Showing with 2,703 additions and 1,549 deletions.
  1. +3 −0 app/arc/.gitignore
  2. +113 −77 app/arc/ARC2.php
  3. +109 −33 app/arc/ARC2_Class.php
  4. +77 −58 app/arc/ARC2_Reader.php
  5. +7 −7 app/arc/ARC2_Resource.php
  6. +434 −0 app/arc/ARC2_TestHandler.php
  7. +1 −1 app/arc/ARC2_getFormat.php
  8. +13 −10 app/arc/ARC2_getPreferredFormat.php
  9. +2 −6 app/arc/extractors/ARC2_DcExtractor.php
  10. +2 −6 app/arc/extractors/ARC2_ErdfExtractor.php
  11. +5 −9 app/arc/extractors/ARC2_MicroformatsExtractor.php
  12. +2 −6 app/arc/extractors/ARC2_OpenidExtractor.php
  13. +2 −6 app/arc/extractors/ARC2_PoshRdfExtractor.php
  14. +2 −14 app/arc/extractors/ARC2_RDFExtractor.php
  15. +2 −6 app/arc/extractors/ARC2_RdfaExtractor.php
  16. +2 −6 app/arc/extractors/ARC2_TwitterProfilePicExtractor.php
  17. +4 −8 app/arc/parsers/ARC2_AtomParser.php
  18. +3 −7 app/arc/parsers/ARC2_CBJSONParser.php
  19. +4 −7 app/arc/parsers/ARC2_JSONParser.php
  20. +5 −9 app/arc/parsers/ARC2_LegacyXMLParser.php
  21. +17 −9 app/arc/parsers/ARC2_RDFParser.php
  22. +8 −11 app/arc/parsers/ARC2_RDFXMLParser.php
  23. +3 −7 app/arc/parsers/ARC2_RSSParser.php
  24. +2 −6 app/arc/parsers/ARC2_SGAJSONParser.php
  25. +57 −61 app/arc/parsers/ARC2_SPARQLParser.php
  26. +2 −6 app/arc/parsers/ARC2_SPARQLPlusParser.php
  27. +2 −6 app/arc/parsers/ARC2_SPARQLXMLResultParser.php
  28. +37 −16 app/arc/parsers/ARC2_SPOGParser.php
  29. +9 −21 app/arc/parsers/ARC2_SemHTMLParser.php
  30. +3 −7 app/arc/parsers/ARC2_TurtleParser.php
  31. +647 −467 app/arc/plugins/namespaces.txt
  32. +64 −6 app/arc/serializers/ARC2_LegacyHTMLSerializer.php
  33. +2 −6 app/arc/serializers/ARC2_LegacyJSONSerializer.php
  34. +2 −6 app/arc/serializers/ARC2_LegacyXMLSerializer.php
  35. +38 −14 app/arc/serializers/ARC2_MicroRDFSerializer.php
  36. +71 −26 app/arc/serializers/ARC2_NTriplesSerializer.php
  37. +2 −6 app/arc/serializers/ARC2_POSHRDFSerializer.php
  38. +5 −8 app/arc/serializers/ARC2_RDFJSONSerializer.php
  39. +3 −7 app/arc/serializers/ARC2_RDFSerializer.php
  40. +17 −19 app/arc/serializers/ARC2_RDFXMLSerializer.php
  41. +2 −6 app/arc/serializers/ARC2_RSS10Serializer.php
  42. +10 −11 app/arc/serializers/ARC2_TurtleSerializer.php
  43. +3 −7 app/arc/sparqlscript/ARC2_SPARQLScriptParser.php
  44. +24 −13 app/arc/sparqlscript/ARC2_SPARQLScriptProcessor.php
  45. +194 −0 app/arc/store/ARC2_MemStore.php
  46. +10 −12 app/arc/store/ARC2_RemoteStore.php
  47. +173 −59 app/arc/store/ARC2_Store.php
  48. +3 −7 app/arc/store/ARC2_StoreAskQueryHandler.php
  49. +2 −6 app/arc/store/ARC2_StoreAtomLoader.php
  50. +10 −12 app/arc/store/ARC2_StoreCBJSONLoader.php
  51. +3 −7 app/arc/store/ARC2_StoreConstructQueryHandler.php
  52. +6 −10 app/arc/store/ARC2_StoreDeleteQueryHandler.php
  53. +3 −7 app/arc/store/ARC2_StoreDescribeQueryHandler.php
  54. +4 −8 app/arc/store/ARC2_StoreDumpQueryHandler.php
  55. +105 −40 app/arc/store/ARC2_StoreDumper.php
  56. +14 −13 app/arc/store/ARC2_StoreEndpoint.php
  57. +21 −25 app/arc/store/ARC2_StoreHelper.php
  58. +20 −17 app/arc/store/ARC2_StoreInsertQueryHandler.php
  59. +77 −57 app/arc/store/ARC2_StoreLoadQueryHandler.php
  60. +15 −10 app/arc/store/ARC2_StoreQueryHandler.php
  61. +2 −6 app/arc/store/ARC2_StoreRDFXMLLoader.php
  62. +2 −6 app/arc/store/ARC2_StoreRSSLoader.php
  63. +2 −6 app/arc/store/ARC2_StoreSGAJSONLoader.php
  64. +3 −7 app/arc/store/ARC2_StoreSPOGLoader.php
  65. +134 −90 app/arc/store/ARC2_StoreSelectQueryHandler.php
  66. +2 −6 app/arc/store/ARC2_StoreSemHTMLLoader.php
  67. +12 −9 app/arc/store/ARC2_StoreTableManager.php
  68. +2 −6 app/arc/store/ARC2_StoreTurtleLoader.php
  69. +2 −1 app/template/direct.php
  70. +0 −94 app/template/foot.php
  71. +60 −0 app/template/head.php
View
3 app/arc/.gitignore
@@ -0,0 +1,3 @@
+*.DS_Store
+plugins/*
+triggers/*
View
190 app/arc/ARC2.php
@@ -6,30 +6,22 @@
* @license <http://arc.semsol.org/license>
* @homepage <http://arc.semsol.org/>
* @package ARC2
- * @version 2010-04-26
-*/
+ */
+
+/* E_STRICT hack */
+if (function_exists('date_default_timezone_get')) {
+ date_default_timezone_set(@date_default_timezone_get());
+}
class ARC2 {
- function getVersion() {
- return '2010-04-26';
+ static function getVersion() {
+ return '2011-12-01';
}
/* */
- function setStatic($val) {
- static $arc_static = '';
- if ($val) $arc_static = $val; /* set */
- if (!$val) return $arc_static; /* get */
- }
-
- function getStatic() {
- return ARC2::setStatic('');
- }
-
- /* */
-
- function getIncPath($f = '') {
+ static function getIncPath($f = '') {
$r = realpath(dirname(__FILE__)) . '/';
$dirs = array(
'plugin' => 'plugins',
@@ -48,84 +40,113 @@ function getIncPath($f = '') {
return $r;
}
- function getScriptURI() {
- if (isset($_SERVER) && isset($_SERVER['SERVER_NAME'])) {
+ /**
+ * Returns the absolute URI of the running script, built from $_SERVER.
+ *
+ * Falls back to a file:// URI (SCRIPT_FILENAME) and finally to a fixed
+ * placeholder when no host information is available.
+ */
+ static function getScriptURI() {
+ if (isset($_SERVER) && (isset($_SERVER['SERVER_NAME']) || isset($_SERVER['HTTP_HOST']))) {
+ $proto = preg_replace('/^([a-z]+)\/.*$/', '\\1', strtolower($_SERVER['SERVER_PROTOCOL']));
+ $port = $_SERVER['SERVER_PORT'];
+ $server = isset($_SERVER['HTTP_HOST']) ? $_SERVER['HTTP_HOST'] : $_SERVER['SERVER_NAME'];
+ $script = $_SERVER['SCRIPT_NAME'];
+ /* https */
+ if (($proto == 'http') && $port == 443) {
+ $proto = 'https';
+ $port = 80;
+ }
+ /* HTTP_HOST mirrors the Host request header, which already carries a non-default port; don't append it twice */
+ $has_port = preg_match('/\:\d+$/', $server);
+ return $proto . '://' . $server . ((($port != 80) && !$has_port) ? ':' . $port : '') . $script;
+ /*
return preg_replace('/^([a-z]+)\/.*$/', '\\1', strtolower($_SERVER['SERVER_PROTOCOL'])) .
'://' . $_SERVER['SERVER_NAME'] .
($_SERVER['SERVER_PORT'] != 80 ? ':' . $_SERVER['SERVER_PORT'] : '') .
$_SERVER['SCRIPT_NAME'];
+ */
}
elseif (isset($_SERVER['SCRIPT_FILENAME'])) {
return 'file://' . realpath($_SERVER['SCRIPT_FILENAME']);
}
return 'http://localhost/unknown_path';
}
- function getRequestURI() {
+ static function getRequestURI() {
if (isset($_SERVER) && isset($_SERVER['REQUEST_URI'])) {
return preg_replace('/^([a-z]+)\/.*$/', '\\1', strtolower($_SERVER['SERVER_PROTOCOL'])) .
- '://' . $_SERVER['SERVER_NAME'] .
+ '://' . (isset($_SERVER['HTTP_HOST']) ? $_SERVER['HTTP_HOST'] : $_SERVER['SERVER_NAME']) .
($_SERVER['SERVER_PORT'] != 80 ? ':' . $_SERVER['SERVER_PORT'] : '') .
$_SERVER['REQUEST_URI'];
}
return ARC2::getScriptURI();
}
- function inc($f, $path = '') {
+ static function inc($f, $path = '') {
$prefix = 'ARC2';
if (preg_match('/^([^\_]+)\_(.*)$/', $f, $m)) {
$prefix = $m[1];
$f = $m[2];
}
$inc_path = $path ? $path : ARC2::getIncPath($f);
$path = $inc_path . $prefix . '_' . urlencode($f) . '.php';
- if (file_exists($path)) {
- include_once($path);
- return 1;
- }
+ if (file_exists($path)) return include_once($path);
+ /* safe-mode hack */
+ if (@include_once($path)) return 1;
+ /* try other path */
if ($prefix != 'ARC2') {
$path = $inc_path . strtolower($prefix) . '/' . $prefix . '_' . urlencode($f) . '.php';
- if (file_exists($path)) {
- include_once($path);
- return 1;
- }
+ if (file_exists($path)) return include_once($path);
+ /* safe-mode hack */
+ if (@include_once($path)) return 1;
}
return 0;
}
/* */
- function mtime(){
+ static function mtime(){
list($msec, $sec) = explode(" ", microtime());
return ((float)$msec + (float)$sec);
}
- function x($re, $v, $options = 'si') {
+ static function x($re, $v, $options = 'si') {
return preg_match("/^\s*" . $re . "(.*)$/" . $options, $v, $m) ? $m : false;
}
/* */
- function getFormat($val, $mtype = '', $ext = '') {
+ static function getFormat($val, $mtype = '', $ext = '') {
ARC2::inc('getFormat');
return ARC2_getFormat($val, $mtype, $ext);
}
- function getPreferredFormat($default = 'plain') {
+ static function getPreferredFormat($default = 'plain') {
ARC2::inc('getPreferredFormat');
return ARC2_getPreferredFormat($default);
}
/* */
- function toUTF8($v) {
+ static function toUTF8($v) {
if (urlencode($v) === $v) return $v;
//if (utf8_decode($v) == $v) return $v;
- $v = (strpos(utf8_decode(str_replace('?', '', $v)), '?') === false) ? utf8_decode($v) : $v;
+ $v = (strpos(utf8_decode(str_replace('?', '', $v)), '?') === false) ? utf8_decode($v) : $v;
+ /* custom hacks, mainly caused by bugs in PHP's json_decode */
+ $mappings = array(
+ '%18' => '',
+ '%19' => '',
+ '%1C' => '',
+ '%1D' => '',
+ '%1E' => '',
+ '%10' => '',
+ '%12' => '',
+ '%13' => '',
+ '%14' => '',
+ '%26' => '&',
+ );
+ $froms = array_keys($mappings);
+ $tos = array_values($mappings);
+ foreach ($froms as $i => $from) $froms[$i] = urldecode($from);
+ $v = str_replace($froms, $tos, $v);
+ /* utf8 tweaks */
return preg_replace_callback('/([\x00-\xdf][\x80-\xbf]|[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3}|[\xf8-\xfb][\x80-\xbf]{4}|[\xfc-\xfd][\x80-\xbf]{5}|[^\x00-\x7f])/', array('ARC2', 'getUTF8Char'), $v);
}
- function getUTF8Char($v) {
+ static function getUTF8Char($v) {
$val = $v[1];
if (strlen(trim($val)) === 1) return utf8_encode($val);
if (preg_match('/^([\x00-\x7f])(.+)/', $val, $m)) return $m[1] . ARC2::toUTF8($m[2]);
@@ -134,7 +155,7 @@ function getUTF8Char($v) {
/* */
- function splitURI($v) {
+ static function splitURI($v) {
/* the following namespaces may lead to conflated URIs,
* we have to set the split position manually
*/
@@ -163,7 +184,7 @@ function splitURI($v) {
/* */
- function getSimpleIndex($triples, $flatten_objects = 1, $vals = '') {
+ static function getSimpleIndex($triples, $flatten_objects = 1, $vals = '') {
$r = array();
foreach ($triples as $t) {
$skip_t = 0;
@@ -211,7 +232,7 @@ function getSimpleIndex($triples, $flatten_objects = 1, $vals = '') {
return $r;
}
- function getTriplesFromIndex($index) {
+ static function getTriplesFromIndex($index) {
$r = array();
foreach ($index as $s => $ps) {
foreach ($ps as $p => $os) {
@@ -231,7 +252,7 @@ function getTriplesFromIndex($index) {
return $r;
}
- function getMergedIndex() {
+ static function getMergedIndex() {
$r = array();
foreach (func_get_args() as $index) {
foreach ($index as $s => $ps) {
@@ -249,7 +270,7 @@ function getMergedIndex() {
return $r;
}
- function getCleanedIndex() {/* removes triples from a given index */
+ static function getCleanedIndex() {/* removes triples from a given index */
$indexes = func_get_args();
$r = $indexes[0];
for ($i = 1, $i_max = count($indexes); $i < $i_max; $i++) {
@@ -296,16 +317,25 @@ function getCleanedIndex() {/* removes triples from a given index */
/* */
- function getStructType($v) {
+ static function getStructType($v) {
/* string */
if (is_string($v)) return 'string';
- /* triples */
- if (isset($v[0]) && isset($v[0]['s']) && isset($v[0]['p'])) return 'triples';
- /* index */
- foreach ($v as $s => $ps) {
- if (is_array($ps)) {
+ /* flat array, numeric keys */
+ if (in_array(0, array_keys($v))) {/* numeric keys */
+ /* simple array */
+ if (!is_array($v[0])) return 'array';
+ /* triples */
+ //if (isset($v[0]) && isset($v[0]['s']) && isset($v[0]['p'])) return 'triples';
+ if (in_array('p', array_keys($v[0]))) return 'triples';
+ }
+ /* associative array */
+ else {
+ /* index */
+ foreach ($v as $s => $ps) {
+ if (!is_array($ps)) break;
foreach ($ps as $p => $os) {
- if (is_array($os) && isset($os[0]) && isset($os[0]['value'])) return 'index';
+ if (!is_array($os) || !is_array($os[0])) break;
+ if (in_array('value', array_keys($os[0]))) return 'index';
}
}
}
@@ -315,7 +345,7 @@ function getStructType($v) {
/* */
- function getComponent($name, $a = '', $caller = '') {
+ static function getComponent($name, $a = '', $caller = '') {
ARC2::inc($name);
$prefix = 'ARC2';
if (preg_match('/^([^\_]+)\_(.+)$/', $name, $m)) {
@@ -329,119 +359,125 @@ function getComponent($name, $a = '', $caller = '') {
/* resource */
- function getResource($a = '') {
+ static function getResource($a = '') {
return ARC2::getComponent('Resource', $a);
}
+ /* reader */
+
+ static function getReader($a = '') {
+ return ARC2::getComponent('Reader', $a);
+ }
+
/* parsers */
- function getParser($prefix, $a = '') {
+ static function getParser($prefix, $a = '') {
return ARC2::getComponent($prefix . 'Parser', $a);
}
- function getRDFParser($a = '') {
+ static function getRDFParser($a = '') {
return ARC2::getParser('RDF', $a);
}
- function getRDFXMLParser($a = '') {
+ static function getRDFXMLParser($a = '') {
return ARC2::getParser('RDFXML', $a);
}
- function getTurtleParser($a = '') {
+ static function getTurtleParser($a = '') {
return ARC2::getParser('Turtle', $a);
}
- function getRSSParser($a = '') {
+ static function getRSSParser($a = '') {
return ARC2::getParser('RSS', $a);
}
- function getSemHTMLParser($a = '') {
+ static function getSemHTMLParser($a = '') {
return ARC2::getParser('SemHTML', $a);
}
- function getSPARQLParser($a = '') {
+ static function getSPARQLParser($a = '') {
return ARC2::getComponent('SPARQLParser', $a);
}
- function getSPARQLPlusParser($a = '') {
+ static function getSPARQLPlusParser($a = '') {
return ARC2::getParser('SPARQLPlus', $a);
}
- function getSPARQLXMLResultParser($a = '') {
+ static function getSPARQLXMLResultParser($a = '') {
return ARC2::getParser('SPARQLXMLResult', $a);
}
- function getJSONParser($a = '') {
+ static function getJSONParser($a = '') {
return ARC2::getParser('JSON', $a);
}
- function getSGAJSONParser($a = '') {
+ static function getSGAJSONParser($a = '') {
return ARC2::getParser('SGAJSON', $a);
}
- function getCBJSONParser($a = '') {
+ static function getCBJSONParser($a = '') {
return ARC2::getParser('CBJSON', $a);
}
- function getSPARQLScriptParser($a = '') {
+ static function getSPARQLScriptParser($a = '') {
return ARC2::getParser('SPARQLScript', $a);
}
/* store */
- function getStore($a = '', $caller = '') {
+ static function getStore($a = '', $caller = '') {
return ARC2::getComponent('Store', $a, $caller);
}
- function getStoreEndpoint($a = '', $caller = '') {
+ static function getStoreEndpoint($a = '', $caller = '') {
return ARC2::getComponent('StoreEndpoint', $a, $caller);
}
- function getRemoteStore($a = '', $caller = '') {
+ static function getRemoteStore($a = '', $caller = '') {
return ARC2::getComponent('RemoteStore', $a, $caller);
}
- function getMemStore($a = '') {
+ static function getMemStore($a = '') {
return ARC2::getComponent('MemStore', $a);
}
/* serializers */
- function getSer($prefix, $a = '') {
+ static function getSer($prefix, $a = '') {
return ARC2::getComponent($prefix . 'Serializer', $a);
}
- function getTurtleSerializer($a = '') {
+ static function getTurtleSerializer($a = '') {
return ARC2::getSer('Turtle', $a);
}
- function getRDFXMLSerializer($a = '') {
+ static function getRDFXMLSerializer($a = '') {
return ARC2::getSer('RDFXML', $a);
}
- function getNTriplesSerializer($a = '') {
+ static function getNTriplesSerializer($a = '') {
return ARC2::getSer('NTriples', $a);
}
- function getRDFJSONSerializer($a = '') {
+ static function getRDFJSONSerializer($a = '') {
return ARC2::getSer('RDFJSON', $a);
}
- function getPOSHRDFSerializer($a = '') {/* deprecated */
+ static function getPOSHRDFSerializer($a = '') {/* deprecated */
return ARC2::getSer('POSHRDF', $a);
}
- function getMicroRDFSerializer($a = '') {
+ static function getMicroRDFSerializer($a = '') {
return ARC2::getSer('MicroRDF', $a);
}
- function getRSS10Serializer($a = '') {
+ static function getRSS10Serializer($a = '') {
return ARC2::getSer('RSS10', $a);
}
/* sparqlscript */
- function getSPARQLScriptProcessor($a = '') {
+ static function getSPARQLScriptProcessor($a = '') {
return ARC2::getComponent('SPARQLScriptProcessor', $a);
}
View
142 app/arc/ARC2_Class.php
@@ -6,74 +6,71 @@
* @license <http://arc.semsol.org/license>
* @homepage <http://arc.semsol.org/>
* @package ARC2
- * @version 2010-04-23
-*/
+ */
class ARC2_Class {
- /* */
-
- function __construct($a = '', &$caller) {
- $a = is_array($a) ? $a : array();
- $this->a = $a;
- $this->caller = &$caller;
+ function __construct($a, &$caller) {
+ $this->a = is_array($a) ? $a : array();
+ $this->caller = $caller;
$this->__init();
}
- function ARC2_Class($a = '', &$caller) {
- $this->__construct($a, $caller);
- }
-
- function __destruct() {
- //echo "\ndestructing " . get_class($this);
- }
-
function __init() {/* base, time_limit */
if (!$_POST && isset($GLOBALS['HTTP_RAW_POST_DATA'])) parse_str($GLOBALS['HTTP_RAW_POST_DATA'], $_POST); /* php5 bug */
$this->inc_path = ARC2::getIncPath();
$this->ns_count = 0;
- $this->nsp = array('http://www.w3.org/1999/02/22-rdf-syntax-ns#' => 'rdf');
- $this->used_ns = array('http://www.w3.org/1999/02/22-rdf-syntax-ns#');
- $this->ns = $this->v('ns', array(), $this->a);
+ $rdf = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
+ $this->nsp = array($rdf => 'rdf');
+ $this->used_ns = array($rdf);
+ $this->ns = array_merge(array('rdf' => $rdf), $this->v('ns', array(), $this->a));
$this->base = $this->v('base', ARC2::getRequestURI(), $this->a);
$this->errors = array();
$this->warnings = array();
$this->adjust_utf8 = $this->v('adjust_utf8', 0, $this->a);
$this->max_errors = $this->v('max_errors', 25, $this->a);
+ $this->has_pcre_unicode = @preg_match('/\pL/u', 'test');/* \pL = block/point which is a Letter */
}
/* */
function v($name, $default = false, $o = false) {/* value if set */
- if ($o === false) $o =& $this;
+ if ($o === false) $o = $this;
if (is_array($o)) {
return isset($o[$name]) ? $o[$name] : $default;
}
return isset($o->$name) ? $o->$name : $default;
}
function v1($name, $default = false, $o = false) {/* value if 1 (= not empty) */
- if ($o === false) $o =& $this;
+ if ($o === false) $o = $this;
if (is_array($o)) {
return (isset($o[$name]) && $o[$name]) ? $o[$name] : $default;
}
return (isset($o->$name) && $o->$name) ? $o->$name : $default;
}
function m($name, $a = false, $default = false, $o = false) {/* call method */
- if ($o === false) $o =& $this;
+ if ($o === false) $o = $this;
return method_exists($o, $name) ? $o->$name($a) : $default;
}
/* */
- function camelCase($v, $lc_first = 0) {
+ function camelCase($v, $lc_first = 0, $keep_boundaries = 0) {
$r = ucfirst($v);
while (preg_match('/^(.*)[^a-z0-9](.*)$/si', $r, $m)) {
+ /* don't fuse 2 upper-case chars */
+ if ($keep_boundaries && $m[1]) {
+ $boundary = substr($m[1], -1);
+ if (strtoupper($boundary) == $boundary) $m[1] .= 'CAMELCASEBOUNDARY';
+ }
$r = $m[1] . ucfirst($m[2]);
}
- return $r && $lc_first && !preg_match('/[A-Z]/', $r[1]) ? strtolower($r[0]) . substr($r, 1) : $r;
+ $r = str_replace('CAMELCASEBOUNDARY', '_', $r);
+ if ((strlen($r) > 1) && $lc_first && !preg_match('/[A-Z]/', $r[1])) $r = strtolower($r[0]) . substr($r, 1);
+ return $r;
}
function deCamelCase($v, $uc_first = 0) {
@@ -82,15 +79,40 @@ function deCamelCase($v, $uc_first = 0) {
return $uc_first ? ucfirst($r) : $r;
}
+ /**
+ * Tries to extract a somewhat human-readable label from a URI.
+ */
+
function extractTermLabel($uri, $loops = 0) {
list($ns, $r) = $this->splitURI($uri);
- $r = $this->deCamelCase($this->camelCase($r, 1));
+ /* encode apostrophe + s */
+ $r = str_replace('%27s', '_apostrophes_', $r);
+ /* normalize */
+ $r = $this->deCamelCase($this->camelCase($r, 1, 1));
+ /* decode apostrophe + s */
+ $r = str_replace(' apostrophes ', "'s ", $r);
+ /* typical RDF non-info URI */
if (($loops < 1) && preg_match('/^(self|it|this|me)$/i', $r)) {
return $this->extractTermLabel(preg_replace('/\#.+$/', '', $uri), $loops + 1);
}
+ /* trailing hash or slash */
if ($uri && !$r && ($loops < 2)) {
return $this->extractTermLabel(preg_replace('/[\#\/]$/', '', $uri), $loops + 1);
}
+ /* a de-camel-cased URL (will look like "www example com") */
+ if (preg_match('/^www (.+ [a-z]{2,4})$/', $r, $m)) {
+ return $this->getPrettyURL($uri);
+ }
+ return $r;
+ }
+
+ /**
+ * Generates a less ugly in-your-face URL.
+ */
+
+ function getPrettyURL($r) {
+ $r = rtrim($r, '/');
+ $r = preg_replace('/^https?\:\/\/(www\.)?/', '', $r);
return $r;
}
@@ -135,12 +157,14 @@ function splitURI($v) {
function getPName($v, $connector = ':') {
/* is already a pname */
- if ($ns = $this->getPNameNamespace($v, $connector)) {
+ $ns = $this->getPNameNamespace($v, $connector);
+ if ($ns) {
if (!in_array($ns, $this->used_ns)) $this->used_ns[] = $ns;
return $v;
}
/* new pname */
- if ($parts = $this->splitURI($v)) {
+ $parts = $this->splitURI($v);
+ if ($parts) {
/* known prefix */
foreach ($this->ns as $prefix => $ns) {
if ($parts[0] == $ns) {
@@ -180,9 +204,9 @@ function getPrefix($ns) {
function expandPName($v, $connector = ':') {
$re = '/^([a-z0-9\_\-]+)\:([a-z0-9\_\-\.\%]+)$/i';
if ($connector != ':') {
- $connectors = array('\:', '\-', '\_', '\.');
- $chars = join('', array_diff($connectors, array($connector)));
- $re = '/^([a-z0-9' . $chars . ']+)\\' . $connector . '([a-z0-9\_\-\.\%]+)$/i';
+ $connectors = array(':', '-', '_', '.');
+ $chars = '\\' . join('\\', array_diff($connectors, array($connector)));
+ $re = '/^([a-z0-9' . $chars . ']+)\\' . $connector . '([a-z0-9\_\-\.\%]+)$/Ui';
}
if (preg_match($re, $v, $m) && isset($this->ns[$m[1]])) {
return $this->ns[$m[1]] . $m[2];
@@ -376,10 +400,12 @@ function toLegacyHTML($v, $ns = '') {
return $ser->getSerializedArray($v);
}
- function toHTML($v, $ns = '') {
+ function toHTML($v, $ns = '', $label_store = '') {
ARC2::inc('MicroRDFSerializer');
if (!$ns) $ns = isset($this->a['ns']) ? $this->a['ns'] : array();
- $ser = new ARC2_MicroRDFSerializer(array_merge($this->a, array('ns' => $ns)), $this);
+ $conf = array_merge($this->a, array('ns' => $ns));
+ if ($label_store) $conf['label_store'] = $label_store;
+ $ser = new ARC2_MicroRDFSerializer($conf, $this);
return (isset($v[0]) && isset($v[0]['s'])) ? $ser->getSerializedTriples($v) : $ser->getSerializedIndex($v);
}
@@ -446,6 +472,56 @@ function mdAttrs($id, $type = '') {
return $this->getMicrodataAttrs($id, $type);
}
- /* */
+ /* central DB query hook */
+
+ function queryDB($sql, $con, $log_errors = 0) {
+ $t1 = ARC2::mtime();
+ $r = mysql_query($sql, $con);
+ if (0) {
+ $t2 = ARC2::mtime() - $t1;
+ $call_obj = $this;
+ $call_path = '';
+ while ($call_obj) {
+ $call_path = get_class($call_obj) . ' / ' . $call_path;
+ $call_obj = isset($call_obj->caller) ? $call_obj->caller : false;
+ }
+ echo "\n" . $call_path . " needed " . $t2 . ' secs for ' . str_replace("\n" , ' ', $sql);;
+ }
+ if ($log_errors && ($er = mysql_error($con))) $this->addError($er);
+ return $r;
+ }
+
+ /**
+ * Shortcut method to create an RDF/XML backup dump from an RDF Store object.
+ *
+ * Walks all distinct subjects in pages of $limit, DESCRIBEs each one and
+ * writes its RDF/XML serialization to $target_path. A non-zero $offset
+ * resumes a dump: the file is appended to instead of truncated.
+ *
+ * Returns the number of subjects covered (start offset + processed rows),
+ * or 0 if the target file could not be opened.
+ */
+ function backupStoreData($store, $target_path, $offset = 0) {
+ $limit = 10;
+ $fp = fopen($target_path, $offset ? 'ab' : 'wb');
+ if (!$fp) {
+ $this->addError('Could not open backup target "' . $target_path . '"');
+ return 0;
+ }
+ /* page iteratively: no deep recursion on large stores, and the file is opened only once */
+ do {
+ $q = '
+ SELECT DISTINCT ?s WHERE {
+ ?s ?p ?o .
+ }
+ ORDER BY ?s
+ LIMIT ' . $limit . '
+ ' . ($offset ? 'OFFSET ' . $offset : '') . '
+ ';
+ $rows = $store->query($q, 'rows');
+ if (!is_array($rows)) $rows = array();/* guard against a non-array query result */
+ $tc = count($rows);
+ foreach ($rows as $row) {
+ $index = $store->query('DESCRIBE <' . $row['s'] . '>', 'raw');
+ if ($index) {
+ $doc = $this->toRDFXML($index);
+ fwrite($fp, $doc . "\n\n");
+ }
+ }
+ $offset += $tc;
+ /* a full page means there may be more subjects to fetch */
+ $has_more = ($tc == $limit);
+ if ($has_more) set_time_limit(300);
+ } while ($has_more);
+ fclose($fp);
+ return $offset;
+ }
}
View
135 app/arc/ARC2_Reader.php
@@ -6,21 +6,17 @@
* @license <http://arc.semsol.org/license>
* @homepage <http://arc.semsol.org/>
* @package ARC2
- * @version 2010-03-31
+ * @version 2010-11-16
*/
ARC2::inc('Class');
class ARC2_Reader extends ARC2_Class {
- function __construct($a = '', &$caller) {
+ function __construct($a, &$caller) {
parent::__construct($a, $caller);
}
- function ARC2_Reader($a = '', &$caller) {
- $this->__construct($a, $caller);
- }
-
function __init() {/* inc_path, proxy_host, proxy_port, proxy_skip, http_accept_header, http_user_agent_header, max_redirects */
parent::__init();
$this->http_method = $this->v('http_method', 'GET', $this->a);
@@ -84,64 +80,87 @@ function activate($path, $data = '', $ping_only = 0, $timeout = 0) {
}
}
+ /*
+ * HTTP Basic/Digest + Proxy authorization can be defined in the
+ * arc_reader_credentials config setting:
+
+ 'arc_reader_credentials' => array(
+ 'http://basic.example.com/' => 'user:pass', // shortcut for type=basic
+ 'http://digest.example.com/' => 'user::pass', // shortcut for type=digest
+ 'http://proxy.example.com/' => array('type' => 'basic', 'proxy', 'user' => 'user', 'pass' => 'pass'),
+ ),
+
+ */
+
function setCredentials($url) {
if (!$creds = $this->v('arc_reader_credentials', array(), $this->a)) return 0;
- foreach ($creds as $pattern => $cred) {
+ foreach ($creds as $pattern => $creds) {
+ /* digest shortcut (user::pass) */
+ if (!is_array($creds) && preg_match('/^(.+)\:\:(.+)$/', $creds, $m)) {
+ $creds = array('type' => 'digest', 'user' => $m[1], 'pass' => $m[2]);
+ }
+ /* basic shortcut (user:pass) */
+ if (!is_array($creds) && preg_match('/^(.+)\:(.+)$/', $creds, $m)) {
+ $creds = array('type' => 'basic', 'user' => $m[1], 'pass' => $m[2]);
+ }
+ if (!is_array($creds)) return 0;
$regex = '/' . preg_replace('/([\:\/\.\?])/', '\\\\\1', $pattern) . '/';
if (!preg_match($regex, $url)) continue;
- $parts = parse_url($url);
- $path = $this->v1('path', '/', $parts);
- /* Basic auth */
- $auth = 'Basic ' . base64_encode($cred);
- /* Digest auth */
- if (preg_match('/(.*)\:\:(.*)/', $cred, $m)) {
- $username = $m[1];
- $pwd = $m[2];
- $auth = '';
- $hs = $this->getResponseHeaders();
- /* 401 received */
- $h = $this->v('www-authenticate', '', $hs);
- if ($h && preg_match('/Digest/i', $h)) {
- $auth = 'Digest ';
- /* Digest realm="$realm", nonce="$nonce", qop="auth", opaque="$opaque" */
- $ks = array('realm', 'nonce', 'opaque');/* skipping qop, assuming "auth" */
- foreach ($ks as $i => $k) {
- $$k = preg_match('/' . $k . '=\"?([^\"]+)\"?/i', $h, $m) ? $m[1] : '';
- $auth .= ($i ? ', ' : '') . $k . '="' . $$k . '"';
- $this->auth_infos[$k] = $$k;
- }
- $this->auth_infos['auth'] = $auth;
- $this->auth_infos['request_count'] = 1;
- }
- /* 401 or repeated request */
- if ($this->v('auth', 0, $this->auth_infos)) {
- $qop = 'auth';
- $auth = $this->auth_infos['auth'];
- $rc = $this->auth_infos['request_count'];
- $realm = $this->auth_infos['realm'];
- $nonce = $this->auth_infos['nonce'];
- $ha1 = md5($username . ':' . $realm . ':' . $pwd);
- $ha2 = md5($this->http_method . ':' . $path);
- $nc = dechex($rc);
- $cnonce = dechex($rc * 2);
- $resp = md5($ha1 . ':' . $nonce . ':' . $nc . ':' . $cnonce . ':' . $qop . ':' . $ha2);
- $auth .= ', username="' . $username . '"' .
- ', uri="' . $path . '"' .
- ', qop=' . $qop . '' .
- ', nc=' . $nc .
- ', cnonce="' . $cnonce . '"' .
- ', uri="' . $path . '"' .
- ', response="' . $resp . '"' .
- '';
- $this->auth_infos['request_count'] = $rc + 1;
- }
- }
- /* add header */
- if ($auth) {
- $this->addCustomHeaders('Authorization: ' . $auth);
- break;
+ $mthd = 'set' . $this->camelCase($creds['type']) . 'AuthCredentials';
+ if (method_exists($this, $mthd)) $this->$mthd($creds, $url);
+ }
+ }
+
+ /**
+ * Adds a Basic auth header built from $creds['user'] and $creds['pass'].
+ *
+ * A bare 'proxy' flag entry (as in array('type' => 'basic', 'proxy', ...))
+ * or a non-empty 'proxy' key selects Proxy-Authorization instead of
+ * Authorization.
+ */
+ function setBasicAuthCredentials($creds) {
+ $auth = 'Basic ' . base64_encode($creds['user'] . ':' . $creds['pass']);
+ /* only flag entries count; a user name or password that happens to be "proxy" must not */
+ $is_proxy = !empty($creds['proxy']);
+ foreach ($creds as $k => $v) {
+ if (is_int($k) && ($v === 'proxy')) $is_proxy = true;
+ }
+ $h = $is_proxy ? 'Proxy-Authorization' : 'Authorization';
+ $this->addCustomHeaders($h . ': ' . $auth);
+ }
+
+ /**
+ * Adds a Digest auth header for $url, based on a previously received
+ * "www-authenticate: Digest ..." challenge.
+ *
+ * The challenge values are cached in $this->auth_infos so that repeated
+ * requests can re-use them with an increased request counter. Returns 0
+ * (and adds no header) when no challenge is available. A bare 'proxy' flag
+ * entry or a non-empty 'proxy' key in $creds selects Proxy-Authorization.
+ */
+ function setDigestAuthCredentials($creds, $url) {
+ /* parse_url() returns false for malformed URLs, which v1() would treat as "use $this" */
+ $parts = parse_url($url);
+ $path = is_array($parts) ? $this->v1('path', '/', $parts) : '/';
+ $auth = '';
+ $hs = $this->getResponseHeaders();
+ /* initial 401 */
+ $h = $this->v('www-authenticate', '', $hs);
+ if ($h && preg_match('/Digest/i', $h)) {
+ $auth = 'Digest ';
+ /* Digest realm="$realm", nonce="$nonce", qop="auth", opaque="$opaque" */
+ $ks = array('realm', 'nonce', 'opaque');/* skipping qop, assuming "auth" */
+ foreach ($ks as $i => $k) {
+ $val = preg_match('/' . $k . '=\"?([^\"]+)\"?/i', $h, $m) ? $m[1] : '';
+ $auth .= ($i ? ', ' : '') . $k . '="' . $val . '"';
+ $this->auth_infos[$k] = $val;
+ }
+ $this->auth_infos['auth'] = $auth;
+ $this->auth_infos['request_count'] = 1;
+ }
+ /* initial 401 or repeated request */
+ if ($this->v('auth', 0, $this->auth_infos)) {
+ $qop = 'auth';
+ $auth = $this->auth_infos['auth'];
+ $rc = $this->auth_infos['request_count'];
+ $realm = $this->auth_infos['realm'];
+ $nonce = $this->auth_infos['nonce'];
+ $ha1 = md5($creds['user'] . ':' . $realm . ':' . $creds['pass']);
+ $ha2 = md5($this->http_method . ':' . $path);
+ $nc = dechex($rc);
+ $cnonce = dechex($rc * 2);
+ $resp = md5($ha1 . ':' . $nonce . ':' . $nc . ':' . $cnonce . ':' . $qop . ':' . $ha2);
+ /* each directive is sent exactly once (uri used to be emitted twice) */
+ $auth .= ', username="' . $creds['user'] . '"' .
+ ', uri="' . $path . '"' .
+ ', qop=' . $qop . '' .
+ ', nc=' . $nc .
+ ', cnonce="' . $cnonce . '"' .
+ ', response="' . $resp . '"' .
+ '';
+ $this->auth_infos['request_count'] = $rc + 1;
+ }
+ if (!$auth) return 0;
+ /* only flag entries count; a user name or password that happens to be "proxy" must not */
+ $is_proxy = !empty($creds['proxy']);
+ foreach ($creds as $k => $v) {
+ if (is_int($k) && ($v === 'proxy')) $is_proxy = true;
+ }
+ $h = $is_proxy ? 'Proxy-Authorization' : 'Authorization';
+ $this->addCustomHeaders($h . ': ' . $auth);
+ }
/* */
View
14 app/arc/ARC2_Resource.php
@@ -6,21 +6,17 @@
* @license http://arc.semsol.org/license
* @homepage <http://arc.semsol.org/>
* @package ARC2
- * @version 2010-02-23
+ * @version 2011-01-19
*/
ARC2::inc('Class');
class ARC2_Resource extends ARC2_Class {
- function __construct($a = '', &$caller) {
+ function __construct($a, &$caller) {
parent::__construct($a, $caller);
}
- function ARC2_Resource($a = '', &$caller) {
- $this->__construct($a, $caller);
- }
-
function __init() {
parent::__init();
$this->uri = '';
@@ -39,6 +35,10 @@ function setIndex($index) {
$this->index = $index;
}
+ /* returns the index previously assigned via setIndex(); the argument is unused and only kept (now optional) for backward compatibility */
+ function getIndex($index = '') {
+ return $this->index;
+ }
+
function setProps($props, $s = '') {
if (!$s) $s = $this->uri;
$this->index[$s] = $props;
@@ -69,7 +69,7 @@ function fetchData($uri = '') {
if (in_array($uri, $this->fetched)) return 0;
$this->index[$uri] = array();
if ($this->store) {
- $index = $this->store->query('DESCRIBE <' . $uri . '>', 'raw');
+ $index = $this->store->query('CONSTRUCT { <' . $uri . '> ?p ?o . } WHERE { <' . $uri . '> ?p ?o . } ', 'raw');
}
else {
$index = $this->toIndex($uri);
View
434 app/arc/ARC2_TestHandler.php
@@ -0,0 +1,434 @@
+<?php
+/*
+homepage: http://arc.web-semantics.org/
+license: http://arc.web-semantics.org/license
+
+class: ARC2 DAWG Test Handler
+author: Benjamin Nowack
+version: 2011-12-01
+*/
+
+ARC2::inc('Class');
+
+class ARC2_TestHandler extends ARC2_Class {
+
+ /**
+  * Creates the test handler.
+  *
+  * @param mixed $a          Passed to the parent constructor; also handed to the readers and parsers created by this class.
+  * @param mixed $caller     Passed to the parent constructor; has to be a store (__init() uses $this->caller as $this->store).
+  * @param mixed $data_store Kept as $this->data_store; runQueryEvaluationTest() resets it and loads the test data into it.
+  */
+ function __construct($a, &$caller, &$data_store) {/* caller has to be a store */
+ parent::__construct($a, $caller);
+ /* assigned after the parent constructor has run */
+ $this->data_store = $data_store;
+ }
+
+ /**
+  * Initialises the handler: runs the parent initialisation, exposes the
+  * caller as $this->store (the store that is queried for test metadata) and
+  * creates the ARC2_Reader used by getFile().
+  */
+ function __init() {
+ parent::__init();
+ /* the caller is expected to be a store, see the constructor */
+ $this->store = $this->caller;
+ ARC2::inc('Reader');
+ $this->reader = new ARC2_Reader($this->a, $this);
+ }
+
+ /* */
+
+ /**
+  * Runs the test identified by $id.
+  *
+  * The test type returned by getTestType() selects the handler method
+  * ("run" + type). Types without a matching method are reported as failed
+  * with the info text "not supported". The method pauses for one second
+  * before returning.
+  *
+  * @param string $id Test identifier (URI).
+  * @return array Array with the keys 'pass' and 'info'.
+  */
+ function runTest($id) {
+   $handler = 'run' . $this->getTestType($id);
+   if (method_exists($this, $handler)) {
+     $outcome = $this->$handler($id);
+   }
+   else {
+     $outcome = array('pass' => 0, 'info' => 'not supported');
+   }
+   sleep(1);
+   return $outcome;
+ }
+
+ /* */
+
+ /**
+  * Looks up the rdf:type of a test in the store and returns its local name,
+  * i.e. the part after the last "#".
+  *
+  * @param string $id Test identifier (URI).
+  * @return string Type name; "QueryEvaluationTest" when the store returns no row.
+  */
+ function getTestType($id) {
+   $res = $this->store->query('SELECT ?type WHERE { <' .$id. '> a ?type . }');
+   /* fall back to the evaluation test type when nothing is stored for the test */
+   $type = '#QueryEvaluationTest';
+   if (isset($res['result']['rows'][0])) {
+     $type = $res['result']['rows'][0]['type'];
+   }
+   return preg_replace('/^.*\#([^\#]+)$/', '$1', $type);
+ }
+
+ /* */
+
+ /**
+  * Returns the content behind $url, using a file cache below tmp/.
+  *
+  * The cache file name is derived from crc32($url). On a cache miss the
+  * content is read through an ARC2_Reader and then written to the cache on a
+  * best-effort basis: when the cache file cannot be opened, the freshly read
+  * content is still returned and nothing is cached.
+  *
+  * @param string $url Location of the document to fetch.
+  * @return string|false The document content; on a cache hit this is whatever file_get_contents() returns.
+  */
+ function getFile($url) {
+   $path = 'tmp/f' . crc32($url) . '.txt';
+   if (file_exists($path)) {
+     return file_get_contents($path);
+   }
+   if (!isset($this->reader)) {
+     $this->reader = new ARC2_Reader($this->a, $this);
+   }
+   $this->reader->activate($url);
+   $r = '';
+   while ($d = $this->reader->readStream()) {
+     $r .= $d;
+   }
+   $this->reader->closeStream();
+   /* the reader is dropped after each download; the next cache miss creates a new one */
+   unset($this->reader);
+   /* write only when the cache file could be opened: passing a failed handle to fwrite()/fclose() raises a TypeError on PHP 8, which "@" does not suppress */
+   $fp = @fopen($path, 'w');
+   if ($fp) {
+     @fwrite($fp, $r);
+     @fclose($fp);
+   }
+   return $r;
+ }
+
+ /**
+  * Runs a positive syntax test: the query referenced by the test's mf:action
+  * has to be parsed without errors and without unparsed code left over.
+  *
+  * @param string $id Test identifier (URI).
+  * @return array 'pass' is 1 when the query parsed completely, otherwise 0;
+  *               'info' holds the escaped query followed by either the parser
+  *               errors and unparsed rest (failure) or the query infos (success).
+  */
+ function runPositiveSyntaxTest($id) {
+   $nl = "\n";
+   /* look up the document that holds the query */
+   $sparql = '
+ PREFIX mf: <http://www.w3.org/2001/sw/DataAccess/tests/test-manifest#> .
+ SELECT DISTINCT ?action WHERE { <' .$id. '> mf:action ?action . }
+ ';
+   $lookup = $this->store->query($sparql);
+   $action = $lookup['result']['rows'][0]['action'];
+   $code = $this->getFile($action);
+   /* parse the query */
+   ARC2::inc('SPARQLPlusParser');
+   $parser = new ARC2_SPARQLPlusParser($this->a, $this);
+   $parser->parse($code, $action);
+   $infos = $parser->getQueryInfos();
+   $rest = $parser->getUnparsedCode();
+   $errors = $parser->getErrors();
+   /* a positive test fails as soon as the parser complains or leaves code behind */
+   $failed = ($errors || $rest);
+   if ($failed) {
+     $details = print_r($errors, 1) . $nl . print_r($rest, 1);
+   }
+   else {
+     $details = print_r($infos, 1);
+   }
+   $info = $nl . '<div style="border: #eee solid 1px ; padding: 5px; ">' . htmlspecialchars($code) . '</div>' . $nl;
+   $info .= htmlspecialchars($nl . $nl . $details);
+   return array('pass' => $failed ? 0 : 1, 'info' => $info);
+ }
+
+ /* */
+
+ /**
+  * Runs a negative syntax test: the query referenced by the test's mf:action
+  * is expected to be rejected, i.e. the parser has to report errors or leave
+  * unparsed code behind.
+  *
+  * @param string $id Test identifier (URI).
+  * @return array 'pass' is 1 when the query was rejected, otherwise 0;
+  *               'info' holds the escaped query followed by either the parser
+  *               errors and unparsed rest (rejected) or the query infos (accepted).
+  */
+ function runNegativeSyntaxTest($id) {
+   $nl = "\n";
+   /* look up the document that holds the query */
+   $sparql = '
+ PREFIX mf: <http://www.w3.org/2001/sw/DataAccess/tests/test-manifest#> .
+ SELECT DISTINCT ?action WHERE { <' .$id. '> mf:action ?action . }
+ ';
+   $lookup = $this->store->query($sparql);
+   $action = $lookup['result']['rows'][0]['action'];
+   $code = $this->getFile($action);
+   /* parse the query */
+   ARC2::inc('SPARQLPlusParser');
+   $parser = new ARC2_SPARQLPlusParser($this->a, $this);
+   $parser->parse($code, $action);
+   $infos = $parser->getQueryInfos();
+   $rest = $parser->getUnparsedCode();
+   $errors = $parser->getErrors();
+   /* a negative test passes only when the parser rejects the query */
+   $rejected = ($errors || $rest);
+   if ($rejected) {
+     $details = print_r($errors, 1) . $nl . print_r($rest, 1);
+   }
+   else {
+     $details = print_r($infos, 1);
+   }
+   $info = $nl . '<div style="border: #eee solid 1px ; padding: 5px; ">' . htmlspecialchars($code) . '</div>' . $nl;
+   $info .= htmlspecialchars($nl . $nl . $details);
+   return array('pass' => $rejected ? 1 : 0, 'info' => $info);
+ }
+
+ /* */
+
+ /**
+  * Runs a query evaluation test.
+  *
+  * Steps: look up the test's query, data, named-graph data and expected
+  * result documents in the store, fetch them via getFile(), parse the query,
+  * reset the data store and load the data sets, run the query and compare
+  * the outcome with the expected result through the matching
+  * isSame<type>Result() method.
+  *
+  * The test is reported as failed
+  * - with "query could not be parsed..." when there is not exactly one query
+  *   document or the parser reports errors or unparsed code (the data store
+  *   is left untouched in that case), and
+  * - with "not supported" when no comparison method exists for the query type.
+  *
+  * @param string $id Test identifier (URI).
+  * @return array Array with the keys 'pass' (0 or 1) and 'info' (HTML-ish debug text).
+  */
+ function runQueryEvaluationTest($id) {
+   $nl = "\n";
+   /* get action */
+   $q = '
+ PREFIX mf: <http://www.w3.org/2001/sw/DataAccess/tests/test-manifest#> .
+ PREFIX qt: <http://www.w3.org/2001/sw/DataAccess/tests/test-query#> .
+ SELECT DISTINCT ?query ?data ?graph_data ?result WHERE {
+ <' .$id. '> mf:action ?action ;
+ mf:result ?result .
+ ?action qt:query ?query .
+ OPTIONAL {
+ ?action qt:data ?data .
+ }
+ OPTIONAL {
+ ?action qt:graphData ?graph_data .
+ }
+ }
+ ';
+   $qr = $this->store->query($q);
+   $rows = $qr['result']['rows'];
+   /* collect the distinct document URLs per variable together with their content */
+   $files = array();
+   foreach (array('query', 'data', 'result', 'graph_data') as $var) {
+     $urls = array();
+     $contents = array();
+     foreach ($rows as $row) {
+       if (isset($row[$var]) && !in_array($row[$var], $urls)) {
+         $urls[] = $row[$var];
+         $contents[] = $this->getFile($row[$var]);
+       }
+     }
+     /* exactly one match is exposed as a scalar, anything else stays a list */
+     $single = (count($urls) == 1);
+     $files[$var] = $single ? $urls[0] : $urls;
+     $files[$var . '_value'] = $single ? $contents[0] : $contents;
+   }
+   $query = $files['query'];
+   $query_value = $files['query_value'];
+   $data = $files['data'];
+   $data_value = $files['data_value'];
+   $result = $files['result'];
+   $result_value = $files['result_value'];
+   $graph_data = $files['graph_data'];
+   $graph_data_value = $files['graph_data_value'];
+   /* without exactly one query document there is nothing that can be parsed or run */
+   if (!$query || is_array($query)) {
+     $shown = is_array($query_value) ? print_r($query_value, 1) : $query_value;
+     return array('pass' => 0, 'info' => 'query could not be parsed' . htmlspecialchars($shown));
+   }
+   /* query infos */
+   ARC2::inc('SPARQLPlusParser');
+   $parser = new ARC2_SPARQLPlusParser($this->a, $this);
+   $parser->parse($query_value, $query);
+   $infos = $parser->getQueryInfos();
+   $rest = $parser->getUnparsedCode();
+   $errors = $parser->getErrors();
+   /* stop before touching the data store when the query is unusable */
+   if ($errors || $rest) {
+     return array('pass' => 0, 'info' => 'query could not be parsed' . htmlspecialchars($query_value));
+   }
+   $q_type = $infos['query']['type'];
+   /* add data */
+   $dsets = $data ? (array) $data : array();
+   $gdsets = $graph_data ? (array) $graph_data : array();
+   /* fall back to the data sets named in the query itself */
+   if (!$dsets && !$gdsets && isset($infos['query']['dataset'])) {
+     foreach ($infos['query']['dataset'] as $set) {
+       if ($set['named']) {
+         $gdsets[] = $set['graph'];
+       }
+       else {
+         $dsets[] = $set['graph'];
+       }
+     }
+   }
+   $store = $this->data_store;
+   $store->reset();
+   foreach ($dsets as $graph) {
+     $store->query('LOAD <' .$graph. '>');
+   }
+   foreach ($gdsets as $graph) {
+     $store->query('LOAD <' .$graph. '> INTO <' .$graph. '>');
+   }
+   /* run query */
+   $sql = $store->query($query_value, 'sql', $query);
+   $qr = $store->query($query_value, '', $query);
+   $qr_result = $qr['result'];
+   if ($q_type == 'select') {
+     $qr_result = $this->adjustBnodes($qr['result'], $id);
+   }
+   elseif ($q_type == 'construct') {
+     $ser = ARC2::getTurtleSerializer($this->a);
+     $qr_result = $ser->getSerializedIndex($qr_result);
+   }
+   /* compare with the expected result; query types without a comparison method cannot be checked */
+   $m = 'isSame' . $q_type . 'Result';
+   if (!method_exists($this, $m)) {
+     return array('pass' => 0, 'info' => 'not supported');
+   }
+   $sub_r = $this->$m($qr_result, $result_value, $result, $id);
+   $pass = $sub_r['pass'];
+   if (in_array($id, array(
+     'http://www.w3.org/2001/sw/DataAccess/tests/data-r2/sort/manifest#dawg-sort-6',
+     'http://www.w3.org/2001/sw/DataAccess/tests/data-r2/sort/manifest#dawg-sort-8',
+     'http://www.w3.org/2001/sw/DataAccess/tests/data-r2/sort/manifest#dawg-sort-builtin',
+   ))) {
+     $pass = 0; /* manually checked 2007-09-18 */
+   }
+   if (in_array($id, array(
+     'http://www.w3.org/2001/sw/DataAccess/tests/data-r2/sort/manifest#dawg-sort-function',
+     'http://www.w3.org/2001/sw/DataAccess/tests/data-r2/reduced/manifest#reduced-1',
+     'http://www.w3.org/2001/sw/DataAccess/tests/data-r2/reduced/manifest#reduced-2',
+   ))) {
+     $pass = 1; /* manually checked 2007-11-28 */
+   }
+   /* build the report; the detailed dump is only added for failed tests */
+   $info = print_r($sub_r['info'], 1) . $nl;
+   $info .= '<hr />sql: ' . $nl . htmlspecialchars($sql['result']) . '<hr />';
+   if (!$pass) {
+     $info .= print_r($graph_data, 1) . $nl . htmlspecialchars(print_r($graph_data_value, 1)) . '<hr />';
+     $info .= print_r($data, 1) . $nl . htmlspecialchars(print_r($data_value, 1)) . '<hr />';
+     $info .= $query . $nl . htmlspecialchars($query_value) . '<hr />';
+     $info .= '<pre>query result: ' . $nl . htmlspecialchars(print_r($qr_result, 1)) . '</pre>' . '<hr />';
+     $info .= print_r($infos, 1);
+   }
+   return array('pass' => $pass, 'info' => $info);
+ }
+
+ /* */
+
+ /**
+  * Compares a SELECT (or boolean) query result with the expected result.
+  *
+  * The expected result is parsed as RDF (result-set vocabulary) when the
+  * document mentions the result-set namespace, otherwise as a SPARQL XML
+  * result. For non-boolean results the variable lists and the row sets have
+  * to match; rows are compared through buildArrayHashIndex().
+  *
+  * @param array  $qr          Query result as returned by the store ('variables' and 'rows', or 'boolean').
+  * @param string $result      Content of the expected-result document.
+  * @param string $result_base URL of the expected-result document.
+  * @return array 'pass' is truthy when both results match; 'info' is the parsed expected result.
+  */
+ function isSameSelectResult($qr, $result, $result_base) {
+   /* strpos() yields 0 for a match at the very start, so compare against false explicitly */
+   if (strpos($result, 'http://www.w3.org/2001/sw/DataAccess/tests/result-set#') !== false) {
+     $parser = ARC2::getRDFParser($this->a);
+     $parser->parse($result_base, $result);
+     $index = $parser->getSimpleIndex(0);
+     $valid_qr = $this->buildTurtleSelectQueryResult($index);
+   }
+   else {
+     $parser = ARC2::getSPARQLXMLResultParser($this->a);
+     $parser->parse('', $result);
+     $valid_qr = $parser->getStructure();
+   }
+   if (isset($valid_qr['boolean'])) {
+     $pass = $valid_qr['boolean'] == $this->v('boolean', '', $qr);
+     return array('pass' => $pass, 'info' => $valid_qr);
+   }
+   /* treat missing parts as empty lists so that an incomplete result fails the comparison instead of raising an error */
+   $valid_vars = isset($valid_qr['variables']) ? $valid_qr['variables'] : array();
+   $valid_rows = isset($valid_qr['rows']) ? $valid_qr['rows'] : array();
+   $vars = isset($qr['variables']) ? $qr['variables'] : array();
+   $rows = isset($qr['rows']) ? $qr['rows'] : array();
+   $pass = 1;
+   if ((count($valid_vars) != count($vars)) || (count($valid_rows) != count($rows))) {
+     $pass = 0;
+   }
+   if ($pass) {
+     /* every expected variable has to be present in the actual result */
+     foreach ($valid_vars as $var) {
+       if (!in_array($var, $vars)) {
+         $pass = 0;
+         break;
+       }
+     }
+   }
+   if ($pass) {
+     /* compare the rows in both directions, independent of their order */
+     $index = $this->buildArrayHashIndex($rows);
+     $valid_index = $this->buildArrayHashIndex($valid_rows);
+     if (array_diff($index, $valid_index) || array_diff($valid_index, $index)) {
+       $pass = 0;
+     }
+   }
+   return array('pass' => $pass, 'info' => $valid_qr);
+ }
+
+ /* */
+
+ /**
+  * Reports the outcome of a CONSTRUCT test.
+  *
+  * Both the expected and the actual result are parsed, but the triples are
+  * not compared: a test only passes when it is in the list of manually
+  * checked tests below.
+  *
+  * @param string $qr          Serialized actual query result.
+  * @param string $result      Content of the expected-result document.
+  * @param string $result_base URL of the expected-result document (not used here).
+  * @param string $test        Test identifier (URI).
+  * @return array 'pass' is 1 for manually checked tests, otherwise 0; 'info' holds the expected triples.
+  */
+ function isSameConstructResult($qr, $result, $result_base, $test) {
+   $expected_parser = ARC2::getRDFParser($this->a);
+   $expected_parser->parse('', $result);
+   $valid_triples = $expected_parser->getTriples();
+   /* the actual result is parsed as well, its triples are currently not evaluated */
+   $actual_parser = ARC2::getRDFParser($this->a);
+   $actual_parser->parse('', $qr);
+   $actual_parser->getTriples();
+   /* manually checked 2007-09-21 */
+   $checked = array(
+     'http://www.w3.org/2001/sw/DataAccess/tests/data-r2/construct/manifest#construct-1',
+     'http://www.w3.org/2001/sw/DataAccess/tests/data-r2/construct/manifest#construct-2',
+     'http://www.w3.org/2001/sw/DataAccess/tests/data-r2/construct/manifest#construct-3',
+     'http://www.w3.org/2001/sw/DataAccess/tests/data-r2/construct/manifest#construct-4',
+     'http://www.w3.org/2001/sw/DataAccess/tests/data-r2/construct/manifest#construct-5',
+   );
+   return array('pass' => in_array($test, $checked) ? 1 : 0, 'info' => $valid_triples);
+ }
+
+ /* */
+
+ /**
+  * Compares an ASK query result with the expected boolean.
+  *
+  * The expected value is taken from the result document's file name when it
+  * ends in true/false plus .ttl or .n3, otherwise from the text following
+  * "boolean>" in the document content; "-" is used when neither is found.
+  *
+  * @param mixed  $qr          Actual result; only a strict true counts as "true".
+  * @param string $result      Content of the expected-result document.
+  * @param string $result_base URL of the expected-result document.
+  * @return array 'pass' is 1 on a match, otherwise 0; 'info' holds the expected value.
+  */
+ function isSameAskResult($qr, $result, $result_base) {
+   if (preg_match('/(true|false)\.(ttl|n3)$/', $result_base, $m)) {
+     $expected = $m[1];
+   }
+   elseif (preg_match('/boolean\>([^\<]+)/s', $result, $m)) {
+     $expected = trim($m[1]);
+   }
+   else {
+     $expected = '-';
+   }
+   $actual = 'false';
+   if ($qr === true) {
+     $actual = 'true';
+   }
+   return array('pass' => ($actual == $expected) ? 1 : 0, 'info' => $expected);
+ }
+
+ /* */
+
+ /**
+  * Converts an RDF index that uses the DAWG result-set vocabulary into a
+  * query-result structure.
+  *
+  * Nodes typed rs:ResultSet contribute their rs:resultVariable values to
+  * 'variables'. Every node with rs:binding values becomes one row that holds,
+  * per bound variable, the value, its type and (when present) its datatype
+  * and language under the keys "<var>", "<var> type", "<var> datatype" and
+  * "<var> lang".
+  *
+  * @param array $index RDF index (node => property => list of value arrays).
+  * @return array Array with the keys 'variables' and 'rows'.
+  */
+ function buildTurtleSelectQueryResult($index) {
+   $rs = 'http://www.w3.org/2001/sw/DataAccess/tests/result-set#';
+   $rdf = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
+   $r = array('variables' => array(), 'rows' => array());
+   foreach ($index as $node => $props) {
+     /* result set nodes list the variables */
+     foreach ($this->v($rdf . 'type', array(), $props) as $type) {
+       if ($type['value'] != $rs . 'ResultSet') {
+         continue;
+       }
+       foreach ($this->v($rs . 'resultVariable', array(), $props) as $var) {
+         $r['variables'][] = $var['value'];
+       }
+     }
+     /* solution nodes point to their bindings */
+     $bindings = $this->v($rs . 'binding', array(), $props);
+     if (!$bindings) {
+       continue;
+     }
+     $row = array();
+     foreach ($bindings as $binding) {
+       $binding_props = $index[$binding['value']];
+       $name = $binding_props[$rs . 'variable'][0]['value'];
+       $bound = $binding_props[$rs . 'value'][0];
+       $row[$name] = $bound['value'];
+       $row[$name . ' type'] = $bound['type'];
+       if ($dt = $this->v('datatype', 0, $bound)) {
+         $row[$name . ' datatype'] = $dt;
+       }
+       if ($lang = $this->v('lang', 0, $bound)) {
+         $row[$name . ' lang'] = $lang;
+       }
+     }
+     $r['rows'][] = $row;
+   }
+   return $r;
+ }
+
+ /* */
+
+ /**
+  * Builds one comparable hash string per row.
+  *
+  * Each row is sorted by key; every entry with a non-numeric key adds
+  * " md5(key) md5(value)" to the row's string, entries with numeric keys are
+  * ignored.
+  *
+  * @param array $rows List of associative result rows.
+  * @return array List of hash strings, in the order of $rows.
+  */
+ function buildArrayHashIndex($rows) {
+   $hashes = array();
+   foreach ($rows as $row) {
+     ksort($row);
+     $line = '';
+     foreach ($row as $k => $v) {
+       if (is_numeric($k)) {
+         continue;
+       }
+       $line .= ' ' . md5($k) . ' ' . md5($v);
+     }
+     $hashes[] = $line;
+   }
+   return $hashes;
+ }
+
+ /* */
+
+ /**
+  * Replaces known blank node ids in a SELECT result with the ids listed in
+  * the mapping table below.
+  *
+  * A mapping is either a fixed replacement or a list of replacements keyed
+  * by test identifier with a 'default' entry.
+  *
+  * @param array  $result Query result; only touched when it has a 'rows' entry.
+  * @param string $data   Test identifier used to pick a test-specific replacement.
+  * @return array The result with the mapped values replaced.
+  */
+ function adjustBnodes($result, $data) {
+   $mappings = array(
+     '_:b1371233574_bob' => '_:b10',
+     '_:b1114277307_alice' => '_:b1f',
+     '_:b1368422168_eve' => '_:b20',
+     '_:b1638119969_fred' => '_:b21',
+
+     '_:b288335586_a' => array(
+       'http://www.w3.org/2001/sw/DataAccess/tests/data-r2/distinct/manifest#no-distinct-3' => '_:b0',
+       'http://www.w3.org/2001/sw/DataAccess/tests/data-r2/distinct/manifest#distinct-3' => '_:b0',
+       'http://www.w3.org/2001/sw/DataAccess/tests/data-r2/distinct/manifest#distinct-9' => '_:b0',
+       'http://www.w3.org/2001/sw/DataAccess/tests/data-r2/distinct/manifest#no-distinct-9' => '_:b0',
+       'default' => '_:bn5',
+     ),
+   );
+   if (!isset($result['rows'])) {
+     return $result;
+   }
+   foreach ($result['rows'] as $i => $row) {
+     foreach ($result['variables'] as $var) {
+       if (!isset($row[$var]) || !isset($mappings[$row[$var]])) {
+         continue;
+       }
+       $target = $mappings[$row[$var]];
+       if (is_array($target)) {
+         /* test-specific replacement, falling back to the default one */
+         $target = isset($target[$data]) ? $target[$data] : $target['default'];
+       }
+       $result['rows'][$i][$var] = $target;
+     }
+   }
+   return $result;
+ }
+
+}
View
2 app/arc/ARC2_getFormat.php
@@ -5,7 +5,7 @@
* @author Benjamin Nowack <bnowack@semsol.com>
* @license http://arc.semsol.org/license
* @package ARC2
- * @version 2010-01-18
+ * @version 2010-11-16
*/
function ARC2_getFormat($v, $mtype = '', $ext = '') {
View
23 app/arc/ARC2_getPreferredFormat.php
@@ -1,28 +1,31 @@
<?php
-/*
-homepage: http://arc.semsol.org/
-license: http://arc.semsol.org/license
-
-function: result format detection
-author: Benjamin Nowack
-version: 2008-08-04
+/**
+ * ARC2 result format detection
+ *
+ * @author Benjamin Nowack
+ * @license <http://arc.semsol.org/license>
+ * @homepage <http://arc.semsol.org/>
+ * @package ARC2
+ * @version 2010-11-16
*/
function ARC2_getPreferredFormat($default = 'plain') {
$formats = array(
'html' => 'HTML', 'text/html' => 'HTML', 'xhtml+xml' => 'HTML',
'rdfxml' => 'RDFXML', 'rdf+xml' => 'RDFXML',
- 'ntriples' => 'NTriples', 'rdf+n3' => 'Turtle', 'x-turtle' => 'Turtle', 'turtle' => 'Turtle',
+ 'ntriples' => 'NTriples',
+ 'rdf+n3' => 'Turtle', 'x-turtle' => 'Turtle', 'turtle' => 'Turtle', 'text/turtle' => 'Turtle',
'rdfjson' => 'RDFJSON', 'json' => 'RDFJSON',
'xml' => 'XML',
'legacyjson' => 'LegacyJSON'
);
$prefs = array();
$o_vals = array();
/* accept header */
- if ($vals = explode(',', $_SERVER['HTTP_ACCEPT'])) {
+ $vals = explode(',', $_SERVER['HTTP_ACCEPT']);
+ if ($vals) {
foreach ($vals as $val) {
- if (preg_match('/(rdf\+n3|x\-turtle|rdf\+xml|text\/html|xhtml\+xml|xml|json)/', $val, $m)) {
+ if (preg_match('/(rdf\+n3|(x\-|text\/)turtle|rdf\+xml|text\/html|xhtml\+xml|xml|json)/', $val, $m)) {
$o_vals[$m[1]] = 1;
if (preg_match('/\;q\=([0-9\.]+)/', $val, $sub_m)) {
$o_vals[$m[1]] = 1 * $sub_m[1];
View
8 app/arc/extractors/ARC2_DcExtractor.php
@@ -5,21 +5,17 @@
class: ARC2 DC Extractor
author: Benjamin Nowack
-version: 2008-04-09 (Fix: base URL (not doc URL) was used for annotations)
+version: 2010-11-16
*/
ARC2::inc('RDFExtractor');
class ARC2_DcExtractor extends ARC2_RDFExtractor {
- function __construct($a = '', &$caller) {
+ function __construct($a, &$caller) {
parent::__construct($a, $caller);
}
- function ARC2_DcExtractor($a = '', &$caller) {
- $this->__construct($a, $caller);
- }
-
function __init() {
parent::__init();
$this->a['ns']['dc'] = 'http://purl.org/dc/elements/1.1/';
View
8 app/arc/extractors/ARC2_ErdfExtractor.php
@@ -5,21 +5,17 @@
class: ARC2 eRDF Extractor (w/o link title generation)
author: Benjamin Nowack
-version: 2009-02-09 (Tweak: getRootNode returns 1st node if html tag is not found)
+version: 2010-11-16
*/
ARC2::inc('RDFExtractor');
class ARC2_ErdfExtractor extends ARC2_RDFExtractor {
- function __construct($a = '', &$caller) {
+ function __construct($a, &$caller) {
parent::__construct($a, $caller);
}
- function ARC2_ErdfExtractor($a = '', &$caller) {
- $this->__construct($a, $caller);
- }
-
function __init() {
parent::__init();
}
View
14 app/arc/extractors/ARC2_MicroformatsExtractor.php
@@ -5,21 +5,17 @@
class: ARC2 microformats Extractor
author: Benjamin Nowack
-version:
+version: 2010-11-16
*/
ARC2::inc('ARC2_PoshRdfExtractor');
class ARC2_MicroformatsExtractor extends ARC2_PoshRdfExtractor {
- function __construct($a = '', &$caller) {
+ function __construct($a, &$caller) {
parent::__construct($a, $caller);
}
- function ARC2_MicroformatsExtractor($a = '', &$caller) {
- $this->__construct($a, $caller);
- }
-
function __init() {
parent::__init();
$this->terms = $this->getTerms();
@@ -49,8 +45,8 @@ function preProcessNode($n) {
}
}
}
- $n['a']['class m'] = split(' ', $n['a']['class']);
- $n['a']['rel m'] = split(' ', $n['a']['rel']);
+ $n['a']['class m'] = preg_split('/ /', $n['a']['class']);
+ $n['a']['rel m'] = preg_split('/ /', $n['a']['rel']);
return $n;
}
@@ -103,7 +99,7 @@ function getTerms() {
'education' => array('s', 'o', 'scope' => array('hresume')),
'email' => array('s', 'o', 'scope' => array('vcard')),
'entry-title' => array('o', 'scope' => array('hentry')),
- 'entry-content' => array('o', 'scope' => array('hentry')),
+ 'entry-content' => array('o-xml', 'scope' => array('hentry')),
'entry-summary' => array('o', 'scope' => array('hentry')),
'experience' => array('s', 'o', 'scope' => array('hresume')),
'extended-address' => array('o', 'scope' => array('adr')),
View
8 app/arc/extractors/ARC2_OpenidExtractor.php
@@ -5,21 +5,17 @@
class: ARC2 foaf:openid Extractor
author: Benjamin Nowack
-version: 2007-10-08
+version: 2010-11-16
*/
ARC2::inc('RDFExtractor');
class ARC2_OpenidExtractor extends ARC2_RDFExtractor {
- function __construct($a = '', &$caller) {
+ function __construct($a, &$caller) {
parent::__construct($a, $caller);
}
- function ARC2_OpenidExtractor($a = '', &$caller) {
- $this->__construct($a, $caller);
- }
-
function __init() {
parent::__init();
$this->a['ns']['foaf'] = 'http://xmlns.com/foaf/0.1/';
View
8 app/arc/extractors/ARC2_PoshRdfExtractor.php
@@ -5,21 +5,17 @@
class: ARC2 poshRDF Extractor
author: Benjamin Nowack
-version:
+version: 2010-11-16
*/
ARC2::inc('ARC2_RDFExtractor');
class ARC2_PoshRdfExtractor extends ARC2_RDFExtractor {
- function __construct($a = '', &$caller) {
+ function __construct($a, &$caller) {
parent::__construct($a, $caller);
}
- function ARC2_PoshRdfExtractor($a = '', &$caller) {
- $this->__construct($a, $caller);
- }
-
function __init() {
parent::__init();
$this->terms = $this->v('posh_terms', array(), $this->a);
View
16 app/arc/extractors/ARC2_RDFExtractor.php
@@ -5,21 +5,17 @@
class: ARC2 RDF Extractor
author: Benjamin Nowack
-version: 2008-11-18 (Fix: Skip comments. Thanks to Masahide Kanzaki)
+version: 2010-11-16
*/
ARC2::inc('Class');
class ARC2_RDFExtractor extends ARC2_Class {
- function __construct($a = '', &$caller) {
+ function __construct($a, &$caller) {
parent::__construct($a, $caller);
}
- function ARC2_RDFExtractor($a = '', &$caller) {
- $this->__construct($a, $caller);
- }
-
function __init() {
parent::__init();
$this->nodes = $this->caller->getNodes();
@@ -36,14 +32,6 @@ function x($re, $v, $options = 'si') {
return ARC2::x($re, $v, $options);
}
- function camelCase($v) {
- $r = ucfirst($v);
- while (preg_match('/^(.*)[\-\_ ](.*)$/', $r, $m)) {
- $r = $m[1] . ucfirst($m[2]);
- }
- return $r;
- }
-
function createBnodeID(){
$this->bnode_id++;
return '_:' . $this->bnode_prefix . $this->bnode_id;
View
8 app/arc/extractors/ARC2_RdfaExtractor.php
@@ -5,21 +5,17 @@
class: ARC2 RDFa Extractor
author: Benjamin Nowack
-version: 2009-05-29 (Fix: CURIEs support DOTs now)
+version: 2010-11-16
*/
ARC2::inc('RDFExtractor');
class ARC2_RdfaExtractor extends ARC2_RDFExtractor {
- function __construct($a = '', &$caller) {
+ function __construct($a, &$caller) {
parent::__construct($a, $caller);
}
- function ARC2_RdfaExtractor($a = '', &$caller) {
- $this->__construct($a, $caller);
- }
-
function __init() {
parent::__init();
}
View
8 app/arc/extractors/ARC2_TwitterProfilePicExtractor.php
@@ -5,21 +5,17 @@
class: ARC2 Extractor
author: Benjamin Nowack
-version: 2008-12-09
+version: 2010-11-16
*/
ARC2::inc('RDFExtractor');
class ARC2_TwitterProfilePicExtractor extends ARC2_RDFExtractor {
- function __construct($a = '', &$caller) {
+ function __construct($a, &$caller) {
parent::__construct($a, $caller);
}
- function ARC2_TwitterProfilePicExtractor($a = '', &$caller) {
- $this->__construct($a, $caller);
- }
-
function __init() {
parent::__init();
$this->a['ns']['foaf'] = 'http://xmlns.com/foaf/0.1/';
View
12 app/arc/parsers/ARC2_AtomParser.php
@@ -5,21 +5,17 @@
class: ARC2 Atom Parser
author: Benjamin Nowack
-version: 2009-04-21 (Addition: support for link types)
+version: 2010-11-16
*/
ARC2::inc('LegacyXMLParser');
class ARC2_AtomParser extends ARC2_LegacyXMLParser {
- function __construct($a = '', &$caller) {
+ function __construct($a, &$caller) {
parent::__construct($a, $caller);
}
- function ARC2_AtomParser($a = '', &$caller) {
- $this->__construct($a, $caller);
- }
-
function __init() {/* reader */
parent::__init();
$this->triples = array();
@@ -42,7 +38,7 @@ function done() {
/* */
function setReader(&$reader) {
- $this->reader =& $reader;
+ $this->reader = $reader;
}
function createBnodeID(){
@@ -239,7 +235,7 @@ function initXMLParser() {
xml_set_character_data_handler($parser, 'cData');
xml_set_start_namespace_decl_handler($parser, 'nsDecl');
xml_set_object($parser, $this);
- $this->xml_parser =& $parser;
+ $this->xml_parser = $parser;
}
}
View
10 app/arc/parsers/ARC2_CBJSONParser.php
@@ -6,21 +6,17 @@
* @license http://arc.semsol.org/license
* @homepage <http://arc.semsol.org/>
* @package ARC2
- * @version 2010-03-25
+ * @version 2010-11-16
*/
ARC2::inc('JSONParser');
class ARC2_CBJSONParser extends ARC2_JSONParser {
-