Skip to content

Commit

Permalink
update search.php to remove check for roman chars, add html and xml f…
Browse files Browse the repository at this point in the history
…ormats, add additional fields per item
  • Loading branch information
wlhays committed Jun 6, 2014
1 parent 635c728 commit 48ae96b
Showing 1 changed file with 177 additions and 85 deletions.
262 changes: 177 additions & 85 deletions tgw/webservice/search.php
Original file line number Diff line number Diff line change
Expand Up @@ -7,57 +7,37 @@
*
*
*/
function search_placename($conn, $name_key, $year_key) {
function search_placename($conn, $name_key, $year_key, $fmt = 'json') {

if (is_roman($name_key)) {

if(substr_compare($name_key, '$', -1, 1) === 0) { //test for last char '$' to indicate no wildcard
$name_key = substr($name_key, 0, -1);
} else {
$name_key = $name_key . '%';
}

$query = "SELECT pn.sys_id, pn.name, pn.transcription, concat(pn.beg_yr, '-', pn.end_yr) years, " .
"pn.ftype_vn 'feature type', pn.ftype_tr 'feature type transcription', " .
"parent.sys_id 'parent sys_id', parent.name 'parent name', parent.transcription 'parent transcription', " .
"concat(parent.beg_yr, '-', parent.end_yr) 'parent years' " .
"FROM mv_pn_srch pn LEFT JOIN part_of pof ON (pn.id = pof.child_id) " .
"LEFT JOIN mv_pn_srch parent ON (parent.id = pof.parent_id) " .
"WHERE pn.transcription LIKE ? ";

if ($year_key) {
$query .= "AND (pn.beg_yr >= ? AND pn.end_yr <= ? ) ";
}
if(substr_compare($name_key, '$', -1, 1) === 0) { //test for last char '$' to indicate no wildcard
$name_key = substr($name_key, 0, -1);
} else {
$name_key = $name_key . '%';
}

$query .= "ORDER BY pn.transcription, pn.sys_id limit " . MAX_SEARCH_HITS . ";";
$query = "SELECT pn.sys_id, pn.data_src, pn.name, pn.transcription, concat(pn.beg_yr, '-', pn.end_yr) years, " .
"pn.ftype_vn, pn.ftype_tr, " .
"parent.sys_id 'parent sys_id', parent.name 'parent name', parent.transcription 'parent transcription' " .
// "parent.beg_yr 'parent begin year', parent.end_yr 'parent end year' " .
"FROM mv_pn_srch pn LEFT JOIN part_of pof ON (pn.id = pof.child_id) " .
"LEFT JOIN mv_pn_srch parent ON (parent.id = pof.parent_id) " .
"WHERE pn.transcription LIKE ? ";

} else {
if ($year_key) {
$query .= "AND (pn.beg_yr >= ? AND pn.end_yr <= ? ) ";
}

$name_key .= '%';
$query = "SELECT pn.sys_id, pn.name, pn.transcription, concat(pn.beg_yr, '-', pn.end_yr) years, " .
"pn.ftype_vn 'feature type', pn.ftype_tr 'feature type transcription', " .
"parent.sys_id, parent.name 'parent name', parent.transcription 'parent transcription', " .
"concat(parent.beg_yr, '-', parent.end_yr) 'parent years' " .
"FROM mv_pn_srch pn LEFT JOIN part_of pof ON (pn.id = pof.child_id) " .
"LEFT JOIN mv_pn_srch parent ON (parent.id = pof.parent_id) " .
"WHERE pn.name LIKE ? ";

if ($year_key) {
$query .= "AND (pn.beg_yr >= ? AND pn.end_yr <= ? ) ";
}
$query .= "ORDER BY pn.transcription, pn.sys_id limit " . MAX_SEARCH_HITS . ";";

$query .= "ORDER BY pn.name, pn.sys_id limit " . MAX_SEARCH_HITS . ";";
}

if (!$stmt = $conn->prepare($query)) {
tlog(E_ERROR, "mysqli prepare failure: " . mysqli_error());
tlog(E_ERROR, "mysqli prepare failure: " . $conn->error);
alt_response(500);
return;
}

//echo 'name key = ' . $name_key;
//echo 'query = ' . $query;

if ($year_key) {
if (!$stmt->bind_param('sii', $name_key, $year_key, $year_key)) { // 'sii' means '1 string and 2 int params'
tlog(E_ERROR, "mysqli binding yr parameters failed: " . $stmt->errno . " - " . $stmt->error);
Expand All @@ -80,37 +60,172 @@ function search_placename($conn, $name_key, $year_key) {

$pns = array(); //indexed

$result = $stmt->get_result();
while ($row = $result->fetch_assoc()) {
$pns[] = $row;
$md = $stmt->result_metadata();
$params = array();
$fvals = array();

while($field = $md->fetch_field()) {
$params[] = &$fvals[$field->name];
}

call_user_func_array(array($stmt, 'bind_result'), $params);

while ($stmt->fetch()) {
// none of the "copy array" functions in PHP actually create new arrays
// hence the simple implementation here
$cp = array();
foreach($fvals as $key => $value) {
$cp[$key] = $value;
}
$pns[] = $cp;
}

$result->free();
$stmt->close();

$pns2 = reduce_parents($pns);
$pns2 = reduce_to_first_parent($pns);

switch($fmt) {
case 'json':
search_to_json($pns2, $name_key, $year_key); break;
case 'html':
search_to_html($pns2, $name_key, $year_key); break;
case 'xml':
search_to_xml($pns2, $name_key, $year_key); break;
default:
tlog(E_WARNING, "Invalid fmt type: " . $fmt);
//FIXME - output?? shouldn't really get here
}
}
}

$wrapper = array(
'memo' => "Results for query matching key '$name_key'" .
( $year_key ? " and year '$year_key'" : "") .
( (count($pns) >= MAX_SEARCH_HITS) ? ' Returned more than the maximum. Please refine your search.' : ''),
'count' => count($pns2),
'hits' => $pns2
);
// input: ordered placenames
// process: for repeated sys_ids, filter for the first parent in each group
// for alternative to display more than the first parent, see the function 'reduce parents' below
function reduce_to_first_parent($pns) {

header('Content-Type: text/json; charset=utf-8');
echo json_encode($wrapper, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES | JSON_PRETTY_PRINT );
$pns3 = array();

$prev = null;
$max_parents = 6; //maximum number of parents to display

foreach($pns as $pn) {

if ($pn['parent sys_id'] == null) {
$parent = '';
} else {
$parent = $pn['parent name'] . " (" . $pn['parent transcription'] . ")";
}

$parent_count = 0;

if (($prev != null) && ($pn['sys_id'] == $prev['sys_id'])) {
//skip later parents
} else {

$pns3[] = array(
'sys_id' => $pn['sys_id'],
'uri' => 'http://chgis.hmdc.harvard.edu/placename/' . $pn['sys_id'],
'name' => $pn['name'],
'transcription' => $pn['transcription'],
'years' => $pn['years'],
'parent' => $parent,
'feature type' => $pn['ftype_vn'] . " (" . $pn['ftype_tr'] . ")",
'data source' => $pn['data_src']
);

$prev = $pn; //keep a reference to this pn for the next iteration
}
}

return $pns3;
}


function search_to_json($pns, $name_key, $year_key) {

$jt = "{\n";
$indent = ' ';
$depth = 0; //depth of indent

$jt .= jline('system', 'CHGIS - Harvard University & Fudan University', 1)
. jline('memo', "Results for query matching key '$name_key'"
. ( $year_key ? " and year '$year_key'" : "")
. ( (count($pns) >= MAX_SEARCH_HITS) ? ' Returned more than the maximum. Please refine your search.' : ''), 1)
. jline('count', count($pns), 1)
// . $indent . "\"hits\" : [\n"
. jarray('hits', $pns, 1, true)
// . $indent . "},\n"
. "}";

header('Content-Type: text/json; charset=utf-8');
echo $jt;
}

function search_to_xml($pns, $name_key, $year_key) {

$t = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
. "<search-results system=\"CHGIS - Harvard University and Fudan University\""
. " count=\"" . count($pns) . "\">\n"

. " <placenames>\n"
. ((count($pns) >= MAX_SEARCH_HITS) ? " <memo>Returned more than the maximum. Please refine your search.</memo>\n" : "");

foreach($pns as $pn) {
$t .= " <placename sys_id=\"" . $pn['sys_id'] . "\">\n"
. " <uri>" . $pn['uri'] . "</uri>\n"
. " <name>" . $pn['name'] . "</name>\n"
. " <transcription>" . $pn['transcription'] . "</transcription>\n"
. " <years>" . $pn['years'] . "</years>\n"
. " <parent>" . $pn['parent'] . "</parent>\n"
. " <feature-type>" . $pn['feature type'] . "</feature-type>\n"
. " <data-source>" . $pn['data source'] . "</data-source>\n"
. " </placename>\n";
}


$t .= " </placenames>\n"
. "</search-results>";



header('Content-Type: text/xml; charset=utf-8');
echo $t;
}

function search_to_html($pns, $name_key, $year_key) {

$t = "<!DOCTYPE html>\n"
. "<html>\n"
. "<body>\n";

// introductory remarks here

// <div class="results">

foreach ($pns as $pn) {
$t .= " <dl class=\"pn\">" . $pn['sys_id']
. " <dt class=\"pnt\"></dt><dd class=\"pnd\"></dd>\n"
// etc.
. "</dl>\n";
}

$t .= "</body>\n"
. "</html>";

header('Content-Type: text/html; charset=utf-8');
echo $t;
}

// use this to capture multiple parents (NOT CURRENTLY USED CODE)
// input: ordered placenames
// process: for repeated sys_ids reformat as a single item with a compound field for multiple parents
/*
function reduce_parents($pns) {
// create new array
$pns2 = array();
$prev = null;
$max = 6; //maximum number of parents to concatenate
$max_parents = 6; //maximum number of parents to display
foreach($pns as $pn) {
Expand All @@ -121,20 +236,25 @@ function reduce_parents($pns) {
'sys_id' => $pn['parent sys_id'],
'name' => $pn['parent name'],
'transcription' => $pn['parent transcription'],
'years' => $pn['parent years']
'years' => $pn['parent begin year'] . " ~ " . $pn['parent end year']
);
}
$parent_count = 0;
if (($prev != null) && ($pn['sys_id'] == $prev['sys_id'])) {
$pns2[count($pns2) - 1]['parents'][] = $parent;
if ($parent_count++ < $max_parents) {
$pns2[count($pns2) - 1]['parents'][] = $parent;
}
} else {
$pns2[] = array(
'sys_id' => $pn['sys_id'],
'name' => $pn['name'],
'transcription' => $pn['transcription'],
'years' => $pn['years'],
'parents' => ($parent == null ? array() : array($parent))
'parents' => ($parent == null ? array() : array($parent)),
'data source' => $pn['data_src']
);
$prev = $pn; //keep a reference to this pn for the next iteration
Expand All @@ -143,34 +263,6 @@ function reduce_parents($pns) {
return $pns2;
}

function is_roman($str) {
$c = mb_substr($str, 0, 1, 'utf-8');
$v = unicode_ord($c);
return ($v > 31 && $v < 254);
}

/*
* found at: http://stackoverflow.com/questions/9361303/can-i-get-the-unicode-value-of-a-character-or-vise-versa-with-php
* true source unknown
*
*/
function unicode_ord($c) {
if (ord($c{0}) >=0 && ord($c{0}) <= 127)
return ord($c{0});
if (ord($c{0}) >= 192 && ord($c{0}) <= 223)
return (ord($c{0})-192)*64 + (ord($c{1})-128);
if (ord($c{0}) >= 224 && ord($c{0}) <= 239)
return (ord($c{0})-224)*4096 + (ord($c{1})-128)*64 + (ord($c{2})-128);
if (ord($c{0}) >= 240 && ord($c{0}) <= 247)
return (ord($c{0})-240)*262144 + (ord($c{1})-128)*4096 + (ord($c{2})-128)*64 + (ord($c{3})-128);
if (ord($c{0}) >= 248 && ord($c{0}) <= 251)
return (ord($c{0})-248)*16777216 + (ord($c{1})-128)*262144 + (ord($c{2})-128)*4096 + (ord($c{3})-128)*64 + (ord($c{4})-128);
if (ord($c{0}) >= 252 && ord($c{0}) <= 253)
return (ord($c{0})-252)*1073741824 + (ord($c{1})-128)*16777216 + (ord($c{2})-128)*262144 + (ord($c{3})-128)*4096 + (ord($c{4})-128)*64 + (ord($c{5})-128);
if (ord($c{0}) >= 254 && ord($c{0}) <= 255) // error
return FALSE;
return 0;
}
*/

?>

0 comments on commit 48ae96b

Please sign in to comment.