Skip to content

Commit

Permalink
initial implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
cebe committed Apr 24, 2014
1 parent 698ffab commit 90226a3
Show file tree
Hide file tree
Showing 12 changed files with 653 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
jssearch.index.js

/vendor/
133 changes: 133 additions & 0 deletions bin/jsindex
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
#!/usr/bin/env php
<?php

// Route PHP's own error output to stderr instead of stdout.
ini_set('display_errors', 'stderr');

// Locate the composer autoloader: candidates are probed in order and only the
// first one that exists on disk is loaded.
$composerAutoload = [
    // standalone with "composer install" run
    __DIR__ . '/../vendor/autoload.php',
    // script is installed as a composer binary
    __DIR__ . '/../../../autoload.php',
];
foreach ($composerAutoload as $autoload) {
    if (!file_exists($autoload)) {
        continue;
    }
    require($autoload);
    break;
}

// Without a working autoloader the Indexer class cannot be resolved; abort early
// with a hint on how to fix the installation.
if (class_exists('cebe\jssearch\Indexer') === false) {
    error('Autoloading does not seem to work. Looks like you should run `composer install` first.');
}

// Parse the command line: every argument starting with "-" is treated as an
// option, everything else is collected in $src as a source directory.
$src = [];
foreach ($argv as $k => $arg) {
    if ($k === 0) {
        // $argv[0] is the script name, not an argument
        continue;
    }
    // isset() guards against an empty-string argument (e.g. `jsindex ""`), for
    // which reading $arg[0] would raise an "Uninitialized string offset" error.
    if (isset($arg[0]) && $arg[0] === '-') {
        // options may be given as "--name=value"; only the name part is evaluated so far
        $option = explode('=', $arg);
        switch ($option[0]) {
            // TODO allow baseUrl to be set via arg
            case '-h':
            case '--help':
                echo "jssearch index builder\n";
                echo "----------------------\n\n";
                echo "by Carsten Brandt <mail@cebe.cc>\n\n";
                usage();
                break;
            default:
                error("Unknown argument " . $option[0], "usage");
        }
    } else {
        $src[] = $arg;
    }
}

if (empty($src)) {
    error("You have to give an input directory.", "usage");
}

$indexer = new \cebe\jssearch\Indexer();

foreach ($src as $dir) {
    // Collect the files of this directory only. Merging them into one shared
    // list would pass the files of earlier directories to indexFiles() again,
    // paired with a later $dir, and would keep the emptiness check below from
    // ever triggering for any directory but the first.
    $files = findFiles($dir);

    if (empty($files)) {
        error("No files were found in $dir.");
    }

    // NOTE(review): $dir is presumably used by the indexer as the base path of
    // $files - confirm against Indexer::indexFiles().
    $indexer->indexFiles($files, $dir);
}

// Write the generated index into the current working directory.
$js = $indexer->exportJs();
if (file_put_contents('jssearch.index.js', $js) === false) {
    error('Unable to write jssearch.index.js to the current directory.');
}


// functions

/**
 * Display usage information and terminate the script.
 *
 * The synopsis is written to stdout, using the script name from `$argv[0]`.
 * The function always ends the script with exit status 1.
 *
 * @return void this function does not return, it calls exit().
 */
function usage()
{
    global $argv;
    $cmd = $argv[0];
    // The blank line before the closing marker makes the output end with a
    // newline, so the shell prompt does not get glued to the last line.
    echo <<<EOF
Usage:
    $cmd [src-directory]

    --help    shows this usage information.

    creates a jssearch.index.js file in the current directory.

EOF;
    exit(1);
}

/**
 * Write an error message to stderr and terminate the script.
 *
 * The message is prefixed with "Error: " and terminated with a newline.
 * The script always ends with exit status 1.
 *
 * @param string $message the error message to print.
 * @param mixed $callback optional callable that is invoked after the message
 * has been written and before the script exits; anything not callable is ignored.
 * @return void
 */
function error($message, $callback = null)
{
    $stderr = fopen("php://stderr", "w");
    fwrite($stderr, "Error: {$message}\n");

    $hasCallback = is_callable($callback);
    if ($hasCallback) {
        call_user_func($callback);
    }

    exit(1);
}

/**
 * Recursively collect all files below a directory whose name ends with the given suffix.
 *
 * Sub-directories are always descended into, regardless of their own name;
 * the suffix is only matched against regular files.
 * The script is terminated via error() when $dir is not a directory or cannot be opened.
 *
 * @param string $dir the directory to search.
 * @param string $ext the file name suffix to look for, including the dot. Defaults to '.html'.
 * @return array list of matching file paths, each one prefixed with $dir, in the
 * order the entries are reported by readdir().
 */
function findFiles($dir, $ext = '.html')
{
    if (!is_dir($dir)) {
        error("$dir is not a directory.");
    }
    $dir = rtrim($dir, DIRECTORY_SEPARATOR);
    $handle = opendir($dir);
    if ($handle === false) {
        error('Unable to open directory: ' . $dir);
    }

    $extLength = strlen($ext);
    $list = [];
    while (($file = readdir($handle)) !== false) {
        if ($file === '.' || $file === '..') {
            continue;
        }
        $path = $dir . DIRECTORY_SEPARATOR . $file;
        if (is_dir($path)) {
            // descend into every sub-directory, not only those named like "*$ext"
            $list = array_merge($list, findFiles($path, $ext));
        } elseif (is_file($path) && substr($file, -$extLength, $extLength) === $ext) {
            $list[] = $path;
        }
    }
    closedir($handle);

    return $list;
}
19 changes: 19 additions & 0 deletions composer.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{
"name": "cebe/js-search",
"description": "A client side search engine for use on static pages.",
"license": "MIT",
"authors": [
{
"name": "Carsten Brandt",
"email": "mail@cebe.cc"
}
],
"require": {
"php": ">=5.4.0"
},
"autoload": {
"psr-4": {
"cebe\\jssearch\\": "lib/"
}
}
}
50 changes: 50 additions & 0 deletions example.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
<!doctype html>
<html>
<head>
    <meta charset="UTF-8">

    <!-- jQuery is loaded via https so the example also works on pages served over https (no mixed content) -->
    <script src="https://code.jquery.com/jquery-1.11.0.min.js"></script>
    <script src="./jssearch.js"></script>
    <script src="./jssearch.index.js"></script>

    <script type="text/javascript">

    $( document ).ready(function() {
        // upper bound for the number of result entries rendered per query
        // (the former check `i++ > 20` let 21 entries through)
        var maxResults = 20;

        // run a search on every key stroke and render query words and results
        $('#searchbox').on("keyup", function() {
            var result = $.jssearch.search($(this).val());
            // defensive: only arrays can be sliced/iterated below
            if (!$.isArray(result)) {
                result = [];
            }

            // .text() instead of .html(): the query is user input and must not be parsed as markup
            $('#query').text($.jssearch.queryWords.join(' '));

            var list = $('#results').empty();
            if (result.length === 0) {
                $('<li>').text('No results').appendTo(list);
                return;
            }

            result.slice(0, maxResults).forEach(function(item) {
                // build the entry as a text node so titles and urls are never interpreted as HTML
                $('<li>')
                    .text('"' + item.file.title + '" ' + item.file.url + ' w:' + item.weight)
                    .appendTo(list);
            });
        });
    });
    </script>

    <title>Example</title>
</head>
<body>

<h1>Example</h1>

<label for="searchbox" style="display: inline-block; width: 160px;">Search: </label>
<input id="searchbox" type="text" value="">

<br/>

<label for="query" style="display: inline-block; width: 160px;">Actual query: </label>
<span id="query"></span>

<ul id="results">
    <li>No results</li>
</ul>

</body>
</html>
80 changes: 80 additions & 0 deletions jssearch.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@

/**
 * Client side search engine, registered as `$.jssearch`.
 *
 * `$.jssearch.index` and `$.jssearch.files` are read by the functions below but
 * are not defined here; presumably they are provided by the generated index JS
 * file, which is also expected to overwrite `tokenizeString` (see its log message).
 */
$.jssearch = {

    /**
     * the actual words finally used to query (set by last search call)
     */
    queryWords: [],

    /**
     * Search the index for a query string.
     *
     * @param {string} query the raw search string.
     * @return {Array} list of `{file, weight}` objects sorted by descending weight.
     * Always an array, also when the query contains no words.
     */
    search: function(query) {
        var words = $.jssearch.tokenizeString(query);

        $.jssearch.queryWords = words.map(function(word) { return word.t; });

        // do not search when no words given; return an (empty) array so callers
        // can treat the result uniformly, e.g. call forEach() on it
        if (words.length === 0) {
            return [];
        }

        // words that are not in the index are replaced by their completions
        words = $.jssearch.completeWords(words);
        $.jssearch.queryWords = words.map(function(word) { return word.t; });
        var result = $.jssearch.searchForWords(words);

        var res = [];
        for (var i in result) {
            if (Object.prototype.hasOwnProperty.call(result, i)) {
                res.push(result[i]);
            }
        }
        res.sort(function(a, b) { return b.weight - a.weight; });
        return res;
    },

    /**
     * Look up words in the index.
     *
     * @param {Array} words list of `{t: word, w: weight}` objects.
     * @return {Object} map of file id => `{file, weight}`. The weight of a file
     * matched by several words is the product of the single `file.w * word.w` weights.
     */
    searchForWords: function(words) {
        var result = {};
        words.forEach(function(word) {
            // own-property check: a word like "constructor" must not resolve to
            // a member of Object.prototype, which has no forEach()
            if (!Object.prototype.hasOwnProperty.call($.jssearch.index, word.t)) {
                return;
            }
            $.jssearch.index[word.t].forEach(function(file) {
                if (result[file.f]) {
                    result[file.f].weight *= file.w * word.w;
                } else {
                    result[file.f] = {
                        file: $.jssearch.files[file.f],
                        weight: file.w * word.w
                    };
                }
            });
        });
        return result;
    },

    /**
     * Replace words that are not in the index by all index words starting with them.
     *
     * Only words longer than 2 characters are completed; completions get the
     * weight 1. All other words are kept as they are.
     *
     * @param {Array} words list of `{t: word, w: weight}` objects.
     * @return {Array} list in the same format.
     */
    completeWords: function(words) {
        var result = [];
        var hasOwn = Object.prototype.hasOwnProperty;

        words.forEach(function(word) {
            if (!hasOwn.call($.jssearch.index, word.t) && word.t.length > 2) {
                // complete words that are not in the index
                for (var w in $.jssearch.index) {
                    if (hasOwn.call($.jssearch.index, w) && w.substring(0, word.t.length) === word.t) {
                        result.push({t: w, w: 1});
                    }
                }
            } else {
                // keep existing words
                result.push(word);
            }
        });
        return result;
    },

    /**
     * Fallback tokenizer: logs an error and treats the whole string as one word.
     *
     * @param {string} string the string to tokenize.
     * @return {Array} list with a single `{t: string, w: 1}` entry.
     */
    tokenizeString: function(string)
    {
        // typeof check: referencing an undeclared `console` directly would
        // throw a ReferenceError in environments that do not provide it
        if (typeof console !== 'undefined') {
            console.log('Error: tokenizeString should have been overwritten by index JS file.')
        }
        return [{t: string, w: 1}];
    }
};
28 changes: 28 additions & 0 deletions lib/AnalyzerInterface.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
<?php
/**
* @copyright Copyright (c) 2014 Carsten Brandt
* @license https://github.com/cebe/js-search/blob/master/LICENSE
* @link https://github.com/cebe/js-search#readme
*/

namespace cebe\jssearch;

/**
* Interface for all analyzers.
*
* An analyzer receives a string together with a tokenizer, see [[analyze()]].
*
* @author Carsten Brandt <mail@cebe.cc>
*/
interface AnalyzerInterface
{
/**
* Analyzes a string and returns an array.
*
* TODO document the format of the returned array:
* ```
* ```
*
* @param string $string the string to analyze
* @param TokenizerInterface $tokenizer the tokenizer handed to the analyzer;
* presumably used to split `$string` into tokens - TODO confirm, this is not
* established by the interface itself.
* @return array the result of the analysis; its structure is not documented yet (see TODO above).
*/
public function analyze($string, TokenizerInterface $tokenizer);
}
Loading

0 comments on commit 90226a3

Please sign in to comment.