Skip to content

Commit

Permalink
- Improving the core.
Browse files Browse the repository at this point in the history
- Adding a primary-key from dataset file.
  • Loading branch information
crodas committed May 25, 2009
1 parent fd430d3 commit 5da3f76
Show file tree
Hide file tree
Showing 8 changed files with 86 additions and 12 deletions.
6 changes: 3 additions & 3 deletions examples/invert-index/task.php
Expand Up @@ -9,16 +9,16 @@ final class InvertIndex extends Job
{
function __config()
{
$this->setInput("noticias/*");
$this->setOutput("index");
$this->setInput("news.txt");
$this->setOutput("invert-index");
$this->setReducers(1);
}

function map($key, &$value)
{
$words = array();
$value = strtolower($value);
foreach (preg_split("/[^a-z]/i", $value) as $word) {
foreach (preg_split("/[^a-záéíóúüñ]/i", $value) as $word) {
$words[$word] = 1;
}
foreach ($words as $word=>$id) {
Expand Down
2 changes: 2 additions & 0 deletions examples/k-means/sample.php
Expand Up @@ -10,6 +10,8 @@

hadoop::setHome("/home/crodas/hadoop/hadoop-0.18.3");

exit();

$hadoop = new Hadoop;

/* create an invert index for fast computation */
Expand Down
16 changes: 16 additions & 0 deletions examples/pk/index.php
@@ -0,0 +1,16 @@
<?php
/**
 * Primary-key example: entry point.
 *
 * Loads the library and the Index job definition, points the library at the
 * local Hadoop installation and instantiates the job.
 */

/*
 * Anchor both paths to this script's directory. A path starting with "../"
 * (or a bare file name that is not on the include_path) is resolved against
 * the current working directory, so the example would only work when started
 * from inside examples/pk. require_once also stops immediately with a clear
 * error if a file is missing, instead of failing later on an unknown class.
 */
require_once(dirname(__FILE__)."/../../src/hadoop.php");

require_once(dirname(__FILE__)."/task.php");


/* Hadoop installation used to run the job; adjust to the local setup. */
hadoop::setHome("/home/crodas/hadoop/hadoop-0.18.3");

/* run the task */
try {
    new Index();
} catch (Exception $e) {
    /* Report the failure message and let the script end normally. */
    print $e->getMessage()."\n";
}

?>
33 changes: 33 additions & 0 deletions examples/pk/task.php
@@ -0,0 +1,33 @@
<?php

/**
 * Primary-key job.
 *
 * Map-only job that reads "news.txt" and writes to "index". For every
 * record it receives, it emits a running sequence number paired with the
 * record's key.
 */
final class Index extends Job
{
    /**
     * Configure input/output paths, disable the reduce phase and request
     * a single mapper.
     */
    function __config()
    {
        $this->setInput('news.txt');
        $this->setOutput('index');
        $this->setOption(HAS_NO_REDUCER);
        $this->setMappers(1);
    }

    /**
     * Emit (sequence number, key) for one input record.
     *
     * The counter is a function-level static, so it keeps counting across
     * calls within the same PHP process.
     *
     * @param mixed $key    Record key; must not be blank after trimming.
     * @param mixed &$value Record value; only used in the error message.
     *
     * @throws Exception When the key is blank.
     */
    function map($key, &$value)
    {
        static $sequence = 0;

        if (trim($key) != "") {
            $sequence = $sequence + 1;
            $this->Emit($sequence, $key);
            return;
        }

        throw new Exception("Invalid index $key $value");
    }

    /**
     * Intentionally empty: the job is configured with HAS_NO_REDUCER.
     */
    function reduce($key, &$values)
    {
    }

}

?>
28 changes: 24 additions & 4 deletions src/hadoop.php
Expand Up @@ -6,6 +6,8 @@
ini_set("memory_limit", "200M");


define("HAS_NO_REDUCER", 2);

/**
* Hadoop Class.
*
Expand All @@ -14,7 +16,7 @@
*/
abstract class Hadoop
{
private static $_path;
private static $_path = false;
private static $_injob = false;
private static $_fncWatch = false;
private $_ipath = false;
Expand All @@ -23,6 +25,8 @@ abstract class Hadoop
private $_tmp;
private $_reduce;
private $_map = false;
private $_nomap = false;
private $_noreduce = false;

final function __construct()
{
Expand All @@ -33,10 +37,20 @@ final function __construct()
if ($this->_ipath === false || $this->_opath === false) {
throw new Exception("No input or output path configured");
}
if (!self::$_path) {
throw new Exception("No Hadoop-home");
}
$this->_tmp = array(tempnam("/tmp", "map"), tempnam("/tmp", "red"));
$this->_execTask();
}

/**
 * Apply job option flags.
 *
 * $options is treated as a bit mask. The only flag examined here is
 * HAS_NO_REDUCER; when it is present the job is marked as having no
 * reduce phase. Calling this without the flag leaves the current
 * setting untouched (it is never switched back off here).
 *
 * @param int $options Bit mask of option constants.
 */
final protected function setOption($options)
{
    if (!($options & HAS_NO_REDUCER)) {
        return;
    }

    $this->_noreduce = true;
}

final static function import($file)
{
include(dirname(__FILE__)."/$file");
Expand Down Expand Up @@ -156,6 +170,7 @@ private function _buildTempFiles()
/* save the map */
$map = str_replace("/*name*/", $info->getName(), $map);
$map = str_replace("/*include*/", $includ, $map);
$map = str_replace("/*hadoop-home*/", self::$_path, $map);
if (array_search($info->getFileName(), get_included_files()) === 0) {
$map = str_replace("/*class*/", $code, $map);
}
Expand All @@ -165,6 +180,7 @@ private function _buildTempFiles()
/* save the reduce */
$reduce = str_replace("/*name*/", $info->getName(), $reduce);
$reduce = str_replace("/*include*/", $includ, $reduce);
$reduce = str_replace("/*hadoop-home*/", self::$_path, $reduce);
if (array_search($info->getFileName(), get_included_files()) === 0) {
$reduce = str_replace("/*class*/", $code, $reduce);
}
Expand All @@ -184,13 +200,17 @@ private function _getCmd()
$opath = $this->_opath;
$path = self::$_path;

$cmd = sprintf("%sbin/hadoop jar %s -input %s -output %s -mapper %s -reducer %s -file %s -file %s -file %s",
$cmd = sprintf("%sbin/hadoop jar %s -input %s -output %s -file %s -file %s -file %s",
$path, $path.$jarpath, $ipath, $opath,
basename($this->_getFileName("map")),
basename($this->_getFileName("reduce")),
$this->_getFileName("map"), $this->_getFileName("reduce"),__FILE__
);

$cmd .= " -mapper ".basename($this->_getFileName("map"));

if (!$this->_noreduce) {
$cmd .= " -reducer ".basename($this->_getFileName("reducer"));
}

/* set number of mappers */
if (is_int($this->_map)) {
$cmd .= " -jobconf mapred.map.tasks=".$this->_map;
Expand Down
11 changes: 6 additions & 5 deletions src/job.php
Expand Up @@ -40,15 +40,16 @@ final function runMap()
{
$this->__init();
while (($line = fgets(STDIN)) !== false) {
$line = substr($line, 0, strlen($line)-1);
$line = trim($line);
if (strlen($line) == 0) {
continue;
}
$input = $this->mapParser($line);
if (count($input) == 1) {
$input[1] = $input[0];
$input[0] = null;
}

if (trim($input[0]) == "") {
throw new Exception($line);
}

$this->unserialize($input[1]);
$this->map($input[0], $input[1]);
}
Expand Down
1 change: 1 addition & 0 deletions src/map.php
Expand Up @@ -5,6 +5,7 @@
/*class*/

hadoop::initMapper();
hadoop::setHome("/*hadoop-home*/");

$map = new /*name*/;
$map->RunMap();
Expand Down
1 change: 1 addition & 0 deletions src/reduce.php
Expand Up @@ -4,6 +4,7 @@

/*class*/
hadoop::initReducer();
hadoop::setHome("/*hadoop-home*/");

$map = new /*name*/;
$map->RunReduce();
Expand Down

0 comments on commit 5da3f76

Please sign in to comment.