Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Initial import; extracted from the rest of the project
- Loading branch information
Showing
6 changed files
with
510 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Original file line | Diff line number | Diff line change |
---|---|---|---|
@@ -0,0 +1,51 @@ | |||
This code provides some syntactic sugar on top of Hadoop in order to make | |||
it more usable from Scala. Take a look at Examples.scala for more | |||
details. | |||
|
|||
A basic mapper looks like | |||
|
|||
object TokenizerMap extends TypedMapper[LongWritable, Text, Text, LongWritable] { | |||
override def map(k: LongWritable, v: Text, context: ContextType) : Unit = | |||
v split " \t" foreach ((word) => context.write(word, 1L)) | |||
} | |||
|
|||
or, you can also write it as | |||
|
|||
object TokenizerMap1 extends TypedMapper[LongWritable, Text, Text, LongWritable] { | |||
override def doMap : Unit = v split " |\t" foreach ((word) => context.write(word, 1L)) | |||
} | |||
|
|||
and a reducer | |||
|
|||
object SumReducer1 extends TypedReducer[Text, LongWritable, Text, LongWritable] { | |||
override def doReduce :Unit = context.write(k, (0L /: v) ((total, next) => total+next)) | |||
} | |||
|
|||
Note that implicit conversions are used to convert between LongWritable and Long, as well as
between Text and String. The types of the input and output parameters only need to be stated
as the generic specializers of the class being extended.
|
|||
These mappers and reducers can be chained together with the --> operator | |||
|
|||
object WordCount extends ScalaHadoopTool{ | |||
def run(args: Array[String]) : Int = { | |||
(MapReduceTaskChain.init() --> | |||
IO.Text[LongWritable, Text](args(0)).input --> | |||
MapReduceTask.MapReduceTask(TokenizerMap1, SumReducer) --> | |||
IO.Text[Text, LongWritable](args(1)).output) execute; | |||
return 0; | |||
} | |||
} | |||
|
|||
Multiple map/reduce runs can be chained together | |||
|
|||
object WordsWithSameCount extends ScalaHadoopTool { | |||
def run(args: Array[String]) : Int = { | |||
(MapReduceTaskChain.init() --> | |||
IO.Text[LongWritable, Text](args(0)).input --> | |||
MapReduceTask.MapReduceTask(TokenizerMap1, SumReducer) --> | |||
MapReduceTask.MapReduceTask(FlipKeyValueMap, WordListReducer) --> | |||
IO.Text[LongWritable, Text](args(1)).output) execute; | |||
return 0; | |||
} | |||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Original file line | Diff line number | Diff line change |
---|---|---|---|
@@ -0,0 +1,52 @@ | |||
<project default="jar">

  <!-- Location of the Scala installation's jars. -->
  <property name="scala.lib.dir" location="/opt/local/share/scala-2.8/lib"/>

  <!-- Make the scalac/scaladoc Ant tasks available. -->
  <taskdef resource="scala/tools/ant/antlib.xml">
    <classpath>
      <pathelement location="${scala.lib.dir}/scala-compiler.jar"/>
      <pathelement location="${scala.lib.dir}/scala-library.jar"/>
      <pathelement location="${scala.lib.dir}/scalap.jar"/>
    </classpath>
  </taskdef>

  <!-- Compile/run classpath: all Hadoop and Scala jars. -->
  <path id="classpath">
    <fileset dir="/Users/asimma/local/hadoop-0.20.2" includes="**/*.jar"/>
    <fileset dir="/opt/local/share/scala-2.8/lib" includes="*.jar"/>
  </path>

  <target name="clean">
    <delete dir="build"/>
  </target>

  <target name="compile">
    <mkdir dir="build/classes"/>
    <scalac srcdir="src" destdir="build/classes" classpathref="classpath" deprecation="yes"/>
    <javac srcdir="src" destdir="build/classes" classpathref="classpath"/>
    <!-- Bundle the Scala runtime alongside the compiled classes. -->
    <copy todir="build/classes/lib">
      <fileset dir="/opt/local/share/scala-2.8/lib/" includes="scala-lib*"/>
    </copy>
  </target>

  <target name="doc">
    <mkdir dir="doc"/>
    <scaladoc srcdir="src/ScalaHadoop" destdir="doc" classpathref="classpath">
      <include name="*.scala"/>
      <include name="*.java"/>
    </scaladoc>
  </target>

  <target name="jar" depends="compile">
    <jar destfile="ScalaHadoop.jar" basedir="build/classes"/>
  </target>

  <!-- Run the WordCount example through the local Hadoop install. -->
  <target name="run">
    <exec executable="/Users/asimma/local/hadoop-0.20.2/bin/hadoop">
      <arg line="jar ScalaHadoop.jar com.asimma.ScalaHadoop.WordCount"/>
    </exec>
  </target>

</project>
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Original file line | Diff line number | Diff line change |
---|---|---|---|
@@ -0,0 +1,69 @@ | |||
package com.asimma.ScalaHadoop; | |||
|
|||
import org.apache.hadoop.io._; | |||
import MapReduceTaskChain._; | |||
import ImplicitConversion._; | |||
import scala.reflect.Manifest; | |||
import scala.collection.JavaConversions._ | |||
|
|||
|
|||
|
|||
|
|||
/** Word-count mapper using the explicit map(k, v, context) signature.
  *
  * Emits (word, 1L) for every token of the input line; implicit
  * conversions box the String/Long values into Text/LongWritable on the
  * way into context.write.
  */
object TokenizerMap extends TypedMapper[LongWritable, Text, Text, LongWritable] {
  override def map(k: LongWritable, v: Text, context: ContextType): Unit =
    // Split on space OR tab.  String.split takes a regex: the previous
    // pattern " \t" only matched the literal two-character sequence
    // space-then-tab, so ordinary space-separated words were never split.
    // Use the same " |\t" alternation as TokenizerMap1.
    v.split(" |\t").foreach(word => context.write(word, 1L))
}
|
|||
/** Word-count mapper written with the doMap shorthand: the key, value
  * and context are supplied as fields by TypedMapper rather than as
  * explicit parameters.  Emits (word, 1L) per space- or tab-separated token.
  */
object TokenizerMap1 extends TypedMapper[LongWritable, Text, Text, LongWritable] {
  override def doMap: Unit =
    v.split(" |\t").foreach(word => context.write(word, 1L))
}
|
|||
/** Mapper that swaps each (key, value) pair, emitting (value, key). */
object FlipKeyValueMap extends TypedMapper[Text, LongWritable, LongWritable, Text] {
  override def map(k: Text, v: LongWritable, context: ContextType): Unit = {
    context.write(v, k)
  }
}
|
|||
/** Sums every count seen for a key (explicit reduce(...) signature) and
  * writes a single (key, total) pair.  JavaConversions wraps the
  * java.lang.Iterable so foldLeft is available.
  */
object SumReducer extends TypedReducer[Text, LongWritable, Text, LongWritable] {
  override def reduce(k: Text, v: java.lang.Iterable[LongWritable], context: ContextType): Unit = {
    val total = v.foldLeft(0L)((acc, count) => acc + count)
    context.write(k, total)
  }
}
|
|||
|
|||
/** Same summing reducer as SumReducer, written with the doReduce
  * shorthand (k, v and context come from TypedReducer fields).
  */
object SumReducer1 extends TypedReducer[Text, LongWritable, Text, LongWritable] {
  override def doReduce: Unit = {
    val total = v.foldLeft(0L)(_ + _)
    context.write(k, total)
  }
}
|
|||
/** Concatenates every value for a key into one space-terminated string,
  * e.g. values ["a", "b"] become "a b ".  The StringBuilder result is
  * boxed into Text by an implicit conversion.
  */
object WordListReducer extends TypedReducer[LongWritable, Text, LongWritable, Text] {
  override def doReduce: Unit = {
    val words = new StringBuilder
    v.foreach(word => words.append(word + " "))
    context.write(k, words)
  }
}
|
|||
|
|||
|
|||
/** Classic word count: tokenize the text input, then sum the per-word
  * counts.  args(0) is the input path, args(1) the output path.
  */
object WordCount extends ScalaHadoopTool {
  def run(args: Array[String]): Int = {
    val chain =
      MapReduceTaskChain.init() -->
        IO.Text[LongWritable, Text](args(0)).input -->
        MapReduceTask.MapReduceTask(TokenizerMap1, SumReducer) -->
        IO.Text[Text, LongWritable](args(1)).output
    chain.execute()
    0
  }
}
|
|||
/** Two chained map/reduce jobs: first a word count, then a pass that
  * flips (word, count) to (count, word) and collects all words sharing
  * the same count.  args(0) is the input path, args(1) the output path.
  */
object WordsWithSameCount extends ScalaHadoopTool {
  def run(args: Array[String]): Int = {
    val chain =
      MapReduceTaskChain.init() -->
        IO.Text[LongWritable, Text](args(0)).input -->
        MapReduceTask.MapReduceTask(TokenizerMap1, SumReducer) -->
        MapReduceTask.MapReduceTask(FlipKeyValueMap, WordListReducer) -->
        IO.Text[LongWritable, Text](args(1)).output
    chain.execute()
    0
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Original file line | Diff line number | Diff line change |
---|---|---|---|
@@ -0,0 +1,39 @@ | |||
// This is inspired by Shadoop | |||
// (http://blog.jonhnnyweslley.net/2008/05/shadoop.html) | |||
package com.asimma.ScalaHadoop | |||
|
|||
import org.apache.hadoop.io._ | |||
|
|||
/** Implicit boxing/unboxing between Hadoop Writable wrappers and the
  * corresponding plain value types, so mapper/reducer bodies can work
  * with ordinary Scala values (inspired by Shadoop).
  *
  * Every implicit def carries an explicit return type: implicits with
  * inferred result types are fragile under separate compilation and
  * trigger compiler warnings.
  */
object ImplicitConversion {
  // Handle BooleanWritable
  implicit def BooleanWritableUnbox(v: BooleanWritable): Boolean = v.get
  implicit def BooleanWritableBox(v: Boolean): BooleanWritable = new BooleanWritable(v)

  // Handle DoubleWritable
  implicit def DoubleWritableUnbox(v: DoubleWritable): Double = v.get
  implicit def DoubleWritableBox(v: Double): DoubleWritable = new DoubleWritable(v)

  // Handle FloatWritable
  implicit def FloatWritableUnbox(v: FloatWritable): Float = v.get
  implicit def FloatWritableBox(v: Float): FloatWritable = new FloatWritable(v)

  // Handle IntWritable
  implicit def IntWritableUnbox(v: IntWritable): Int = v.get
  implicit def IntWritableBox(v: Int): IntWritable = new IntWritable(v)

  // Handle LongWritable
  implicit def LongWritableUnbox(v: LongWritable): Long = v.get
  implicit def LongWritableBox(v: Long): LongWritable = new LongWritable(v)

  // Handle Text; the builder types get boxing only (no unboxing needed).
  implicit def TextUnbox(v: Text): String = v.toString
  implicit def TextBox(v: String): Text = new Text(v)
  implicit def StringBuilderBox(v: StringBuilder): Text = new Text(v.toString)
  implicit def StringBufferBox(v: StringBuffer): Text = new Text(v.toString)

  /** Box a Scala Map of Writables into a Hadoop MapWritable (one-way:
    * there is no corresponding unboxing conversion).
    */
  implicit def MapWritableBox[X <: Writable, Y <: Writable](value: scala.collection.Map[X, Y]): MapWritable = {
    // val, not var: the MapWritable reference never changes, only its contents.
    val boxed = new MapWritable()
    value.foreach { case (k, v) => boxed.put(k, v) }
    boxed
  }
}
Oops, something went wrong.