Skip to content

Commit

Permalink
Initial commit.
Browse files Browse the repository at this point in the history
  • Loading branch information
sryza committed Dec 10, 2012
0 parents commit b859565
Show file tree
Hide file tree
Showing 5 changed files with 229 additions and 0 deletions.
49 changes: 49 additions & 0 deletions pom.xml
@@ -0,0 +1,49 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.cloudera</groupId>
<artifactId>gapdeduce</artifactId>
<version>1.0-SNAPSHOT</version>
<name>Gap Deduce</name>
<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>2.0.0-cdh4.1.0</version>
</dependency>

<!-- For unit testing -->
<dependency>
<groupId>org.apache.mrunit</groupId>
<artifactId>mrunit</artifactId>
<version>0.8.0-incubating</version>
</dependency>
</dependencies>

<build>
<plugins>
<!-- Tells maven to use the Java 6 JDK instead of its default -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>2.3.1</version>
<configuration>
<source>1.6</source>
<target>1.6</target>
</configuration>
</plugin>
</plugins>
</build>

<repositories>
<repository>
<id>maven-hadoop</id>
<name>Hadoop Releases</name>
<url>https://repository.cloudera.com/content/repositories/releases/</url>
</repository>
<repository>
<id>cloudera-repos</id>
<name>Cloudera Repos</name>
<url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
</repository>
</repositories>
</project>
38 changes: 38 additions & 0 deletions src/main/java/Deducer.java
@@ -0,0 +1,38 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import java.io.IOException;
import java.util.*;

import org.apache.hadoop.io.*;
import org.apache.hadoop.mapred.*;
import org.apache.hadoop.util.*;

public class Deducer extends MapReduceBase implements Reducer<Text, Text, Text, Text> {

public void reduce(Text key, Iterator<Text> values,
OutputCollector<Text, Text> output, Reporter reporter) throws IOException {
Set<String> attackers = new TreeSet<String>();
while (values.hasNext()) {
String valStr = values.next().toString();
attackers.add(valStr);
}
output.collect(key, new Text(attackers.toString()));
}
}

51 changes: 51 additions & 0 deletions src/main/java/GapDeduceRunner.java
@@ -0,0 +1,51 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapred.*;
import org.apache.hadoop.util.*;

public class GapDeduceRunner {
public static void main(String[] args) throws IOException {
JobConf conf = new JobConf(GapDeduceRunner.class);
conf.setJobName("gapdeduce");

conf.setMapOutputKeyClass(Text.class);
conf.setMapOutputValueClass(Text.class);

conf.setOutputKeyClass(Text.class);
conf.setOutputValueClass(Text.class);

conf.setMapperClass(Gapper.class);
conf.setReducerClass(Deducer.class);

// KeyValueTextInputFormat treats each line as an input record,
// and splits the line by the tab character to separate it into key and value
conf.setInputFormat(KeyValueTextInputFormat.class);
conf.setOutputFormat(TextOutputFormat.class);

FileInputFormat.setInputPaths(conf, new Path(args[0]));
FileOutputFormat.setOutputPath(conf, new Path(args[1]));

JobClient.runJob(conf);
}
}
31 changes: 31 additions & 0 deletions src/main/java/Gapper.java
@@ -0,0 +1,31 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import java.io.IOException;

import org.apache.hadoop.io.*;
import org.apache.hadoop.mapred.*;
import org.apache.hadoop.util.*;

public class Gapper extends MapReduceBase implements Mapper<Text, Text, Text, Text> {
public void map(Text attacker, Text victim, OutputCollector<Text, Text> output,
Reporter reporter) throws IOException {
output.collect(victim, attacker);
}
}

60 changes: 60 additions & 0 deletions src/test/java/TestGapDeduce.java
@@ -0,0 +1,60 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mrunit.MapDriver;
import org.apache.hadoop.mrunit.MapReduceDriver;
import org.apache.hadoop.mrunit.ReduceDriver;
import org.junit.Before;
import org.junit.Test;

public class TestGapDeduce {

private MapDriver<Text, Text, Text, Text> mapDriver;
private ReduceDriver<Text, Text, Text, Text> reduceDriver;

@Before
public void setUp() {
Gapper mapper = new Gapper();
Deducer reducer = new Deducer();
mapDriver = MapDriver.newMapDriver(mapper);;
reduceDriver = ReduceDriver.newReduceDriver(reducer);
}

@Test
public void testMapper() {
mapDriver.withInput(new Text("sanford"), new Text("sage"));
mapDriver.withOutput(new Text("sage"), new Text("sanford"));
mapDriver.runTest();
}

@Test
public void testReducer() {
List<Text> values = new ArrayList<Text>();
values.add(new Text("sage"));
values.add(new Text("ian"));
values.add(new Text("sage"));
reduceDriver.withInput(new Text("sanford"), values);
reduceDriver.withOutput(new Text("sanford"),
new Text("[ian, sage]"));
reduceDriver.runTest();
}
}

0 comments on commit b859565

Please sign in to comment.