Skip to content

Commit

Permalink
1. Fix build.xml to fetch maven ant task properly.
Browse files Browse the repository at this point in the history
2. Remove external Hadoop dependency.
3. Add deprecated files back in.
  • Loading branch information
ahadrana committed Jan 17, 2013
1 parent 1f4f452 commit 5e8cf9f
Show file tree
Hide file tree
Showing 12 changed files with 28 additions and 29 deletions.
7 changes: 7 additions & 0 deletions build.properties
Expand Up @@ -10,3 +10,10 @@ hadoop.path=/usr/lib/hadoop
# version ID of this build
commoncrawl.version=1.0

mvn.ant.task.version=2.1.3
# Maven dependency download locations
mvn.repo=http://repo1.maven.org/maven2
apache.repo=https://repository.apache.org/content/repositories/releases
mvn.ant.task.url=${mvn.repo}/org/apache/maven/maven-ant-tasks/${mvn.ant.task.version}
mvn.ant.task.jar=maven-ant-tasks-${mvn.ant.task.version}.jar

21 changes: 4 additions & 17 deletions build.xml
Expand Up @@ -8,23 +8,14 @@
<property file="${basedir}/build.properties" />
<property name="Name" value="commoncrawl"/>
<property name="name" value="commoncrawl"/>
<property name="version" value="1.0"/>
<property name="version" value="1.1"/>
<property name="final.name" value="${name}-${commoncrawl.version}"/>

<property name="commoncrawl.groupid" value="org.commoncrawl"/>

<property name="mvn.repo" value="http://repo1.maven.org/maven2"/>
<property name="apache.repo" value="https://repository.apache.org/content/repositories/releases"/>

<fail message="Please define Hadoop Base Path via hadoop.path in your build.properties file">
<condition>
<not>
<isset property="hadoop.path"/>
</not>
</condition>
</fail>

<property name="src.dir" value="${basedir}/src"/>
<property name="src.dir" value="${basedir}/src/main/java"/>
<property name="lib.dir" value="${basedir}/lib"/>
<property name="conf.dir" value="${basedir}/conf"/>

Expand Down Expand Up @@ -77,11 +68,6 @@
<exclude name="servlet-api-2.5-6.1.14.jar"/>
<exclude name="servlet-api-2.5.jar"/>
</fileset>
<fileset dir="${hadoop.path}">
<include name="lib/**/*.jar"/>
<include name="hadoop-core*.jar"/>
<exclude name="lib/jets3t-*.jar"/>
</fileset>
</path>

<target name="setup.init">
Expand All @@ -107,7 +93,7 @@
<target name="mvn.ant.tasks.download" depends="setup.init,mvn.ant.tasks.check,proxy" unless="mvn.ant.tasks.found">
<property name="mvn.ant.task.version" value="2.1.3"/>
<property name="mvn.ant.task.jar" value="maven-ant-tasks-${mvn.ant.task.version}.jar"/>
<property name="mvn.ant.task.url" value="http://mirror.uoregon.edu/apache/maven/ant-tasks/2.1.3/binaries"/>
<property name="mvn.ant.task.url" value="http://mirror.uoregon.edu/apache/maven/ant-tasks/2.1.3/binaries"/>
<get src="${mvn.ant.task.url}/${mvn.ant.task.jar}" dest="${build.tools.dir}/${mvn.ant.task.jar}" usetimestamp="true"/>
</target>

Expand Down Expand Up @@ -442,6 +428,7 @@
<dependency groupId="junit" artifactId="junit" version="4.10"/>
<dependency groupId="org.apache.thrift" artifactId="libthrift" version="0.7.0"/>
<dependency groupId="log4j" artifactId="log4j" version="1.2.14"/>
<dependency groupId="org.apache.hadoop" artifactId="hadoop-core" version="1.0.3" />
</artifact:pom>

<!-- Generate the pom file -->
Expand Down
Expand Up @@ -13,5 +13,12 @@ public interface Constants {
public static final String ARCFileHeader_FetchTimeStamp = "x_commoncrawl_FetchTimestamp";
public static final String ARCFileHeader_ContentTruncated = "x-commoncrawl-ContentTruncated";
public static final String ARCFileHeader_SOURCE_IS_GZIPED = "x_commoncrawl_SourceIsGZIP";
public static final String ARCFileHeader_ARC_Offset = "x_commoncrawl_ArcOffset";
public static final String ARCFileHeader_ARC_Timestamp = "x_commoncrawl_ArcTimestamp";
public static final String ARCFileHeader_HostIP = "x_commoncrawl_HostIP";
public static final String ARCFileHeader_ARC_MimeType = "x_commoncrawl_ArcMimeType";
public static final String ARCFileHeader_ARC_RecordLen = "x_commoncrawl_ArcRecordLen";
public static final String ARCFileHeader_ARC_PayloadLen = "x_commoncrawl_ArcPayloadLen";


}
Expand Up @@ -14,7 +14,7 @@
* limitations under the License.
*/

package org.commoncrawl.hadoop.io;
package org.commoncrawl.hadoop.io.deprecated;

import java.io.IOException;
import java.util.Arrays;
Expand All @@ -27,7 +27,6 @@
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.util.ReflectionUtils;
import org.commoncrawl.util.shared.ArcFileReader;

/**
* A map reduce input format for gzipped ARC files.
Expand Down Expand Up @@ -179,7 +178,7 @@ public InputSplit[] getSplits(JobConf job, int ignored) throws IOException {
*/
public RecordReader getRecordReader(InputSplit split, JobConf job,
Reporter reporter) throws IOException {
return new ARCSplitReader(job, (ARCSplit) split, arcSource, blockSize);
return new ARCSplitReader(job, (ARCSplit) split, arcSource, blockSize);
}

/**
Expand Down
Expand Up @@ -14,7 +14,7 @@
* limitations under the License.
*/

package org.commoncrawl.hadoop.io;
package org.commoncrawl.hadoop.io.deprecated;

import java.io.DataInput;
import java.io.DataOutput;
Expand Down
Expand Up @@ -14,7 +14,7 @@
* limitations under the License.
*/

package org.commoncrawl.hadoop.io;
package org.commoncrawl.hadoop.io.deprecated;

import java.io.IOException;
import java.io.InputStream;
Expand Down
Expand Up @@ -14,7 +14,7 @@
* limitations under the License.
*/

package org.commoncrawl.hadoop.io;
package org.commoncrawl.hadoop.io.deprecated;

import java.io.DataInput;
import java.io.DataOutput;
Expand Down
Expand Up @@ -14,7 +14,7 @@
* limitations under the License.
*/

package org.commoncrawl.hadoop.io;
package org.commoncrawl.hadoop.io.deprecated;

import java.io.IOException;
import java.util.Collection;
Expand Down
Expand Up @@ -14,7 +14,7 @@
* limitations under the License.
*/

package org.commoncrawl.hadoop.io;
package org.commoncrawl.hadoop.io.deprecated;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
Expand All @@ -32,7 +32,6 @@
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.util.StringUtils;
import org.commoncrawl.protocol.shared.ArcFileItem;
import org.commoncrawl.util.shared.ArcFileReader;

/**
 * A Hadoop {@link RecordReader} for reading {@link ARCSplit}s.
Expand Down
Expand Up @@ -14,7 +14,7 @@
* limitations under the License.
*/

package org.commoncrawl.util.shared;
package org.commoncrawl.hadoop.io.deprecated;

import java.io.BufferedReader;
import java.io.EOFException;
Expand Down
Expand Up @@ -14,7 +14,7 @@
* limitations under the License.
*/

package org.commoncrawl.hadoop.io;
package org.commoncrawl.hadoop.io.deprecated;

import java.io.IOException;
import java.io.InputStream;
Expand Down
Expand Up @@ -14,7 +14,7 @@
* limitations under the License.
*/

package org.commoncrawl.hadoop.io;
package org.commoncrawl.hadoop.io.deprecated;

import java.io.File;
import java.io.FileInputStream;
Expand Down

0 comments on commit 5e8cf9f

Please sign in to comment.