Permalink
Browse files

added formatting config for Eclipse + applied it to the whole project

  • Loading branch information...
1 parent 44abcab commit e2d6aa6a26fe725b579ac73995fd7ede3a33a00c @jnioche jnioche committed Nov 23, 2010
Showing with 2,486 additions and 2,064 deletions.
  1. +279 −0 eclipse-format.xml
  2. +1 −2 sandbox/solr-indexer/src/main/com/digitalpebble/solr/SOLRIndexerJob.java
  3. +21 −19 sandbox/solr-indexer/src/main/com/digitalpebble/solr/SOLROutputFormat.java
  4. +10 −9 sandbox/solr-indexer/src/main/com/digitalpebble/solr/SOLRWriter.java
  5. +6 −5 src/main/java/com/digitalpebble/behemoth/Annotation.java
  6. +14 −14 src/main/java/com/digitalpebble/behemoth/BehemothConfiguration.java
  7. +5 −5 src/main/java/com/digitalpebble/behemoth/BehemothDocument.java
  8. +6 −5 src/main/java/com/digitalpebble/behemoth/DocumentProcessor.java
  9. +72 −71 src/main/java/com/digitalpebble/behemoth/gate/GATEAnnotationFilters.java
  10. +57 −58 src/main/java/com/digitalpebble/behemoth/gate/GATEDriver.java
  11. +3 −2 src/main/java/com/digitalpebble/behemoth/gate/GATEMapper.java
  12. +7 −5 src/main/java/com/digitalpebble/behemoth/gate/GATEProcessor.java
  13. +71 −69 src/main/java/com/digitalpebble/behemoth/io/nutch/NutchSegmentConverterJob.java
  14. +181 −174 src/main/java/com/digitalpebble/behemoth/io/warc/HttpResponse.java
  15. +60 −59 src/main/java/com/digitalpebble/behemoth/io/warc/WARCConverterJob.java
  16. +17 −16 src/main/java/com/digitalpebble/behemoth/io/warc/WarcFileInputFormat.java
  17. +141 −119 src/main/java/com/digitalpebble/behemoth/io/warc/WarcFileRecordReader.java
  18. +178 −162 src/main/java/com/digitalpebble/behemoth/io/warc/WarcHTMLResponseRecord.java
  19. +555 −499 src/main/java/com/digitalpebble/behemoth/io/warc/WarcRecord.java
  20. +28 −28 src/main/java/com/digitalpebble/behemoth/io/warc/WritableWarcRecord.java
  21. +30 −31 src/main/java/com/digitalpebble/behemoth/tika/TextArrayWritable.java
  22. +2 −2 src/main/java/com/digitalpebble/behemoth/tika/TikaConstants.java
  23. +116 −111 src/main/java/com/digitalpebble/behemoth/tika/TikaDriver.java
  24. +37 −35 src/main/java/com/digitalpebble/behemoth/tika/TikaMapper.java
  25. +110 −100 src/main/java/com/digitalpebble/behemoth/tika/TikaProcessor.java
  26. +46 −46 src/main/java/com/digitalpebble/behemoth/uima/UIMADriver.java
  27. +175 −174 src/main/java/com/digitalpebble/behemoth/uima/UIMAMapper.java
  28. +8 −6 src/main/java/com/digitalpebble/behemoth/uima/UIMAProcessor.java
  29. +92 −83 src/main/java/com/digitalpebble/behemoth/util/CorpusGenerator.java
  30. +22 −20 src/main/java/com/digitalpebble/behemoth/util/CorpusReader.java
  31. +28 −28 src/main/java/com/digitalpebble/behemoth/util/MimeUtil.java
  32. +78 −77 src/test/java/com/digitalpebble/behemoth/gate/GATEProcessorTest.java
  33. +30 −30 src/test/java/com/digitalpebble/behemoth/tika/TikaProcessorTest.java
View
Oops, something went wrong.
@@ -45,8 +45,7 @@
*/
public class SOLRIndexerJob extends Configured implements Tool {
- private static final Log LOG = LogFactory
- .getLog(SOLRIndexerJob.class);
+ private static final Log LOG = LogFactory.getLog(SOLRIndexerJob.class);
public SOLRIndexerJob() {
}
@@ -28,23 +28,25 @@
import com.digitalpebble.behemoth.BehemothDocument;
-public class SOLROutputFormat extends FileOutputFormat<Text,BehemothDocument> {
-
- public RecordWriter<Text,BehemothDocument> getRecordWriter(FileSystem ignored,
- JobConf job, String name, Progressable progress) throws IOException {
-
- final SOLRWriter writer = new SOLRWriter();
- writer.open(job, name);
-
- return new RecordWriter<Text,BehemothDocument>() {
-
- public void close(Reporter reporter) throws IOException {
- writer.close();
- }
-
- public void write(Text key, BehemothDocument doc) throws IOException {
- writer.write(doc);
- }
- };
- }
+public class SOLROutputFormat extends FileOutputFormat<Text, BehemothDocument> {
+
+ public RecordWriter<Text, BehemothDocument> getRecordWriter(
+ FileSystem ignored, JobConf job, String name, Progressable progress)
+ throws IOException {
+
+ final SOLRWriter writer = new SOLRWriter();
+ writer.open(job, name);
+
+ return new RecordWriter<Text, BehemothDocument>() {
+
+ public void close(Reporter reporter) throws IOException {
+ writer.close();
+ }
+
+ public void write(Text key, BehemothDocument doc)
+ throws IOException {
+ writer.write(doc);
+ }
+ };
+ }
}
@@ -67,7 +67,8 @@ public void open(JobConf job, String name) throws IOException {
featureName = val.substring(separator + 1);
featureValMap.put(featureName, fieldName);
fieldMapping.put(entry.getValue(), featureValMap);
- LOG.debug("Adding to mapping "+entry.getValue()+" "+featureName+" "+fieldName);
+ LOG.debug("Adding to mapping " + entry.getValue() + " "
+ + featureName + " " + fieldName);
}
}
@@ -87,9 +88,9 @@ protected SolrInputDocument convertToSOLR(BehemothDocument doc) {
// to match the SOLR schema
inputDoc.setField("id", doc.getUrl());
inputDoc.setField("text", doc.getText());
-
- LOG.info("Adding field : id\t"+doc.getUrl());
- LOG.info("Adding field : text\t"+doc.getText());
+
+ LOG.info("Adding field : id\t" + doc.getUrl());
+ LOG.info("Adding field : text\t" + doc.getText());
// iterate on the annotations of interest and
// create a new field for each one
@@ -121,11 +122,11 @@ protected SolrInputDocument convertToSOLR(BehemothDocument doc) {
// get the value for the feature
else {
value = current.getFeatures().get(targetFeature);
- }
- LOG.debug("Adding field : "+SOLRFieldName+"\t"+value);
- // skip if no value has been found
- if (value!=null)
- inputDoc.setField(SOLRFieldName, value);
+ }
+ LOG.debug("Adding field : " + SOLRFieldName + "\t" + value);
+ // skip if no value has been found
+ if (value != null)
+ inputDoc.setField(SOLRFieldName, value);
}
}
@@ -81,24 +81,25 @@ public void setFeatures(Map<String, String> features) {
public int compareTo(Annotation target) {
long diff = this.start - target.start;
if (diff != 0)
- return (int)diff;
+ return (int) diff;
diff = this.type.compareTo(target.type);
if (diff != 0)
- return (int)diff;
+ return (int) diff;
diff = this.end - target.end;
if (diff != 0)
- return (int)diff;
+ return (int) diff;
// finally, fall back to comparing the number of features
diff = this.getFeatureNum() - target.getFeatureNum();
if (diff != 0)
- return (int)diff;
+ return (int) diff;
// TODO compare the features one by one?
return 0;
}
public String toString() {
StringBuffer buffer = new StringBuffer();
- buffer.append(this.type).append("\t").append(start).append("\t").append(end);
+ buffer.append(this.type).append("\t").append(start).append("\t")
+ .append(end);
if (features != null) {
Iterator<String> keysiter = features.keySet().iterator();
while (keysiter.hasNext()) {
@@ -21,21 +21,21 @@
public class BehemothConfiguration extends org.apache.hadoop.conf.Configuration {
- private BehemothConfiguration() {
- }
+ private BehemothConfiguration() {
+ }
- /** Create a {@link Configuration} for Behemoth. */
- public static Configuration create() {
- Configuration conf = new Configuration();
- addBehemothResources(conf);
- return conf;
- }
+ /** Create a {@link Configuration} for Behemoth. */
+ public static Configuration create() {
+ Configuration conf = new Configuration();
+ addBehemothResources(conf);
+ return conf;
+ }
- /** Add the standard Behemoth resources to {@link Configuration}. */
- private static Configuration addBehemothResources(Configuration conf) {
- conf.addResource("behemoth-default.xml");
- conf.addResource("behemoth-site.xml");
- return conf;
- }
+ /** Add the standard Behemoth resources to {@link Configuration}. */
+ private static Configuration addBehemothResources(Configuration conf) {
+ conf.addResource("behemoth-default.xml");
+ conf.addResource("behemoth-site.xml");
+ return conf;
+ }
}
@@ -158,11 +158,11 @@ public void writeCommon(DataOutput out) throws IOException {
Text.writeString(out, url); // write url
out.writeInt(content.length); // write content
out.write(content);
- if (contentType != null){
- Text.writeString(out, contentType); // write contentType
- } else {
- Text.writeString(out, "");
- }
+ if (contentType != null) {
+ Text.writeString(out, contentType); // write contentType
+ } else {
+ Text.writeString(out, "");
+ }
out.writeBoolean(text != null);
if (text != null)
Text.writeString(out, text); // write text
@@ -23,10 +23,11 @@
public interface DocumentProcessor extends Configurable {
- /** Returns one or more processed documents**/
- public BehemothDocument[] process(BehemothDocument inputDoc, Reporter reporter);
-
- /** Closes all resources held by the processor **/
- public void close();
+ /** Returns one or more processed documents **/
+ public BehemothDocument[] process(BehemothDocument inputDoc,
+ Reporter reporter);
+
+ /** Closes all resources held by the processor **/
+ public void close();
}
@@ -33,78 +33,79 @@
/** Container for the annotation filters, which is built from a Configuration via getFilters() **/
public class GATEAnnotationFilters {
-
- HashSet<String> types;
- Map<String,Set<String>> featfilts;
- String annotationSetName;
-
- public static GATEAnnotationFilters getFilters(Configuration config) {
- GATEAnnotationFilters filter = new GATEAnnotationFilters();
-
- filter.annotationSetName = config.get("gate.annotationset.output", "");
- String[] stypes = config.get("gate.annotations.filter", "").split(",");
- String[] sFeatFilt = config.get("gate.features.filter", "").split(",");
-
- // the featurefilters have the following form : Type.featureName
- filter.featfilts = new HashMap<String,Set<String>>();
- for (String ff : sFeatFilt) {
- String[] fp = ff.split("\\.");
- if (fp.length != 2) continue;
- Set<String> fnames = filter.featfilts.get(fp[0]);
- if (fnames == null) {
- fnames = new HashSet<String>();
- filter.featfilts.put(fp[0], fnames);
- }
- fnames.add(fp[1]);
+
+ HashSet<String> types;
+ Map<String, Set<String>> featfilts;
+ String annotationSetName;
+
+ public static GATEAnnotationFilters getFilters(Configuration config) {
+ GATEAnnotationFilters filter = new GATEAnnotationFilters();
+
+ filter.annotationSetName = config.get("gate.annotationset.output", "");
+ String[] stypes = config.get("gate.annotations.filter", "").split(",");
+ String[] sFeatFilt = config.get("gate.features.filter", "").split(",");
+
+ // the featurefilters have the following form : Type.featureName
+ filter.featfilts = new HashMap<String, Set<String>>();
+ for (String ff : sFeatFilt) {
+ String[] fp = ff.split("\\.");
+ if (fp.length != 2)
+ continue;
+ Set<String> fnames = filter.featfilts.get(fp[0]);
+ if (fnames == null) {
+ fnames = new HashSet<String>();
+ filter.featfilts.put(fp[0], fnames);
+ }
+ fnames.add(fp[1]);
+ }
+
+ filter.types = new HashSet<String>();
+ for (String s : stypes) {
+ filter.types.add(s);
+ }
+
+ return filter;
}
-
- filter.types = new HashSet<String>();
- for (String s : stypes) {
- filter.types.add(s);
+
+ public HashSet<String> getTypes() {
+ return types;
}
-
- return filter;
- }
-
- public HashSet<String> getTypes() {
- return types;
- }
-
- public Map<String,Set<String>> getFeatfilts() {
- return featfilts;
- }
-
- public String getAnnotationSetName() {
- return annotationSetName;
- }
-
- public void setTypes(HashSet<String> types) {
- this.types = types;
- }
-
- public void setFeatfilts(Map<String,Set<String>> featfilts) {
- this.featfilts = featfilts;
- }
-
- public void setAnnotationSetName(String annotationSetName) {
- this.annotationSetName = annotationSetName;
- }
-
- /**
- * Returns a unmodifiable sorted list of all known annotations and feature
- * names so that we can use their position in the serialisation instead of
- * writing them as strings
- ***/
- public List<String> getLexicon() {
- HashSet<String> lexicon = new HashSet<String>();
- lexicon.addAll(types);
- Iterator<Set<String>> iter = featfilts.values().iterator();
- while (iter.hasNext()) {
- lexicon.addAll(iter.next());
+
+ public Map<String, Set<String>> getFeatfilts() {
+ return featfilts;
+ }
+
+ public String getAnnotationSetName() {
+ return annotationSetName;
}
- ArrayList<String> temp = new ArrayList<String>(lexicon);
- Collections.sort(temp);
- return Collections.unmodifiableList(temp);
- }
-
+
+ public void setTypes(HashSet<String> types) {
+ this.types = types;
+ }
+
+ public void setFeatfilts(Map<String, Set<String>> featfilts) {
+ this.featfilts = featfilts;
+ }
+
+ public void setAnnotationSetName(String annotationSetName) {
+ this.annotationSetName = annotationSetName;
+ }
+
+ /**
+ * Returns a unmodifiable sorted list of all known annotations and feature
+ * names so that we can use their position in the serialisation instead of
+ * writing them as strings
+ ***/
+ public List<String> getLexicon() {
+ HashSet<String> lexicon = new HashSet<String>();
+ lexicon.addAll(types);
+ Iterator<Set<String>> iter = featfilts.values().iterator();
+ while (iter.hasNext()) {
+ lexicon.addAll(iter.next());
+ }
+ ArrayList<String> temp = new ArrayList<String>(lexicon);
+ Collections.sort(temp);
+ return Collections.unmodifiableList(temp);
+ }
+
}
Oops, something went wrong.

0 comments on commit e2d6aa6

Please sign in to comment.