Skip to content

Commit

Permalink
Refactor cli tools to re-use CliTool util
Browse files: browse the repository at this point in the history
  • Loading branch information
aecio committed Oct 19, 2016
1 parent ae2ae51 commit 951f083
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 69 deletions.
31 changes: 5 additions & 26 deletions src/main/java/focusedCrawler/memex/cdr/AcheToCdrFileExporter.java
Original file line number | Diff line number | Diff line change
@@ -1,25 +1,19 @@
package focusedCrawler.memex.cdr;

import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.HashMap;
import java.util.zip.GZIPOutputStream;

import org.kohsuke.args4j.CmdLineException;
import org.kohsuke.args4j.CmdLineParser;
import org.kohsuke.args4j.Option;
import org.kohsuke.args4j.ParserProperties;

import com.fasterxml.jackson.core.JsonProcessingException;

import focusedCrawler.target.model.TargetModelJson;
import focusedCrawler.target.repository.FileSystemTargetRepository;
import focusedCrawler.target.repository.FileSystemTargetRepository.DataFormat;
import focusedCrawler.target.repository.FileSystemTargetRepository.FileContentIterator;
import focusedCrawler.util.CliTool;

public class AcheToCdrFileExporter {
public class AcheToCdrFileExporter extends CliTool {

@Option(name="--input-path", usage="Path to ACHE data target folder", required=true)
private String inputPath;
Expand All @@ -36,26 +30,11 @@ public class AcheToCdrFileExporter {
private DataFormat dataFormat = DataFormat.JSON;

public static void main(String[] args) throws Exception {
new AcheToCdrFileExporter().run(args);
CliTool.run(args, new AcheToCdrFileExporter());
}

public void run(String[] args) throws Exception {
ParserProperties properties = ParserProperties.defaults().withUsageWidth(80);
CmdLineParser parser = new CmdLineParser(this, properties);
try {
parser.parseArgument(args);
} catch (CmdLineException e) {
System.err.println(e.getMessage());
System.err.println();
parser.printUsage(System.err);
System.err.println();
System.exit(1);
}
generateCdrFile();
}

private void generateCdrFile()
throws IOException, FileNotFoundException, JsonProcessingException {
@Override
public void execute() throws Exception {

System.out.println("Reading ACHE data from: "+inputPath);
System.out.println("Generating CDR file at: "+outputFile);
Expand Down
29 changes: 7 additions & 22 deletions src/main/java/focusedCrawler/memex/cdr/CountTlds.java
Original file line number | Diff line number | Diff line change
Expand Up @@ -15,16 +15,16 @@
import java.util.Map.Entry;
import java.util.zip.GZIPInputStream;

import org.kohsuke.args4j.CmdLineException;
import org.kohsuke.args4j.CmdLineParser;
import org.kohsuke.args4j.Option;
import org.kohsuke.args4j.ParserProperties;

import com.fasterxml.jackson.databind.ObjectMapper;

import focusedCrawler.link.frontier.LinkRelevance;
import focusedCrawler.util.CliTool;

public class CountTlds {
public class CountTlds extends CliTool {

private static final ObjectMapper mapper = new ObjectMapper();

@Option(name="--input-path", required=true,
usage="Path to folder with multiple CDR files")
Expand All @@ -34,28 +34,13 @@ public class CountTlds {
usage="Text file with TLD counts")
private String outputFile;

private static final ObjectMapper mapper = new ObjectMapper();

public static void main(String[] args) throws Exception {
new CountTlds().run(args);
CliTool.run(args, new CountTlds());
}

public void run(String[] args) throws Exception {
ParserProperties properties = ParserProperties.defaults().withUsageWidth(80);
CmdLineParser parser = new CmdLineParser(this, properties);
try {
parser.parseArgument(args);
} catch (CmdLineException e) {
System.err.println(e.getMessage());
System.err.println();
parser.printUsage(System.err);
System.err.println();
System.exit(1);
}
generateCdrFile();
}

private void generateCdrFile() throws Exception {
@Override
public void execute() throws Exception {

System.out.println("Reading CDR files from: "+inputPath);
System.out.println("Generating CDR file at: "+outputFile);
Expand Down
25 changes: 4 additions & 21 deletions src/main/java/focusedCrawler/memex/cdr/MergeCdrFiles.java
Original file line number | Diff line number | Diff line change
Expand Up @@ -17,20 +17,18 @@
import java.util.zip.GZIPOutputStream;

import org.apache.commons.codec.digest.DigestUtils;
import org.kohsuke.args4j.CmdLineException;
import org.kohsuke.args4j.CmdLineParser;
import org.kohsuke.args4j.Option;
import org.kohsuke.args4j.ParserProperties;

import com.fasterxml.jackson.databind.ObjectMapper;

import focusedCrawler.target.classifier.TargetClassifier;
import focusedCrawler.target.classifier.TargetClassifier.TargetRelevance;
import focusedCrawler.target.classifier.TargetClassifierFactory;
import focusedCrawler.target.model.Page;
import focusedCrawler.util.CliTool;
import focusedCrawler.util.parser.PaginaURL;

public class MergeCdrFiles {
public class MergeCdrFiles extends CliTool {

@Option(name="--input-path", usage="Path to folder with multiple CDR files", required=true)
private String inputPath;
Expand All @@ -56,25 +54,10 @@ public class MergeCdrFiles {
private Iterator<File> files;

public static void main(String[] args) throws Exception {
new MergeCdrFiles().run(args);
CliTool.run(args, new MergeCdrFiles());
}

public void run(String[] args) throws Exception {
ParserProperties properties = ParserProperties.defaults().withUsageWidth(80);
CmdLineParser parser = new CmdLineParser(this, properties);
try {
parser.parseArgument(args);
} catch (CmdLineException e) {
System.err.println(e.getMessage());
System.err.println();
parser.printUsage(System.err);
System.err.println();
System.exit(1);
}
generateCdrFile();
}

private void generateCdrFile() throws Exception {
public void execute() throws Exception {

System.out.println("Reading CDR files from: "+inputPath);
System.out.println("Generating CDR file at: "+outputFile);
Expand Down

0 comments on commit 951f083

Please sign in to comment.